854 lines
28 KiB
Python
854 lines
28 KiB
Python
import pytest
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.concept import Concept
|
|
from core.sheerka import Sheerka, ExecutionContext
|
|
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
|
|
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore
|
|
from sdp.sheerkaDataProvider import Event
|
|
|
|
|
|
class ConceptVisitor(ParsingExpressionVisitor):
    """Test helper visitor that collects every concept referenced in a parsing expression tree."""

    def __init__(self):
        # Concepts seen while visiting; a set because the same concept may appear several times.
        self.concepts = set()

    def visit_ConceptMatch(self, node):
        # Invoked once per ConceptMatch node; remember the concept it refers to.
        self.concepts.add(node.concept)
|
|
|
|
|
|
def u(parsing_expression, start, end, children=None):
    """Build the expected *underlying* parse node for ``parsing_expression``.

    Bare strings are promoted to ``StrMatch``. A ``StrMatch`` produces a
    ``TerminalNode``; any other expression produces a ``NonTerminalNode``
    wrapping ``children``.
    """
    expr = StrMatch(parsing_expression) if isinstance(parsing_expression, str) else parsing_expression

    if isinstance(expr, StrMatch):
        return TerminalNode(expr, start, end, expr.to_match)

    return NonTerminalNode(expr, start, end, [], children)
|
|
|
|
|
|
@pytest.mark.parametrize("match, text", [
    ("foo", "foo"),
    ("'foo'", "'foo'"),
    ("1", "1"),
    ("3.14", "3.14"),
    ("+", "+"),
    (StrMatch("foo"), "foo"),
    (StrMatch("'foo'"), "'foo'"),
    (StrMatch("1"), "1"),
    (StrMatch("3.14"), "3.14"),
    (StrMatch("+"), "+"),
])
def test_i_can_match_simple_tokens(match, text):
    """A single-token grammar (string or StrMatch) matches its own literal text."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: text})

    result = lexer.parse(ctx, text)

    assert result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    assert result.value.value == [ConceptNode(foo, 0, 0, source=text, underlying=u(match, 0, 0))]
|
|
|
|
|
|
def test_i_can_match_multiple_concepts_in_one_input():
    """Two registered concepts are both recognized, in input order."""
    ctx = get_context()
    one, two = Concept(name="one"), Concept(name="two")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {one: "one", two: "two"})

    result = lexer.parse(ctx, "one two one")

    assert result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    expected = [
        ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
        ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)),
        ConceptNode(one, 4, 4, source="one", underlying=u("one", 4, 4)),
    ]
    assert result.value.value == expected
|
|
|
|
|
|
def test_i_cannot_match_an_unknown_input():
    """A parser with no registered grammar reports the whole input as unknown."""
    ctx = get_context()
    lexer = ConceptLexerParser()  # intentionally not initialized: no grammar registered

    result = lexer.parse(ctx, "foo")

    assert not result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    unknown = result.value.body[0]
    assert ctx.sheerka.isinstance(unknown, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert unknown.body == "foo"
|
|
|
|
|
|
def test_i_cannot_match_when_part_of_the_input_is_unknown():
    """Recognized leading concepts are kept in try_parsed; the unknown tail fails the parse."""
    ctx = get_context()
    one, two = Concept(name="one"), Concept(name="two")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {one: "one", two: "two"})

    result = lexer.parse(ctx, "one two three")

    assert not result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    # "one" and "two" were recognized before the failure on "three"
    assert result.value.try_parsed == [
        ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
        ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)),
    ]
    assert ctx.sheerka.isinstance(result.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert result.value.body[0].body == "three"
|
|
|
|
|
|
def test_i_can_match_sequence():
    """A Sequence grammar matches its tokens in order as one concept node."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", "three")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    result = lexer.parse(ctx, "one two three")

    assert result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    expected_underlying = u(grammar[foo], 0, 4, [
        u("one", 0, 0),
        u("two", 2, 2),
        u("three", 4, 4),
    ])
    assert result.value.value == [
        ConceptNode(foo, 0, 4, source="one two three", underlying=expected_underlying)]
|
|
|
|
|
|
def test_wrong_sequence_is_not_matched():
    """Trailing tokens after a complete sequence make the overall parse fail."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Sequence("one", "two", "three")})

    result = lexer.parse(ctx, "one two three one")

    assert not result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    # the full sequence was recognized before the stray trailing "one"
    assert result.value.try_parsed == [(foo, "one two three")]
    assert ctx.sheerka.isinstance(result.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert result.value.body[0].body == "one"
|
|
|
|
|
|
def test_i_cannot_match_sequence_if_end_of_file():
    """Input that ends mid-sequence is rejected with nothing try-parsed."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Sequence("one", "two", "three")})

    result = lexer.parse(ctx, "one two")

    assert not result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    assert result.value.try_parsed == []
    assert ctx.sheerka.isinstance(result.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert result.value.body[0].body == "one"
|
|
|
|
|
|
def test_i_always_choose_the_longest_match():
    """When two grammars match a common prefix, the longest match wins."""
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")})

    result = lexer.parse(ctx, "one two three")

    assert result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    # foo (three tokens) is preferred over bar (two tokens)
    assert result.value.value == [(foo, "one two three")]
|
|
|
|
|
|
def test_i_can_match_several_sequences():
    """Two different sequence grammars can match back-to-back in one input."""
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")})

    result = lexer.parse(ctx, "one two three one two")

    assert result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    assert result.value.value == [
        (foo, 0, 4, "one two three"),
        (bar, 6, 8, "one two"),
    ]
|
|
|
|
|
|
def test_i_can_match_ordered_choice():
    """An OrderedChoice matches any one of its alternatives and nothing else."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: OrderedChoice("one", "two")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    # both alternatives are accepted
    for text in ("one", "two"):
        result = lexer.parse(ctx, text)
        assert result.status
        assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [
            ConceptNode(foo, 0, 0, source=text,
                        underlying=u(grammar[foo], 0, 0, [u(text, 0, 0)]))]

    # anything outside the choice is unknown
    rejected = lexer.parse(ctx, "three")
    assert not rejected.status
    assert ctx.sheerka.isinstance(rejected.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert rejected.value.body[0].body == "three"
|
|
|
|
|
|
def test_i_cannot_match_ordered_choice_with_empty_alternative():
    """An empty alternative does not let arbitrary input slip through the choice."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Sequence(OrderedChoice("one", ""), "two")})

    # "ok" matches neither "one", nor the empty alternative followed by "two"
    result = lexer.parse(ctx, "ok")

    assert not result.status
|
|
|
|
|
|
def test_i_can_mix_sequences_and_ordered_choices():
    """A Sequence whose head is an OrderedChoice matches with either alternative."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    for head in ("twenty", "thirty"):
        text = head + " one ok"
        result = lexer.parse(ctx, text)
        assert result.status
        assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [ConceptNode(
            foo, 0, 4, source=text,
            underlying=u(grammar[foo], 0, 4, [
                u(OrderedChoice("twenty", "thirty"), 0, 0, [u(head, 0, 0)]),
                u("one", 2, 2),
                u("ok", 4, 4)]))]

    # missing trailing "ok": the whole parse fails and nothing is kept
    truncated = lexer.parse(ctx, "twenty one")
    assert not truncated.status
    assert truncated.value.body[0].body == "twenty"
    assert truncated.value.try_parsed == []
|
|
|
|
|
|
def test_i_can_mix_ordered_choices_and_sequences():
    """An OrderedChoice may contain a full Sequence as one of its alternatives."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")})

    assert lexer.parse(ctx, "twenty thirty").status
    assert lexer.parse(ctx, "one").status
|
|
|
|
|
|
def test_i_cannot_parse_empty_optional():
    """Empty input is rejected even when the whole grammar is optional."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Optional("one")})

    result = lexer.parse(ctx, "")

    assert not result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.IS_EMPTY)
|
|
|
|
|
|
def test_i_can_parse_optional():
    """An Optional grammar matches when its content is present."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    result = lexer.parse(ctx, "one")

    assert result.status
    assert result.value.value == [
        ConceptNode(foo, 0, 0, source="one",
                    underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))]
|
|
|
|
|
|
def test_i_can_parse_sequence_starting_with_optional():
    """A leading Optional element may be present or absent."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Sequence(Optional("twenty"), "one")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    # optional present
    result = lexer.parse(ctx, "twenty one")
    assert result.status
    assert result.value.body == [ConceptNode(
        foo, 0, 2,
        source="twenty one",
        underlying=u(grammar[foo], 0, 2, [
            u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
            u("one", 2, 2)]))]

    # optional absent
    result = lexer.parse(ctx, "one")
    assert result.status
    assert result.value.body == [
        ConceptNode(foo, 0, 0, source="one",
                    underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))]
|
|
|
|
|
|
def test_i_can_parse_sequence_ending_with_optional():
    """A trailing Optional element may be present or absent."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Sequence("one", "two", Optional("three"))})

    with_opt = lexer.parse(ctx, "one two three")
    assert with_opt.status
    assert with_opt.value.body == [(foo, 0, 4, "one two three")]

    without_opt = lexer.parse(ctx, "one two")
    assert without_opt.status
    assert without_opt.value.body == [(foo, 0, 2, "one two")]
|
|
|
|
|
|
def test_i_can_parse_sequence_with_optional_in_between():
    """An Optional element in the middle of a Sequence may be skipped."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Sequence("one", Optional("two"), "three")})

    with_opt = lexer.parse(ctx, "one two three")
    assert with_opt.status
    assert with_opt.value.body == [(foo, 0, 4, "one two three")]

    without_opt = lexer.parse(ctx, "one three")
    assert without_opt.status
    assert without_opt.value.body == [(foo, 0, 2, "one three")]
|
|
|
|
|
|
def test_i_cannot_parse_wrong_input_with_optional():
    """A token outside an Optional grammar is reported unknown, not skipped."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Optional("one")})

    result = lexer.parse(ctx, "two")

    assert not result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    assert result.value.try_parsed == []
    assert ctx.sheerka.isinstance(result.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert result.value.body[0].body == "two"
|
|
|
|
|
|
def test_i_can_use_reference():
    """A concept whose grammar is another concept yields one result per interpretation."""
    # When several concepts match the same input, one result is returned for
    # every possible graph — no intermediate "choice" concept is created.
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    results = lexer.parse(ctx, "one two")
    assert len(results) == 2

    foo_underlying = u(grammar[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)])

    assert results[0].status
    assert ctx.sheerka.isinstance(results[0].value, BuiltinConcepts.PARSER_RESULT)
    assert results[0].value.body == [
        ConceptNode(foo, 0, 2, source="one two", underlying=foo_underlying)]

    assert results[1].status
    assert ctx.sheerka.isinstance(results[1].value, BuiltinConcepts.PARSER_RESULT)
    assert results[1].value.body == [
        ConceptNode(bar, 0, 2, source="one two",
                    underlying=u(ConceptMatch(foo, rule_name="foo"), 0, 2, [foo_underlying]))]
|
|
|
|
|
|
def test_i_can_use_context_reference_with_multiple_levels():
    """Same as test_i_can_use_reference, but through a reference of a reference."""
    ctx = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Sequence("one", "two"), bar: foo, baz: bar})

    results = lexer.parse(ctx, "one two")
    assert len(results) == 3

    # one interpretation per level of reference, in declaration order
    for result, concept in zip(results, (foo, bar, baz)):
        assert result.status
        assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [(concept, 0, 2, "one two")]
|
|
|
|
|
|
def test_order_is_not_important_when_using_references():
    """A concept may reference another one declared later in the grammar dict."""
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {bar: foo, foo: Sequence("one", "two")})

    results = lexer.parse(ctx, "one two")

    assert len(results) == 2
    assert results[0].value.body == [(bar, 0, 2, "one two")]
    assert results[1].value.body == [(foo, 0, 2, "one two")]
|
|
|
|
|
|
def test_i_can_parse_when_reference():
    """A referenced concept can be used inside a Sequence element."""
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {
        bar: Sequence(foo, OrderedChoice("one", "two")),
        foo: OrderedChoice("twenty", "thirty"),
    })

    for text in ("twenty two", "thirty one"):
        result = lexer.parse(ctx, text)
        assert result.status
        assert result.value.body == [(bar, 0, 2, text)]

    # foo alone still matches when the trailing choice is absent
    result = lexer.parse(ctx, "twenty")
    assert result.status
    assert result.value.body == [(foo, 0, 0, "twenty")]
|
|
|
|
|
|
def test_i_can_detect_duplicates_when_reference():
    """When bar's optional tail is absent, both bar and foo interpretations are returned."""
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {
        bar: Sequence(foo, Optional(OrderedChoice("one", "two"))),
        foo: OrderedChoice("twenty", "thirty"),
    })

    results = lexer.parse(ctx, "twenty")
    assert len(results) == 2

    for result, concept in zip(results, (bar, foo)):
        assert result.status
        assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [(concept, 0, 0, "twenty")]
|
|
|
|
|
|
def test_i_can_parse_concept_reference_that_is_not_in_grammar():
    """Concepts referenced only from the sheerka cache can still be parsed."""
    ctx = get_context()
    one, two, foo = Concept(name="one"), Concept(name="two"), Concept(name="foo")
    ctx.sheerka.add_in_cache(one)
    ctx.sheerka.add_in_cache(two)

    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: Sequence("twenty", OrderedChoice(one, two))})

    for text in ("twenty two", "twenty one"):
        result = lexer.parse(ctx, text)
        assert result.status
        assert result.value.body == [(foo, 0, 2, text)]
|
|
|
|
|
|
def test_i_can_parse_zero_or_more():
    """ZeroOrMore matches consecutive repetitions of its content."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    result = lexer.parse(ctx, "one one")

    assert result.status
    assert result.value.value == [
        ConceptNode(foo, 0, 2, source="one one",
                    underlying=u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)]))]
|
|
|
|
|
|
def test_i_can_parse_sequence_and_zero_or_more():
    """ZeroOrMore inside a Sequence accepts both some and zero repetitions."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Sequence(ZeroOrMore("one"), "two")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    # two repetitions present
    result = lexer.parse(ctx, "one one two")
    assert result.status
    assert result.value.value == [ConceptNode(
        foo, 0, 4, source="one one two",
        underlying=u(grammar[foo], 0, 4, [
            u(ZeroOrMore("one"), 0, 2, [u("one", 0, 0), u("one", 2, 2)]),
            u("two", 4, 4)]))]

    # zero repetitions
    result = lexer.parse(ctx, "two")
    assert result.status
    assert result.value.value == [
        ConceptNode(foo, 0, 0, source="two",
                    underlying=u(grammar[foo], 0, 0, [u("two", 0, 0)]))]
|
|
|
|
|
|
def test_i_cannot_parse_zero_and_more_when_wrong_entry():
    """A non-matching token stops ZeroOrMore and fails the parse."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: ZeroOrMore("one")})

    # first token matches, second one does not
    result = lexer.parse(ctx, "one two")
    assert not result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    assert result.value.try_parsed == [
        ConceptNode(foo, 0, 0, source="one",
                    underlying=u(ZeroOrMore("one"), 0, 0, [u("one", 0, 0)]))]
    assert ctx.sheerka.isinstance(result.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert result.value.body[0].body == "two"

    # nothing matches at all
    result = lexer.parse(ctx, "two")
    assert not result.status
    assert ctx.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    assert result.value.try_parsed == []
    assert ctx.sheerka.isinstance(result.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert result.value.body[0].body == "two"
|
|
|
|
|
|
def test_i_can_parse_zero_and_more_with_separator():
    """ZeroOrMore with a separator tolerates whitespace variations around it."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one", sep=",")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    result = lexer.parse(ctx, "one, one , one")

    assert result.status
    assert result.value.value == [ConceptNode(
        foo, 0, 7, source="one, one , one",
        underlying=u(grammar[foo], 0, 7, [
            u("one", 0, 0), u("one", 3, 3), u("one", 7, 7)]))]
|
|
|
|
|
|
def test_that_zero_and_more_is_greedy():
    """ZeroOrMore consumes as many tokens as possible, beating a shorter match."""
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: ZeroOrMore("one"), bar: "one"})

    result = lexer.parse(ctx, "one one one")

    assert result.status
    assert result.value.value == [(foo, 0, 4, "one one one")]
|
|
|
|
|
|
def test_i_can_parse_one_and_more():
    """OneOrMore matches consecutive repetitions of its content."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    result = lexer.parse(ctx, "one one")

    assert result.status
    assert result.value.value == [
        ConceptNode(foo, 0, 2, source="one one",
                    underlying=u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)]))]
|
|
|
|
|
|
def test_i_can_parse_sequence_and_one_or_more():
    """OneOrMore inside a Sequence requires at least one repetition.

    Fixed: the expected underlying node previously used ``ZeroOrMore("one")``
    (copy-pasted from test_i_can_parse_sequence_and_zero_or_more) while the
    grammar under test declares ``OneOrMore("one")``; the expectation now
    mirrors the actual grammar.
    """
    context = get_context()
    foo = Concept(name="foo")

    concepts = {foo: Sequence(OneOrMore("one"), "two")}
    parser = ConceptLexerParser()
    parser.initialize(context, concepts)

    # at least one repetition present: the sequence matches
    res = parser.parse(context, "one one two")
    assert res.status
    assert res.value.value == [ConceptNode(foo, 0, 4, source="one one two",
                                           underlying=u(concepts[foo], 0, 4, [
                                               u(OneOrMore("one"), 0, 2, [
                                                   u("one", 0, 0),
                                                   u("one", 2, 2)]),
                                               u("two", 4, 4)]))]

    # unlike ZeroOrMore, zero repetitions are not accepted
    res = parser.parse(context, "two")
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.try_parsed == []
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "two"
|
|
|
|
|
|
def test_i_can_parse_one_and_more_with_separator():
    """OneOrMore with a separator tolerates whitespace variations around it."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one", sep=",")}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, grammar)

    result = lexer.parse(ctx, "one, one , one")

    assert result.status
    assert result.value.value == [ConceptNode(
        foo, 0, 7, source="one, one , one",
        underlying=u(grammar[foo], 0, 7, [
            u("one", 0, 0), u("one", 3, 3), u("one", 7, 7)]))]
|
|
|
|
|
|
def test_that_one_and_more_is_greedy():
    """OneOrMore consumes as many tokens as possible, beating a shorter match."""
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {foo: OneOrMore("one"), bar: "one"})

    result = lexer.parse(ctx, "one one one")

    assert result.status
    assert result.value.value == [(foo, 0, 4, "one one one")]
|
|
|
|
|
|
def test_i_can_detect_infinite_recursion():
    """Mutually-referencing concepts are dropped from the registered grammars."""
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(get_context(), {bar: foo, foo: bar})

    assert bar not in lexer.concepts_grammars
    assert foo not in lexer.concepts_grammars
|
|
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
    """Left recursion via an OrderedChoice's first alternative is rejected; a later alternative is fine."""
    foo, bar = Concept(name="foo"), Concept(name="bar")

    # recursion through the FIRST alternative: both concepts are removed
    lexer = ConceptLexerParser()
    lexer.initialize(get_context(), {bar: foo, foo: OrderedChoice(bar, "foo")})
    assert foo not in lexer.concepts_grammars
    assert bar not in lexer.concepts_grammars

    # recursion through a LATER alternative is possible
    ctx = get_context()
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, {bar: foo, foo: OrderedChoice("foo", bar)})
    assert foo in lexer.concepts_grammars
    assert bar in lexer.concepts_grammars

    results = lexer.parse(ctx, "foo")
    assert len(results) == 2
    assert results[0].status
    assert results[0].value.body == [(bar, 0, 0, "foo")]
    assert results[1].status
    assert results[1].value.body == [(foo, 0, 0, "foo")]
|
|
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
    """Recursion hidden inside a Sequence element is still detected."""
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(get_context(), {bar: foo, foo: Sequence("one", bar, "two")})

    # both concepts are removed because of the infinite recursion
    assert foo not in lexer.concepts_grammars
    assert bar not in lexer.concepts_grammars
|
|
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
    """Recursion through an OrderedChoice nested in a Sequence is detected."""
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(get_context(),
                     {bar: foo, foo: Sequence("one", OrderedChoice(bar, "other"), "two")})

    # both concepts are removed because of the infinite recursion
    assert foo not in lexer.concepts_grammars
    assert bar not in lexer.concepts_grammars
|
|
|
|
|
|
def test_infinite_recursion_does_not_fail_if_a_concept_is_missing():
    """Referencing a concept absent from the grammar is not mistaken for recursion."""
    foo, bar = Concept(name="foo"), Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize(get_context(), {foo: bar})  # bar itself has no grammar entry

    assert foo in lexer.concepts_grammars
|
|
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_optional():
    # TODO: implement — infinite recursion detection through an Optional element
    pass
|
|
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more():
    # TODO: implement — infinite recursion detection through a ZeroOrMore element
    # (comment previously said "optional": copy-paste leftover)
    pass
|
|
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_one_and_more():
    # TODO: implement — infinite recursion detection through a OneOrMore element
    # (comment previously said "optional": copy-paste leftover)
    pass
|
|
|
|
|
|
def test_i_can_visit_parsing_expression():
    # Collect every concept referenced anywhere in a nested parsing expression.
    mult = Concept(name="mult")

    add = Concept(name="add")

    visitor = ConceptVisitor()

    visitor.visit(Sequence(mult, Optional(Sequence("+", add))))

    # NOTE(review): the set holds Concept objects (visit_ConceptMatch adds
    # node.concept), yet this sorts them and compares against plain strings —
    # relies on Concept defining ordering and equality with str; confirm.
    assert sorted(list(visitor.concepts)) == ["add", "mult"]
|
|
|
|
|
|
def test_i_can_initialize_rule_names():
    """initialize() returns, per concept, the rule name its grammar was given."""
    ctx = get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")

    lexer = ConceptLexerParser()
    return_value = lexer.initialize(ctx, {foo: Sequence("one", "two"), bar: foo}).body

    # foo owns its expression (no rule name); bar is a reference to foo
    assert return_value[foo].rule_name == ""
    assert return_value[bar].rule_name == "foo"
|
|
|
|
|
|
#
|
|
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
|
|
# context = get_context()
|
|
# add = Concept(name="add")
|
|
# mult = Concept(name="mult")
|
|
# atom = Concept(name="atom")
|
|
#
|
|
# concepts = {
|
|
# add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))),
|
|
# mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))),
|
|
# atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
|
|
# }
|
|
#
|
|
# parser = ConceptLexerParser()
|
|
# parser.register(concepts)
|
|
#
|
|
# # res = parser.parse(context, "1")
|
|
# # assert len(res) == 3 # add, mult, atom
|
|
# #
|
|
# # res = parser.parse(context, "1 * 2")
|
|
# # assert len(res) == 2 # add and mult
|
|
# #
|
|
# # res = parser.parse(context, "1 + 2")
|
|
# # assert res.status
|
|
# # assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2")]
|
|
#
|
|
# res = parser.parse(context, "1 * 2 + 3")
|
|
# assert res.status
|
|
# assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")]
|
|
|
|
|
|
def test_i_can_register_concepts_with_the_same_name():
    # TODO: concepts are registered by name — define and test the behavior
    # when two distinct concepts share the same name.
    pass
|
|
|
|
|
|
def test_i_can_parse_very_very_long_input():
    # TODO: the current implementation loads all tokens in memory, which is
    # clearly not the right approach for very long input — cover it here once
    # streaming tokenization exists.
    pass
|
|
|
|
|
|
def get_context():
    """Build a fresh ExecutionContext backed by an in-memory Sheerka instance."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("sheerka", Event(), engine)
|