import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import (
    ConceptLexerParser,
    ConceptNode,
    CrossRef,
    Optional,
    OrderedChoice,
    Sequence,
    StrMatch,
)


@pytest.mark.parametrize("match, text", [
    ("foo", "foo"),
    ("'foo'", "'foo'"),
    ("1", "1"),
    ("3.14", "3.14"),
    ("+", "+"),
    (StrMatch("foo"), "foo"),
    (StrMatch("'foo'"), "'foo'"),
    (StrMatch("1"), "1"),
    (StrMatch("3.14"), "3.14"),
    (StrMatch("+"), "+"),
])
def test_i_can_match_simple_tokens(match, text):
    """A single-token grammar recognizes the matching single-token input.

    ``match`` is the grammar definition (a plain string or an explicit
    ``StrMatch``) and ``text`` is the input that is parsed.
    """
    context = get_context()
    foo = Concept(name="foo")
    # Build the grammar from ``match`` (not ``text``): otherwise the StrMatch
    # cases above would only repeat the plain-string cases.
    concepts = {foo: match}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, text)

    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.value == [ConceptNode(foo, 0, 0, source=text)]


def test_i_can_match_multiple_concepts_in_one_input():
    """Several concepts in one input are returned in order of appearance."""
    context = get_context()
    one = Concept(name="one")
    two = Concept(name="two")
    concepts = {one: "one", two: "two"}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two one")

    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # The expected positions are 0, 2 and 4 — presumably because the
    # whitespace between the words is tokenized too (TODO confirm).
    assert res.value.value == [
        ConceptNode(one, 0, 0, source="one"),
        ConceptNode(two, 2, 2, source="two"),
        ConceptNode(one, 4, 4, source="one"),
    ]


def test_i_cannot_match_an_unknown_input():
    """Parsing without any registered grammar reports an unknown concept."""
    context = get_context()
    parser = ConceptLexerParser()  # no grammar registered

    res = parser.parse(context, "foo")

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "foo"


def test_i_cannot_match_when_part_of_the_input_is_unknown():
    """Parsing fails on an unknown token but keeps what was recognized."""
    context = get_context()
    one = Concept(name="one")
    two = Concept(name="two")
    concepts = {one: "one", two: "two"}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two three")

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # These two were recognized before the unknown token was reached.
    assert res.value.try_parsed == [
        ConceptNode(one, 0, 0, source="one"),
        ConceptNode(two, 2, 2, source="two"),
    ]
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "three"


def test_i_can_match_sequence():
    """A ``Sequence`` grammar matches its elements in order as one node."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two three")

    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]


def test_wrong_sequence_is_not_matched():
    """A trailing token that starts an incomplete sequence makes parsing fail."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two three one")

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # The complete sequence is still reported as recognized.
    assert res.value.try_parsed == [ConceptNode(foo, 0, 4, source="one two three")]
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "one"


def test_i_cannot_match_sequence_if_end_of_file():
    """A sequence cut short by the end of the input is not matched."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two")

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.try_parsed == []
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "one"


def test_i_always_choose_the_longest_match():
    """When two sequences share a prefix, the longer match is returned."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two three")

    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]


def test_i_can_match_several_sequences():
    """Consecutive sequences in one input are each matched in turn."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two three one two")

    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.value == [
        ConceptNode(foo, 0, 4, source="one two three"),
        ConceptNode(bar, 6, 8, source="one two"),
    ]


def test_i_can_match_ordered_choice():
    """An ``OrderedChoice`` matches any listed alternative and nothing else."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: OrderedChoice("one", "two")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res1 = parser.parse(context, "one")
    assert res1.status
    assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
    assert res1.value.body == [ConceptNode(foo, 0, 0, source="one")]

    res2 = parser.parse(context, "two")
    assert res2.status
    assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
    assert res2.value.body == [ConceptNode(foo, 0, 0, source="two")]

    res3 = parser.parse(context, "three")
    assert not res3.status
    assert context.sheerka.isinstance(res3.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res3.value.body[0].body == "three"


def test_i_cannot_match_ordered_choice_with_empty_alternative():
    """A sequence whose leading choice has an empty alternative rejects "ok"."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence(OrderedChoice("one", ""), "two")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    # Fails because the first token ("ok") is neither "one" nor "".
    # NOTE(review): the earlier comment described that token as 'two', so the
    # intended input may have been "two" — confirm before changing it.
    res = parser.parse(context, "ok")

    assert not res.status


def test_i_can_mix_sequences_and_ordered_choices():
    """An ``OrderedChoice`` can be used as an element of a ``Sequence``."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res1 = parser.parse(context, "twenty one ok")
    assert res1.status
    assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
    assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok")]

    res2 = parser.parse(context, "thirty one ok")
    assert res2.status
    assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
    assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok")]

    # Incomplete sequence: nothing is recognized and the first token is
    # the one reported.
    res3 = parser.parse(context, "twenty one")
    assert not res3.status
    assert res3.value.body[0].body == "twenty"
    assert res3.value.try_parsed == []


def test_i_can_mix_ordered_choices_and_sequences():
    """A ``Sequence`` can be used as an alternative of an ``OrderedChoice``."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "twenty thirty")
    assert res.status

    res = parser.parse(context, "one")
    assert res.status


def test_i_cannot_parse_empty_optional():
    """Empty input is rejected with ``IS_EMPTY`` even for an optional grammar."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Optional("one")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "")

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.IS_EMPTY)


def test_i_can_parse_optional():
    """An ``Optional`` grammar matches when its element is present."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Optional("one")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one")

    assert res.status
    assert res.value.value == [ConceptNode(foo, 0, 0, source="one")]


def test_i_can_parse_sequence_starting_with_optional():
    """A leading ``Optional`` element may be present or absent."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence(Optional("twenty"), "one")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "twenty one")
    assert res.status
    assert res.value.body == [ConceptNode(foo, 0, 2, source="twenty one")]

    res = parser.parse(context, "one")
    assert res.status
    assert res.value.body == [ConceptNode(foo, 0, 0, source="one")]


def test_i_can_parse_sequence_ending_with_optional():
    """A trailing ``Optional`` element may be present or absent."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence("one", "two", Optional("three"))}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two three")
    assert res.status
    assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")]

    res = parser.parse(context, "one two")
    assert res.status
    assert res.value.body == [ConceptNode(foo, 0, 2, source="one two")]


def test_i_can_parse_sequence_with_optional_in_between():
    """An ``Optional`` element in the middle may be present or absent."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence("one", Optional("two"), "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two three")
    assert res.status
    assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")]

    res = parser.parse(context, "one three")
    assert res.status
    assert res.value.body == [ConceptNode(foo, 0, 2, source="one three")]


def test_i_can_use_reference():
    """A concept defined as a reference to another yields one result each."""
    # Design note: several concepts can match the same input. A single parsing
    # result is a list of the concepts found, i.e. it already represents a
    # sequence, not a choice. Two options were considered:
    #   - create a dedicated "choice" concept, or
    #   - create one return value for every possible graph.
    # The latter seems best, as it does not defer the resolution of the
    # ambiguity to someone else.
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {foo: Sequence("one", "two"), bar: foo}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two")

    assert len(res) == 2
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]


def test_i_can_use_context_reference_with_multiple_levels():
    """References can be chained: ``baz`` refers to ``bar``, which refers to ``foo``.

    One result is expected per concept in the chain.
    """
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two")

    assert len(res) == 3
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]

    assert res[2].status
    assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
    assert res[2].value.body == [ConceptNode(baz, 0, 2, source="one two")]


def test_order_is_not_important_when_using_references():
    """A reference may be declared before the concept it refers to."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {bar: foo, foo: Sequence("one", "two")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "one two")

    assert len(res) == 2
    # Here ``bar`` is expected first, matching its position in ``concepts``.
    assert res[0].value.body == [ConceptNode(bar, 0, 2, source="one two")]
    assert res[1].value.body == [ConceptNode(foo, 0, 2, source="one two")]


def test_i_can_parse_when_reference():
    """A referenced concept can be used as an element of a ``Sequence``."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {
        bar: Sequence(foo, OrderedChoice("one", "two")),
        foo: OrderedChoice("twenty", "thirty"),
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "twenty two")
    assert res.status
    assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]

    res = parser.parse(context, "thirty one")
    assert res.status
    assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]

    # On its own, the referenced concept is still matched directly.
    res = parser.parse(context, "twenty")
    assert res.status
    assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]


def test_i_can_detect_duplicates_when_reference():
    """An input matched by both a concept and its referrer yields two results."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {
        bar: Sequence(foo, Optional(OrderedChoice("one", "two"))),
        foo: OrderedChoice("twenty", "thirty"),
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    res = parser.parse(context, "twenty")

    assert len(res) == 2
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [ConceptNode(bar, 0, 0, source="twenty")]

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [ConceptNode(foo, 0, 0, source="twenty")]


def test_i_can_detect_infinite_recursion():
    """Two concepts that only refer to each other are both dropped."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {
        bar: foo,
        foo: bar,
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    assert bar not in parser.concepts_dict
    assert foo not in parser.concepts_dict


def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
    """A cycle through the first alternative of an ``OrderedChoice`` is rejected.

    The same reference placed after a terminal alternative is accepted.
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {
        bar: foo,
        foo: OrderedChoice(bar, "foo"),
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    assert foo not in parser.concepts_dict  # removed because of the infinite recursion
    assert bar not in parser.concepts_dict  # removed because of the infinite recursion

    # The other way around is possible.
    context = get_context()
    concepts = {
        bar: foo,
        foo: OrderedChoice("foo", bar),
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    assert foo in parser.concepts_dict
    assert bar in parser.concepts_dict

    res = parser.parse(context, "foo")
    assert len(res) == 2
    assert res[0].status
    assert res[0].value.body == [ConceptNode(bar, 0, 0, source="foo")]
    assert res[1].status
    assert res[1].value.body == [ConceptNode(foo, 0, 0, source="foo")]


def test_i_can_detect_indirect_infinite_recursion_with_sequence():
    """A cycle through an element of a ``Sequence`` drops both concepts."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {
        bar: foo,
        foo: Sequence("one", bar, "two"),
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    assert foo not in parser.concepts_dict  # removed because of the infinite recursion
    assert bar not in parser.concepts_dict  # removed because of the infinite recursion


def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
    """A cycle through a choice nested in a ``Sequence`` drops both concepts."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {
        bar: foo,
        foo: Sequence("one", OrderedChoice(bar, "other"), "two"),
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)

    assert foo not in parser.concepts_dict  # removed because of the infinite recursion
    assert bar not in parser.concepts_dict  # removed because of the infinite recursion


@pytest.mark.skip(reason="TODO: infinite recursion with optional is not covered yet")
def test_i_can_detect_indirect_infinite_recursion_with_optional():
    """Placeholder, skipped so that the missing coverage shows up in reports."""
    # TODO infinite recursion with optional


# Disabled test, kept for reference.
# NOTE(review): it calls ``parser.register`` whereas the active tests call
# ``parser.initialize``, and the last expected ``source`` ("1 + 2 + 3") does
# not match the parsed input ("1 * 2 + 3") — fix both before re-enabling.
#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
#     context = get_context()
#     add = Concept(name="add")
#     mult = Concept(name="mult")
#     atom = Concept(name="atom")
#
#     concepts = {
#         add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))),
#         mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))),
#         atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
#     }
#
#     parser = ConceptLexerParser()
#     parser.register(concepts)
#
#     # res = parser.parse(context, "1")
#     # assert len(res) == 3  # add, mult, atom
#     #
#     # res = parser.parse(context, "1 * 2")
#     # assert len(res) == 2  # add and mult
#     #
#     # res = parser.parse(context, "1 + 2")
#     # assert res.status
#     # assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2")]
#
#     res = parser.parse(context, "1 * 2 + 3")
#     assert res.status
#     assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")]


@pytest.mark.skip(reason="TODO: behaviour for concepts sharing the same name is undefined")
def test_i_can_register_concepts_with_the_same_name():
    """Placeholder, skipped so that the missing coverage shows up in reports."""
    # TODO: concepts are registered by name,
    # what happens when two concepts have the same name?


@pytest.mark.skip(reason="TODO: very long inputs are not covered yet")
def test_i_can_parse_very_very_long_input():
    """Placeholder, skipped so that the missing coverage shows up in reports."""
    # TODO: in the current implementation, all the tokens are loaded in memory.
    # That is clearly not a good approach.


def get_context():
    """Return a fresh ``ExecutionContext`` for a test.

    The context wraps a ``Sheerka`` created with ``skip_builtins_in_db=True``
    and initialized on the ``"mem://"`` location.
    """
    sheerka = Sheerka(skip_builtins_in_db=True)
    sheerka.initialize("mem://")
    return ExecutionContext("sheerka", "xxxx", sheerka)