"""Tests for ConceptLexerParser.

Each test builds a small grammar (a dict mapping a Concept to a parsing
expression), parses a text and checks the returned nodes.  Judging from the
expected values used throughout this file, node start/end positions are token
indexes and whitespace tokens are counted ("one two" spans 0..2).
"""
import pytest

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
    ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore, \
    UnrecognizedTokensNode
from sdp.sheerkaDataProvider import Event


class ConceptVisitor(ParsingExpressionVisitor):
    """Visitor that collects the concept of every ConceptMatch node it visits."""

    def __init__(self):
        self.concepts = set()

    def visit_ConceptMatch(self, node):
        self.concepts.add(node.concept)


def u(parsing_expression, start, end, children=None):
    """
    u stands for underlying.

    Build the expected 'underlying' node of a ConceptNode.

    :param parsing_expression: a str (wrapped into a StrMatch) or a parsing expression
    :param start: start position of the node
    :param end: end position of the node
    :param children: child nodes, only used for non terminal expressions
    :return: a TerminalNode for a str / StrMatch, a NonTerminalNode otherwise
    """
    if isinstance(parsing_expression, str):
        parsing_expression = StrMatch(parsing_expression)

    if isinstance(parsing_expression, StrMatch):
        return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)

    return NonTerminalNode(parsing_expression, start, end, [], children)


def evaluated(concept):
    """
    Return a new Concept with the same name as 'concept' and whose body is that name.

    The concept used to be built and then dropped (the function returned None).
    """
    return Concept(name=concept.name, body=concept.name)


def t(text):
    """
    Build a Token from its text; line/column/index information is set to 0.

    A text starting with a quote gives a STRING token, a text starting with a
    space gives a WHITESPACE token, anything else gives an IDENTIFIER token.
    """
    if text.startswith(("'", '"')):
        return Token(TokenKind.STRING, text, 0, 0, 0)

    if text.startswith(" "):
        return Token(TokenKind.WHITESPACE, text, 0, 0, 0)

    return Token(TokenKind.IDENTIFIER, text, 0, 0, 0)


def get_context():
    """Return an ExecutionContext backed by a Sheerka initialized on "mem://"."""
    sheerka = Sheerka(skip_builtins_in_db=True)
    sheerka.initialize("mem://")

    return ExecutionContext("sheerka", Event(), sheerka)


def get_expected(concept, text=None):
    """
    Build the concept expected in a parsing result.

    The returned concept has the same name as 'concept' and its compiled body
    is DoNotResolve(text), or DoNotResolve(concept.name) when text is falsy.
    """
    c = Concept(name=concept.name)
    c.cached_asts[ConceptParts.BODY] = DoNotResolve(text or concept.name)
    c.init_key()
    return c


def cbody(concept):
    """cbody stands for compiled body"""
    return concept.cached_asts[ConceptParts.BODY]


def cprop(concept, prop_name):
    """cprop stands for compiled property"""
    return concept.cached_asts[prop_name]


def init(concepts, grammar):
    """
    Create a context whose cache holds 'concepts' and a parser initialized with 'grammar'.

    :return: (context, parser)
    """
    context = get_context()
    for c in concepts:
        context.sheerka.add_in_cache(c)

    parser = ConceptLexerParser()
    parser.initialize(context, grammar)

    return context, parser


def execute(concepts, grammar, text):
    """
    Initialize a parser (see init()) and parse 'text' with it.

    Reads res.value, so it is only suitable when parse() gives back a single
    result (several tests below get a list instead and use init() directly).

    :return: (context, res, res.value, res.value.value)
    """
    context, parser = init(concepts, grammar)

    res = parser.parse(context, text)
    wrapper = res.value
    return_value = res.value.value

    return context, res, wrapper, return_value


@pytest.mark.parametrize("match, text", [
    ("foo", "foo"),
    ("'foo'", "'foo'"),
    ("1", "1"),
    ("3.14", "3.14"),
    ("+", "+"),
    (StrMatch("foo"), "foo"),
    (StrMatch("'foo'"), "'foo'"),
    (StrMatch("1"), "1"),
    (StrMatch("3.14"), "3.14"),
    (StrMatch("+"), "+"),
])
def test_i_can_match_simple_tokens(match, text):
    """A single token is matched, whether the grammar uses a plain str or a StrMatch."""
    foo = Concept(name="foo")
    grammar = {foo: match}

    context, res, wrapper, return_value = execute([foo], grammar, text)

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))]


def test_i_can_match_multiple_concepts_in_one_input():
    """Several concepts can be recognized one after the other in the same input."""
    one = Concept(name="one")
    two = Concept(name="two")
    grammar = {one: "one", two: "two"}

    context, res, wrapper, return_value = execute([one, two], grammar, "one two one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [
        ConceptNode(get_expected(one), 0, 0, source="one", underlying=u("one", 0, 0)),
        ConceptNode(get_expected(two), 2, 2, source="two", underlying=u("two", 2, 2)),
        ConceptNode(get_expected(one), 4, 4, source="one", underlying=u("one", 4, 4)),
    ]


def test_i_can_match_sequence():
    """A Sequence matches when all its elements are found in order."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", "three")}

    context, res, wrapper, return_value = execute([foo], grammar, "one two three")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [
        ConceptNode(
            get_expected(foo, "one two three"), 0, 4,
            source="one two three",
            underlying=u(grammar[foo], 0, 4, [
                u("one", 0, 0),
                u("two", 2, 2),
                u("three", 4, 4)]))]


def test_i_always_choose_the_longest_match():
    """When 'bar' matches a prefix of what 'foo' matches, only 'foo' is returned."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}

    context, res, wrapper, return_value = execute([foo, bar], grammar, "one two three")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", 0, 4, "one two three")]


def test_i_can_match_several_sequences():
    """Two different sequences are recognized one after the other."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}

    context, res, wrapper, return_value = execute([foo, bar], grammar, "one two three one two")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [
        ("foo", 0, 4, "one two three"),
        ("bar", 6, 8, "one two"),
    ]


def test_i_can_match_ordered_choice():
    """An OrderedChoice matches any of its alternatives and fails on anything else."""
    foo = Concept(name="foo")
    grammar = {foo: OrderedChoice("one", "two")}
    context, parser = init([foo], grammar)

    res1 = parser.parse(context, "one")
    assert res1.status
    assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
    assert res1.value.body == [("foo", 0, 0, "one")]
    assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)])

    res2 = parser.parse(context, "two")
    assert res2.status
    assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
    assert res2.value.body == [("foo", 0, 0, "two")]
    assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)])

    res3 = parser.parse(context, "three")
    assert not res3.status
    assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT)
    assert res3.value.value == [
        UnrecognizedTokensNode(0, 0, [t("three")])
    ]


def test_i_cannot_match_ordered_choice_with_empty_alternative():
    """An empty alternative in an OrderedChoice does not make unrelated input match."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(OrderedChoice("one", ""), "two")}

    context, res, wrapper, return_value = execute([foo], grammar, "ok")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [
        UnrecognizedTokensNode(0, 0, [t("ok")])
    ]


def test_i_can_mix_sequences_and_ordered_choices():
    """A Sequence may start with an OrderedChoice; an incomplete sequence is rejected."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
    context, parser = init([foo], grammar)

    res1 = parser.parse(context, "twenty one ok")
    assert res1.status
    assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
    assert res1.value.body == [ConceptNode(get_expected(foo, "twenty one ok"), 0, 4,
                                           source="twenty one ok",
                                           underlying=u(grammar[foo], 0, 4, [
                                               u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]),
                                               u("one", 2, 2),
                                               u("ok", 4, 4)]))]

    res2 = parser.parse(context, "thirty one ok")
    assert res2.status
    assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
    assert res2.value.body == [ConceptNode(get_expected(foo, "thirty one ok"), 0, 4,
                                           source="thirty one ok",
                                           underlying=u(grammar[foo], 0, 4, [
                                               u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]),
                                               u("one", 2, 2),
                                               u("ok", 4, 4)]))]

    res3 = parser.parse(context, "twenty one")
    assert not res3.status
    # this assertion used to re-check res2.value (copy/paste slip)
    assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT)
    assert res3.value.value == [
        UnrecognizedTokensNode(0, 2, [t("twenty"), t(" "), t("one")])
    ]


def test_i_can_mix_ordered_choices_and_sequences():
    """An OrderedChoice may have a Sequence as one of its alternatives."""
    foo = Concept(name="foo")
    grammar = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}
    context, parser = init([foo], grammar)

    res = parser.parse(context, "twenty thirty")
    assert res.status
    assert res.value.value == [("foo", 0, 2, "twenty thirty")]

    res = parser.parse(context, "one")
    assert res.status
    assert res.value.value == [("foo", 0, 0, "one")]


def test_i_cannot_parse_empty_optional():
    """Parsing an empty text fails with IS_EMPTY, even if the grammar is an Optional."""
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}
    context, parser = init([foo], grammar)

    res = parser.parse(context, "")
    return_value = res.value

    assert not res.status
    assert context.sheerka.isinstance(return_value, BuiltinConcepts.IS_EMPTY)


def test_i_can_parse_optional():
    """An Optional matches when its inner expression is present."""
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}

    context, res, wrapper, return_value = execute([foo], grammar, "one")

    assert res.status
    assert return_value == [ConceptNode(get_expected(foo, "one"), 0, 0,
                                        source="one",
                                        underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))]


def test_i_can_parse_sequence_starting_with_optional():
    """A Sequence starting with an Optional matches with or without the optional part."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(Optional("twenty"), "one")}
    context, parser = init([foo], grammar)

    res = parser.parse(context, "twenty one")
    assert res.status
    assert res.value.body == [ConceptNode(
        get_expected(foo, "twenty one"), 0, 2,
        source="twenty one",
        underlying=u(grammar[foo], 0, 2, [
            u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
            u("one", 2, 2)]
                     ))]

    res = parser.parse(context, "one")
    assert res.status
    assert res.value.body == [ConceptNode(get_expected(foo, "one"), 0, 0,
                                          source="one",
                                          underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))]


def test_i_can_parse_sequence_ending_with_optional():
    """A Sequence ending with an Optional matches with or without the optional part."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", Optional("three"))}
    context, parser = init([foo], grammar)

    res = parser.parse(context, "one two three")
    assert res.status
    assert res.value.body == [("foo", 0, 4, "one two three")]

    res = parser.parse(context, "one two")
    assert res.status
    assert res.value.body == [("foo", 0, 2, "one two")]


def test_i_can_parse_sequence_with_optional_in_between():
    """A Sequence with an Optional in the middle matches with or without it."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", Optional("two"), "three")}
    context, parser = init([foo], grammar)

    res = parser.parse(context, "one two three")
    assert res.status
    assert res.value.body == [("foo", 0, 4, "one two three")]

    res = parser.parse(context, "one three")
    assert res.status
    assert res.value.body == [("foo", 0, 2, "one three")]


def test_i_cannot_parse_wrong_input_with_optional():
    """An Optional grammar does not make an unrelated token recognized."""
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}

    context, res, wrapper, return_value = execute([foo], grammar, "two")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [
        UnrecognizedTokensNode(0, 0, [t("two")])
    ]


def test_i_can_use_reference():
    """A concept whose grammar is another concept gives one result per concept."""
    # when there are multiple matches for the same input
    # Do I need to create a choice concept ?
    # No, create a return value for every possible graph
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "one two")

    assert len(res) == 2
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [("foo", 0, 2, "one two")]
    concept_found_1 = res[0].value.body[0].concept
    assert cbody(concept_found_1) == DoNotResolve("one two")

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [("bar", 0, 2, "one two")]
    concept_found_2 = res[1].value.body[0].concept
    # the body and the prop['foo'] are the same concept 'foo'
    assert cbody(concept_found_2) == get_expected(foo, "one two")
    assert cprop(concept_found_2, "foo") is cbody(concept_found_2)


def test_i_can_use_a_reference_with_a_body():
    """
    Same test as before (test_i_can_use_reference())
    but this time, the concept 'foo' already has a body.
    """
    foo = Concept(name="foo", body="'foo'")
    bar = Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "one two")

    assert len(res) == 2
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [("foo", 0, 2, "one two")]
    concept_found_1 = res[0].value.body[0].concept
    assert concept_found_1.body == "'foo'"

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [("bar", 0, 2, "one two")]
    concept_found_2 = res[1].value.body[0].concept
    # the body and the prop['foo'] are the same concept 'foo'
    assert cbody(concept_found_2) == foo
    assert cprop(concept_found_2, "foo") is cbody(concept_found_2)


def test_i_can_use_context_reference_with_multiple_levels():
    """
    Same as the previous one, but with a reference of a reference
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    grammar = {foo: Sequence("one", "two"), bar: foo, baz: bar}
    context, parser = init([foo, bar, baz], grammar)

    res = parser.parse(context, "one two")

    assert len(res) == 3
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [("foo", 0, 2, "one two")]
    concept_found_1 = res[0].value.body[0].concept
    assert cbody(concept_found_1) == DoNotResolve("one two")

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [("bar", 0, 2, "one two")]
    concept_found_2 = res[1].value.body[0].concept
    assert cbody(concept_found_2) == get_expected(foo, "one two")
    assert cprop(concept_found_2, "foo") is cbody(concept_found_2)

    assert res[2].status
    assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
    assert res[2].value.body == [("baz", 0, 2, "one two")]
    concept_found_3 = res[2].value.body[0].concept
    expected_foo = get_expected(foo, "one two")
    assert cbody(concept_found_3) == get_expected(bar, expected_foo)
    assert cprop(concept_found_3, "foo") == expected_foo
    assert cprop(concept_found_3, "bar") is cbody(concept_found_3)


def test_order_is_not_important_when_using_references():
    """
    Same test as test_i_can_use_reference(), but this time, 'bar' is declared before 'foo'
    So the order of the result is different
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {bar: foo, foo: Sequence("one", "two")}
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "one two")

    assert len(res) == 2
    assert res[0].value.body == [("bar", 0, 2, "one two")]
    assert res[1].value.body == [("foo", 0, 2, "one two")]


def test_i_can_parse_when_reference():
    """A concept referenced inside a Sequence is exposed as a compiled property."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "twenty two")
    assert res.status
    assert res.value.body == [("bar", 0, 2, "twenty two")]
    concept_found = res.value.body[0].concept
    assert cbody(concept_found) == DoNotResolve("twenty two")
    assert cprop(concept_found, "foo") == get_expected(foo, "twenty")

    res = parser.parse(context, "thirty one")
    assert res.status
    assert res.value.body == [("bar", 0, 2, "thirty one")]
    concept_found = res.value.body[0].concept
    assert cbody(concept_found) == DoNotResolve("thirty one")
    assert cprop(concept_found, "foo") == get_expected(foo, "thirty")

    res = parser.parse(context, "twenty")
    assert res.status
    assert res.value.body == [("foo", 0, 0, "twenty")]
    concept_found = res.value.body[0].concept
    assert cbody(concept_found) == DoNotResolve("twenty")


def test_i_can_parse_when_reference_has_a_body():
    """Same as test_i_can_parse_when_reference(), with a 'foo' that already has a body."""
    foo = Concept(name="foo", body="'one'")
    bar = Concept(name="bar")
    grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "twenty two")
    assert res.status
    assert res.value.body == [("bar", 0, 2, "twenty two")]
    concept_found = res.value.body[0].concept
    assert cbody(concept_found) == DoNotResolve("twenty two")
    assert cprop(concept_found, "foo") == foo

    res = parser.parse(context, "twenty")
    assert res.status
    assert res.value.body == [("foo", 0, 0, "twenty")]
    concept_found = res.value.body[0].concept
    assert concept_found.body == "'one'"


def test_i_can_parse_multiple_results():
    """Two concepts matching the same input give two results."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three"))
    }
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "one two")

    assert len(res) == 2
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [("bar", 0, 2, "one two")]
    concept_found_0 = res[0].value.body[0].concept
    assert cbody(concept_found_0) == DoNotResolve("one two")
    assert len(concept_found_0.props) == 0

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [("foo", 0, 2, "one two")]
    concept_found_1 = res[1].value.body[0].concept
    assert cbody(concept_found_1) == DoNotResolve("one two")
    assert len(concept_found_1.props) == 0


def test_i_can_parse_multiple_results_times_two():
    """Two ambiguous matches in a row give the four possible combinations."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three"))
    }
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "one two one two")

    assert len(res) == 4
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [("bar", "one two"), ("bar", "one two")]

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [("foo", "one two"), ("bar", "one two")]

    assert res[2].status
    assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
    assert res[2].value.body == [("bar", "one two"), ("foo", "one two")]

    assert res[3].status
    assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT)
    assert res[3].value.body == [("foo", "one two"), ("foo", "one two")]


def test_i_can_parse_multiple_results_when_reference():
    """
    TODO : There should not be two answers, as the one with bar is totally useless
    Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence(foo, Optional(OrderedChoice("one", "two"))),
        foo: OrderedChoice("twenty", "thirty")
    }
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "twenty")

    assert len(res) == 2
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [("bar", 0, 0, "twenty")]

    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [("foo", 0, 0, "twenty")]


def test_i_can_parse_concept_reference_that_is_not_in_grammar():
    """Concepts that are in the cache but have no grammar entry can still be referenced."""
    one = Concept(name="one")
    two = Concept(name="two")
    foo = Concept(name="foo")
    grammar = {foo: Sequence("twenty", OrderedChoice(one, two))}
    # init() already returns a parser initialized with this context and grammar
    context, parser = init([one, two, foo], grammar)

    res = parser.parse(context, "twenty two")
    assert res.status
    assert res.value.body == [("foo", 0, 2, "twenty two")]
    concept_found = res.value.body[0].concept
    assert cbody(concept_found) == DoNotResolve("twenty two")
    assert cprop(concept_found, "two") == get_expected(two, "two")

    res = parser.parse(context, "twenty one")
    assert res.status
    assert res.value.body == [("foo", 0, 2, "twenty one")]


def test_i_can_parse_zero_or_more():
    """ZeroOrMore matches repeated occurrences of its inner expression."""
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}

    context, res, wrapper, return_value = execute([foo], grammar, "one one")

    assert res.status
    assert return_value == [("foo", 0, 2, "one one")]
    assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)])
    concept_found = return_value[0].concept
    assert cbody(concept_found) == DoNotResolve("one one")


def test_i_can_parse_sequence_and_zero_or_more():
    """A Sequence starting with a ZeroOrMore matches with or without repetitions."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(ZeroOrMore("one"), "two")}
    context, parser = init([foo], grammar)

    res = parser.parse(context, "one one two")
    assert res.status
    assert res.value.value == [("foo", 0, 4, "one one two")]

    res = parser.parse(context, "two")
    assert res.status
    assert res.value.value == [("foo", 0, 0, "two")]


def test_i_cannot_parse_zero_and_more_when_wrong_entry():
    """Tokens that do not match the ZeroOrMore expression are reported as unrecognized."""
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}
    # init() already returns a parser initialized with this context and grammar
    context, parser = init([foo], grammar)

    res = parser.parse(context, "one two")
    assert not res.status
    assert res.value.value == [
        ("foo", 0, 0, "one"),
        UnrecognizedTokensNode(1, 2, [t(" "), t("two")])
    ]

    res = parser.parse(context, "two")
    assert not res.status
    assert res.value.value == [
        UnrecognizedTokensNode(0, 0, [t("two")])
    ]


def test_i_can_parse_zero_and_more_with_separator():
    """ZeroOrMore accepts a separator between occurrences (whitespace around it is tolerated)."""
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one", sep=",")}

    context, res, wrapper, return_value = execute([foo], grammar, "one, one , one")

    assert res.status
    assert return_value == [("foo", 0, 7, "one, one , one")]


def test_that_zero_and_more_is_greedy():
    """ZeroOrMore consumes all the occurrences rather than stopping at the first one."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: ZeroOrMore("one"), bar: "one"}

    context, res, wrapper, return_value = execute([foo], grammar, "one one one")

    assert res.status
    assert return_value == [("foo", 0, 4, "one one one")]


def test_i_can_parse_one_and_more():
    """OneOrMore matches repeated occurrences of its inner expression."""
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one")}

    context, res, wrapper, return_value = execute([foo], grammar, "one one")

    assert res.status
    assert return_value == [("foo", 0, 2, "one one")]
    assert return_value[0].underlying == u(grammar[foo], 0, 2, [
        u("one", 0, 0),
        u("one", 2, 2)])


def test_i_can_parse_sequence_and_one_or_more():
    """A Sequence starting with a OneOrMore needs at least one occurrence."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(OneOrMore("one"), "two")}
    context, parser = init([foo], grammar)

    res = parser.parse(context, "one one two")
    assert res.status
    assert res.value.value == [("foo", 0, 4, "one one two")]

    res = parser.parse(context, "two")
    assert not res.status
    assert res.value.value == [
        UnrecognizedTokensNode(0, 0, [t("two")])
    ]


def test_i_can_parse_one_and_more_with_separator():
    """OneOrMore accepts a separator; the separators are not part of the underlying children."""
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one", sep=",")}

    context, res, wrapper, return_value = execute([foo], grammar, "one, one , one")

    assert res.status
    assert return_value == [("foo", 0, 7, "one, one , one")]
    assert return_value[0].underlying == u(grammar[foo], 0, 7, [
        u("one", 0, 0),
        u("one", 3, 3),
        u("one", 7, 7)])


def test_that_one_and_more_is_greedy():
    """OneOrMore consumes all the occurrences rather than stopping at the first one."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: OneOrMore("one"), bar: "one"}

    context, res, wrapper, return_value = execute([foo], grammar, "one one one")

    assert res.status
    assert return_value == [("foo", 0, 4, "one one one")]


def test_i_can_detect_infinite_recursion():
    """Concepts that only reference each other are removed from the parser grammars."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: foo,
        foo: bar
    }

    parser = ConceptLexerParser()
    parser.initialize(get_context(), grammar)

    assert bar not in parser.concepts_grammars
    assert foo not in parser.concepts_grammars


def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
    """A recursive reference as first alternative is rejected, but accepted as second one."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: foo,
        foo: OrderedChoice(bar, "foo")
    }

    parser = ConceptLexerParser()
    parser.initialize(get_context(), grammar)

    assert foo not in parser.concepts_grammars  # removed because of the infinite recursion
    assert bar not in parser.concepts_grammars  # removed because of the infinite recursion

    # the other way around is possible
    grammar = {
        bar: foo,
        foo: OrderedChoice("foo", bar)
    }
    context, parser = init([foo, bar], grammar)

    assert foo in parser.concepts_grammars
    assert bar in parser.concepts_grammars

    res = parser.parse(context, "foo")
    assert len(res) == 2
    assert res[0].status
    assert res[0].value.body == [("bar", 0, 0, "foo")]
    assert res[1].status
    assert res[1].value.body == [("foo", 0, 0, "foo")]


def test_i_can_detect_indirect_infinite_recursion_with_sequence():
    """A recursive reference inside a Sequence removes both concepts from the grammars."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: foo,
        foo: Sequence("one", bar, "two")
    }

    parser = ConceptLexerParser()
    parser.initialize(get_context(), grammar)

    assert foo not in parser.concepts_grammars  # removed because of the infinite recursion
    assert bar not in parser.concepts_grammars  # removed because of the infinite recursion


def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
    """A recursive reference inside an OrderedChoice nested in a Sequence is detected."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: foo,
        foo: Sequence("one", OrderedChoice(bar, "other"), "two")
    }

    parser = ConceptLexerParser()
    parser.initialize(get_context(), grammar)

    assert foo not in parser.concepts_grammars  # removed because of the infinite recursion
    assert bar not in parser.concepts_grammars  # removed because of the infinite recursion


def test_infinite_recursion_does_not_fail_if_a_concept_is_missing():
    """Referencing a concept that has no grammar entry does not break initialization."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        foo: bar
    }

    parser = ConceptLexerParser()
    parser.initialize(get_context(), grammar)

    assert foo in parser.concepts_grammars


def test_i_can_detect_indirect_infinite_recursion_with_optional():
    # TODO infinite recursion with optional
    pass


def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more():
    # TODO infinite recursion with zero or more
    pass


def test_i_can_detect_indirect_infinite_recursion_with_one_and_more():
    # TODO infinite recursion with one or more
    pass


def test_i_can_visit_parsing_expression():
    """The visitor reaches the concepts nested inside Sequence and Optional expressions."""
    mult = Concept(name="mult")
    add = Concept(name="add")

    visitor = ConceptVisitor()
    visitor.visit(Sequence(mult, Optional(Sequence("+", add))))

    assert sorted(visitor.concepts) == ["add", "mult"]


def test_i_can_initialize_rule_names():
    """After initialize(), a grammar that references a concept gets that concept's name as rule name."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}

    parser = ConceptLexerParser()
    ret = parser.initialize(context, grammar)
    return_value = ret.body

    assert return_value[foo].rule_name == ""
    assert return_value[bar].rule_name == "foo"


@pytest.mark.parametrize("text, end_position", [
    ("foo", 0),
    ("foo bar", 2),
    ("foo bar ", 3),
    (" foo bar ", 4)
])
def test_cannot_parser_unknown_concepts(text, end_position):
    """With an empty grammar, the whole input comes back as one UnrecognizedTokensNode."""
    context, res, wrapper, return_value = execute([], {}, text)

    # the last token produced by the Tokenizer is dropped
    # (presumably an end-of-input marker; verify against the Tokenizer)
    tokens = list(Tokenizer(text))[:-1]

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [UnrecognizedTokensNode(0, end_position, tokens)]


def test_i_cannot_parse_when_part_of_the_input_is_unrecognized():
    """Recognized concepts are returned, followed by the tokens that are not recognized."""
    one = Concept(name="one")
    two = Concept(name="two")
    grammar = {one: "one", two: "two"}

    context, res, wrapper, return_value = execute([one, two], grammar, "one two three")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [
        ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)),
        ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)),
        UnrecognizedTokensNode(3, 4, [t(" "), t("three")])
    ]


def test_i_cannot_parse_when_wrong_sequence():
    """A complete sequence followed by the start of another one is a failure."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", "three")}

    context, res, wrapper, return_value = execute([foo], grammar, "one two three one")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [
        ("foo", "one two three"),
        UnrecognizedTokensNode(5, 6, [t(" "), t("one")])
    ]


def test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file():
    """A sequence cut short by the end of the input is entirely unrecognized."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", "three")}

    context, res, wrapper, return_value = execute([foo], grammar, "one two")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [
        UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")])
    ]


def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end():
    """Every result of an ambiguous match carries the trailing unrecognized tokens."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three"))
    }
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "one two four five")

    assert len(res) == 2
    assert not res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [
        ("bar", 0, 2, "one two"),
        UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
    ]

    assert not res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [
        ("foo", 0, 2, "one two"),
        UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
    ]


def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens():
    """Every result of an ambiguous match carries the leading unrecognized tokens."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three"))
    }
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "four five one two")

    assert len(res) == 2
    assert not res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [
        UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
        ("bar", 4, 6, "one two"),
    ]

    assert not res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [
        UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
        ("foo", 4, 6, "one two"),
    ]


def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens():
    """Every result of an ambiguous match carries the unrecognized tokens on both sides."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three"))
    }
    context, parser = init([foo, bar], grammar)

    res = parser.parse(context, "four five one two six seven")

    assert len(res) == 2
    assert not res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [
        UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
        ("bar", 4, 6, "one two"),
        UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
    ]

    assert not res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [
        UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
        ("foo", 4, 6, "one two"),
        UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
    ]


def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle():
    """Unrecognized tokens between two recognized concepts appear in every result."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
        baz: StrMatch("six"),
    }
    context, parser = init([foo, bar, baz], grammar)

    res = parser.parse(context, "one two four five six")

    assert len(res) == 2
    assert not res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [
        ("bar", 0, 2, "one two"),
        UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
        ("baz", 8, 8, "six"),
    ]

    assert not res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [
        ("foo", 0, 2, "one two"),
        UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
        ("baz", 8, 8, "six"),
    ]


def test_i_can_get_the_inner_concept_when_possible():
    """When the whole match is a single inner concept, that concept becomes the body."""
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: Sequence(Optional(ZeroOrMore(one)), ZeroOrMore("one"))}

    context, res, wrapper, return_value = execute([foo, one], grammar, "one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", 0, 0, "one")]
    concept_found = return_value[0].concept
    assert cbody(concept_found) == get_expected(one, "one")
    assert cprop(concept_found, "one") is cbody(concept_found)


def test_i_can_get_the_inner_concept_when_possible_with_rule_name():
    """Same as the previous test; every rule name on the path refers to the same object."""
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: Sequence(
        Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"),
        ZeroOrMore("one"),
        rule_name="seq")}

    context, res, wrapper, return_value = execute([foo, one], grammar, "one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", 0, 0, "one")]
    concept_found = return_value[0].concept
    assert cbody(concept_found) == get_expected(one, "one")
    assert cprop(concept_found, "one") is cbody(concept_found)
    assert cprop(concept_found, "zero") is cbody(concept_found)
    assert cprop(concept_found, "opt") is cbody(concept_found)
    assert cprop(concept_found, "seq") is cbody(concept_found)


def test_i_get_multiple_props_when_zero_or_more():
    """A concept matched several times gives a list of distinct concept instances."""
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: ZeroOrMore(one)}

    context, res, wrapper, return_value = execute([foo, one], grammar, "one one one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", 0, 4, "one one one")]
    concept_found = return_value[0].concept
    assert cbody(concept_found) == DoNotResolve("one one one")
    assert len(concept_found.cached_asts["one"]) == 3

    ones = cprop(concept_found, "one")
    assert ones[0] == get_expected(one)
    assert ones[1] == get_expected(one)
    assert ones[2] == get_expected(one)
    # equal, but not the same objects
    assert ones[0] is not ones[1]
    assert ones[1] is not ones[2]
    assert ones[2] is not ones[0]


def test_i_get_multiple_props_when_zero_or_more_and_different_values():
    """Each repetition keeps its own value, for concepts as well as for named rules."""
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: ZeroOrMore(Sequence(one, "ok", rule_name="seq")), one: OrderedChoice("one", "un", "uno")}

    context, res, wrapper, return_value = execute([foo, one], grammar, "one ok un ok uno ok")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", "one ok un ok uno ok")]
    concept_found = return_value[0].concept
    assert cprop(concept_found, "one")[0] == get_expected(one, "one")
    assert cprop(concept_found, "one")[1] == get_expected(one, "un")
    assert cprop(concept_found, "one")[2] == get_expected(one, "uno")
    assert cprop(concept_found, "seq")[0] == DoNotResolve("one ok")
    assert cprop(concept_found, "seq")[1] == DoNotResolve("un ok")
    assert cprop(concept_found, "seq")[2] == DoNotResolve("uno ok")


#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
#     context = get_context()
#     add = Concept(name="add")
#     mult = Concept(name="mult")
#     atom = Concept(name="atom")
#
#     grammar = {
#         add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))),
#         mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))),
#         atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
#     }
#
#     parser = ConceptLexerParser()
#     parser.register(grammar)
#
#     # res = parser.parse(context, "1")
#     # assert len(res) == 3  # add, mult, atom
#     #
#     # res = parser.parse(context, "1 * 2")
#     # assert len(res) == 2  # add and mult
#     #
#     # res = parser.parse(context, "1 + 2")
#     # assert res.status
#     # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2")]
#
#     res = parser.parse(context, "1 * 2 + 3")
#     assert res.status
#     assert return_value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")]


def test_i_can_register_concepts_with_the_same_name():
    # TODO : concepts are registered by name,
    # what happens when two concepts have the same name ?
    pass


def test_i_can_parse_very_very_long_input():
    # TODO: In the current implementation, all the tokens are loaded in memory
    # It's clearly not the right approach
    pass