# NOTE(review): `Str` is not used anywhere in the visible part of this module, and
# `ast.Str` has been deprecated since Python 3.8 (scheduled for removal in 3.14).
# Kept because other parts of the file may rely on it — confirm and drop if unused.
from ast import Str

import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BnfParser import BnfParser
from parsers.ConceptLexerParser import (
    ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional,
    ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore,
    UnrecognizedTokensNode, cnode, short_cnode, ConceptExpression, ConceptGroupExpression,
)
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka


class ConceptVisitor(ParsingExpressionVisitor):
    """Visitor that collects the concept of every ConceptExpression it visits."""

    def __init__(self):
        self.concepts = set()

    def visit_ConceptExpression(self, node):
        self.concepts.add(node.concept)


def u(parsing_expression, start, end, children=None):
    """
    u stands for underlying.
    Builds the node expected in the 'underlying' attribute of a ConceptNode.

    :param parsing_expression: a string (wrapped into a StrMatch), a StrMatch,
        or any other parsing expression
    :param start: start position given to the node
    :param end: end position given to the node
    :param children: children given to the NonTerminalNode (ignored for a StrMatch)
    :return: a TerminalNode for a str / StrMatch, a NonTerminalNode otherwise
    """
    if isinstance(parsing_expression, str):
        parsing_expression = StrMatch(parsing_expression)

    if isinstance(parsing_expression, StrMatch):
        return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)

    return NonTerminalNode(parsing_expression, start, end, [], children)


def evaluated(concept):
    """
    Builds a new concept that has the same name as 'concept' and whose body is that name.

    :param concept: concept to take the name from
    :return: the newly created concept
    """
    # The original built the concept and dropped it (implicit 'return None').
    return Concept(name=concept.name, body=concept.name)


def t(text):
    """
    t stands for token.
    Builds a Token (positions set to 0) whose kind is deduced from the first character.

    :param text: text of the token
    :return: a STRING token when the text starts with a quote, a WHITESPACE token
        when it starts with a space, an IDENTIFIER token otherwise
    """
    if text.startswith(("'", '"')):
        return Token(TokenKind.STRING, text, 0, 0, 0)

    if text.startswith(" "):
        return Token(TokenKind.WHITESPACE, text, 0, 0, 0)

    return Token(TokenKind.IDENTIFIER, text, 0, 0, 0)


def get_expected(concept, text=None):
    """
    Builds the concept that the parser is expected to produce for 'concept'.

    :param concept: concept to take the name and the id from
    :param text: value wrapped into the DoNotResolve of the compiled body
        (the name of the concept is used when it is not given)
    :return: the expected concept
    """
    c = Concept(name=concept.name)
    c.compiled[ConceptParts.BODY] = DoNotResolve(text or concept.name)
    c.init_key()
    c.metadata.id = concept.id
    return c


def cbody(concept):
    """cbody stands for compiled body (None when the concept has no compiled body)"""
    if ConceptParts.BODY not in concept.compiled:
        return None
    return concept.compiled[ConceptParts.BODY]


def cprop(concept, prop_name):
    """cprop stands for compiled property"""
    return concept.compiled[prop_name]


class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
    """Tests of ConceptLexerParser (grammar initialization and parsing)."""

    def init(self, concepts, grammar):
        """
        Registers the concepts in sheerka and initializes a parser with the grammar.

        :param concepts: concepts to add in the cache (an id is set if needed)
        :param grammar: dict concept -> parsing expression
        :return: tuple (context, parser)
        """
        context = self.get_context()
        for c in concepts:
            context.sheerka.add_in_cache(c)
            context.sheerka.set_id_if_needed(c, False)

        parser = ConceptLexerParser()
        parser.initialize(context, grammar)

        return context, parser

    def execute(self, concepts, grammar, text):
        """
        Initializes a parser (see init()) and parses 'text' with it.

        :return: tuple (context, res, res.value, res.value.value)
        """
        context, parser = self.init(concepts, grammar)
        res = parser.parse(context, text)

        wrapper = res.value
        return_value = res.value.value

        return context, res, wrapper, return_value

    @pytest.mark.parametrize("match, text", [
        ("foo", "foo"),
        ("'foo'", "'foo'"),
        ("1", "1"),
        ("3.14", "3.14"),
        ("+", "+"),
        (StrMatch("foo"), "foo"),
        (StrMatch("'foo'"), "'foo'"),
        (StrMatch("1"), "1"),
        (StrMatch("3.14"), "3.14"),
        (StrMatch("+"), "+"),
    ])
    def test_i_can_match_simple_tokens(self, match, text):
        foo = Concept(name="foo")
        grammar = {foo: match}

        context, res, wrapper, return_value = self.execute([foo], grammar, text)

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))]

    def test_i_can_match_multiple_concepts_in_one_input(self):
        one = Concept(name="one")
        two = Concept(name="two")
        grammar = {one: "one", two: "two"}

        context, res, wrapper, return_value = self.execute([one, two], grammar, "one two one")

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [
            ConceptNode(get_expected(one), 0, 0, source="one", underlying=u("one", 0, 0)),
            ConceptNode(get_expected(two), 2, 2, source="two", underlying=u("two", 2, 2)),
            ConceptNode(get_expected(one), 4, 4, source="one", underlying=u("one", 4, 4)),
        ]

    def test_i_can_match_sequence(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence("one", "two", "three")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "one two three")

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [
            ConceptNode(
                get_expected(foo, "one two three"),
                0,
                4,
                source="one two three",
                underlying=u(grammar[foo], 0, 4, [
                    u("one", 0, 0),
                    u("two", 2, 2),
                    u("three", 4, 4)]))]

    def test_i_always_choose_the_longest_match(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}

        context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three")

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [cnode("foo", 0, 4, "one two three")]

    def test_i_can_match_several_sequences(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}

        context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three one two")

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [
            cnode("foo", 0, 4, "one two three"),
            cnode("bar", 6, 8, "one two"),
        ]

    def test_i_can_match_ordered_choice(self):
        foo = Concept(name="foo")
        grammar = {foo: OrderedChoice("one", "two")}
        context, parser = self.init([foo], grammar)

        res1 = parser.parse(context, "one")
        assert res1.status
        assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
        assert res1.value.body == [cnode("foo", 0, 0, "one")]
        assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)])

        res2 = parser.parse(context, "two")
        assert res2.status
        assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
        assert res2.value.body == [cnode("foo", 0, 0, "two")]
        assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)])

        res3 = parser.parse(context, "three")
        assert not res3.status
        assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT)
        assert res3.value.value == [
            UnrecognizedTokensNode(0, 0, [t("three")])
        ]

    def test_i_cannot_match_ordered_choice_with_empty_alternative(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence(OrderedChoice("one", ""), "two")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "ok")

        assert not res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [
            UnrecognizedTokensNode(0, 0, [t("ok")])
        ]

    def test_i_can_mix_sequences_and_ordered_choices(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
        context, parser = self.init([foo], grammar)

        res1 = parser.parse(context, "twenty one ok")
        assert res1.status
        assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
        assert res1.value.body == [ConceptNode(get_expected(foo, "twenty one ok"), 0, 4,
                                               source="twenty one ok",
                                               underlying=u(grammar[foo], 0, 4, [
                                                   u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]),
                                                   u("one", 2, 2),
                                                   u("ok", 4, 4)]))]

        res2 = parser.parse(context, "thirty one ok")
        assert res2.status
        assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
        assert res2.value.body == [ConceptNode(get_expected(foo, "thirty one ok"), 0, 4,
                                               source="thirty one ok",
                                               underlying=u(grammar[foo], 0, 4, [
                                                   u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]),
                                                   u("one", 2, 2),
                                                   u("ok", 4, 4)]))]

        res3 = parser.parse(context, "twenty one")
        assert not res3.status
        # the original checked res2 here, which had already been verified above
        assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT)
        assert res3.value.value == [
            UnrecognizedTokensNode(0, 2, [t("twenty"), t(" "), t("one")])
        ]

    def test_i_can_mix_ordered_choices_and_sequences(self):
        foo = Concept(name="foo")
        grammar = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}
        context, parser = self.init([foo], grammar)

        res = parser.parse(context, "twenty thirty")
        assert res.status
        assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")]

        res = parser.parse(context, "one")
        assert res.status
        assert res.value.value == [cnode("foo", 0, 0, "one")]

    def test_i_cannot_parse_empty_optional(self):
        foo = Concept(name="foo")
        grammar = {foo: Optional("one")}
        context, parser = self.init([foo], grammar)

        res = parser.parse(context, "")
        return_value = res.value

        assert not res.status
        assert context.sheerka.isinstance(return_value, BuiltinConcepts.IS_EMPTY)

    def test_i_can_parse_optional(self):
        foo = Concept(name="foo")
        grammar = {foo: Optional("one")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "one")

        assert res.status
        assert return_value == [ConceptNode(get_expected(foo, "one"), 0, 0,
                                            source="one",
                                            underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))]

    def test_i_can_parse_sequence_starting_with_optional(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence(Optional("twenty"), "one")}
        context, parser = self.init([foo], grammar)

        res = parser.parse(context, "twenty one")
        assert res.status
        assert res.value.body == [ConceptNode(
            get_expected(foo, "twenty one"), 0, 2,
            source="twenty one",
            underlying=u(grammar[foo], 0, 2, [
                u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
                u("one", 2, 2)]
                         ))]

        res = parser.parse(context, "one")
        assert res.status
        assert res.value.body == [ConceptNode(get_expected(foo, "one"), 0, 0,
                                              source="one",
                                              underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))]

    def test_i_can_parse_sequence_ending_with_optional(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence("one", "two", Optional("three"))}
        context, parser = self.init([foo], grammar)

        res = parser.parse(context, "one two three")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 4, "one two three")]

        res = parser.parse(context, "one two")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 2, "one two")]

    def test_i_can_parse_sequence_with_optional_in_between(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence("one", Optional("two"), "three")}
        context, parser = self.init([foo], grammar)

        res = parser.parse(context, "one two three")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 4, "one two three")]

        res = parser.parse(context, "one three")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 2, "one three")]

    def test_i_cannot_parse_wrong_input_with_optional(self):
        foo = Concept(name="foo")
        grammar = {foo: Optional("one")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "two")

        assert not res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [
            UnrecognizedTokensNode(0, 0, [t("two")])
        ]

    def test_i_can_use_reference(self):
        # when there are multiple matches for the same input
        # Do I need to create a choice concept ?
        # No, create a return value for every possible graph
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {foo: Sequence("one", "two"), bar: foo}
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "one two")
        assert len(res) == 2

        assert res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
        concept_found_1 = res[0].value.body[0].concept
        assert cbody(concept_found_1) == DoNotResolve("one two")

        assert res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
        concept_found_2 = res[1].value.body[0].concept
        # the body and the prop['foo'] are the same concept 'foo'
        assert cbody(concept_found_2) == get_expected(foo, "one two")
        assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))

    def test_i_can_use_a_reference_with_a_body(self):
        """
        Same test as the previous one (test_i_can_use_reference())
        but this time, the concept 'foo' already has a body.
        :return:
        """
        foo = Concept(name="foo", body="'foo'")
        bar = Concept(name="bar")
        grammar = {foo: Sequence("one", "two"), bar: foo}
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "one two")
        assert len(res) == 2

        assert res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
        concept_found_1 = res[0].value.body[0].concept
        assert concept_found_1.metadata.body == "'foo'"
        assert cbody(concept_found_1) is None

        assert res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
        concept_found_2 = res[1].value.body[0].concept
        assert cbody(concept_found_2) == foo
        # the body and the prop['foo'] are the same concept 'foo'
        assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))

    def test_i_can_use_context_reference_with_multiple_levels(self):
        """
        Same as the previous one, but with a reference of a reference
        :return:
        """
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        baz = Concept(name="baz")
        grammar = {foo: Sequence("one", "two"), bar: foo, baz: bar}
        context, parser = self.init([foo, bar, baz], grammar)

        res = parser.parse(context, "one two")
        assert len(res) == 3

        assert res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
        concept_found_1 = res[0].value.body[0].concept
        assert cbody(concept_found_1) == DoNotResolve("one two")

        assert res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
        concept_found_2 = res[1].value.body[0].concept
        assert cbody(concept_found_2) == get_expected(foo, "one two")
        assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))

        assert res[2].status
        assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
        assert res[2].value.body == [cnode("baz", 0, 2, "one two")]
        concept_found_3 = res[2].value.body[0].concept
        expected_foo = get_expected(foo, "one two")
        assert cbody(concept_found_3) == get_expected(bar, expected_foo)
        assert cprop(concept_found_3, "foo") == expected_foo
        assert id(cprop(concept_found_3, "bar")) == id(cbody(concept_found_3))

    def test_order_is_not_important_when_using_references(self):
        """
        Same test as test_i_can_use_reference(),
        but this time, 'bar' is declared before 'foo'
        So the order of the result is different
        :return:
        """
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {bar: foo, foo: Sequence("one", "two")}
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "one two")

        assert len(res) == 2
        assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
        assert res[1].value.body == [cnode("foo", 0, 2, "one two")]

    def test_i_can_parse_when_reference(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "twenty two")
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve("twenty two")
        assert cprop(concept_found, "foo") == get_expected(foo, "twenty")

        res = parser.parse(context, "thirty one")
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve("thirty one")
        assert cprop(concept_found, "foo") == get_expected(foo, "thirty")

        res = parser.parse(context, "twenty")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 0, "twenty")]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve("twenty")

    def test_i_can_parse_when_reference_has_a_body(self):
        foo = Concept(name="foo", body="'one'")
        bar = Concept(name="bar")
        grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "twenty two")
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve("twenty two")
        assert cprop(concept_found, "foo") == foo

        res = parser.parse(context, "twenty")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 0, "twenty")]
        concept_found = res.value.body[0].concept
        assert concept_found.metadata.body == "'one'"

    def test_i_can_parse_multiple_results(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: Sequence("one", "two"),
            foo: Sequence("one", OrderedChoice("two", "three"))
        }
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "one two")
        assert len(res) == 2

        assert res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
        concept_found_0 = res[0].value.body[0].concept
        assert cbody(concept_found_0) == DoNotResolve("one two")
        assert len(concept_found_0.props) == 0

        assert res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
        concept_found_1 = res[1].value.body[0].concept
        assert cbody(concept_found_1) == DoNotResolve("one two")
        assert len(concept_found_1.props) == 0

    def test_i_can_parse_multiple_results_times_two(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: Sequence("one", "two"),
            foo: Sequence("one", OrderedChoice("two", "three"))
        }
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "one two one two")
        assert len(res) == 4

        assert res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")]

        assert res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")]

        assert res[2].status
        assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
        assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")]

        assert res[3].status
        assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT)
        assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")]

    def test_i_can_parse_multiple_results_when_reference(self):
        """
        TODO: there should not be two answers, as the one with bar is totally useless
        Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match
        :return:
        """
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: Sequence(foo, Optional(OrderedChoice("one", "two"))),
            foo: OrderedChoice("twenty", "thirty")
        }
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "twenty")
        assert len(res) == 2

        assert res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [cnode("bar", 0, 0, "twenty")]

        assert res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [cnode("foo", 0, 0, "twenty")]

    def test_i_can_parse_concept_reference_that_is_not_in_grammar(self):
        one = Concept(name="one")
        two = Concept(name="two")
        foo = Concept(name="foo")
        grammar = {foo: Sequence("twenty", OrderedChoice(one, two))}
        context, parser = self.init([one, two, foo], grammar)

        res = parser.parse(context, "twenty two")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 2, "twenty two")]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve("twenty two")
        assert cprop(concept_found, "two") == get_expected(two, "two")

        res = parser.parse(context, "twenty one")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 2, "twenty one")]

    def test_i_can_initialize_when_cyclic_reference(self):
        foo = Concept(name="foo")
        grammar = {foo: Optional("one", ConceptExpression("foo"))}

        context, parser = self.init([foo], grammar)

        assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo"))

    def test_i_cannot_initialize_when_cyclic_reference_when_concept_is_under_construction_and_not_known(self):
        foo = Concept(name="foo").init_key()
        grammar = {foo: Optional("one", ConceptExpression("foo"))}
        context = self.get_context()

        parser = ConceptLexerParser()
        parser.initialize(context, grammar)

        assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo"))

    def test_i_can_initialize_when_cyclic_reference_when_concept_is_under_construction_and_known(self):
        foo = Concept(name="foo").init_key()
        grammar = {foo: Optional("one", ConceptExpression("foo"))}
        context = self.get_context()
        context.concepts["foo"] = foo

        parser = ConceptLexerParser()
        parser.initialize(context, grammar)

        assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo"))

    def test_i_can_parse_concept_reference_that_is_group(self):
        """
        if one is number, then number is a 'group'
        a group can be found under the sdp entry 'all_'
        """
        context = self.get_context()
        one = Concept(name="one")
        two = Concept(name="two")
        number = Concept(name="number")
        foo = Concept(name="foo")
        for c in [one, two, number, foo]:
            context.sheerka.set_id_if_needed(c, False)
            context.sheerka.add_in_cache(c)
        context.sheerka.add_concept_to_set(context, one, number)
        context.sheerka.add_concept_to_set(context, two, number)

        grammar = {foo: Sequence("twenty", number)}
        parser = ConceptLexerParser()
        parser.initialize(context, grammar)

        res = parser.parse(context, "twenty two")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 2, "twenty two")]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve("twenty two")
        assert cprop(concept_found, "two") == get_expected(two, "two")
        assert cprop(concept_found, "number") == get_expected(number, get_expected(two, "two"))

        res = parser.parse(context, "twenty one")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 2, "twenty one")]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve("twenty one")
        assert cprop(concept_found, "one") == get_expected(one, "one")
        assert cprop(concept_found, "number") == get_expected(number, get_expected(one, "one"))

    def test_i_can_parse_zero_or_more(self):
        foo = Concept(name="foo")
        grammar = {foo: ZeroOrMore("one")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "one one")

        assert res.status
        assert return_value == [cnode("foo", 0, 2, "one one")]
        assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)])
        concept_found = return_value[0].concept
        assert cbody(concept_found) == DoNotResolve("one one")

    def test_i_can_parse_sequence_and_zero_or_more(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence(ZeroOrMore("one"), "two")}
        context, parser = self.init([foo], grammar)

        res = parser.parse(context, "one one two")
        assert res.status
        assert res.value.value == [cnode("foo", 0, 4, "one one two")]

        res = parser.parse(context, "two")
        assert res.status
        assert res.value.value == [cnode("foo", 0, 0, "two")]

    def test_i_cannot_parse_zero_and_more_when_wrong_entry(self):
        foo = Concept(name="foo")
        grammar = {foo: ZeroOrMore("one")}
        # init() already returns a parser initialized with this context and grammar
        context, parser = self.init([foo], grammar)

        res = parser.parse(context, "one two")
        assert not res.status
        assert res.value.value == [
            cnode("foo", 0, 0, "one"),
            UnrecognizedTokensNode(1, 2, [t(" "), t("two")])
        ]

        res = parser.parse(context, "two")
        assert not res.status
        assert res.value.value == [
            UnrecognizedTokensNode(0, 0, [t("two")])
        ]

    def test_i_can_parse_zero_and_more_with_separator(self):
        foo = Concept(name="foo")
        grammar = {foo: ZeroOrMore("one", sep=",")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one")

        assert res.status
        assert return_value == [cnode("foo", 0, 7, "one, one , one")]

    def test_that_zero_and_more_is_greedy(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {foo: ZeroOrMore("one"), bar: "one"}

        # NOTE(review): 'bar' is in the grammar but is not given to execute() — confirm it is intended
        context, res, wrapper, return_value = self.execute([foo], grammar, "one one one")

        assert res.status
        assert return_value == [cnode("foo", 0, 4, "one one one")]

    def test_i_can_parse_one_and_more(self):
        foo = Concept(name="foo")
        grammar = {foo: OneOrMore("one")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "one one")

        assert res.status
        assert return_value == [cnode("foo", 0, 2, "one one")]
        assert return_value[0].underlying == u(grammar[foo], 0, 2, [
            u("one", 0, 0),
            u("one", 2, 2)])

    def test_i_can_parse_sequence_and_one_or_more(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence(OneOrMore("one"), "two")}
        context, parser = self.init([foo], grammar)

        res = parser.parse(context, "one one two")
        assert res.status
        assert res.value.value == [cnode("foo", 0, 4, "one one two")]

        res = parser.parse(context, "two")
        assert not res.status
        assert res.value.value == [
            UnrecognizedTokensNode(0, 0, [t("two")])
        ]

    def test_i_can_parse_one_and_more_with_separator(self):
        foo = Concept(name="foo")
        grammar = {foo: OneOrMore("one", sep=",")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one")

        assert res.status
        assert return_value == [cnode("foo", 0, 7, "one, one , one")]
        assert return_value[0].underlying == u(grammar[foo], 0, 7, [
            u("one", 0, 0),
            u("one", 3, 3),
            u("one", 7, 7)])

    def test_that_one_and_more_is_greedy(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {foo: OneOrMore("one"), bar: "one"}

        # NOTE(review): 'bar' is in the grammar but is not given to execute() — confirm it is intended
        context, res, wrapper, return_value = self.execute([foo], grammar, "one one one")

        assert res.status
        assert return_value == [cnode("foo", 0, 4, "one one one")]

    def test_i_can_detect_infinite_recursion(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: foo,
            foo: bar
        }

        parser = ConceptLexerParser()
        parser.initialize(self.get_context(), grammar)

        assert bar not in parser.concepts_grammars
        assert foo not in parser.concepts_grammars

    def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: foo,
            foo: OrderedChoice(bar, "foo")
        }

        parser = ConceptLexerParser()
        parser.initialize(self.get_context(), grammar)

        assert foo not in parser.concepts_grammars  # removed because of the infinite recursion
        assert bar not in parser.concepts_grammars  # removed because of the infinite recursion

        # the other way around is possible
        grammar = {
            bar: foo,
            foo: OrderedChoice("foo", bar)
        }
        context, parser = self.init([foo, bar], grammar)

        assert foo in parser.concepts_grammars
        assert bar in parser.concepts_grammars

        res = parser.parse(context, "foo")
        assert len(res) == 2
        assert res[0].status
        assert res[0].value.body == [cnode("bar", 0, 0, "foo")]
        assert res[1].status
        assert res[1].value.body == [cnode("foo", 0, 0, "foo")]

    def test_i_can_detect_indirect_infinite_recursion_with_sequence(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: foo,
            foo: Sequence("one", bar, "two")
        }

        parser = ConceptLexerParser()
        parser.initialize(self.get_context(), grammar)

        assert foo not in parser.concepts_grammars  # removed because of the infinite recursion
        assert bar not in parser.concepts_grammars  # removed because of the infinite recursion

    def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: foo,
            foo: Sequence("one", OrderedChoice(bar, "other"), "two")
        }

        parser = ConceptLexerParser()
        parser.initialize(self.get_context(), grammar)

        assert foo not in parser.concepts_grammars  # removed because of the infinite recursion
        assert bar not in parser.concepts_grammars  # removed because of the infinite recursion

    def test_infinite_recursion_does_not_fail_if_a_concept_is_missing(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            foo: bar
        }

        parser = ConceptLexerParser()
        parser.initialize(self.get_context(), grammar)

        assert foo in parser.concepts_grammars

    def test_i_can_detect_indirect_infinite_recursion_with_optional(self):
        # TODO infinite recursion with optional
        pass

    def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more(self):
        # TODO infinite recursion with zero or more
        pass

    def test_i_can_detect_indirect_infinite_recursion_with_one_and_more(self):
        # TODO infinite recursion with one or more
        pass

    def test_i_can_visit_parsing_expression(self):
        mult = Concept(name="mult")
        add = Concept(name="add")

        visitor = ConceptVisitor()
        visitor.visit(Sequence(mult, Optional(Sequence("+", add))))

        assert sorted(visitor.concepts) == ["add", "mult"]

    def test_i_can_initialize_rule_names(self):
        context = self.get_context()
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {foo: Sequence("one", "two"), bar: foo}

        parser = ConceptLexerParser()
        ret = parser.initialize(context, grammar)
        return_value = ret.body

        assert return_value[foo].rule_name == ""
        assert return_value[bar].rule_name == "foo"

    @pytest.mark.parametrize("text, end_position", [
        ("foo", 0),
        ("foo bar", 2),
        ("foo bar ", 3),
        (" foo bar ", 4)
    ])
    def test_cannot_parser_unknown_concepts(self, text, end_position):
        context, res, wrapper, return_value = self.execute([], {}, text)
        # the last token produced by the Tokenizer is dropped from the expected list
        tokens = list(Tokenizer(text))[:-1]

        assert not res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [UnrecognizedTokensNode(0, end_position, tokens)]

    def test_i_cannot_parse_when_part_of_the_input_is_unrecognized(self):
        one = Concept(name="one")
        two = Concept(name="two")
        grammar = {one: "one", two: "two"}

        context, res, wrapper, return_value = self.execute([one, two], grammar, "one two three")

        assert not res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [
            ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)),
            ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)),
            UnrecognizedTokensNode(3, 4, [t(" "), t("three")])
        ]

    def test_i_cannot_parse_when_wrong_sequence(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence("one", "two", "three")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "one two three one")

        assert not res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [
            short_cnode("foo", "one two three"),
            UnrecognizedTokensNode(5, 6, [t(" "), t("one")])
        ]

    def test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file(self):
        foo = Concept(name="foo")
        grammar = {foo: Sequence("one", "two", "three")}

        context, res, wrapper, return_value = self.execute([foo], grammar, "one two")

        assert not res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [
            UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")])
        ]

    def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: Sequence("one", "two"),
            foo: Sequence("one", OrderedChoice("two", "three"))
        }
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "one two four five")
        assert len(res) == 2

        assert not res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [
            cnode("bar", 0, 2, "one two"),
            UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
        ]

        assert not res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [
            cnode("foo", 0, 2, "one two"),
            UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
        ]

    def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: Sequence("one", "two"),
            foo: Sequence("one", OrderedChoice("two", "three"))
        }
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "four five one two")
        assert len(res) == 2

        assert not res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [
            UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
            cnode("bar", 4, 6, "one two"),
        ]

        assert not res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [
            UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
            cnode("foo", 4, 6, "one two"),
        ]

    def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        grammar = {
            bar: Sequence("one", "two"),
            foo: Sequence("one", OrderedChoice("two", "three"))
        }
        context, parser = self.init([foo, bar], grammar)

        res = parser.parse(context, "four five one two six seven")
        assert len(res) == 2

        assert not res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [
            UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
            cnode("bar", 4, 6, "one two"),
            UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
        ]

        assert not res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [
            UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
            cnode("foo", 4, 6, "one two"),
            UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
        ]

    def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(self):
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        baz = Concept(name="baz")
        grammar = {
            bar: Sequence("one", "two"),
            foo: Sequence("one", OrderedChoice("two", "three")),
            baz: StrMatch("six"),
        }
        context, parser = self.init([foo, bar, baz], grammar)

        res = parser.parse(context, "one two four five six")
        assert len(res) == 2

        assert not res[0].status
        assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
        assert res[0].value.body == [
            cnode("bar", 0, 2, "one two"),
            UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
            cnode("baz", 8, 8, "six"),
        ]

        assert not res[1].status
        assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
        assert res[1].value.body == [
            cnode("foo", 0, 2, "one two"),
            UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
            cnode("baz", 8, 8, "six"),
        ]

    def test_i_can_get_the_inner_concept_when_possible(self):
        foo = Concept(name="foo")
        one = Concept(name="one")
        grammar = {foo: Sequence(Optional(ZeroOrMore(one)), ZeroOrMore("one"))}

        context, res, wrapper, return_value = self.execute([foo, one], grammar, "one")

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [cnode("foo", 0, 0, "one")]
        concept_found = return_value[0].concept
        assert cbody(concept_found) == get_expected(one, "one")
        assert id(cprop(concept_found, "one")) == id(cbody(concept_found))

    def test_i_can_get_the_inner_concept_when_possible_with_rule_name(self):
        foo = Concept(name="foo")
        one = Concept(name="one")
        grammar = {foo: Sequence(
            Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"),
            ZeroOrMore("one"),
            rule_name="seq")}

        context, res, wrapper, return_value = self.execute([foo, one], grammar, "one")

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [cnode("foo", 0, 0, "one")]
        concept_found = return_value[0].concept
        assert cbody(concept_found) == get_expected(one, "one")
        assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
        assert id(cprop(concept_found, "zero")) == id(cbody(concept_found))
        assert id(cprop(concept_found, "opt")) == id(cbody(concept_found))
        assert id(cprop(concept_found, "seq")) == id(cbody(concept_found))

    def test_i_get_multiple_props_when_zero_or_more(self):
        foo = Concept(name="foo")
        one = Concept(name="one")
        grammar = {foo: ZeroOrMore(one)}

        context, res, wrapper, return_value = self.execute([foo, one], grammar, "one one one")

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [cnode("foo", 0, 4, "one one one")]
        concept_found = return_value[0].concept
        assert cbody(concept_found) == DoNotResolve("one one one")
        assert len(concept_found.compiled["one"]) == 3
        assert cprop(concept_found, "one")[0] == get_expected(one)
        assert cprop(concept_found, "one")[1] == get_expected(one)
        assert cprop(concept_found, "one")[2] == get_expected(one)
        # the three props are equal, but they must be three distinct instances
        assert id(cprop(concept_found, "one")[0]) != id(cprop(concept_found, "one")[1])
        assert id(cprop(concept_found, "one")[1]) != id(cprop(concept_found, "one")[2])
        assert id(cprop(concept_found, "one")[2]) != id(cprop(concept_found, "one")[0])

    def test_i_get_multiple_props_when_zero_or_more_and_different_values(self):
        foo = Concept(name="foo")
        one = Concept(name="one")
        grammar = {foo: ZeroOrMore(Sequence(one, "ok", rule_name="seq")), one: OrderedChoice("one", "un", "uno")}

        context, res, wrapper, return_value = self.execute([foo, one], grammar, "one ok un ok uno ok")

        assert res.status
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert return_value == [short_cnode("foo", "one ok un ok uno ok")]
        concept_found = return_value[0].concept
        assert cprop(concept_found, "one")[0] == get_expected(one, "one")
        assert cprop(concept_found, "one")[1] == get_expected(one, "un")
        assert cprop(concept_found, "one")[2] == get_expected(one, "uno")
        assert cprop(concept_found, "seq")[0] == DoNotResolve("one ok")
assert cprop(concept_found, "seq")[1] == DoNotResolve("un ok") assert cprop(concept_found, "seq")[2] == DoNotResolve("uno ok") @pytest.mark.parametrize("rule, expected", [ (StrMatch("string"), "'string'"), (StrMatch("string", rule_name="rule_name"), "'string'=rule_name"), (Sequence(StrMatch("foo"), StrMatch("bar")), "('foo' 'bar')"), (Sequence(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo' 'bar')=rule_name"), (OrderedChoice(StrMatch("foo"), StrMatch("bar")), "('foo'|'bar')"), (OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo'|'bar')=rule_name"), (Optional(StrMatch("foo")), "'foo'?"), (Optional(StrMatch("foo"), rule_name="rule_name"), "'foo'?=rule_name"), (ZeroOrMore(StrMatch("foo")), "'foo'*"), (ZeroOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'*=rule_name"), (OneOrMore(StrMatch("foo")), "'foo'+"), (OneOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'+=rule_name"), (Sequence( Optional(StrMatch("foo"), rule_name="a"), ZeroOrMore(StrMatch("bar"), rule_name="b"), OneOrMore(StrMatch("baz"), rule_name="c"), rule_name="d"), "('foo'?=a 'bar'*=b 'baz'+=c)=d"), (OrderedChoice( Optional(StrMatch("foo"), rule_name="a"), ZeroOrMore(StrMatch("bar"), rule_name="b"), OneOrMore(StrMatch("baz"), rule_name="c"), rule_name="d"), "('foo'?=a|'bar'*=b|'baz'+=c)=d"), (Sequence( OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="a"), OrderedChoice(StrMatch("x"), StrMatch("y"), rule_name="b"), rule_name="c"), "(('foo'|'bar')=a ('x'|'y')=b)=c") ]) def test_i_can_encode_grammar(self, rule, expected): foo = Concept(name="foo") grammar = {foo: rule} context, parser = self.init([foo], grammar) encoded = parser.encode_grammar(parser.concepts_grammars) assert encoded["c:foo|1001:"] == expected bnf_parser = BnfParser() parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) assert parse_res.status assert parse_res.value.value == rule def test_i_can_encode_grammar_when_concept_simple(self): foo = Concept(name="foo") bar = 
Concept(name="bar") grammar = {foo: ConceptExpression(bar)} context, parser = self.init([foo, bar], grammar) encoded = parser.encode_grammar(parser.concepts_grammars) assert encoded["c:foo|1001:"] == "c:bar|1002:=bar" bnf_parser = BnfParser() parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) assert parse_res.status assert parse_res.value.value == grammar[foo] def test_i_can_encode_grammar_when_concepts(self): foo = Concept(name="foo") bar = Concept(name="bar") baz = Concept(name="baz") grammar = {foo: Sequence( StrMatch("a"), OrderedChoice(ConceptExpression(bar), OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")} context, parser = self.init([foo, bar, baz], grammar) encoded = parser.encode_grammar(parser.concepts_grammars) assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s" bnf_parser = BnfParser() parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) assert parse_res.status assert parse_res.value.value == grammar[foo] def test_i_can_encode_grammar_when_set_concepts(self): foo = Concept(name="foo") bar = Concept(name="bar") baz = Concept(name="baz") grammar = {foo: Sequence( StrMatch("a"), OrderedChoice(bar, OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")} context = self.get_context() for c in [foo, bar, baz]: context.sheerka.add_in_cache(c) context.sheerka.set_id_if_needed(c, False) context.sheerka.add_concept_to_set(context, baz, bar) parser = ConceptLexerParser() parser.initialize(context, grammar) encoded = parser.encode_grammar(parser.concepts_grammars) assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s" bnf_parser = BnfParser() parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) assert parse_res.status expected = Sequence( StrMatch("a"), OrderedChoice(ConceptGroupExpression(bar, rule_name="bar"), OneOrMore(ConceptExpression(baz, rule_name="baz")), rule_name="oc"), rule_name="s") assert parse_res.value.value == expected # # def 
test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(self): # context = self.get_context() # add = Concept(name="add") # mult = Concept(name="mult") # atom = Concept(name="atom") # # grammar = { # add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))), # mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))), # atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')), # } # # parser = ConceptLexerParser() # parser.register(grammar) # # # res = parser.parse(context, "1") # # assert len(res) == 3 # add, mult, atom # # # # res = parser.parse(context, "1 * 2") # # assert len(res) == 2 # add and mult # # # # res = parser.parse(context, "1 + 2") # # assert res.status # # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2")] # # res = parser.parse(context, "1 * 2 + 3") # assert res.status # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")] def test_i_can_register_concepts_with_the_same_name(self): # TODO : concepts are registered by name, # what when two concepts have the same name ? pass def test_i_can_parse_very_very_long_input(self): # TODO: In the current implementation, all the tokens are loaded in memory # It's clearly not the good approach pass