import re import pytest import tests.parsers.parsers_utils from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYPE_BNF from core.global_symbols import NotInit from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseNodeParser import CNC, UTN, CN, NoMatchingTokenError, SCN from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser, RegExMatch, \ BnfNodeFirstTokenVisitor, Match, RegExDef, VariableExpression from tests.BaseTest import BaseTest from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.evaluators.EvaluatorTestsUtils import python_ret_val cmap = { "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), "four": Concept("four"), "thirty": Concept("thirty", body="30"), "forty": Concept("forty", body="40"), "fifty": Concept("fifty", body="50"), "number": Concept("number"), "foo": Concept("foo"), "bar": Concept("bar"), "baz": Concept("baz"), "one hundred": Concept("one hundred", body="100"), "one_hundred": Concept("'one hundred'", body="100"), "hundreds": Concept("hundreds", definition="number=n1 'hundred' 'and' number=n2", where="n1 < 10 and n2 < 100", body="n1 * 100 + n2").def_var("n1").def_var("n2"), "bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen "plus": Concept("plus", definition="one 'plus' two").def_var("a").def_var("b"), 'foo then bar': Concept("foo then bar", definition="foo bar").def_var("foo").def_var("bar"), 'foo or bar': Concept("foo or bar", definition="foo | bar").def_var("foo").def_var("bar"), 'one or more foo': Concept("one or more foo", definition="foo+").def_var("foo"), "t1": Concept("t1", definition="'twenty' 
(one|two)=unit").def_var("unit").def_var("one").def_var("two"), "three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"), "t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"), # bnf with variable "one thing": Concept("one x", definition="one x").def_var("x"), "x shoe": Concept("x shoe", definition="x 'shoe'").def_var("x"), # testing keywords "def_only": Concept("def"), "def number": Concept("def number", definition="def (one|two)=number"), # sequence of keywords using bnf definition # "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"), # "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"), # sequence of keywords using def definition # "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF), # "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"), "filter": Concept("filter", definition="'--filter' (one | two)") } def u(parsing_expression, start, end, children=None): """ u stands for underlying :param parsing_expression: :param start: :param end: :param children: :return: """ if isinstance(parsing_expression, str): parsing_expression = StrMatch(parsing_expression) if isinstance(parsing_expression, Match): return TerminalNode(parsing_expression, start, end, parsing_expression.to_match, parsing_expression.to_match) return NonTerminalNode(parsing_expression, start, end, [], children) def compute_expected_array(my_concepts_map, expression, expected, exclude_body=False): return tests.parsers.parsers_utils.compute_expected_array( my_concepts_map, expression, expected, init_empty_body=True, exclude_body=exclude_body) class TestBnfNodeParser(TestUsingMemoryBasedSheerka): shared_ontology = None @classmethod def setup_class(cls): init_test_helper = cls().init_test(cache_only=False, ontology="#TestBnfNodeParser#") 
sheerka, context, *updated = init_test_helper.with_concepts(*cmap.values(), create_new=True).unpack() for i, concept_name in enumerate(cmap): cmap[concept_name] = updated[i] # end of initialisation sheerka = TestBnfNodeParser.sheerka sheerka.set_isa(context, cmap["one"], cmap["number"]) sheerka.set_isa(context, cmap["two"], cmap["number"]) sheerka.set_isa(context, cmap["three"], cmap["number"]) sheerka.set_isa(context, cmap["four"], cmap["number"]) sheerka.set_isa(context, cmap["thirty"], cmap["number"]) sheerka.set_isa(context, cmap["forty"], cmap["number"]) sheerka.set_isa(context, cmap["fifty"], cmap["number"]) sheerka.set_isa(context, cmap["one hundred"], cmap["number"]) sheerka.set_isa(context, cmap["hundreds"], cmap["number"]) # Pay attention. 'twenties (t1 and t2) are not set as 'number' thirties = cls.update_bnf(context, Concept("thirties", definition="thirty number", where="number < 10", body="thirty + number").def_var("thirty").def_var("number")) cmap["thirties"] = sheerka.create_new_concept(context, thirties).body.body sheerka.set_isa(context, sheerka.new("thirties"), sheerka.new("number")) forties = cls.update_bnf(context, Concept("forties", definition="forty number", where="number < 10", body="forty + number").def_var("forty").def_var("number")) cmap["forties"] = sheerka.create_new_concept(context, forties).body.body sheerka.set_isa(context, sheerka.new("forties"), sheerka.new("number")) fifties = cls.update_bnf(context, Concept("fifties", definition="fifty number", where="number < 10", body="fifty + number").def_var("fifty").def_var("number")) cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number")) thousands = cls.update_bnf(context, Concept("thousands", definition="number 'thousand'", where="number < 999", body="number * 1000").def_var("number")) cmap["thousands"] = sheerka.create_new_concept(context, thousands).body.body sheerka.set_isa(context, 
sheerka.new("thousands"), sheerka.new("number")) cls.shared_ontology = sheerka.get_ontology(context) sheerka.pop_ontology(context) @staticmethod def update_bnf(context, concept): bnf_parser = BnfDefinitionParser() res = bnf_parser.parse(context, concept.get_metadata().definition) if res.status: concept.set_bnf(res.value.value) concept.get_metadata().definition_type = DEFINITION_TYPE_BNF else: raise Exception(res) return concept def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs): if my_concepts_map is None: sheerka, context = self.init_test().unpack() sheerka.add_ontology(context, self.shared_ontology) else: sheerka, context, *updated = self.init_test().with_concepts(*my_concepts_map.values(), **kwargs).unpack() for i, pair in enumerate(my_concepts_map): my_concepts_map[pair] = updated[i] if init_from_sheerka: parser = BnfNodeParser(sheerka=sheerka) else: parser = BnfNodeParser().init_from_concepts(context, my_concepts_map.values()) return sheerka, context, parser def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None): sheerka, context, *updated = self.init_test().with_concepts(*my_map.values(), create_new=False).unpack() sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, multiple_result, post_init_concepts, *updated) return sequences @staticmethod def exec_get_concepts_sequences(context, my_map, text, expected, multiple_result=False, post_init_concepts=None, *concepts): sheerka = context.sheerka if not multiple_result: expected_array = [compute_expected_array(my_map, text, expected)] else: expected_array = [compute_expected_array(my_map, text, e) for e in expected] if post_init_concepts: post_init_concepts(sheerka, context) parser = BnfNodeParser() parser.init_from_concepts(context, concepts) parser.reset_parser(context, ParserInput(text)) bnf_parsers_helpers = parser.get_concepts_sequences(context) assert len(bnf_parsers_helpers) == len(expected_array) 
for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array): to_compare = tests.parsers.parsers_utils.get_test_obj(expected_sequence, parser_helper.sequence) # assert parser_helper.sequence == expected_sequence assert to_compare == expected_sequence if len(bnf_parsers_helpers) == 1: return bnf_parsers_helpers[0].sequence else: return [pe.sequence for pe in bnf_parsers_helpers] def test_i_cannot_parse_empty_strings(self): sheerka, context, parser = self.init_parser({}, singleton=True) res = parser.parse(context, ParserInput("")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) assert res.body.reason == BuiltinConcepts.IS_EMPTY @pytest.mark.parametrize("expr, text", [ (StrMatch("foo"), "foo"), (StrMatch("'foo'"), "'foo'"), (StrMatch("1"), "1"), (StrMatch("3.14"), "3.14"), (StrMatch("+"), "+"), ]) def test_i_can_match_str_bnf(self, expr, text): my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying == u(expr, 0, 0) @pytest.mark.parametrize("expr, text, end", [ (RegExMatch("bar"), "bar", 0), (RegExMatch("[a-z]+"), "xyz", 0), (RegExMatch("[a-z=]+"), "uvt=xyz=abc", 4), ]) def test_i_can_match_regex_bnf(self, expr, text, end): my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying.start == 0 assert sequence[0].underlying.end == end assert sequence[0].underlying.parsing_expression == expr @pytest.mark.parametrize("expr, text, end", [ (Sequence(StrMatch("foo"), RegExMatch("bar")), "foo bar", 2), (Sequence(StrMatch("foo"), RegExMatch("[a-z]+")), "foo xyz", 2), (Sequence(StrMatch("foo"), RegExMatch("[a-z=]+")), "foo uvt=xyz=abc", 6), ]) def test_i_can_match_sequence_str_regex(self, expr, text, end): my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert 
sequence[0].underlying == u(expr, 0, end, sequence[0].underlying.children) @pytest.mark.parametrize("expr, text, end", [ (Sequence(RegExMatch("bar"), StrMatch("foo")), "bar foo", 2), (Sequence(RegExMatch("[a-z]+"), StrMatch("foo")), "xyz foo", 2), (Sequence(RegExMatch("[a-z=]+"), StrMatch("foo")), "uvt=xyz=abc foo", 6), ]) def test_i_can_match_sequence_regex_str(self, expr, text, end): my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying == u(expr, 0, end, sequence[0].underlying.children) def test_i_can_match_sequence_str_regex_str(self): text = "foo uvt=xyz=abc baz" expr = Sequence(StrMatch("foo"), RegExMatch("[a-z=]+"), StrMatch("baz")) my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying == u(expr, 0, 8, sequence[0].underlying.children) def test_i_can_match_multiple_concepts_in_one_input(self): my_map = { "one": self.bnf_concept("one"), "two": self.bnf_concept("two"), } text = "one two one" expected = ["one", "two", ("one", 1)] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one two three", [CNC("foo", source="one two three")]), ("one two", []), ("one two four", []), ]) def test_i_can_match_sequence(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence("one", "two", "three")), } self.validate_get_concepts_sequences(my_map, text, expected) def test_i_always_choose_the_longest_match(self): my_map = { "foo": self.bnf_concept("foo", Sequence("one", "two", "three")), "bar": self.bnf_concept("bar", Sequence("one", "two")), } text = "one two three" expected = [CNC("foo", source=text)] self.validate_get_concepts_sequences(my_map, text, expected) def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self): # to match '--filter' in one word my_map = { "filter": self.bnf_concept("filter", 
Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), "filter")), } sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True) parser = BnfNodeParser() parser.init_from_concepts(context, updated) text = "--filter" expected = [CN("filter", source="--filter")] expected_array = compute_expected_array(my_map, text, expected) parser.reset_parser(context, ParserInput(text)) bnf_parsers_helpers = parser.get_concepts_sequences(context) assert bnf_parsers_helpers[0].sequence == expected_array assert not bnf_parsers_helpers[0].has_unrecognized # but I cannot parse text = "- - filter" parser.reset_parser(context, ParserInput(text)) bnf_parsers_helpers = parser.get_concepts_sequences(context) assert bnf_parsers_helpers[0].has_unrecognized def test_i_can_match_multiple_sequences(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))), "bar": self.bnf_concept("bar", Sequence(StrMatch("one"), StrMatch("two"))), } text = "one two three one two" expected = [ CNC("foo", source="one two three"), CNC("bar", source="one two", start=6, end=8)] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one", [CNC("foo", source="one")]), ("two", [CNC("foo", source="two")]), ("three", []), ]) def test_i_can_match_ordered_choice(self, text, expected): my_map = { "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"))) } self.validate_get_concepts_sequences(my_map, text, expected) def test_i_do_not_match_ordered_choice_with_empty_alternative(self): my_map = { "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch(""))) } text = "" expected = [] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("concept_three, expected", [ (Concept("three"), []), (BaseTest.bnf_concept("three", StrMatch("three")), [UTN('twenty '), "three"]) ]) def 
test_i_can_manage_sequence_with_wrong_order_choice(self, concept_three, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("twenty"), OrderedChoice(StrMatch("one"), StrMatch("two")))), "three": concept_three} text = "twenty three" self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("ok thirty one", [CNC("foo", source="ok thirty one")]), ("ok twenty one", [CNC("foo", source="ok twenty one")]), ("ok one", []), ]) def test_i_can_mix_sequence_and_ordered(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("ok"), OrderedChoice(StrMatch("twenty"), StrMatch("thirty")), StrMatch("one")) )} self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("twenty one", [CNC("foo", source="twenty one")]), ("twenty three", []), # three does not exist ("twenty four", []), # four exists but should not be seen ]) def test_i_can_mix_sequence_and_ordered_2(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("twenty"), OrderedChoice(StrMatch("one"), StrMatch("two")))), "four": Concept("four")} self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("twenty thirty", [CNC("foo", source="twenty thirty")]), ("one", [CNC("foo", source="one")]), ]) def test_i_can_mix_ordered_choices_and_sequences(self, text, expected): my_map = { "foo": self.bnf_concept("foo", OrderedChoice( Sequence(StrMatch("twenty"), StrMatch("thirty")), StrMatch("one")))} self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one", [CNC("foo", source="one")]), ("one two", [CNC("foo", source="one two")]), ("three", []), ]) def test_i_can_match_unordered_choice(self, text, expected): my_map = { "foo": self.bnf_concept("foo", UnOrderedChoice( StrMatch("one"), Sequence(StrMatch("one"), StrMatch("two")))), } 
self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one", [CNC("foo", source="one")]), ("", []), ("two", []), ]) def test_i_can_match_optional(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Optional(StrMatch("one"))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("twenty one", [CNC("foo", source="twenty one")]), ("one", [CNC("foo", source="one")]), ]) def test_i_can_match_sequence_starting_with_optional(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( Optional(StrMatch("twenty")), StrMatch("one"))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one two three", [CNC("foo", source="one two three")]), ("one two", [CNC("foo", source="one two")]), ]) def test_i_can_match_sequence_ending_with_optional(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("one"), StrMatch("two"), Optional(StrMatch("three")))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one two three", [CNC("foo", source="one two three")]), ("one three", [CNC("foo", source="one three")]), ]) def test_i_can_match_sequence_with_optional_in_between(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("one"), Optional(StrMatch("two")), StrMatch("three"))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("", []), ("two", []), ("one", [CNC("foo", source="one")]), ("one one", [CNC("foo", source="one one")]), ]) def test_i_can_match_zero_or_more(self, text, expected): my_map = { "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("two", [CNC("foo", source="two")]), ("one two", [CNC("foo", 
source="one two")]), ("one one two", [CNC("foo", source="one one two")]), ]) def test_i_can_match_sequence_and_zero_or_more(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( ZeroOrMore(StrMatch("one")), StrMatch("two") )) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one, one , one", [CNC("foo", source="one, one , one")]), ]) def test_i_can_match_zero_or_more_with_separator(self, text, expected): my_map = { "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=",")) } self.validate_get_concepts_sequences(my_map, text, expected) def test_that_zero_or_more_is_greedy(self): my_map = { "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))), "bar": self.bnf_concept("foo", StrMatch("one")) } text = "one one one" expected = [CNC("foo", source=text)] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("", []), ("two", []), ("one", [CNC("foo", source="one")]), ("one one one", [CNC("foo", source="one one one")]), ]) def test_i_can_match_one_or_more(self, text, expected): my_map = { "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))), } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("two", []), ("one two", [CNC("foo", source="one two")]), ("one one two", [CNC("foo", source="one one two")]), ]) def test_i_can_match_sequence_one_and_or_more(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( OneOrMore(StrMatch("one")), StrMatch("two") )) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one, one , one", [CNC("foo", source="one, one , one")]), ]) def test_i_can_match_one_or_more_with_separator(self, text, expected): my_map = { "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=",")) } self.validate_get_concepts_sequences(my_map, text, expected) def 
test_that_one_or_more_is_greedy(self): my_map = { "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))), "bar": self.bnf_concept("foo", StrMatch("one")) } text = "one one one" expected = [CNC("foo", source=text)] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one two", [ [CNC("foo", source="one two")], [CNC("bar", source="one two")]]), ("one two one two", [ [CNC("bar", source="one two"), CNC("bar", source="one two")], [CNC("foo", source="one two"), CNC("bar", source="one two")], [CNC("bar", source="one two"), CNC("foo", source="one two")], [CNC("foo", source="one two"), CNC("foo", source="one two")]]), ]) def test_i_can_have_multiple_results(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), "bar": self.bnf_concept("bar", Sequence( StrMatch("one"), OrderedChoice(StrMatch("two"), StrMatch("three")))), } text = "one two" expected = [[CNC("foo", source=text)], [CNC("bar", source=text)]] self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) def test_i_can_refer_to_other_concepts(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), "bar": self.bnf_concept("bar", ConceptExpression("foo")) } text = "one two" expected = [ [CNC("foo", source=text)], [CN("bar", source=text)] # Do not check the compiled part ] sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) # explicit validations of the compiled concept_foo = sequences[0][0].concept assert concept_foo.body == NotInit assert concept_foo.get_compiled() == {ConceptParts.BODY: DoNotResolve("one two")} concept_bar = sequences[1][0].concept assert concept_bar.body == NotInit assert concept_bar.get_compiled() == { ConceptParts.BODY: concept_foo, "foo": concept_foo } assert id(concept_bar.get_compiled()[ConceptParts.BODY]) == id(concept_bar.get_compiled()["foo"]) def 
test_i_can_refer_to_other_concepts_with_body(self): my_map = { "foo": self.bnf_concept(Concept("foo", body="'foo'"), Sequence(StrMatch("one"), StrMatch("two"))), "bar": self.bnf_concept("bar", ConceptExpression("foo")) } text = "one two" expected = [ [CNC("foo", source=text)], [CN("bar", source=text)] # Do not check the compiled part ] sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) # explicit validations of the compiled concept_foo = sequences[0][0].concept assert concept_foo.body == NotInit assert len(concept_foo.get_compiled()) == 0 # because there is a body defined in the metadata concept_bar = sequences[1][0].concept assert concept_bar.body == NotInit assert concept_bar.get_compiled() == { ConceptParts.BODY: concept_foo, "foo": concept_foo } def test_i_can_manage_concepts_reference_when_multiple_levels(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), "bar": self.bnf_concept("bar", ConceptExpression("foo")), "baz": self.bnf_concept("baz", ConceptExpression("bar")), } text = "one two" expected = [ [CNC("foo", source=text)], [CN("bar", source=text)], # Do not check the compiled part [CN("baz", source=text)], # Do not check the compiled part ] sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) # explicit validations of the compiled concept_foo = sequences[0][0].concept assert concept_foo.body == NotInit assert concept_foo.get_compiled() == {ConceptParts.BODY: DoNotResolve("one two")} concept_bar = sequences[1][0].concept assert concept_bar.body == NotInit assert concept_bar.get_compiled() == { ConceptParts.BODY: concept_foo, "foo": concept_foo } concept_baz = sequences[2][0].concept assert concept_baz.body == NotInit assert concept_baz.get_compiled() == { ConceptParts.BODY: concept_bar, "bar": concept_bar } def test_i_can_mix_reference_to_other_concepts(self): my_map = { "foo": self.bnf_concept("foo", 
OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))), "bar": self.bnf_concept("bar", Sequence( ConceptExpression("foo"), OrderedChoice(StrMatch("one"), StrMatch("two")))), "three": Concept("three") } text = "twenty two" expected = [CN("bar", source="twenty two")] sequences = self.validate_get_concepts_sequences(my_map, text, expected) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("twenty two"), "foo": my_map["foo"], } assert concept_bar.get_compiled()["foo"].get_compiled() == {ConceptParts.BODY: DoNotResolve("twenty")} text = "thirty one" expected = [CN("bar", source="thirty one")] sequences = self.validate_get_concepts_sequences(my_map, text, expected) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("thirty one"), "foo": my_map["foo"], } assert concept_bar.get_compiled()["foo"].get_compiled() == {ConceptParts.BODY: DoNotResolve("thirty")} text = "thirty three" expected = [[CN("foo", source="thirty"), CN("three")], []] self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) def test_i_can_mix_reference_to_other_concepts_2(self): # this time, we use concept expression my_map = { "twenty": self.bnf_concept("twenty", StrMatch("twenty")), "number": self.bnf_concept("number", OrderedChoice(StrMatch("one"), StrMatch("two"))), "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number"))), "three": Concept("three") } text = "twenty two" expected = [CNC("twenties", source="twenty two", twenty=CC("twenty", body=DoNotResolve("twenty")), number=CC("number", source="two", body=DoNotResolve("two")) )] self.validate_get_concepts_sequences(my_map, text, expected) text = "twenty three" expected = [[CN("twenty"), CN("three")], []] self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) def test_i_can_mix_reference_to_other_concepts_when_body(self): my_map = { "foo": 
self.bnf_concept(Concept("foo", body="'foo'"), OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))), "bar": self.bnf_concept("bar", Sequence( ConceptExpression("foo"), OrderedChoice(StrMatch("one"), StrMatch("two")))), } sheerka, context, *concepts = self.init_test().with_concepts(*my_map.values(), create_new=False).unpack() text = "twenty two" expected = [CN("bar", source="twenty two")] sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, False, None, *concepts) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("twenty two"), "foo": sheerka.new("foo"), } assert concept_bar.get_compiled()["foo"].get_compiled() == {} # as foo as a body text = "thirty one" expected = [CN("bar", source="thirty one")] sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, False, None, *concepts) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("thirty one"), "foo": sheerka.new("foo"), } assert concept_bar.get_compiled()["foo"].get_compiled() == {} def test_i_can_mix_zero_and_more_and_reference_to_other_concepts(self): my_map = { "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"), StrMatch("three"))), "bar": self.bnf_concept("bar", ZeroOrMore(ConceptExpression("foo"))), } text = "one two three" expected = [CN("bar", source="one two three")] sequences = self.validate_get_concepts_sequences(my_map, text, expected) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("one two three"), "foo": [my_map["foo"], my_map["foo"], my_map["foo"]] } assert concept_bar.get_compiled()["foo"][0].get_compiled() == {ConceptParts.BODY: DoNotResolve("one")} assert concept_bar.get_compiled()["foo"][1].get_compiled() == {ConceptParts.BODY: DoNotResolve("two")} assert concept_bar.get_compiled()["foo"][2].get_compiled() == {ConceptParts.BODY: DoNotResolve("three")} def 
test_i_can_match_concept_reference_that_is_not_in_grammar(self): my_map = { "one": Concept("one"), "two": Concept("two"), "foo": self.bnf_concept("foo", Sequence( StrMatch("twenty"), OrderedChoice( ConceptExpression("one"), ConceptExpression("two"), rule_name="unit"))), } text = "twenty one" expected = [CN("foo", source="twenty one")] sequences = self.validate_get_concepts_sequences(my_map, text, expected) concept_foo = sequences[0].concept assert concept_foo.get_compiled() == { ConceptParts.BODY: DoNotResolve("twenty one"), "unit": my_map["one"], } def test_i_can_refer_to_group_concepts(self): my_map = { "one": Concept("one"), "two": Concept("two"), "number": Concept("number"), "foo": self.bnf_concept("foo", Sequence("twenty", ConceptExpression("number"))) } def pic(s, c): s.add_concept_to_set(c, my_map["one"], my_map["number"]) s.add_concept_to_set(c, my_map["two"], my_map["number"]) text = "twenty two" expected = [CN("foo", source="twenty two")] sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic) # explicit validations of the compiled concept_foo = sequences[0].concept assert concept_foo.body == NotInit assert concept_foo.get_compiled() == {'number': CC(my_map["number"], body=my_map["two"], two=my_map["two"]), ConceptParts.BODY: DoNotResolve(value='twenty two')} text = "twenty one" expected = [CN("foo", source="twenty one")] sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic) # explicit validations of the compiled concept_foo = sequences[0].concept assert concept_foo.body == NotInit assert concept_foo.get_compiled() == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]), ConceptParts.BODY: DoNotResolve(value='twenty one')} @pytest.mark.parametrize("expr, expected", [ ("one 'car'", [CNC("foo", source="one 'car'", x=python_ret_val("'car'"))]), # python ("one bar", [CNC("foo", source="one bar", x=CC("bar"))]), # simple concept ("one super car", [CNC("foo", 
source="one super car", x=CC("super car"))]), # long concept ("one shoe", [CNC("foo", source="one shoe", x=CC("thing", source="shoe", body=DoNotResolve("shoe")))]), # bnf ]) def test_i_can_match_variable_when_ending_with_one_variable(self, expr, expected): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))), "bar": Concept("bar"), "baz": Concept("baz"), "thing": Concept("thing", definition="'shoe'|'skirt'"), "super car": Concept("super car"), "plus": Concept("x plus y").def_var("x").def_var("y"), } self.validate_get_concepts_sequences(my_map, expr, expected) def test_i_can_match_variable_when_ending_with_one_variable_and_sya(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))), "bar": Concept("bar"), "baz": Concept("baz"), "plus": Concept("x plus y").def_var("x").def_var("y"), } expr = "one bar plus baz" expected = [ [CNC("foo", source="one bar", x=CC("bar")), UTN(" plus "), CN("baz")], [CNC("foo", source="one bar plus baz", x=CC("plus", source="bar plus baz", x="bar", y="baz"))], ] self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True) def test_i_can_match_variable_when_ending_with_one_variable_and_multiple_results(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))), "pretty big": Concept("pretty big", body="'pretty big'"), "pbig": Concept("pretty big"), } expr = "one pretty big" expected = [ [CNC("foo", source="one pretty big", x=CC("pretty big"))], [CNC("foo", source="one pretty big", x=CC("pbig", source="pretty big"))] ] self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True) def test_i_can_match_variable_when_ending_with_multiple_variables_and_multiple_results(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"), VariableExpression("y"))), "pretty": Concept("pretty", body="pretty"), "pretty2": Concept("pretty"), "big": 
Concept("big", body="big"), } expr = "one pretty big" expected = [ [CNC("foo", source="one pretty big", x=CC("pretty"), y=CC("big"))], [CNC("foo", source="one pretty big", x=CC("pretty2", source="pretty"), y=CC("big"))] ] self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True) @pytest.mark.parametrize("expr, expected", [ ("'my' shoe", [CNC("foo", source="'my' shoe", x=python_ret_val("'my' "))]), # python ("one shoe", [CNC("foo", source="one shoe", x=CC("one"))]), # concept ("my little shoe", [CNC("foo", source="my little shoe", x=CC("my little"))]), # long concept ("black shoe", [CNC("foo", source="black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]), ]) def test_i_can_match_variable_when_starting_with_one_variable(self, expr, expected): my_map = { "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))), "one": Concept("one"), "my little": Concept("my little"), "color": Concept("color", definition="'blue'|'black'"), "and": Concept("x and y").def_var("x").def_var("y"), } self.validate_get_concepts_sequences(my_map, expr, expected) def test_i_can_match_variable_when_starting_with_one_variable_and_sya(self): my_map = { "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))), "tiny": Concept("tiny"), "beautiful": Concept("beautiful"), "but": Concept("x but y").def_var("x").def_var("y"), } expr = "tiny but beautiful shoe" expected_res = [ CNC("foo", source="tiny but beautiful shoe", x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful"))] unwanted_res = [CN("tiny"), UTN(" but "), CNC("foo", source="beautiful shoe", x=CC("beautiful"))] self.validate_get_concepts_sequences(my_map, expr, [unwanted_res, expected_res], multiple_result=True) def test_i_can_match_variable_when_starting_with_multiple_variables(self): my_map = { "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), VariableExpression("y"), VariableExpression("z"), StrMatch("shoe"))), 
            "one": Concept("one"),
            "two": Concept("two"),
            "plus": Concept("x plus y").def_var("x").def_var("y"),
        }
        text = "one 'one' one plus two shoe"
        # NOTE(review): the bare tuple ("one", 1) in unwanted_res looks like shorthand
        # the comparison helper understands (name + index?) — confirm against
        # tests.parsers.parsers_utils.get_test_obj.
        unwanted_res = [CN("one"), SCN(" 'one' "), ("one", 1), UTN(" plus "), CN("two")]
        expected_res = [CNC("foo", source="one 'one' one plus two shoe",
                            x=CC("one"), y=python_ret_val(" 'one' "),
                            z=CC("plus", source="one plus two", x="one", y="two"))]
        expected = [unwanted_res, expected_res]
        self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)

    def test_i_can_match_variable_when_starting_with_one_variable_and_longer_str(self):
        # The variable is followed by a run of three string literals.
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("foo"),
                                                    StrMatch("bar"), StrMatch("baz"))),
            "one": Concept("one")
        }
        text = "one foo bar baz"
        expected = [CNC("foo", source="one foo bar baz", x=CC("one"))]
        self.validate_get_concepts_sequences(my_map, text, expected)

    @pytest.mark.parametrize("expr, expected", [
        ("one 'pretty' shoe", [CNC("foo", source="one 'pretty' shoe",
                                   x=python_ret_val("'pretty' "))]),  # python
        ("one little shoe", [CNC("foo", source="one little shoe", x=CC("little"))]),  # concept
        ("one very big shoe", [CNC("foo", source="one very big shoe",
                                   x=CC("very big"))]),  # long concept
        ("one black shoe", [CNC("foo", source="one black shoe",
                                x=CC("color", source="black", body=DoNotResolve('black')))]),
        ("one tiny but beautiful shoe",
         [CNC("foo", source="one tiny but beautiful shoe",
              x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful "))]),
    ])
    def test_i_can_match_variable_in_between(self, expr, expected):
        # Variable slot sandwiched between two literals ("one" ... "shoe").
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"),
                                                    StrMatch("shoe"))),
            "little": Concept("little"),
            "very big": Concept("very big"),
            "color": Concept("color", definition="'blue'|'black'"),
            "but": Concept("x but y").def_var("x").def_var("y"),
        }
        self.validate_get_concepts_sequences(my_map, expr, expected)

    def test_i_can_match_variable_when_multiple_results_in_between(self):
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"),
                                                    StrMatch("shoe"))),
            "pretty big": Concept("pretty big", body="'pretty big'"),
            "pbig": Concept("pretty big"),
        }
        expr = "one pretty big shoe"
        expected = [
            [CNC("foo", source="one pretty big shoe", x=CC("pretty big"))],
            [CNC("foo", source="one pretty big shoe", x=CC("pbig", source="pretty big"))]
        ]
        self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)

    def test_i_can_match_regex_and_variable(self):
        # A regex element can precede a variable slot.
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(RegExMatch("[a-z]+"), VariableExpression("x"))),
            "shoe": Concept("shoe")
        }
        text = "onyx shoe"
        expected = [CNC("foo", source="onyx shoe", x=CC("shoe"))]
        self.validate_get_concepts_sequences(my_map, text, expected)

    def test_i_can_match_variable_and_regex(self):
        # Symmetric case: variable slot followed by a regex element.
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), RegExMatch("[a-z]+"))),
            "one": Concept("one")
        }
        text = "one onyx"
        expected = [CNC("foo", source="one onyx", x=CC("one"))]
        self.validate_get_concepts_sequences(my_map, text, expected)

    def test_i_can_reuse_the_same_variable(self):
        # in this test, the variable appears several times, but only once in concept.compiled
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("equals"),
                                                    VariableExpression("x"))),
            "one": Concept("one"),
            "two": Concept("two"),
        }
        sheerka, context, *updated = self.init_concepts(*my_map.values())
        parser = BnfNodeParser()
        parser.init_from_concepts(context, updated)
        # same variable appears only once in the compiled variables
        text = "one equals one"
        expected = [CNC("foo", source="one equals one", x=CC("one"))]
        expected_sequence = compute_expected_array(my_map, text, expected)
        parser.reset_parser(context, ParserInput(text))
        bnf_parsers_helpers = parser.get_concepts_sequences(context)
        # NOTE(review): get_test_obj projects the actual sequence onto the expected
        # shape; the result is compared to `expected`, not `expected_sequence` —
        # presumably intentional, confirm against parsers_utils.
        to_compare = tests.parsers.parsers_utils.get_test_obj(expected_sequence,
                                                              bnf_parsers_helpers[0].sequence)
        assert to_compare == expected

    def test_i_cannot_match_variable_when_variables_discrepancy(self):
        # "one equals two" must not match because the repeated variable x would have
        # to bind two different values.
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("equals"),
                                                    VariableExpression("x"))),
            "one": Concept("one"),
            "one_1": Concept("one", body="1"),
            "two": Concept("two"),
            "two_2": Concept("two", body="2"),
        }
        sheerka, context, *updated = self.init_concepts(*my_map.values())
        parser = BnfNodeParser()
        parser.init_from_concepts(context, updated)
        text = "one equals two"
        parser.reset_parser(context, ParserInput(text))
        bnf_parsers_helpers = parser.get_concepts_sequences(context)
        assert bnf_parsers_helpers[0].sequence == []

    @pytest.mark.parametrize("bar_expr, expected", [
        (ConceptExpression("foo"), {}),
        (OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
        (Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")),
         {'one': ['1001', '1002']}),
    ])
    def test_i_can_detect_infinite_recursion(self, bar_expr, expected):
        # foo -> bar and bar -> foo (directly or via bar_expr) form a cycle.
        my_map = {
            "foo": self.bnf_concept("foo", ConceptExpression("bar")),
            "bar": self.bnf_concept("bar", bar_expr),
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        # every obvious cyclic recursion are removed from concept_by_first_keyword dict
        assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
        # get_parsing_expression() also returns CHICKEN_AND_EGG
        parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        parsing_expression = parser.get_parsing_expression(context, my_map["bar"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)

    def test_i_can_detect_longer_infinite_recursion(self):
        # Four-concept cycle: foo -> bar -> baz -> qux -> foo.
        my_map = {
            "foo": self.bnf_concept("foo", ConceptExpression("bar")),
            "bar": self.bnf_concept("bar", ConceptExpression("baz")),
            "baz": self.bnf_concept("baz", ConceptExpression("qux")),
            "qux": self.bnf_concept("qux", ConceptExpression("foo")),
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        # every obvious cyclic recursion are removed from concept_by_first_keyword dict
        assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
        parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        # the CHICKEN_AND_EGG body records the cycle path by concept id
        assert parser.concepts_grammars.get(my_map["foo"].id).body == \
            ["1001", "1002", "1003", "1004", "1001"]
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["baz"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["qux"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)

    def test_i_can_detect_partial_infinite_recursion(self):
        # Here only baz <-> qux form the cycle; foo and bar merely lead into it.
        my_map = {
            "foo": self.bnf_concept("foo", ConceptExpression("bar")),
            "bar": self.bnf_concept("bar", ConceptExpression("baz")),
            "baz": self.bnf_concept("baz", ConceptExpression("qux")),
            "qux": self.bnf_concept("qux", ConceptExpression("baz")),
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        # every obvious cyclic recursion are removed from concept_by_first_keyword dict
        assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
        parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        # cycle path ends on the repeated id "1003" (baz)
        assert parser.concepts_grammars.get(my_map["foo"].id).body == \
            ["1001", "1002", "1003", "1004", "1003"]
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["baz"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["qux"].id),
                                  BuiltinConcepts.CHICKEN_AND_EGG)

    @pytest.mark.parametrize("expr, expected", [
        (OrderedChoice(StrMatch("bar"), ConceptExpression("foo")), False),
        (OrderedChoice(ConceptExpression("foo"), StrMatch("bar")), True),
        (OrderedChoice(Sequence(StrMatch("bar"), ConceptExpression("foo")), StrMatch("baz")), False),
        (OrderedChoice(Sequence(ConceptExpression("foo"), StrMatch("bar")), StrMatch("baz")), True)
    ])
    def test_i_can_detect_ordered_choice_infinite_recursion(self, expr, expected):
        # A self-reference is only a cycle when it can be reached as the FIRST
        # element of a choice alternative.
        my_map = {
            "foo": self.bnf_concept("foo", expr),
        }
        sheerka, context, parser = self.init_parser(my_map)
        parser.context = context
        parser.sheerka = sheerka
        res = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected

    def test_i_can_get_parsing_expression_when_ending_by_concept_isa(self):
        # "number" is a group concept: one and twenty are declared isa number,
        # so its grammar node expands to an UnOrderedChoice of its members.
        my_map = {
            "one": Concept("one"),
            "twenty": Concept("twenty"),
            "number": Concept("number"),
            "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"),
                                                              ConceptExpression("number")))
        }
        sheerka, context, parser = self.init_parser(my_map)
        parser.context = context
        parser.sheerka = sheerka
        sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["twenty"], rule_name="twenty"),
            ConceptExpression(my_map["number"], rule_name="number"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        twenty_nodes = parsing_expression.nodes[0].nodes
        assert twenty_nodes == [StrMatch("twenty")]
        number_nodes = parsing_expression.nodes[1].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes

    def test_i_can_get_parsing_expression_when_starting_by_isa_concept(self):
        # The group concept appears FIRST in the sequence; hundreds is itself isa
        # number but must not appear in its own expansion.
        my_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "number": Concept("number"),
            "hundreds": self.bnf_concept("hundreds", Sequence(ConceptExpression("number"),
                                                              StrMatch("hundred")))
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("two"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("hundreds"), my_map["number"])
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["hundreds"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["number"], rule_name="number"), StrMatch("hundred"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        number_nodes = parsing_expression.nodes[0].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes

    def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
        my_map = {
            "one": Concept("one"),
            "twenty": Concept("twenty"),
            "number": Concept("number"),
            "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"),
                                                              ConceptExpression("number")))
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"])  # <- twenties is also a number
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["twenty"], rule_name="twenty"),
            ConceptExpression(my_map["number"], rule_name="number"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        twenty_nodes = parsing_expression.nodes[0].nodes
        assert twenty_nodes == [StrMatch("twenty")]
        number_nodes = parsing_expression.nodes[1].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes

    def test_i_can_get_parsing_expression_when_sequence_of_concepts(self):
        # The same concept may appear twice in one sequence.
        my_map = {
            "one": Concept("one"),
            "two_ones": self.bnf_concept("two_ones", Sequence(ConceptExpression("one"),
                                                              ConceptExpression("one")))
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        parsing_expression = parser.get_parsing_expression(context, my_map["two_ones"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["one"], rule_name="one"),
            ConceptExpression(my_map["one"], rule_name="one"))

    @pytest.mark.parametrize("expr, text, expected", [
        (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
        (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
        (StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]),
    ])
    def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
        # Tokens that match no concept come back as UTN (unknown token) nodes,
        # before or after the recognized part.
        my_map = {
            "foo": self.bnf_concept("foo", expr)
        }
        self.validate_get_concepts_sequences(my_map, text, expected)

    def test_i_can_recognize_unknown_when_they_look_like_known(self):
        # "one" is a prefix of the known "one two" but must stay unknown here.
        my_map = {
            "one two": self.bnf_concept("one two", Sequence("one", "two")),
            "three": self.bnf_concept("three")
        }
        text = "one three"
        expected = [UTN("one "), CNC("three", source="three")]
        self.validate_get_concepts_sequences(my_map, text, expected)

    def test_i_can_remove_duplicates(self):
        # "one two" and "one four" both start with "one": only one candidate
        # sequence should survive deduplication.
        my_map = {
            "one two": self.bnf_concept("one two", Sequence("one", "two")),
            "one four": self.bnf_concept("one four", Sequence("one", "four")),
            "three": self.bnf_concept("three")
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.reset_parser(context, ParserInput("one three"))
        sequences = parser.get_concepts_sequences(context)
        sequence = parser.get_valid(sequences)
        assert len(sequence) == 1

    @pytest.mark.parametrize("parser_input, expected_status, expected", [
        ("baz", True, [CNC("bnf baz", source="baz")]),  # the bnf one is chosen
        ("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]),
        ("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
        ("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
        ("twenty one", True, [CNC("t1", source="twenty one", unit="one")]),
        ("one 'car'", True, [CNC("one thing", source="one 'car'",
                                 x=python_ret_val("'car'"), one="one")])
    ])
    def test_i_can_parse_simple_expressions(self, parser_input,
                                            expected_status, expected):
        # End-to-end parse against the module-level cmap ontology.
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        res = parser.parse(context, ParserInput(parser_input))
        expected_array = compute_expected_array(cmap, parser_input, expected)
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status == expected_status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_when_multiple_times_the_same_variable(self):
        # With "foo+" the variable "foo" is bound once per repetition, so the
        # compiled value becomes a list of three concepts.
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "foo foo foo"
        expected_array = compute_expected_array(cmap, text, [CNC("one or more foo", source=text)])
        expected_array[0].compiled["foo"] = [cmap["foo"], cmap["foo"], cmap["foo"]]
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_test_when_expression_references_other_expressions(self):
        # t2 references three_four, which itself is a bnf concept.
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "twenty four"
        expected = CNC("t2", source=text,
                       unit=CC("three_four", source="four",
                               four=CC("four", body=DoNotResolve("four")),
                               body=CC("four", body=DoNotResolve("four"))))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        # thirties is defined in the global variable cmap as
        # thirties = cls.update_bnf(context, Concept("thirties",
        #                                            definition="thirty number",
        #                                            where="number < 10",
        #                                            body="thirty + number").def_var("thirty").def_var("number"))
        text = "thirty one"
        expected = CNC("thirties", source=text,
                       number=CC("number", source="one",
                                 one=CC("one", body=DoNotResolve("one")),
                                 body=CC("one", body=DoNotResolve("one"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_2(self):
        # this time, three is a number, and also part of three_four, even if it is not relevant in t3
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "thirty three"
        expected = CNC("thirties", source=text,
                       number=CC("number", source="three",
                                 three=CC("three", body=DoNotResolve("three")),
                                 body=CC("three", body=DoNotResolve("three"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_when_starting_by_isa_concept(self):
        """
        Test of simple number + 'thousand'
        :return:
        """
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "one thousand"
        one = CC("one", body=DoNotResolve("one"))
        expected = CNC("thousands", source=text,
                       number=CC("number", source="one", one=one, body=one))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_fifty_one_thousand(self):
        """
        Test of complex number + 'thousand' (complex because the
        number is a BNF concept)
        :return:
        """
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "fifty one thousand"
        one = CC("one", body=DoNotResolve("one"))
        # "fifty one thousand" is ambiguous: (fifty one) thousand vs fifty (one thousand);
        # both readings are asserted below, in that order.
        fifty_one = CC("fifties", source="fifty one", fifty="fifty",
                       number=CC("number", source="one", body=one, one=one))
        one_thousand = CC("thousands", source="one thousand",
                          number=CC("number", source="one", body=one, one=one))
        expected_thousand = CNC("thousands", source=text,
                                number=CC("number", source="fifty one",
                                          fifties=fifty_one, body=fifty_one))
        expected_fifties = CNC("fifties", source=text, fifty="fifty",
                               number=CC("number", source="one thousand",
                                         thousands=one_thousand, body=one_thousand))
        expected_thousands = compute_expected_array(cmap, text, [expected_thousand])
        expected_fifties = compute_expected_array(cmap, text, [expected_fifties])
        res = parser.parse(context, ParserInput(text))
        assert res[0].status
        assert res[0].value.value == expected_thousands
        assert res[1].status
        assert res[1].value.value == expected_fifties

    def test_i_can_parse_one_hundred_thousand(self):
        # Only the parse status is checked here, not the full result shape.
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "one hundred thousand"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)

    def test_i_can_parse_hundreds_like_expression(self):
        # Exercises the cmap "hundreds" concept: number 'hundred' 'and' number.
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "three hundred and thirty two"
        three = CC("three", body=DoNotResolve("three"))
        two = CC("two", body=DoNotResolve("two"))
        thirty_two = CC("thirties", source="thirty two", thirty="thirty",
                        number=CC("number", source="two", body=two, two=two))
        expected = CNC("hundreds", source=text,
                       n1=CC("number", source="three", body=three, three=three),
                       n2=CC("number", source="thirty two", body=thirty_two,
                             thirties=thirty_two))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
        # Clearing every compiled bnf forces the parser to rebuild grammars lazily.
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        for c in cmap.values():
            sheerka.get_by_id(c.id).set_bnf(None)
        text = "thirty three"
        expected = CNC("thirties", source=text,
                       number=CC("number", source="three",
                                 three=CC("three", body=DoNotResolve("three")),
                                 body=CC("three", body=DoNotResolve("three"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array
        # a second parse after the rebuild must also work
        text = "forty one"
        expected = CNC("forties", source=text,
                       number=CC("number", source="one",
                                 one=CC("one", body=DoNotResolve("one")),
                                 body=CC("one", body=DoNotResolve("one"))),
                       forty="forty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_when_keyword(self):
        # "def" has no definition of its own ("def_only") but acts as a keyword
        # inside "def number".
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        parser_input = "def one"
        expected = [CNC("def number", source="def one", number="one")]
        res = parser.parse(context, ParserInput(parser_input))
        expected_array = compute_expected_array(cmap, parser_input, expected)
        expected_array[0].compiled["def"] = cmap["def_only"]
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_filter(self):
        # A definition may start with a non-word token ('--filter').
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        expression = "--filter one"
        expected = [CN("filter", source="--filter one")]
        res = parser.parse(context, ParserInput(expression))
        expected_array = compute_expected_array(cmap, expression, expected)
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_array

    def test_i_can_parse_descent_grammar(self):
        # Classic expr/term/factor arithmetic grammar expressed with repetition.
        my_map = {
            "factor": Concept("factor", definition="1 | 2 | 3"),
            "term": Concept("term", definition="factor ('*' factor)*"),
            "expr": Concept("expr", definition="term ('+' term)*"),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "1 + 2 * 3"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        factor = my_map["factor"]
        term = my_map["term"]
        expr = my_map["expr"]
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == [CNC(expr,
                                      term=[CC(term,
                                               body=CC(factor, body=DoNotResolve("1")),
                                               factor=CC(factor, body=DoNotResolve("1"))),
                                            CC(term, body=DoNotResolve("2 * 3"),
                                               factor=[
                                                   CC(factor, body=DoNotResolve("2")),
                                                   CC(factor, body=DoNotResolve("3")),
                                               ])],
                                      body=DoNotResolve("1 + 2 * 3"))]

    def test_i_can_parse_recursive_descent_grammar(self):
        # Same grammar but with right-recursion instead of repetition.
        my_map = {
            "factor": Concept("factor", definition="1 | 2 | 3"),
            "term": self.bnf_concept("term", OrderedChoice(
                Sequence(ConceptExpression("factor"), StrMatch("*"), ConceptExpression("term")),
                ConceptExpression("factor"))),
            "expr": self.bnf_concept("expr", OrderedChoice(
                Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
                ConceptExpression("term"))),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "1 + 2 * 3"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        factor = my_map["factor"]
        term = my_map["term"]
        expr = my_map["expr"]
        # concepts_nodes = res.value.value is too complicated to be validated
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == [CNC(expr,
                                      term=CC(term,
                                              body=CC(factor, body=DoNotResolve("1")),
                                              factor=CC(factor, body=DoNotResolve("1"))),
                                      expr=CC(expr,
                                              body=CC(term, body=DoNotResolve("2 * 3"),
                                                      factor=CC(factor, body=DoNotResolve("2")),
                                                      term=CC(term,
                                                              body=CC(factor, body=DoNotResolve("3")),
                                                              factor=CC(factor, body=DoNotResolve("3")))),
                                              term=CC(term, body=DoNotResolve("2 * 3"),
                                                      factor=CC(factor, body=DoNotResolve("2")),
                                                      term=CC(term,
                                                              body=CC(factor, body=DoNotResolve("3")),
                                                              factor=CC(factor, body=DoNotResolve("3"))))),
                                      body=DoNotResolve("1 + 2 * 3"))]

    def test_i_can_parse_simple_recursive_grammar(self):
        # foo -> 'foo' ('bar' | foo): recursion guarded by a leading literal.
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("foo"),
                                                    OrderedChoice(StrMatch("bar"),
                                                                  ConceptExpression("foo")))),
        }
        sheerka, context, parser = self.init_parser(my_map)
        assert parser.parse(context, ParserInput("foo bar")).status
        assert parser.parse(context, ParserInput("foo foo foo bar")).status
        assert not parser.parse(context, ParserInput("foo baz")).status

    @pytest.mark.parametrize("name, expected", [
        (None, []),
        ("", []),
        ("foo", StrMatch("foo")),
        ("foo bar", Sequence(StrMatch("foo"), StrMatch("bar"))),
        # quoted segments are matched literally, whitespace included
        ("'foo bar baz' qux", Sequence(StrMatch("foo", skip_whitespace=False),
                                       StrMatch(" ", skip_whitespace=False),
                                       StrMatch("bar", skip_whitespace=False),
                                       StrMatch(" ", skip_whitespace=False),
                                       StrMatch("baz"), StrMatch("qux"))),
    ])
    def test_i_can_get_expression_from_concept_name(self, name, expected):
        assert BnfNodeParser.get_expression_from_concept_name(name) == expected

    def test_i_can_parse_when_multiple_layers(self):
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        # sanity
        text = "thirty one"
        res = parser.parse(context, ParserInput(text))
        assert res.status
        assert res.value.value == compute_expected_array(cmap, text, [CN("thirties", source=text)])
        # add a layer, I still can parse the text
        sheerka.push_ontology(context, "new layer")
        parser = BnfNodeParser(sheerka=sheerka)
        res = parser.parse(context, ParserInput(text))
        assert res.status
        assert res.value.value == compute_expected_array(cmap, text, [CN("thirties", source=text)])

    def test_i_do_not_eat_unwanted_tokens_at_the_beginning_when_concept_with_variable(self):
        # "two" precedes the match and must stay a standalone concept node.
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
            "one": Concept("one"),
            "two": Concept("two"),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "two one shoe"
        res = parser.parse(context, ParserInput(text))
        assert res.status
        assert res.value.value == compute_expected_array(my_map, text, [
            CN("two"), CNC("foo", source="one shoe", x=CC("one"))])

    def test_i_do_not_eat_unwanted_tokens_at_the_end_when_concept_with_variable(self):
        # "baz" follows the match and must stay a standalone concept node.
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
            "bar": Concept("bar"),
            "baz": Concept("baz"),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "one bar baz"
        res = parser.parse(context, ParserInput(text))
        assert res.status
        assert res.value.value == compute_expected_array(my_map, text, [
            CNC("foo", source="one bar", x=CC("bar")), CN("baz")])

    @pytest.mark.parametrize("parsing_expression, expected", [
        (RegExMatch("a"), [RegExDef("a")]),
        (OrderedChoice(StrMatch("first"), RegExMatch("a|b")), ["first", RegExDef("a|b")]),
        (OrderedChoice(RegExMatch("a|b"), StrMatch("first")), [RegExDef("a|b"), "first"]),
        (Sequence(StrMatch("a"), RegExMatch("a|b")), ["a"]),
        (Sequence(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
        (OneOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
        (OneOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
        (ZeroOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
        (ZeroOrMore(RegExMatch("a|b"),
StrMatch("a")), [RegExDef("a|b")]), ]) def test_i_can_get_first_item(self, parsing_expression, expected): sheerka = self.get_sheerka() visitor = BnfNodeFirstTokenVisitor(sheerka) visitor.visit(parsing_expression) assert visitor.first_tokens == expected def test_i_cannot_parse_regex_when_no_next_matching_token_cannot_be_found(self): sheerka, context, foo = self.init_test().with_concepts(Concept("foo", definition="r'abcd'"), create_new=True).unpack() parser = BnfNodeParser(sheerka=sheerka) res = parser.parse(context, ParserInput("abcdef")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) assert res.body.reason == [NoMatchingTokenError(4)] @pytest.mark.parametrize("text", [ "one", " one", "one ", " one " ]) def test_i_cannot_parse_empty_variable(self, text): sheerka, context, parser = self.init_parser(init_from_sheerka=True) res = parser.parse(context, ParserInput("one")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) @pytest.mark.parametrize("bnf, text", [ (Sequence(VariableExpression("x"), StrMatch("foo")), "one foo"), (Sequence(StrMatch("foo"), VariableExpression("x")), "foo one"), (Sequence(StrMatch("foo"), VariableExpression("x"), StrMatch("bar")), "foo one bar"), ]) def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text): sheerka, context, foo = self.init_test().with_concepts( self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))) ).unpack() parser = BnfNodeParser() parser.init_from_concepts(context, [foo]) res = parser.parse(context, ParserInput(text)) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) @pytest.mark.parametrize("to_match, ignore_case, multiline, explicit_flags", [ ("xxy", None, None, re.MULTILINE), ("xxy", True, True, re.MULTILINE), ("xxy", False, False, re.MULTILINE), ]) def test_i_can_serialize_reg_ex_def(self, to_match, ignore_case, multiline, explicit_flags): r = RegExDef(to_match, 
                     ignore_case, multiline, explicit_flags)
        serialized = r.serialize()
        r2 = RegExDef().deserialize(serialized)
        assert r == r2

    def test_i_can_resolve_parsing_expression_for_variable_concept(self):
        # When the variable leads, resolution attaches the FOLLOWING node to the
        # VariableExpression so the parser knows where the binding stops.
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        expression = Sequence(VariableExpression("x"), StrMatch("x"))
        resolved = parser.resolve_parsing_expression(context, expression, {}, set(), set())
        assert isinstance(resolved.nodes[0], VariableExpression)
        assert resolved.nodes[0].nodes[0] == resolved.nodes[1]

    def test_i_can_resolve_parsing_expression_when_ending_with_variable_concept(self):
        # When the variable is last there is no follower: the preceding node keeps
        # no children.
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        expression = Sequence(StrMatch("x"), VariableExpression("x"))
        resolved = parser.resolve_parsing_expression(context, expression, {}, set(), set())
        assert isinstance(resolved.nodes[1], VariableExpression)
        assert resolved.nodes[0].nodes == []

    # @pytest.mark.parametrize("parser_input, expected", [
    #     ("one", [
    #         (True, [CNC("bnf_one", source="one", one="one", body="one")]),
    #         (True, [CNC("one_or_two", source="one", one="one", body="one")]),
    #     ]),
    #     ("two plus two", [
    #         (False, [CN("bnf_one"), UTN(" plus "), CN("one_or_two")]),
    #         (False, [CN("one_or_two"), UTN(" plus "), CN("one_or_two")]),
    #     ])
    # ])
    # def test_i_can_parse_when_multiple_results(self, parser_input, expected):
    #     sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    #
    #     res = parser.parse(context, parser_input)
    #     assert len(res) == len(expected)
    #
    #     for res_i, expected_i in zip(res, expected):
    #         assert res_i.status == expected_i[0]
    #         expected_array = compute_expected_array(cmap, parser_input, expected_i[1])
    #         assert res_i.value.value == expected_array