import re import pytest import tests.parsers.parsers_utils from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_BNF from core.global_symbols import NotInit from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseNodeParser import NoMatchingTokenError from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser, RegExMatch, \ BnfNodeFirstTokenVisitor, Match, RegExDef, VariableExpression from tests.BaseTest import BaseTest from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.evaluators.EvaluatorTestsUtils import python_ret_val from tests.parsers.parsers_utils import CNC, CN, UTN, CC, SCN, get_test_obj, compare_with_test_object cmap = { "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), "four": Concept("four"), "thirty": Concept("thirty", body="30"), "forty": Concept("forty", body="40"), "fifty": Concept("fifty", body="50"), "number": Concept("number"), "foo": Concept("foo"), "bar": Concept("bar"), "baz": Concept("baz"), "one hundred": Concept("one hundred", body="100"), "one_hundred": Concept("'one hundred'", body="100"), "hundreds": Concept("hundreds", definition="number=n1 'hundred' 'and' number=n2", where="n1 < 10 and n2 < 100", body="n1 * 100 + n2").def_var("n1").def_var("n2"), "bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen "plus": Concept("plus", definition="one 'plus' two").def_var("a").def_var("b"), 'foo then bar': Concept("foo then bar", definition="foo bar").def_var("foo").def_var("bar"), 'foo or bar': Concept("foo or bar", definition="foo | bar").def_var("foo").def_var("bar"), 'one or more foo': Concept("one or more 
foo", definition="foo+").def_var("foo"), "t1": Concept("t1", definition="'twenty' (one|two)=unit").def_var("unit").def_var("one").def_var("two"), "three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"), "t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"), # bnf with variable "one thing": Concept("one x", definition="one x").def_var("x"), "x shoe": Concept("x shoe", definition="x 'shoe'").def_var("x"), # testing keywords "def_only": Concept("def"), "def number": Concept("def number", definition="def (one|two)=number"), # sequence of keywords using bnf definition # "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"), # "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"), # sequence of keywords using def definition # "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF), # "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"), "filter": Concept("filter", definition="'--filter' (one | two)") } def u(parsing_expression, start, end, children=None): """ u stands for underlying :param parsing_expression: :param start: :param end: :param children: :return: """ if isinstance(parsing_expression, str): parsing_expression = StrMatch(parsing_expression) if isinstance(parsing_expression, Match): return TerminalNode(parsing_expression, start, end, parsing_expression.to_match, parsing_expression.to_match) return NonTerminalNode(parsing_expression, start, end, [], children) def compute_expected_array(my_concepts_map, expression, expected, exclude_body=False): return tests.parsers.parsers_utils.compute_expected_array( my_concepts_map, expression, expected, init_empty_body=True, exclude_body=exclude_body) class TestBnfNodeParser(TestUsingMemoryBasedSheerka): shared_ontology = None @classmethod def setup_class(cls): 
init_test_helper = cls().init_test(cache_only=False, ontology="#TestBnfNodeParser#") sheerka, context, *updated = init_test_helper.with_concepts(*cmap.values(), create_new=True).unpack() for i, concept_name in enumerate(cmap): cmap[concept_name] = updated[i] # end of initialisation sheerka = TestBnfNodeParser.sheerka sheerka.set_isa(context, cmap["one"], cmap["number"]) sheerka.set_isa(context, cmap["two"], cmap["number"]) sheerka.set_isa(context, cmap["three"], cmap["number"]) sheerka.set_isa(context, cmap["four"], cmap["number"]) sheerka.set_isa(context, cmap["thirty"], cmap["number"]) sheerka.set_isa(context, cmap["forty"], cmap["number"]) sheerka.set_isa(context, cmap["fifty"], cmap["number"]) sheerka.set_isa(context, cmap["one hundred"], cmap["number"]) sheerka.set_isa(context, cmap["hundreds"], cmap["number"]) # Pay attention. 'twenties (t1 and t2) are not set as 'number' thirties = cls.update_bnf(context, Concept("thirties", definition="thirty number", where="number < 10", body="thirty + number").def_var("thirty").def_var("number")) cmap["thirties"] = sheerka.create_new_concept(context, thirties).body.body sheerka.set_isa(context, sheerka.new("thirties"), sheerka.new("number")) forties = cls.update_bnf(context, Concept("forties", definition="forty number", where="number < 10", body="forty + number").def_var("forty").def_var("number")) cmap["forties"] = sheerka.create_new_concept(context, forties).body.body sheerka.set_isa(context, sheerka.new("forties"), sheerka.new("number")) fifties = cls.update_bnf(context, Concept("fifties", definition="fifty number", where="number < 10", body="fifty + number").def_var("fifty").def_var("number")) cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number")) thousands = cls.update_bnf(context, Concept("thousands", definition="number 'thousand'", where="number < 999", body="number * 1000").def_var("number")) cmap["thousands"] = 
sheerka.create_new_concept(context, thousands).body.body sheerka.set_isa(context, sheerka.new("thousands"), sheerka.new("number")) cls.shared_ontology = sheerka.get_ontology(context) sheerka.pop_ontology(context) @staticmethod def update_bnf(context, concept): bnf_parser = BnfDefinitionParser() res = bnf_parser.parse(context, concept.get_metadata().definition) if res.status: concept.set_bnf(res.value.value) concept.get_metadata().definition_type = DEFINITION_TYPE_BNF else: raise Exception(res) return concept def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs): if my_concepts_map is None: sheerka, context = self.init_test().unpack() sheerka.add_ontology(context, self.shared_ontology) else: sheerka, context, *updated = self.init_test().with_concepts(*my_concepts_map.values(), **kwargs).unpack() for i, pair in enumerate(my_concepts_map): my_concepts_map[pair] = updated[i] if init_from_sheerka: parser = BnfNodeParser(sheerka=sheerka) else: parser = BnfNodeParser().init_from_concepts(context, my_concepts_map.values()) return sheerka, context, parser def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None): sheerka, context, *updated = self.init_test().with_concepts(*my_map.values(), create_new=False).unpack() sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, multiple_result, post_init_concepts, *updated) return sequences @staticmethod def exec_get_concepts_sequences(context, my_map, text, expected, multiple_result=False, post_init_concepts=None, *concepts): sheerka = context.sheerka if not multiple_result: expected_array = [compute_expected_array(my_map, text, expected)] else: expected_array = [compute_expected_array(my_map, text, e) for e in expected] if post_init_concepts: post_init_concepts(sheerka, context) parser = BnfNodeParser() parser.init_from_concepts(context, concepts) parser.reset_parser(context, ParserInput(text)) bnf_parsers_helpers = 
parser.get_concepts_sequences(context) assert len(bnf_parsers_helpers) == len(expected_array) for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array): to_compare = tests.parsers.parsers_utils.get_test_obj(parser_helper.sequence, expected_sequence) # assert parser_helper.sequence == expected_sequence assert to_compare == expected_sequence if len(bnf_parsers_helpers) == 1: return bnf_parsers_helpers[0].sequence else: return [pe.sequence for pe in bnf_parsers_helpers] def test_i_cannot_parse_empty_strings(self): sheerka, context, parser = self.init_parser({}, singleton=True) res = parser.parse(context, ParserInput("")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) assert res.body.reason == BuiltinConcepts.IS_EMPTY @pytest.mark.parametrize("expr, text", [ (StrMatch("foo"), "foo"), (StrMatch("'foo'"), "'foo'"), (StrMatch("1"), "1"), (StrMatch("3.14"), "3.14"), (StrMatch("+"), "+"), ]) def test_i_can_match_str_bnf(self, expr, text): my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying == u(expr, 0, 0) @pytest.mark.parametrize("expr, text, end", [ (RegExMatch("bar"), "bar", 0), (RegExMatch("[a-z]+"), "xyz", 0), (RegExMatch("[a-z=]+"), "uvt=xyz=abc", 4), ]) def test_i_can_match_regex_bnf(self, expr, text, end): my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying.start == 0 assert sequence[0].underlying.end == end assert sequence[0].underlying.parsing_expression == expr @pytest.mark.parametrize("expr, text, end", [ (Sequence(StrMatch("foo"), RegExMatch("bar")), "foo bar", 2), (Sequence(StrMatch("foo"), RegExMatch("[a-z]+")), "foo xyz", 2), (Sequence(StrMatch("foo"), RegExMatch("[a-z=]+")), "foo uvt=xyz=abc", 6), ]) def test_i_can_match_sequence_str_regex(self, expr, text, end): my_map = { text: self.bnf_concept("foo", 
expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying == u(expr, 0, end, sequence[0].underlying.children) @pytest.mark.parametrize("expr, text, end", [ (Sequence(RegExMatch("bar"), StrMatch("foo")), "bar foo", 2), (Sequence(RegExMatch("[a-z]+"), StrMatch("foo")), "xyz foo", 2), (Sequence(RegExMatch("[a-z=]+"), StrMatch("foo")), "uvt=xyz=abc foo", 6), ]) def test_i_can_match_sequence_regex_str(self, expr, text, end): my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying == u(expr, 0, end, sequence[0].underlying.children) def test_i_can_match_sequence_str_regex_str(self): text = "foo uvt=xyz=abc baz" expr = Sequence(StrMatch("foo"), RegExMatch("[a-z=]+"), StrMatch("baz")) my_map = { text: self.bnf_concept("foo", expr) } sequence = self.validate_get_concepts_sequences(my_map, text, [text]) assert sequence[0].underlying == u(expr, 0, 8, sequence[0].underlying.children) def test_i_can_match_multiple_concepts_in_one_input(self): my_map = { "one": self.bnf_concept("one"), "two": self.bnf_concept("two"), } text = "one two one" expected = ["one", "two", ("one", 1)] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one two three", [CNC("foo", "one two three")]), ("one two", []), ("one two four", []), ]) def test_i_can_match_sequence(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence("one", "two", "three")), } self.validate_get_concepts_sequences(my_map, text, expected) def test_i_always_choose_the_longest_match(self): my_map = { "foo": self.bnf_concept("foo", Sequence("one", "two", "three")), "bar": self.bnf_concept("bar", Sequence("one", "two")), } text = "one two three" expected = [CNC("foo", source=text)] self.validate_get_concepts_sequences(my_map, text, expected) def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self): # to 
match '--filter' in one word my_map = { "filter": self.bnf_concept("filter", Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), "filter")), } sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True) parser = BnfNodeParser() parser.init_from_concepts(context, updated) text = "--filter" expected = [CN("filter", source="--filter")] expected_array = compute_expected_array(my_map, text, expected) parser.reset_parser(context, ParserInput(text)) bnf_parsers_helpers = parser.get_concepts_sequences(context) transformed = get_test_obj(bnf_parsers_helpers[0].sequence, expected_array) assert transformed == expected_array assert not bnf_parsers_helpers[0].has_unrecognized # but I cannot parse text = "- - filter" parser.reset_parser(context, ParserInput(text)) bnf_parsers_helpers = parser.get_concepts_sequences(context) assert bnf_parsers_helpers[0].has_unrecognized def test_i_can_match_multiple_sequences(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))), "bar": self.bnf_concept("bar", Sequence(StrMatch("one"), StrMatch("two"))), } text = "one two three one two" expected = [ CNC("foo", "one two three"), CNC("bar", "one two", 6, 8)] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one", [CNC("foo", source="one")]), ("two", [CNC("foo", source="two")]), ("three", []), ]) def test_i_can_match_ordered_choice(self, text, expected): my_map = { "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"))) } self.validate_get_concepts_sequences(my_map, text, expected) def test_i_do_not_match_ordered_choice_with_empty_alternative(self): my_map = { "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch(""))) } text = "" expected = [] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("concept_three, expected", [ 
(Concept("three"), []), (BaseTest.bnf_concept("three", StrMatch("three")), [UTN('twenty '), "three"]) ]) def test_i_can_manage_sequence_with_wrong_order_choice(self, concept_three, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("twenty"), OrderedChoice(StrMatch("one"), StrMatch("two")))), "three": concept_three} text = "twenty three" self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("ok thirty one", [CNC("foo", "ok thirty one")]), ("ok twenty one", [CNC("foo", "ok twenty one")]), ("ok one", []), ]) def test_i_can_mix_sequence_and_ordered(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("ok"), OrderedChoice(StrMatch("twenty"), StrMatch("thirty")), StrMatch("one")) )} self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("twenty one", [CNC("foo", "twenty one")]), ("twenty three", []), # three does not exist ("twenty four", []), # four exists but should not be seen ]) def test_i_can_mix_sequence_and_ordered_2(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("twenty"), OrderedChoice(StrMatch("one"), StrMatch("two")))), "four": Concept("four")} self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("twenty thirty", [CNC("foo", "twenty thirty")]), ("one", [CNC("foo", "one")]), ]) def test_i_can_mix_ordered_choices_and_sequences(self, text, expected): my_map = { "foo": self.bnf_concept("foo", OrderedChoice( Sequence(StrMatch("twenty"), StrMatch("thirty")), StrMatch("one")))} self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one", [CNC("foo", "one")]), ("one two", [CNC("foo", "one two")]), ("three", []), ]) def test_i_can_match_unordered_choice(self, text, expected): my_map = { "foo": self.bnf_concept("foo", UnOrderedChoice( StrMatch("one"), 
Sequence(StrMatch("one"), StrMatch("two")))), } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one", [CNC("foo", "one")]), ("", []), ("two", []), ]) def test_i_can_match_optional(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Optional(StrMatch("one"))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("twenty one", [CNC("foo", "twenty one")]), ("one", [CNC("foo", "one")]), ]) def test_i_can_match_sequence_starting_with_optional(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( Optional(StrMatch("twenty")), StrMatch("one"))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one two three", [CNC("foo", "one two three")]), ("one two", [CNC("foo", "one two")]), ]) def test_i_can_match_sequence_ending_with_optional(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("one"), StrMatch("two"), Optional(StrMatch("three")))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one two three", [CNC("foo", "one two three")]), ("one three", [CNC("foo", "one three")]), ]) def test_i_can_match_sequence_with_optional_in_between(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( StrMatch("one"), Optional(StrMatch("two")), StrMatch("three"))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("", []), ("two", []), ("one", [CNC("foo", "one")]), ("one one", [CNC("foo", "one one")]), ]) def test_i_can_match_zero_or_more(self, text, expected): my_map = { "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("two", [CNC("foo", "two")]), ("one two", [CNC("foo", "one two")]), ("one one 
two", [CNC("foo", "one one two")]), ]) def test_i_can_match_sequence_and_zero_or_more(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( ZeroOrMore(StrMatch("one")), StrMatch("two") )) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one, one , one", [CNC("foo", "one, one , one")]), ]) def test_i_can_match_zero_or_more_with_separator(self, text, expected): my_map = { "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=",")) } self.validate_get_concepts_sequences(my_map, text, expected) def test_that_zero_or_more_is_greedy(self): my_map = { "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))), "bar": self.bnf_concept("foo", StrMatch("one")) } text = "one one one" expected = [CNC("foo", text)] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("", []), ("two", []), ("one", [CNC("foo", "one")]), ("one one one", [CNC("foo", "one one one")]), ]) def test_i_can_match_one_or_more(self, text, expected): my_map = { "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))), } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("two", []), ("one two", [CNC("foo", "one two")]), ("one one two", [CNC("foo", "one one two")]), ]) def test_i_can_match_sequence_one_and_or_more(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence( OneOrMore(StrMatch("one")), StrMatch("two") )) } self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one, one , one", [CNC("foo", "one, one , one")]), ]) def test_i_can_match_one_or_more_with_separator(self, text, expected): my_map = { "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=",")) } self.validate_get_concepts_sequences(my_map, text, expected) def test_that_one_or_more_is_greedy(self): my_map = { "foo": self.bnf_concept("foo", 
OneOrMore(StrMatch("one"))), "bar": self.bnf_concept("foo", StrMatch("one")) } text = "one one one" expected = [CNC("foo", text)] self.validate_get_concepts_sequences(my_map, text, expected) @pytest.mark.parametrize("text, expected", [ ("one two", [ [CNC("foo", "one two")], [CNC("bar", "one two")]]), ("one two one two", [ [CNC("bar", "one two"), CNC("bar", "one two")], [CNC("foo", "one two"), CNC("bar", "one two")], [CNC("bar", "one two"), CNC("foo", "one two")], [CNC("foo", "one two"), CNC("foo", "one two")]]), ]) def test_i_can_have_multiple_results(self, text, expected): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), "bar": self.bnf_concept("bar", Sequence( StrMatch("one"), OrderedChoice(StrMatch("two"), StrMatch("three")))), } text = "one two" expected = [[CNC("foo", text)], [CNC("bar", text)]] self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) def test_i_can_refer_to_other_concepts(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), "bar": self.bnf_concept("bar", ConceptExpression("foo")) } text = "one two" expected = [ [CNC("foo", text)], [CN("bar", text)] # Do not check the compiled part ] sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) # explicit validations of the compiled concept_foo = sequences[0][0].concept assert concept_foo.body == NotInit assert concept_foo.get_compiled() == {ConceptParts.BODY: DoNotResolve("one two")} concept_bar = sequences[1][0].concept assert concept_bar.body == NotInit assert concept_bar.get_compiled() == { ConceptParts.BODY: concept_foo, "foo": concept_foo } assert id(concept_bar.get_compiled()[ConceptParts.BODY]) == id(concept_bar.get_compiled()["foo"]) def test_i_can_refer_to_other_concepts_with_body(self): my_map = { "foo": self.bnf_concept(Concept("foo", body="'foo'"), Sequence(StrMatch("one"), StrMatch("two"))), "bar": self.bnf_concept("bar", 
ConceptExpression("foo")) } text = "one two" expected = [ [CNC("foo", text)], [CN("bar", text)] # Do not check the compiled part ] sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) # explicit validations of the compiled concept_foo = sequences[0][0].concept assert concept_foo.body == NotInit assert len(concept_foo.get_compiled()) == 0 # because there is a body defined in the metadata concept_bar = sequences[1][0].concept assert concept_bar.body == NotInit assert concept_bar.get_compiled() == { ConceptParts.BODY: concept_foo, "foo": concept_foo } def test_i_can_manage_concepts_reference_when_multiple_levels(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), "bar": self.bnf_concept("bar", ConceptExpression("foo")), "baz": self.bnf_concept("baz", ConceptExpression("bar")), } text = "one two" expected = [ [CNC("foo", text)], [CN("bar", text)], # Do not check the compiled part [CN("baz", text)], # Do not check the compiled part ] sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) # explicit validations of the compiled concept_foo = sequences[0][0].concept assert concept_foo.body == NotInit assert concept_foo.get_compiled() == {ConceptParts.BODY: DoNotResolve("one two")} concept_bar = sequences[1][0].concept assert concept_bar.body == NotInit assert concept_bar.get_compiled() == { ConceptParts.BODY: concept_foo, "foo": concept_foo } concept_baz = sequences[2][0].concept assert concept_baz.body == NotInit assert concept_baz.get_compiled() == { ConceptParts.BODY: concept_bar, "bar": concept_bar } def test_i_can_mix_reference_to_other_concepts(self): my_map = { "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))), "bar": self.bnf_concept("bar", Sequence( ConceptExpression("foo"), OrderedChoice(StrMatch("one"), StrMatch("two")))), "three": Concept("three") } text = "twenty two" expected = [CN("bar", 
source="twenty two")] sequences = self.validate_get_concepts_sequences(my_map, text, expected) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("twenty two"), "foo": my_map["foo"], } assert concept_bar.get_compiled()["foo"].get_compiled() == {ConceptParts.BODY: DoNotResolve("twenty")} text = "thirty one" expected = [CN("bar", source="thirty one")] sequences = self.validate_get_concepts_sequences(my_map, text, expected) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("thirty one"), "foo": my_map["foo"], } assert concept_bar.get_compiled()["foo"].get_compiled() == {ConceptParts.BODY: DoNotResolve("thirty")} text = "thirty three" expected = [[CN("foo", source="thirty"), CN("three")], []] self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) def test_i_can_mix_reference_to_other_concepts_2(self): # this time, we use concept expression my_map = { "twenty": self.bnf_concept("twenty", StrMatch("twenty")), "number": self.bnf_concept("number", OrderedChoice(StrMatch("one"), StrMatch("two"))), "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number"))), "three": Concept("three") } text = "twenty two" expected = [CNC("twenties", "twenty two", twenty=CC("twenty", body=DoNotResolve("twenty")), number=CC("number", source="two", body=DoNotResolve("two")) )] self.validate_get_concepts_sequences(my_map, text, expected) text = "twenty three" expected = [[CN("twenty"), CN("three")], []] self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) def test_i_can_mix_reference_to_other_concepts_when_body(self): my_map = { "foo": self.bnf_concept(Concept("foo", body="'foo'"), OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))), "bar": self.bnf_concept("bar", Sequence( ConceptExpression("foo"), OrderedChoice(StrMatch("one"), StrMatch("two")))), } sheerka, context, *concepts 
= self.init_test().with_concepts(*my_map.values(), create_new=False).unpack() text = "twenty two" expected = [CN("bar", source="twenty two")] sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, False, None, *concepts) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("twenty two"), "foo": sheerka.new("foo"), } assert concept_bar.get_compiled()["foo"].get_compiled() == {} # as foo as a body text = "thirty one" expected = [CN("bar", source="thirty one")] sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, False, None, *concepts) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("thirty one"), "foo": sheerka.new("foo"), } assert concept_bar.get_compiled()["foo"].get_compiled() == {} def test_i_can_mix_zero_and_more_and_reference_to_other_concepts(self): my_map = { "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"), StrMatch("three"))), "bar": self.bnf_concept("bar", ZeroOrMore(ConceptExpression("foo"))), } text = "one two three" expected = [CN("bar", source="one two three")] sequences = self.validate_get_concepts_sequences(my_map, text, expected) concept_bar = sequences[0].concept assert concept_bar.get_compiled() == { ConceptParts.BODY: DoNotResolve("one two three"), "foo": [my_map["foo"], my_map["foo"], my_map["foo"]] } assert concept_bar.get_compiled()["foo"][0].get_compiled() == {ConceptParts.BODY: DoNotResolve("one")} assert concept_bar.get_compiled()["foo"][1].get_compiled() == {ConceptParts.BODY: DoNotResolve("two")} assert concept_bar.get_compiled()["foo"][2].get_compiled() == {ConceptParts.BODY: DoNotResolve("three")} def test_i_can_match_concept_reference_that_is_not_in_grammar(self): my_map = { "one": Concept("one"), "two": Concept("two"), "foo": self.bnf_concept("foo", Sequence( StrMatch("twenty"), OrderedChoice( ConceptExpression("one"), ConceptExpression("two"), 
rule_name="unit"))), } text = "twenty one" expected = [CN("foo", "twenty one")] sequences = self.validate_get_concepts_sequences(my_map, text, expected) concept_foo = sequences[0].concept assert concept_foo.get_compiled() == { ConceptParts.BODY: DoNotResolve("twenty one"), "unit": my_map["one"], } def test_i_can_refer_to_group_concepts(self): my_map = { "one": Concept("one"), "two": Concept("two"), "number": Concept("number"), "foo": self.bnf_concept("foo", Sequence("twenty", ConceptExpression("number"))) } def pic(s, c): s.add_concept_to_set(c, my_map["one"], my_map["number"]) s.add_concept_to_set(c, my_map["two"], my_map["number"]) text = "twenty two" expected = [CN("foo", source="twenty two")] sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic) # explicit validations of the compiled concept_foo = sequences[0].concept assert concept_foo.body == NotInit compare_with_test_object(concept_foo.get_compiled(), { 'number': CC(my_map["number"], body=my_map["two"], two=my_map["two"]), ConceptParts.BODY: DoNotResolve(value='twenty two')}) text = "twenty one" expected = [CN("foo", source="twenty one")] sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic) # explicit validations of the compiled concept_foo = sequences[0].concept assert concept_foo.body == NotInit compare_with_test_object(concept_foo.get_compiled(), { 'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]), ConceptParts.BODY: DoNotResolve(value='twenty one')}) @pytest.mark.parametrize("expr, expected", [ ("one 'car'", [CNC("foo", "one 'car'", x=python_ret_val("'car'"))]), # python ("one bar", [CNC("foo", "one bar", x=CC("bar"))]), # simple concept ("one super car", [CNC("foo", "one super car", x=CC("super car"))]), # long concept ("one shoe", [CNC("foo", "one shoe", x=CC("thing", source="shoe", body=DoNotResolve("shoe")))]), # bnf ]) def test_i_can_match_variable_when_ending_with_one_variable(self, expr, 
expected):
        # NOTE(review): the signature of this parametrized test starts above this chunk.
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
            "bar": Concept("bar"),
            "baz": Concept("baz"),
            "thing": Concept("thing", definition="'shoe'|'skirt'"),
            "super car": Concept("super car"),
            "plus": Concept("x plus y").def_var("x").def_var("y"),
        }
        self.validate_get_concepts_sequences(my_map, expr, expected)

    def test_i_can_match_variable_when_ending_with_one_variable_and_sya(self):
        """A trailing variable can bind either a single concept or a larger composite (both results kept)."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
            "bar": Concept("bar"),
            "baz": Concept("baz"),
            "plus": Concept("x plus y").def_var("x").def_var("y"),
        }
        expr = "one bar plus baz"
        expected = [
            [CNC("foo", "one bar", x=CC("bar")), UTN(" plus "), CN("baz")],
            [CNC("foo", "one bar plus baz", x=CC("plus", source="bar plus baz", x="bar", y="baz"))],
        ]
        self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)

    def test_i_can_match_variable_when_ending_with_one_variable_and_multiple_results(self):
        """Two concepts sharing the name 'pretty big' both produce a candidate binding for the variable."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
            "pretty big": Concept("pretty big", body="'pretty big'"),
            "pbig": Concept("pretty big"),
        }
        expr = "one pretty big"
        expected = [
            [CNC("foo", "one pretty big", x=CC("pretty big"))],
            [CNC("foo", "one pretty big", x=CC("pbig", source="pretty big"))]
        ]
        self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)

    def test_i_can_match_variable_when_ending_with_multiple_variables_and_multiple_results(self):
        """Two trailing variables split the remaining tokens; ambiguous names yield multiple sequences."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"),
                                                    VariableExpression("y"))),
            "pretty": Concept("pretty", body="pretty"),
            "pretty2": Concept("pretty"),
            "big": Concept("big", body="big"),
        }
        expr = "one pretty big"
        expected = [
            [CNC("foo", "one pretty big", x=CC("pretty"), y=CC("big"))],
            [CNC("foo", "one pretty big", x=CC("pretty2", source="pretty"), y=CC("big"))]
        ]
        self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)

    @pytest.mark.parametrize("expr, expected", [
        ("'my' shoe", [CNC("foo", "'my' shoe", x=python_ret_val("'my' "))]),  # python
        ("one shoe", [CNC("foo", "one shoe", x=CC("one"))]),  # concept
        ("my little shoe", [CNC("foo", "my little shoe", x=CC("my little"))]),  # long concept
        ("black shoe", [CNC("foo", "black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
    ])
    def test_i_can_match_variable_when_starting_with_one_variable(self, expr, expected):
        """A leading variable binds python values, single concepts, multi-word concepts and BNF concepts."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
            "one": Concept("one"),
            "my little": Concept("my little"),
            "color": Concept("color", definition="'blue'|'black'"),
            "and": Concept("x and y").def_var("x").def_var("y"),
        }
        self.validate_get_concepts_sequences(my_map, expr, expected)

    def test_i_can_match_variable_when_starting_with_one_variable_and_sya(self):
        """A leading variable can swallow a whole composite ('tiny but beautiful') before the keyword."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
            "tiny": Concept("tiny"),
            "beautiful": Concept("beautiful"),
            "but": Concept("x but y").def_var("x").def_var("y"),
        }
        expr = "tiny but beautiful shoe"
        expected_res = [
            CNC("foo", "tiny but beautiful shoe",
                x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful"))]
        unwanted_res = [CN("tiny"), UTN(" but "), CNC("foo", "beautiful shoe", x=CC("beautiful"))]
        self.validate_get_concepts_sequences(my_map, expr, [unwanted_res, expected_res], multiple_result=True)

    def test_i_can_match_variable_when_starting_with_multiple_variables(self):
        """Three leading variables each take one token group before the 'shoe' keyword."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), VariableExpression("y"),
                                                    VariableExpression("z"), StrMatch("shoe"))),
            "one": Concept("one"),
            "two": Concept("two"),
            "plus": Concept("x plus y").def_var("x").def_var("y"),
        }
        text = "one 'one' one plus two shoe"
        unwanted_res = [CN("one"), SCN(" 'one' "), ("one", 1), UTN(" plus "), CN("two")]
        expected_res = [CNC("foo", "one 'one' one plus two shoe", x=CC("one"), y=python_ret_val(" 'one' "),
                            z=CC("plus", source="one plus two", x="one", y="two"))]
        expected = [unwanted_res, expected_res]
        self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)

    def test_i_can_match_variable_when_starting_with_one_variable_and_longer_str(self):
        """The variable is not greedy: a run of keywords after it still matches."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("foo"),
                                                    StrMatch("bar"), StrMatch("baz"))),
            "one": Concept("one")
        }
        text = "one foo bar baz"
        expected = [CNC("foo", "one foo bar baz", x=CC("one"))]
        self.validate_get_concepts_sequences(my_map, text, expected)

    @pytest.mark.parametrize("expr, expected", [
        ("one 'pretty' shoe", [CNC("foo", "one 'pretty' shoe", x=python_ret_val("'pretty' "))]),  # python
        ("one little shoe", [CNC("foo", "one little shoe", x=CC("little"))]),  # concept
        ("one very big shoe", [CNC("foo", "one very big shoe", x=CC("very big"))]),  # long concept
        ("one black shoe", [CNC("foo", "one black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
        ("one tiny but beautiful shoe",
         [CNC("foo", "one tiny but beautiful shoe",
              x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful "))]),
    ])
    def test_i_can_match_variable_in_between(self, expr, expected):
        """A variable sandwiched between two keywords binds the same shapes as a leading one."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))),
            "little": Concept("little"),
            "very big": Concept("very big"),
            "color": Concept("color", definition="'blue'|'black'"),
            "but": Concept("x but y").def_var("x").def_var("y"),
        }
        self.validate_get_concepts_sequences(my_map, expr, expected)

    def test_i_can_match_variable_when_multiple_results_in_between(self):
        """Ambiguous concept names inside a sandwiched variable yield one sequence per candidate."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))),
            "pretty big": Concept("pretty big", body="'pretty big'"),
            "pbig": Concept("pretty big"),
        }
        expr = "one pretty big shoe"
        expected = [
            [CNC("foo", "one pretty big shoe", x=CC("pretty big"))],
            [CNC("foo", "one pretty big shoe", x=CC("pbig", source="pretty big"))]
        ]
self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)

    def test_i_can_match_regex_and_variable(self):
        """A regex element followed by a variable: the regex eats a token, the variable binds a concept."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(RegExMatch("[a-z]+"), VariableExpression("x"))),
            "shoe": Concept("shoe")
        }
        text = "onyx shoe"
        expected = [CNC("foo", "onyx shoe", x=CC("shoe"))]
        self.validate_get_concepts_sequences(my_map, text, expected)

    def test_i_can_match_variable_and_regex(self):
        """Symmetric case: variable first, regex last."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), RegExMatch("[a-z]+"))),
            "one": Concept("one")
        }
        text = "one onyx"
        expected = [CNC("foo", "one onyx", x=CC("one"))]
        self.validate_get_concepts_sequences(my_map, text, expected)

    def test_i_can_reuse_the_same_variable(self):
        # in this test, the variable appears several times, but only once in concept.compiled
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("equals"),
                                                    VariableExpression("x"))),
            "one": Concept("one"),
            "two": Concept("two"),
        }
        sheerka, context, *updated = self.init_concepts(*my_map.values())
        parser = BnfNodeParser()
        parser.init_from_concepts(context, updated)
        # same variable appears only once in the compiled variables
        text = "one equals one"
        expected = [CNC("foo", "one equals one", x=CC("one"))]
        expected_sequence = compute_expected_array(my_map, text, expected)
        parser.reset_parser(context, ParserInput(text))
        bnf_parsers_helpers = parser.get_concepts_sequences(context)
        to_compare = tests.parsers.parsers_utils.get_test_obj(bnf_parsers_helpers[0].sequence, expected_sequence)
        assert to_compare == expected

    def test_i_cannot_match_variable_when_variables_discrepancy(self):
        """When the same variable occurs twice, binding it to two different concepts is rejected."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("equals"),
                                                    VariableExpression("x"))),
            "one": Concept("one"),
            "one_1": Concept("one", body="1"),
            "two": Concept("two"),
            "two_2": Concept("two", body="2"),
        }
        sheerka, context, *updated = self.init_concepts(*my_map.values())
        parser = BnfNodeParser()
        parser.init_from_concepts(context, updated)
        text = "one equals two"
        parser.reset_parser(context, ParserInput(text))
        bnf_parsers_helpers = parser.get_concepts_sequences(context)
        assert bnf_parsers_helpers[0].sequence == []

    @pytest.mark.parametrize("bar_expr, expected", [
        (ConceptExpression("foo"), {}),
        (OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
        (Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']}),
    ])
    def test_i_can_detect_infinite_recursion(self, bar_expr, expected):
        """foo -> bar -> foo cycles are detected and flagged as CHICKEN_AND_EGG."""
        my_map = {
            "foo": self.bnf_concept("foo", ConceptExpression("bar")),
            "bar": self.bnf_concept("bar", bar_expr),
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        # every obvious cyclic recursion are removed from concept_by_first_keyword dict
        assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
        # get_parsing_expression() also returns CHICKEN_AND_EGG
        parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        parsing_expression = parser.get_parsing_expression(context, my_map["bar"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)

    def test_i_can_detect_longer_infinite_recursion(self):
        """A 4-concept cycle foo -> bar -> baz -> qux -> foo is detected; body records the cycle path."""
        my_map = {
            "foo": self.bnf_concept("foo", ConceptExpression("bar")),
            "bar": self.bnf_concept("bar", ConceptExpression("baz")),
            "baz": self.bnf_concept("baz", ConceptExpression("qux")),
            "qux": self.bnf_concept("qux", ConceptExpression("foo")),
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        # every obvious cyclic recursion are removed from concept_by_first_keyword dict
        assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
        parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        assert parser.concepts_grammars.get(my_map["foo"].id).body == ["1001", "1002", "1003", "1004", "1001"]
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["baz"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["qux"].id), BuiltinConcepts.CHICKEN_AND_EGG)

    def test_i_can_detect_partial_infinite_recursion(self):
        """A cycle reached through a non-cyclic prefix (foo -> bar -> baz <-> qux) is also detected."""
        my_map = {
            "foo": self.bnf_concept("foo", ConceptExpression("bar")),
            "bar": self.bnf_concept("bar", ConceptExpression("baz")),
            "baz": self.bnf_concept("baz", ConceptExpression("qux")),
            "qux": self.bnf_concept("qux", ConceptExpression("baz")),
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        # every obvious cyclic recursion are removed from concept_by_first_keyword dict
        assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
        parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        assert parser.concepts_grammars.get(my_map["foo"].id).body == ["1001", "1002", "1003", "1004", "1003"]
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["baz"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["qux"].id), BuiltinConcepts.CHICKEN_AND_EGG)

    @pytest.mark.parametrize("expr, expected", [
        (OrderedChoice(StrMatch("bar"), ConceptExpression("foo")), False),
        (OrderedChoice(ConceptExpression("foo"), StrMatch("bar")), True),
        (OrderedChoice(Sequence(StrMatch("bar"), ConceptExpression("foo")), StrMatch("baz")), False),
        (OrderedChoice(Sequence(ConceptExpression("foo"), StrMatch("bar")), StrMatch("baz")), True)
    ])
    def test_i_can_detect_ordered_choice_infinite_recursion(self, expr, expected):
        """Only left-recursive alternatives of an OrderedChoice are flagged as CHICKEN_AND_EGG."""
        my_map = {
            "foo": self.bnf_concept("foo", expr),
        }
        sheerka, context, parser = self.init_parser(my_map)
        parser.context = context
        parser.sheerka = sheerka
        res = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected

    def test_i_can_get_parsing_expression_when_ending_by_concept_isa(self):
        """A trailing isa-group concept expands into an UnOrderedChoice of its members."""
        my_map = {
            "one": Concept("one"),
            "twenty": Concept("twenty"),
            "number": Concept("number"),
            "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
        }
        sheerka, context, parser = self.init_parser(my_map)
        parser.context = context
        parser.sheerka = sheerka
        sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["twenty"], rule_name="twenty"),
            ConceptExpression(my_map["number"], rule_name="number"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        twenty_nodes = parsing_expression.nodes[0].nodes
assert twenty_nodes == [StrMatch("twenty")]
        number_nodes = parsing_expression.nodes[1].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes

    def test_i_can_get_parsing_expression_when_starting_by_isa_concept(self):
        """A leading isa-group concept expands into an UnOrderedChoice of its members."""
        my_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "number": Concept("number"),
            "hundreds": self.bnf_concept("hundreds", Sequence(ConceptExpression("number"), StrMatch("hundred")))
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("two"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("hundreds"), my_map["number"])
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["hundreds"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["number"], rule_name="number"), StrMatch("hundred"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        number_nodes = parsing_expression.nodes[0].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes

    def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
        """The concept being compiled may itself belong to the isa-group it references."""
        my_map = {
            "one": Concept("one"),
            "twenty": Concept("twenty"),
            "number": Concept("number"),
            "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
        sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"])  # <- twenties is also a number
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["twenty"], rule_name="twenty"),
            ConceptExpression(my_map["number"], rule_name="number"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        twenty_nodes = parsing_expression.nodes[0].nodes
        assert twenty_nodes == [StrMatch("twenty")]
        number_nodes = parsing_expression.nodes[1].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes

    def test_i_can_get_parsing_expression_when_sequence_of_concepts(self):
        """The same concept may appear twice in one sequence."""
        my_map = {
            "one": Concept("one"),
            "two_ones": self.bnf_concept("two_ones", Sequence(ConceptExpression("one"), ConceptExpression("one")))
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        parsing_expression = parser.get_parsing_expression(context, my_map["two_ones"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["one"], rule_name="one"),
            ConceptExpression(my_map["one"], rule_name="one"))

    @pytest.mark.parametrize("expr, text, expected", [
        (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", "one"), UTN(",")]),
        (StrMatch("one"), "one two", [CNC("foo", "one"), UTN(" two")]),
        (StrMatch("one"), "two one", [UTN("two "), CNC("foo", "one")]),
    ])
    def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
        """Unmatched tokens around a recognized concept come back as UTN (unknown token) nodes."""
        my_map = {
            "foo": self.bnf_concept("foo", expr)
        }
        self.validate_get_concepts_sequences(my_map, text, expected)

    def test_i_can_recognize_unknown_when_they_look_like_known(self):
        """'one' alone is unknown even though it is the first keyword of 'one two'."""
        my_map = {
            "one two": self.bnf_concept("one two", Sequence("one", "two")),
            "three": self.bnf_concept("three")
        }
        text = "one three"
        expected = [UTN("one "), CNC("three", "three")]
        self.validate_get_concepts_sequences(my_map, text, expected)

    def test_i_can_remove_duplicates(self):
        """Two concepts sharing the first keyword 'one' must not duplicate the resulting sequence."""
        my_map = {
            "one two": self.bnf_concept("one two", Sequence("one", "two")),
            "one four": self.bnf_concept("one four", Sequence("one", "four")),
            "three": self.bnf_concept("three")
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.reset_parser(context, ParserInput("one three"))
        sequences = parser.get_concepts_sequences(context)
        sequence = parser.get_valid(sequences)
        assert len(sequence) == 1

    @pytest.mark.parametrize("parser_input, expected_status, expected", [
        ("baz", True, [CNC("bnf baz", "baz")]),  # the bnf one is chosen
        ("foo bar", True, [CNC("foo then bar", "foo bar", foo="foo", bar="bar")]),
        ("bar", True, [CNC("foo or bar", "bar", bar="bar", body="bar")]),
        ("one plus two", True, [CNC("plus", "one plus two", one="one", two="two")]),
        ("twenty one", True, [CNC("t1", "twenty one", unit="one")]),
        ("one 'car'", True, [CNC("one thing", "one 'car'", x=python_ret_val("'car'"), one="one")])
    ])
    def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
        """End-to-end parse() over the module-level cmap concepts."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        res = parser.parse(context, ParserInput(parser_input))
        expected_array = compute_expected_array(cmap, parser_input, expected)
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status == expected_status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_when_multiple_times_the_same_variable(self):
        """A OneOrMore variable ('foo+') compiles to a list of bindings."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "foo foo foo"
        expected_array = compute_expected_array(cmap, text, [CNC("one or more foo", source=text)])
        expected_array[0].compiled["foo"] = [cmap["foo"], cmap["foo"], cmap["foo"]]
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_test_when_expression_references_other_expressions(self):
        """t2 references three_four, which is itself a BNF concept."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "twenty four"
        expected = CNC("t2", source=text,
                       unit=CC("three_four", source="four",
                               four=CC("four", body=DoNotResolve("four")),
                               body=CC("four", body=DoNotResolve("four"))))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        # thirties is defined in the global variable cmap as
        # thirties = cls.update_bnf(context, Concept("thirties",
        #                                            definition="thirty number",
        #                                            where="number < 10",
        #                                            body="thirty + number").def_var("thirty").def_var("number"))
        text = "thirty one"
        expected = CNC("thirties", source=text,
                       number=CC("number", source="one",
                                 one=CC("one", body=DoNotResolve("one")),
                                 body=CC("one", body=DoNotResolve("one"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_2(self):
        # this time, three is a number, and also part of three_four, even if it is not relevant in t3
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "thirty three"
        expected = CNC("thirties", source=text,
                       number=CC("number", source="three",
                                 three=CC("three", body=DoNotResolve("three")),
                                 body=CC("three", body=DoNotResolve("three"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_when_starting_by_isa_concept(self):
        """
        Test of simple number + 'thousand'
        :return:
        """
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "one thousand"
        one = CC("one", body=DoNotResolve("one"))
        expected = CNC("thousands", source=text, number=CC("number", source="one", one=one, body=one))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_fifty_one_thousand(self):
        """
        Test of complex number + 'thousand' (complex because the number is a BNF concept)
        :return:
        """
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "fifty one thousand"
        one = CC("one", body=DoNotResolve("one"))
        fifty_one = CC("fifties", source="fifty one", fifty="fifty",
                       number=CC("number", source="one", body=one, one=one))
        one_thousand = CC("thousands", source="one thousand",
                          number=CC("number", source="one", body=one, one=one))
        expected_thousand = CNC("thousands", source=text,
                                number=CC("number", source="fifty one", fifties=fifty_one, body=fifty_one))
        expected_fifties = CNC("fifties", source=text, fifty="fifty",
                               number=CC("number", source="one thousand", thousands=one_thousand,
                                         body=one_thousand))
        expected_thousands = compute_expected_array(cmap, text, [expected_thousand])
        expected_fifties = compute_expected_array(cmap, text, [expected_fifties])
        res = parser.parse(context, ParserInput(text))
        assert res[0].status
        compare_with_test_object(res[0].value.value, expected_thousands)
        assert res[1].status
        compare_with_test_object(res[1].value.value, expected_fifties)

    def test_i_can_parse_one_hundred_thousand(self):
        """Only the status is checked here; the expected tree is not spelled out."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "one hundred thousand"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)

    def test_i_can_parse_hundreds_like_expression(self):
        """'hundreds' (number 'hundred' 'and' number, with a where clause) parses a composite number."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "three hundred and thirty two"
        three = CC("three", body=DoNotResolve("three"))
        two = CC("two", body=DoNotResolve("two"))
        thirty_two = CC("thirties", source="thirty two", thirty="thirty",
                        number=CC("number", source="two", body=two, two=two))
        expected = CNC("hundreds", source=text,
                       n1=CC("number", source="three", body=three, three=three),
                       n2=CC("number", source="thirty two", body=thirty_two, thirties=thirty_two))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
        """After clearing all compiled BNF state, two consecutive parses still succeed."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        for c in cmap.values():
            sheerka.get_by_id(c.id).set_bnf(None)
        text = "thirty three"
        expected = CNC("thirties", source=text,
                       number=CC("number", source="three",
                                 three=CC("three", body=DoNotResolve("three")),
                                 body=CC("three", body=DoNotResolve("three"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
        text = "forty one"
        expected = CNC("forties", source=text,
                       number=CC("number", source="one",
                                 one=CC("one", body=DoNotResolve("one")),
                                 body=CC("one", body=DoNotResolve("one"))),
                       forty="forty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_when_keyword(self):
        """'def' is both a standalone concept and the first keyword of 'def number'."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        parser_input = "def one"
        expected = [CNC("def number", "def one", number="one")]
        res = parser.parse(context, ParserInput(parser_input))
        expected_array = compute_expected_array(cmap, parser_input, expected)
        expected_array[0].compiled["def"] = cmap["def_only"]
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_filter(self):
        """A keyword beginning with '--' (CLI-flag style) is parsed like any other."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        expression = "--filter one"
        expected = [CN("filter", source="--filter one")]
        res = parser.parse(context, ParserInput(expression))
        expected_array = compute_expected_array(cmap, expression, expected)
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)

    def test_i_can_parse_descent_grammar(self):
        """Classic expr/term/factor grammar written with textual BNF definitions."""
        my_map = {
            "factor": Concept("factor", definition="1 | 2 | 3"),
            "term": Concept("term", definition="factor ('*' factor)*"),
            "expr": Concept("expr", definition="term ('+' term)*"),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "1 + 2 * 3"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        factor = my_map["factor"]
        term = my_map["term"]
        expr = my_map["expr"]
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, [
            CNC(expr,
                term=[CC(term, body=CC(factor, body=DoNotResolve("1")),
                         factor=CC(factor, body=DoNotResolve("1"))),
                      CC(term, body=DoNotResolve("2 * 3"),
                         factor=[
                             CC(factor, body=DoNotResolve("2")),
                             CC(factor, body=DoNotResolve("3")),
                         ])],
                body=DoNotResolve("1 + 2 * 3"))])

    def test_i_can_parse_recursive_descent_grammar(self):
        """Same grammar, but term/expr are right-recursive via explicit parsing expressions."""
        my_map = {
            "factor": Concept("factor", definition="1 | 2 | 3"),
            "term": self.bnf_concept("term", OrderedChoice(
                Sequence(ConceptExpression("factor"), StrMatch("*"), ConceptExpression("term")),
                ConceptExpression("factor"))),
            "expr": self.bnf_concept("expr", OrderedChoice(
                Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
                ConceptExpression("term"))),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "1 + 2 * 3"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        factor = my_map["factor"]
        term = my_map["term"]
        expr = my_map["expr"]
        # concepts_nodes = res.value.value is too complicated to be validated
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, [
            CNC(expr,
                term=CC(term, body=CC(factor, body=DoNotResolve("1")),
                        factor=CC(factor, body=DoNotResolve("1"))),
                expr=CC(expr,
                        body=CC(term, body=DoNotResolve("2 * 3"),
                                factor=CC(factor, body=DoNotResolve("2")),
                                term=CC(term, body=CC(factor, body=DoNotResolve("3")),
                                        factor=CC(factor, body=DoNotResolve("3")))),
                        term=CC(term, body=DoNotResolve("2 * 3"),
                                factor=CC(factor, body=DoNotResolve("2")),
                                term=CC(term, body=CC(factor, body=DoNotResolve("3")),
                                        factor=CC(factor, body=DoNotResolve("3"))))),
                body=DoNotResolve("1 + 2 * 3"))])

    def test_i_can_parse_simple_recursive_grammar(self):
        """foo -> 'foo' ('bar' | foo): right recursion terminates on 'bar'."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("foo"),
                                                    OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))),
        }
        sheerka, context, parser = self.init_parser(my_map)
        assert parser.parse(context, ParserInput("foo bar")).status
        assert parser.parse(context, ParserInput("foo foo foo bar")).status
        assert not parser.parse(context, ParserInput("foo baz")).status

    @pytest.mark.parametrize("name, expected", [
        (None, []),
        ("", []),
        ("foo", StrMatch("foo")),
        ("foo bar", Sequence(StrMatch("foo"), StrMatch("bar"))),
        ("'foo bar baz' qux", Sequence(StrMatch("foo", skip_whitespace=False), StrMatch(" ", skip_whitespace=False),
                                       StrMatch("bar", skip_whitespace=False), StrMatch(" ", skip_whitespace=False),
                                       StrMatch("baz"), StrMatch("qux"))),
    ])
    def test_i_can_get_expression_from_concept_name(self, name, expected):
        """Concept names compile to StrMatch/Sequence; quoted parts keep their internal whitespace."""
        assert BnfNodeParser.get_expression_from_concept_name(name) == expected

    def test_i_can_parse_when_multiple_layers(self):
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        # sanity
        text = "thirty one"
        res = parser.parse(context, ParserInput(text))
        assert res.status
        compare_with_test_object(res.value.value, compute_expected_array(cmap, text, [CN("thirties", text)]))
        # add a layer, I still can parse the text
        sheerka.push_ontology(context, "new layer")
        parser = BnfNodeParser(sheerka=sheerka)
        res = parser.parse(context, ParserInput(text))
        assert res.status
        compare_with_test_object(res.value.value, compute_expected_array(cmap, text, [CN("thirties", text)]))

    def test_i_do_not_eat_unwanted_tokens_at_the_beginning_when_concept_with_variable(self):
        """A leading variable must not swallow tokens that precede the real match."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
            "one": Concept("one"),
            "two": Concept("two"),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "two one shoe"
        res = parser.parse(context, ParserInput(text))
        assert res.status
        compare_with_test_object(res.value.value, compute_expected_array(my_map, text, [
            CN("two"), CNC("foo", "one shoe", x=CC("one"))]))

    def test_i_do_not_eat_unwanted_tokens_at_the_end_when_concept_with_variable(self):
        """A trailing variable must not swallow tokens that follow the real match."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
            "bar": Concept("bar"),
            "baz": Concept("baz"),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "one bar baz"
        res = parser.parse(context, ParserInput(text))
        assert res.status
        compare_with_test_object(res.value.value, compute_expected_array(my_map, text, [
            CNC("foo", "one bar", x=CC("bar")), CN("baz")]))

    @pytest.mark.parametrize("parsing_expression, expected", [
        (RegExMatch("a"), [RegExDef("a")]),
        (OrderedChoice(StrMatch("first"), RegExMatch("a|b")), ["first", RegExDef("a|b")]),
        (OrderedChoice(RegExMatch("a|b"), StrMatch("first")), [RegExDef("a|b"), "first"]),
        (Sequence(StrMatch("a"), RegExMatch("a|b")), ["a"]),
        (Sequence(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
        (OneOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
        (OneOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
        (ZeroOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
        (ZeroOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    ])
    def test_i_can_get_first_item(self, parsing_expression, expected):
        """The first-token visitor collects every possible leading token of an expression."""
        sheerka = \
self.get_sheerka() visitor = BnfNodeFirstTokenVisitor(sheerka) visitor.visit(parsing_expression) assert visitor.first_tokens == expected def test_i_cannot_parse_regex_when_no_next_matching_token_cannot_be_found(self): sheerka, context, foo = self.init_test().with_concepts(Concept("foo", definition="r'abcd'"), create_new=True).unpack() parser = BnfNodeParser(sheerka=sheerka) res = parser.parse(context, ParserInput("abcdef")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) assert res.body.reason == [NoMatchingTokenError(4)] @pytest.mark.parametrize("text", [ "one", " one", "one ", " one " ]) def test_i_cannot_parse_empty_variable(self, text): sheerka, context, parser = self.init_parser(init_from_sheerka=True) res = parser.parse(context, ParserInput("one")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) @pytest.mark.parametrize("bnf, text", [ (Sequence(VariableExpression("x"), StrMatch("foo")), "one foo"), (Sequence(StrMatch("foo"), VariableExpression("x")), "foo one"), (Sequence(StrMatch("foo"), VariableExpression("x"), StrMatch("bar")), "foo one bar"), ]) def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text): sheerka, context, foo = self.init_test().with_concepts( self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))) ).unpack() parser = BnfNodeParser() parser.init_from_concepts(context, [foo]) res = parser.parse(context, ParserInput(text)) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) @pytest.mark.parametrize("to_match, ignore_case, multiline, explicit_flags", [ ("xxy", None, None, re.MULTILINE), ("xxy", True, True, re.MULTILINE), ("xxy", False, False, re.MULTILINE), ]) def test_i_can_serialize_reg_ex_def(self, to_match, ignore_case, multiline, explicit_flags): r = RegExDef(to_match, ignore_case, multiline, explicit_flags) serialized = r.serialize() r2 = RegExDef().deserialize(serialized) assert r == r2 def 
test_i_can_resolve_parsing_expression_for_variable_concept(self): sheerka, context, parser = self.init_parser(init_from_sheerka=True) expression = Sequence(VariableExpression("x"), StrMatch("x")) resolved = parser.resolve_parsing_expression(context, expression, {}, set(), set()) assert isinstance(resolved.nodes[0], VariableExpression) assert resolved.nodes[0].nodes[0] == resolved.nodes[1] def test_i_can_resolve_parsing_expression_when_ending_with_variable_concept(self): sheerka, context, parser = self.init_parser(init_from_sheerka=True) expression = Sequence(StrMatch("x"), VariableExpression("x")) resolved = parser.resolve_parsing_expression(context, expression, {}, set(), set()) assert isinstance(resolved.nodes[1], VariableExpression) assert resolved.nodes[0].nodes == [] # @pytest.mark.parametrize("parser_input, expected", [ # ("one", [ # (True, [CNC("bnf_one", source="one", one="one", body="one")]), # (True, [CNC("one_or_two", source="one", one="one", body="one")]), # ]), # ("two plus two", [ # (False, [CN("bnf_one"), UTN(" plus "), CN("one_or_two")]), # (False, [CN("one_or_two"), UTN(" plus "), CN("one_or_two")]), # ]) # ]) # def test_i_can_parse_when_multiple_results(self, parser_input, expected): # sheerka, context, parser = self.init_parser(init_from_sheerka=True) # # res = parser.parse(context, parser_input) # assert len(res) == len(expected) # # for res_i, expected_i in zip(res, expected): # assert res_i.status == expected_i[0] # expected_array = compute_expected_array(cmap, parser_input, expected_i[1]) # assert res_i.value.value == expected_array