import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.AtomNodeParser import AtomNodeParser
from parsers.BaseNodeParser import cnode, utnode, CNC, SCN, CN
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array


class TestAtomsParser(TestUsingMemoryBasedSheerka):
    """Tests for AtomNodeParser: parsing raw text into concept lexer nodes.

    Each test seeds a context with a map of named Concepts, parses some
    input text, and compares the resulting lexer nodes against an expected
    array built by compute_expected_array.
    """

    def init_parser(self, my_map, create_new=False, singleton=True, use_sheerka=False):
        """Build sheerka/context from *my_map*'s concepts and return an
        initialized AtomNodeParser.

        my_map: name -> Concept mapping whose values seed the context.
        create_new / singleton: forwarded to init_concepts.
        use_sheerka: when True the parser is constructed with the sheerka
            instance, otherwise a bare AtomNodeParser is used.

        Returns (sheerka, context, parser).
        """
        sheerka, context, *updated_concepts = self.init_concepts(
            *my_map.values(), create_new=create_new, singleton=singleton)
        if use_sheerka:
            parser = AtomNodeParser(sheerka=sheerka)
        else:
            parser = AtomNodeParser()
        parser.init_from_concepts(context, updated_concepts)
        return sheerka, context, parser

    def test_i_cannot_parse_empty_string(self):
        sheerka, context, parser = self.init_parser({})
        res = parser.parse(context, ParserInput(""))
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    @pytest.mark.parametrize("text, expected", [
        ("foo", ["foo"]),
        ("foo bar", ["foo", "bar"]),
        ("foo bar twenties", ["foo", "bar", "twenties"]),
        ("a plus b", [CN("plus", 0, 4)]),
        ("mult", [CN("mult", 0, 0, "mult")]),
    ])
    def test_i_can_parse_simple_sequences(self, text, expected):
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
            "plus": Concept("a plus b").def_var("a").def_var("b"),
            "mult": Concept("mult", definition="a mult b",
                            definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var("b"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("foo bar", ["foo bar"]),
        ("one two three", ["one two three"]),
        ("foo bar twenties one two three", ["foo bar", "twenties", "one two three"]),
    ])
    def test_i_can_parse_long_names(self, text, expected):
        concepts_map = {
            "foo bar": Concept("foo bar"),
            "one two three": Concept("one two three"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_status, expected", [
        ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]),
        ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]),
        ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]),
        ("foo bar 1 + 1", False, ["foo bar", " 1 + 1"]),
        ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]),
        ("foo bar x$!#", False, ["foo bar", " x$!#"]),
        ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]),
        ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]),
        ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]),
        ("1 + 1 foo bar", False, ["1 + 1 ", "foo bar"]),
        ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]),
        ("x$!# foo bar", False, ["x$!# ", "foo bar"]),
        ("func(one)", False, ["func(", "one", ")"]),
    ])
    def test_i_can_parse_when_unrecognized(self, text, expected_status, expected):
        concepts_map = {
            "prefixed": Concept("a prefixed").def_var("a"),
            "suffixed": Concept("prefixed a").def_var("a"),
            "infix": Concept("a infix b").def_var("a").def_var("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status == expected_status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_status, expected", [
        (" one two ", True, [cnode("one", 1, 1, "one"), cnode("two", 3, 3, "two")]),
        (" one x$!# ", False, [cnode("one", 1, 1, "one"), utnode(2, 7, " x$!# ")]),
        (" foo bar x$!# ", False, [cnode("foo bar", 1, 3, "foo bar"), utnode(4, 9, " x$!# ")]),
    ])
    def test_i_can_parse_when_surrounded_by_spaces(self, text, expected_status, expected):
        concepts_map = {
            "prefixed": Concept("a prefixed").def_var("a"),
            "suffixed": Concept("prefixed a").def_var("a"),
            "infix": Concept("a infix b").def_var("a").def_var("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status == expected_status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("one two", [["one", "two"], ["one two"]])
    ])
    def test_i_can_parse_when_multiple_concepts_start_with_the_same_token(self, text, expected):
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        # Ambiguous starts yield one result per viable segmentation.
        list_of_res = parser.parse(context, ParserInput(text))
        assert len(list_of_res) == len(expected)
        for i, res in enumerate(list_of_res):
            wrapper = res.body
            lexer_nodes = res.body.body
            assert res.status
            expected_array = compute_expected_array(concepts_map, text, expected[i])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    def test_i_can_parse_multiple_concepts_when_long_names_and_unrecognized(self):
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
            "one two x$!# one two": Concept("one two x$!# one two"),
        }
        text = "one two x$!# one two"
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, ParserInput(text))
        expected = [
            (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one", "two", " x$!# ", ("one two", 1)]),
            (False, ["one two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one two", " x$!# ", ("one two", 1)]),
            (True, ["one two x$!# one two"]),
        ]
        assert len(list_of_res) == len(expected)
        # NOTE: loop variable renamed (was "expected") to avoid shadowing the
        # list it iterates over.
        for res, exp in zip(list_of_res, expected):
            wrapper = res.body
            lexer_nodes = res.body.body
            assert res.status == exp[0]
            expected_array = compute_expected_array(concepts_map, text, exp[1])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    def test_i_can_parse_concepts_with_isa(self):
        concepts_map = {
            "one": Concept("one"),
            "number": Concept("number"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        # "one" isa "number" must not interfere with plain parsing of "one".
        sheerka.set_isa(context, sheerka.new("one"), sheerka.new("number"))
        res = parser.parse(context, ParserInput("one"))
        lexer_nodes = res.body.body
        expected_array = compute_expected_array(concepts_map, "one", ["one"])
        assert lexer_nodes == expected_array

    def test_i_can_parse_concepts_with_keyword(self):
        concepts_map = {
            "a special concept": Concept("a special concept"),
            "isa": Concept("isa"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput("a special concept"))
        lexer_nodes = res.body.body
        expected_array = compute_expected_array(concepts_map, "a special concept", ["a special concept"])
        assert lexer_nodes == expected_array
        res = parser.parse(context, ParserInput("isa"))
        lexer_nodes = res.body.body
        expected_array = compute_expected_array(concepts_map, "isa", ["isa"])
        assert lexer_nodes == expected_array

    def test_i_can_parse_concepts_when_sub_tokens(self):
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
        }
        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True)
        text = "not recognized foo bar not not recognized"
        expected = ["foo", "bar"]
        # Parse only the token window [start, end) of the input.
        parser_input = ParserInput(text, start=3, end=7)
        res = parser.parse(context, parser_input)
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text", [
        "foo",
        "foo one",
        "x$!#",
        "twenty one",
        "1 + 1",
        "foo x$!#",
        "1 + 1 twenty one",
    ])
    def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(self, text):
        concepts_map = {
            "foo": Concept("foo a").def_var("a"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
        assert res.body.body == text

    @pytest.mark.parametrize("text, expected", [
        ("hello foo bar", [
            (True, [CNC("hello1", source="hello foo ", a="foo "), "bar"]),
            (True, [CNC("hello2", source="hello foo ", b="foo "), "bar"]),
        ]),
    ])
    def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected):
        concepts_map = {
            "hello1": Concept("hello a").def_var("a"),
            "hello2": Concept("hello b").def_var("b"),
            "bar": Concept("bar")
        }
        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, singleton=False)
        list_of_res = parser.parse(context, ParserInput(text))
        assert len(list_of_res) == len(expected)
        # NOTE: loop variable renamed (was "expected") to avoid shadowing the
        # parametrized expectation list.
        for res, exp in zip(list_of_res, expected):
            wrapper = res.body
            lexer_nodes = res.body.body
            assert res.status == exp[0]
            expected_array = compute_expected_array(concepts_map, text, exp[1])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("1 + twenty one", [SCN("1 + twenty "), "one"]),
        ("one + twenty one", ["one", SCN(" + twenty "), ("one", 1)]),
    ])
    def test_source_code_found_must_be_considered_as_potential_false_positive(self, text, expected):
        concepts_map = {
            "one": Concept("one")
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body
        assert not res.status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_is_evaluated", [
        ("foo", False),
        ("bar", False),
        ("twenties", True),
        ("a plus b", True),
        ("mult", True),
    ])
    def test_concepts_with_variables_must_not_be_evaluated(self, text, expected_is_evaluated):
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar", body="'bar'"),
            "plus": Concept("a plus b").def_var("a").def_var("b"),
            "mult": Concept("mult", definition="a mult b",
                            definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var("b"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True)
        res = parser.parse(context, ParserInput(text))
        lexer_nodes = res.body.body
        assert res.status
        assert lexer_nodes[0].concept.metadata.is_evaluated == expected_is_evaluated