import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.AtomNodeParser import AtomNodeParser
from parsers.BaseNodeParser import cnode, utnode, CNC, scnode, SCN
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array


class TestAtomsParser(TestUsingMemoryBasedSheerka):
    """Tests for AtomNodeParser: turning raw text into sequences of lexer nodes.

    Each test builds a small map of Concept objects, initializes a parser over
    them via init_parser, parses a text and checks the resulting status and
    node array against compute_expected_array.
    """

    def init_parser(self, my_map, create_new=False, singleton=True):
        """Build a sheerka/context from *my_map*'s concepts and return an
        AtomNodeParser initialized over them.

        Returns the (sheerka, context, parser) triple used by every test.
        """
        sheerka, context, *updated_concepts = self.init_concepts(
            *my_map.values(), create_new=create_new, singleton=singleton)
        parser = AtomNodeParser()
        parser.init_from_concepts(context, updated_concepts)
        return sheerka, context, parser

    def _assert_parser_result(self, sheerka, concepts_map, text, res,
                              expected, expected_status=None):
        """Shared assertions on one parse result.

        Checks the status (truthiness only when *expected_status* is None,
        exact equality otherwise), that the wrapper is a PARSER_RESULT, and
        that the lexer nodes match the expected array.
        """
        wrapper = res.body
        lexer_nodes = res.body.body
        if expected_status is None:
            assert res.status
        else:
            assert res.status == expected_status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    def test_i_cannot_parse_empty_string(self):
        """An empty input yields a failed result tagged IS_EMPTY."""
        sheerka, context, parser = self.init_parser({})
        res = parser.parse(context, "")
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    @pytest.mark.parametrize("text, expected", [
        ("foo", ["foo"]),
        ("foo bar", ["foo", "bar"]),
        ("foo bar twenties", ["foo", "bar", "twenties"]),
    ])
    def test_i_can_parse_simple_sequences(self, text, expected):
        """Sequences of single-word concepts parse into one node per word."""
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
            "twenties": Concept(
                "twenties",
                definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        self._assert_parser_result(sheerka, concepts_map, text, res, expected)

    @pytest.mark.parametrize("text, expected", [
        ("foo bar", ["foo bar"]),
        ("one two three", ["one two three"]),
        ("foo bar twenties one two three",
         ["foo bar", "twenties", "one two three"]),
    ])
    def test_i_can_parse_long_names(self, text, expected):
        """Multi-word concept names are matched as single nodes."""
        concepts_map = {
            "foo bar": Concept("foo bar"),
            "one two three": Concept("one two three"),
            "twenties": Concept(
                "twenties",
                definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        self._assert_parser_result(sheerka, concepts_map, text, res, expected)

    @pytest.mark.parametrize("text, expected_status, expected", [
        ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]),
        ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]),
        ("foo bar one infix two", False,
         ["foo bar", "one", " infix ", "two"]),
        ("foo bar 1 + 1", False, ["foo bar", " 1 + 1"]),
        ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]),
        ("foo bar x$!#", False, ["foo bar", " x$!#"]),
        ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]),
        ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]),
        ("one infix two foo bar", False,
         ["one", " infix ", "two", "foo bar"]),
        ("1 + 1 foo bar", False, ["1 + 1 ", "foo bar"]),
        ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]),
        ("x$!# foo bar", False, ["x$!# ", "foo bar"]),
        ("func(one)", False, ["func(", "one", ")"]),
    ])
    def test_i_can_parse_when_unrecognized(self, text, expected_status,
                                           expected):
        """Unrecognized spans around known concepts become untyped nodes and
        the overall status is False."""
        # NOTE(review): the "prefixed"/"suffixed" names vs. their definitions
        # ("a prefixed" / "prefixed a") look swapped — confirm intent.
        concepts_map = {
            "prefixed": Concept("a prefixed").def_var("a"),
            "suffixed": Concept("prefixed a").def_var("a"),
            "infix": Concept("a infix b").def_var("a").def_var("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept(
                "twenties",
                definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        self._assert_parser_result(sheerka, concepts_map, text, res, expected,
                                   expected_status)

    @pytest.mark.parametrize("text, expected_status, expected", [
        (" one two ", True,
         [cnode("one", 1, 1, "one"), cnode("two", 3, 3, "two")]),
        (" one x$!# ", False,
         [cnode("one", 1, 1, "one"), utnode(2, 7, " x$!# ")]),
        (" foo bar x$!# ", False,
         [cnode("foo bar", 1, 3, "foo bar"), utnode(4, 9, " x$!# ")]),
    ])
    def test_i_can_parse_when_surrounded_by_spaces(self, text,
                                                   expected_status, expected):
        """Leading/trailing whitespace is tolerated; node positions are
        computed on the trimmed token stream."""
        concepts_map = {
            "prefixed": Concept("a prefixed").def_var("a"),
            "suffixed": Concept("prefixed a").def_var("a"),
            "infix": Concept("a infix b").def_var("a").def_var("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept(
                "twenties",
                definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        self._assert_parser_result(sheerka, concepts_map, text, res, expected,
                                   expected_status)

    @pytest.mark.parametrize("text, expected", [
        ("one two", [["one", "two"], ["one two"]])
    ])
    def test_i_can_parse_when_multiple_concepts_start_with_the_same_token(
            self, text, expected):
        """Ambiguous prefixes yield one successful result per reading."""
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)
        assert len(list_of_res) == len(expected)
        for res, exp in zip(list_of_res, expected):
            self._assert_parser_result(sheerka, concepts_map, text, res, exp)

    def test_i_can_parse_multiple_concepts_when_long_names_and_unrecognized(
            self):
        """Ambiguity combined with unrecognized spans enumerates every
        reading; only the full-name match succeeds."""
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
            "one two x$!# one two": Concept("one two x$!# one two"),
        }
        text = "one two x$!# one two"
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)
        expected = [
            (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one", "two", " x$!# ", ("one two", 1)]),
            (False, ["one two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one two", " x$!# ", ("one two", 1)]),
            (True, ["one two x$!# one two"]),
        ]
        assert len(list_of_res) == len(expected)
        for res, exp in zip(list_of_res, expected):
            self._assert_parser_result(sheerka, concepts_map, text, res,
                                       exp[1], exp[0])

    @pytest.mark.parametrize("text", [
        "foo",
        "foo one",
        "x$!#",
        "twenty one",
        "1 + 1",
        "foo x$!#",
        "1 + 1 twenty one",
    ])
    def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(
            self, text):
        """Texts containing only property/BNF concepts or unknown tokens are
        rejected with NOT_FOR_ME carrying the original text."""
        concepts_map = {
            "foo": Concept("foo a").def_var("a"),
            "twenties": Concept(
                "twenties",
                definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
        assert res.body.body == text

    @pytest.mark.parametrize("text, expected", [
        ("hello foo bar", [
            (True, [CNC("hello1", source="hello foo ", a="foo "), "bar"]),
            (True, [CNC("hello2", source="hello foo ", b="foo "), "bar"]),
        ]),
    ])
    def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text,
                                                                 expected):
        """An unrecognized span bindable to several property concepts yields
        one result per candidate binding."""
        concepts_map = {
            "hello1": Concept("hello a").def_var("a"),
            "hello2": Concept("hello b").def_var("b"),
            "bar": Concept("bar")
        }
        sheerka, context, parser = self.init_parser(
            concepts_map, create_new=True, singleton=False)
        list_of_res = parser.parse(context, text)
        assert len(list_of_res) == len(expected)
        for res, exp in zip(list_of_res, expected):
            self._assert_parser_result(sheerka, concepts_map, text, res,
                                       exp[1], exp[0])

    @pytest.mark.parametrize("text, expected", [
        ("1 + twenty one", [SCN("1 + twenty "), "one"]),
        ("one + twenty one", ["one", SCN(" + twenty "), ("one", 1)]),
    ])
    def test_source_code_found_must_be_considered_as_potential_false_positive(
            self, text, expected):
        """Source-code-looking spans are wrapped as SCN nodes and the result
        is flagged as a failure (potential false positive)."""
        concepts_map = {
            "one": Concept("one")
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        self._assert_parser_result(sheerka, concepts_map, text, res, expected,
                                   False)