Sheerka-Old/tests/parsers/test_AtomsParser.py

import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.AtomNodeParser import AtomNodeParser
from parsers.BaseNodeParser import cnode, utnode, CNC, scnode, SCN

from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array


class TestAtomsParser(TestUsingMemoryBasedSheerka):
    def init_parser(self, my_map, create_new=False, singleton=True):
        sheerka, context, *updated_concepts = self.init_concepts(
            *my_map.values(),
            create_new=create_new,
            singleton=singleton)

        parser = AtomNodeParser()
        parser.init_from_concepts(context, updated_concepts)

        return sheerka, context, parser

    def test_i_cannot_parse_empty_string(self):
        sheerka, context, parser = self.init_parser({})

        res = parser.parse(context, "")

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    @pytest.mark.parametrize("text, expected", [
        ("foo", ["foo"]),
        ("foo bar", ["foo", "bar"]),
        ("foo bar twenties", ["foo", "bar", "twenties"]),
    ])
    def test_i_can_parse_simple_sequences(self, text, expected):
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("foo bar", ["foo bar"]),
        ("one two three", ["one two three"]),
        ("foo bar twenties one two three", ["foo bar", "twenties", "one two three"]),
    ])
    def test_i_can_parse_long_names(self, text, expected):
        concepts_map = {
            "foo bar": Concept("foo bar"),
            "one two three": Concept("one two three"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_status, expected", [
        ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]),
        ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]),
        ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]),
        ("foo bar 1 + 1", False, ["foo bar", " 1 + 1"]),
        ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]),
        ("foo bar x$!#", False, ["foo bar", " x$!#"]),

        ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]),
        ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]),
        ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]),
        ("1 + 1 foo bar", False, ["1 + 1 ", "foo bar"]),
        ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]),
        ("x$!# foo bar", False, ["x$!# ", "foo bar"]),
        ("func(one)", False, ["func(", "one", ")"]),
    ])
    def test_i_can_parse_when_unrecognized(self, text, expected_status, expected):
        concepts_map = {
            "prefixed": Concept("a prefixed").def_var("a"),
            "suffixed": Concept("prefixed a").def_var("a"),
            "infix": Concept("a infix b").def_var("a").def_var("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status == expected_status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_status, expected", [
        (" one two ", True, [cnode("one", 1, 1, "one"), cnode("two", 3, 3, "two")]),
        (" one x$!# ", False, [cnode("one", 1, 1, "one"), utnode(2, 7, " x$!# ")]),
        (" foo bar x$!# ", False, [cnode("foo bar", 1, 3, "foo bar"), utnode(4, 9, " x$!# ")]),
    ])
    def test_i_can_parse_when_surrounded_by_spaces(self, text, expected_status, expected):
        concepts_map = {
            "prefixed": Concept("a prefixed").def_var("a"),
            "suffixed": Concept("prefixed a").def_var("a"),
            "infix": Concept("a infix b").def_var("a").def_var("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status == expected_status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("one two", [["one", "two"], ["one two"]])
    ])
    def test_i_can_parse_when_multiple_concepts_start_with_the_same_token(self, text, expected):
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)
        assert len(list_of_res) == len(expected)

        for i, res in enumerate(list_of_res):
            wrapper = res.body
            lexer_nodes = res.body.body

            assert res.status
            expected_array = compute_expected_array(concepts_map, text, expected[i])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    def test_i_can_parse_multiple_concepts_when_long_names_and_unrecognized(self):
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
            "one two x$!# one two": Concept("one two x$!# one two"),
        }

        text = "one two x$!# one two"
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)

        expected = [
            (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one", "two", " x$!# ", ("one two", 1)]),
            (False, ["one two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one two", " x$!# ", ("one two", 1)]),
            (True, ["one two x$!# one two"]),
        ]

        assert len(list_of_res) == len(expected)

        for res, expected in zip(list_of_res, expected):
            wrapper = res.body
            lexer_nodes = res.body.body

            assert res.status == expected[0]
            expected_array = compute_expected_array(concepts_map, text, expected[1])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text", [
        "foo",
        f"foo one",
        "x$!#",
        "twenty one",
        "1 + 1",
        "foo x$!#",
        "1 + 1 twenty one",
    ])
    def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(self, text):
        concepts_map = {
            "foo": Concept("foo a").def_var("a"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
        assert res.body.body == text

    @pytest.mark.parametrize("text, expected", [
        ("hello foo bar",
         [
             (True, [CNC("hello1", source="hello foo ", a="foo "), "bar"]),
             (True, [CNC("hello2", source="hello foo ", b="foo "), "bar"]),
         ]),
    ])
    def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected):
        concepts_map = {
            "hello1": Concept("hello a").def_var("a"),
            "hello2": Concept("hello b").def_var("b"),
            "bar": Concept("bar")
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, singleton=False)
        list_of_res = parser.parse(context, text)

        assert len(list_of_res) == len(expected)

        for res, expected in zip(list_of_res, expected):
            wrapper = res.body
            lexer_nodes = res.body.body

            assert res.status == expected[0]
            expected_array = compute_expected_array(concepts_map, text, expected[1])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("1 + twenty one", [SCN("1 + twenty "), "one"]),
        ("one + twenty one", ["one", SCN(" + twenty "), ("one", 1)]),
    ])
    def test_source_code_found_must_be_considered_as_potential_false_positive(self, text, expected):
        concepts_map = {
            "one": Concept("one")
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body

        assert not res.status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array