384 lines
15 KiB
Python
384 lines
15 KiB
Python
import pytest
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.concept import Concept, DEFINITION_TYPE_DEF
|
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
|
from parsers.SequenceNodeParser import SequenceNodeParser
|
|
from parsers.BaseNodeParser import cnode, utnode, CNC, SCN, CN
|
|
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
from tests.parsers.parsers_utils import compute_expected_array
|
|
|
|
|
|
class TestAtomsParser(TestUsingMemoryBasedSheerka):
    """Tests of ``SequenceNodeParser`` run against a memory-based Sheerka."""

    def init_parser(self, my_map, create_new=False, singleton=True, use_sheerka=False):
        """Create a ``SequenceNodeParser`` initialised from the concepts of ``my_map``.

        :param my_map: mapping whose values are the concepts to register
        :param create_new: forwarded as-is to ``with_concepts``
        :param singleton: kept for call-site compatibility; not read by this helper
        :param use_sheerka: when true, the parser is built with ``sheerka=sheerka``
        :return: the ``(sheerka, context, parser)`` triple
        """
        sheerka, context, *updated_concepts = self.init_test().with_concepts(
            *my_map.values(),
            create_new=create_new).unpack()

        parser = SequenceNodeParser(sheerka=sheerka) if use_sheerka else SequenceNodeParser()
        parser.init_from_concepts(context, updated_concepts)

        return sheerka, context, parser

    def test_i_cannot_parse_empty_string(self):
        """An empty input fails and the result body is an ``IS_EMPTY`` concept."""
        sheerka, context, parser = self.init_parser({})

        res = parser.parse(context, ParserInput(""))

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    @pytest.mark.parametrize("text, expected", [
        ("foo", ["foo"]),
        ("c:foo:", [CN("foo", source="c:foo:")]),
        ("c:|1001:", [CN("foo", source="c:|1001:")]),
        (" foo", ["foo"]),
        ("foo ", ["foo"]),
        (" foo ", ["foo"]),
        ("foo bar", ["foo", "bar"]),
        ("foo bar twenties", ["foo", "bar", "twenties"]),
        ("a plus b", [CN("plus", 0, 4)]),
        ("mult", [CN("mult", 0, 0, "mult")]),
    ])
    def test_i_can_parse_simple_sequences(self, text, expected):
        """Each input parses successfully into the expected sequence of nodes."""
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
            "plus": Concept("a plus b").def_var("a").def_var("b"),
            "mult": Concept(
                "mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF
            ).def_var("a").def_var("b"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("foo bar", ["foo bar"]),
        ("one two three", ["one two three"]),
        ("foo bar twenties one two three", ["foo bar", "twenties", "one two three"]),
    ])
    def test_i_can_parse_long_names(self, text, expected):
        """Concepts whose names span several words are matched as single nodes."""
        concepts_map = {
            "foo bar": Concept("foo bar"),
            "one two three": Concept("one two three"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_status, expected", [
        ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]),
        ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]),
        ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]),
        ("foo bar 1 + 1", False, ["foo bar", " 1 + 1"]),
        ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]),
        ("foo bar x$!#", False, ["foo bar", " x$!#"]),

        ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]),
        ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]),
        ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]),
        ("1 + 1 foo bar", False, ["1 + 1 ", "foo bar"]),
        ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]),
        ("x$!# foo bar", False, ["x$!# ", "foo bar"]),
        ("func(one)", False, ["func(", "one", ")"]),
    ])
    def test_i_can_parse_when_unrecognized(self, text, expected_status, expected):
        """Inputs mixing known concepts with other text yield the expected status and nodes."""
        concepts_map = {
            "prefixed": Concept("a prefixed").def_var("a"),
            "suffixed": Concept("prefixed a").def_var("a"),
            "infix": Concept("a infix b").def_var("a").def_var("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status == expected_status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_status, expected", [
        (" one two ", True, [cnode("one", 1, 1, "one"), cnode("two", 3, 3, "two")]),
        (" one x$!# ", False, [cnode("one", 1, 1, "one"), utnode(2, 7, " x$!# ")]),
        (" foo bar x$!# ", False, [cnode("foo bar", 1, 3, "foo bar"), utnode(4, 9, " x$!# ")]),
    ])
    def test_i_can_parse_when_surrounded_by_spaces(self, text, expected_status, expected):
        """Inputs with leading and trailing spaces yield the expected status and nodes."""
        concepts_map = {
            "prefixed": Concept("a prefixed").def_var("a"),
            "suffixed": Concept("prefixed a").def_var("a"),
            "infix": Concept("a infix b").def_var("a").def_var("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status == expected_status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("one two", [["one", "two"], ["one two"]])
    ])
    def test_i_can_parse_when_multiple_concepts_start_with_the_same_token(self, text, expected):
        """When several concepts share a first token, one successful result is returned per reading."""
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, ParserInput(text))
        assert len(list_of_res) == len(expected)

        # Lengths were checked above, so pairing with zip visits every result.
        for res, expected_nodes in zip(list_of_res, expected):
            wrapper = res.body
            lexer_nodes = res.body.body

            assert res.status
            expected_array = compute_expected_array(concepts_map, text, expected_nodes)
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    def test_i_can_parse_multiple_concepts_when_long_names_and_unrecognized(self):
        """Overlapping short and long concept names produce every expected reading, in order."""
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
            "one two x$!# one two": Concept("one two x$!# one two"),
        }

        text = "one two x$!# one two"
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, ParserInput(text))

        # Each entry is (expected status, expected nodes) for the result at the same index.
        expected_results = [
            (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one", "two", " x$!# ", ("one two", 1)]),
            (False, ["one two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one two", " x$!# ", ("one two", 1)]),
            (True, ["one two x$!# one two"]),
        ]

        assert len(list_of_res) == len(expected_results)

        for res, (expected_status, expected_nodes) in zip(list_of_res, expected_results):
            wrapper = res.body
            lexer_nodes = res.body.body

            assert res.status == expected_status
            expected_array = compute_expected_array(concepts_map, text, expected_nodes)
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    def test_i_can_parse_concepts_with_isa(self):
        """A concept is still parsed by name after ``set_isa`` has been called on it."""
        concepts_map = {
            "one": Concept("one"),
            "number": Concept("number"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        sheerka.set_isa(context, sheerka.new("one"), sheerka.new("number"))

        res = parser.parse(context, ParserInput("one"))
        lexer_nodes = res.body.body
        expected_array = compute_expected_array(concepts_map, "one", ["one"])
        assert lexer_nodes == expected_array

    def test_i_can_parse_concepts_with_keyword(self):
        """Concepts named ``a special concept`` and ``isa`` are each parsed as one node."""
        concepts_map = {
            "a special concept": Concept("a special concept"),
            "isa": Concept("isa"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)

        res = parser.parse(context, ParserInput("a special concept"))
        lexer_nodes = res.body.body
        expected_array = compute_expected_array(concepts_map, "a special concept", ["a special concept"])
        assert lexer_nodes == expected_array

        res = parser.parse(context, ParserInput("isa"))
        lexer_nodes = res.body.body
        expected_array = compute_expected_array(concepts_map, "isa", ["isa"])
        assert lexer_nodes == expected_array

    def test_i_can_parse_concepts_when_sub_tokens(self):
        """Parsing a ``ParserInput`` limited by ``start``/``end`` yields only ``foo`` and ``bar``."""
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True)
        text = "not recognized foo bar not not recognized"
        expected = ["foo", "bar"]
        parser_input = ParserInput(text, start=3, end=7)
        res = parser.parse(context, parser_input)
        wrapper = res.body
        lexer_nodes = res.body.body

        assert res.status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text", [
        "foo",
        "foo one",
        "x$!#",
        "twenty one",
        "1 + 1",
        "foo x$!#",
        "1 + 1 twenty one",
    ])
    def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(self, text):
        """These inputs are rejected with ``NOT_FOR_ME`` and the original text as body."""
        concepts_map = {
            "foo": Concept("foo a").def_var("a"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
        assert res.body.body == text

    @pytest.mark.parametrize("text, expected", [
        ("hello foo bar",
         [
             (True, [CNC("hello1", source="hello foo ", a="foo "), "bar"]),
             (True, [CNC("hello2", source="hello foo ", b="foo "), "bar"]),
         ]),
    ])
    def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected):
        """Two concepts sharing the ``hello <var>`` shape each produce their own result."""
        concepts_map = {
            "hello1": Concept("hello a").def_var("a"),
            "hello2": Concept("hello b").def_var("b"),
            "bar": Concept("bar")
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, singleton=False)
        list_of_res = parser.parse(context, ParserInput(text))

        assert len(list_of_res) == len(expected)

        for res, (expected_status, expected_nodes) in zip(list_of_res, expected):
            wrapper = res.body
            lexer_nodes = res.body.body

            assert res.status == expected_status
            expected_array = compute_expected_array(concepts_map, text, expected_nodes)
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("1 + twenty one", [SCN("1 + twenty "), "one"]),
        ("one + twenty one", ["one", SCN(" + twenty "), ("one", 1)]),
    ])
    def test_source_code_found_must_be_considered_as_potential_false_positive(self, text, expected):
        """Inputs producing ``SCN`` nodes next to a known concept are reported with a false status."""
        concepts_map = {
            "one": Concept("one")
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, ParserInput(text))
        wrapper = res.body
        lexer_nodes = res.body.body

        assert not res.status

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_is_evaluated", [
        ("foo", False),
        ("bar", False),
        ("twenties", True),
        ("a plus b", True),
        ("mult", True),
    ])
    def test_concepts_with_variables_must_not_be_evaluated(self, text, expected_is_evaluated):
        """Check ``is_evaluated`` on the first node's concept.

        The flag is expected to be false for the concepts declared without
        variables (``foo``, ``bar``) and true for those declared with ``def_var``.
        """
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar", body="'bar'"),
            "plus": Concept("a plus b").def_var("a").def_var("b"),
            "mult": Concept(
                "mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF
            ).def_var("a").def_var("b"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True)
        res = parser.parse(context, ParserInput(text))
        lexer_nodes = res.body.body

        assert res.status
        assert lexer_nodes[0].concept.get_metadata().is_evaluated == expected_is_evaluated

    def test_the_parser_always_return_a_new_instance_of_the_concept(self):
        """The concept carried by the node is not the object returned by ``get_by_name``."""
        concepts_map = {
            "foo": Concept("foo"),
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True)
        res = parser.parse(context, ParserInput("foo"))

        assert res.status
        assert res.body.body[0].concept is not sheerka.get_by_name("foo")

    def test_i_can_only_parse_when_the_name_is_an_identifier(self):
        """A quoted ``'foo'`` is rejected with ``NOT_FOR_ME`` even though ``foo`` is a concept."""
        # to prove that I can distinguish string from actual concept name
        concepts_map = {
            "foo": Concept("foo"),
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True)
        res = parser.parse(context, ParserInput("'foo'"))

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)

    def test_i_do_not_parse_rules_instead_of_concepts(self):
        """The input ``r:|20:`` is rejected with ``NOT_FOR_ME``."""
        sheerka, context, parser = self.init_parser({})
        res = parser.parse(context, ParserInput("r:|20:"))

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)