# (code-viewer header residue: 459 lines, 20 KiB, Python)
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
|
from core.concept import Concept, CC
|
|
from core.tokenizer import Tokenizer, TokenKind
|
|
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, scnode, cnode, \
|
|
utnode, SyaAssociativity, CN, CNC, UTN, SourceCodeWithConceptNode, SCWC, SourceCodeNode
|
|
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
|
|
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
from tests.parsers.parsers_utils import compute_expected_array, get_node
|
|
|
|
|
|
def get_input_nodes_from(my_concepts_map, full_expr, *args):
    """Build the parser input nodes described by ``args`` for ``full_expr``.

    ``full_expr`` is tokenized once. Each entry of ``args`` is first passed to
    ``get_node`` (together with the concept map and the token values, EOF
    tokens filtered out) and the result is then converted into a real parser
    object by ``_get_real_node``.

    :param my_concepts_map: mapping of concept key -> ``Concept``; used to
        create a concept when a node description carries no concept of its own
    :param full_expr: the expression the node descriptions refer to
    :param args: node descriptions accepted by ``get_node``
    :return: list with one converted object per entry of ``args``
    :raises NotImplementedError: if ``get_node`` yields an unsupported type
    """
    full_expr_as_tokens = list(Tokenizer(full_expr))
    tokens_for_get_node = [token.value for token in full_expr_as_tokens if token.type != TokenKind.EOF]

    def _resolve_concept(n):
        # Use the concept already attached to the description when there is
        # one, otherwise create a new concept updated from the map entry.
        return getattr(n, "concept", None) or Concept().update_from(my_concepts_map[n.concept_key])

    def _get_real_node(n):
        if isinstance(n, CC):
            # A CC resolves to a bare Concept (not a node); its compiled
            # values are converted recursively.
            concept = _resolve_concept(n)
            for k, v in n.compiled.items():
                concept.compiled[k] = _get_real_node(v)
            return concept

        if isinstance(n, (utnode, UTN)):
            return UnrecognizedTokensNode(n.start, n.end, full_expr_as_tokens[n.start: n.end + 1])

        if isinstance(n, (CNC, CN, cnode)):
            concept = _resolve_concept(n)
            tokens = full_expr_as_tokens[n.start: n.end + 1]
            # Not every description in this group carries compiled values.
            if hasattr(n, "compiled"):
                for k, v in n.compiled.items():
                    concept.compiled[k] = _get_real_node(v)
            return ConceptNode(concept, n.start, n.end, tokens)

        if isinstance(n, SCWC):
            # The description is updated in place, as before.
            n.first = _get_real_node(n.first)
            # Fix: the closing node must be built from n.last, not from n.first.
            n.last = _get_real_node(n.last)
            n.content = tuple(_get_real_node(nn) for nn in n.content)
            return SourceCodeWithConceptNode(n.first, n.last, list(n.content))

        if isinstance(n, (UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SourceCodeWithConceptNode)):
            # Already a real parser node: nothing to convert.
            return n

        raise NotImplementedError()

    return [
        _get_real_node(get_node(my_concepts_map, tokens_for_get_node, arg))
        for arg in args
    ]
|
|
|
|
|
|
# Concepts shared by every test of this module.
# Insertion order matters: init_parser() writes the initialised concepts back
# into this dict by position.
concepts_map = {
    "5params": Concept("5params").def_var("a").def_var("b").def_var("c").def_var("d").def_var("e"),
    "plus": Concept("a plus b", body="a + b").def_var("a").def_var("b"),
    "mult": Concept("a mult b", body="a * b").def_var("a").def_var("b"),
    "one": Concept("one", body="1"),
    "two": Concept("two", body="2"),
    "three": Concept("three", body="3"),
    "twenties": Concept("twenties", definition="'twenty' (one|two)=unit", body="20 + unit").def_var("unit"),
    # "hello one" (no variable) and "hello a" (one variable) both match "hello one".
    "hello_atom": Concept("hello one"),
    "hello_sya": Concept("hello a").def_var("a"),
    # Two concepts with the same shape, differing only by the variable name.
    "greetings_a": Concept("greetings a").def_var("a"),
    "greetings_b": Concept("greetings b").def_var("b"),
}
|
|
|
|
|
|
class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
    """Tests for ``UnrecognizedNodeParser`` (``validate_concept_node`` and ``parse``)."""

    # Sheerka instance shared by the tests; created once in setup_class().
    sheerka = None

    @classmethod
    def setup_class(cls):
        """Create the shared sheerka from ``concepts_map`` and register the SYA definitions."""
        t = TestUnrecognizedNodeParser()
        TestUnrecognizedNodeParser.sheerka, context, _ = t.init_parser(concepts_map, create_new=True)
        # NOTE(review): 20 / 10 are presumably precedences (mult above plus) — confirm.
        TestUnrecognizedNodeParser.sheerka.force_sya_def(context, [
            (concepts_map["mult"].id, 20, SyaAssociativity.Right),
            (concepts_map["plus"].id, 10, SyaAssociativity.Right),
        ])

    def init_parser(self, my_concepts_map=None, **kwargs):
        """Return ``(sheerka, context, parser)`` for a test.

        When ``my_concepts_map`` is given, its concepts are passed to
        ``init_concepts`` (with ``kwargs``) and the map values are replaced, in
        order, by the concepts returned. Otherwise the class-level sheerka is
        reused with a new context.
        """
        if my_concepts_map:
            sheerka, context, *updated_concepts = self.init_concepts(*my_concepts_map.values(), **kwargs)
            # Only values are reassigned, so iterating the dict keys is safe.
            for i, key in enumerate(my_concepts_map):
                my_concepts_map[key] = updated_concepts[i]
        else:
            sheerka = TestUnrecognizedNodeParser.sheerka
            context = self.get_context(sheerka)

        parser = UnrecognizedNodeParser()
        return sheerka, context, parser

    def test_i_can_validate_a_valid_concept_node(self):
        """A concept node for "one" validates and keeps its concept."""
        _, context, parser = self.init_parser()
        node = get_input_nodes_from(concepts_map, "one", "one")[0]

        res = parser.validate_concept_node(context, node)

        assert res.status
        assert res.body.concept == concepts_map["one"]

    def test_i_can_validate_concept_unrecognized_tokens(self):
        """Each unrecognized parameter of "5params" is resolved by the expected parser."""
        sheerka, context, parser = self.init_parser()
        node = get_input_nodes_from(
            concepts_map,
            "5params one two three twenty one 1 + 2 one plus two mult three",
            CNC("5params",
                a=" one ",
                b=" two three ",
                c=" twenty one ",
                d=utnode(12, 18, " 1 + 2 "),
                e=" one plus two mult three"))[0]

        res = parser.validate_concept_node(context, node)

        assert res.status

        concept = res.body.concept
        assert concept == concepts_map["5params"]

        # a: single atom
        assert len(concept.compiled["a"]) == 1
        assert sheerka.isinstance(concept.compiled["a"][0], BuiltinConcepts.RETURN_VALUE)
        assert concept.compiled["a"][0].status
        assert concept.compiled["a"][0].who == "parsers.AtomNode"
        assert concept.compiled["a"][0].body.body == [cnode("one", 1, 1, "one")]

        # b: two atoms
        assert len(concept.compiled["b"]) == 1
        assert sheerka.isinstance(concept.compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
        assert concept.compiled["b"][0].status
        assert concept.compiled["b"][0].who == "parsers.AtomNode"
        assert concept.compiled["b"][0].body.body == [cnode("two", 1, 1, "two"), cnode("three", 3, 3, "three")]

        # c: bnf concept
        assert len(concept.compiled["c"]) == 1
        assert sheerka.isinstance(concept.compiled["c"][0], BuiltinConcepts.RETURN_VALUE)
        assert concept.compiled["c"][0].status
        assert concept.compiled["c"][0].who == "parsers.BnfNode"
        expected_nodes = compute_expected_array(
            concepts_map,
            " twenty one ",
            [CNC("twenties", source="twenty one", unit="one")])
        assert concept.compiled["c"][0].body.body == expected_nodes

        # d: python source
        assert len(concept.compiled["d"]) == 1
        assert sheerka.isinstance(concept.compiled["d"][0], BuiltinConcepts.RETURN_VALUE)
        assert concept.compiled["d"][0].status
        assert concept.compiled["d"][0].who == "parsers.Python"
        assert concept.compiled["d"][0].body.source == " 1 + 2 "

        # e: sya concepts
        assert len(concept.compiled["e"]) == 1
        assert sheerka.isinstance(concept.compiled["e"][0], BuiltinConcepts.RETURN_VALUE)
        assert concept.compiled["e"][0].status
        assert concept.compiled["e"][0].who == "parsers.SyaNode"
        expected_nodes = compute_expected_array(
            concepts_map,
            " one plus two mult three ",
            [CNC("plus", a="one", b=CC("mult", a="two", b="three"))],
            exclude_body=True)

        assert concept.compiled["e"][0].body.body == expected_nodes

        # NOTE(review): disabled sanity check kept for reference (incomplete assertion).
        # # sanity check, I can evaluate the concept
        # evaluated = sheerka.evaluate_concept(self.get_context(sheerka, eval_body=True), concept)
        # assert evaluated.key == concept.key
        # assert evaluated.get_value("a") ==

    def test_i_can_validate_with_recursion(self):
        """Parameters of a concept nested inside another concept are validated too."""
        sheerka, context, parser = self.init_parser()

        node = get_input_nodes_from(
            concepts_map,
            "1 plus 2 mult twenty two",
            CNC("plus",
                a="1 ",
                b=CC("mult", a=" 2 ", b=" twenty two")))[0]

        res = parser.validate_concept_node(context, node)

        assert res.status
        assert res.body.concept == concepts_map["plus"]
        assert len(res.body.concept.compiled["a"]) == 1
        assert res.body.concept.compiled["a"][0].status
        assert res.body.concept.compiled["a"][0].who == "parsers.Python"
        assert res.body.concept.compiled["a"][0].body.source == "1 "

        assert res.body.concept.compiled["b"] == concepts_map["mult"]
        assert sheerka.isinstance(res.body.concept.compiled["b"].compiled["a"][0], BuiltinConcepts.RETURN_VALUE)
        assert res.body.concept.compiled["b"].compiled["a"][0].status
        assert res.body.concept.compiled["b"].compiled["a"][0].who == "parsers.Python"
        assert res.body.concept.compiled["b"].compiled["a"][0].body.source == " 2 "

        assert sheerka.isinstance(res.body.concept.compiled["b"].compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
        assert res.body.concept.compiled["b"].compiled["b"][0].status
        assert res.body.concept.compiled["b"].compiled["b"][0].who == "parsers.BnfNode"
        expected_nodes = compute_expected_array(
            concepts_map,
            " twenty two",
            [CNC("twenties", source="twenty two", unit="two")])
        assert res.body.concept.compiled["b"].compiled["b"][0].body.body == expected_nodes

    # NOTE(review): the two tests below are disabled; the reason is not visible here.
    # def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
    #     sheerka, context, parser = self.init_parser()
    #
    #     node = get_input_nodes_from(
    #         concepts_map,
    #         "one plus 1 + 1",
    #         CNC("plus",
    #             a=UTN("one "),
    #             b=UTN("1 + 1")))[0]
    #
    #     res = UnrecognizedNodeParser().validate_concept_node(context, node)
    #
    #     assert res.status
    #     assert res.body.concept == concepts_map["plus"]
    #     assert res.body.concept.compiled["a"] == concepts_map["one"]
    #     assert len(res.body.concept.compiled["b"]) == 1
    #     assert sheerka.isinstance(res.body.concept.compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
    #     assert res.body.concept.compiled["b"][0].status
    #     assert res.body.concept.compiled["b"][0].who == "parsers.Python"
    #     assert res.body.concept.compiled["b"][0].body.source == "1 + 1"
    #
    #     # # evaluate
    #     # context = self.get_context(sheerka, eval_body=True)
    #     # evaluated = sheerka.evaluate_concept(context, res.body.concept)
    #     # assert evaluated.body == 3

    # def test_i_can_validate_and_evaluate_concept_when_bnf_concept(self):
    #     sheerka, context, parser = self.init_parser()
    #     node = get_concept_node(concepts_map, "one plus twenty one", "plus", "one", "twenty one")
    #
    #     res = UnrecognizedNodeParser().validate_concept_node(context, node)
    #
    #     assert res.status
    #     assert res.body.concept == concepts_map["plus"]
    #     assert res.body.concept.compiled["a"] == concepts_map["one"]
    #     assert len(res.body.concept.compiled["b"]) == 1
    #     assert res.body.concept.compiled["b"][0].status
    #     assert res.body.concept.compiled["b"][0].who == "parsers.BnfNode"
    #
    #     # evaluate
    #     context = self.get_context(sheerka, eval_body=True)
    #     evaluated = sheerka.evaluate_concept(context, res.body.concept)
    #     assert evaluated.body == 22

    def test_i_can_parse_and_evaluate_unrecognized_python_node(self):
        """Unrecognized tokens "1 + 1" come back as a single source code node."""
        sheerka, context, parser = self.init_parser()

        expression = "1 + 1"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = res.body.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1
        assert actual_nodes[0] == scnode(0, 4, expression)

    def test_i_cannot_parse_unrecognized_python_that_looks_like_concept(self):
        """A lone unknown identifier is rejected and the input node is returned unchanged."""
        sheerka, context, parser = self.init_parser()

        expression = "fake_concept_name"  # as it's not a concept, it will be recognized as python node
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = res.body.body

        assert not res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1
        assert actual_nodes[0] == nodes[0]

    def test_i_can_parse_unrecognized_bnf_concept_node(self):
        """Unrecognized tokens "twenty one" are resolved to the "twenties" concept."""
        sheerka, context, parser = self.init_parser()

        expression = "twenty one"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = res.body.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1
        expected_array = compute_expected_array(
            concepts_map,
            expression, [CNC("twenties", source=expression, unit="one")])
        assert actual_nodes == expected_array

    def test_i_can_parse_unrecognized_sya_concept_node(self):
        """Unrecognized tokens "one plus two mult three" are resolved to plus(one, mult(two, three))."""
        sheerka, context, parser = self.init_parser()

        expression = "one plus two mult three"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = res.body.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1

        expected_array = compute_expected_array(
            concepts_map,
            expression, [CNC("plus",
                             a="one",
                             b=CC("mult", source="two mult three", a="two", b="three"))],
            exclude_body=True)
        assert actual_nodes == expected_array

    def test_i_can_parse_unrecognized_source_code_with_concept_node(self):
        """With nothing in short term memory, the concept inside the source code is flagged as evaluated."""
        sheerka, context, parser = self.init_parser()

        expression = "desc(a plus b)"
        source_code_concepts = SCWC("desc(", ")", CNC("plus", a=UTN("a"), b=UTN("b")))
        nodes = get_input_nodes_from(concepts_map, expression, source_code_concepts)
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = res.body.body

        assert not res.status  # status is False to let PythonWithConceptParser validate the code
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1
        assert actual_nodes[0].nodes[0].concept.metadata.is_evaluated  # 'a plus b' is recognized as concept definition

    def test_i_can_parse_unrecognized_source_code_with_concept_node_when_var_in_short_term_memory(self):
        """With "a" in short term memory, the concept inside the source code is not flagged as evaluated."""
        sheerka, context, parser = self.init_parser()

        expression = "desc(a plus b)"
        source_code_concepts = SCWC("desc(", ")", CNC("plus", a=UTN("a"), b=UTN("b")))
        nodes = get_input_nodes_from(concepts_map, expression, source_code_concepts)
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        context.add_to_short_term_memory("a", 1)
        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = res.body.body

        assert not res.status  # status is False to let PythonWithConceptParser validate the code
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1
        assert not actual_nodes[0].nodes[0].concept.metadata.is_evaluated  # 'a plus b' need to be evaluated

    def test_i_can_parse_sequences(self):
        """A concept node followed by unrecognized tokens yields two concept nodes."""
        _, context, parser = self.init_parser()

        expression = "one plus two three"
        sequence = get_input_nodes_from(concepts_map, expression,
                                        CNC("plus", a="one", b="two"),
                                        utnode(5, 6, " three"))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=sequence)

        res = parser.parse(context, parser_input)
        actual_nodes = res.body.body

        assert res.status

        expected_array = compute_expected_array(
            concepts_map,
            expression, [
                CNC("plus", a="one", b="two"),
                CN("three", start=6, end=6)])
        assert actual_nodes == expected_array

    def test_i_can_parse_when_multiple_atom_and_sya(self):
        """"hello one" matches both hello_atom and hello_sya: two successful results are returned."""
        _, context, parser = self.init_parser()
        expression = "two hello one three"
        nodes = get_input_nodes_from(concepts_map, expression,
                                     "two", UTN("hello one"), "three")
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        assert len(res) == 2
        assert res[0].status
        assert res[1].status

        actual_nodes0 = res[0].body.body
        expected_0 = compute_expected_array(concepts_map, expression, [
            CN("two", 0, 0),
            CN("hello_atom", source="hello one", start=2, end=4),
            CN("three", 6, 6)])
        assert actual_nodes0 == expected_0

        actual_nodes1 = res[1].body.body
        expected_1 = compute_expected_array(concepts_map, expression, [
            CN("two", 0, 0),
            CNC("hello_sya", source="hello one", start=2, end=4, a="one"),
            CN("three", 6, 6)],
            exclude_body=True)

        assert actual_nodes1 == expected_1

    def test_i_can_parse_when_multiple_sya_concepts(self):
        """"greetings two" matches both greetings_a and greetings_b: two successful results are returned."""
        _, context, parser = self.init_parser()
        expression = "greetings two"
        nodes = get_input_nodes_from(concepts_map, expression, UTN("greetings two"))

        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        assert len(res) == 2
        assert res[0].status
        assert res[1].status

        actual_nodes0 = res[0].body.body
        expected_0 = compute_expected_array(concepts_map, expression, [
            CNC("greetings_a", source="greetings two", start=0, end=2, a="two")], exclude_body=True)
        assert actual_nodes0 == expected_0

        actual_nodes1 = res[1].body.body
        expected_1 = compute_expected_array(concepts_map, expression, [
            CNC("greetings_b", source="greetings two", start=0, end=2, b="two")], exclude_body=True)
        assert actual_nodes1 == expected_1

    def test_i_cannot_parse_when_some_unrecognized_remain(self):
        """Parsing fails and the input nodes are returned unchanged."""
        _, context, parser = self.init_parser()
        expression = "twenty one + one"
        nodes = get_input_nodes_from(concepts_map, expression, UTN("twenty "), "one", " + ", ("one", 1))

        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)
        res = parser.parse(context, parser_input)

        assert not res.status
        assert res.body.body == nodes

    def test_i_cannot_parse_when_i_cannot_validate(self):
        """A concept node whose parameter cannot be resolved produces an ERROR body."""
        sheerka, context, parser = self.init_parser(concepts_map, create_new=True)
        expression = "one plus unknown tokens"
        nodes = get_input_nodes_from(concepts_map, expression,
                                     CNC("plus", a="one ", b=" unknown tokens"))

        # NOTE(review): source="six" does not match `expression` (looks like a leftover) — confirm.
        parser_input = ParserResultConcept("parsers.xxx", source="six", value=nodes)
        res = parser.parse(context, parser_input)

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)

    def test_i_cannot_parse_when_unrecognized(self):
        """Tokens that no parser recognizes make parsing fail; the input nodes are returned unchanged."""
        _, context, parser = self.init_parser(concepts_map, create_new=True)
        expression = "unknown tokens"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))

        # NOTE(review): source="six" does not match `expression` (looks like a leftover) — confirm.
        parser_input = ParserResultConcept("parsers.xxx", source="six", value=nodes)
        res = parser.parse(context, parser_input)
        actual_nodes = res.body.body

        assert not res.status
        assert actual_nodes == nodes
|