# Source: Sheerka-Old/tests/parsers/test_UnrecognizedNodeParser.py (Python, 504 lines, 22 KiB)

from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept, CC
from core.tokenizer import Tokenizer, TokenKind
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, scnode, cnode, \
utnode, SyaAssociativity, CN, CNC, UTN, SourceCodeWithConceptNode, SCWC, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser
from parsers.SyaNodeParser import SyaNodeParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array, get_node
def get_input_nodes_from(my_concepts_map, full_expr, *args):
    """Build the real parser nodes described by ``args`` for ``full_expr``.

    Every descriptor is first resolved with ``get_node`` and then converted
    into a real node. The tokens attached to a node are sliced out of the
    tokenized ``full_expr`` using the node's ``start`` and ``end`` positions
    (both inclusive).

    Args:
        my_concepts_map: Mapping from concept key to the ``Concept`` that is
            copied when a descriptor carries no concept of its own.
        full_expr: The complete expression the nodes are taken from.
        *args: Node descriptors as accepted by ``get_node``, in the order the
            nodes must be returned.

    Returns:
        A list holding one node per descriptor.

    Raises:
        NotImplementedError: If a descriptor has an unsupported type.
    """
    # Defined before the closures below so it is obvious what they capture.
    full_expr_as_tokens = list(Tokenizer(full_expr))

    def _tokens_of(n):
        # ``end`` is inclusive, hence the + 1.
        return full_expr_as_tokens[n.start: n.end + 1]

    def _fill_compiled(concept, compiled):
        # Nested descriptors are converted recursively.
        for name, value in compiled.items():
            concept.get_compiled()[name] = _get_real_node(value)

    def _get_real_node(n):
        # The order of the isinstance checks is kept as in the original code.
        if isinstance(n, CC):
            # update_from is called on a fresh instance, exactly like the
            # CNC/CN/cnode branch below; calling it on the class itself would
            # bind the source concept as ``self``.
            concept = n.concept or Concept().update_from(my_concepts_map[n.concept_key])
            _fill_compiled(concept, n.compiled)
            return concept

        if isinstance(n, (utnode, UTN)):
            return UnrecognizedTokensNode(n.start, n.end, _tokens_of(n))

        if isinstance(n, (CNC, CN, cnode)):
            if hasattr(n, "concept") and n.concept:
                concept = n.concept
            else:
                concept = Concept().update_from(my_concepts_map[n.concept_key])
            tokens = _tokens_of(n)
            if hasattr(n, "compiled"):
                _fill_compiled(concept, n.compiled)
            return ConceptNode(concept, n.start, n.end, tokens)

        if isinstance(n, SCWC):
            # The descriptor itself is updated in place before the real node
            # is built from it.
            n.first = _get_real_node(n.first)
            n.last = _get_real_node(n.last)
            n.content = tuple(_get_real_node(nn) for nn in n.content)
            return SourceCodeWithConceptNode(n.first, n.last, list(n.content)).pseudo_fix_source()

        if isinstance(n, (UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SourceCodeWithConceptNode)):
            # Already a real node: nothing to convert.
            return n

        raise NotImplementedError("unsupported node descriptor type: {}".format(type(n).__name__))

    # get_node works on raw token values and must not see the EOF token.
    tokens_for_get_node = [token.value for token in full_expr_as_tokens if token.type != TokenKind.EOF]

    return [
        _get_real_node(get_node(my_concepts_map, tokens_for_get_node, arg))
        for arg in args
    ]
# Concepts shared by the tests of this module, keyed by a short alias.
# Insertion order matters: setup_class and init_parser pair ``values()``
# positionally with the concepts returned by ``with_concepts``.
concepts_map = {
    # Concept with five variables.
    "5params": (
        Concept("5params")
        .def_var("a")
        .def_var("b")
        .def_var("c")
        .def_var("d")
        .def_var("e")
    ),
    # Binary concepts with a body written over their two variables.
    "plus": (
        Concept("a plus b", body="a + b")
        .def_var("a")
        .def_var("b")
    ),
    "mult": (
        Concept("a mult b", body="a * b")
        .def_var("a")
        .def_var("b")
    ),
    # Concepts without variables.
    "one": Concept("one", body="1"),
    "two": Concept("two", body="2"),
    "three": Concept("three", body="3"),
    # Concept declared through a definition string instead of its name.
    "twenties": (
        Concept("twenties", definition="'twenty' (one|two)=unit", body="20 + unit")
        .def_var("unit")
    ),
    # "hello one" as a fixed name, and "hello a" with ``a`` as a variable.
    "hello_atom": Concept("hello one"),
    "hello_sya": Concept("hello a").def_var("a"),
    # Two concepts that differ only by the name of their variable.
    "greetings_a": Concept("greetings a").def_var("a"),
    "greetings_b": Concept("greetings b").def_var("b"),
}
class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
    """Tests for ``UnrecognizedNodeParser``.

    Covers ``validate_concept_node`` and ``parse`` on inputs that still
    contain unrecognized tokens.
    """

    # Ontology holding the concepts of ``concepts_map``; set by setup_class.
    shared_ontology = None

    @classmethod
    def setup_class(cls):
        """Create the shared concepts once and keep their ontology."""
        init_test_helper = cls().init_test(cache_only=False, ontology="#TestUnrecognizedNodeParser#")
        sheerka, context, *updated = init_test_helper.with_concepts(
            *concepts_map.values(), create_new=True).unpack()

        # Replace every entry by the concept returned at the same position.
        for i, concept_name in enumerate(concepts_map):
            concepts_map[concept_name] = updated[i]

        sheerka.set_is_greater_than(context,
                                    BuiltinConcepts.PRECEDENCE,
                                    concepts_map["mult"],
                                    concepts_map["plus"], 'Sya')

        cls.shared_ontology = sheerka.get_ontology(context)
        sheerka.pop_ontology(context)

    def init_parser(self, my_concepts_map=None, **kwargs):
        """Return ``(sheerka, context, parser)`` for a test.

        Without ``my_concepts_map`` the ontology stored by ``setup_class`` is
        added to a fresh context. Otherwise the given concepts are created
        (``kwargs`` are forwarded to ``with_concepts``) and ``my_concepts_map``
        is updated in place with the returned concepts.
        """
        if my_concepts_map is None:
            sheerka, context = self.init_test().unpack()
            sheerka.add_ontology(context, self.shared_ontology)
        else:
            sheerka, context, *updated = self.init_test().with_concepts(
                *my_concepts_map.values(), **kwargs).unpack()
            for i, concept_name in enumerate(my_concepts_map):
                my_concepts_map[concept_name] = updated[i]

        parser = UnrecognizedNodeParser()
        return sheerka, context, parser

    @staticmethod
    def _parse_input_nodes(parser, context, expression, nodes):
        """Wrap ``nodes`` in a parser result for ``expression`` and parse it."""
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)
        return parser.parse(context, parser_input)

    def test_i_can_validate_a_valid_concept_node(self):
        """A concept node without unrecognized tokens validates as is."""
        _, context, parser = self.init_parser()
        node = get_input_nodes_from(concepts_map, "one", "one")[0]

        res = parser.validate_concept_node(context, node)

        assert res.status
        assert res.body.concept == concepts_map["one"]

    def test_i_can_validate_concept_unrecognized_tokens(self):
        """Each unrecognized variable is resolved by the expected parser."""
        sheerka, context, parser = self.init_parser()
        node = get_input_nodes_from(
            concepts_map,
            "5params one two three twenty one 1 + 2 one plus two mult three",
            CNC("5params",
                a=" one ",
                b=" two three ",
                c=" twenty one ",
                d=utnode(12, 18, " 1 + 2 "),
                e=" one plus two mult three"))[0]

        res = parser.validate_concept_node(context, node)

        assert res.status
        concept = res.body.concept
        assert concept == concepts_map["5params"]

        compiled = concept.get_compiled()

        def single_result(name, expected_who):
            # Checks shared by the five variables; returns the single result.
            results = compiled[name]
            assert len(results) == 1
            result = results[0]
            assert sheerka.isinstance(result, BuiltinConcepts.RETURN_VALUE)
            assert result.status
            assert result.who == expected_who
            return result

        result_a = single_result("a", "parsers." + SequenceNodeParser.NAME)
        assert result_a.body.body == [cnode("one", 1, 1, "one")]

        result_b = single_result("b", "parsers." + SequenceNodeParser.NAME)
        assert result_b.body.body == [cnode("two", 1, 1, "two"), cnode("three", 3, 3, "three")]

        result_c = single_result("c", "parsers." + BnfNodeParser.NAME)
        expected_nodes = compute_expected_array(
            concepts_map,
            " twenty one ",
            [CNC("twenties", source="twenty one", unit="one")])
        assert result_c.body.body == expected_nodes

        result_d = single_result("d", "parsers.Python")
        assert result_d.body.source == " 1 + 2 "

        result_e = single_result("e", "parsers." + SyaNodeParser.NAME)
        expected_nodes = compute_expected_array(
            concepts_map,
            " one plus two mult three ",
            [CNC("plus", a="one", b=CC("mult", a="two", b="three"))],
            exclude_body=True)
        assert result_e.body.body == expected_nodes

        # # sanity check, I can evaluate the concept
        # evaluated = sheerka.evaluate_concept(self.get_context(sheerka, eval_body=True), concept)
        # assert evaluated.key == concept.key
        # assert evaluated.get_value("a") ==

    def test_i_can_validate_with_recursion(self):
        """Variables of a nested concept are validated as well."""
        sheerka, context, parser = self.init_parser()
        node = get_input_nodes_from(
            concepts_map,
            "1 plus 2 mult twenty two",
            CNC("plus",
                a="1 ",
                b=CC("mult", a=" 2 ", b=" twenty two")))[0]

        res = parser.validate_concept_node(context, node)

        assert res.status
        plus = res.body.concept
        assert plus == concepts_map["plus"]

        plus_compiled = plus.get_compiled()
        assert len(plus_compiled["a"]) == 1
        assert plus_compiled["a"][0].status
        assert plus_compiled["a"][0].who == "parsers.Python"
        assert plus_compiled["a"][0].body.source == "1 "

        mult = plus_compiled["b"]
        assert mult == concepts_map["mult"]
        mult_compiled = mult.get_compiled()

        mult_a = mult_compiled["a"][0]
        assert sheerka.isinstance(mult_a, BuiltinConcepts.RETURN_VALUE)
        assert mult_a.status
        assert mult_a.who == "parsers.Python"
        assert mult_a.body.source == " 2 "

        mult_b = mult_compiled["b"][0]
        assert sheerka.isinstance(mult_b, BuiltinConcepts.RETURN_VALUE)
        assert mult_b.status
        assert mult_b.who == "parsers.Bnf"
        expected_nodes = compute_expected_array(
            concepts_map,
            " twenty two",
            [CNC("twenties", source="twenty two", unit="two")])
        assert mult_b.body.body == expected_nodes

    # Disabled tests, kept as they were for reference.
    #
    # def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
    # sheerka, context, parser = self.init_parser()
    #
    # node = get_input_nodes_from(
    # concepts_map,
    # "one plus 1 + 1",
    # CNC("plus",
    # a=UTN("one "),
    # b=UTN("1 + 1")))[0]
    #
    # res = UnrecognizedNodeParser().validate_concept_node(context, node)
    #
    # assert res.status
    # assert res.body.concept == concepts_map["plus"]
    # assert res.body.concept.get_compiled()["a"] == concepts_map["one"]
    # assert len(res.body.concept.get_compiled()["b"]) == 1
    # assert sheerka.isinstance(res.body.concept.get_compiled()["b"][0], BuiltinConcepts.RETURN_VALUE)
    # assert res.body.concept.get_compiled()["b"][0].status
    # assert res.body.concept.get_compiled()["b"][0].who == "parsers.Python"
    # assert res.body.concept.get_compiled()["b"][0].body.source == "1 + 1"
    #
    # # # evaluate
    # # context = self.get_context(sheerka, eval_body=True)
    # # evaluated = sheerka.evaluate_concept(context, res.body.concept)
    # # assert evaluated.body == 3
    # def test_i_can_validate_and_evaluate_concept_when_bnf_concept(self):
    # sheerka, context, parser = self.init_parser()
    # node = get_concept_node(concepts_map, "one plus twenty one", "plus", "one", "twenty one")
    #
    # res = UnrecognizedNodeParser().validate_concept_node(context, node)
    #
    # assert res.status
    # assert res.body.concept == concepts_map["plus"]
    # assert res.body.concept.get_compiled()["a"] == concepts_map["one"]
    # assert len(res.body.concept.get_compiled()["b"]) == 1
    # assert res.body.concept.get_compiled()["b"][0].status
    # assert res.body.concept.get_compiled()["b"][0].who == "parsers.BnfNode"
    #
    # # evaluate
    # context = self.get_context(sheerka, eval_body=True)
    # evaluated = sheerka.evaluate_concept(context, res.body.concept)
    # assert evaluated.body == 22

    def test_i_can_parse_and_evaluate_unrecognized_python_node(self):
        """Unrecognized tokens that are valid Python become a source code node."""
        sheerka, context, parser = self.init_parser()
        expression = "1 + 1"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))

        res = self._parse_input_nodes(parser, context, expression, nodes)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == expression
        assert len(actual_nodes) == 1
        assert actual_nodes[0] == scnode(0, 4, expression)

    def test_i_cannot_parse_unrecognized_python_that_looks_like_concept(self):
        """A bare identifier that is not a concept is returned unchanged."""
        sheerka, context, parser = self.init_parser()
        expression = "fake_concept_name"  # as it's not a concept, it will be recognized as python node
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))

        res = self._parse_input_nodes(parser, context, expression, nodes)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert not res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == expression
        assert len(actual_nodes) == 1
        assert actual_nodes[0] == nodes[0]

    def test_i_can_parse_unrecognized_bnf_concept_node(self):
        """Unrecognized tokens matching the 'twenties' definition are resolved."""
        sheerka, context, parser = self.init_parser()
        expression = "twenty one"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))

        res = self._parse_input_nodes(parser, context, expression, nodes)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == expression
        assert len(actual_nodes) == 1
        expected_array = compute_expected_array(
            concepts_map,
            expression, [CNC("twenties", source=expression, unit="one")])
        assert actual_nodes == expected_array

    def test_i_can_parse_unrecognized_sya_concept_node(self):
        """'mult' is nested inside 'plus' when both appear in the tokens."""
        sheerka, context, parser = self.init_parser()
        expression = "one plus two mult three"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))

        res = self._parse_input_nodes(parser, context, expression, nodes)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == expression
        assert len(actual_nodes) == 1
        expected_array = compute_expected_array(
            concepts_map,
            expression, [CNC("plus",
                             a="one",
                             b=CC("mult", source="two mult three", a="two", b="three"))],
            exclude_body=True)
        assert actual_nodes == expected_array

    def test_i_can_parse_unrecognized_source_code_with_concept_node(self):
        """Source code wrapping a concept whose variables are unknown names."""
        sheerka, context, parser = self.init_parser()
        expression = "desc(a plus b)"
        source_code_concepts = SCWC("desc(", ")", CNC("plus", a=UTN("a"), b=UTN("b")))
        nodes = get_input_nodes_from(concepts_map, expression, source_code_concepts)

        res = self._parse_input_nodes(parser, context, expression, nodes)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert not res.status  # status is False to let PythonWithConceptParser validate the code
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == expression
        assert len(actual_nodes) == 1
        # 'a plus b' is recognized as concept definition
        assert actual_nodes[0].nodes[0].concept.get_metadata().is_evaluated

    def test_i_can_parse_unrecognized_source_code_with_concept_node_when_var_in_short_term_memory(self):
        """Same input as above, but 'a' is known in short term memory."""
        sheerka, context, parser = self.init_parser()
        expression = "desc(a plus b)"
        source_code_concepts = SCWC("desc(", ")", CNC("plus", a=UTN("a"), b=UTN("b")))
        nodes = get_input_nodes_from(concepts_map, expression, source_code_concepts)
        context.add_to_short_term_memory("a", 1)

        res = self._parse_input_nodes(parser, context, expression, nodes)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert not res.status  # status is False to let PythonWithConceptParser validate the code
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == expression
        assert len(actual_nodes) == 1
        # 'a plus b' need to be evaluated
        assert not actual_nodes[0].nodes[0].concept.get_metadata().is_evaluated

    def test_i_can_parse_unrecognized_sya_concept_that_references_source_code(self):
        """A concept variable holding source code is compiled to return values."""
        sheerka, context, parser = self.init_parser()
        expression = "hello get_user_name(twenty one)"
        tmp_node = CNC("hello_sya",
                       source=expression,
                       a=SCWC("get_user_name(", ")", CNC("twenties", source="twenty one", unit="one")))
        nodes = get_input_nodes_from(concepts_map, expression, tmp_node)

        res = self._parse_input_nodes(parser, context, expression, nodes)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == expression
        assert len(actual_nodes) == 1
        expected_array = compute_expected_array(
            concepts_map,
            expression, [CN("hello_sya", source=expression)],
            exclude_body=True)
        assert actual_nodes == expected_array
        compiled_a = actual_nodes[0].concept.get_compiled()["a"]
        assert isinstance(compiled_a, list)
        assert sheerka.isinstance(compiled_a[0], BuiltinConcepts.RETURN_VALUE)

    def test_i_can_parse_sequences(self):
        """A concept node followed by unrecognized tokens is parsed as a sequence."""
        _, context, parser = self.init_parser()
        expression = "one plus two three"
        sequence = get_input_nodes_from(concepts_map, expression,
                                        CNC("plus", a="one", b="two"),
                                        utnode(5, 6, " three"))

        res = self._parse_input_nodes(parser, context, expression, sequence)
        actual_nodes = res.body.body

        assert res.status
        expected_array = compute_expected_array(
            concepts_map,
            expression, [
                CNC("plus", a="one", b="two"),
                CN("three", start=6, end=6)])
        assert actual_nodes == expected_array

    def test_i_can_parse_when_multiple_atom_and_sya(self):
        """'hello one' matches both 'hello_atom' and 'hello_sya': two results."""
        _, context, parser = self.init_parser()
        expression = "two hello one three"
        nodes = get_input_nodes_from(concepts_map, expression,
                                     "two", UTN("hello one"), "three")

        res = self._parse_input_nodes(parser, context, expression, nodes)

        assert len(res) == 2
        assert res[0].status
        assert res[1].status

        actual_nodes0 = res[0].body.body
        expected_0 = compute_expected_array(concepts_map, expression, [
            CN("two", 0, 0),
            CN("hello_atom", source="hello one", start=2, end=4),
            CN("three", 6, 6)])
        assert actual_nodes0 == expected_0

        actual_nodes1 = res[1].body.body
        expected_1 = compute_expected_array(concepts_map, expression, [
            CN("two", 0, 0),
            CNC("hello_sya", source="hello one", start=2, end=4, a="one"),
            CN("three", 6, 6)],
            exclude_body=True)
        assert actual_nodes1 == expected_1

    def test_i_can_parse_when_multiple_sya_concepts(self):
        """'greetings two' matches 'greetings_a' and 'greetings_b': two results."""
        _, context, parser = self.init_parser()
        expression = "greetings two"
        nodes = get_input_nodes_from(concepts_map, expression, UTN("greetings two"))

        res = self._parse_input_nodes(parser, context, expression, nodes)

        assert len(res) == 2
        assert res[0].status
        assert res[1].status

        actual_nodes0 = res[0].body.body
        expected_0 = compute_expected_array(concepts_map, expression, [
            CNC("greetings_a", source="greetings two", start=0, end=2, a="two")], exclude_body=True)
        assert actual_nodes0 == expected_0

        actual_nodes1 = res[1].body.body
        expected_1 = compute_expected_array(concepts_map, expression, [
            CNC("greetings_b", source="greetings two", start=0, end=2, b="two")], exclude_body=True)
        assert actual_nodes1 == expected_1

    def test_i_cannot_parse_when_some_unrecognized_remain(self):
        """The input nodes are returned untouched when parsing fails."""
        _, context, parser = self.init_parser()
        expression = "twenty one + one"
        nodes = get_input_nodes_from(concepts_map, expression, UTN("twenty "), "one", " + ", ("one", 1))

        res = self._parse_input_nodes(parser, context, expression, nodes)

        assert not res.status
        assert res.body.body == nodes

    def test_i_cannot_parse_when_i_cannot_validate(self):
        """A concept node with an unresolvable variable yields an error."""
        # NOTE: passing the module-level map updates it in place (see init_parser).
        sheerka, context, parser = self.init_parser(concepts_map, create_new=True)
        expression = "one plus unknown tokens"
        nodes = get_input_nodes_from(concepts_map, expression,
                                     CNC("plus", a="one ", b=" unknown tokens"))

        res = self._parse_input_nodes(parser, context, expression, nodes)

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)

    def test_i_cannot_parse_when_unrecognized(self):
        """Tokens that nothing recognizes are returned untouched."""
        # NOTE: passing the module-level map updates it in place (see init_parser).
        _, context, parser = self.init_parser(concepts_map, create_new=True)
        expression = "unknown tokens"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))

        res = self._parse_input_nodes(parser, context, expression, nodes)
        actual_nodes = res.body.body

        assert not res.status
        assert actual_nodes == nodes