Implemented FunctionParser

This commit is contained in:
2020-09-17 14:11:09 +02:00
parent 8a866880bc
commit 177a6b1d5f
40 changed files with 1752 additions and 561 deletions
+201 -100
View File
@@ -1,14 +1,14 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, CC
from core.concept import Concept, CIO
from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer
from parsers.BaseNodeParser import utnode, ConceptNode, cnode, short_cnode, UnrecognizedTokensNode, \
SCWC, CNC, UTN, SourceCodeWithConceptNode
SCWC, CNC, UTN, SCN, CN
from parsers.PythonParser import PythonNode
from parsers.SyaNodeParser import SyaNodeParser, SyaConceptParserHelper, SyaAssociativity, \
NoneAssociativeSequenceErrorNode, TooManyParametersFound
NoneAssociativeSequenceErrorNode, TooManyParametersFound, InFixToPostFix, ParenthesisMismatchErrorNode
import tests.parsers.parsers_utils
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -633,21 +633,25 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res_i.out == expected_array
@pytest.mark.parametrize("expression, expected", [
# I can't manage source code functions :-(
# ("function(one plus three) minus two", []),
# ("function(one plus three) minus two",
# [SCWC("function(", ")", CNC("plus", a="one", b="three")), "two", "minus"]),
("two minus function(one plus three)",
["two", SCWC("function(", ")", CNC("plus", a="one", b="three")), "minus"]),
("func1() minus func2()", [SCN("func1()"), SCN("func2()"), "minus"]),
("func1() comes with func2()", [SCN("func1()"), UTN(" comes with "), SCN("func2()")]),
# ("(one plus two) ", ["one", "two", "plus"]),
# ("(one prefixed) ", ["one", "prefixed"]),
# ("(suffixed one) ", ["one", "suffixed"]),
# ("(one ? two : three)", ["one", "two", "three", "?"]),
# ("square(square(one))", ["one", ("square", 1), "square"]),
# ("square ( square ( one ) )", ["one", ("square", 1), "square"]),
#
# ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]),
# ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
# ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
#
# ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("(one plus two) ", ["one", "two", "plus"]),
("(one prefixed) ", ["one", "prefixed"]),
("(suffixed one) ", ["one", "suffixed"]),
("(one ? two : three)", ["one", "two", "three", "?"]),
("square(square(one))", ["one", ("square", 1), "square"]),
("square ( square ( one ) )", ["one", ("square", 1), "square"]),
("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]),
("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),
@@ -666,6 +670,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
])
def test_i_can_pos_fix_when_parenthesis(self, expression, expected):
sheerka, context, parser = self.init_parser()
context.add_to_protected_hints(BuiltinConcepts.DEBUG)
res = parser.infix_to_postfix(context, ParserInput(expression))
expected_array = compute_expected_array(cmap, expression, expected)
@@ -675,34 +680,30 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [
# composition
("function(suffixed one)", [[SCWC("function(", ")", "one", "suffixed")]]),
("function(one prefixed)", [[SCWC("function(", ")", "one", "prefixed")]]),
("function(if one then two else three end)", [[SCWC("function(", ")", "one", "two", "three", "if")]]),
("function(suffixed twenty two)", [
[SCWC("function(", ")", "twenty ", "suffixed", "two")],
[SCWC("function(", ")", short_cnode("twenties", "twenty two"), "suffixed")]]),
("function(twenty two prefixed)", [
[SCWC("function(", ")", "twenty ", "two", "prefixed")],
[SCWC("function(", ")", short_cnode("twenties", "twenty two"), "prefixed")],
]),
("function(if one then twenty two else three end)", [
["')'", "one", "twenty ", "two"], # error
[SCWC("function(", ")", "one", short_cnode("twenties", "twenty two"), "three", "if")]
]),
("func1(func2(one two) three)", [
[SCWC("func1(", (")", 1), SCWC("func2(", ")", "one", "two"), "three")]]),
("function(suffixed one)", [[SCWC("function(", ")", CNC("suffixed", a="one"))]]),
("function(one prefixed)", [[SCWC("function(", ")", CNC("prefixed", a="one"))]]),
("function(if one then two else three end)",
[[SCWC("function(", ")", CNC("if", a="one", b="two", c="three", end=14))]]),
("function(suffixed twenty two)",
[[SCWC("function(", ")", CNC("suffixed", a=CIO("twenties", source="twenty two")))]]),
("function(twenty two prefixed)",
[[SCWC("function(", ")", CNC("prefixed", a=CIO("twenties", source="twenty two")))]]),
("function(if one then twenty two else three end)",
[[SCWC("function(", ")", CNC("if", a="one", b=CIO("twenties", source="twenty two"), c="three", end=16))]]),
("func1(func2(one two) three)",
[[SCWC("func1(", (")", 1), SCWC("func2(", ")", "one", "two"), "three")]]),
("twenty two(suffixed one)", [
["twenty ", SCWC("two(", ")", "one", "suffixed")],
[SCWC("twenty two(", ")", "one", "suffixed")],
["twenty ", SCWC("two(", ")", CNC("suffixed", a="one"))],
[CN("twenties", source="twenty two"), "one", "suffixed"],
]),
("twenty two(one prefixed)", [
["twenty ", SCWC("two(", ")", "one", "prefixed")],
[SCWC("twenty two(", ")", "one", "prefixed")],
["twenty ", SCWC("two(", ")", CNC("prefixed", a="one"))],
[CN("twenties", source="twenty two"), "one", "prefixed"],
]),
("f1(one plus two mult three) plus f2(suffixed x$!# prefixed)", [
[SCWC("f1(", ")", "one", "two", "three", "mult", "plus"),
SCWC("f2(", (")", 1), "x$!#", "prefixed", "suffixed"),
[SCWC("f1(", ")", CN("plus", source="one plus two mult three")),
SCWC("f2(", (")", 1), CN("suffixed", source="suffixed x$!# prefixed")),
("plus", 1)]
]),
@@ -715,12 +716,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
# Sequence
("if one then two else three end function(x$!#)", [
["one", "two", "three", "if", SCWC(" function(", ")", "x$!#")]]),
("one prefixed function(two)", [["one", "prefixed", SCWC(" function(", ")", "two")]]),
("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]),
(
"func1(suffixed one func2(two))",
[[SCWC("func1(", (")", 1), "one", "suffixed", SCWC(" func2(", ")", "two"))]]),
["one", "two", "three", "if", UTN(" ", start=13, end=13), SCWC("function(", ")", "x$!#")]]),
("one prefixed function(two)", [["one", "prefixed", UTN(" ", start=3, end=3), SCWC("function(", ")", "two")]]),
("suffixed one function(two)", [["one", "suffixed", UTN(" ", start=3, end=3), SCWC("function(", ")", "two")]]),
("func(one, two, three)", [[SCWC("func(", ")", "one", ", ", "two", (", ", 1), "three")]]),
])
def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences):
sheerka, context, parser = self.init_parser()
@@ -737,6 +736,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one plus ( 1 + ", ("(", 4)),
("one( 1 + ", ("(", 1)),
("one ( 1 + ", ("(", 2)),
("function(", ("(", 1)),
("function( 1 + ", ("(", 1)),
("function ( 1 + ", ("(", 2)),
("one plus ) 1 + ", (")", 4)),
@@ -754,7 +754,16 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
res = parser.infix_to_postfix(context, ParserInput(expression))
assert len(res) == 1
assert res[0].errors == [expected]
assert res[0].errors == [ParenthesisMismatchErrorNode(expected)]
def test_i_can_detect_parenthesis_mismatch_error_special_case(self):
    """Check the errors reported for an unclosed '(' inside a ternary.

    For "one ? function( : two" the conversion must yield exactly one
    result whose ``errors`` list holds the same ParenthesisMismatchErrorNode
    twice.
    """
    _, context, parser = self.init_parser()
    source = "one ? function( : two"
    # NOTE(review): 5 looks like the token position of '(' -- confirm.
    unmatched = ("(", 5)
    expected_errors = [
        ParenthesisMismatchErrorNode(unmatched),
        ParenthesisMismatchErrorNode(unmatched),
    ]

    results = parser.infix_to_postfix(context, ParserInput(source))

    assert len(results) == 1
    assert results[0].errors == expected_errors
@pytest.mark.parametrize("expression, expected", [
("one ? one two : three", ("?", ":")),
@@ -802,29 +811,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert len(res) == 1
assert res[0].out == expected_array
def test_i_cannot_post_fix_using_concept_short_name(self):
    """Check that a bare concept name used as a function argument is an error.

    Each expression wraps a concept name without its parameters in
    ``desc(...)``. The single result must start with a
    SourceCodeWithConceptNode whose first inner node carries the expected
    error message.
    """
    concepts_map = {
        "infixed": self.from_def_concept("infixed", "a infixed b", ["a", "b"]),
        "suffixed": self.from_def_concept("suffixed", "suffixed a", ["a"]),
        "prefixed": self.from_def_concept("prefixed", "a prefixed", ["a"]),
    }
    sheerka, context, parser = self.init_parser(concepts_map)

    # (expression, error expected on the first inner node), checked in order.
    cases = (
        ("desc(infixed)", 'Not enough prefix parameters'),
        ("desc(suffixed)", 'Not enough suffix parameters'),
        ("desc(prefixed)", 'Not enough prefix parameters'),
    )
    for expression, expected_error in cases:
        results = parser.infix_to_postfix(context, ParserInput(expression))
        assert len(results) == 1
        assert isinstance(results[0].out[0], SourceCodeWithConceptNode)
        assert results[0].out[0].nodes[0].error == expected_error
@pytest.mark.parametrize("expression", [
"one ? two : three",
"one?two:three",
@@ -861,7 +847,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
expression = "a plus plus equals b"
res = parser.infix_to_postfix(context, ParserInput(expression))
expected_array = tests.parsers.parsers_utils.compute_debug_array(res)
assert expected_array == [
assert len(expected_array) == len([
["T(a)", "C(a plus b)", "C(a plus b)", "T(equals)", "T(b)"],
["T(a)", "C(a plus b)", "C(a plus plus)", "T(equals)", "T(b)"],
["T(a)", "C(a plus b)", "C(a plus equals b)", "T(equals)", "T(b)"],
@@ -871,27 +857,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
["T(a)", "C(a plus equals b)", "C(a plus b)", "T(equals)", "T(b)"],
["T(a)", "C(a plus equals b)", "C(a plus plus)", "T(equals)", "T(b)"],
["T(a)", "C(a plus equals b)", "C(a plus equals b)", "T(equals)", "T(b)"],
]
def test_non_reg(self):
    """Smoke test: run both entry points on "a plus complex infix b".

    There are no assertions; the test only fails if ``infix_to_postfix``
    or ``parse`` raises.
    """
    plus = Concept("a plus b").def_var("a").def_var("b")
    complex_infix = Concept("a complex infix b ").def_var("a").def_var("b")
    concepts_map = {"plus": plus, "complex infix": complex_infix}

    # No precedence/associativity overrides are active; the disabled ones were:
    # concepts_map["plus"]: (1, SyaAssociativity.Right),
    # concepts_map["plus plus"]: (1, SyaAssociativity.Right),
    # concepts_map["plus equals"]: (1, SyaAssociativity.Right),
    sya_def = {}

    sheerka, context, parser = self.init_parser(concepts_map, sya_def)
    expression = "a plus complex infix b"

    # Results are deliberately discarded; only successful execution matters.
    parser.infix_to_postfix(context, ParserInput(expression))
    parser.parse(context, ParserInput(expression))
])
def test_i_can_use_string_instead_of_identifier(self):
concepts_map = {
@@ -945,6 +911,81 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert len(res) == 1
assert res[0].out == expected_array
@pytest.mark.parametrize("expression, expected_debugs", [
("one", [[" 0:one => PUSH_UNREC"]]),
("one plus two", [[
' 0:one => PUSH_UNREC',
' 1:<ws> => PUSH_UNREC',
' 2:plus(SyaConceptDef(concept=(1005)a plus b, precedence=1, associativity=right)) => ??',
" _: => RECOG [[CN((1001)one)]]",
" _: => POP ConceptNode(concept='(1001)one', source='one', start=0, end=0)",
' 2:plus(SyaConceptDef(concept=(1005)a plus b, precedence=1, associativity=right)) => PUSH',
' 3:<ws> => EAT',
' 4:two => PUSH_UNREC',
' 5:<EOF> => ??',
" _: => RECOG [[CN((1002)two)]]",
" _: => POP ConceptNode(concept='(1002)two', source='two', start=4, end=4)",
' _: => POP SyaConceptParserHelper(concept=(1005)a plus b, start=2, error=None)']]),
("suffixed one", [[
' 0:suffixed(SyaConceptDef(concept=(1009)suffixed a, precedence=1, associativity=right)) => PUSH',
' 1:<ws> => EAT',
' 2:one => PUSH_UNREC',
' 3:<EOF> => ??',
" _: => RECOG [[CN((1001)one)]]",
" _: => POP ConceptNode(concept='(1001)one', source='one', start=2, end=2)",
' _: => POP SyaConceptParserHelper(concept=(1009)suffixed a, start=0, error=None)'
]]),
("one ? twenty one : three", [[
' 0:one => PUSH_UNREC',
' 1:<ws> => PUSH_UNREC',
' 2:?(SyaConceptDef(concept=(1011)a ? b : c, precedence=1, associativity=right)) => ??',
" _: => RECOG [[CN((1001)one)]]",
" _: => POP ConceptNode(concept='(1001)one', source='one', start=0, end=0)",
' 2:?(SyaConceptDef(concept=(1011)a ? b : c, precedence=1, associativity=right)) => PUSH',
' 3:<ws> => EAT',
' 4:twenty => PUSH_UNREC',
' 5:<ws> => PUSH_UNREC',
' 6:one => PUSH_UNREC',
' 7:<ws> => PUSH_UNREC',
' 8:: => ??',
" _: => RECOG [[UTN('twenty '), CN((1001)one)], [CN((1016)twenties)]]",
" _: => POP UnrecognizedTokensNode(source='twenty ', start=4, end=5)",
" _: => POP ConceptNode(concept='(1001)one', source='one', start=6, end=6)",
" _: => => ERROR Too many parameters found for '(1011)a ? b : c' before token 'Token(:)'",
' 8:: => EAT',
], [
' 0:one => PUSH_UNREC',
' 1:<ws> => PUSH_UNREC',
' 2:?(SyaConceptDef(concept=(1011)a ? b : c, precedence=1, associativity=right)) => ??',
' _: => RECOG [[CN((1001)one)]]',
" _: => POP ConceptNode(concept='(1001)one', source='one', start=0, end=0)",
' 2:?(SyaConceptDef(concept=(1011)a ? b : c, precedence=1, associativity=right)) => PUSH',
' 3:<ws> => EAT',
' 4:twenty => PUSH_UNREC',
' 5:<ws> => PUSH_UNREC',
' 6:one => PUSH_UNREC',
' 7:<ws> => PUSH_UNREC',
' 8:: => ??',
" _: => RECOG [[UTN('twenty '), CN((1001)one)], [CN((1016)twenties)]]",
" _: => POP ConceptNode(concept='(1016)twenties', source='twenty one', start=4, end=6, ConceptParts.BODY='DoNotResolve(value='twenty one')', unit='(1001)one')",
' 9:<ws> => EAT',
' 10:three => PUSH_UNREC',
' 11:<EOF> => ??',
' _: => RECOG [[CN((1003)three)]]',
" _: => POP ConceptNode(concept='(1003)three', source='three', start=10, end=10)",
' _: => POP SyaConceptParserHelper(concept=(1011)a ? b : c, start=2, error=None)'
]]),
])
def test_i_can_debug(self, expression, expected_debugs):
    """Compare the debug trace of each result with the expected lines.

    :param expression: source text handed to ``infix_to_postfix``
    :param expected_debugs: one list of expected trace lines per result
    """
    _, context, parser = self.init_parser()
    # The DEBUG hint is set before converting so that results carry a trace.
    context.add_to_private_hints(BuiltinConcepts.DEBUG)

    results = parser.infix_to_postfix(context, ParserInput(expression))

    assert len(results) == len(expected_debugs)
    for result, expected_lines in zip(results, expected_debugs):
        # Debug items are compared through their string form.
        trace = list(map(str, result.debug))
        assert trace == expected_lines
def test_i_can_parse_when_concept_atom_only(self):
sheerka, context, parser = self.init_parser()
@@ -1032,17 +1073,11 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert concept_suffixed_a == cmap["two"]
@pytest.mark.parametrize("text, expected_status, expected_result", [
("function(suffixed one)", True, [
SCWC("function(", ")", CNC("suffixed", 2, 4, a="one"))]),
("function(one plus two mult three)", True, [
SCWC("function(", ")", CNC("plus", 2, 10, a="one", b=CC("mult", a="two", b="three")))]),
("f1(one prefixed) plus f2(suffixed two)", True, [
("f1(one prefixed) plus f2(suffixed two)", False, [
CNC("plus",
a=SCWC("f1(", ")", CNC("prefixed", a="one")),
b=SCWC("f2(", (")", 1), CNC("suffixed", a="two")))
]),
("function(suffixed x$!#)", False, [
SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]),
("one is a concept", True, [CNC("is a concept", c="one")]),
("a is a concept", False, [CNC("is a concept", c=UTN("a"))]),
])
@@ -1058,6 +1093,19 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert lexer_nodes == expected_array
@pytest.mark.parametrize("text", [
    "function(suffixed one)",
    "function(one plus two mult three)",
    "function(suffixed x$!#)"
])
def test_i_cannot_parse_when_function_only(self, text):
    """Check that input made of a single function call is declined.

    ``parse`` must return a falsy status and a body that is a NOT_FOR_ME
    concept.
    """
    sheerka, context, parser = self.init_parser()
    parser_input = ParserInput(text)

    outcome = parser.parse(context, parser_input)

    assert not outcome.status
    assert sheerka.isinstance(outcome.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("text", [
"foo bar (one",
"foo bar one",
@@ -1082,14 +1130,13 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]),
("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]),
("foo bar one plus two", [UTN("foo bar "), CNC("plus", a="one", b="two")]),
("foo bar (one plus two", [UTN("foo bar ("), CNC("plus", a="one", b="two")]),
("one plus two a long other b", [CNC("plus", a="one", b="two"), UTN(" a long other b")]),
("one plus two a long infixed", [CNC("plus", a="one", b="two"), UTN(" a long infixed")]),
("one plus two a long", [CNC("plus", a="one", b="two"), UTN(" a long")]),
("one ? a long infixed : two", [CNC("?", a="one", b=UTN("a long infixed"), c="two")]),
("one ? a long infix : two", [CNC("?", a="one", b=UTN("a long infix"), c="two")]),
])
def test_i_cannot_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result):
def test_i_can_almost_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result):
"""
We test the case where the parsed input looks like a known concept, but is not one.
The parser has to detect that the prediction was incorrect
@@ -1194,3 +1241,57 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
@pytest.mark.parametrize("expression, expected", [
    ("function(", ([], "function(")),
    ("before the function(", (["before the "], "function(")),
    ("one two function(", (["one", "two", UTN(" ", 3, 3)], "function(")),
    ("one(", ([], "one(")),
    ("one before the function(", (["one", " before the "], "function(")),
])
def test_i_can_get_functions_names_from_unrecognized(self, expression, expected):
    """Check how pending unrecognized tokens split around a function name.

    Every token except the last is fed to ``eat_unrecognized``; the last
    token is then passed to ``get_functions_names_from_unrecognized``.
    Exactly one candidate is expected.

    :param expression: source text to tokenize (tokenized without EOF)
    :param expected: ``(to_out, function)`` pair, resolved through
        ``compute_expected_array`` before comparison
    """
    _, context, _parser = self.init_parser()
    converter = InFixToPostFix(context)
    tokens = list(Tokenizer(expression, yield_eof=False))
    last_pos = len(tokens) - 1

    # Feed everything except the final token.
    for pos, token in enumerate(tokens[:-1]):
        converter.eat_unrecognized(token, pos)

    expected_out = compute_expected_array(cmap, expression, expected[0])
    expected_function = compute_expected_array(cmap, expression, [expected[1]])[0]

    candidates = converter.get_functions_names_from_unrecognized(tokens[-1], last_pos)

    assert len(candidates) == 1
    candidate = candidates[0]
    assert candidate.to_out == expected_out
    # fix_source() is called on the function node before comparing it.
    candidate.function.fix_source()
    assert candidate.function == expected_function
@pytest.mark.parametrize("expression, expected_list", [
    ("twenty two function(", [(["twenty ", "two", UTN(" ", 3, 3)], "function("),
                              ([CN("twenties", source="twenty two"), UTN(" ", 3, 3)], "function(")]),
    ("twenty two(", [(["twenty "], "two("),
                     ([CN("twenties", source="twenty two")], None)]),
])
def test_i_can_get_functions_names_from_unrecognized_when_multiple_results(self, expression, expected_list):
    """Check the function-name candidates when several are returned.

    Every token except the last is fed to ``eat_unrecognized``; the last
    token is then passed to ``get_functions_names_from_unrecognized``.
    Candidates are compared pairwise, in order, with ``expected_list``.

    :param expression: source text to tokenize (tokenized without EOF)
    :param expected_list: ``(to_out, function)`` pairs, one per candidate;
        the ``function`` entry is only compared when the candidate has a
        truthy function
    """
    _, context, _parser = self.init_parser()
    converter = InFixToPostFix(context)
    tokens = list(Tokenizer(expression, yield_eof=False))
    last_pos = len(tokens) - 1

    # Feed everything except the final token.
    for pos, token in enumerate(tokens[:-1]):
        converter.eat_unrecognized(token, pos)

    candidates = converter.get_functions_names_from_unrecognized(tokens[-1], last_pos)

    assert len(candidates) == len(expected_list)
    for candidate, wanted in zip(candidates, expected_list):
        expected_out = compute_expected_array(cmap, expression, wanted[0])
        assert candidate.to_out == expected_out

        if not candidate.function:
            # A candidate without a function must carry None exactly.
            assert candidate.function is None
            continue

        candidate.function.fix_source()
        expected_function = compute_expected_array(cmap, expression, [wanted[1]])
        assert candidate.function == expected_function[0]