Fixed #125: SheerkaErrorManager

Fixed #135: Change services service priorities
Fixed #136: ErrorManager: Implement recognize_error
Fixed #137: BNFNodeParser : Error when parsing regex with sub parsers
Fixed #138: get_last_errors(): real errors sources are lost
Fixed #139: OneError return value removes the origin of the error
Fixed #140: Concept variables are not correctly handled when parsing sub expression
Fixed #143: Implement has_unknown_concepts()
This commit is contained in:
2021-10-28 14:04:41 +02:00
parent 48ab72fd9c
commit 87cab44fb8
56 changed files with 1391 additions and 1286 deletions
-95
View File
@@ -10,12 +10,10 @@ from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import get_text_from_tokens, str_concept, tokens_index
from parsers.BaseExpressionParser import AndNode, BinaryNode, ComparisonNode, ComparisonType, Comprehension, \
FunctionNode, \
FunctionParameter, \
ListComprehensionNode, ListNode, NameExprNode, \
NotNode, OrNode, SequenceNode, VariableNode, t_comma
from parsers.BaseNodeParser import ConceptNode, RuleNode, SourceCodeNode, SourceCodeWithConceptNode, \
UnrecognizedTokensNode
from parsers.FunctionParserOld import FunctionNodeOld
from parsers.PythonParser import PythonNode
from sheerkapython.python_wrapper import sheerka_globals
from sheerkarete.common import V
@@ -387,99 +385,6 @@ class LC(ExprTestObj): # for List Comprehension node
return ListComprehensionNode(start, end, full_text_as_tokens[start: end + 1], element, comprehensions)
class FNOld(ExprTestObj):
    """
    Test class only.
    It matches FunctionNodeOld, but with fewer constraints.

    Thereby,
        FNOld("first", "last", ["param1, ", ...])
    can be compared to
        FunctionNodeOld(NameExprNode("first"), NameExprNode("last"),
                        [FunctionParameter(NameExprNode("param1"), NameExprNode(", ")), ...])

    Note that a FunctionParameter can easily be defined with a single string
    * "param" -> FunctionParameter(NameExprNode("param"), None)
    * "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", "))

    For more complicated situations, you can use a tuple (value, sep)
    to define the value part and the separator part.
    """

    def __init__(self, first, last, parameters):
        """
        :param first: text of the opening part of the function, e.g. "func("
        :param last: text of the closing part of the function, e.g. ")"
        :param parameters: list of parameters; each one is either a (value, sep) tuple,
                           or a string which is split at its first comma into (value, sep)
        """
        self.first = first
        self.last = last
        self.parameters = []
        for param in parameters:
            if isinstance(param, tuple):
                self.parameters.append(param)
            elif isinstance(param, str) and (pos := param.find(",")) != -1:
                # "value, " -> ("value", ", ")
                self.parameters.append((param[:pos], param[pos:]))
            else:
                self.parameters.append((param, None))

    @staticmethod
    def _text(part):
        """
        Return the textual part of `first` / `last`.

        Callers may give these as a tuple whose first item is the text (e.g. (")", 4));
        presumably the second item selects an occurrence in the source - TODO confirm
        against get_pos_from_source().
        """
        return part[0] if isinstance(part, tuple) else part

    def __repr__(self):
        # first / last may be tuples: only their text is displayed, so that repr() never raises
        res = self._text(self.first)
        for value, sep in self.parameters:
            if sep:
                res += f"{value}{sep} "
            else:
                res += f"{value}"
        return res + self._text(self.last)

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, FNOld):
            return self.first == other.first and self.last == other.last and self.parameters == other.parameters

        return False

    def __hash__(self):
        # self.parameters is a list, which is not hashable: hash an immutable copy instead
        return hash((self.first, self.last, tuple(self.parameters)))

    def transform_real_obj(self, other, get_test_obj_delegate):
        """
        Transform a real FunctionNodeOld into an FNOld that can be compared with self.

        :param other: the object to transform (an FNOld is returned unchanged)
        :param get_test_obj_delegate: callable used to transform the parameters
                                      that are not described by a simple string
        :return: an FNOld
        :raises Exception: when other is neither an FNOld nor a FunctionNodeOld
        """
        if isinstance(other, FNOld):
            return other

        if isinstance(other, FunctionNodeOld):
            params = []
            # NOTE(review): zip() stops at the shortest list, so extra parameters on either side
            # are silently ignored - confirm that this is intended
            for self_parameter, other_parameter in zip(self.parameters, other.parameters):
                if isinstance(self_parameter[0], str):
                    value = other_parameter.value.value
                else:
                    value = get_test_obj_delegate(other_parameter.value, self_parameter[0])
                sep = other_parameter.separator.value if other_parameter.separator else None
                params.append((value, sep))

            return FNOld(other.first.value, other.last.value, params)

        raise Exception(f"Expecting FunctionNodeOld but received {other=}")

    def get_expr_node(self, full_text_as_tokens, default_expr_obj):
        """
        Build the FunctionNodeOld described by this test object.

        :param full_text_as_tokens: tokens of the whole source, used to locate each part
        :param default_expr_obj: forwarded to the nested test objects
        :return: a FunctionNodeOld spanning from the start of `first` to the end of `last`
        """
        start, end = self.get_pos_from_source(self.first, full_text_as_tokens)
        first = NameExprNode(start, end, full_text_as_tokens[start: end + 1])

        start, end = self.get_pos_from_source(self.last, full_text_as_tokens)
        last = NameExprNode(start, end, full_text_as_tokens[start: end + 1])

        parameters = []
        for param_value, sep in self.parameters:
            if isinstance(param_value, str):
                start, end = self.get_pos_from_source(param_value, full_text_as_tokens)
                param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
            else:
                param_as_expr_node = param_value.get_expr_node(full_text_as_tokens, default_expr_obj)

            if sep:
                # the separator is located right after the parameter value
                sep_tokens = Tokenizer(sep, yield_eof=False)
                start = param_as_expr_node.end + 1
                end = start + len(list(sep_tokens)) - 1
                sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
            else:
                sep_as_expr_node = None

            parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node))

        start, end = first.start, last.end
        return FunctionNodeOld(start, end, full_text_as_tokens[start: end + 1], first, last, parameters)
class HelperWithPos:
def __init__(self, start=None, end=None):
self.start = start
+41 -2
View File
@@ -9,6 +9,7 @@ from core.global_symbols import NotInit
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.SheerkaIsAManager import SheerkaIsAManager
from core.tokenizer import Tokenizer
from parsers.BaseNodeParser import NoMatchingTokenError
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor, BnfNodeParser, ConceptExpression, Match, NonTerminalNode, \
@@ -1075,6 +1076,44 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
expected = [CNC("foo", "one onyx", x=CC("one"))]
self.validate_get_concepts_sequences(my_map, text, expected)
def test_i_can_match_regex_when_sub_parser_input(self):
    """A regex concept is recognized when the parser is given a sub part of a larger input."""
    concepts = {
        "foo": self.bnf_concept("foo", RegExMatch("[a-f0-9]{4}")),
    }
    source = "begin 0af0 end"

    whole_input = ParserInput(source).reset()
    regex_part = whole_input.sub_part(2, 3)

    sheerka, context, parser = self.init_parser(concepts)
    result = parser.parse(context, regex_part)
    assert result.status

    found_nodes = result.body.body
    actual, expected = tests.parsers.parsers_utils.prepare_nodes_comparison(
        concepts,
        source,
        found_nodes,
        [CN("foo", "0af0")])
    assert actual == expected
def test_i_can_match_regex_when_from_tokens(self):
    """A regex concept is recognized when the parser input is built from a slice of tokens."""
    concepts = {
        "foo": self.bnf_concept("foo", RegExMatch("[a-f0-9]{4}")),
    }
    source = "begin 0af0 end"

    all_tokens = list(Tokenizer(source))
    # no text is given: the parser input only knows the selected tokens
    tokens_only_input = ParserInput(None, all_tokens[2:4]).reset()

    sheerka, context, parser = self.init_parser(concepts)
    result = parser.parse(context, tokens_only_input)
    assert result.status

    found_nodes = result.body.body
    actual, expected = tests.parsers.parsers_utils.prepare_nodes_comparison(
        concepts,
        "0af0",
        found_nodes,
        [CN("foo", "0af0")])
    assert actual == expected
def test_i_can_reuse_the_same_variable(self):
# in this test, the variable appears several times, but only once in concept.compiled
my_map = {
@@ -1831,7 +1870,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert res.body.reason == [NoMatchingTokenError(4)]
assert res.body.reason == [NoMatchingTokenError(4, concept=foo)]
@pytest.mark.parametrize("text", [
"one",
@@ -1852,7 +1891,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
(Sequence(StrMatch("foo"), VariableExpression("x")), "foo one"),
(Sequence(StrMatch("foo"), VariableExpression("x"), StrMatch("bar")), "foo one bar"),
])
def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text):
def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text):
sheerka, context, foo = self.init_test().with_concepts(
self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe")))
).unpack()
+2 -2
View File
@@ -16,7 +16,7 @@ from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Se
from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode, CannotHandleParsingError
from parsers.DefConceptParser import UnexpectedTokenParsingError, DefConceptNode
from parsers.ExpressionParser import ExpressionParser
from parsers.FunctionParserOld import FunctionParserOld
from parsers.FunctionParser import FunctionParser
from parsers.PythonParser import PythonParser, PythonNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array, SCWC, compare_with_test_object, CIO
@@ -77,7 +77,7 @@ def get_concept_part(part, use_expression=False):
status=True,
value=ParserResultConcept(
source=part.source,
parser=FunctionParserOld(),
parser=FunctionParser(),
value=nodes[0],
try_parsed=nodes[0]))
-249
View File
@@ -1,249 +0,0 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import SourceCodeWithConceptNode
from parsers.BaseParser import ErrorSink
from parsers.FunctionParserOld import FunctionParserOld
from parsers.PythonParser import PythonErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array, SCN, SCWC, CN, UTN, CNC, RN, FNOld, get_test_obj, \
get_expr_node_from_test_node
# Concepts used by the tests of this module, keyed by a short name.
# TestFunctionParserOld.setup_class() replaces each value with the concept
# returned by the test helper once the concepts have been created.
cmap = {
"one": Concept("one"),
"two": Concept("two"),
"twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
"plus": Concept("a plus b").def_var("a").def_var("b"),
}
class TestFunctionParserOld(TestUsingMemoryBasedSheerka):
    """Tests of FunctionParserOld. The concepts of `cmap` are created once, in setup_class()."""

    # ontology holding the concepts of cmap; set by setup_class() and added by init_parser()
    shared_ontology = None

    @classmethod
    def setup_class(cls):
        """Create the concepts of cmap and keep the resulting ontology in shared_ontology."""
        init_test_helper = cls().init_test(cache_only=False, ontology="#TestFunctionParserOld#")
        sheerka, context, *updated = init_test_helper.with_concepts(*cmap.values(), create_new=True).unpack()
        # cmap now refers to the concepts returned by the helper
        for i, concept_name in enumerate(cmap):
            cmap[concept_name] = updated[i]

        cls.shared_ontology = sheerka.get_ontology(context)
        sheerka.pop_ontology(context)

    def init_parser(self, my_concepts_map=None, **kwargs):
        """
        Return (sheerka, context, parser).

        :param my_concepts_map: when given, its concepts are created (and the map is updated in place)
                                instead of using the shared ontology
        :param kwargs: forwarded to with_concepts()
        """
        if my_concepts_map is None:
            sheerka, context = self.init_test().unpack()
            sheerka.add_ontology(context, self.shared_ontology)
        else:
            sheerka, context, *updated = self.init_test().with_concepts(*my_concepts_map.values(), **kwargs).unpack()
            for i, pair in enumerate(my_concepts_map):
                my_concepts_map[pair] = updated[i]

        parser = FunctionParserOld()
        return sheerka, context, parser

    def init_parser_with_source(self, source):
        """Return (sheerka, context, parser, parser_input, error_sink), the parser being reset on source."""
        sheerka, context, parser = self.init_parser()
        error_sink = ErrorSink()
        parser_input = ParserInput(source)
        parser.reset_parser_input(parser_input, error_sink)
        return sheerka, context, parser, parser_input, error_sink

    def test_i_can_detect_empty_expression(self):
        sheerka, context, parser = self.init_parser()

        res = parser.parse(context, ParserInput(""))

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    def test_input_must_be_a_parser_input(self):
        sheerka, context, parser = self.init_parser()

        # the comparison was previously evaluated and discarded: nothing was checked
        assert parser.parse(context, "not a parser input") is None

    def test_i_cannot_parse_when_not_a_function(self):
        sheerka, context, parser = self.init_parser()

        res = parser.parse(context, ParserInput("not a function"))

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)

    @pytest.mark.parametrize("expression, expected", [
        ("func()", FNOld("func(", ")", [])),
        ("concept(one)", FNOld("concept(", ")", ["one"])),
        ("func(one)", FNOld("func(", ")", ["one"])),
        ("func(a long two, 'three', ;:$*)", FNOld("func(", ")", ["a long two, ", "'three', ", ";:$*"])),
        ("func(func1(one), two, func2(func3(), func4(three)))", FNOld("func(", (")", 4), [
            (FNOld("func1(", ")", ["one"]), ", "),
            "two, ",
            (FNOld("func2(", (")", 3), [
                (FNOld("func3(", (")", 1), []), ", "),
                (FNOld("func4(", (")", 2), ["three"]), None),
            ]), None)
        ])),
        ("func(r:|1:)", FNOld("func(", ")", ["r:|1:"]))
    ])
    def test_i_can_parse_function(self, expression, expected):
        sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression)
        expected = get_expr_node_from_test_node(expression, expected)

        parsed = parser.parse_input(context, parser_input, error_sink)

        assert not error_sink.has_error
        assert parsed == expected

    @pytest.mark.parametrize("text, expected", [
        ("func()", SCN("func()")),
        (" func()", SCN("func()")),
        ("func(one)", SCWC("func(", ")", CN("one"))),
        ("func(one, unknown, two)", SCWC("func(", ")", CN("one"), ", ", UTN("unknown"), (", ", 1), CN("two"))),
        ("func(one, twenty two)", SCWC("func(", ")", "one", ", ", CN("twenties", "twenty two"))),
        ("func(one plus two, three)", SCWC("func(", ")", CNC("plus", a="one", b="two"), ", ", UTN("three"))),
        ("func(func1(one), two)", SCWC("func(", (")", 1), SCWC("func1(", ")", "one"), ", ", "two"))
    ])
    def test_i_can_parse(self, text, expected):
        sheerka, context, parser = self.init_parser()
        resolved_expected = compute_expected_array(cmap, text, [expected])[0]

        res = parser.parse(context, ParserInput(text))
        parser_result = res.body
        expression = res.body.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        transformed_expression = get_test_obj(expression, resolved_expected)
        assert transformed_expression == resolved_expected

        assert expression.python_node is not None
        assert expression.return_value is not None

    def test_i_can_parse_when_multiple_results_when_requested(self):
        # the previous output was
        # [
        #   SCWC("func(", ")", "one", ", ", "twenty ", "two"),
        #   SCWC("func(", ")", "one", ", ", CN("twenties", "twenty two"))
        # ]
        # But the first one is now filtered out, as it's not a valid python function call
        sheerka, context, parser = self.init_parser()
        parser.longest_concepts_only = False
        text = "func(one, twenty two)"
        expected = [SCWC("func(", ")", "one", ", ", CN("twenties", "twenty two"))]
        resolved_expected = compute_expected_array(cmap, text, expected)

        results = parser.parse(context, ParserInput(text))

        assert len(results) == 2

        res = results[0]
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
        assert len(res.body.body) == 1
        # asserting the tuple (obj, type) is always true: the type has to be checked with isinstance()
        assert isinstance(res.body.body[0], PythonErrorNode)

        res = results[1]
        parser_result = res.body
        expressions = res.body.body
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        transformed_expressions = get_test_obj(expressions, resolved_expected[0])
        assert transformed_expressions == resolved_expected[0]

    def test_i_can_parse_when_the_parameter_is_not_a_concept(self):
        """
        It's not a concept, but it can be a valid short term memory object
        :return:
        """
        sheerka, context, parser = self.init_parser()
        text = "func(unknown_concept)"

        res = parser.parse(context, ParserInput(text))
        expected = [SCWC("func(", ")", "unknown_concept")]
        resolved_expected = compute_expected_array(cmap, text, expected)

        assert res.status
        parsed = res.body.body
        transformed_parsed = get_test_obj([parsed], resolved_expected)
        assert transformed_parsed == resolved_expected

    def test_i_can_parse_when_the_concept_is_not_found(self):
        """
        We do not check yet if it's a valid concept
        If you find a cheap way to do so, simply remove this test
        :return:
        """
        sheerka, context, parser = self.init_parser()
        text = "func(c:|xxx:)"

        res = parser.parse(context, ParserInput(text))

        assert res.status

    def test_i_can_parse_when_rules(self):
        sheerka, context, parser = self.init_parser()
        text = "func(r:|1:)"
        expected = SCWC("func(", ")", RN("1"))
        resolved_expected = compute_expected_array(cmap, text, [expected])[0]

        res = parser.parse(context, ParserInput(text))
        parser_result = res.body
        expression = res.body.body
        transformed_expression = get_test_obj(expression, resolved_expected)

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert transformed_expression == resolved_expected

        assert expression.python_node is not None
        assert expression.return_value is not None

    def test_i_can_parse_when_the_parameter_is_a_dynamic_concept(self):
        sheerka, context, parser = self.init_parser()
        text = "func(ones)"

        res = parser.parse(context, ParserInput(text))

        assert res.status
        assert isinstance(res.body.body, SourceCodeWithConceptNode)
        assert res.body.body.python_node.source == 'func(__C__ones__1001___PLURAL__C__)'
        assert "__C__ones__1001___PLURAL__C__" in res.body.body.python_node.objects

    @pytest.mark.parametrize("text, expected_error_type", [
        ("one", BuiltinConcepts.NOT_FOR_ME),  # no function found
        ("$*!", BuiltinConcepts.NOT_FOR_ME),  # no function found
        ("func(", BuiltinConcepts.ERROR),  # function found, but incomplete
        ("func(one", BuiltinConcepts.ERROR),  # function found, but incomplete
        ("func(one, two, ", BuiltinConcepts.ERROR),  # function found, but incomplete
        ("func(one) and func(two)", BuiltinConcepts.ERROR),  # too many functions
        ("one func(one)", BuiltinConcepts.NOT_FOR_ME),  # function not found ! (as it is not the first)
        ("func(a=b, c)", BuiltinConcepts.ERROR),  # function found, but cannot be parsed
        ("func(one two)", BuiltinConcepts.ERROR),  # function found, but cannot be parsed
    ])
    def test_i_cannot_parse(self, text, expected_error_type):
        sheerka, context, parser = self.init_parser()

        res = parser.parse(context, ParserInput(text))

        assert not res.status
        assert sheerka.isinstance(res.body, expected_error_type)

    @pytest.mark.parametrize("sequence, expected", [
        (None, None),
        ([["a"]], [["a"]]),
        ([["a"], ["b", "c"]], [["a"]]),
        ([["b", "c"], ["a"]], [["a"]]),
        ([["b", "c"], ["a"], ["d", "e"], ["f"]], [["a"], ["f"]]),
    ])
    def test_i_can_get_the_longest_concept_sequence(self, sequence, expected):
        assert FunctionParserOld.get_longest_concepts(sequence) == expected

    def test_concepts_found_are_fully_initialized(self):
        sheerka, context, parser = self.init_parser()

        res = parser.parse(context, ParserInput("func(one plus three)"))
        concept = res.body.body.nodes[0].concept

        assert res.status
        assert isinstance(concept.get_compiled()["a"], Concept)

        # three is not recognized,
        # so it will be transformed into a list of ReturnValueConcept that indicate how to recognize it
        assert isinstance(concept.get_compiled()["b"], list)
        for item in concept.get_compiled()["b"]:
            assert sheerka.isinstance(item, BuiltinConcepts.RETURN_VALUE)
+1 -29
View File
@@ -9,8 +9,7 @@ from core.sheerka.Sheerka import RECOGNIZED_BY_KEY
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer, comparable_tokens
from core.utils import get_text_from_tokens
from parsers.BaseExpressionParser import BinaryNode, FunctionNode, FunctionNodeOld, FunctionParameter, ListNode, \
NameExprNode, VariableNode
from parsers.BaseExpressionParser import BinaryNode, FunctionNode, ListNode, NameExprNode, VariableNode
from parsers.BaseNodeParser import ConceptNode, SourceCodeNode, UnrecognizedTokensNode
from parsers.PythonParser import PythonNode
from parsers.SyaNodeParser import FunctionDetected, NoSyaConceptFound, NotEnoughParameters, SyaConceptParser, \
@@ -1564,33 +1563,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
_stack, _expected = prepare_nodes_comparison(concepts_map, text, lexer_nodes, expected)
assert _stack == _expected
def test_i_can_parse_when_function_old_style_expr_tokens(self):
    """A FunctionNodeOld carried by a single EXPR token is used as the second operand of 'plus'."""
    sheerka, context, parser = self.init_parser()

    source = "one plus func(twenty two)"
    all_tokens = list(Tokenizer(source, yield_eof=False))
    function_start = all_tokens[4]

    # tokens 4 to 9 are replaced by one EXPR token that wraps the function node
    name_part = NameExprNode(4, 5, all_tokens[4:6])
    closing_part = NameExprNode(9, 9, all_tokens[9:10])
    only_parameter = FunctionParameter(NameExprNode(6, 8, all_tokens[6:9]), None)
    function_expr = FunctionNodeOld(4, 9, all_tokens[4:10], name_part, closing_part, [only_parameter])
    expr_token = Token(TokenKind.EXPR, function_expr, function_start.index, function_start.line,
                       function_start.column)
    all_tokens[4:] = [expr_token]

    result = parser.parse(context, ParserInput(None, tokens=all_tokens))
    parser_result = result.body
    parsed_nodes = result.body.body

    assert result.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)

    function_ret_val = RETVAL("func(twenty two)", objects={"__o_00__": CIO("twenties", source="twenty two")})
    expected_nodes = [CNC("plus", a=CC("one"), b=[function_ret_val], source=source)]
    _stack, _expected = prepare_nodes_comparison(cmap, source, parsed_nodes, expected_nodes)
    assert _stack == _expected

    # check the metadata
    plus_concept = parsed_nodes[0].concept
    assert plus_concept.get_metadata().variables == [("a", "one"), ("b", "func(twenty two)")]
def test_i_can_parse_when_function_style_expr_tokens(self):
sheerka, context, parser = self.init_parser()
+4 -27
View File
@@ -1,13 +1,13 @@
from core.concept import Concept, ConceptParts
from core.global_symbols import NotInit
from core.rule import Rule, ACTION_TYPE_EXEC
from core.rule import ACTION_TYPE_EXEC, Rule
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import RuleNode
from parsers.BnfNodeParser import BnfNodeParser
from parsers.FunctionParserOld import FunctionParserOld
from parsers.FunctionParser import FunctionParser
from parsers.SyaNodeParser import SyaNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import get_test_obj, CNC, CC, CN, SCN, SCWC, UTN, RN, CB
from tests.parsers.parsers_utils import CB, CC, CN, CNC, RN, SCN, get_test_obj
class TestParsersUtils(TestUsingMemoryBasedSheerka):
@@ -133,7 +133,7 @@ class TestParsersUtils(TestUsingMemoryBasedSheerka):
def test_i_can_get_test_obj_when_SCN(self):
sheerka, context = self.init_test().unpack()
parser = FunctionParserOld()
parser = FunctionParser()
scn = parser.parse(context, ParserInput("test()")).body.body
scn_res = get_test_obj(scn, SCN("", start=0, end=1))
@@ -145,29 +145,6 @@ class TestParsersUtils(TestUsingMemoryBasedSheerka):
assert isinstance(scn_res, SCN)
assert scn_res == SCN("test()", start=None, end=None)
def test_i_can_get_test_obj_when_SCWC(self):
    """get_test_obj() turns a parsed function call with parameters into an SCWC made of UTN / SCN."""
    sheerka, context = self.init_test().unpack()

    parser = FunctionParserOld()
    parser_input = ParserInput("test(param1, test2())")
    scwc = parser.parse(context, parser_input).body.body

    template = SCWC(UTN(""), UTN(""), UTN(""), UTN(""), SCN("", None, 0, 0))
    scwc_res = get_test_obj(scwc, template)
    assert isinstance(scwc_res, SCWC)

    expected = SCWC(
        UTN("test(", 0, 1),
        UTN(")", 8, 8),
        UTN("param1", 2, 2),
        UTN(", ", 3, 4),
        SCN("test2()", None, 5, 7))
    expected.start = 0
    expected.end = 8
    assert scwc_res == expected

    # equality is not enough: the type of every part is checked as well
    assert isinstance(scwc_res.first, UTN)
    assert isinstance(scwc_res.last, UTN)
    value_node, separator_node, nested_function_node = scwc_res.content[0], scwc_res.content[1], scwc_res.content[2]
    assert isinstance(value_node, UTN)
    assert isinstance(separator_node, UTN)
    assert isinstance(nested_function_node, SCN)
def test_i_can_get_test_obj_when_RN(self):
rule = Rule(ACTION_TYPE_EXEC, "test_rule", "True", "True")
rn = RuleNode(rule, 1, 1, source="r:|xxx:")