diff --git a/src/parsers/FunctionParser.py b/src/parsers/FunctionParser.py index 430edba..b5b3dbf 100644 --- a/src/parsers/FunctionParser.py +++ b/src/parsers/FunctionParser.py @@ -1,20 +1,17 @@ -from dataclasses import dataclass - from core.builtin_concepts import BuiltinConcepts from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled from core.concept import Concept from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind from core.utils import get_n_clones +from parsers.BaseExpressionParser import NameExprNode, FunctionNode, FunctionParameter, BaseExpressionParser from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode -from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, \ - BaseParserInputParser +from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, ErrorSink from parsers.BnfNodeParser import BnfNodeParser from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.RuleParser import RuleParser from parsers.SequenceNodeParser import SequenceNodeParser from parsers.SyaNodeParser import SyaNodeParser -from parsers.BaseExpressionParser import NameExprNode, FunctionNode, FunctionParameter PARSERS = [RuleParser.NAME, SequenceNodeParser.NAME, @@ -22,18 +19,15 @@ PARSERS = [RuleParser.NAME, SyaNodeParser.NAME] -@dataclass -class FunctionParserNode(Node): - pass - - -class FunctionParser(BaseParserInputParser): +class FunctionParser(BaseExpressionParser): """ The parser will be used to parse func(x, y, z) where x, y and z can be source code, concepts or other functions It will return a SourceCodeNode or SourceCodeNodeWithConcept """ + NAME = "Function" + def __init__(self, sep=",", longest_concepts_only=True, **kwargs): """ @@ -42,174 +36,135 @@ class FunctionParser(BaseParserInputParser): so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]] :param kwargs: """ - super().__init__("Function", 55) + super().__init__(self.NAME, 55) self.sep = sep self.longest_concepts_only = longest_concepts_only - self.record_errors = True - def function_parser_get_return_value_body(self, source_code_node): + def function_parser_get_return_value_body(self, context, source, source_code_node): if source_code_node.error_when_parsing: - return self.sheerka.new(BuiltinConcepts.ERROR, - body=source_code_node.error_when_parsing) + return context.sheerka.new(BuiltinConcepts.ERROR, + body=source_code_node.error_when_parsing) - return self.sheerka.new(BuiltinConcepts.PARSER_RESULT, - parser=self, - source=self.parser_input.as_text(), - body=source_code_node, - try_parsed=source_code_node) + return context.sheerka.new(BuiltinConcepts.PARSER_RESULT, + parser=self, + source=source, + body=source_code_node, + try_parsed=source_code_node) - def add_error(self, error, next_token=True): - if not self.record_errors: - return - - return super().add_error(error, next_token) + # def add_error(self, error, next_token=True): + # if not self.record_errors: + # return + # + # return super().add_error(error, next_token) def parse(self, context, parser_input: ParserInput): - """ + ret = super().parse(context, parser_input) - :param context: - :param parser_input: - :return: - """ - - if not isinstance(parser_input, ParserInput): + if ret is None: return None - context.log(f"Parsing '{parser_input}' with FunctionParser", self.name) - sheerka = context.sheerka - - if parser_input.is_empty(): - return sheerka.ret(self.name, - False, - sheerka.new(BuiltinConcepts.IS_EMPTY)) - - if not self.reset_parser(context, parser_input): - return self.sheerka.ret( - self.name, - False, - context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) - - self.parser_input.next_token() - node = self.parse_function() - - if self.parser_input.next_token(): - self.add_error(UnexpectedTokenParsingError("Only one function supported", - self.parser_input.token, - [TokenKind.EOF])) - - if self.has_error: - if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept): - return self.error_sink[0] - - if node is None: - body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, - body=parser_input.as_text(), - reason=self.error_sink) - else: - body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink) - return self.sheerka.ret(self.name, False, body) - - source_code_nodes = self.to_source_code_node(node) + if not ret.status: + return ret + node = ret.body.body + source_code_nodes = self.to_source_code_node(context, node) res = [] for source_code_node in source_code_nodes: - body = self.function_parser_get_return_value_body(source_code_node) - res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, body)) + body = self.function_parser_get_return_value_body(context, parser_input.as_text(), source_code_node) + res.append(context.sheerka.ret(self.name, source_code_node.python_node is not None, body)) return res[0] if len(res) == 1 else res - def parse_input(self): - return self.parse_function() + def parse_input(self, context, parser_input, error_sink): + return self.parse_function(context, parser_input, error_sink) - def parse_function(self): + def parse_function(self, context, parser_input, error_sink): - start = self.parser_input.pos - token = self.parser_input.token + start = parser_input.pos + token = parser_input.token if token.type != TokenKind.IDENTIFIER: - self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier", - token, - [TokenKind.IDENTIFIER])) + error_sink.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier", + token, + [TokenKind.IDENTIFIER])) return None - if not self.parser_input.next_token(): - self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis")) + if not parser_input.next_token(): + error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis")) return None - token = self.parser_input.token + token = parser_input.token if token.type != TokenKind.LPAR: - self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis", - token, - [TokenKind.LPAR])) + error_sink.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis", + token, + [TokenKind.LPAR])) return None - start_node = NameExprNode(start, start + 1, self.parser_input.tokens[start:start + 2]) - if not self.parser_input.next_token(): - self.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis")) + start_node = NameExprNode(start, start + 1, parser_input.tokens[start:start + 2]) + if not parser_input.next_token(): + error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis")) return FunctionNode(start, start + 1, [], start_node, None, None) - params = self.parse_parameters() - if self.has_error: - return FunctionNode(start, self.parser_input.pos, [], start_node, None, params) + params = self.parse_parameters(context, parser_input, error_sink) + if error_sink.has_error: + return FunctionNode(start, parser_input.pos, [], start_node, None, params) - token = self.parser_input.token + token = parser_input.token if not token or token.type != TokenKind.RPAR: - self.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found", - token, - [TokenKind.RPAR])) - return FunctionNode(start, self.parser_input.pos, [], start_node, None, params) + error_sink.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found", + token, + [TokenKind.RPAR])) + return FunctionNode(start, parser_input.pos, [], start_node, None, params) return FunctionNode(start, - self.parser_input.pos, - self.parser_input.tokens[start:self.parser_input.pos + 1], + parser_input.pos, + parser_input.tokens[start:parser_input.pos + 1], start_node, - NameExprNode(self.parser_input.pos, self.parser_input.pos, [token]), + NameExprNode(parser_input.pos, parser_input.pos, [token]), params) - def parse_parameters(self): + def parse_parameters(self, context, parser_input, error_sink): nodes = [] while True: - param_value = self.parse_parameter_value() + param_value = self.parse_parameter_value(context, parser_input, error_sink) if not param_value: break function_parameter = FunctionParameter(param_value) nodes.append(function_parameter) - token = self.parser_input.token + token = parser_input.token if token.type == TokenKind.EOF: - self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters")) + error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters")) return None if token.type == TokenKind.RPAR: break if token.value == self.sep: - sep_pos = self.parser_input.pos - has_next = self.parser_input.next_token() # it's before add_sep() to capture trailing whitespace + sep_pos = parser_input.pos + has_next = parser_input.next_token() # it's before add_sep() to capture trailing whitespace function_parameter.add_sep(sep_pos, - self.parser_input.pos - 1, - self.parser_input.tokens[sep_pos: self.parser_input.pos]) + parser_input.pos - 1, + parser_input.tokens[sep_pos: parser_input.pos]) if not has_next: break return nodes - def parse_parameter_value(self): + def parse_parameter_value(self, context, parser_input, error_sink): # check if the parameter is a function - start_pos = self.parser_input.pos - self.record_errors = False - func = self.parse_function() - self.record_errors = True - if func: - self.parser_input.next_token() + start_pos = parser_input.pos + new_error_sink = ErrorSink() + func = self.parse_function(context, parser_input, new_error_sink) + if func and not new_error_sink.has_error: + parser_input.next_token() return func # otherwise, eat until LPAR or separator - self.parser_input.seek(start_pos) - self.record_errors = True + parser_input.seek(start_pos) tokens = [] while True: - token = self.parser_input.token + token = parser_input.token if token is None: break @@ -217,12 +172,12 @@ class FunctionParser(BaseParserInputParser): break tokens.append(token) - if not self.parser_input.next_token(skip_whitespace=False): + if not parser_input.next_token(skip_whitespace=False): break - return NameExprNode(start_pos, self.parser_input.pos - 1, tokens) if len(tokens) else None + return NameExprNode(start_pos, parser_input.pos - 1, tokens) if len(tokens) else None - def to_source_code_node(self, function_node: FunctionNode): + def to_source_code_node(self, context, function_node: FunctionNode): python_parser = PythonWithConceptsParser() def update_source_code_node(scn, nodes, sep): @@ -239,7 +194,7 @@ class FunctionParser(BaseParserInputParser): if parsing_res.status: return None - if self.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME): + if context.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME): return parsing_res.body.reason else: return parsing_res.body.body @@ -247,7 +202,7 @@ class FunctionParser(BaseParserInputParser): if len(function_node.parameters) == 0: # validate the source nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()] - python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse) + python_parsing_res = python_parser.parse_nodes(context, nodes_to_parse) python_node = python_parsing_res.body.body if python_parsing_res.status else None return [SourceCodeNode(start=function_node.first.start, @@ -264,12 +219,12 @@ class FunctionParser(BaseParserInputParser): if isinstance(param.value, NameExprNode): # try to recognize concepts unrecognized = param.value.to_unrecognized() - nodes_sequences = get_lexer_nodes_from_unrecognized(self.context, + nodes_sequences = get_lexer_nodes_from_unrecognized(context, unrecognized, PARSERS) else: # the parameter is also a function - nodes_sequences = self.to_source_code_node(param.value) + nodes_sequences = self.to_source_code_node(context, param.value) if self.longest_concepts_only: nodes_sequences = self.get_longest_concepts(nodes_sequences) @@ -300,7 +255,7 @@ class FunctionParser(BaseParserInputParser): source_code_node.fix_all_pos() source_code_node.pseudo_fix_source() - python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes()) + python_parsing_res = python_parser.parse_nodes(context, source_code_node.get_all_nodes()) if python_parsing_res.status: source_code_node.python_node = python_parsing_res.body.body source_code_node.return_value = python_parsing_res @@ -308,7 +263,7 @@ class FunctionParser(BaseParserInputParser): # make sure that concepts found can be evaluated errors = [] for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]: - update_compiled(self.context, c, errors) + update_compiled(context, c, errors) if errors: source_code_node.error_when_parsing = errors diff --git a/tests/parsers/test_ExpressionParser.py b/tests/parsers/test_ExpressionParser.py index af21d4d..b7e2e69 100644 --- a/tests/parsers/test_ExpressionParser.py +++ b/tests/parsers/test_ExpressionParser.py @@ -17,7 +17,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): parser = ExpressionParser() return sheerka, context, parser - def input_parser_with_source(self, source): + def init_parser_with_source(self, source): sheerka, context, parser = self.init_parser() error_sink = ErrorSink() parser_input = ParserInput(source) @@ -50,7 +50,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_parse_input(self, expression, expected): - sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression) + sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression) expected = get_expr_node_from_test_node(expression, expected) parsed = parser.parse_input(context, parser_input, error_sink) @@ -63,7 +63,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): "var . attr1 . attr2", ]) def test_i_can_parse_variable(self, expression): - sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression) + sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression) parsed = parser.parse_input(context, parser_input, error_sink) assert not error_sink.has_error diff --git a/tests/parsers/test_FunctionParser.py b/tests/parsers/test_FunctionParser.py index 1f72d08..e99cc3a 100644 --- a/tests/parsers/test_FunctionParser.py +++ b/tests/parsers/test_FunctionParser.py @@ -3,6 +3,7 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept from core.sheerka.services.SheerkaExecute import ParserInput +from parsers.BaseParser import ErrorSink from parsers.FunctionParser import FunctionParser from parsers.PythonParser import PythonErrorNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -42,6 +43,13 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka): parser = FunctionParser() return sheerka, context, parser + def init_parser_with_source(self, source): + sheerka, context, parser = self.init_parser() + error_sink = ErrorSink() + parser_input = ParserInput(source) + parser.reset_parser_input(parser_input, error_sink) + return sheerka, context, parser, parser_input, error_sink + def test_i_can_detect_empty_expression(self): sheerka, context, parser = self.init_parser() res = parser.parse(context, ParserInput("")) @@ -73,28 +81,16 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka): (FN("func4(", (")", 2), ["three"]), None), ]), None) ])), + ("func(r:|1:)", FN("func(", ")", ["r:|1:"])) ]) def test_i_can_parse_function(self, expression, expected): - sheerka, context, parser = self.init_parser() - - parser.reset_parser(context, ParserInput(expression)) - parser.parser_input.next_token() - - res = parser.parse_function() + sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression) expected = get_expr_node_from_test_node(expression, expected) - assert res == expected - def test_i_can_parse_function_when_rule(self): - sheerka, context, parser = self.init_parser() - expected = FN("func(", ")", ["r:|1:"]) + parsed = parser.parse_input(context, parser_input, error_sink) - parser.reset_parser(context, ParserInput("func(r:|1:)")) - parser.parser_input.next_token() - - res = parser.parse_function() - - transformed_res = get_test_obj(res, expected) - assert transformed_res == expected + assert not error_sink.has_error + assert parsed == expected @pytest.mark.parametrize("text, expected", [ ("func()", SCN("func()")),