Working on #48 : Working on FunctionParser.py

This commit is contained in:
2021-03-11 16:51:20 +01:00
parent e303b32eb9
commit f32e14669a
3 changed files with 96 additions and 145 deletions
+70 -115
View File
@@ -1,20 +1,17 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
from core.concept import Concept from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind from core.tokenizer import TokenKind
from core.utils import get_n_clones from core.utils import get_n_clones
from parsers.BaseExpressionParser import NameExprNode, FunctionNode, FunctionParameter, BaseExpressionParser
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, \ from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, ErrorSink
BaseParserInputParser
from parsers.BnfNodeParser import BnfNodeParser from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.RuleParser import RuleParser from parsers.RuleParser import RuleParser
from parsers.SequenceNodeParser import SequenceNodeParser from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.SyaNodeParser import SyaNodeParser from parsers.SyaNodeParser import SyaNodeParser
from parsers.BaseExpressionParser import NameExprNode, FunctionNode, FunctionParameter
PARSERS = [RuleParser.NAME, PARSERS = [RuleParser.NAME,
SequenceNodeParser.NAME, SequenceNodeParser.NAME,
@@ -22,18 +19,15 @@ PARSERS = [RuleParser.NAME,
SyaNodeParser.NAME] SyaNodeParser.NAME]
@dataclass class FunctionParser(BaseExpressionParser):
class FunctionParserNode(Node):
pass
class FunctionParser(BaseParserInputParser):
""" """
The parser will be used to parse func(x, y, z) The parser will be used to parse func(x, y, z)
where x, y and z can be source code, concepts or other functions where x, y and z can be source code, concepts or other functions
It will return a SourceCodeNode or SourceCodeNodeWithConcept It will return a SourceCodeNode or SourceCodeNodeWithConcept
""" """
NAME = "Function"
def __init__(self, sep=",", longest_concepts_only=True, **kwargs): def __init__(self, sep=",", longest_concepts_only=True, **kwargs):
""" """
@@ -42,174 +36,135 @@ class FunctionParser(BaseParserInputParser):
so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]] so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
:param kwargs: :param kwargs:
""" """
super().__init__("Function", 55) super().__init__(self.NAME, 55)
self.sep = sep self.sep = sep
self.longest_concepts_only = longest_concepts_only self.longest_concepts_only = longest_concepts_only
self.record_errors = True
def function_parser_get_return_value_body(self, source_code_node): def function_parser_get_return_value_body(self, context, source, source_code_node):
if source_code_node.error_when_parsing: if source_code_node.error_when_parsing:
return self.sheerka.new(BuiltinConcepts.ERROR, return context.sheerka.new(BuiltinConcepts.ERROR,
body=source_code_node.error_when_parsing) body=source_code_node.error_when_parsing)
return self.sheerka.new(BuiltinConcepts.PARSER_RESULT, return context.sheerka.new(BuiltinConcepts.PARSER_RESULT,
parser=self, parser=self,
source=self.parser_input.as_text(), source=source,
body=source_code_node, body=source_code_node,
try_parsed=source_code_node) try_parsed=source_code_node)
def add_error(self, error, next_token=True): # def add_error(self, error, next_token=True):
if not self.record_errors: # if not self.record_errors:
return # return
#
return super().add_error(error, next_token) # return super().add_error(error, next_token)
def parse(self, context, parser_input: ParserInput): def parse(self, context, parser_input: ParserInput):
""" ret = super().parse(context, parser_input)
:param context: if ret is None:
:param parser_input:
:return:
"""
if not isinstance(parser_input, ParserInput):
return None return None
context.log(f"Parsing '{parser_input}' with FunctionParser", self.name) if not ret.status:
sheerka = context.sheerka return ret
if parser_input.is_empty():
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
self.parser_input.next_token()
node = self.parse_function()
if self.parser_input.next_token():
self.add_error(UnexpectedTokenParsingError("Only one function supported",
self.parser_input.token,
[TokenKind.EOF]))
if self.has_error:
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
return self.error_sink[0]
if node is None:
body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
body=parser_input.as_text(),
reason=self.error_sink)
else:
body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
return self.sheerka.ret(self.name, False, body)
source_code_nodes = self.to_source_code_node(node)
node = ret.body.body
source_code_nodes = self.to_source_code_node(context, node)
res = [] res = []
for source_code_node in source_code_nodes: for source_code_node in source_code_nodes:
body = self.function_parser_get_return_value_body(source_code_node) body = self.function_parser_get_return_value_body(context, parser_input.as_text(), source_code_node)
res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, body)) res.append(context.sheerka.ret(self.name, source_code_node.python_node is not None, body))
return res[0] if len(res) == 1 else res return res[0] if len(res) == 1 else res
def parse_input(self): def parse_input(self, context, parser_input, error_sink):
return self.parse_function() return self.parse_function(context, parser_input, error_sink)
def parse_function(self): def parse_function(self, context, parser_input, error_sink):
start = self.parser_input.pos start = parser_input.pos
token = self.parser_input.token token = parser_input.token
if token.type != TokenKind.IDENTIFIER: if token.type != TokenKind.IDENTIFIER:
self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier", error_sink.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier",
token, token,
[TokenKind.IDENTIFIER])) [TokenKind.IDENTIFIER]))
return None return None
if not self.parser_input.next_token(): if not parser_input.next_token():
self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis")) error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis"))
return None return None
token = self.parser_input.token token = parser_input.token
if token.type != TokenKind.LPAR: if token.type != TokenKind.LPAR:
self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis", error_sink.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis",
token, token,
[TokenKind.LPAR])) [TokenKind.LPAR]))
return None return None
start_node = NameExprNode(start, start + 1, self.parser_input.tokens[start:start + 2]) start_node = NameExprNode(start, start + 1, parser_input.tokens[start:start + 2])
if not self.parser_input.next_token(): if not parser_input.next_token():
self.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis")) error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis"))
return FunctionNode(start, start + 1, [], start_node, None, None) return FunctionNode(start, start + 1, [], start_node, None, None)
params = self.parse_parameters() params = self.parse_parameters(context, parser_input, error_sink)
if self.has_error: if error_sink.has_error:
return FunctionNode(start, self.parser_input.pos, [], start_node, None, params) return FunctionNode(start, parser_input.pos, [], start_node, None, params)
token = self.parser_input.token token = parser_input.token
if not token or token.type != TokenKind.RPAR: if not token or token.type != TokenKind.RPAR:
self.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found", error_sink.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found",
token, token,
[TokenKind.RPAR])) [TokenKind.RPAR]))
return FunctionNode(start, self.parser_input.pos, [], start_node, None, params) return FunctionNode(start, parser_input.pos, [], start_node, None, params)
return FunctionNode(start, return FunctionNode(start,
self.parser_input.pos, parser_input.pos,
self.parser_input.tokens[start:self.parser_input.pos + 1], parser_input.tokens[start:parser_input.pos + 1],
start_node, start_node,
NameExprNode(self.parser_input.pos, self.parser_input.pos, [token]), NameExprNode(parser_input.pos, parser_input.pos, [token]),
params) params)
def parse_parameters(self): def parse_parameters(self, context, parser_input, error_sink):
nodes = [] nodes = []
while True: while True:
param_value = self.parse_parameter_value() param_value = self.parse_parameter_value(context, parser_input, error_sink)
if not param_value: if not param_value:
break break
function_parameter = FunctionParameter(param_value) function_parameter = FunctionParameter(param_value)
nodes.append(function_parameter) nodes.append(function_parameter)
token = self.parser_input.token token = parser_input.token
if token.type == TokenKind.EOF: if token.type == TokenKind.EOF:
self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters")) error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters"))
return None return None
if token.type == TokenKind.RPAR: if token.type == TokenKind.RPAR:
break break
if token.value == self.sep: if token.value == self.sep:
sep_pos = self.parser_input.pos sep_pos = parser_input.pos
has_next = self.parser_input.next_token() # it's before add_sep() to capture trailing whitespace has_next = parser_input.next_token() # it's before add_sep() to capture trailing whitespace
function_parameter.add_sep(sep_pos, function_parameter.add_sep(sep_pos,
self.parser_input.pos - 1, parser_input.pos - 1,
self.parser_input.tokens[sep_pos: self.parser_input.pos]) parser_input.tokens[sep_pos: parser_input.pos])
if not has_next: if not has_next:
break break
return nodes return nodes
def parse_parameter_value(self): def parse_parameter_value(self, context, parser_input, error_sink):
# check if the parameter is a function # check if the parameter is a function
start_pos = self.parser_input.pos start_pos = parser_input.pos
self.record_errors = False new_error_sink = ErrorSink()
func = self.parse_function() func = self.parse_function(context, parser_input, new_error_sink)
self.record_errors = True if func and not new_error_sink.has_error:
if func: parser_input.next_token()
self.parser_input.next_token()
return func return func
# otherwise, eat until LPAR or separator # otherwise, eat until LPAR or separator
self.parser_input.seek(start_pos) parser_input.seek(start_pos)
self.record_errors = True
tokens = [] tokens = []
while True: while True:
token = self.parser_input.token token = parser_input.token
if token is None: if token is None:
break break
@@ -217,12 +172,12 @@ class FunctionParser(BaseParserInputParser):
break break
tokens.append(token) tokens.append(token)
if not self.parser_input.next_token(skip_whitespace=False): if not parser_input.next_token(skip_whitespace=False):
break break
return NameExprNode(start_pos, self.parser_input.pos - 1, tokens) if len(tokens) else None return NameExprNode(start_pos, parser_input.pos - 1, tokens) if len(tokens) else None
def to_source_code_node(self, function_node: FunctionNode): def to_source_code_node(self, context, function_node: FunctionNode):
python_parser = PythonWithConceptsParser() python_parser = PythonWithConceptsParser()
def update_source_code_node(scn, nodes, sep): def update_source_code_node(scn, nodes, sep):
@@ -239,7 +194,7 @@ class FunctionParser(BaseParserInputParser):
if parsing_res.status: if parsing_res.status:
return None return None
if self.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME): if context.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME):
return parsing_res.body.reason return parsing_res.body.reason
else: else:
return parsing_res.body.body return parsing_res.body.body
@@ -247,7 +202,7 @@ class FunctionParser(BaseParserInputParser):
if len(function_node.parameters) == 0: if len(function_node.parameters) == 0:
# validate the source # validate the source
nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()] nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse) python_parsing_res = python_parser.parse_nodes(context, nodes_to_parse)
python_node = python_parsing_res.body.body if python_parsing_res.status else None python_node = python_parsing_res.body.body if python_parsing_res.status else None
return [SourceCodeNode(start=function_node.first.start, return [SourceCodeNode(start=function_node.first.start,
@@ -264,12 +219,12 @@ class FunctionParser(BaseParserInputParser):
if isinstance(param.value, NameExprNode): if isinstance(param.value, NameExprNode):
# try to recognize concepts # try to recognize concepts
unrecognized = param.value.to_unrecognized() unrecognized = param.value.to_unrecognized()
nodes_sequences = get_lexer_nodes_from_unrecognized(self.context, nodes_sequences = get_lexer_nodes_from_unrecognized(context,
unrecognized, unrecognized,
PARSERS) PARSERS)
else: else:
# the parameter is also a function # the parameter is also a function
nodes_sequences = self.to_source_code_node(param.value) nodes_sequences = self.to_source_code_node(context, param.value)
if self.longest_concepts_only: if self.longest_concepts_only:
nodes_sequences = self.get_longest_concepts(nodes_sequences) nodes_sequences = self.get_longest_concepts(nodes_sequences)
@@ -300,7 +255,7 @@ class FunctionParser(BaseParserInputParser):
source_code_node.fix_all_pos() source_code_node.fix_all_pos()
source_code_node.pseudo_fix_source() source_code_node.pseudo_fix_source()
python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes()) python_parsing_res = python_parser.parse_nodes(context, source_code_node.get_all_nodes())
if python_parsing_res.status: if python_parsing_res.status:
source_code_node.python_node = python_parsing_res.body.body source_code_node.python_node = python_parsing_res.body.body
source_code_node.return_value = python_parsing_res source_code_node.return_value = python_parsing_res
@@ -308,7 +263,7 @@ class FunctionParser(BaseParserInputParser):
# make sure that concepts found can be evaluated # make sure that concepts found can be evaluated
errors = [] errors = []
for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]: for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]:
update_compiled(self.context, c, errors) update_compiled(context, c, errors)
if errors: if errors:
source_code_node.error_when_parsing = errors source_code_node.error_when_parsing = errors
+3 -3
View File
@@ -17,7 +17,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
parser = ExpressionParser() parser = ExpressionParser()
return sheerka, context, parser return sheerka, context, parser
def input_parser_with_source(self, source): def init_parser_with_source(self, source):
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
error_sink = ErrorSink() error_sink = ErrorSink()
parser_input = ParserInput(source) parser_input = ParserInput(source)
@@ -50,7 +50,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
]) ])
def test_i_can_parse_input(self, expression, expected): def test_i_can_parse_input(self, expression, expected):
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression) sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression)
expected = get_expr_node_from_test_node(expression, expected) expected = get_expr_node_from_test_node(expression, expected)
parsed = parser.parse_input(context, parser_input, error_sink) parsed = parser.parse_input(context, parser_input, error_sink)
@@ -63,7 +63,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
"var . attr1 . attr2", "var . attr1 . attr2",
]) ])
def test_i_can_parse_variable(self, expression): def test_i_can_parse_variable(self, expression):
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression) sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression)
parsed = parser.parse_input(context, parser_input, error_sink) parsed = parser.parse_input(context, parser_input, error_sink)
assert not error_sink.has_error assert not error_sink.has_error
+13 -17
View File
@@ -3,6 +3,7 @@ import pytest
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseParser import ErrorSink
from parsers.FunctionParser import FunctionParser from parsers.FunctionParser import FunctionParser
from parsers.PythonParser import PythonErrorNode from parsers.PythonParser import PythonErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -42,6 +43,13 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka):
parser = FunctionParser() parser = FunctionParser()
return sheerka, context, parser return sheerka, context, parser
def init_parser_with_source(self, source):
sheerka, context, parser = self.init_parser()
error_sink = ErrorSink()
parser_input = ParserInput(source)
parser.reset_parser_input(parser_input, error_sink)
return sheerka, context, parser, parser_input, error_sink
def test_i_can_detect_empty_expression(self): def test_i_can_detect_empty_expression(self):
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput("")) res = parser.parse(context, ParserInput(""))
@@ -73,28 +81,16 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka):
(FN("func4(", (")", 2), ["three"]), None), (FN("func4(", (")", 2), ["three"]), None),
]), None) ]), None)
])), ])),
("func(r:|1:)", FN("func(", ")", ["r:|1:"]))
]) ])
def test_i_can_parse_function(self, expression, expected): def test_i_can_parse_function(self, expression, expected):
sheerka, context, parser = self.init_parser() sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression)
parser.reset_parser(context, ParserInput(expression))
parser.parser_input.next_token()
res = parser.parse_function()
expected = get_expr_node_from_test_node(expression, expected) expected = get_expr_node_from_test_node(expression, expected)
assert res == expected
def test_i_can_parse_function_when_rule(self): parsed = parser.parse_input(context, parser_input, error_sink)
sheerka, context, parser = self.init_parser()
expected = FN("func(", ")", ["r:|1:"])
parser.reset_parser(context, ParserInput("func(r:|1:)")) assert not error_sink.has_error
parser.parser_input.next_token() assert parsed == expected
res = parser.parse_function()
transformed_res = get_test_obj(res, expected)
assert transformed_res == expected
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
("func()", SCN("func()")), ("func()", SCN("func()")),