Working on #48 : Working on FunctionParser.py

This commit is contained in:
2021-03-11 16:51:20 +01:00
parent e303b32eb9
commit f32e14669a
3 changed files with 96 additions and 145 deletions
+80 -125
View File
@@ -1,20 +1,17 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from core.utils import get_n_clones
from parsers.BaseExpressionParser import NameExprNode, FunctionNode, FunctionParameter, BaseExpressionParser
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, \
BaseParserInputParser
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, ErrorSink
from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.RuleParser import RuleParser
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.SyaNodeParser import SyaNodeParser
from parsers.BaseExpressionParser import NameExprNode, FunctionNode, FunctionParameter
PARSERS = [RuleParser.NAME,
SequenceNodeParser.NAME,
@@ -22,18 +19,15 @@ PARSERS = [RuleParser.NAME,
SyaNodeParser.NAME]
@dataclass
class FunctionParserNode(Node):
pass
class FunctionParser(BaseParserInputParser):
class FunctionParser(BaseExpressionParser):
"""
The parser will be used to parse func(x, y, z)
where x, y and z can be source code, concepts or other functions
It will return a SourceCodeNode or a SourceCodeWithConceptNode
"""
NAME = "Function"
def __init__(self, sep=",", longest_concepts_only=True, **kwargs):
"""
@@ -42,174 +36,135 @@ class FunctionParser(BaseParserInputParser):
so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
:param kwargs:
"""
super().__init__("Function", 55)
super().__init__(self.NAME, 55)
self.sep = sep
self.longest_concepts_only = longest_concepts_only
self.record_errors = True
def function_parser_get_return_value_body(self, context, source, source_code_node):
    """
    Build the concept used as the body of one return value of this parser.

    :param context: execution context; its ``sheerka`` attribute creates the concept
    :param source: source text stored on the PARSER_RESULT concept
    :param source_code_node: node whose ``error_when_parsing`` attribute selects the outcome
    :return: an ERROR concept wrapping ``source_code_node.error_when_parsing`` when that
             attribute is truthy, otherwise a PARSER_RESULT concept whose ``body`` and
             ``try_parsed`` are both the node
    """
    if source_code_node.error_when_parsing:
        return context.sheerka.new(BuiltinConcepts.ERROR,
                                   body=source_code_node.error_when_parsing)

    return context.sheerka.new(BuiltinConcepts.PARSER_RESULT,
                               parser=self,
                               source=source,
                               body=source_code_node,
                               try_parsed=source_code_node)
def add_error(self, error, next_token=True):
if not self.record_errors:
return
return super().add_error(error, next_token)
# def add_error(self, error, next_token=True):
# if not self.record_errors:
# return
#
# return super().add_error(error, next_token)
def parse(self, context, parser_input: ParserInput):
    """
    Parse ``parser_input`` as a single function call and convert it to source code nodes.

    :param context: execution context, forwarded to the base parser and to the conversion step
    :param parser_input: the input to parse
    :return: ``None`` when the base parser returns ``None``; the base parser's own return
             value when its ``status`` is falsy; otherwise one return value per source code
             node (a single object when there is exactly one, a list otherwise)
    """
    # NOTE(review): the base implementation is not visible here; the code below relies on
    # it returning either None or an object exposing `.status` and `.body.body`.
    ret = super().parse(context, parser_input)
    if ret is None:
        return None

    if not ret.status:
        return ret

    node = ret.body.body
    source_code_nodes = self.to_source_code_node(context, node)

    res = []
    for source_code_node in source_code_nodes:
        body = self.function_parser_get_return_value_body(context, parser_input.as_text(), source_code_node)
        # a return value is successful only when python code was produced for the node
        res.append(context.sheerka.ret(self.name, source_code_node.python_node is not None, body))

    return res[0] if len(res) == 1 else res
def parse_input(self, context, parser_input, error_sink):
    """
    Entry point for parsing the input: delegates to ``parse_function``.

    :param context: execution context, forwarded unchanged
    :param parser_input: the input being parsed, forwarded unchanged
    :param error_sink: collector for parsing errors, forwarded unchanged
    :return: whatever ``parse_function`` returns
    """
    return self.parse_function(context, parser_input, error_sink)
def parse_function(self, context, parser_input, error_sink):
    """
    Parse ``identifier ( parameters )`` starting at the current token of ``parser_input``.

    :param context: execution context, forwarded to ``parse_parameters``
    :param parser_input: the input being parsed; its position is advanced while parsing
    :param error_sink: receives every parsing error found
    :return: ``None`` when the input does not start with ``identifier (``; otherwise a
             FunctionNode. When an error occurs after the left parenthesis, the node is
             built with an empty token list and no closing node, and the error is
             available in ``error_sink``
    """
    start = parser_input.pos
    token = parser_input.token
    if token.type != TokenKind.IDENTIFIER:
        error_sink.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier",
                                                         token,
                                                         [TokenKind.IDENTIFIER]))
        return None

    if not parser_input.next_token():
        error_sink.add_error(UnexpectedEofParsingError("Unexpected EOF while parsing left parenthesis"))
        return None

    token = parser_input.token
    if token.type != TokenKind.LPAR:
        error_sink.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis",
                                                         token,
                                                         [TokenKind.LPAR]))
        return None

    # the opening node covers the tokens at positions `start` and `start + 1`
    start_node = NameExprNode(start, start + 1, parser_input.tokens[start:start + 2])
    if not parser_input.next_token():
        error_sink.add_error(UnexpectedEofParsingError("Unexpected EOF after left parenthesis"))
        return FunctionNode(start, start + 1, [], start_node, None, None)

    params = self.parse_parameters(context, parser_input, error_sink)
    if error_sink.has_error:
        return FunctionNode(start, parser_input.pos, [], start_node, None, params)

    token = parser_input.token
    if not token or token.type != TokenKind.RPAR:
        error_sink.add_error(UnexpectedTokenParsingError("Right parenthesis not found",
                                                         token,
                                                         [TokenKind.RPAR]))
        return FunctionNode(start, parser_input.pos, [], start_node, None, params)

    return FunctionNode(start,
                        parser_input.pos,
                        parser_input.tokens[start:parser_input.pos + 1],
                        start_node,
                        NameExprNode(parser_input.pos, parser_input.pos, [token]),
                        params)
def parse_parameters(self, context, parser_input, error_sink):
    """
    Parse the parameters of a function call, separated by ``self.sep``.

    :param context: execution context, forwarded to ``parse_parameter_value``
    :param parser_input: the input being parsed; its position is advanced while parsing
    :param error_sink: receives an UnexpectedEofParsingError when the input ends too early
    :return: the list of FunctionParameter found (possibly empty), or ``None`` when the
             input ends before the parameters are terminated
    """
    nodes = []
    while True:
        param_value = self.parse_parameter_value(context, parser_input, error_sink)
        if not param_value:
            break

        function_parameter = FunctionParameter(param_value)
        nodes.append(function_parameter)

        token = parser_input.token
        # the current token may be None once the input is exhausted (the other parsing
        # methods guard against it as well); treat that like an explicit EOF token
        if token is None or token.type == TokenKind.EOF:
            error_sink.add_error(UnexpectedEofParsingError("Unexpected EOF while parsing parameters"))
            return None

        if token.type == TokenKind.RPAR:
            break

        if token.value == self.sep:
            sep_pos = parser_input.pos
            has_next = parser_input.next_token()  # it's before add_sep() to capture trailing whitespace
            function_parameter.add_sep(sep_pos,
                                       parser_input.pos - 1,
                                       parser_input.tokens[sep_pos: parser_input.pos])
            if not has_next:
                break

    return nodes
def parse_parameter_value(self):
def parse_parameter_value(self, context, parser_input, error_sink):
# check if the parameter is a function
start_pos = self.parser_input.pos
self.record_errors = False
func = self.parse_function()
self.record_errors = True
if func:
self.parser_input.next_token()
start_pos = parser_input.pos
new_error_sink = ErrorSink()
func = self.parse_function(context, parser_input, new_error_sink)
if func and not new_error_sink.has_error:
parser_input.next_token()
return func
# otherwise, eat until LPAR or separator
self.parser_input.seek(start_pos)
self.record_errors = True
parser_input.seek(start_pos)
tokens = []
while True:
token = self.parser_input.token
token = parser_input.token
if token is None:
break
@@ -217,12 +172,12 @@ class FunctionParser(BaseParserInputParser):
break
tokens.append(token)
if not self.parser_input.next_token(skip_whitespace=False):
if not parser_input.next_token(skip_whitespace=False):
break
return NameExprNode(start_pos, self.parser_input.pos - 1, tokens) if len(tokens) else None
return NameExprNode(start_pos, parser_input.pos - 1, tokens) if len(tokens) else None
def to_source_code_node(self, function_node: FunctionNode):
def to_source_code_node(self, context, function_node: FunctionNode):
python_parser = PythonWithConceptsParser()
def update_source_code_node(scn, nodes, sep):
@@ -239,7 +194,7 @@ class FunctionParser(BaseParserInputParser):
if parsing_res.status:
return None
if self.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME):
if context.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME):
return parsing_res.body.reason
else:
return parsing_res.body.body
@@ -247,7 +202,7 @@ class FunctionParser(BaseParserInputParser):
if len(function_node.parameters) == 0:
# validate the source
nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse)
python_parsing_res = python_parser.parse_nodes(context, nodes_to_parse)
python_node = python_parsing_res.body.body if python_parsing_res.status else None
return [SourceCodeNode(start=function_node.first.start,
@@ -264,12 +219,12 @@ class FunctionParser(BaseParserInputParser):
if isinstance(param.value, NameExprNode):
# try to recognize concepts
unrecognized = param.value.to_unrecognized()
nodes_sequences = get_lexer_nodes_from_unrecognized(self.context,
nodes_sequences = get_lexer_nodes_from_unrecognized(context,
unrecognized,
PARSERS)
else:
# the parameter is also a function
nodes_sequences = self.to_source_code_node(param.value)
nodes_sequences = self.to_source_code_node(context, param.value)
if self.longest_concepts_only:
nodes_sequences = self.get_longest_concepts(nodes_sequences)
@@ -300,7 +255,7 @@ class FunctionParser(BaseParserInputParser):
source_code_node.fix_all_pos()
source_code_node.pseudo_fix_source()
python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes())
python_parsing_res = python_parser.parse_nodes(context, source_code_node.get_all_nodes())
if python_parsing_res.status:
source_code_node.python_node = python_parsing_res.body.body
source_code_node.return_value = python_parsing_res
@@ -308,7 +263,7 @@ class FunctionParser(BaseParserInputParser):
# make sure that concepts found can be evaluated
errors = []
for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]:
update_compiled(self.context, c, errors)
update_compiled(context, c, errors)
if errors:
source_code_node.error_when_parsing = errors
+3 -3
View File
@@ -17,7 +17,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
parser = ExpressionParser()
return sheerka, context, parser
def input_parser_with_source(self, source):
def init_parser_with_source(self, source):
sheerka, context, parser = self.init_parser()
error_sink = ErrorSink()
parser_input = ParserInput(source)
@@ -50,7 +50,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
])
def test_i_can_parse_input(self, expression, expected):
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression)
expected = get_expr_node_from_test_node(expression, expected)
parsed = parser.parse_input(context, parser_input, error_sink)
@@ -63,7 +63,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
"var . attr1 . attr2",
])
def test_i_can_parse_variable(self, expression):
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression)
parsed = parser.parse_input(context, parser_input, error_sink)
assert not error_sink.has_error
+13 -17
View File
@@ -3,6 +3,7 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseParser import ErrorSink
from parsers.FunctionParser import FunctionParser
from parsers.PythonParser import PythonErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -42,6 +43,13 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka):
parser = FunctionParser()
return sheerka, context, parser
def init_parser_with_source(self, source):
    """
    Create a parser that is ready to parse ``source``.

    :param source: text to wrap in a ParserInput
    :return: tuple ``(sheerka, context, parser, parser_input, error_sink)`` where the
             first three items come from ``init_parser`` and the parser has been reset
             with the new parser input and error sink
    """
    sheerka, context, parser = self.init_parser()

    sink = ErrorSink()
    wrapped_source = ParserInput(source)
    parser.reset_parser_input(wrapped_source, sink)

    return sheerka, context, parser, wrapped_source, sink
def test_i_can_detect_empty_expression(self):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(""))
@@ -73,28 +81,16 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka):
(FN("func4(", (")", 2), ["three"]), None),
]), None)
])),
("func(r:|1:)", FN("func(", ")", ["r:|1:"]))
])
def test_i_can_parse_function(self, expression, expected):
    """
    Check that ``parse_input`` turns ``expression`` into the expected node without errors.

    :param expression: source text of the function call to parse
    :param expected: test description of the expected node, converted with
                     ``get_expr_node_from_test_node`` before the comparison
    """
    sheerka, context, parser, parser_input, error_sink = self.init_parser_with_source(expression)
    expected = get_expr_node_from_test_node(expression, expected)

    parsed = parser.parse_input(context, parser_input, error_sink)

    assert not error_sink.has_error
    assert parsed == expected
@pytest.mark.parametrize("text, expected", [
("func()", SCN("func()")),