Working on #48 : Working on FunctionParser.py
This commit is contained in:
+80
-125
@@ -1,20 +1,17 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
|
||||
from core.concept import Concept
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from core.tokenizer import TokenKind
|
||||
from core.utils import get_n_clones
|
||||
from parsers.BaseExpressionParser import NameExprNode, FunctionNode, FunctionParameter, BaseExpressionParser
|
||||
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
|
||||
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, \
|
||||
BaseParserInputParser
|
||||
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, ErrorSink
|
||||
from parsers.BnfNodeParser import BnfNodeParser
|
||||
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
||||
from parsers.RuleParser import RuleParser
|
||||
from parsers.SequenceNodeParser import SequenceNodeParser
|
||||
from parsers.SyaNodeParser import SyaNodeParser
|
||||
from parsers.BaseExpressionParser import NameExprNode, FunctionNode, FunctionParameter
|
||||
|
||||
PARSERS = [RuleParser.NAME,
|
||||
SequenceNodeParser.NAME,
|
||||
@@ -22,18 +19,15 @@ PARSERS = [RuleParser.NAME,
|
||||
SyaNodeParser.NAME]
|
||||
|
||||
|
||||
@dataclass
|
||||
class FunctionParserNode(Node):
|
||||
pass
|
||||
|
||||
|
||||
class FunctionParser(BaseParserInputParser):
|
||||
class FunctionParser(BaseExpressionParser):
|
||||
"""
|
||||
The parser will be used to parse func(x, y, z)
|
||||
where x, y and z can be source code, concepts or other functions
|
||||
It will return a SourceCodeNode or SourceCodeNodeWithConcept
|
||||
"""
|
||||
|
||||
NAME = "Function"
|
||||
|
||||
def __init__(self, sep=",", longest_concepts_only=True, **kwargs):
|
||||
"""
|
||||
|
||||
@@ -42,174 +36,135 @@ class FunctionParser(BaseParserInputParser):
|
||||
so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
|
||||
:param kwargs:
|
||||
"""
|
||||
super().__init__("Function", 55)
|
||||
super().__init__(self.NAME, 55)
|
||||
self.sep = sep
|
||||
self.longest_concepts_only = longest_concepts_only
|
||||
self.record_errors = True
|
||||
|
||||
def function_parser_get_return_value_body(self, source_code_node):
|
||||
def function_parser_get_return_value_body(self, context, source, source_code_node):
|
||||
if source_code_node.error_when_parsing:
|
||||
return self.sheerka.new(BuiltinConcepts.ERROR,
|
||||
body=source_code_node.error_when_parsing)
|
||||
return context.sheerka.new(BuiltinConcepts.ERROR,
|
||||
body=source_code_node.error_when_parsing)
|
||||
|
||||
return self.sheerka.new(BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=self.parser_input.as_text(),
|
||||
body=source_code_node,
|
||||
try_parsed=source_code_node)
|
||||
return context.sheerka.new(BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=source,
|
||||
body=source_code_node,
|
||||
try_parsed=source_code_node)
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
if not self.record_errors:
|
||||
return
|
||||
|
||||
return super().add_error(error, next_token)
|
||||
# def add_error(self, error, next_token=True):
|
||||
# if not self.record_errors:
|
||||
# return
|
||||
#
|
||||
# return super().add_error(error, next_token)
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
"""
|
||||
ret = super().parse(context, parser_input)
|
||||
|
||||
:param context:
|
||||
:param parser_input:
|
||||
:return:
|
||||
"""
|
||||
|
||||
if not isinstance(parser_input, ParserInput):
|
||||
if ret is None:
|
||||
return None
|
||||
|
||||
context.log(f"Parsing '{parser_input}' with FunctionParser", self.name)
|
||||
sheerka = context.sheerka
|
||||
|
||||
if parser_input.is_empty():
|
||||
return sheerka.ret(self.name,
|
||||
False,
|
||||
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
||||
|
||||
if not self.reset_parser(context, parser_input):
|
||||
return self.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
||||
|
||||
self.parser_input.next_token()
|
||||
node = self.parse_function()
|
||||
|
||||
if self.parser_input.next_token():
|
||||
self.add_error(UnexpectedTokenParsingError("Only one function supported",
|
||||
self.parser_input.token,
|
||||
[TokenKind.EOF]))
|
||||
|
||||
if self.has_error:
|
||||
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
|
||||
return self.error_sink[0]
|
||||
|
||||
if node is None:
|
||||
body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
|
||||
body=parser_input.as_text(),
|
||||
reason=self.error_sink)
|
||||
else:
|
||||
body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
|
||||
return self.sheerka.ret(self.name, False, body)
|
||||
|
||||
source_code_nodes = self.to_source_code_node(node)
|
||||
if not ret.status:
|
||||
return ret
|
||||
|
||||
node = ret.body.body
|
||||
source_code_nodes = self.to_source_code_node(context, node)
|
||||
res = []
|
||||
for source_code_node in source_code_nodes:
|
||||
body = self.function_parser_get_return_value_body(source_code_node)
|
||||
res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, body))
|
||||
body = self.function_parser_get_return_value_body(context, parser_input.as_text(), source_code_node)
|
||||
res.append(context.sheerka.ret(self.name, source_code_node.python_node is not None, body))
|
||||
|
||||
return res[0] if len(res) == 1 else res
|
||||
|
||||
def parse_input(self):
|
||||
return self.parse_function()
|
||||
def parse_input(self, context, parser_input, error_sink):
|
||||
return self.parse_function(context, parser_input, error_sink)
|
||||
|
||||
def parse_function(self):
|
||||
def parse_function(self, context, parser_input, error_sink):
|
||||
|
||||
start = self.parser_input.pos
|
||||
token = self.parser_input.token
|
||||
start = parser_input.pos
|
||||
token = parser_input.token
|
||||
if token.type != TokenKind.IDENTIFIER:
|
||||
self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier",
|
||||
token,
|
||||
[TokenKind.IDENTIFIER]))
|
||||
error_sink.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier",
|
||||
token,
|
||||
[TokenKind.IDENTIFIER]))
|
||||
return None
|
||||
|
||||
if not self.parser_input.next_token():
|
||||
self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis"))
|
||||
if not parser_input.next_token():
|
||||
error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis"))
|
||||
return None
|
||||
|
||||
token = self.parser_input.token
|
||||
token = parser_input.token
|
||||
if token.type != TokenKind.LPAR:
|
||||
self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis",
|
||||
token,
|
||||
[TokenKind.LPAR]))
|
||||
error_sink.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis",
|
||||
token,
|
||||
[TokenKind.LPAR]))
|
||||
return None
|
||||
|
||||
start_node = NameExprNode(start, start + 1, self.parser_input.tokens[start:start + 2])
|
||||
if not self.parser_input.next_token():
|
||||
self.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis"))
|
||||
start_node = NameExprNode(start, start + 1, parser_input.tokens[start:start + 2])
|
||||
if not parser_input.next_token():
|
||||
error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis"))
|
||||
return FunctionNode(start, start + 1, [], start_node, None, None)
|
||||
|
||||
params = self.parse_parameters()
|
||||
if self.has_error:
|
||||
return FunctionNode(start, self.parser_input.pos, [], start_node, None, params)
|
||||
params = self.parse_parameters(context, parser_input, error_sink)
|
||||
if error_sink.has_error:
|
||||
return FunctionNode(start, parser_input.pos, [], start_node, None, params)
|
||||
|
||||
token = self.parser_input.token
|
||||
token = parser_input.token
|
||||
if not token or token.type != TokenKind.RPAR:
|
||||
self.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found",
|
||||
token,
|
||||
[TokenKind.RPAR]))
|
||||
return FunctionNode(start, self.parser_input.pos, [], start_node, None, params)
|
||||
error_sink.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found",
|
||||
token,
|
||||
[TokenKind.RPAR]))
|
||||
return FunctionNode(start, parser_input.pos, [], start_node, None, params)
|
||||
|
||||
return FunctionNode(start,
|
||||
self.parser_input.pos,
|
||||
self.parser_input.tokens[start:self.parser_input.pos + 1],
|
||||
parser_input.pos,
|
||||
parser_input.tokens[start:parser_input.pos + 1],
|
||||
start_node,
|
||||
NameExprNode(self.parser_input.pos, self.parser_input.pos, [token]),
|
||||
NameExprNode(parser_input.pos, parser_input.pos, [token]),
|
||||
params)
|
||||
|
||||
def parse_parameters(self):
|
||||
def parse_parameters(self, context, parser_input, error_sink):
|
||||
nodes = []
|
||||
while True:
|
||||
param_value = self.parse_parameter_value()
|
||||
param_value = self.parse_parameter_value(context, parser_input, error_sink)
|
||||
if not param_value:
|
||||
break
|
||||
|
||||
function_parameter = FunctionParameter(param_value)
|
||||
nodes.append(function_parameter)
|
||||
|
||||
token = self.parser_input.token
|
||||
token = parser_input.token
|
||||
if token.type == TokenKind.EOF:
|
||||
self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters"))
|
||||
error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters"))
|
||||
return None
|
||||
|
||||
if token.type == TokenKind.RPAR:
|
||||
break
|
||||
|
||||
if token.value == self.sep:
|
||||
sep_pos = self.parser_input.pos
|
||||
has_next = self.parser_input.next_token() # it's before add_sep() to capture trailing whitespace
|
||||
sep_pos = parser_input.pos
|
||||
has_next = parser_input.next_token() # it's before add_sep() to capture trailing whitespace
|
||||
function_parameter.add_sep(sep_pos,
|
||||
self.parser_input.pos - 1,
|
||||
self.parser_input.tokens[sep_pos: self.parser_input.pos])
|
||||
parser_input.pos - 1,
|
||||
parser_input.tokens[sep_pos: parser_input.pos])
|
||||
if not has_next:
|
||||
break
|
||||
|
||||
return nodes
|
||||
|
||||
def parse_parameter_value(self):
|
||||
def parse_parameter_value(self, context, parser_input, error_sink):
|
||||
# check if the parameter is a function
|
||||
start_pos = self.parser_input.pos
|
||||
self.record_errors = False
|
||||
func = self.parse_function()
|
||||
self.record_errors = True
|
||||
if func:
|
||||
self.parser_input.next_token()
|
||||
start_pos = parser_input.pos
|
||||
new_error_sink = ErrorSink()
|
||||
func = self.parse_function(context, parser_input, new_error_sink)
|
||||
if func and not new_error_sink.has_error:
|
||||
parser_input.next_token()
|
||||
return func
|
||||
|
||||
# otherwise, eat until LPAR or separator
|
||||
self.parser_input.seek(start_pos)
|
||||
self.record_errors = True
|
||||
parser_input.seek(start_pos)
|
||||
tokens = []
|
||||
while True:
|
||||
token = self.parser_input.token
|
||||
token = parser_input.token
|
||||
if token is None:
|
||||
break
|
||||
|
||||
@@ -217,12 +172,12 @@ class FunctionParser(BaseParserInputParser):
|
||||
break
|
||||
|
||||
tokens.append(token)
|
||||
if not self.parser_input.next_token(skip_whitespace=False):
|
||||
if not parser_input.next_token(skip_whitespace=False):
|
||||
break
|
||||
|
||||
return NameExprNode(start_pos, self.parser_input.pos - 1, tokens) if len(tokens) else None
|
||||
return NameExprNode(start_pos, parser_input.pos - 1, tokens) if len(tokens) else None
|
||||
|
||||
def to_source_code_node(self, function_node: FunctionNode):
|
||||
def to_source_code_node(self, context, function_node: FunctionNode):
|
||||
python_parser = PythonWithConceptsParser()
|
||||
|
||||
def update_source_code_node(scn, nodes, sep):
|
||||
@@ -239,7 +194,7 @@ class FunctionParser(BaseParserInputParser):
|
||||
if parsing_res.status:
|
||||
return None
|
||||
|
||||
if self.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME):
|
||||
if context.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME):
|
||||
return parsing_res.body.reason
|
||||
else:
|
||||
return parsing_res.body.body
|
||||
@@ -247,7 +202,7 @@ class FunctionParser(BaseParserInputParser):
|
||||
if len(function_node.parameters) == 0:
|
||||
# validate the source
|
||||
nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
|
||||
python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse)
|
||||
python_parsing_res = python_parser.parse_nodes(context, nodes_to_parse)
|
||||
python_node = python_parsing_res.body.body if python_parsing_res.status else None
|
||||
|
||||
return [SourceCodeNode(start=function_node.first.start,
|
||||
@@ -264,12 +219,12 @@ class FunctionParser(BaseParserInputParser):
|
||||
if isinstance(param.value, NameExprNode):
|
||||
# try to recognize concepts
|
||||
unrecognized = param.value.to_unrecognized()
|
||||
nodes_sequences = get_lexer_nodes_from_unrecognized(self.context,
|
||||
nodes_sequences = get_lexer_nodes_from_unrecognized(context,
|
||||
unrecognized,
|
||||
PARSERS)
|
||||
else:
|
||||
# the parameter is also a function
|
||||
nodes_sequences = self.to_source_code_node(param.value)
|
||||
nodes_sequences = self.to_source_code_node(context, param.value)
|
||||
|
||||
if self.longest_concepts_only:
|
||||
nodes_sequences = self.get_longest_concepts(nodes_sequences)
|
||||
@@ -300,7 +255,7 @@ class FunctionParser(BaseParserInputParser):
|
||||
source_code_node.fix_all_pos()
|
||||
source_code_node.pseudo_fix_source()
|
||||
|
||||
python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes())
|
||||
python_parsing_res = python_parser.parse_nodes(context, source_code_node.get_all_nodes())
|
||||
if python_parsing_res.status:
|
||||
source_code_node.python_node = python_parsing_res.body.body
|
||||
source_code_node.return_value = python_parsing_res
|
||||
@@ -308,7 +263,7 @@ class FunctionParser(BaseParserInputParser):
|
||||
# make sure that concepts found can be evaluated
|
||||
errors = []
|
||||
for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]:
|
||||
update_compiled(self.context, c, errors)
|
||||
update_compiled(context, c, errors)
|
||||
|
||||
if errors:
|
||||
source_code_node.error_when_parsing = errors
|
||||
|
||||
Reference in New Issue
Block a user