Working on #48: Refactored FunctionParser, introducing ExpressionParser

This commit is contained in:
2021-03-10 11:46:39 +01:00
parent 07f0d3670d
commit 966a1ed814
11 changed files with 239 additions and 67 deletions
+7 -2
View File
@@ -545,16 +545,21 @@ def decode_concept(text, wrapper="C"):
return None, None
def tokens_index(tokens, sub_tokens, skip=0):
def tokens_index(tokens, sub_tokens, skip=0, start_from_end=False):
"""
Index of the sub tokens in tokens
:param tokens: tokens
:param sub_tokens: sub tokens to search
:param skip: number of found to skip
:param start_from_end: start by the end
:return:
"""
expected = [token.value for token in sub_tokens if token.type != TokenKind.EOF]
for i in range(0, len(tokens) - len(expected) + 1):
indexes = range(0, len(tokens) - len(expected) + 1)
if start_from_end:
indexes = reversed(indexes)
for i in indexes:
for j in range(len(expected)):
if tokens[i + j].value != expected[j]:
break
+6
View File
@@ -232,3 +232,9 @@ class BaseParser:
@staticmethod
def get_name(name):
return BaseParser.PREFIX + name
class BaseExprParser(BaseParser):
    """
    Base class for parsers that expose a ``parse_input`` hook.

    The hook is abstract here: subclasses are expected to override it.
    """

    def parse_input(self):
        """
        Parse the current parser input.

        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError
+72
View File
@@ -0,0 +1,72 @@
from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseParser import BaseExprParser
from parsers.FunctionParser import FunctionParser
from parsers.LogicalOperatorParser import LogicalOperatorParser
from parsers.RelationalOperatorParser import RelationalOperatorParser
from parsers.expressions import ParenthesisNode, NameExprNode
class ExpressionParser(BaseExprParser):
    """
    Expression parser registered under the name "Expression".

    It owns a LogicalOperatorParser, a RelationalOperatorParser and a
    FunctionParser. Delegation to them is currently switched off: the
    candidate list walked by ``parse_input`` is empty, so every input ends up
    as a single NameExprNode.
    """

    NAME = "Expression"

    def __init__(self, **kwargs):
        """
        Initialise the base parser and create the three sub parsers.

        :param kwargs: accepted but not used by this constructor
        """
        super().__init__(ExpressionParser.NAME, 60, False, yield_eof=True)
        self.logical_parser = LogicalOperatorParser()
        self.relational_parser = RelationalOperatorParser()
        self.function_parser = FunctionParser()

    def parse(self, context, parser_input: ParserInput):
        """
        Parse ``parser_input`` and wrap the outcome in a sheerka return value.

        :param context: execution context (provides ``log`` and ``sheerka``)
        :param parser_input: the input to parse
        :return: None when ``parser_input`` is not a ParserInput; otherwise the
                 value built by ``sheerka.ret``: status False with an IS_EMPTY
                 concept for empty input, status False with an ERROR concept
                 when ``reset_parser`` fails, else status ``not self.has_error``
        """
        if not isinstance(parser_input, ParserInput):
            return None

        context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
        sheerka = context.sheerka

        # nothing to parse
        if parser_input.is_empty():
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.IS_EMPTY))

        # the parser could not be (re)initialised on this input
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(self.name,
                                    False,
                                    sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        self.parser_input.next_token()
        parsed = self.parse_input()

        # a top level parenthesis is dropped, only its content is kept
        tree = parsed.node if isinstance(parsed, ParenthesisNode) else parsed

        body = self.get_return_value_body(sheerka, self.parser_input.as_text(), tree, tree)
        return self.sheerka.ret(self.name, not self.has_error, body)

    def parse_input(self):
        """
        Try each candidate sub parser from the current position and return the
        first result that is truthy and error free.

        When no candidate succeeds (always the case for now, as the list is
        empty), the whole input is returned as a NameExprNode.

        :return: the node produced by a sub parser, or a NameExprNode
        """
        origin = self.parser_input.pos
        candidates = []  # [self.logical_parser, self.relational_parser, self.function_parser]

        for candidate in candidates:
            self.parser_input.seek(origin)  # every candidate restarts from the same position
            if not candidate.reset_parser(self.context, self.parser_input):
                continue

            result = candidate.parse_input()
            if result and not candidate.has_error:
                return result

        source = self.parser_input
        return NameExprNode(source.start, source.end, source.as_tokens())
+14 -35
View File
@@ -6,14 +6,14 @@ from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, Node
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser
from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.RuleParser import RuleParser
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.SyaNodeParser import SyaNodeParser
from parsers.expressions import NameExprNode
from parsers.expressions import NameExprNode, FunctionNode, FunctionParameter
PARSERS = [RuleParser.NAME,
SequenceNodeParser.NAME,
@@ -26,34 +26,7 @@ class FunctionParserNode(Node):
pass
@dataclass()
class FunctionParameter:
"""
class the represent result of the parameter parsing
"""
value: NameExprNode # value parsed
separator: NameExprNode = None # holds the value and the position of the separator
def add_sep(self, start, end, tokens):
self.separator = NameExprNode(start, end, tokens)
def value_to_unrecognized(self):
return UnrecognizedTokensNode(self.value.start, self.value.end, self.value.tokens).fix_source()
def separator_to_unrecognized(self):
if self.separator is None:
return None
return UnrecognizedTokensNode(self.separator.start, self.separator.end, self.separator.tokens).fix_source()
@dataclass
class FunctionNode(FunctionParserNode):
first: NameExprNode # beginning of the function (it should represent the name of the function)
last: NameExprNode # last part of the function (it should be the trailing parenthesis)
parameters: list
class FunctionParser(BaseParser):
class FunctionParser(BaseExprParser):
"""
The parser will be used to parse func(x, y, z)
where x, y and z can be source code, concepts or other functions
@@ -144,6 +117,9 @@ class FunctionParser(BaseParser):
return res[0] if len(res) == 1 else res
def parse_input(self):
    """
    Implementation of the ``parse_input`` hook: parses a function.

    :return: whatever ``parse_function`` returns
    """
    return self.parse_function()
def parse_function(self):
start = self.parser_input.pos
@@ -168,20 +144,23 @@ class FunctionParser(BaseParser):
start_node = NameExprNode(start, start + 1, self.parser_input.tokens[start:start + 2])
if not self.parser_input.next_token():
self.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis"))
return FunctionNode(start_node, None, None)
return FunctionNode(start, start + 1, [], start_node, None, None)
params = self.parse_parameters()
if self.has_error:
return FunctionNode(start_node, None, params)
return FunctionNode(start, self.parser_input.pos, [], start_node, None, params)
token = self.parser_input.token
if not token or token.type != TokenKind.RPAR:
self.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found",
token,
[TokenKind.RPAR]))
return FunctionNode(start_node, None, params)
return FunctionNode(start, self.parser_input.pos, [], start_node, None, params)
return FunctionNode(start_node,
return FunctionNode(start,
self.parser_input.pos,
self.parser_input.tokens[start:self.parser_input.pos + 1],
start_node,
NameExprNode(self.parser_input.pos, self.parser_input.pos, [token]),
params)
+7 -4
View File
@@ -7,7 +7,7 @@ from core.sheerka.services.sheerka_service import FailedToCompileError
from core.tokenizer import TokenKind, Tokenizer, Keywords
from core.utils import get_text_from_tokens
from parsers.BaseNodeParser import UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \
ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode
@@ -60,7 +60,7 @@ class ReteConditionsEmitter:
return [AndConditions(conditions)]
class LogicalOperatorParser(BaseParser):
class LogicalOperatorParser(BaseExprParser):
"""
will parser logic expression
like not (a and b or c)
@@ -93,7 +93,7 @@ class LogicalOperatorParser(BaseParser):
if not isinstance(parser_input, ParserInput):
return None
context.log(f"Parsing '{parser_input}' with LogicalOperatorParser", self.name)
context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
sheerka = context.sheerka
if parser_input.is_empty():
@@ -108,7 +108,7 @@ class LogicalOperatorParser(BaseParser):
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
self.parser_input.next_token()
tree = self.parse_or()
tree = self.parse_input()
token = self.parser_input.token
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
@@ -124,6 +124,9 @@ class LogicalOperatorParser(BaseParser):
return ret
def parse_input(self):
    """
    Implementation of the ``parse_input`` hook: parsing starts at the 'or' level.

    :return: whatever ``parse_or`` returns
    """
    return self.parse_or()
def parse_or(self):
start = self.parser_input.pos
expr = self.parse_and()
+10 -5
View File
@@ -4,19 +4,21 @@ from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import TokenKind, Token
from core.utils import get_text_from_tokens
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError
from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser
from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \
ParenthesisNode, LeftPartNotFoundError
class RelationalOperatorParser(BaseParser):
class RelationalOperatorParser(BaseExprParser):
"""
Parses xxx (== | > | < | >= | <= | != | in | not in) yyy
Nothing else
"""
NAME = "RelationalOperator"
def __init__(self, **kwargs):
super().__init__("Expression", 60, False, yield_eof=True)
super().__init__(self.NAME, 60, False, yield_eof=True)
def parse(self, context, parser_input: Union[ParserInput, List[Token]]):
"""
@@ -31,7 +33,7 @@ class RelationalOperatorParser(BaseParser):
elif not isinstance(parser_input, ParserInput):
return None
context.log(f"Parsing '{parser_input}' with ComparisonExpressionParser", self.name)
context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
sheerka = context.sheerka
if parser_input.is_empty():
@@ -47,7 +49,7 @@ class RelationalOperatorParser(BaseParser):
self.parser_input.next_token()
node = self.parse_compare()
node = self.parse_input()
if isinstance(node, ParenthesisNode):
node = node.node
@@ -60,6 +62,9 @@ class RelationalOperatorParser(BaseParser):
return ret
def parse_input(self):
    """
    Implementation of the ``parse_input`` hook: parses a comparison.

    :return: whatever ``parse_compare`` returns
    """
    return self.parse_compare()
def parse_compare(self):
start = self.parser_input.pos
left = self.parse_names()
+30 -4
View File
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import List, Tuple
from typing import List, Tuple, Union
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import tokens_are_matching
@@ -63,7 +63,6 @@ class ExprNode(Node):
class NameExprNode(ExprNode):
def __init__(self, start, end, tokens):
super().__init__(start, end, tokens)
self.tokens = tokens
self.value = "".join([t.str_value for t in self.tokens])
def eval(self, obj):
@@ -224,8 +223,8 @@ class ParenthesisNode(ExprNode):
if self.start != other.start or self.end != other.end:
return False
# if other.tokens is not None and other.tokens != self.tokens:
# return False
if other.tokens is not None and other.tokens != self.tokens:
return False
return self.node == other.node
@@ -302,6 +301,33 @@ class ComparisonNode(ExprNode):
return f"{self.left} {self.comp} {self.right}"
@dataclass()
class FunctionParameter:
    """
    Result of parsing one function parameter: the parsed value plus,
    optionally, the separator attached to it.
    """
    value: NameExprNode  # the parsed value
    separator: NameExprNode = None  # value and position of the separator (None until add_sep is called)

    # NOTE(review): UnrecognizedTokensNode is not among the imports visible for
    # this module - confirm it is in scope before relying on the two converters.

    def add_sep(self, start, end, tokens):
        """
        Record the separator as a NameExprNode.

        :param start: start position of the separator
        :param end: end position of the separator
        :param tokens: tokens of the separator
        """
        self.separator = NameExprNode(start, end, tokens)

    def value_to_unrecognized(self):
        """
        :return: result of ``fix_source()`` on an UnrecognizedTokensNode built
                 from the start, end and tokens of the value
        """
        parsed = self.value
        return UnrecognizedTokensNode(parsed.start, parsed.end, parsed.tokens).fix_source()

    def separator_to_unrecognized(self):
        """
        :return: None when there is no separator, otherwise the result of
                 ``fix_source()`` on an UnrecognizedTokensNode built from the
                 start, end and tokens of the separator
        """
        sep = self.separator
        if sep is not None:
            return UnrecognizedTokensNode(sep.start, sep.end, sep.tokens).fix_source()
        return None
@dataclass
class FunctionNode(ExprNode):
    """
    Expression node describing a parsed function call.
    """
    first: NameExprNode  # opening part of the function (should carry the function name)
    last: NameExprNode  # closing part of the function (should be the trailing parenthesis)
    parameters: Union[None, List[FunctionParameter]]  # parsed parameters, or None
class ExpressionVisitor:
"""
Pyhtonic implementation of visitors for ExprNode