Working on #48 : Added BaseExprParser and BaseNodeParser.py
This commit is contained in:
@@ -42,7 +42,7 @@ class ParserInput:
|
||||
self.yield_oef = yield_oef
|
||||
|
||||
self.start = start or 0
|
||||
if end:
|
||||
if end is not None:
|
||||
self.original_end = end # forced index of the last token
|
||||
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
|
||||
else:
|
||||
@@ -115,6 +115,7 @@ class ParserInput:
|
||||
self.pos += 1
|
||||
|
||||
if self.pos > self.end:
|
||||
self.token = self.tokens[-1]
|
||||
return False
|
||||
|
||||
self.token = self.tokens[self.pos]
|
||||
@@ -128,7 +129,11 @@ class ParserInput:
|
||||
return False
|
||||
self.token = self.tokens[self.pos]
|
||||
|
||||
return self.pos <= self.end
|
||||
if self.pos <= self.end:
|
||||
return True
|
||||
else:
|
||||
self.token = self.tokens[-1]
|
||||
return False
|
||||
|
||||
def the_token_after(self, skip_whitespace=True):
|
||||
"""
|
||||
@@ -137,13 +142,13 @@ class ParserInput:
|
||||
"""
|
||||
my_pos = self.pos + 1
|
||||
if my_pos > self.end:
|
||||
return Token(TokenKind.EOF, "", -1, -1, -1)
|
||||
return self.tokens[-1]
|
||||
|
||||
if skip_whitespace:
|
||||
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
|
||||
my_pos += 1
|
||||
if my_pos > self.end:
|
||||
return Token(TokenKind.EOF, "", -1, -1, -1)
|
||||
return self.tokens[-1]
|
||||
|
||||
return self.tokens[my_pos]
|
||||
|
||||
|
||||
@@ -2,7 +2,8 @@ from dataclasses import dataclass, field
|
||||
|
||||
import core.utils
|
||||
from core.tokenizer import Keywords, TokenKind, Tokenizer
|
||||
from parsers.BaseParser import BaseParser, Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError
|
||||
from parsers.BaseParser import Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError, \
|
||||
BaseParserInputParser
|
||||
|
||||
|
||||
@dataclass()
|
||||
@@ -94,7 +95,7 @@ class NameNode(CustomGrammarParserNode):
|
||||
return hash(self.get_name())
|
||||
|
||||
|
||||
class BaseCustomGrammarParser(BaseParser):
|
||||
class BaseCustomGrammarParser(BaseParserInputParser):
|
||||
"""
|
||||
Base class for sheerka specific grammars
|
||||
"""
|
||||
|
||||
@@ -4,7 +4,7 @@ from enum import Enum
|
||||
import core.utils
|
||||
from core.tokenizer import TokenKind, Token
|
||||
from core.var_ref import VariableRef
|
||||
from parsers.BaseParser import Node, BaseParser, ParsingError
|
||||
from parsers.BaseParser import Node, ParsingError, BaseParserInputParser
|
||||
|
||||
DEBUG_COMPILED = True
|
||||
|
||||
@@ -461,7 +461,7 @@ class SyaAssociativity(Enum):
|
||||
return self.value
|
||||
|
||||
|
||||
class BaseNodeParser(BaseParser):
|
||||
class BaseNodeParser(BaseParserInputParser):
|
||||
"""
|
||||
Parser that return LexerNode
|
||||
"""
|
||||
|
||||
+95
-58
@@ -62,11 +62,6 @@ class BaseParser:
|
||||
self.short_name = name
|
||||
self.priority = priority
|
||||
self.enabled = enabled
|
||||
|
||||
self.error_sink = []
|
||||
self.context: ExecutionContext = None
|
||||
self.sheerka = None
|
||||
self.parser_input: ParserInput = None
|
||||
self.yield_eof = yield_eof
|
||||
|
||||
def __eq__(self, other):
|
||||
@@ -80,6 +75,74 @@ class BaseParser:
|
||||
def __repr__(self):
|
||||
return self.name
|
||||
|
||||
def log_result(self, context, source, ret):
|
||||
pass
|
||||
# if not self.log.isEnabledFor(logging.DEBUG):
|
||||
# return
|
||||
#
|
||||
# if ret.status:
|
||||
# value = context.return_value_to_str(ret)
|
||||
# context.log(f"Recognized '{source}' as {value}", self.name)
|
||||
# else:
|
||||
# context.log(f"Failed to recognize '{source}'", self.name)
|
||||
|
||||
def log_multiple_results(self, context, source, list_of_ret):
|
||||
pass
|
||||
# if not self.log.isEnabledFor(logging.DEBUG):
|
||||
# return
|
||||
#
|
||||
# context.log(f"Recognized '{source}' as multiple concepts", self.name)
|
||||
# for r in list_of_ret:
|
||||
# value = context.return_value_to_str(r)
|
||||
# context.log(f" Recognized '{value}'", self.name)
|
||||
|
||||
def get_return_value_body(self, sheerka, source, parsed, try_parse, errors):
|
||||
"""
|
||||
All parsers must return their result in a standard way
|
||||
:param sheerka:
|
||||
:param source:
|
||||
:param parsed:
|
||||
:param try_parse:
|
||||
:param errors:
|
||||
:return:
|
||||
"""
|
||||
if len(errors) == 1 and isinstance(errors[0], Concept):
|
||||
return errors[0]
|
||||
|
||||
if len(errors):
|
||||
if parsed is None:
|
||||
return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
|
||||
body=source,
|
||||
reason=errors)
|
||||
else:
|
||||
return sheerka.new(BuiltinConcepts.ERROR,
|
||||
body=errors)
|
||||
|
||||
return sheerka.new(BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=source,
|
||||
body=parsed,
|
||||
try_parsed=try_parse)
|
||||
|
||||
@staticmethod
|
||||
def get_name(name):
|
||||
return BaseParser.PREFIX + name
|
||||
|
||||
|
||||
class BaseParserInputParser(BaseParser):
|
||||
"""
|
||||
Base parser for stateful parser where context, parser input, and error sink are part of the class
|
||||
"""
|
||||
|
||||
def __init__(self, name, priority: int, enabled=True, yield_eof=False):
|
||||
super(BaseParserInputParser, self).__init__(name, priority, enabled, yield_eof)
|
||||
|
||||
self.error_sink = []
|
||||
self.context: ExecutionContext = None
|
||||
self.sheerka = None
|
||||
self.parser_input: ParserInput = None
|
||||
self.yield_eof = yield_eof
|
||||
|
||||
def reset_parser(self, context, parser_input: ParserInput):
|
||||
self.context = context
|
||||
self.sheerka = context.sheerka
|
||||
@@ -106,54 +169,6 @@ class BaseParser:
|
||||
def has_error(self):
|
||||
return len(self.error_sink) > 0
|
||||
|
||||
def log_result(self, context, source, ret):
|
||||
pass
|
||||
# if not self.log.isEnabledFor(logging.DEBUG):
|
||||
# return
|
||||
#
|
||||
# if ret.status:
|
||||
# value = context.return_value_to_str(ret)
|
||||
# context.log(f"Recognized '{source}' as {value}", self.name)
|
||||
# else:
|
||||
# context.log(f"Failed to recognize '{source}'", self.name)
|
||||
|
||||
def log_multiple_results(self, context, source, list_of_ret):
|
||||
pass
|
||||
# if not self.log.isEnabledFor(logging.DEBUG):
|
||||
# return
|
||||
#
|
||||
# context.log(f"Recognized '{source}' as multiple concepts", self.name)
|
||||
# for r in list_of_ret:
|
||||
# value = context.return_value_to_str(r)
|
||||
# context.log(f" Recognized '{value}'", self.name)
|
||||
|
||||
def get_return_value_body(self, sheerka, source, parsed, try_parse):
|
||||
"""
|
||||
All parsers must return their result in a standard way
|
||||
:param sheerka:
|
||||
:param source:
|
||||
:param parsed:
|
||||
:param try_parse:
|
||||
:return:
|
||||
"""
|
||||
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
|
||||
return self.error_sink[0]
|
||||
|
||||
if self.has_error:
|
||||
if parsed is None:
|
||||
return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
|
||||
body=source,
|
||||
reason=self.error_sink)
|
||||
else:
|
||||
return sheerka.new(BuiltinConcepts.ERROR,
|
||||
body=self.error_sink)
|
||||
|
||||
return sheerka.new(BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=source,
|
||||
body=parsed,
|
||||
try_parsed=try_parse)
|
||||
|
||||
@staticmethod
|
||||
def get_input_as_lexer_nodes(parser_input, expected_parser=None):
|
||||
"""
|
||||
@@ -229,12 +244,34 @@ class BaseParser:
|
||||
|
||||
return list_a
|
||||
|
||||
@staticmethod
|
||||
def get_name(name):
|
||||
return BaseParser.PREFIX + name
|
||||
|
||||
|
||||
class BaseExprParser(BaseParser):
|
||||
|
||||
def parse_input(self):
|
||||
def parse_input(self, context, parser_input, error_sink):
|
||||
raise NotImplementedError
|
||||
|
||||
def reset_parser_input(self, parser_input: ParserInput, error_sink):
|
||||
try:
|
||||
error_sink.clear()
|
||||
parser_input.reset(self.yield_eof)
|
||||
except LexerError as e:
|
||||
error_sink.add_error(e)
|
||||
return False
|
||||
|
||||
parser_input.next_token()
|
||||
return True
|
||||
|
||||
|
||||
class ErrorSink:
|
||||
def __init__(self):
|
||||
self.sink = []
|
||||
|
||||
def add_error(self, error):
|
||||
self.sink.append(error)
|
||||
|
||||
def clear(self):
|
||||
self.sink.clear()
|
||||
|
||||
@property
|
||||
def has_error(self):
|
||||
return len(self.sink) > 0
|
||||
|
||||
@@ -33,6 +33,7 @@ class BnfDefinitionParser(BaseParser):
|
||||
self.context = None
|
||||
self.source = ""
|
||||
self.sheerka = None
|
||||
self.error_sink = []
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, BnfDefinitionParser):
|
||||
@@ -60,6 +61,10 @@ class BnfDefinitionParser(BaseParser):
|
||||
self.next_token()
|
||||
return error
|
||||
|
||||
@property
|
||||
def has_error(self):
|
||||
return len(self.error_sink) > 0
|
||||
|
||||
def get_token(self) -> Token:
|
||||
return self._current
|
||||
|
||||
@@ -123,7 +128,7 @@ class BnfDefinitionParser(BaseParser):
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))
|
||||
|
||||
value = self.get_return_value_body(context.sheerka, self.source, tree, tree)
|
||||
value = self.get_return_value_body(context.sheerka, self.source, tree, tree, self.error_sink)
|
||||
|
||||
ret = self.sheerka.ret(
|
||||
self.name,
|
||||
|
||||
@@ -85,7 +85,7 @@ class DefConceptParser(BaseCustomGrammarParser):
|
||||
self.parser_input.next_token()
|
||||
node = self.parse_def_concept()
|
||||
|
||||
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
|
||||
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink)
|
||||
ret = sheerka.ret(self.name, not self.has_error, body)
|
||||
|
||||
self.log_result(context, parser_input.as_text(), ret)
|
||||
|
||||
@@ -71,7 +71,7 @@ class DefRuleParser(BaseCustomGrammarParser):
|
||||
self.parser_input.next_token()
|
||||
node = self.parse_def_rule()
|
||||
|
||||
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
|
||||
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink)
|
||||
ret = sheerka.ret(self.name, not self.has_error, body)
|
||||
|
||||
self.log_result(context, parser_input.as_text(), ret)
|
||||
|
||||
@@ -4,10 +4,10 @@ from core.concept import VARIABLE_PREFIX
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from core.tokenizer import TokenKind
|
||||
from core.utils import str_concept
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.BaseParser import BaseParser, BaseParserInputParser
|
||||
|
||||
|
||||
class ExactConceptParser(BaseParser):
|
||||
class ExactConceptParser(BaseParserInputParser):
|
||||
"""
|
||||
Tries to recognize a single concept
|
||||
"""
|
||||
@@ -15,7 +15,7 @@ class ExactConceptParser(BaseParser):
|
||||
MAX_WORDS_SIZE = 6
|
||||
|
||||
def __init__(self, max_word_size=None, **kwargs):
|
||||
BaseParser.__init__(self, "ExactConcept", 80)
|
||||
BaseParserInputParser.__init__(self, "ExactConcept", 80)
|
||||
self.max_word_size = max_word_size
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
|
||||
@@ -2,7 +2,7 @@ from core.builtin_concepts_ids import BuiltinConcepts
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from core.tokenizer import TokenKind
|
||||
from core.utils import get_text_from_tokens
|
||||
from parsers.BaseParser import BaseExprParser
|
||||
from parsers.BaseParser import BaseExprParser, ErrorSink
|
||||
from parsers.FunctionParser import FunctionParser
|
||||
from parsers.LogicalOperatorParser import LogicalOperatorParser
|
||||
from parsers.RelationalOperatorParser import RelationalOperatorParser
|
||||
@@ -18,9 +18,10 @@ class ExpressionParser(BaseExprParser):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False)
|
||||
self.logical_parser = LogicalOperatorParser()
|
||||
self.relational_parser = RelationalOperatorParser()
|
||||
self.variable_parser = VariableOrNamesParser()
|
||||
self.function_parser = FunctionParser()
|
||||
self.relational_parser = RelationalOperatorParser()
|
||||
self.logical_parser = LogicalOperatorParser(expr_parser=self.variable_parser)
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
"""
|
||||
@@ -40,61 +41,109 @@ class ExpressionParser(BaseExprParser):
|
||||
False,
|
||||
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
||||
|
||||
if not self.reset_parser(context, parser_input):
|
||||
return self.sheerka.ret(
|
||||
error_sink = ErrorSink()
|
||||
if not self.reset_parser_input(parser_input, error_sink):
|
||||
return context.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
|
||||
|
||||
self.parser_input.next_token()
|
||||
|
||||
node = self.parse_input()
|
||||
node = self.parse_input(context, parser_input, error_sink)
|
||||
if isinstance(node, ParenthesisNode):
|
||||
node = node.node
|
||||
|
||||
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node)
|
||||
value = self.get_return_value_body(context.sheerka, parser_input.as_text(), node, node, error_sink.sink)
|
||||
|
||||
ret = self.sheerka.ret(
|
||||
ret = context.sheerka.ret(
|
||||
self.name,
|
||||
not self.has_error,
|
||||
not error_sink.has_error,
|
||||
value)
|
||||
|
||||
return ret
|
||||
|
||||
def parse_input(self):
|
||||
pos = self.parser_input.pos
|
||||
for parser in []: # [self.logical_parser, self.relational_parser, self.function_parser]:
|
||||
self.parser_input.seek(pos) # reset position
|
||||
if parser.reset_parser(self.context, self.parser_input):
|
||||
res = parser.parse_input()
|
||||
if res and not parser.has_error:
|
||||
return res
|
||||
def parse_input(self, context, parser_input, error_sink):
|
||||
pos = parser_input.pos
|
||||
for parser in [self.logical_parser,
|
||||
self.variable_parser]: # [self.logical_parser, self.relational_parser, self.function_parser]:
|
||||
parser_input.seek(pos) # reset position
|
||||
res = parser.parse_input(context, parser_input, error_sink)
|
||||
if res and not error_sink.has_error:
|
||||
return res
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class VariableOrNamesParser(BaseExprParser):
|
||||
NAME = "VariableOrNames"
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(VariableOrNamesParser.NAME, 60, False, yield_eof=False)
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
"""
|
||||
:param context:
|
||||
:param parser_input:
|
||||
:return:
|
||||
"""
|
||||
|
||||
if not isinstance(parser_input, ParserInput):
|
||||
return None
|
||||
|
||||
context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
|
||||
sheerka = context.sheerka
|
||||
|
||||
if parser_input.is_empty():
|
||||
return context.sheerka.ret(self.name,
|
||||
False,
|
||||
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
||||
|
||||
error_sink = ErrorSink()
|
||||
if not self.reset_parser_input(parser_input, error_sink):
|
||||
return context.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
|
||||
|
||||
node = self.parse_input(context, parser_input, error_sink)
|
||||
if isinstance(node, ParenthesisNode):
|
||||
node = node.node
|
||||
|
||||
value = self.get_return_value_body(context.sheerka, parser_input.as_text(), node, node, error_sink.sink)
|
||||
|
||||
ret = context.sheerka.ret(
|
||||
self.name,
|
||||
not error_sink.has_error,
|
||||
value)
|
||||
|
||||
return ret
|
||||
|
||||
def parse_input(self, context, parser_input, error_sink):
|
||||
# try to recognize a VariableNode
|
||||
dots_found = []
|
||||
for i, token in enumerate(self.parser_input.as_tokens()):
|
||||
pos = parser_input.pos
|
||||
for i, token in enumerate(parser_input.as_tokens()):
|
||||
if token.type == TokenKind.DOT:
|
||||
dots_found.append(i)
|
||||
continue
|
||||
|
||||
if not (token.type == TokenKind.WHITESPACE or
|
||||
token.type == TokenKind.IDENTIFIER and token.value.isidentifier()):
|
||||
return NameExprNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens())
|
||||
return NameExprNode(parser_input.start, parser_input.end, parser_input.as_tokens())
|
||||
|
||||
if len(dots_found) == 0:
|
||||
return VariableNode(pos, self.parser_input.end, self.parser_input.as_tokens(), self.parser_input.as_text())
|
||||
return VariableNode(pos, parser_input.end, parser_input.as_tokens(), parser_input.as_text())
|
||||
|
||||
parts = []
|
||||
current_dot_pos = pos
|
||||
for dot_found in dots_found:
|
||||
parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: dot_found]))
|
||||
parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: dot_found]))
|
||||
current_dot_pos = dot_found + 1
|
||||
|
||||
# do not forget the trailing part
|
||||
parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: self.parser_input.end + 1]))
|
||||
parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: parser_input.end + 1]))
|
||||
|
||||
return VariableNode(self.parser_input.start,
|
||||
self.parser_input.end,
|
||||
self.parser_input.as_tokens(),
|
||||
return VariableNode(parser_input.start,
|
||||
parser_input.end,
|
||||
parser_input.as_tokens(),
|
||||
parts[0],
|
||||
*parts[1:])
|
||||
|
||||
@@ -7,7 +7,8 @@ from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from core.tokenizer import TokenKind
|
||||
from core.utils import get_n_clones
|
||||
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
|
||||
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser
|
||||
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser, \
|
||||
BaseParserInputParser
|
||||
from parsers.BnfNodeParser import BnfNodeParser
|
||||
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
||||
from parsers.RuleParser import RuleParser
|
||||
@@ -26,7 +27,7 @@ class FunctionParserNode(Node):
|
||||
pass
|
||||
|
||||
|
||||
class FunctionParser(BaseExprParser):
|
||||
class FunctionParser(BaseParserInputParser):
|
||||
"""
|
||||
The parser will be used to parse func(x, y, z)
|
||||
where x, y and z can be source code, concepts or other functions
|
||||
|
||||
@@ -7,7 +7,7 @@ from core.sheerka.services.sheerka_service import FailedToCompileError
|
||||
from core.tokenizer import TokenKind, Tokenizer, Keywords
|
||||
from core.utils import get_text_from_tokens
|
||||
from parsers.BaseNodeParser import UnrecognizedTokensNode
|
||||
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser
|
||||
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser, ErrorSink
|
||||
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
||||
from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \
|
||||
ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode
|
||||
@@ -76,6 +76,7 @@ class LogicalOperatorParser(BaseExprParser):
|
||||
self.and_tokens = list(Tokenizer(" and ", yield_eof=False))
|
||||
self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False))
|
||||
self.not_tokens = list(Tokenizer("not ", yield_eof=False))
|
||||
self.expr_parser = kwargs.get("expr_parser", None)
|
||||
|
||||
@staticmethod
|
||||
def clean_parenthesis_nodes(nodes):
|
||||
@@ -101,144 +102,161 @@ class LogicalOperatorParser(BaseExprParser):
|
||||
False,
|
||||
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
||||
|
||||
if not self.reset_parser(context, parser_input):
|
||||
return self.sheerka.ret(
|
||||
error_sink = ErrorSink()
|
||||
if not self.reset_parser_input(parser_input, error_sink):
|
||||
return context.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
|
||||
|
||||
self.parser_input.next_token()
|
||||
tree = self.parse_input()
|
||||
token = self.parser_input.token
|
||||
tree = self.parse_input(context, parser_input, error_sink)
|
||||
|
||||
token = parser_input.token
|
||||
if token and token.type != TokenKind.EOF:
|
||||
self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
|
||||
error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
|
||||
|
||||
if isinstance(tree, ParenthesisNode):
|
||||
tree = tree.node
|
||||
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), tree, tree)
|
||||
|
||||
ret = self.sheerka.ret(
|
||||
self.name,
|
||||
not self.has_error,
|
||||
value)
|
||||
value = self.get_return_value_body(context.sheerka,
|
||||
parser_input.as_text(),
|
||||
tree,
|
||||
tree,
|
||||
error_sink.sink)
|
||||
|
||||
ret = context.sheerka.ret(self.name,
|
||||
not error_sink.has_error,
|
||||
value)
|
||||
|
||||
return ret
|
||||
|
||||
def parse_input(self):
|
||||
return self.parse_or()
|
||||
def parse_input(self, context, parser_input, error_sink):
|
||||
return self.parse_or(context, parser_input, error_sink)
|
||||
|
||||
def parse_or(self):
|
||||
start = self.parser_input.pos
|
||||
expr = self.parse_and()
|
||||
token = self.parser_input.token
|
||||
def parse_or(self, context, parser_input, error_sink):
|
||||
start = parser_input.pos
|
||||
expr = self.parse_and(context, parser_input, error_sink)
|
||||
token = parser_input.token
|
||||
if token.type != TokenKind.IDENTIFIER or token.value != "or":
|
||||
return expr
|
||||
|
||||
parts = [expr]
|
||||
while token.type == TokenKind.IDENTIFIER and token.value == "or":
|
||||
self.parser_input.next_token()
|
||||
expr = self.parse_and()
|
||||
parser_input.next_token()
|
||||
expr = self.parse_and(context, parser_input, error_sink)
|
||||
if expr is None:
|
||||
self.add_error(UnexpectedEofParsingError("When parsing 'or'"))
|
||||
end = self.parser_input.pos
|
||||
error_sink.add_error(UnexpectedEofParsingError("When parsing 'or'"))
|
||||
end = parser_input.pos
|
||||
self.clean_parenthesis_nodes(parts)
|
||||
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
|
||||
return OrNode(start, end, parser_input.tokens[start: end + 1], *parts)
|
||||
parts.append(expr)
|
||||
token = self.parser_input.token
|
||||
token = parser_input.token
|
||||
|
||||
end = parts[-1].end
|
||||
self.clean_parenthesis_nodes(parts)
|
||||
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
|
||||
return OrNode(start, end, parser_input.tokens[start: end + 1], *parts)
|
||||
|
||||
def parse_and(self):
|
||||
start = self.parser_input.pos
|
||||
expr = self.parse_not()
|
||||
token = self.parser_input.token
|
||||
def parse_and(self, context, parser_input, error_sink):
|
||||
start = parser_input.pos
|
||||
expr = self.parse_not(context, parser_input, error_sink)
|
||||
token = parser_input.token
|
||||
if token.type != TokenKind.IDENTIFIER or token.value != "and":
|
||||
return expr
|
||||
|
||||
parts = [expr]
|
||||
while token.type == TokenKind.IDENTIFIER and token.value == "and":
|
||||
self.parser_input.next_token()
|
||||
expr = self.parse_not()
|
||||
parser_input.next_token()
|
||||
expr = self.parse_not(context, parser_input, error_sink)
|
||||
if expr is None:
|
||||
self.add_error(UnexpectedEofParsingError("When parsing 'and'"))
|
||||
end = self.parser_input.pos
|
||||
error_sink.add_error(UnexpectedEofParsingError("When parsing 'and'"))
|
||||
end = parser_input.pos
|
||||
self.clean_parenthesis_nodes(parts)
|
||||
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
|
||||
return AndNode(start, end, parser_input.tokens[start: end + 1], *parts)
|
||||
parts.append(expr)
|
||||
token = self.parser_input.token
|
||||
token = parser_input.token
|
||||
|
||||
end = parts[-1].end
|
||||
self.clean_parenthesis_nodes(parts)
|
||||
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
|
||||
return AndNode(start, end, parser_input.tokens[start: end + 1], *parts)
|
||||
|
||||
def parse_not(self):
|
||||
token = self.parser_input.token
|
||||
start = self.parser_input.pos
|
||||
def parse_not(self, context, parser_input, error_sink):
|
||||
token = parser_input.token
|
||||
start = parser_input.pos
|
||||
if token.type == TokenKind.IDENTIFIER and token.value == "not":
|
||||
self.parser_input.next_token()
|
||||
parsed = self.parse_not()
|
||||
parser_input.next_token()
|
||||
parsed = self.parse_not(context, parser_input, error_sink)
|
||||
node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed
|
||||
return NotNode(start,
|
||||
parsed.end,
|
||||
self.parser_input.tokens[start: parsed.end + 1],
|
||||
parser_input.tokens[start: parsed.end + 1],
|
||||
node)
|
||||
else:
|
||||
return self.parse_names()
|
||||
return self.parse_names(context, parser_input, error_sink)
|
||||
|
||||
def parse_names(self):
|
||||
def parse_names(self, context, parser_input, error_sink):
|
||||
|
||||
def stop():
|
||||
return token.type == TokenKind.EOF or \
|
||||
paren_count == 0 and token.type == TokenKind.RPAR or \
|
||||
token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not")
|
||||
|
||||
token = self.parser_input.token
|
||||
token = parser_input.token
|
||||
if token.type == TokenKind.EOF:
|
||||
return None
|
||||
|
||||
if token.type == TokenKind.LPAR:
|
||||
start = self.parser_input.pos
|
||||
self.parser_input.next_token()
|
||||
expr = self.parse_or()
|
||||
token = self.parser_input.token
|
||||
start = parser_input.pos
|
||||
parser_input.next_token()
|
||||
expr = self.parse_or(context, parser_input, error_sink)
|
||||
token = parser_input.token
|
||||
if token.type != TokenKind.RPAR:
|
||||
self.error_sink.append(
|
||||
error_sink.add_error(
|
||||
UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
|
||||
return expr
|
||||
end = self.parser_input.pos
|
||||
self.parser_input.next_token()
|
||||
end = parser_input.pos
|
||||
parser_input.next_token()
|
||||
return ParenthesisNode(start, end, None, expr)
|
||||
|
||||
buffer = []
|
||||
paren_count = 0
|
||||
last_paren = None
|
||||
start = self.parser_input.pos
|
||||
start = parser_input.pos
|
||||
end = parser_input.pos
|
||||
last_is_whitespace = False
|
||||
while not stop():
|
||||
buffer.append(token)
|
||||
last_is_whitespace = token.type == TokenKind.WHITESPACE
|
||||
end += 1
|
||||
if token.type == TokenKind.LPAR:
|
||||
last_paren = token
|
||||
paren_count += 1
|
||||
if token.type == TokenKind.RPAR:
|
||||
paren_count -= 1
|
||||
self.parser_input.next_token(False)
|
||||
token = self.parser_input.token
|
||||
parser_input.next_token(False)
|
||||
token = parser_input.token
|
||||
|
||||
if len(buffer) == 0:
|
||||
if last_is_whitespace:
|
||||
end -= 1
|
||||
|
||||
if start == end:
|
||||
if token.type != TokenKind.RPAR:
|
||||
self.error_sink.append(LeftPartNotFoundError())
|
||||
error_sink.add_error(LeftPartNotFoundError())
|
||||
return None
|
||||
|
||||
if paren_count != 0:
|
||||
self.error_sink.append(ParenthesisMismatchError(last_paren))
|
||||
error_sink.add_error(ParenthesisMismatchError(last_paren))
|
||||
return None
|
||||
|
||||
if buffer[-1].type == TokenKind.WHITESPACE:
|
||||
buffer.pop()
|
||||
|
||||
end = start + len(buffer) - 1
|
||||
return NameExprNode(start, end, buffer)
|
||||
if self.expr_parser:
|
||||
new_parsing_input = ParserInput(
|
||||
None,
|
||||
tokens=parser_input.tokens,
|
||||
length=parser_input.length,
|
||||
start=start,
|
||||
end=end - 1,
|
||||
yield_oef=False).reset()
|
||||
new_parsing_input.next_token()
|
||||
return self.expr_parser.parse_input(context, new_parsing_input, error_sink)
|
||||
else:
|
||||
return NameExprNode(start, end - 1, parser_input.tokens[start:end])
|
||||
|
||||
def compile_conjunctions(self, context, conjunctions, who):
|
||||
"""
|
||||
|
||||
@@ -6,7 +6,7 @@ import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from core.tokenizer import TokenKind
|
||||
from parsers.BaseParser import BaseParser, Node, ParsingError
|
||||
from parsers.BaseParser import BaseParser, Node, ParsingError, BaseParserInputParser
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -107,7 +107,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
|
||||
self.names.add(node.id)
|
||||
|
||||
|
||||
class PythonParser(BaseParser):
|
||||
class PythonParser(BaseParserInputParser):
|
||||
"""
|
||||
Parse Python scripts
|
||||
"""
|
||||
@@ -116,7 +116,7 @@ class PythonParser(BaseParser):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
|
||||
BaseParser.__init__(self, PythonParser.NAME, 50)
|
||||
BaseParserInputParser.__init__(self, PythonParser.NAME, 50)
|
||||
self.source = kwargs.get("source", "<undef>")
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
|
||||
@@ -2,13 +2,13 @@ from core.builtin_concepts import BuiltinConcepts
|
||||
from core.builtin_helpers import CreateObjectIdentifiers
|
||||
from parsers.BaseNodeParser import ConceptNode, RuleNode, VariableNode
|
||||
from parsers.BaseNodeParser import SourceCodeWithConceptNode
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.BaseParser import BaseParser, BaseParserInputParser
|
||||
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
|
||||
|
||||
unrecognized_nodes_parser = UnrecognizedNodeParser()
|
||||
|
||||
|
||||
class PythonWithConceptsParser(BaseParser):
|
||||
class PythonWithConceptsParser(BaseParserInputParser):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__("PythonWithConcepts", 20)
|
||||
|
||||
|
||||
@@ -4,12 +4,12 @@ from core.builtin_concepts_ids import BuiltinConcepts
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
|
||||
from core.tokenizer import TokenKind, Token
|
||||
from core.utils import get_text_from_tokens
|
||||
from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser
|
||||
from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser, BaseParserInputParser
|
||||
from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \
|
||||
ParenthesisNode, LeftPartNotFoundError
|
||||
|
||||
|
||||
class RelationalOperatorParser(BaseExprParser):
|
||||
class RelationalOperatorParser(BaseParserInputParser):
|
||||
"""
|
||||
Parses xxx (== | > | < | >= | <= | != | in | not in) yyy
|
||||
Nothing else
|
||||
@@ -53,7 +53,7 @@ class RelationalOperatorParser(BaseExprParser):
|
||||
if isinstance(node, ParenthesisNode):
|
||||
node = node.node
|
||||
|
||||
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node)
|
||||
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node, self.error_sink)
|
||||
|
||||
ret = self.sheerka.ret(
|
||||
self.name,
|
||||
|
||||
@@ -2,7 +2,7 @@ from core.builtin_concepts import BuiltinConcepts
|
||||
from core.rule import Rule
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from core.tokenizer import TokenKind
|
||||
from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError
|
||||
from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError, BaseParserInputParser
|
||||
|
||||
|
||||
class RuleNotFoundError(ParsingError):
|
||||
@@ -14,7 +14,7 @@ class RuleNotFoundError(ParsingError):
|
||||
return f"RuleNotFoundError(id={self.id}, key={self.key}"
|
||||
|
||||
|
||||
class RuleParser(BaseParser):
|
||||
class RuleParser(BaseParserInputParser):
|
||||
"""
|
||||
Tries to recognize rules
|
||||
"""
|
||||
@@ -22,7 +22,7 @@ class RuleParser(BaseParser):
|
||||
NAME = "Rule"
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
BaseParser.__init__(self, RuleParser.NAME, 80)
|
||||
BaseParserInputParser.__init__(self, RuleParser.NAME, 80)
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
"""
|
||||
|
||||
@@ -4,7 +4,7 @@ import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.builtin_helpers import only_successful, get_lexer_nodes, update_compiled
|
||||
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
|
||||
from parsers.BaseParser import BaseParser, ParsingError
|
||||
from parsers.BaseParser import BaseParser, ParsingError, BaseParserInputParser
|
||||
from parsers.BnfNodeParser import BnfNodeParser
|
||||
from parsers.SequenceNodeParser import SequenceNodeParser
|
||||
from parsers.SyaNodeParser import SyaNodeParser
|
||||
@@ -22,7 +22,7 @@ class CannotParseError(ParsingError):
|
||||
unrecognized: UnrecognizedTokensNode
|
||||
|
||||
|
||||
class UnrecognizedNodeParser(BaseParser):
|
||||
class UnrecognizedNodeParser(BaseParserInputParser):
|
||||
"""
|
||||
This parser comes after the other NodeParsers (Atom, Bnf or Sya)
|
||||
It will try to resolve all UnrecognizedTokensNode.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import pytest
|
||||
|
||||
from core.tokenizer import Tokenizer
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.BaseParser import BaseParser, BaseParserInputParser
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tokens, expected", [
|
||||
@@ -17,4 +17,4 @@ from parsers.BaseParser import BaseParser
|
||||
(list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
|
||||
])
|
||||
def test_i_can_get_tokens_boundaries(tokens, expected):
|
||||
assert BaseParser.get_tokens_boundaries(tokens) == expected
|
||||
assert BaseParserInputParser.get_tokens_boundaries(tokens) == expected
|
||||
|
||||
@@ -3,10 +3,11 @@ import pytest
|
||||
from core.builtin_concepts_ids import BuiltinConcepts
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from core.tokenizer import Tokenizer
|
||||
from parsers.BaseParser import ErrorSink
|
||||
from parsers.ExpressionParser import ExpressionParser
|
||||
from parsers.expressions import VariableNode
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR
|
||||
from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR, AND
|
||||
|
||||
|
||||
class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
||||
@@ -17,9 +18,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
def input_parser_with_source(self, source):
|
||||
sheerka, context, parser = self.init_parser()
|
||||
parser.reset_parser(context, ParserInput(source))
|
||||
parser.parser_input.next_token()
|
||||
return sheerka, context, parser
|
||||
error_sink = ErrorSink()
|
||||
parser_input = ParserInput(source)
|
||||
parser.reset_parser_input(parser_input, error_sink)
|
||||
return sheerka, context, parser, parser_input, error_sink
|
||||
|
||||
def test_i_can_detect_empty_expression(self):
|
||||
sheerka, context, parser = self.init_parser()
|
||||
@@ -29,17 +31,18 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
||||
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
|
||||
|
||||
@pytest.mark.parametrize("expression, expected", [
|
||||
("var1 + var 2", EXPR("var1 + var 2")),
|
||||
("var1 + var2", EXPR("var1 + var2")),
|
||||
("variable", VAR("variable")),
|
||||
("var.attr", VAR("var.attr")),
|
||||
("var1 and var2", AND(VAR("var1"), VAR("var2")))
|
||||
])
|
||||
def test_i_can_parse_input(self, expression, expected):
|
||||
sheerka, context, parser = self.input_parser_with_source(expression)
|
||||
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
|
||||
expected = get_expr_node_from_test_node(expression, expected)
|
||||
|
||||
parsed = parser.parse_input()
|
||||
parsed = parser.parse_input(context, parser_input, error_sink)
|
||||
|
||||
assert not parser.has_error
|
||||
assert not error_sink.has_error
|
||||
assert parsed == expected
|
||||
|
||||
@pytest.mark.parametrize("expression", [
|
||||
@@ -47,10 +50,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
||||
"var . attr1 . attr2",
|
||||
])
|
||||
def test_i_can_parse_variable(self, expression):
|
||||
sheerka, context, parser = self.input_parser_with_source(expression)
|
||||
parsed = parser.parse_input()
|
||||
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
|
||||
parsed = parser.parse_input(context, parser_input, error_sink)
|
||||
|
||||
assert not parser.has_error
|
||||
assert not error_sink.has_error
|
||||
assert isinstance(parsed, VariableNode)
|
||||
assert parsed.name == "var"
|
||||
assert parsed.attributes == ["attr1", "attr2"]
|
||||
@@ -60,9 +63,9 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
expression = "do not care var1 + var2 do not care either"
|
||||
parser_input = ParserInput("text", list(Tokenizer(expression, yield_eof=False)), start=6, end=10)
|
||||
error_sink = ErrorSink()
|
||||
parser.reset_parser_input(parser_input, error_sink)
|
||||
parsed = parser.parse_input(context, parser_input, error_sink)
|
||||
|
||||
parser.reset_parser(context, parser_input)
|
||||
parsed = parser.parse_input()
|
||||
|
||||
assert not parser.has_error
|
||||
assert not error_sink.has_error
|
||||
assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2"))
|
||||
|
||||
Reference in New Issue
Block a user