Working on #48 : Added BaseExprParser and BaseNodeParser.py

This commit is contained in:
2021-03-10 21:09:09 +01:00
parent 998ea160be
commit 9c4991923e
18 changed files with 317 additions and 198 deletions
+9 -4
View File
@@ -42,7 +42,7 @@ class ParserInput:
self.yield_oef = yield_oef self.yield_oef = yield_oef
self.start = start or 0 self.start = start or 0
if end: if end is not None:
self.original_end = end # forced index of the last token self.original_end = end # forced index of the last token
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
else: else:
@@ -115,6 +115,7 @@ class ParserInput:
self.pos += 1 self.pos += 1
if self.pos > self.end: if self.pos > self.end:
self.token = self.tokens[-1]
return False return False
self.token = self.tokens[self.pos] self.token = self.tokens[self.pos]
@@ -128,7 +129,11 @@ class ParserInput:
return False return False
self.token = self.tokens[self.pos] self.token = self.tokens[self.pos]
return self.pos <= self.end if self.pos <= self.end:
return True
else:
self.token = self.tokens[-1]
return False
def the_token_after(self, skip_whitespace=True): def the_token_after(self, skip_whitespace=True):
""" """
@@ -137,13 +142,13 @@ class ParserInput:
""" """
my_pos = self.pos + 1 my_pos = self.pos + 1
if my_pos > self.end: if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1) return self.tokens[-1]
if skip_whitespace: if skip_whitespace:
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE): while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
my_pos += 1 my_pos += 1
if my_pos > self.end: if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1) return self.tokens[-1]
return self.tokens[my_pos] return self.tokens[my_pos]
+3 -2
View File
@@ -2,7 +2,8 @@ from dataclasses import dataclass, field
import core.utils import core.utils
from core.tokenizer import Keywords, TokenKind, Tokenizer from core.tokenizer import Keywords, TokenKind, Tokenizer
from parsers.BaseParser import BaseParser, Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError from parsers.BaseParser import Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError, \
BaseParserInputParser
@dataclass() @dataclass()
@@ -94,7 +95,7 @@ class NameNode(CustomGrammarParserNode):
return hash(self.get_name()) return hash(self.get_name())
class BaseCustomGrammarParser(BaseParser): class BaseCustomGrammarParser(BaseParserInputParser):
""" """
Base class for sheerka specific grammars Base class for sheerka specific grammars
""" """
+2 -2
View File
@@ -4,7 +4,7 @@ from enum import Enum
import core.utils import core.utils
from core.tokenizer import TokenKind, Token from core.tokenizer import TokenKind, Token
from core.var_ref import VariableRef from core.var_ref import VariableRef
from parsers.BaseParser import Node, BaseParser, ParsingError from parsers.BaseParser import Node, ParsingError, BaseParserInputParser
DEBUG_COMPILED = True DEBUG_COMPILED = True
@@ -461,7 +461,7 @@ class SyaAssociativity(Enum):
return self.value return self.value
class BaseNodeParser(BaseParser): class BaseNodeParser(BaseParserInputParser):
""" """
Parser that return LexerNode Parser that return LexerNode
""" """
+95 -58
View File
@@ -62,11 +62,6 @@ class BaseParser:
self.short_name = name self.short_name = name
self.priority = priority self.priority = priority
self.enabled = enabled self.enabled = enabled
self.error_sink = []
self.context: ExecutionContext = None
self.sheerka = None
self.parser_input: ParserInput = None
self.yield_eof = yield_eof self.yield_eof = yield_eof
def __eq__(self, other): def __eq__(self, other):
@@ -80,6 +75,74 @@ class BaseParser:
def __repr__(self): def __repr__(self):
return self.name return self.name
def log_result(self, context, source, ret):
    """
    Logging hook for the outcome of a single parse; currently a no-op.

    The debug-logging implementation is kept below, commented out.
    :param context: execution context (the disabled code logs through context.log)
    :param source: the text that was parsed
    :param ret: the return value built for source (the disabled code reads ret.status)
    """
    pass
    # if not self.log.isEnabledFor(logging.DEBUG):
    #     return
    #
    # if ret.status:
    #     value = context.return_value_to_str(ret)
    #     context.log(f"Recognized '{source}' as {value}", self.name)
    # else:
    #     context.log(f"Failed to recognize '{source}'", self.name)
def log_multiple_results(self, context, source, list_of_ret):
    """
    Logging hook for a parse that produced several results; currently a no-op.

    The debug-logging implementation is kept below, commented out.
    :param context: execution context (the disabled code logs through context.log)
    :param source: the text that was parsed
    :param list_of_ret: the return values built for source (the disabled code iterates over them)
    """
    pass
    # if not self.log.isEnabledFor(logging.DEBUG):
    #     return
    #
    # context.log(f"Recognized '{source}' as multiple concepts", self.name)
    # for r in list_of_ret:
    #     value = context.return_value_to_str(r)
    #     context.log(f"  Recognized '{value}'", self.name)
def get_return_value_body(self, sheerka, source, parsed, try_parse, errors):
    """
    Build the body of a parser's return value in the standard way shared by all parsers.

    :param sheerka: object whose new() creates the resulting concept
    :param source: the text that was parsed
    :param parsed: the parsing result, or None when nothing was produced
    :param try_parse: value stored as try_parsed on the PARSER_RESULT concept
    :param errors: sequence of the errors collected while parsing
    :return: the error itself when it is the only one and already a Concept;
             NOT_FOR_ME when there are errors and parsed is None;
             ERROR when there are errors and parsed is not None;
             PARSER_RESULT when there is no error
    """
    error_count = len(errors)

    # a lone Concept error is returned untouched rather than wrapped
    if error_count == 1 and isinstance(errors[0], Concept):
        return errors[0]

    if error_count == 0:
        return sheerka.new(BuiltinConcepts.PARSER_RESULT,
                           parser=self,
                           source=source,
                           body=parsed,
                           try_parsed=try_parse)

    if parsed is not None:
        return sheerka.new(BuiltinConcepts.ERROR,
                           body=errors)

    return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                       body=source,
                       reason=errors)
@staticmethod
def get_name(name):
    """
    Return name prefixed with BaseParser.PREFIX.

    :param name: short name to prefix
    :return: BaseParser.PREFIX + name
    """
    return BaseParser.PREFIX + name
class BaseParserInputParser(BaseParser):
"""
Base parser for stateful parser where context, parser input, and error sink are part of the class
"""
def __init__(self, name, priority: int, enabled=True, yield_eof=False):
    """
    Initialise the base parser, then the per-parse state held on the instance.

    :param name: forwarded to the base initializer
    :param priority: forwarded to the base initializer
    :param enabled: forwarded to the base initializer
    :param yield_eof: forwarded to the base initializer, which stores it as self.yield_eof
    """
    # yield_eof is not assigned again here: the base initializer receives it
    # and already stores it on the instance.
    super().__init__(name, priority, enabled, yield_eof)

    # state filled in for each parse; empty / None until then
    self.error_sink = []
    self.context: ExecutionContext = None
    self.sheerka = None
    self.parser_input: ParserInput = None
def reset_parser(self, context, parser_input: ParserInput): def reset_parser(self, context, parser_input: ParserInput):
self.context = context self.context = context
self.sheerka = context.sheerka self.sheerka = context.sheerka
@@ -106,54 +169,6 @@ class BaseParser:
def has_error(self): def has_error(self):
return len(self.error_sink) > 0 return len(self.error_sink) > 0
def log_result(self, context, source, ret):
pass
# if not self.log.isEnabledFor(logging.DEBUG):
# return
#
# if ret.status:
# value = context.return_value_to_str(ret)
# context.log(f"Recognized '{source}' as {value}", self.name)
# else:
# context.log(f"Failed to recognize '{source}'", self.name)
def log_multiple_results(self, context, source, list_of_ret):
pass
# if not self.log.isEnabledFor(logging.DEBUG):
# return
#
# context.log(f"Recognized '{source}' as multiple concepts", self.name)
# for r in list_of_ret:
# value = context.return_value_to_str(r)
# context.log(f" Recognized '{value}'", self.name)
def get_return_value_body(self, sheerka, source, parsed, try_parse):
"""
All parsers must return their result in a standard way
:param sheerka:
:param source:
:param parsed:
:param try_parse:
:return:
"""
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
return self.error_sink[0]
if self.has_error:
if parsed is None:
return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
body=source,
reason=self.error_sink)
else:
return sheerka.new(BuiltinConcepts.ERROR,
body=self.error_sink)
return sheerka.new(BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=parsed,
try_parsed=try_parse)
@staticmethod @staticmethod
def get_input_as_lexer_nodes(parser_input, expected_parser=None): def get_input_as_lexer_nodes(parser_input, expected_parser=None):
""" """
@@ -229,12 +244,34 @@ class BaseParser:
return list_a return list_a
@staticmethod
def get_name(name):
return BaseParser.PREFIX + name
class BaseExprParser(BaseParser): class BaseExprParser(BaseParser):
def parse_input(self): def parse_input(self, context, parser_input, error_sink):
raise NotImplementedError raise NotImplementedError
def reset_parser_input(self, parser_input: ParserInput, error_sink):
    """
    Prepare parser_input and error_sink for a new parse.

    :param parser_input: input to reset, using this parser's yield_eof setting
    :param error_sink: error collector; emptied first, and given the LexerError if the reset fails
    :return: False when the reset raised LexerError (the error is added to error_sink);
             True otherwise, after moving the input to its next token
    """
    error_sink.clear()

    try:
        parser_input.reset(self.yield_eof)
    except LexerError as lexer_error:
        error_sink.add_error(lexer_error)
        return False
    else:
        parser_input.next_token()
        return True
class ErrorSink:
    """
    Collector for the errors found while parsing.

    Errors are appended to ``sink``, a plain list that callers read directly.
    """

    def __init__(self):
        # errors in the order they were added
        self.sink = []

    def __repr__(self):
        return f"{type(self).__name__}({self.sink!r})"

    def add_error(self, error):
        """Append error to the sink."""
        self.sink.append(error)

    def clear(self):
        """Remove every collected error."""
        # emptied in place, so objects already holding a reference to ``sink`` see the reset
        self.sink.clear()

    @property
    def has_error(self):
        """True when at least one error has been collected."""
        return bool(self.sink)
+6 -1
View File
@@ -33,6 +33,7 @@ class BnfDefinitionParser(BaseParser):
self.context = None self.context = None
self.source = "" self.source = ""
self.sheerka = None self.sheerka = None
self.error_sink = []
def __eq__(self, other): def __eq__(self, other):
if not isinstance(other, BnfDefinitionParser): if not isinstance(other, BnfDefinitionParser):
@@ -60,6 +61,10 @@ class BnfDefinitionParser(BaseParser):
self.next_token() self.next_token()
return error return error
@property
def has_error(self):
return len(self.error_sink) > 0
def get_token(self) -> Token: def get_token(self) -> Token:
return self._current return self._current
@@ -123,7 +128,7 @@ class BnfDefinitionParser(BaseParser):
False, False,
context.sheerka.new(BuiltinConcepts.ERROR, body=[e])) context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))
value = self.get_return_value_body(context.sheerka, self.source, tree, tree) value = self.get_return_value_body(context.sheerka, self.source, tree, tree, self.error_sink)
ret = self.sheerka.ret( ret = self.sheerka.ret(
self.name, self.name,
+1 -1
View File
@@ -85,7 +85,7 @@ class DefConceptParser(BaseCustomGrammarParser):
self.parser_input.next_token() self.parser_input.next_token()
node = self.parse_def_concept() node = self.parse_def_concept()
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node) body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink)
ret = sheerka.ret(self.name, not self.has_error, body) ret = sheerka.ret(self.name, not self.has_error, body)
self.log_result(context, parser_input.as_text(), ret) self.log_result(context, parser_input.as_text(), ret)
+1 -1
View File
@@ -71,7 +71,7 @@ class DefRuleParser(BaseCustomGrammarParser):
self.parser_input.next_token() self.parser_input.next_token()
node = self.parse_def_rule() node = self.parse_def_rule()
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node) body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink)
ret = sheerka.ret(self.name, not self.has_error, body) ret = sheerka.ret(self.name, not self.has_error, body)
self.log_result(context, parser_input.as_text(), ret) self.log_result(context, parser_input.as_text(), ret)
+3 -3
View File
@@ -4,10 +4,10 @@ from core.concept import VARIABLE_PREFIX
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind from core.tokenizer import TokenKind
from core.utils import str_concept from core.utils import str_concept
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser, BaseParserInputParser
class ExactConceptParser(BaseParser): class ExactConceptParser(BaseParserInputParser):
""" """
Tries to recognize a single concept Tries to recognize a single concept
""" """
@@ -15,7 +15,7 @@ class ExactConceptParser(BaseParser):
MAX_WORDS_SIZE = 6 MAX_WORDS_SIZE = 6
def __init__(self, max_word_size=None, **kwargs): def __init__(self, max_word_size=None, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80) BaseParserInputParser.__init__(self, "ExactConcept", 80)
self.max_word_size = max_word_size self.max_word_size = max_word_size
def parse(self, context, parser_input: ParserInput): def parse(self, context, parser_input: ParserInput):
+76 -27
View File
@@ -2,7 +2,7 @@ from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind from core.tokenizer import TokenKind
from core.utils import get_text_from_tokens from core.utils import get_text_from_tokens
from parsers.BaseParser import BaseExprParser from parsers.BaseParser import BaseExprParser, ErrorSink
from parsers.FunctionParser import FunctionParser from parsers.FunctionParser import FunctionParser
from parsers.LogicalOperatorParser import LogicalOperatorParser from parsers.LogicalOperatorParser import LogicalOperatorParser
from parsers.RelationalOperatorParser import RelationalOperatorParser from parsers.RelationalOperatorParser import RelationalOperatorParser
@@ -18,9 +18,10 @@ class ExpressionParser(BaseExprParser):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False) super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False)
self.logical_parser = LogicalOperatorParser() self.variable_parser = VariableOrNamesParser()
self.relational_parser = RelationalOperatorParser()
self.function_parser = FunctionParser() self.function_parser = FunctionParser()
self.relational_parser = RelationalOperatorParser()
self.logical_parser = LogicalOperatorParser(expr_parser=self.variable_parser)
def parse(self, context, parser_input: ParserInput): def parse(self, context, parser_input: ParserInput):
""" """
@@ -40,61 +41,109 @@ class ExpressionParser(BaseExprParser):
False, False,
sheerka.new(BuiltinConcepts.IS_EMPTY)) sheerka.new(BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input): error_sink = ErrorSink()
return self.sheerka.ret( if not self.reset_parser_input(parser_input, error_sink):
return context.sheerka.ret(
self.name, self.name,
False, False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
self.parser_input.next_token() node = self.parse_input(context, parser_input, error_sink)
node = self.parse_input()
if isinstance(node, ParenthesisNode): if isinstance(node, ParenthesisNode):
node = node.node node = node.node
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node) value = self.get_return_value_body(context.sheerka, parser_input.as_text(), node, node, error_sink.sink)
ret = self.sheerka.ret( ret = context.sheerka.ret(
self.name, self.name,
not self.has_error, not error_sink.has_error,
value) value)
return ret return ret
def parse_input(self): def parse_input(self, context, parser_input, error_sink):
pos = self.parser_input.pos pos = parser_input.pos
for parser in []: # [self.logical_parser, self.relational_parser, self.function_parser]: for parser in [self.logical_parser,
self.parser_input.seek(pos) # reset position self.variable_parser]: # [self.logical_parser, self.relational_parser, self.function_parser]:
if parser.reset_parser(self.context, self.parser_input): parser_input.seek(pos) # reset position
res = parser.parse_input() res = parser.parse_input(context, parser_input, error_sink)
if res and not parser.has_error: if res and not error_sink.has_error:
return res return res
return None
class VariableOrNamesParser(BaseExprParser):
NAME = "VariableOrNames"
def __init__(self, **kwargs):
super().__init__(VariableOrNamesParser.NAME, 60, False, yield_eof=False)
def parse(self, context, parser_input: ParserInput):
    """
    Parse parser_input and wrap the outcome in a standard return value.

    :param context: execution context; supplies log() and the sheerka instance
    :param parser_input: the input to parse
    :return: None when parser_input is not a ParserInput;
             otherwise a return value whose status is False for an empty input,
             a failed reset, or any error collected while parsing
    """
    if not isinstance(parser_input, ParserInput):
        return None

    context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
    sheerka = context.sheerka

    if parser_input.is_empty():
        empty_body = sheerka.new(BuiltinConcepts.IS_EMPTY)
        return sheerka.ret(self.name, False, empty_body)

    # errors are collected per call instead of on the parser instance
    error_sink = ErrorSink()
    if not self.reset_parser_input(parser_input, error_sink):
        error_body = sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink)
        return sheerka.ret(self.name, False, error_body)

    node = self.parse_input(context, parser_input, error_sink)
    if isinstance(node, ParenthesisNode):
        # keep only the node wrapped by the parenthesis
        node = node.node

    body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, error_sink.sink)
    return sheerka.ret(self.name, not error_sink.has_error, body)
def parse_input(self, context, parser_input, error_sink):
# try to recognize a VariableNode # try to recognize a VariableNode
dots_found = [] dots_found = []
for i, token in enumerate(self.parser_input.as_tokens()): pos = parser_input.pos
for i, token in enumerate(parser_input.as_tokens()):
if token.type == TokenKind.DOT: if token.type == TokenKind.DOT:
dots_found.append(i) dots_found.append(i)
continue continue
if not (token.type == TokenKind.WHITESPACE or if not (token.type == TokenKind.WHITESPACE or
token.type == TokenKind.IDENTIFIER and token.value.isidentifier()): token.type == TokenKind.IDENTIFIER and token.value.isidentifier()):
return NameExprNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens()) return NameExprNode(parser_input.start, parser_input.end, parser_input.as_tokens())
if len(dots_found) == 0: if len(dots_found) == 0:
return VariableNode(pos, self.parser_input.end, self.parser_input.as_tokens(), self.parser_input.as_text()) return VariableNode(pos, parser_input.end, parser_input.as_tokens(), parser_input.as_text())
parts = [] parts = []
current_dot_pos = pos current_dot_pos = pos
for dot_found in dots_found: for dot_found in dots_found:
parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: dot_found])) parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: dot_found]))
current_dot_pos = dot_found + 1 current_dot_pos = dot_found + 1
# do not forget the trailing part # do not forget the trailing part
parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: self.parser_input.end + 1])) parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: parser_input.end + 1]))
return VariableNode(self.parser_input.start, return VariableNode(parser_input.start,
self.parser_input.end, parser_input.end,
self.parser_input.as_tokens(), parser_input.as_tokens(),
parts[0], parts[0],
*parts[1:]) *parts[1:])
+3 -2
View File
@@ -7,7 +7,8 @@ from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind from core.tokenizer import TokenKind
from core.utils import get_n_clones from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser, \
BaseParserInputParser
from parsers.BnfNodeParser import BnfNodeParser from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.RuleParser import RuleParser from parsers.RuleParser import RuleParser
@@ -26,7 +27,7 @@ class FunctionParserNode(Node):
pass pass
class FunctionParser(BaseExprParser): class FunctionParser(BaseParserInputParser):
""" """
The parser will be used to parse func(x, y, z) The parser will be used to parse func(x, y, z)
where x, y and z can be source code, concepts or other functions where x, y and z can be source code, concepts or other functions
+83 -65
View File
@@ -7,7 +7,7 @@ from core.sheerka.services.sheerka_service import FailedToCompileError
from core.tokenizer import TokenKind, Tokenizer, Keywords from core.tokenizer import TokenKind, Tokenizer, Keywords
from core.utils import get_text_from_tokens from core.utils import get_text_from_tokens
from parsers.BaseNodeParser import UnrecognizedTokensNode from parsers.BaseNodeParser import UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser, ErrorSink
from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \ from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \
ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode
@@ -76,6 +76,7 @@ class LogicalOperatorParser(BaseExprParser):
self.and_tokens = list(Tokenizer(" and ", yield_eof=False)) self.and_tokens = list(Tokenizer(" and ", yield_eof=False))
self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False)) self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False))
self.not_tokens = list(Tokenizer("not ", yield_eof=False)) self.not_tokens = list(Tokenizer("not ", yield_eof=False))
self.expr_parser = kwargs.get("expr_parser", None)
@staticmethod @staticmethod
def clean_parenthesis_nodes(nodes): def clean_parenthesis_nodes(nodes):
@@ -101,144 +102,161 @@ class LogicalOperatorParser(BaseExprParser):
False, False,
sheerka.new(BuiltinConcepts.IS_EMPTY)) sheerka.new(BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input): error_sink = ErrorSink()
return self.sheerka.ret( if not self.reset_parser_input(parser_input, error_sink):
return context.sheerka.ret(
self.name, self.name,
False, False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
self.parser_input.next_token() tree = self.parse_input(context, parser_input, error_sink)
tree = self.parse_input()
token = self.parser_input.token token = parser_input.token
if token and token.type != TokenKind.EOF: if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [])) error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
if isinstance(tree, ParenthesisNode): if isinstance(tree, ParenthesisNode):
tree = tree.node tree = tree.node
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), tree, tree)
ret = self.sheerka.ret( value = self.get_return_value_body(context.sheerka,
self.name, parser_input.as_text(),
not self.has_error, tree,
tree,
error_sink.sink)
ret = context.sheerka.ret(self.name,
not error_sink.has_error,
value) value)
return ret return ret
def parse_input(self): def parse_input(self, context, parser_input, error_sink):
return self.parse_or() return self.parse_or(context, parser_input, error_sink)
def parse_or(self): def parse_or(self, context, parser_input, error_sink):
start = self.parser_input.pos start = parser_input.pos
expr = self.parse_and() expr = self.parse_and(context, parser_input, error_sink)
token = self.parser_input.token token = parser_input.token
if token.type != TokenKind.IDENTIFIER or token.value != "or": if token.type != TokenKind.IDENTIFIER or token.value != "or":
return expr return expr
parts = [expr] parts = [expr]
while token.type == TokenKind.IDENTIFIER and token.value == "or": while token.type == TokenKind.IDENTIFIER and token.value == "or":
self.parser_input.next_token() parser_input.next_token()
expr = self.parse_and() expr = self.parse_and(context, parser_input, error_sink)
if expr is None: if expr is None:
self.add_error(UnexpectedEofParsingError("When parsing 'or'")) error_sink.add_error(UnexpectedEofParsingError("When parsing 'or'"))
end = self.parser_input.pos end = parser_input.pos
self.clean_parenthesis_nodes(parts) self.clean_parenthesis_nodes(parts)
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts) return OrNode(start, end, parser_input.tokens[start: end + 1], *parts)
parts.append(expr) parts.append(expr)
token = self.parser_input.token token = parser_input.token
end = parts[-1].end end = parts[-1].end
self.clean_parenthesis_nodes(parts) self.clean_parenthesis_nodes(parts)
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts) return OrNode(start, end, parser_input.tokens[start: end + 1], *parts)
def parse_and(self): def parse_and(self, context, parser_input, error_sink):
start = self.parser_input.pos start = parser_input.pos
expr = self.parse_not() expr = self.parse_not(context, parser_input, error_sink)
token = self.parser_input.token token = parser_input.token
if token.type != TokenKind.IDENTIFIER or token.value != "and": if token.type != TokenKind.IDENTIFIER or token.value != "and":
return expr return expr
parts = [expr] parts = [expr]
while token.type == TokenKind.IDENTIFIER and token.value == "and": while token.type == TokenKind.IDENTIFIER and token.value == "and":
self.parser_input.next_token() parser_input.next_token()
expr = self.parse_not() expr = self.parse_not(context, parser_input, error_sink)
if expr is None: if expr is None:
self.add_error(UnexpectedEofParsingError("When parsing 'and'")) error_sink.add_error(UnexpectedEofParsingError("When parsing 'and'"))
end = self.parser_input.pos end = parser_input.pos
self.clean_parenthesis_nodes(parts) self.clean_parenthesis_nodes(parts)
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts) return AndNode(start, end, parser_input.tokens[start: end + 1], *parts)
parts.append(expr) parts.append(expr)
token = self.parser_input.token token = parser_input.token
end = parts[-1].end end = parts[-1].end
self.clean_parenthesis_nodes(parts) self.clean_parenthesis_nodes(parts)
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts) return AndNode(start, end, parser_input.tokens[start: end + 1], *parts)
def parse_not(self): def parse_not(self, context, parser_input, error_sink):
token = self.parser_input.token token = parser_input.token
start = self.parser_input.pos start = parser_input.pos
if token.type == TokenKind.IDENTIFIER and token.value == "not": if token.type == TokenKind.IDENTIFIER and token.value == "not":
self.parser_input.next_token() parser_input.next_token()
parsed = self.parse_not() parsed = self.parse_not(context, parser_input, error_sink)
node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed
return NotNode(start, return NotNode(start,
parsed.end, parsed.end,
self.parser_input.tokens[start: parsed.end + 1], parser_input.tokens[start: parsed.end + 1],
node) node)
else: else:
return self.parse_names() return self.parse_names(context, parser_input, error_sink)
def parse_names(self): def parse_names(self, context, parser_input, error_sink):
def stop(): def stop():
return token.type == TokenKind.EOF or \ return token.type == TokenKind.EOF or \
paren_count == 0 and token.type == TokenKind.RPAR or \ paren_count == 0 and token.type == TokenKind.RPAR or \
token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not") token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not")
token = self.parser_input.token token = parser_input.token
if token.type == TokenKind.EOF: if token.type == TokenKind.EOF:
return None return None
if token.type == TokenKind.LPAR: if token.type == TokenKind.LPAR:
start = self.parser_input.pos start = parser_input.pos
self.parser_input.next_token() parser_input.next_token()
expr = self.parse_or() expr = self.parse_or(context, parser_input, error_sink)
token = self.parser_input.token token = parser_input.token
if token.type != TokenKind.RPAR: if token.type != TokenKind.RPAR:
self.error_sink.append( error_sink.add_error(
UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR])) UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
return expr return expr
end = self.parser_input.pos end = parser_input.pos
self.parser_input.next_token() parser_input.next_token()
return ParenthesisNode(start, end, None, expr) return ParenthesisNode(start, end, None, expr)
buffer = []
paren_count = 0 paren_count = 0
last_paren = None last_paren = None
start = self.parser_input.pos start = parser_input.pos
end = parser_input.pos
last_is_whitespace = False
while not stop(): while not stop():
buffer.append(token) last_is_whitespace = token.type == TokenKind.WHITESPACE
end += 1
if token.type == TokenKind.LPAR: if token.type == TokenKind.LPAR:
last_paren = token last_paren = token
paren_count += 1 paren_count += 1
if token.type == TokenKind.RPAR: if token.type == TokenKind.RPAR:
paren_count -= 1 paren_count -= 1
self.parser_input.next_token(False) parser_input.next_token(False)
token = self.parser_input.token token = parser_input.token
if len(buffer) == 0: if last_is_whitespace:
end -= 1
if start == end:
if token.type != TokenKind.RPAR: if token.type != TokenKind.RPAR:
self.error_sink.append(LeftPartNotFoundError()) error_sink.add_error(LeftPartNotFoundError())
return None return None
if paren_count != 0: if paren_count != 0:
self.error_sink.append(ParenthesisMismatchError(last_paren)) error_sink.add_error(ParenthesisMismatchError(last_paren))
return None return None
if buffer[-1].type == TokenKind.WHITESPACE: if self.expr_parser:
buffer.pop() new_parsing_input = ParserInput(
None,
end = start + len(buffer) - 1 tokens=parser_input.tokens,
return NameExprNode(start, end, buffer) length=parser_input.length,
start=start,
end=end - 1,
yield_oef=False).reset()
new_parsing_input.next_token()
return self.expr_parser.parse_input(context, new_parsing_input, error_sink)
else:
return NameExprNode(start, end - 1, parser_input.tokens[start:end])
def compile_conjunctions(self, context, conjunctions, who): def compile_conjunctions(self, context, conjunctions, who):
""" """
+3 -3
View File
@@ -6,7 +6,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser, Node, ParsingError from parsers.BaseParser import BaseParser, Node, ParsingError, BaseParserInputParser
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -107,7 +107,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
self.names.add(node.id) self.names.add(node.id)
class PythonParser(BaseParser): class PythonParser(BaseParserInputParser):
""" """
Parse Python scripts Parse Python scripts
""" """
@@ -116,7 +116,7 @@ class PythonParser(BaseParser):
def __init__(self, **kwargs): def __init__(self, **kwargs):
BaseParser.__init__(self, PythonParser.NAME, 50) BaseParserInputParser.__init__(self, PythonParser.NAME, 50)
self.source = kwargs.get("source", "<undef>") self.source = kwargs.get("source", "<undef>")
def parse(self, context, parser_input: ParserInput): def parse(self, context, parser_input: ParserInput):
+2 -2
View File
@@ -2,13 +2,13 @@ from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import CreateObjectIdentifiers from core.builtin_helpers import CreateObjectIdentifiers
from parsers.BaseNodeParser import ConceptNode, RuleNode, VariableNode from parsers.BaseNodeParser import ConceptNode, RuleNode, VariableNode
from parsers.BaseNodeParser import SourceCodeWithConceptNode from parsers.BaseNodeParser import SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser, BaseParserInputParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
unrecognized_nodes_parser = UnrecognizedNodeParser() unrecognized_nodes_parser = UnrecognizedNodeParser()
class PythonWithConceptsParser(BaseParser): class PythonWithConceptsParser(BaseParserInputParser):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super().__init__("PythonWithConcepts", 20) super().__init__("PythonWithConcepts", 20)
+3 -3
View File
@@ -4,12 +4,12 @@ from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import TokenKind, Token from core.tokenizer import TokenKind, Token
from core.utils import get_text_from_tokens from core.utils import get_text_from_tokens
from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser, BaseParserInputParser
from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \ from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \
ParenthesisNode, LeftPartNotFoundError ParenthesisNode, LeftPartNotFoundError
class RelationalOperatorParser(BaseExprParser): class RelationalOperatorParser(BaseParserInputParser):
""" """
Parses xxx (== | > | < | >= | <= | != | in | not in) yyy Parses xxx (== | > | < | >= | <= | != | in | not in) yyy
Nothing else Nothing else
@@ -53,7 +53,7 @@ class RelationalOperatorParser(BaseExprParser):
if isinstance(node, ParenthesisNode): if isinstance(node, ParenthesisNode):
node = node.node node = node.node
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node) value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node, self.error_sink)
ret = self.sheerka.ret( ret = self.sheerka.ret(
self.name, self.name,
+3 -3
View File
@@ -2,7 +2,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.rule import Rule from core.rule import Rule
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError, BaseParserInputParser
class RuleNotFoundError(ParsingError): class RuleNotFoundError(ParsingError):
@@ -14,7 +14,7 @@ class RuleNotFoundError(ParsingError):
return f"RuleNotFoundError(id={self.id}, key={self.key}" return f"RuleNotFoundError(id={self.id}, key={self.key}"
class RuleParser(BaseParser): class RuleParser(BaseParserInputParser):
""" """
Tries to recognize rules Tries to recognize rules
""" """
@@ -22,7 +22,7 @@ class RuleParser(BaseParser):
NAME = "Rule" NAME = "Rule"
def __init__(self, **kwargs): def __init__(self, **kwargs):
BaseParser.__init__(self, RuleParser.NAME, 80) BaseParserInputParser.__init__(self, RuleParser.NAME, 80)
def parse(self, context, parser_input: ParserInput): def parse(self, context, parser_input: ParserInput):
""" """
+2 -2
View File
@@ -4,7 +4,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import only_successful, get_lexer_nodes, update_compiled from core.builtin_helpers import only_successful, get_lexer_nodes, update_compiled
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ParsingError from parsers.BaseParser import BaseParser, ParsingError, BaseParserInputParser
from parsers.BnfNodeParser import BnfNodeParser from parsers.BnfNodeParser import BnfNodeParser
from parsers.SequenceNodeParser import SequenceNodeParser from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.SyaNodeParser import SyaNodeParser from parsers.SyaNodeParser import SyaNodeParser
@@ -22,7 +22,7 @@ class CannotParseError(ParsingError):
unrecognized: UnrecognizedTokensNode unrecognized: UnrecognizedTokensNode
class UnrecognizedNodeParser(BaseParser): class UnrecognizedNodeParser(BaseParserInputParser):
""" """
This parser comes after the other NodeParsers (Atom, Bnf or Sya) This parser comes after the other NodeParsers (Atom, Bnf or Sya)
It will try to resolve all UnrecognizedTokensNode. It will try to resolve all UnrecognizedTokensNode.
+2 -2
View File
@@ -1,7 +1,7 @@
import pytest import pytest
from core.tokenizer import Tokenizer from core.tokenizer import Tokenizer
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser, BaseParserInputParser
@pytest.mark.parametrize("tokens, expected", [ @pytest.mark.parametrize("tokens, expected", [
@@ -17,4 +17,4 @@ from parsers.BaseParser import BaseParser
(list(Tokenizer(" a ", yield_eof=False)), (1, 1)), (list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
]) ])
def test_i_can_get_tokens_boundaries(tokens, expected): def test_i_can_get_tokens_boundaries(tokens, expected):
assert BaseParser.get_tokens_boundaries(tokens) == expected assert BaseParserInputParser.get_tokens_boundaries(tokens) == expected
+18 -15
View File
@@ -3,10 +3,11 @@ import pytest
from core.builtin_concepts_ids import BuiltinConcepts from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer from core.tokenizer import Tokenizer
from parsers.BaseParser import ErrorSink
from parsers.ExpressionParser import ExpressionParser from parsers.ExpressionParser import ExpressionParser
from parsers.expressions import VariableNode from parsers.expressions import VariableNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR, AND
class TestExpressionParser(TestUsingMemoryBasedSheerka): class TestExpressionParser(TestUsingMemoryBasedSheerka):
@@ -17,9 +18,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
def input_parser_with_source(self, source): def input_parser_with_source(self, source):
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
parser.reset_parser(context, ParserInput(source)) error_sink = ErrorSink()
parser.parser_input.next_token() parser_input = ParserInput(source)
return sheerka, context, parser parser.reset_parser_input(parser_input, error_sink)
return sheerka, context, parser, parser_input, error_sink
def test_i_can_detect_empty_expression(self): def test_i_can_detect_empty_expression(self):
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
@@ -29,17 +31,18 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
@pytest.mark.parametrize("expression, expected", [ @pytest.mark.parametrize("expression, expected", [
("var1 + var 2", EXPR("var1 + var 2")), ("var1 + var2", EXPR("var1 + var2")),
("variable", VAR("variable")), ("variable", VAR("variable")),
("var.attr", VAR("var.attr")), ("var.attr", VAR("var.attr")),
("var1 and var2", AND(VAR("var1"), VAR("var2")))
]) ])
def test_i_can_parse_input(self, expression, expected): def test_i_can_parse_input(self, expression, expected):
sheerka, context, parser = self.input_parser_with_source(expression) sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
expected = get_expr_node_from_test_node(expression, expected) expected = get_expr_node_from_test_node(expression, expected)
parsed = parser.parse_input() parsed = parser.parse_input(context, parser_input, error_sink)
assert not parser.has_error assert not error_sink.has_error
assert parsed == expected assert parsed == expected
@pytest.mark.parametrize("expression", [ @pytest.mark.parametrize("expression", [
@@ -47,10 +50,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
"var . attr1 . attr2", "var . attr1 . attr2",
]) ])
def test_i_can_parse_variable(self, expression): def test_i_can_parse_variable(self, expression):
sheerka, context, parser = self.input_parser_with_source(expression) sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
parsed = parser.parse_input() parsed = parser.parse_input(context, parser_input, error_sink)
assert not parser.has_error assert not error_sink.has_error
assert isinstance(parsed, VariableNode) assert isinstance(parsed, VariableNode)
assert parsed.name == "var" assert parsed.name == "var"
assert parsed.attributes == ["attr1", "attr2"] assert parsed.attributes == ["attr1", "attr2"]
@@ -60,9 +63,9 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
expression = "do not care var1 + var2 do not care either" expression = "do not care var1 + var2 do not care either"
parser_input = ParserInput("text", list(Tokenizer(expression, yield_eof=False)), start=6, end=10) parser_input = ParserInput("text", list(Tokenizer(expression, yield_eof=False)), start=6, end=10)
error_sink = ErrorSink()
parser.reset_parser_input(parser_input, error_sink)
parsed = parser.parse_input(context, parser_input, error_sink)
parser.reset_parser(context, parser_input) assert not error_sink.has_error
parsed = parser.parse_input()
assert not parser.has_error
assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2")) assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2"))