Working on #48 : Added BaseExprParser and BaseNodeParser.py

This commit is contained in:
2021-03-10 21:09:09 +01:00
parent 998ea160be
commit 9c4991923e
18 changed files with 317 additions and 198 deletions
+9 -4
View File
@@ -42,7 +42,7 @@ class ParserInput:
self.yield_oef = yield_oef
self.start = start or 0
if end:
if end is not None:
self.original_end = end # forced index of the last token
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
else:
@@ -115,6 +115,7 @@ class ParserInput:
self.pos += 1
if self.pos > self.end:
self.token = self.tokens[-1]
return False
self.token = self.tokens[self.pos]
@@ -128,7 +129,11 @@ class ParserInput:
return False
self.token = self.tokens[self.pos]
return self.pos <= self.end
if self.pos <= self.end:
return True
else:
self.token = self.tokens[-1]
return False
def the_token_after(self, skip_whitespace=True):
"""
@@ -137,13 +142,13 @@ class ParserInput:
"""
my_pos = self.pos + 1
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
return self.tokens[-1]
if skip_whitespace:
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
my_pos += 1
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
return self.tokens[-1]
return self.tokens[my_pos]
+3 -2
View File
@@ -2,7 +2,8 @@ from dataclasses import dataclass, field
import core.utils
from core.tokenizer import Keywords, TokenKind, Tokenizer
from parsers.BaseParser import BaseParser, Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError
from parsers.BaseParser import Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError, \
BaseParserInputParser
@dataclass()
@@ -94,7 +95,7 @@ class NameNode(CustomGrammarParserNode):
return hash(self.get_name())
class BaseCustomGrammarParser(BaseParser):
class BaseCustomGrammarParser(BaseParserInputParser):
"""
Base class for sheerka specific grammars
"""
+2 -2
View File
@@ -4,7 +4,7 @@ from enum import Enum
import core.utils
from core.tokenizer import TokenKind, Token
from core.var_ref import VariableRef
from parsers.BaseParser import Node, BaseParser, ParsingError
from parsers.BaseParser import Node, ParsingError, BaseParserInputParser
DEBUG_COMPILED = True
@@ -461,7 +461,7 @@ class SyaAssociativity(Enum):
return self.value
class BaseNodeParser(BaseParser):
class BaseNodeParser(BaseParserInputParser):
"""
Parser that return LexerNode
"""
+95 -58
View File
@@ -62,11 +62,6 @@ class BaseParser:
self.short_name = name
self.priority = priority
self.enabled = enabled
self.error_sink = []
self.context: ExecutionContext = None
self.sheerka = None
self.parser_input: ParserInput = None
self.yield_eof = yield_eof
def __eq__(self, other):
@@ -80,6 +75,74 @@ class BaseParser:
def __repr__(self):
return self.name
def log_result(self, context, source, ret):
    """Hook for logging the outcome of a single parsing attempt.

    Currently a no-op: it always returns ``None`` and ignores its arguments.

    :param context: execution context (unused while logging is disabled)
    :param source: text that was parsed (unused while logging is disabled)
    :param ret: return value produced by the parser (unused while logging is disabled)
    :return: None
    """
    # Disabled implementation, kept for reference:
    #
    # if not self.log.isEnabledFor(logging.DEBUG):
    #     return
    #
    # if ret.status:
    #     value = context.return_value_to_str(ret)
    #     context.log(f"Recognized '{source}' as {value}", self.name)
    # else:
    #     context.log(f"Failed to recognize '{source}'", self.name)
    return None
def log_multiple_results(self, context, source, list_of_ret):
    """Hook for logging a parsing attempt that produced several results.

    Currently a no-op: it always returns ``None`` and ignores its arguments.

    :param context: execution context (unused while logging is disabled)
    :param source: text that was parsed (unused while logging is disabled)
    :param list_of_ret: return values produced by the parser (unused while logging is disabled)
    :return: None
    """
    # Disabled implementation, kept for reference:
    #
    # if not self.log.isEnabledFor(logging.DEBUG):
    #     return
    #
    # context.log(f"Recognized '{source}' as multiple concepts", self.name)
    # for r in list_of_ret:
    #     value = context.return_value_to_str(r)
    #     context.log(f"  Recognized '{value}'", self.name)
    return None
def get_return_value_body(self, sheerka, source, parsed, try_parse, errors):
    """
    Build the standard body that every parser puts in its return value.

    :param sheerka: object whose ``new()`` method creates the returned concept
    :param source: text that was parsed
    :param parsed: result of the parsing, or None when nothing was produced
    :param try_parse: value stored as ``try_parsed`` on a successful result
    :param errors: sequence of the errors collected while parsing
    :return: the single error itself when ``errors`` holds exactly one Concept;
             a PARSER_RESULT concept when there is no error;
             a NOT_FOR_ME concept when there are errors and ``parsed`` is None;
             an ERROR concept otherwise
    """
    error_count = len(errors)

    # A lone Concept in the sink is already a ready-made answer.
    if error_count == 1 and isinstance(errors[0], Concept):
        return errors[0]

    # Success path first, so the failure cases below need no nesting.
    if error_count == 0:
        return sheerka.new(BuiltinConcepts.PARSER_RESULT,
                           parser=self,
                           source=source,
                           body=parsed,
                           try_parsed=try_parse)

    # Errors and nothing parsed: report the source back with the reasons.
    if parsed is None:
        return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                           body=source,
                           reason=errors)

    # Errors although something was parsed.
    return sheerka.new(BuiltinConcepts.ERROR,
                       body=errors)
@staticmethod
def get_name(name):
    """Return ``name`` with ``BaseParser.PREFIX`` put in front of it."""
    prefix = BaseParser.PREFIX
    return prefix + name
class BaseParserInputParser(BaseParser):
"""
Base parser for stateful parser where context, parser input, and error sink are part of the class
"""
def __init__(self, name, priority: int, enabled=True, yield_eof=False):
    """
    Initialise the base parser, then the per-parse state kept on the instance.

    :param name: forwarded to the parent constructor
    :param priority: forwarded to the parent constructor
    :param enabled: forwarded to the parent constructor
    :param yield_eof: forwarded to the parent constructor and stored on the instance
    """
    super().__init__(name, priority, enabled, yield_eof)

    # State starts empty; reset_parser() assigns context and sheerka.
    self.error_sink = []
    self.context: ExecutionContext = None
    self.sheerka = None
    self.parser_input: ParserInput = None
    self.yield_eof = yield_eof
def reset_parser(self, context, parser_input: ParserInput):
self.context = context
self.sheerka = context.sheerka
@@ -106,54 +169,6 @@ class BaseParser:
def has_error(self):
return len(self.error_sink) > 0
def log_result(self, context, source, ret):
pass
# if not self.log.isEnabledFor(logging.DEBUG):
# return
#
# if ret.status:
# value = context.return_value_to_str(ret)
# context.log(f"Recognized '{source}' as {value}", self.name)
# else:
# context.log(f"Failed to recognize '{source}'", self.name)
def log_multiple_results(self, context, source, list_of_ret):
pass
# if not self.log.isEnabledFor(logging.DEBUG):
# return
#
# context.log(f"Recognized '{source}' as multiple concepts", self.name)
# for r in list_of_ret:
# value = context.return_value_to_str(r)
# context.log(f" Recognized '{value}'", self.name)
def get_return_value_body(self, sheerka, source, parsed, try_parse):
"""
All parsers must return their result in a standard way
:param sheerka:
:param source:
:param parsed:
:param try_parse:
:return:
"""
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
return self.error_sink[0]
if self.has_error:
if parsed is None:
return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
body=source,
reason=self.error_sink)
else:
return sheerka.new(BuiltinConcepts.ERROR,
body=self.error_sink)
return sheerka.new(BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=parsed,
try_parsed=try_parse)
@staticmethod
def get_input_as_lexer_nodes(parser_input, expected_parser=None):
"""
@@ -229,12 +244,34 @@ class BaseParser:
return list_a
@staticmethod
def get_name(name):
return BaseParser.PREFIX + name
class BaseExprParser(BaseParser):
def parse_input(self):
def parse_input(self, context, parser_input, error_sink):
raise NotImplementedError
def reset_parser_input(self, parser_input: ParserInput, error_sink):
try:
error_sink.clear()
parser_input.reset(self.yield_eof)
except LexerError as e:
error_sink.add_error(e)
return False
parser_input.next_token()
return True
class ErrorSink:
    """
    Container for the errors reported during a parsing run.

    The errors are stored, in the order they were added, in the ``sink`` list.
    """

    def __init__(self):
        self.sink = []

    def add_error(self, error):
        """Store ``error`` at the end of the sink."""
        self.sink.append(error)

    def clear(self):
        """Remove every stored error, emptying the existing list in place."""
        del self.sink[:]

    @property
    def has_error(self):
        """True when at least one error is stored."""
        return bool(self.sink)
+6 -1
View File
@@ -33,6 +33,7 @@ class BnfDefinitionParser(BaseParser):
self.context = None
self.source = ""
self.sheerka = None
self.error_sink = []
def __eq__(self, other):
if not isinstance(other, BnfDefinitionParser):
@@ -60,6 +61,10 @@ class BnfDefinitionParser(BaseParser):
self.next_token()
return error
@property
def has_error(self):
return len(self.error_sink) > 0
def get_token(self) -> Token:
return self._current
@@ -123,7 +128,7 @@ class BnfDefinitionParser(BaseParser):
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))
value = self.get_return_value_body(context.sheerka, self.source, tree, tree)
value = self.get_return_value_body(context.sheerka, self.source, tree, tree, self.error_sink)
ret = self.sheerka.ret(
self.name,
+1 -1
View File
@@ -85,7 +85,7 @@ class DefConceptParser(BaseCustomGrammarParser):
self.parser_input.next_token()
node = self.parse_def_concept()
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink)
ret = sheerka.ret(self.name, not self.has_error, body)
self.log_result(context, parser_input.as_text(), ret)
+1 -1
View File
@@ -71,7 +71,7 @@ class DefRuleParser(BaseCustomGrammarParser):
self.parser_input.next_token()
node = self.parse_def_rule()
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink)
ret = sheerka.ret(self.name, not self.has_error, body)
self.log_result(context, parser_input.as_text(), ret)
+3 -3
View File
@@ -4,10 +4,10 @@ from core.concept import VARIABLE_PREFIX
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from core.utils import str_concept
from parsers.BaseParser import BaseParser
from parsers.BaseParser import BaseParser, BaseParserInputParser
class ExactConceptParser(BaseParser):
class ExactConceptParser(BaseParserInputParser):
"""
Tries to recognize a single concept
"""
@@ -15,7 +15,7 @@ class ExactConceptParser(BaseParser):
MAX_WORDS_SIZE = 6
def __init__(self, max_word_size=None, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80)
BaseParserInputParser.__init__(self, "ExactConcept", 80)
self.max_word_size = max_word_size
def parse(self, context, parser_input: ParserInput):
+77 -28
View File
@@ -2,7 +2,7 @@ from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from core.utils import get_text_from_tokens
from parsers.BaseParser import BaseExprParser
from parsers.BaseParser import BaseExprParser, ErrorSink
from parsers.FunctionParser import FunctionParser
from parsers.LogicalOperatorParser import LogicalOperatorParser
from parsers.RelationalOperatorParser import RelationalOperatorParser
@@ -18,9 +18,10 @@ class ExpressionParser(BaseExprParser):
def __init__(self, **kwargs):
super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False)
self.logical_parser = LogicalOperatorParser()
self.relational_parser = RelationalOperatorParser()
self.variable_parser = VariableOrNamesParser()
self.function_parser = FunctionParser()
self.relational_parser = RelationalOperatorParser()
self.logical_parser = LogicalOperatorParser(expr_parser=self.variable_parser)
def parse(self, context, parser_input: ParserInput):
"""
@@ -40,61 +41,109 @@ class ExpressionParser(BaseExprParser):
False,
sheerka.new(BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
error_sink = ErrorSink()
if not self.reset_parser_input(parser_input, error_sink):
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
self.parser_input.next_token()
node = self.parse_input()
node = self.parse_input(context, parser_input, error_sink)
if isinstance(node, ParenthesisNode):
node = node.node
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node)
value = self.get_return_value_body(context.sheerka, parser_input.as_text(), node, node, error_sink.sink)
ret = self.sheerka.ret(
ret = context.sheerka.ret(
self.name,
not self.has_error,
not error_sink.has_error,
value)
return ret
def parse_input(self):
pos = self.parser_input.pos
for parser in []: # [self.logical_parser, self.relational_parser, self.function_parser]:
self.parser_input.seek(pos) # reset position
if parser.reset_parser(self.context, self.parser_input):
res = parser.parse_input()
if res and not parser.has_error:
return res
def parse_input(self, context, parser_input, error_sink):
    """
    Try the sub-parsers in turn and return the first usable node.

    Each sub-parser starts from the position the input had on entry.
    A result is accepted only if it is truthy and ``error_sink`` holds no error.

    :param context: execution context, passed through to the sub-parsers
    :param parser_input: input to parse; rewound before every attempt
    :param error_sink: sink shared by all the attempts
    :return: the node of the first successful sub-parser, or None
    """
    start_pos = parser_input.pos

    # self.relational_parser and self.function_parser are not tried here.
    candidates = (self.logical_parser, self.variable_parser)

    for candidate in candidates:
        parser_input.seek(start_pos)  # reset position
        node = candidate.parse_input(context, parser_input, error_sink)
        if not node or error_sink.has_error:
            continue
        return node

    return None
class VariableOrNamesParser(BaseExprParser):
NAME = "VariableOrNames"
def __init__(self, **kwargs):
    """
    Register the parser under ``VariableOrNamesParser.NAME``.

    :param kwargs: accepted but not used by this constructor
    """
    parser_name = VariableOrNamesParser.NAME
    # 60 and False are presumably the priority and enabled flag -- confirm against BaseParser.__init__
    super().__init__(parser_name, 60, False, yield_eof=False)
def parse(self, context, parser_input: ParserInput):
    """
    Parse ``parser_input`` and wrap the outcome in a sheerka return value.

    :param context: execution context; provides ``log()`` and ``sheerka``
    :param parser_input: input to parse
    :return: None when ``parser_input`` is not a ParserInput;
             a failed return value carrying IS_EMPTY when the input is empty;
             a failed return value carrying ERROR when the input cannot be reset;
             otherwise a return value whose status is True only if no error
             was recorded while parsing
    """
    if not isinstance(parser_input, ParserInput):
        return None

    context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
    sheerka = context.sheerka

    # Nothing to parse.
    if parser_input.is_empty():
        return context.sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.IS_EMPTY))

    # Every call gets its own sink, so no error state is kept on the parser.
    error_sink = ErrorSink()
    if not self.reset_parser_input(parser_input, error_sink):
        return context.sheerka.ret(self.name,
                                   False,
                                   context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))

    node = self.parse_input(context, parser_input, error_sink)
    if isinstance(node, ParenthesisNode):
        # Keep the wrapped node, not the parenthesis wrapper.
        node = node.node

    body = self.get_return_value_body(context.sheerka, parser_input.as_text(), node, node, error_sink.sink)
    return context.sheerka.ret(self.name, not error_sink.has_error, body)
def parse_input(self, context, parser_input, error_sink):
# try to recognize a VariableNode
dots_found = []
for i, token in enumerate(self.parser_input.as_tokens()):
pos = parser_input.pos
for i, token in enumerate(parser_input.as_tokens()):
if token.type == TokenKind.DOT:
dots_found.append(i)
continue
if not (token.type == TokenKind.WHITESPACE or
token.type == TokenKind.IDENTIFIER and token.value.isidentifier()):
return NameExprNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens())
return NameExprNode(parser_input.start, parser_input.end, parser_input.as_tokens())
if len(dots_found) == 0:
return VariableNode(pos, self.parser_input.end, self.parser_input.as_tokens(), self.parser_input.as_text())
return VariableNode(pos, parser_input.end, parser_input.as_tokens(), parser_input.as_text())
parts = []
current_dot_pos = pos
for dot_found in dots_found:
parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: dot_found]))
parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: dot_found]))
current_dot_pos = dot_found + 1
# do not forget the trailing part
parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: self.parser_input.end + 1]))
parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: parser_input.end + 1]))
return VariableNode(self.parser_input.start,
self.parser_input.end,
self.parser_input.as_tokens(),
return VariableNode(parser_input.start,
parser_input.end,
parser_input.as_tokens(),
parts[0],
*parts[1:])
+3 -2
View File
@@ -7,7 +7,8 @@ from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser, \
BaseParserInputParser
from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.RuleParser import RuleParser
@@ -26,7 +27,7 @@ class FunctionParserNode(Node):
pass
class FunctionParser(BaseExprParser):
class FunctionParser(BaseParserInputParser):
"""
The parser will be used to parse func(x, y, z)
where x, y and z can be source code, concepts or other functions
+84 -66
View File
@@ -7,7 +7,7 @@ from core.sheerka.services.sheerka_service import FailedToCompileError
from core.tokenizer import TokenKind, Tokenizer, Keywords
from core.utils import get_text_from_tokens
from parsers.BaseNodeParser import UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser, ErrorSink
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \
ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode
@@ -76,6 +76,7 @@ class LogicalOperatorParser(BaseExprParser):
self.and_tokens = list(Tokenizer(" and ", yield_eof=False))
self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False))
self.not_tokens = list(Tokenizer("not ", yield_eof=False))
self.expr_parser = kwargs.get("expr_parser", None)
@staticmethod
def clean_parenthesis_nodes(nodes):
@@ -101,144 +102,161 @@ class LogicalOperatorParser(BaseExprParser):
False,
sheerka.new(BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
error_sink = ErrorSink()
if not self.reset_parser_input(parser_input, error_sink):
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
self.parser_input.next_token()
tree = self.parse_input()
token = self.parser_input.token
tree = self.parse_input(context, parser_input, error_sink)
token = parser_input.token
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
if isinstance(tree, ParenthesisNode):
tree = tree.node
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), tree, tree)
ret = self.sheerka.ret(
self.name,
not self.has_error,
value)
value = self.get_return_value_body(context.sheerka,
parser_input.as_text(),
tree,
tree,
error_sink.sink)
ret = context.sheerka.ret(self.name,
not error_sink.has_error,
value)
return ret
def parse_input(self):
return self.parse_or()
def parse_input(self, context, parser_input, error_sink):
return self.parse_or(context, parser_input, error_sink)
def parse_or(self):
start = self.parser_input.pos
expr = self.parse_and()
token = self.parser_input.token
def parse_or(self, context, parser_input, error_sink):
start = parser_input.pos
expr = self.parse_and(context, parser_input, error_sink)
token = parser_input.token
if token.type != TokenKind.IDENTIFIER or token.value != "or":
return expr
parts = [expr]
while token.type == TokenKind.IDENTIFIER and token.value == "or":
self.parser_input.next_token()
expr = self.parse_and()
parser_input.next_token()
expr = self.parse_and(context, parser_input, error_sink)
if expr is None:
self.add_error(UnexpectedEofParsingError("When parsing 'or'"))
end = self.parser_input.pos
error_sink.add_error(UnexpectedEofParsingError("When parsing 'or'"))
end = parser_input.pos
self.clean_parenthesis_nodes(parts)
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
return OrNode(start, end, parser_input.tokens[start: end + 1], *parts)
parts.append(expr)
token = self.parser_input.token
token = parser_input.token
end = parts[-1].end
self.clean_parenthesis_nodes(parts)
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
return OrNode(start, end, parser_input.tokens[start: end + 1], *parts)
def parse_and(self):
start = self.parser_input.pos
expr = self.parse_not()
token = self.parser_input.token
def parse_and(self, context, parser_input, error_sink):
start = parser_input.pos
expr = self.parse_not(context, parser_input, error_sink)
token = parser_input.token
if token.type != TokenKind.IDENTIFIER or token.value != "and":
return expr
parts = [expr]
while token.type == TokenKind.IDENTIFIER and token.value == "and":
self.parser_input.next_token()
expr = self.parse_not()
parser_input.next_token()
expr = self.parse_not(context, parser_input, error_sink)
if expr is None:
self.add_error(UnexpectedEofParsingError("When parsing 'and'"))
end = self.parser_input.pos
error_sink.add_error(UnexpectedEofParsingError("When parsing 'and'"))
end = parser_input.pos
self.clean_parenthesis_nodes(parts)
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
return AndNode(start, end, parser_input.tokens[start: end + 1], *parts)
parts.append(expr)
token = self.parser_input.token
token = parser_input.token
end = parts[-1].end
self.clean_parenthesis_nodes(parts)
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
return AndNode(start, end, parser_input.tokens[start: end + 1], *parts)
def parse_not(self):
token = self.parser_input.token
start = self.parser_input.pos
def parse_not(self, context, parser_input, error_sink):
token = parser_input.token
start = parser_input.pos
if token.type == TokenKind.IDENTIFIER and token.value == "not":
self.parser_input.next_token()
parsed = self.parse_not()
parser_input.next_token()
parsed = self.parse_not(context, parser_input, error_sink)
node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed
return NotNode(start,
parsed.end,
self.parser_input.tokens[start: parsed.end + 1],
parser_input.tokens[start: parsed.end + 1],
node)
else:
return self.parse_names()
return self.parse_names(context, parser_input, error_sink)
def parse_names(self):
def parse_names(self, context, parser_input, error_sink):
def stop():
return token.type == TokenKind.EOF or \
paren_count == 0 and token.type == TokenKind.RPAR or \
token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not")
token = self.parser_input.token
token = parser_input.token
if token.type == TokenKind.EOF:
return None
if token.type == TokenKind.LPAR:
start = self.parser_input.pos
self.parser_input.next_token()
expr = self.parse_or()
token = self.parser_input.token
start = parser_input.pos
parser_input.next_token()
expr = self.parse_or(context, parser_input, error_sink)
token = parser_input.token
if token.type != TokenKind.RPAR:
self.error_sink.append(
error_sink.add_error(
UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
return expr
end = self.parser_input.pos
self.parser_input.next_token()
end = parser_input.pos
parser_input.next_token()
return ParenthesisNode(start, end, None, expr)
buffer = []
paren_count = 0
last_paren = None
start = self.parser_input.pos
start = parser_input.pos
end = parser_input.pos
last_is_whitespace = False
while not stop():
buffer.append(token)
last_is_whitespace = token.type == TokenKind.WHITESPACE
end += 1
if token.type == TokenKind.LPAR:
last_paren = token
paren_count += 1
if token.type == TokenKind.RPAR:
paren_count -= 1
self.parser_input.next_token(False)
token = self.parser_input.token
parser_input.next_token(False)
token = parser_input.token
if len(buffer) == 0:
if last_is_whitespace:
end -= 1
if start == end:
if token.type != TokenKind.RPAR:
self.error_sink.append(LeftPartNotFoundError())
error_sink.add_error(LeftPartNotFoundError())
return None
if paren_count != 0:
self.error_sink.append(ParenthesisMismatchError(last_paren))
error_sink.add_error(ParenthesisMismatchError(last_paren))
return None
if buffer[-1].type == TokenKind.WHITESPACE:
buffer.pop()
end = start + len(buffer) - 1
return NameExprNode(start, end, buffer)
if self.expr_parser:
new_parsing_input = ParserInput(
None,
tokens=parser_input.tokens,
length=parser_input.length,
start=start,
end=end - 1,
yield_oef=False).reset()
new_parsing_input.next_token()
return self.expr_parser.parse_input(context, new_parsing_input, error_sink)
else:
return NameExprNode(start, end - 1, parser_input.tokens[start:end])
def compile_conjunctions(self, context, conjunctions, who):
"""
+3 -3
View File
@@ -6,7 +6,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser, Node, ParsingError
from parsers.BaseParser import BaseParser, Node, ParsingError, BaseParserInputParser
log = logging.getLogger(__name__)
@@ -107,7 +107,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
self.names.add(node.id)
class PythonParser(BaseParser):
class PythonParser(BaseParserInputParser):
"""
Parse Python scripts
"""
@@ -116,7 +116,7 @@ class PythonParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, PythonParser.NAME, 50)
BaseParserInputParser.__init__(self, PythonParser.NAME, 50)
self.source = kwargs.get("source", "<undef>")
def parse(self, context, parser_input: ParserInput):
+2 -2
View File
@@ -2,13 +2,13 @@ from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import CreateObjectIdentifiers
from parsers.BaseNodeParser import ConceptNode, RuleNode, VariableNode
from parsers.BaseNodeParser import SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser
from parsers.BaseParser import BaseParser, BaseParserInputParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
unrecognized_nodes_parser = UnrecognizedNodeParser()
class PythonWithConceptsParser(BaseParser):
class PythonWithConceptsParser(BaseParserInputParser):
def __init__(self, **kwargs):
super().__init__("PythonWithConcepts", 20)
+3 -3
View File
@@ -4,12 +4,12 @@ from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import TokenKind, Token
from core.utils import get_text_from_tokens
from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser
from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser, BaseParserInputParser
from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \
ParenthesisNode, LeftPartNotFoundError
class RelationalOperatorParser(BaseExprParser):
class RelationalOperatorParser(BaseParserInputParser):
"""
Parses xxx (== | > | < | >= | <= | != | in | not in) yyy
Nothing else
@@ -53,7 +53,7 @@ class RelationalOperatorParser(BaseExprParser):
if isinstance(node, ParenthesisNode):
node = node.node
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node)
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node, self.error_sink)
ret = self.sheerka.ret(
self.name,
+3 -3
View File
@@ -2,7 +2,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.rule import Rule
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError
from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError, BaseParserInputParser
class RuleNotFoundError(ParsingError):
@@ -14,7 +14,7 @@ class RuleNotFoundError(ParsingError):
return f"RuleNotFoundError(id={self.id}, key={self.key}"
class RuleParser(BaseParser):
class RuleParser(BaseParserInputParser):
"""
Tries to recognize rules
"""
@@ -22,7 +22,7 @@ class RuleParser(BaseParser):
NAME = "Rule"
def __init__(self, **kwargs):
BaseParser.__init__(self, RuleParser.NAME, 80)
BaseParserInputParser.__init__(self, RuleParser.NAME, 80)
def parse(self, context, parser_input: ParserInput):
"""
+2 -2
View File
@@ -4,7 +4,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import only_successful, get_lexer_nodes, update_compiled
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ParsingError
from parsers.BaseParser import BaseParser, ParsingError, BaseParserInputParser
from parsers.BnfNodeParser import BnfNodeParser
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.SyaNodeParser import SyaNodeParser
@@ -22,7 +22,7 @@ class CannotParseError(ParsingError):
unrecognized: UnrecognizedTokensNode
class UnrecognizedNodeParser(BaseParser):
class UnrecognizedNodeParser(BaseParserInputParser):
"""
This parser comes after the other NodeParsers (Atom, Bnf or Sya)
It will try to resolve all UnrecognizedTokensNode.
+2 -2
View File
@@ -1,7 +1,7 @@
import pytest
from core.tokenizer import Tokenizer
from parsers.BaseParser import BaseParser
from parsers.BaseParser import BaseParser, BaseParserInputParser
@pytest.mark.parametrize("tokens, expected", [
@@ -17,4 +17,4 @@ from parsers.BaseParser import BaseParser
(list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
])
def test_i_can_get_tokens_boundaries(tokens, expected):
assert BaseParser.get_tokens_boundaries(tokens) == expected
assert BaseParserInputParser.get_tokens_boundaries(tokens) == expected
+18 -15
View File
@@ -3,10 +3,11 @@ import pytest
from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer
from parsers.BaseParser import ErrorSink
from parsers.ExpressionParser import ExpressionParser
from parsers.expressions import VariableNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR
from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR, AND
class TestExpressionParser(TestUsingMemoryBasedSheerka):
@@ -17,9 +18,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
def input_parser_with_source(self, source):
sheerka, context, parser = self.init_parser()
parser.reset_parser(context, ParserInput(source))
parser.parser_input.next_token()
return sheerka, context, parser
error_sink = ErrorSink()
parser_input = ParserInput(source)
parser.reset_parser_input(parser_input, error_sink)
return sheerka, context, parser, parser_input, error_sink
def test_i_can_detect_empty_expression(self):
sheerka, context, parser = self.init_parser()
@@ -29,17 +31,18 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
@pytest.mark.parametrize("expression, expected", [
("var1 + var 2", EXPR("var1 + var 2")),
("var1 + var2", EXPR("var1 + var2")),
("variable", VAR("variable")),
("var.attr", VAR("var.attr")),
("var1 and var2", AND(VAR("var1"), VAR("var2")))
])
def test_i_can_parse_input(self, expression, expected):
sheerka, context, parser = self.input_parser_with_source(expression)
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
expected = get_expr_node_from_test_node(expression, expected)
parsed = parser.parse_input()
parsed = parser.parse_input(context, parser_input, error_sink)
assert not parser.has_error
assert not error_sink.has_error
assert parsed == expected
@pytest.mark.parametrize("expression", [
@@ -47,10 +50,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
"var . attr1 . attr2",
])
def test_i_can_parse_variable(self, expression):
sheerka, context, parser = self.input_parser_with_source(expression)
parsed = parser.parse_input()
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
parsed = parser.parse_input(context, parser_input, error_sink)
assert not parser.has_error
assert not error_sink.has_error
assert isinstance(parsed, VariableNode)
assert parsed.name == "var"
assert parsed.attributes == ["attr1", "attr2"]
@@ -60,9 +63,9 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
expression = "do not care var1 + var2 do not care either"
parser_input = ParserInput("text", list(Tokenizer(expression, yield_eof=False)), start=6, end=10)
error_sink = ErrorSink()
parser.reset_parser_input(parser_input, error_sink)
parsed = parser.parse_input(context, parser_input, error_sink)
parser.reset_parser(context, parser_input)
parsed = parser.parse_input()
assert not parser.has_error
assert not error_sink.has_error
assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2"))