Working on #48 : Added BaseExprParser and BaseNodeParser.py
This commit is contained in:
@@ -42,7 +42,7 @@ class ParserInput:
|
|||||||
self.yield_oef = yield_oef
|
self.yield_oef = yield_oef
|
||||||
|
|
||||||
self.start = start or 0
|
self.start = start or 0
|
||||||
if end:
|
if end is not None:
|
||||||
self.original_end = end # forced index of the last token
|
self.original_end = end # forced index of the last token
|
||||||
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
|
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
|
||||||
else:
|
else:
|
||||||
@@ -115,6 +115,7 @@ class ParserInput:
|
|||||||
self.pos += 1
|
self.pos += 1
|
||||||
|
|
||||||
if self.pos > self.end:
|
if self.pos > self.end:
|
||||||
|
self.token = self.tokens[-1]
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.token = self.tokens[self.pos]
|
self.token = self.tokens[self.pos]
|
||||||
@@ -128,7 +129,11 @@ class ParserInput:
|
|||||||
return False
|
return False
|
||||||
self.token = self.tokens[self.pos]
|
self.token = self.tokens[self.pos]
|
||||||
|
|
||||||
return self.pos <= self.end
|
if self.pos <= self.end:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
self.token = self.tokens[-1]
|
||||||
|
return False
|
||||||
|
|
||||||
def the_token_after(self, skip_whitespace=True):
|
def the_token_after(self, skip_whitespace=True):
|
||||||
"""
|
"""
|
||||||
@@ -137,13 +142,13 @@ class ParserInput:
|
|||||||
"""
|
"""
|
||||||
my_pos = self.pos + 1
|
my_pos = self.pos + 1
|
||||||
if my_pos > self.end:
|
if my_pos > self.end:
|
||||||
return Token(TokenKind.EOF, "", -1, -1, -1)
|
return self.tokens[-1]
|
||||||
|
|
||||||
if skip_whitespace:
|
if skip_whitespace:
|
||||||
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
|
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
|
||||||
my_pos += 1
|
my_pos += 1
|
||||||
if my_pos > self.end:
|
if my_pos > self.end:
|
||||||
return Token(TokenKind.EOF, "", -1, -1, -1)
|
return self.tokens[-1]
|
||||||
|
|
||||||
return self.tokens[my_pos]
|
return self.tokens[my_pos]
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,8 @@ from dataclasses import dataclass, field
|
|||||||
|
|
||||||
import core.utils
|
import core.utils
|
||||||
from core.tokenizer import Keywords, TokenKind, Tokenizer
|
from core.tokenizer import Keywords, TokenKind, Tokenizer
|
||||||
from parsers.BaseParser import BaseParser, Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError
|
from parsers.BaseParser import Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError, \
|
||||||
|
BaseParserInputParser
|
||||||
|
|
||||||
|
|
||||||
@dataclass()
|
@dataclass()
|
||||||
@@ -94,7 +95,7 @@ class NameNode(CustomGrammarParserNode):
|
|||||||
return hash(self.get_name())
|
return hash(self.get_name())
|
||||||
|
|
||||||
|
|
||||||
class BaseCustomGrammarParser(BaseParser):
|
class BaseCustomGrammarParser(BaseParserInputParser):
|
||||||
"""
|
"""
|
||||||
Base class for sheerka specific grammars
|
Base class for sheerka specific grammars
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from enum import Enum
|
|||||||
import core.utils
|
import core.utils
|
||||||
from core.tokenizer import TokenKind, Token
|
from core.tokenizer import TokenKind, Token
|
||||||
from core.var_ref import VariableRef
|
from core.var_ref import VariableRef
|
||||||
from parsers.BaseParser import Node, BaseParser, ParsingError
|
from parsers.BaseParser import Node, ParsingError, BaseParserInputParser
|
||||||
|
|
||||||
DEBUG_COMPILED = True
|
DEBUG_COMPILED = True
|
||||||
|
|
||||||
@@ -461,7 +461,7 @@ class SyaAssociativity(Enum):
|
|||||||
return self.value
|
return self.value
|
||||||
|
|
||||||
|
|
||||||
class BaseNodeParser(BaseParser):
|
class BaseNodeParser(BaseParserInputParser):
|
||||||
"""
|
"""
|
||||||
Parser that return LexerNode
|
Parser that return LexerNode
|
||||||
"""
|
"""
|
||||||
|
|||||||
+95
-58
@@ -62,11 +62,6 @@ class BaseParser:
|
|||||||
self.short_name = name
|
self.short_name = name
|
||||||
self.priority = priority
|
self.priority = priority
|
||||||
self.enabled = enabled
|
self.enabled = enabled
|
||||||
|
|
||||||
self.error_sink = []
|
|
||||||
self.context: ExecutionContext = None
|
|
||||||
self.sheerka = None
|
|
||||||
self.parser_input: ParserInput = None
|
|
||||||
self.yield_eof = yield_eof
|
self.yield_eof = yield_eof
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
@@ -80,6 +75,74 @@ class BaseParser:
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
|
def log_result(self, context, source, ret):
    """
    Hook for reporting the outcome of parsing ``source``; currently does nothing.

    :param context: execution context the disabled code would log through
    :param source: text that was parsed
    :param ret: return value produced for ``source``
    :return: None
    """
    # Debug logging is switched off. The former implementation is kept
    # here for reference only:
    #
    # if not self.log.isEnabledFor(logging.DEBUG):
    #     return
    #
    # if ret.status:
    #     value = context.return_value_to_str(ret)
    #     context.log(f"Recognized '{source}' as {value}", self.name)
    # else:
    #     context.log(f"Failed to recognize '{source}'", self.name)
    return None
|
||||||
|
|
||||||
|
def log_multiple_results(self, context, source, list_of_ret):
    """
    Hook for reporting that ``source`` produced several results; currently does nothing.

    :param context: execution context the disabled code would log through
    :param source: text that was parsed
    :param list_of_ret: return values produced for ``source``
    :return: None
    """
    # Debug logging is switched off. The former implementation is kept
    # here for reference only:
    #
    # if not self.log.isEnabledFor(logging.DEBUG):
    #     return
    #
    # context.log(f"Recognized '{source}' as multiple concepts", self.name)
    # for r in list_of_ret:
    #     value = context.return_value_to_str(r)
    #     context.log(f"  Recognized '{value}'", self.name)
    return None
|
||||||
|
|
||||||
|
def get_return_value_body(self, sheerka, source, parsed, try_parse, errors):
    """
    Build, in the one standard way shared by all parsers, the body of a parser's return value.

    :param sheerka: object whose ``new`` method creates the resulting concept
    :param source: text that was parsed
    :param parsed: parsing result, or None when nothing was produced
    :param try_parse: value forwarded as ``try_parsed`` on success
    :param errors: sized, indexable collection of the errors gathered while parsing
    :return: the lone error itself when it is a Concept; a NOT_FOR_ME concept
             when there are errors and ``parsed`` is None; an ERROR concept when
             there are errors alongside a parsed value; a PARSER_RESULT concept
             when there are no errors
    """
    error_count = len(errors)

    # a single error that is already a concept is handed back untouched
    if error_count == 1 and isinstance(errors[0], Concept):
        return errors[0]

    # success path: nothing was reported
    if error_count == 0:
        return sheerka.new(BuiltinConcepts.PARSER_RESULT,
                           parser=self,
                           source=source,
                           body=parsed,
                           try_parsed=try_parse)

    # errors together with a parsed value
    if parsed is not None:
        return sheerka.new(BuiltinConcepts.ERROR,
                           body=errors)

    # errors and nothing parsed
    return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                       body=source,
                       reason=errors)
|
||||||
|
|
||||||
|
@staticmethod
def get_name(name):
    """
    Prepend the parser prefix to ``name``.

    :param name: name to prefix
    :return: ``BaseParser.PREFIX`` followed by ``name``
    """
    prefix = BaseParser.PREFIX
    return prefix + name
|
||||||
|
|
||||||
|
|
||||||
|
class BaseParserInputParser(BaseParser):
|
||||||
|
"""
|
||||||
|
Base parser for stateful parser where context, parser input, and error sink are part of the class
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, name, priority: int, enabled=True, yield_eof=False):
    """
    Initialise the base parser, then the per-parse state held on the instance.

    :param name: forwarded to the parent initialiser
    :param priority: forwarded to the parent initialiser
    :param enabled: forwarded to the parent initialiser
    :param yield_eof: forwarded to the parent initialiser and stored on the instance
    """
    super().__init__(name, priority, enabled, yield_eof)

    self.yield_eof = yield_eof

    # state attached to the instance; everything starts out empty / unset
    self.sheerka = None
    self.context: ExecutionContext = None
    self.parser_input: ParserInput = None
    self.error_sink = []
|
||||||
|
|
||||||
def reset_parser(self, context, parser_input: ParserInput):
|
def reset_parser(self, context, parser_input: ParserInput):
|
||||||
self.context = context
|
self.context = context
|
||||||
self.sheerka = context.sheerka
|
self.sheerka = context.sheerka
|
||||||
@@ -106,54 +169,6 @@ class BaseParser:
|
|||||||
def has_error(self):
|
def has_error(self):
|
||||||
return len(self.error_sink) > 0
|
return len(self.error_sink) > 0
|
||||||
|
|
||||||
def log_result(self, context, source, ret):
|
|
||||||
pass
|
|
||||||
# if not self.log.isEnabledFor(logging.DEBUG):
|
|
||||||
# return
|
|
||||||
#
|
|
||||||
# if ret.status:
|
|
||||||
# value = context.return_value_to_str(ret)
|
|
||||||
# context.log(f"Recognized '{source}' as {value}", self.name)
|
|
||||||
# else:
|
|
||||||
# context.log(f"Failed to recognize '{source}'", self.name)
|
|
||||||
|
|
||||||
def log_multiple_results(self, context, source, list_of_ret):
|
|
||||||
pass
|
|
||||||
# if not self.log.isEnabledFor(logging.DEBUG):
|
|
||||||
# return
|
|
||||||
#
|
|
||||||
# context.log(f"Recognized '{source}' as multiple concepts", self.name)
|
|
||||||
# for r in list_of_ret:
|
|
||||||
# value = context.return_value_to_str(r)
|
|
||||||
# context.log(f" Recognized '{value}'", self.name)
|
|
||||||
|
|
||||||
def get_return_value_body(self, sheerka, source, parsed, try_parse):
|
|
||||||
"""
|
|
||||||
All parsers must return their result in a standard way
|
|
||||||
:param sheerka:
|
|
||||||
:param source:
|
|
||||||
:param parsed:
|
|
||||||
:param try_parse:
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
|
|
||||||
return self.error_sink[0]
|
|
||||||
|
|
||||||
if self.has_error:
|
|
||||||
if parsed is None:
|
|
||||||
return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
|
|
||||||
body=source,
|
|
||||||
reason=self.error_sink)
|
|
||||||
else:
|
|
||||||
return sheerka.new(BuiltinConcepts.ERROR,
|
|
||||||
body=self.error_sink)
|
|
||||||
|
|
||||||
return sheerka.new(BuiltinConcepts.PARSER_RESULT,
|
|
||||||
parser=self,
|
|
||||||
source=source,
|
|
||||||
body=parsed,
|
|
||||||
try_parsed=try_parse)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_input_as_lexer_nodes(parser_input, expected_parser=None):
|
def get_input_as_lexer_nodes(parser_input, expected_parser=None):
|
||||||
"""
|
"""
|
||||||
@@ -229,12 +244,34 @@ class BaseParser:
|
|||||||
|
|
||||||
return list_a
|
return list_a
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_name(name):
|
|
||||||
return BaseParser.PREFIX + name
|
|
||||||
|
|
||||||
|
|
||||||
class BaseExprParser(BaseParser):
|
class BaseExprParser(BaseParser):
|
||||||
|
|
||||||
def parse_input(self):
|
def parse_input(self, context, parser_input, error_sink):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def reset_parser_input(self, parser_input: ParserInput, error_sink):
    """
    Empty the error sink, reset the parser input, then call ``next_token()`` on it once.

    :param parser_input: input to reset; it is given this parser's ``yield_eof``
    :param error_sink: sink that is cleared first and that records a LexerError
                       raised while clearing or resetting
    :return: False when a LexerError was caught (``next_token()`` is then not
             called), True otherwise
    """
    try:
        error_sink.clear()
        parser_input.reset(self.yield_eof)
    except LexerError as lexer_error:
        error_sink.add_error(lexer_error)
        return False
    else:
        parser_input.next_token()
        return True
|
||||||
|
|
||||||
|
|
||||||
|
class ErrorSink:
    """
    Collector for errors, kept in the order they were added.
    """

    def __init__(self):
        # errors recorded so far, oldest first
        self.sink = []

    def add_error(self, error):
        """
        Record ``error`` at the end of the sink.

        :param error: error to record
        """
        recorded = self.sink
        recorded.append(error)

    def clear(self):
        """
        Drop every recorded error, emptying the existing list in place.
        """
        del self.sink[:]

    @property
    def has_error(self):
        """
        :return: True when at least one error has been recorded
        """
        return bool(self.sink)
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ class BnfDefinitionParser(BaseParser):
|
|||||||
self.context = None
|
self.context = None
|
||||||
self.source = ""
|
self.source = ""
|
||||||
self.sheerka = None
|
self.sheerka = None
|
||||||
|
self.error_sink = []
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if not isinstance(other, BnfDefinitionParser):
|
if not isinstance(other, BnfDefinitionParser):
|
||||||
@@ -60,6 +61,10 @@ class BnfDefinitionParser(BaseParser):
|
|||||||
self.next_token()
|
self.next_token()
|
||||||
return error
|
return error
|
||||||
|
|
||||||
|
@property
def has_error(self):
    """
    :return: True when ``error_sink`` holds at least one error
    """
    error_count = len(self.error_sink)
    return error_count > 0
|
||||||
|
|
||||||
def get_token(self) -> Token:
|
def get_token(self) -> Token:
|
||||||
return self._current
|
return self._current
|
||||||
|
|
||||||
@@ -123,7 +128,7 @@ class BnfDefinitionParser(BaseParser):
|
|||||||
False,
|
False,
|
||||||
context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))
|
context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))
|
||||||
|
|
||||||
value = self.get_return_value_body(context.sheerka, self.source, tree, tree)
|
value = self.get_return_value_body(context.sheerka, self.source, tree, tree, self.error_sink)
|
||||||
|
|
||||||
ret = self.sheerka.ret(
|
ret = self.sheerka.ret(
|
||||||
self.name,
|
self.name,
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ class DefConceptParser(BaseCustomGrammarParser):
|
|||||||
self.parser_input.next_token()
|
self.parser_input.next_token()
|
||||||
node = self.parse_def_concept()
|
node = self.parse_def_concept()
|
||||||
|
|
||||||
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
|
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink)
|
||||||
ret = sheerka.ret(self.name, not self.has_error, body)
|
ret = sheerka.ret(self.name, not self.has_error, body)
|
||||||
|
|
||||||
self.log_result(context, parser_input.as_text(), ret)
|
self.log_result(context, parser_input.as_text(), ret)
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ class DefRuleParser(BaseCustomGrammarParser):
|
|||||||
self.parser_input.next_token()
|
self.parser_input.next_token()
|
||||||
node = self.parse_def_rule()
|
node = self.parse_def_rule()
|
||||||
|
|
||||||
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
|
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink)
|
||||||
ret = sheerka.ret(self.name, not self.has_error, body)
|
ret = sheerka.ret(self.name, not self.has_error, body)
|
||||||
|
|
||||||
self.log_result(context, parser_input.as_text(), ret)
|
self.log_result(context, parser_input.as_text(), ret)
|
||||||
|
|||||||
@@ -4,10 +4,10 @@ from core.concept import VARIABLE_PREFIX
|
|||||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||||
from core.tokenizer import TokenKind
|
from core.tokenizer import TokenKind
|
||||||
from core.utils import str_concept
|
from core.utils import str_concept
|
||||||
from parsers.BaseParser import BaseParser
|
from parsers.BaseParser import BaseParser, BaseParserInputParser
|
||||||
|
|
||||||
|
|
||||||
class ExactConceptParser(BaseParser):
|
class ExactConceptParser(BaseParserInputParser):
|
||||||
"""
|
"""
|
||||||
Tries to recognize a single concept
|
Tries to recognize a single concept
|
||||||
"""
|
"""
|
||||||
@@ -15,7 +15,7 @@ class ExactConceptParser(BaseParser):
|
|||||||
MAX_WORDS_SIZE = 6
|
MAX_WORDS_SIZE = 6
|
||||||
|
|
||||||
def __init__(self, max_word_size=None, **kwargs):
|
def __init__(self, max_word_size=None, **kwargs):
|
||||||
BaseParser.__init__(self, "ExactConcept", 80)
|
BaseParserInputParser.__init__(self, "ExactConcept", 80)
|
||||||
self.max_word_size = max_word_size
|
self.max_word_size = max_word_size
|
||||||
|
|
||||||
def parse(self, context, parser_input: ParserInput):
|
def parse(self, context, parser_input: ParserInput):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from core.builtin_concepts_ids import BuiltinConcepts
|
|||||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||||
from core.tokenizer import TokenKind
|
from core.tokenizer import TokenKind
|
||||||
from core.utils import get_text_from_tokens
|
from core.utils import get_text_from_tokens
|
||||||
from parsers.BaseParser import BaseExprParser
|
from parsers.BaseParser import BaseExprParser, ErrorSink
|
||||||
from parsers.FunctionParser import FunctionParser
|
from parsers.FunctionParser import FunctionParser
|
||||||
from parsers.LogicalOperatorParser import LogicalOperatorParser
|
from parsers.LogicalOperatorParser import LogicalOperatorParser
|
||||||
from parsers.RelationalOperatorParser import RelationalOperatorParser
|
from parsers.RelationalOperatorParser import RelationalOperatorParser
|
||||||
@@ -18,9 +18,10 @@ class ExpressionParser(BaseExprParser):
|
|||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False)
|
super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False)
|
||||||
self.logical_parser = LogicalOperatorParser()
|
self.variable_parser = VariableOrNamesParser()
|
||||||
self.relational_parser = RelationalOperatorParser()
|
|
||||||
self.function_parser = FunctionParser()
|
self.function_parser = FunctionParser()
|
||||||
|
self.relational_parser = RelationalOperatorParser()
|
||||||
|
self.logical_parser = LogicalOperatorParser(expr_parser=self.variable_parser)
|
||||||
|
|
||||||
def parse(self, context, parser_input: ParserInput):
|
def parse(self, context, parser_input: ParserInput):
|
||||||
"""
|
"""
|
||||||
@@ -40,61 +41,109 @@ class ExpressionParser(BaseExprParser):
|
|||||||
False,
|
False,
|
||||||
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
||||||
|
|
||||||
if not self.reset_parser(context, parser_input):
|
error_sink = ErrorSink()
|
||||||
return self.sheerka.ret(
|
if not self.reset_parser_input(parser_input, error_sink):
|
||||||
|
return context.sheerka.ret(
|
||||||
self.name,
|
self.name,
|
||||||
False,
|
False,
|
||||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
|
||||||
|
|
||||||
self.parser_input.next_token()
|
node = self.parse_input(context, parser_input, error_sink)
|
||||||
|
|
||||||
node = self.parse_input()
|
|
||||||
if isinstance(node, ParenthesisNode):
|
if isinstance(node, ParenthesisNode):
|
||||||
node = node.node
|
node = node.node
|
||||||
|
|
||||||
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node)
|
value = self.get_return_value_body(context.sheerka, parser_input.as_text(), node, node, error_sink.sink)
|
||||||
|
|
||||||
ret = self.sheerka.ret(
|
ret = context.sheerka.ret(
|
||||||
self.name,
|
self.name,
|
||||||
not self.has_error,
|
not error_sink.has_error,
|
||||||
value)
|
value)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def parse_input(self):
|
def parse_input(self, context, parser_input, error_sink):
|
||||||
pos = self.parser_input.pos
|
pos = parser_input.pos
|
||||||
for parser in []: # [self.logical_parser, self.relational_parser, self.function_parser]:
|
for parser in [self.logical_parser,
|
||||||
self.parser_input.seek(pos) # reset position
|
self.variable_parser]: # [self.logical_parser, self.relational_parser, self.function_parser]:
|
||||||
if parser.reset_parser(self.context, self.parser_input):
|
parser_input.seek(pos) # reset position
|
||||||
res = parser.parse_input()
|
res = parser.parse_input(context, parser_input, error_sink)
|
||||||
if res and not parser.has_error:
|
if res and not error_sink.has_error:
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class VariableOrNamesParser(BaseExprParser):
|
||||||
|
NAME = "VariableOrNames"
|
||||||
|
|
||||||
|
def __init__(self, **kwargs):
    """
    Initialise the parser under ``VariableOrNamesParser.NAME``.

    :param kwargs: accepted but not used here
    """
    # NOTE(review): 60 and False look like priority and enabled - confirm against BaseParser.__init__
    parser_name = VariableOrNamesParser.NAME
    super().__init__(parser_name, 60, False, yield_eof=False)
|
||||||
|
|
||||||
|
def parse(self, context, parser_input: ParserInput):
    """
    Parse ``parser_input`` and wrap the outcome in a sheerka return value.

    :param context: execution context; supplies ``sheerka`` and receives one log entry
    :param parser_input: input to parse
    :return: None when ``parser_input`` is not a ParserInput; otherwise the value
             built by ``sheerka.ret`` - flagged False for an empty input, a failed
             reset, or any error recorded while parsing
    """
    if not isinstance(parser_input, ParserInput):
        return None

    context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
    sheerka = context.sheerka

    # nothing to parse
    if parser_input.is_empty():
        return sheerka.ret(self.name,
                           False,
                           sheerka.new(BuiltinConcepts.IS_EMPTY))

    # errors go to a sink created for this call only
    error_sink = ErrorSink()
    if not self.reset_parser_input(parser_input, error_sink):
        return sheerka.ret(self.name,
                           False,
                           sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))

    node = self.parse_input(context, parser_input, error_sink)
    if isinstance(node, ParenthesisNode):
        # keep only the node held by the parenthesis wrapper
        node = node.node

    body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, error_sink.sink)
    return sheerka.ret(self.name,
                       not error_sink.has_error,
                       body)
|
||||||
|
|
||||||
|
def parse_input(self, context, parser_input, error_sink):
|
||||||
# try to recognize a VariableNode
|
# try to recognize a VariableNode
|
||||||
dots_found = []
|
dots_found = []
|
||||||
for i, token in enumerate(self.parser_input.as_tokens()):
|
pos = parser_input.pos
|
||||||
|
for i, token in enumerate(parser_input.as_tokens()):
|
||||||
if token.type == TokenKind.DOT:
|
if token.type == TokenKind.DOT:
|
||||||
dots_found.append(i)
|
dots_found.append(i)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not (token.type == TokenKind.WHITESPACE or
|
if not (token.type == TokenKind.WHITESPACE or
|
||||||
token.type == TokenKind.IDENTIFIER and token.value.isidentifier()):
|
token.type == TokenKind.IDENTIFIER and token.value.isidentifier()):
|
||||||
return NameExprNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens())
|
return NameExprNode(parser_input.start, parser_input.end, parser_input.as_tokens())
|
||||||
|
|
||||||
if len(dots_found) == 0:
|
if len(dots_found) == 0:
|
||||||
return VariableNode(pos, self.parser_input.end, self.parser_input.as_tokens(), self.parser_input.as_text())
|
return VariableNode(pos, parser_input.end, parser_input.as_tokens(), parser_input.as_text())
|
||||||
|
|
||||||
parts = []
|
parts = []
|
||||||
current_dot_pos = pos
|
current_dot_pos = pos
|
||||||
for dot_found in dots_found:
|
for dot_found in dots_found:
|
||||||
parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: dot_found]))
|
parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: dot_found]))
|
||||||
current_dot_pos = dot_found + 1
|
current_dot_pos = dot_found + 1
|
||||||
|
|
||||||
# do not forget the trailing part
|
# do not forget the trailing part
|
||||||
parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: self.parser_input.end + 1]))
|
parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: parser_input.end + 1]))
|
||||||
|
|
||||||
return VariableNode(self.parser_input.start,
|
return VariableNode(parser_input.start,
|
||||||
self.parser_input.end,
|
parser_input.end,
|
||||||
self.parser_input.as_tokens(),
|
parser_input.as_tokens(),
|
||||||
parts[0],
|
parts[0],
|
||||||
*parts[1:])
|
*parts[1:])
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ from core.sheerka.services.SheerkaExecute import ParserInput
|
|||||||
from core.tokenizer import TokenKind
|
from core.tokenizer import TokenKind
|
||||||
from core.utils import get_n_clones
|
from core.utils import get_n_clones
|
||||||
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
|
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode
|
||||||
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser
|
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser, \
|
||||||
|
BaseParserInputParser
|
||||||
from parsers.BnfNodeParser import BnfNodeParser
|
from parsers.BnfNodeParser import BnfNodeParser
|
||||||
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
||||||
from parsers.RuleParser import RuleParser
|
from parsers.RuleParser import RuleParser
|
||||||
@@ -26,7 +27,7 @@ class FunctionParserNode(Node):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FunctionParser(BaseExprParser):
|
class FunctionParser(BaseParserInputParser):
|
||||||
"""
|
"""
|
||||||
The parser will be used to parse func(x, y, z)
|
The parser will be used to parse func(x, y, z)
|
||||||
where x, y and z can be source code, concepts or other functions
|
where x, y and z can be source code, concepts or other functions
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from core.sheerka.services.sheerka_service import FailedToCompileError
|
|||||||
from core.tokenizer import TokenKind, Tokenizer, Keywords
|
from core.tokenizer import TokenKind, Tokenizer, Keywords
|
||||||
from core.utils import get_text_from_tokens
|
from core.utils import get_text_from_tokens
|
||||||
from parsers.BaseNodeParser import UnrecognizedTokensNode
|
from parsers.BaseNodeParser import UnrecognizedTokensNode
|
||||||
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser
|
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser, ErrorSink
|
||||||
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
||||||
from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \
|
from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \
|
||||||
ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode
|
ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode
|
||||||
@@ -76,6 +76,7 @@ class LogicalOperatorParser(BaseExprParser):
|
|||||||
self.and_tokens = list(Tokenizer(" and ", yield_eof=False))
|
self.and_tokens = list(Tokenizer(" and ", yield_eof=False))
|
||||||
self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False))
|
self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False))
|
||||||
self.not_tokens = list(Tokenizer("not ", yield_eof=False))
|
self.not_tokens = list(Tokenizer("not ", yield_eof=False))
|
||||||
|
self.expr_parser = kwargs.get("expr_parser", None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def clean_parenthesis_nodes(nodes):
|
def clean_parenthesis_nodes(nodes):
|
||||||
@@ -101,144 +102,161 @@ class LogicalOperatorParser(BaseExprParser):
|
|||||||
False,
|
False,
|
||||||
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
||||||
|
|
||||||
if not self.reset_parser(context, parser_input):
|
error_sink = ErrorSink()
|
||||||
return self.sheerka.ret(
|
if not self.reset_parser_input(parser_input, error_sink):
|
||||||
|
return context.sheerka.ret(
|
||||||
self.name,
|
self.name,
|
||||||
False,
|
False,
|
||||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))
|
||||||
|
|
||||||
self.parser_input.next_token()
|
tree = self.parse_input(context, parser_input, error_sink)
|
||||||
tree = self.parse_input()
|
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
if token and token.type != TokenKind.EOF:
|
if token and token.type != TokenKind.EOF:
|
||||||
self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
|
error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
|
||||||
|
|
||||||
if isinstance(tree, ParenthesisNode):
|
if isinstance(tree, ParenthesisNode):
|
||||||
tree = tree.node
|
tree = tree.node
|
||||||
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), tree, tree)
|
|
||||||
|
|
||||||
ret = self.sheerka.ret(
|
value = self.get_return_value_body(context.sheerka,
|
||||||
self.name,
|
parser_input.as_text(),
|
||||||
not self.has_error,
|
tree,
|
||||||
value)
|
tree,
|
||||||
|
error_sink.sink)
|
||||||
|
|
||||||
|
ret = context.sheerka.ret(self.name,
|
||||||
|
not error_sink.has_error,
|
||||||
|
value)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def parse_input(self):
|
def parse_input(self, context, parser_input, error_sink):
|
||||||
return self.parse_or()
|
return self.parse_or(context, parser_input, error_sink)
|
||||||
|
|
||||||
def parse_or(self):
|
def parse_or(self, context, parser_input, error_sink):
|
||||||
start = self.parser_input.pos
|
start = parser_input.pos
|
||||||
expr = self.parse_and()
|
expr = self.parse_and(context, parser_input, error_sink)
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
if token.type != TokenKind.IDENTIFIER or token.value != "or":
|
if token.type != TokenKind.IDENTIFIER or token.value != "or":
|
||||||
return expr
|
return expr
|
||||||
|
|
||||||
parts = [expr]
|
parts = [expr]
|
||||||
while token.type == TokenKind.IDENTIFIER and token.value == "or":
|
while token.type == TokenKind.IDENTIFIER and token.value == "or":
|
||||||
self.parser_input.next_token()
|
parser_input.next_token()
|
||||||
expr = self.parse_and()
|
expr = self.parse_and(context, parser_input, error_sink)
|
||||||
if expr is None:
|
if expr is None:
|
||||||
self.add_error(UnexpectedEofParsingError("When parsing 'or'"))
|
error_sink.add_error(UnexpectedEofParsingError("When parsing 'or'"))
|
||||||
end = self.parser_input.pos
|
end = parser_input.pos
|
||||||
self.clean_parenthesis_nodes(parts)
|
self.clean_parenthesis_nodes(parts)
|
||||||
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
|
return OrNode(start, end, parser_input.tokens[start: end + 1], *parts)
|
||||||
parts.append(expr)
|
parts.append(expr)
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
|
|
||||||
end = parts[-1].end
|
end = parts[-1].end
|
||||||
self.clean_parenthesis_nodes(parts)
|
self.clean_parenthesis_nodes(parts)
|
||||||
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
|
return OrNode(start, end, parser_input.tokens[start: end + 1], *parts)
|
||||||
|
|
||||||
def parse_and(self):
|
def parse_and(self, context, parser_input, error_sink):
|
||||||
start = self.parser_input.pos
|
start = parser_input.pos
|
||||||
expr = self.parse_not()
|
expr = self.parse_not(context, parser_input, error_sink)
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
if token.type != TokenKind.IDENTIFIER or token.value != "and":
|
if token.type != TokenKind.IDENTIFIER or token.value != "and":
|
||||||
return expr
|
return expr
|
||||||
|
|
||||||
parts = [expr]
|
parts = [expr]
|
||||||
while token.type == TokenKind.IDENTIFIER and token.value == "and":
|
while token.type == TokenKind.IDENTIFIER and token.value == "and":
|
||||||
self.parser_input.next_token()
|
parser_input.next_token()
|
||||||
expr = self.parse_not()
|
expr = self.parse_not(context, parser_input, error_sink)
|
||||||
if expr is None:
|
if expr is None:
|
||||||
self.add_error(UnexpectedEofParsingError("When parsing 'and'"))
|
error_sink.add_error(UnexpectedEofParsingError("When parsing 'and'"))
|
||||||
end = self.parser_input.pos
|
end = parser_input.pos
|
||||||
self.clean_parenthesis_nodes(parts)
|
self.clean_parenthesis_nodes(parts)
|
||||||
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
|
return AndNode(start, end, parser_input.tokens[start: end + 1], *parts)
|
||||||
parts.append(expr)
|
parts.append(expr)
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
|
|
||||||
end = parts[-1].end
|
end = parts[-1].end
|
||||||
self.clean_parenthesis_nodes(parts)
|
self.clean_parenthesis_nodes(parts)
|
||||||
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
|
return AndNode(start, end, parser_input.tokens[start: end + 1], *parts)
|
||||||
|
|
||||||
def parse_not(self):
|
def parse_not(self, context, parser_input, error_sink):
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
start = self.parser_input.pos
|
start = parser_input.pos
|
||||||
if token.type == TokenKind.IDENTIFIER and token.value == "not":
|
if token.type == TokenKind.IDENTIFIER and token.value == "not":
|
||||||
self.parser_input.next_token()
|
parser_input.next_token()
|
||||||
parsed = self.parse_not()
|
parsed = self.parse_not(context, parser_input, error_sink)
|
||||||
node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed
|
node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed
|
||||||
return NotNode(start,
|
return NotNode(start,
|
||||||
parsed.end,
|
parsed.end,
|
||||||
self.parser_input.tokens[start: parsed.end + 1],
|
parser_input.tokens[start: parsed.end + 1],
|
||||||
node)
|
node)
|
||||||
else:
|
else:
|
||||||
return self.parse_names()
|
return self.parse_names(context, parser_input, error_sink)
|
||||||
|
|
||||||
def parse_names(self):
|
def parse_names(self, context, parser_input, error_sink):
|
||||||
|
|
||||||
def stop():
|
def stop():
|
||||||
return token.type == TokenKind.EOF or \
|
return token.type == TokenKind.EOF or \
|
||||||
paren_count == 0 and token.type == TokenKind.RPAR or \
|
paren_count == 0 and token.type == TokenKind.RPAR or \
|
||||||
token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not")
|
token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not")
|
||||||
|
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
if token.type == TokenKind.EOF:
|
if token.type == TokenKind.EOF:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if token.type == TokenKind.LPAR:
|
if token.type == TokenKind.LPAR:
|
||||||
start = self.parser_input.pos
|
start = parser_input.pos
|
||||||
self.parser_input.next_token()
|
parser_input.next_token()
|
||||||
expr = self.parse_or()
|
expr = self.parse_or(context, parser_input, error_sink)
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
if token.type != TokenKind.RPAR:
|
if token.type != TokenKind.RPAR:
|
||||||
self.error_sink.append(
|
error_sink.add_error(
|
||||||
UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
|
UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
|
||||||
return expr
|
return expr
|
||||||
end = self.parser_input.pos
|
end = parser_input.pos
|
||||||
self.parser_input.next_token()
|
parser_input.next_token()
|
||||||
return ParenthesisNode(start, end, None, expr)
|
return ParenthesisNode(start, end, None, expr)
|
||||||
|
|
||||||
buffer = []
|
|
||||||
paren_count = 0
|
paren_count = 0
|
||||||
last_paren = None
|
last_paren = None
|
||||||
start = self.parser_input.pos
|
start = parser_input.pos
|
||||||
|
end = parser_input.pos
|
||||||
|
last_is_whitespace = False
|
||||||
while not stop():
|
while not stop():
|
||||||
buffer.append(token)
|
last_is_whitespace = token.type == TokenKind.WHITESPACE
|
||||||
|
end += 1
|
||||||
if token.type == TokenKind.LPAR:
|
if token.type == TokenKind.LPAR:
|
||||||
last_paren = token
|
last_paren = token
|
||||||
paren_count += 1
|
paren_count += 1
|
||||||
if token.type == TokenKind.RPAR:
|
if token.type == TokenKind.RPAR:
|
||||||
paren_count -= 1
|
paren_count -= 1
|
||||||
self.parser_input.next_token(False)
|
parser_input.next_token(False)
|
||||||
token = self.parser_input.token
|
token = parser_input.token
|
||||||
|
|
||||||
if len(buffer) == 0:
|
if last_is_whitespace:
|
||||||
|
end -= 1
|
||||||
|
|
||||||
|
if start == end:
|
||||||
if token.type != TokenKind.RPAR:
|
if token.type != TokenKind.RPAR:
|
||||||
self.error_sink.append(LeftPartNotFoundError())
|
error_sink.add_error(LeftPartNotFoundError())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if paren_count != 0:
|
if paren_count != 0:
|
||||||
self.error_sink.append(ParenthesisMismatchError(last_paren))
|
error_sink.add_error(ParenthesisMismatchError(last_paren))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if buffer[-1].type == TokenKind.WHITESPACE:
|
if self.expr_parser:
|
||||||
buffer.pop()
|
new_parsing_input = ParserInput(
|
||||||
|
None,
|
||||||
end = start + len(buffer) - 1
|
tokens=parser_input.tokens,
|
||||||
return NameExprNode(start, end, buffer)
|
length=parser_input.length,
|
||||||
|
start=start,
|
||||||
|
end=end - 1,
|
||||||
|
yield_oef=False).reset()
|
||||||
|
new_parsing_input.next_token()
|
||||||
|
return self.expr_parser.parse_input(context, new_parsing_input, error_sink)
|
||||||
|
else:
|
||||||
|
return NameExprNode(start, end - 1, parser_input.tokens[start:end])
|
||||||
|
|
||||||
def compile_conjunctions(self, context, conjunctions, who):
|
def compile_conjunctions(self, context, conjunctions, who):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import core.utils
|
|||||||
from core.builtin_concepts import BuiltinConcepts
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||||
from core.tokenizer import TokenKind
|
from core.tokenizer import TokenKind
|
||||||
from parsers.BaseParser import BaseParser, Node, ParsingError
|
from parsers.BaseParser import BaseParser, Node, ParsingError, BaseParserInputParser
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -107,7 +107,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
|
|||||||
self.names.add(node.id)
|
self.names.add(node.id)
|
||||||
|
|
||||||
|
|
||||||
class PythonParser(BaseParser):
|
class PythonParser(BaseParserInputParser):
|
||||||
"""
|
"""
|
||||||
Parse Python scripts
|
Parse Python scripts
|
||||||
"""
|
"""
|
||||||
@@ -116,7 +116,7 @@ class PythonParser(BaseParser):
|
|||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
|
|
||||||
BaseParser.__init__(self, PythonParser.NAME, 50)
|
BaseParserInputParser.__init__(self, PythonParser.NAME, 50)
|
||||||
self.source = kwargs.get("source", "<undef>")
|
self.source = kwargs.get("source", "<undef>")
|
||||||
|
|
||||||
def parse(self, context, parser_input: ParserInput):
|
def parse(self, context, parser_input: ParserInput):
|
||||||
|
|||||||
@@ -2,13 +2,13 @@ from core.builtin_concepts import BuiltinConcepts
|
|||||||
from core.builtin_helpers import CreateObjectIdentifiers
|
from core.builtin_helpers import CreateObjectIdentifiers
|
||||||
from parsers.BaseNodeParser import ConceptNode, RuleNode, VariableNode
|
from parsers.BaseNodeParser import ConceptNode, RuleNode, VariableNode
|
||||||
from parsers.BaseNodeParser import SourceCodeWithConceptNode
|
from parsers.BaseNodeParser import SourceCodeWithConceptNode
|
||||||
from parsers.BaseParser import BaseParser
|
from parsers.BaseParser import BaseParser, BaseParserInputParser
|
||||||
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
|
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
|
||||||
|
|
||||||
unrecognized_nodes_parser = UnrecognizedNodeParser()
|
unrecognized_nodes_parser = UnrecognizedNodeParser()
|
||||||
|
|
||||||
|
|
||||||
class PythonWithConceptsParser(BaseParser):
|
class PythonWithConceptsParser(BaseParserInputParser):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__("PythonWithConcepts", 20)
|
super().__init__("PythonWithConcepts", 20)
|
||||||
|
|
||||||
|
|||||||
@@ -4,12 +4,12 @@ from core.builtin_concepts_ids import BuiltinConcepts
|
|||||||
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
|
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
|
||||||
from core.tokenizer import TokenKind, Token
|
from core.tokenizer import TokenKind, Token
|
||||||
from core.utils import get_text_from_tokens
|
from core.utils import get_text_from_tokens
|
||||||
from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser
|
from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser, BaseParserInputParser
|
||||||
from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \
|
from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \
|
||||||
ParenthesisNode, LeftPartNotFoundError
|
ParenthesisNode, LeftPartNotFoundError
|
||||||
|
|
||||||
|
|
||||||
class RelationalOperatorParser(BaseExprParser):
|
class RelationalOperatorParser(BaseParserInputParser):
|
||||||
"""
|
"""
|
||||||
Parses xxx (== | > | < | >= | <= | != | in | not in) yyy
|
Parses xxx (== | > | < | >= | <= | != | in | not in) yyy
|
||||||
Nothing else
|
Nothing else
|
||||||
@@ -53,7 +53,7 @@ class RelationalOperatorParser(BaseExprParser):
|
|||||||
if isinstance(node, ParenthesisNode):
|
if isinstance(node, ParenthesisNode):
|
||||||
node = node.node
|
node = node.node
|
||||||
|
|
||||||
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node)
|
value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node, self.error_sink)
|
||||||
|
|
||||||
ret = self.sheerka.ret(
|
ret = self.sheerka.ret(
|
||||||
self.name,
|
self.name,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from core.builtin_concepts import BuiltinConcepts
|
|||||||
from core.rule import Rule
|
from core.rule import Rule
|
||||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||||
from core.tokenizer import TokenKind
|
from core.tokenizer import TokenKind
|
||||||
from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError
|
from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError, BaseParserInputParser
|
||||||
|
|
||||||
|
|
||||||
class RuleNotFoundError(ParsingError):
|
class RuleNotFoundError(ParsingError):
|
||||||
@@ -14,7 +14,7 @@ class RuleNotFoundError(ParsingError):
|
|||||||
return f"RuleNotFoundError(id={self.id}, key={self.key}"
|
return f"RuleNotFoundError(id={self.id}, key={self.key}"
|
||||||
|
|
||||||
|
|
||||||
class RuleParser(BaseParser):
|
class RuleParser(BaseParserInputParser):
|
||||||
"""
|
"""
|
||||||
Tries to recognize rules
|
Tries to recognize rules
|
||||||
"""
|
"""
|
||||||
@@ -22,7 +22,7 @@ class RuleParser(BaseParser):
|
|||||||
NAME = "Rule"
|
NAME = "Rule"
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
BaseParser.__init__(self, RuleParser.NAME, 80)
|
BaseParserInputParser.__init__(self, RuleParser.NAME, 80)
|
||||||
|
|
||||||
def parse(self, context, parser_input: ParserInput):
|
def parse(self, context, parser_input: ParserInput):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import core.utils
|
|||||||
from core.builtin_concepts import BuiltinConcepts
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
from core.builtin_helpers import only_successful, get_lexer_nodes, update_compiled
|
from core.builtin_helpers import only_successful, get_lexer_nodes, update_compiled
|
||||||
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
|
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
|
||||||
from parsers.BaseParser import BaseParser, ParsingError
|
from parsers.BaseParser import BaseParser, ParsingError, BaseParserInputParser
|
||||||
from parsers.BnfNodeParser import BnfNodeParser
|
from parsers.BnfNodeParser import BnfNodeParser
|
||||||
from parsers.SequenceNodeParser import SequenceNodeParser
|
from parsers.SequenceNodeParser import SequenceNodeParser
|
||||||
from parsers.SyaNodeParser import SyaNodeParser
|
from parsers.SyaNodeParser import SyaNodeParser
|
||||||
@@ -22,7 +22,7 @@ class CannotParseError(ParsingError):
|
|||||||
unrecognized: UnrecognizedTokensNode
|
unrecognized: UnrecognizedTokensNode
|
||||||
|
|
||||||
|
|
||||||
class UnrecognizedNodeParser(BaseParser):
|
class UnrecognizedNodeParser(BaseParserInputParser):
|
||||||
"""
|
"""
|
||||||
This parser comes after the other NodeParsers (Atom, Bnf or Sya)
|
This parser comes after the other NodeParsers (Atom, Bnf or Sya)
|
||||||
It will try to resolve all UnrecognizedTokensNode.
|
It will try to resolve all UnrecognizedTokensNode.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from core.tokenizer import Tokenizer
|
from core.tokenizer import Tokenizer
|
||||||
from parsers.BaseParser import BaseParser
|
from parsers.BaseParser import BaseParser, BaseParserInputParser
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("tokens, expected", [
|
@pytest.mark.parametrize("tokens, expected", [
|
||||||
@@ -17,4 +17,4 @@ from parsers.BaseParser import BaseParser
|
|||||||
(list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
|
(list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
|
||||||
])
|
])
|
||||||
def test_i_can_get_tokens_boundaries(tokens, expected):
|
def test_i_can_get_tokens_boundaries(tokens, expected):
|
||||||
assert BaseParser.get_tokens_boundaries(tokens) == expected
|
assert BaseParserInputParser.get_tokens_boundaries(tokens) == expected
|
||||||
|
|||||||
@@ -3,10 +3,11 @@ import pytest
|
|||||||
from core.builtin_concepts_ids import BuiltinConcepts
|
from core.builtin_concepts_ids import BuiltinConcepts
|
||||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||||
from core.tokenizer import Tokenizer
|
from core.tokenizer import Tokenizer
|
||||||
|
from parsers.BaseParser import ErrorSink
|
||||||
from parsers.ExpressionParser import ExpressionParser
|
from parsers.ExpressionParser import ExpressionParser
|
||||||
from parsers.expressions import VariableNode
|
from parsers.expressions import VariableNode
|
||||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||||
from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR
|
from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR, AND
|
||||||
|
|
||||||
|
|
||||||
class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
||||||
@@ -17,9 +18,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
|||||||
|
|
||||||
def input_parser_with_source(self, source):
|
def input_parser_with_source(self, source):
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
parser.reset_parser(context, ParserInput(source))
|
error_sink = ErrorSink()
|
||||||
parser.parser_input.next_token()
|
parser_input = ParserInput(source)
|
||||||
return sheerka, context, parser
|
parser.reset_parser_input(parser_input, error_sink)
|
||||||
|
return sheerka, context, parser, parser_input, error_sink
|
||||||
|
|
||||||
def test_i_can_detect_empty_expression(self):
|
def test_i_can_detect_empty_expression(self):
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
@@ -29,17 +31,18 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
|
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
|
||||||
|
|
||||||
@pytest.mark.parametrize("expression, expected", [
|
@pytest.mark.parametrize("expression, expected", [
|
||||||
("var1 + var 2", EXPR("var1 + var 2")),
|
("var1 + var2", EXPR("var1 + var2")),
|
||||||
("variable", VAR("variable")),
|
("variable", VAR("variable")),
|
||||||
("var.attr", VAR("var.attr")),
|
("var.attr", VAR("var.attr")),
|
||||||
|
("var1 and var2", AND(VAR("var1"), VAR("var2")))
|
||||||
])
|
])
|
||||||
def test_i_can_parse_input(self, expression, expected):
|
def test_i_can_parse_input(self, expression, expected):
|
||||||
sheerka, context, parser = self.input_parser_with_source(expression)
|
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
|
||||||
expected = get_expr_node_from_test_node(expression, expected)
|
expected = get_expr_node_from_test_node(expression, expected)
|
||||||
|
|
||||||
parsed = parser.parse_input()
|
parsed = parser.parse_input(context, parser_input, error_sink)
|
||||||
|
|
||||||
assert not parser.has_error
|
assert not error_sink.has_error
|
||||||
assert parsed == expected
|
assert parsed == expected
|
||||||
|
|
||||||
@pytest.mark.parametrize("expression", [
|
@pytest.mark.parametrize("expression", [
|
||||||
@@ -47,10 +50,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
|||||||
"var . attr1 . attr2",
|
"var . attr1 . attr2",
|
||||||
])
|
])
|
||||||
def test_i_can_parse_variable(self, expression):
|
def test_i_can_parse_variable(self, expression):
|
||||||
sheerka, context, parser = self.input_parser_with_source(expression)
|
sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression)
|
||||||
parsed = parser.parse_input()
|
parsed = parser.parse_input(context, parser_input, error_sink)
|
||||||
|
|
||||||
assert not parser.has_error
|
assert not error_sink.has_error
|
||||||
assert isinstance(parsed, VariableNode)
|
assert isinstance(parsed, VariableNode)
|
||||||
assert parsed.name == "var"
|
assert parsed.name == "var"
|
||||||
assert parsed.attributes == ["attr1", "attr2"]
|
assert parsed.attributes == ["attr1", "attr2"]
|
||||||
@@ -60,9 +63,9 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
|
|||||||
|
|
||||||
expression = "do not care var1 + var2 do not care either"
|
expression = "do not care var1 + var2 do not care either"
|
||||||
parser_input = ParserInput("text", list(Tokenizer(expression, yield_eof=False)), start=6, end=10)
|
parser_input = ParserInput("text", list(Tokenizer(expression, yield_eof=False)), start=6, end=10)
|
||||||
|
error_sink = ErrorSink()
|
||||||
|
parser.reset_parser_input(parser_input, error_sink)
|
||||||
|
parsed = parser.parse_input(context, parser_input, error_sink)
|
||||||
|
|
||||||
parser.reset_parser(context, parser_input)
|
assert not error_sink.has_error
|
||||||
parsed = parser.parse_input()
|
|
||||||
|
|
||||||
assert not parser.has_error
|
|
||||||
assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2"))
|
assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2"))
|
||||||
|
|||||||
Reference in New Issue
Block a user