diff --git a/src/core/sheerka/services/SheerkaExecute.py b/src/core/sheerka/services/SheerkaExecute.py index 51061f0..2f181a1 100644 --- a/src/core/sheerka/services/SheerkaExecute.py +++ b/src/core/sheerka/services/SheerkaExecute.py @@ -42,7 +42,7 @@ class ParserInput: self.yield_oef = yield_oef self.start = start or 0 - if end: + if end is not None: self.original_end = end # forced index of the last token self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens else: @@ -115,6 +115,7 @@ class ParserInput: self.pos += 1 if self.pos > self.end: + self.token = self.tokens[-1] return False self.token = self.tokens[self.pos] @@ -128,7 +129,11 @@ class ParserInput: return False self.token = self.tokens[self.pos] - return self.pos <= self.end + if self.pos <= self.end: + return True + else: + self.token = self.tokens[-1] + return False def the_token_after(self, skip_whitespace=True): """ @@ -137,13 +142,13 @@ class ParserInput: """ my_pos = self.pos + 1 if my_pos > self.end: - return Token(TokenKind.EOF, "", -1, -1, -1) + return self.tokens[-1] if skip_whitespace: while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE): my_pos += 1 if my_pos > self.end: - return Token(TokenKind.EOF, "", -1, -1, -1) + return self.tokens[-1] return self.tokens[my_pos] diff --git a/src/parsers/BaseCustomGrammarParser.py b/src/parsers/BaseCustomGrammarParser.py index ae45488..558295c 100644 --- a/src/parsers/BaseCustomGrammarParser.py +++ b/src/parsers/BaseCustomGrammarParser.py @@ -2,7 +2,8 @@ from dataclasses import dataclass, field import core.utils from core.tokenizer import Keywords, TokenKind, Tokenizer -from parsers.BaseParser import BaseParser, Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError +from parsers.BaseParser import Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError, \ + BaseParserInputParser @dataclass() @@ -94,7 +95,7 @@ class NameNode(CustomGrammarParserNode): return hash(self.get_name()) -class BaseCustomGrammarParser(BaseParser): +class BaseCustomGrammarParser(BaseParserInputParser): """ Base class for sheerka specific grammars """ diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index a943779..a12cb56 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -4,7 +4,7 @@ from enum import Enum import core.utils from core.tokenizer import TokenKind, Token from core.var_ref import VariableRef -from parsers.BaseParser import Node, BaseParser, ParsingError +from parsers.BaseParser import Node, ParsingError, BaseParserInputParser DEBUG_COMPILED = True @@ -461,7 +461,7 @@ class SyaAssociativity(Enum): return self.value -class BaseNodeParser(BaseParser): +class BaseNodeParser(BaseParserInputParser): """ Parser that return LexerNode """ diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index 876ed74..b7834c1 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -62,11 +62,6 @@ class BaseParser: self.short_name = name self.priority = priority self.enabled = enabled - - self.error_sink = [] - self.context: ExecutionContext = None - self.sheerka = None - self.parser_input: ParserInput = None self.yield_eof = yield_eof def __eq__(self, other): @@ -80,6 +75,74 @@ class BaseParser: def __repr__(self): return self.name + def log_result(self, context, source, ret): + pass + # if not self.log.isEnabledFor(logging.DEBUG): + # return + # + # if ret.status: + # value = context.return_value_to_str(ret) + # context.log(f"Recognized '{source}' as {value}", self.name) + # else: + # context.log(f"Failed to recognize '{source}'", self.name) + + def log_multiple_results(self, context, source, list_of_ret): + pass + # if not self.log.isEnabledFor(logging.DEBUG): + # return + # + # context.log(f"Recognized '{source}' as multiple concepts", self.name) + # for r in list_of_ret: + # value = context.return_value_to_str(r) + # context.log(f" Recognized '{value}'", self.name) + + def get_return_value_body(self, sheerka, source, parsed, try_parse, errors): + """ + All parsers must return their result in a standard way + :param sheerka: + :param source: + :param parsed: + :param try_parse: + :param errors: + :return: + """ + if len(errors) == 1 and isinstance(errors[0], Concept): + return errors[0] + + if len(errors): + if parsed is None: + return sheerka.new(BuiltinConcepts.NOT_FOR_ME, + body=source, + reason=errors) + else: + return sheerka.new(BuiltinConcepts.ERROR, + body=errors) + + return sheerka.new(BuiltinConcepts.PARSER_RESULT, + parser=self, + source=source, + body=parsed, + try_parsed=try_parse) + + @staticmethod + def get_name(name): + return BaseParser.PREFIX + name + + +class BaseParserInputParser(BaseParser): + """ + Base parser for stateful parser where context, parser input, and error sink are part of the class + """ + + def __init__(self, name, priority: int, enabled=True, yield_eof=False): + super(BaseParserInputParser, self).__init__(name, priority, enabled, yield_eof) + + self.error_sink = [] + self.context: ExecutionContext = None + self.sheerka = None + self.parser_input: ParserInput = None + self.yield_eof = yield_eof + def reset_parser(self, context, parser_input: ParserInput): self.context = context self.sheerka = context.sheerka @@ -106,54 +169,6 @@ class BaseParser: def has_error(self): return len(self.error_sink) > 0 - def log_result(self, context, source, ret): - pass - # if not self.log.isEnabledFor(logging.DEBUG): - # return - # - # if ret.status: - # value = context.return_value_to_str(ret) - # context.log(f"Recognized '{source}' as {value}", self.name) - # else: - # context.log(f"Failed to recognize '{source}'", self.name) - - def log_multiple_results(self, context, source, list_of_ret): - pass - # if not self.log.isEnabledFor(logging.DEBUG): - # return - # - # context.log(f"Recognized '{source}' as multiple concepts", self.name) - # for r in list_of_ret: - # value = context.return_value_to_str(r) - # context.log(f" Recognized '{value}'", self.name) - - def get_return_value_body(self, sheerka, source, parsed, try_parse): - """ - All parsers must return their result in a standard way - :param sheerka: - :param source: - :param parsed: - :param try_parse: - :return: - """ - if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept): - return self.error_sink[0] - - if self.has_error: - if parsed is None: - return sheerka.new(BuiltinConcepts.NOT_FOR_ME, - body=source, - reason=self.error_sink) - else: - return sheerka.new(BuiltinConcepts.ERROR, - body=self.error_sink) - - return sheerka.new(BuiltinConcepts.PARSER_RESULT, - parser=self, - source=source, - body=parsed, - try_parsed=try_parse) - @staticmethod def get_input_as_lexer_nodes(parser_input, expected_parser=None): """ @@ -229,12 +244,34 @@ class BaseParser: return list_a - @staticmethod - def get_name(name): - return BaseParser.PREFIX + name - class BaseExprParser(BaseParser): - def parse_input(self): + def parse_input(self, context, parser_input, error_sink): raise NotImplementedError + + def reset_parser_input(self, parser_input: ParserInput, error_sink): + try: + error_sink.clear() + parser_input.reset(self.yield_eof) + except LexerError as e: + error_sink.add_error(e) + return False + + parser_input.next_token() + return True + + +class ErrorSink: + def __init__(self): + self.sink = [] + + def add_error(self, error): + self.sink.append(error) + + def clear(self): + self.sink.clear() + + @property + def has_error(self): + return len(self.sink) > 0 diff --git a/src/parsers/BnfDefinitionParser.py b/src/parsers/BnfDefinitionParser.py index abc35c8..f521727 100644 --- a/src/parsers/BnfDefinitionParser.py +++ b/src/parsers/BnfDefinitionParser.py @@ -33,6 +33,7 @@ class BnfDefinitionParser(BaseParser): self.context = None self.source = "" self.sheerka = None + self.error_sink = [] def __eq__(self, other): if not isinstance(other, BnfDefinitionParser): @@ -60,6 +61,10 @@ class BnfDefinitionParser(BaseParser): self.next_token() return error + @property + def has_error(self): + return len(self.error_sink) > 0 + def get_token(self) -> Token: return self._current @@ -123,7 +128,7 @@ class BnfDefinitionParser(BaseParser): False, context.sheerka.new(BuiltinConcepts.ERROR, body=[e])) - value = self.get_return_value_body(context.sheerka, self.source, tree, tree) + value = self.get_return_value_body(context.sheerka, self.source, tree, tree, self.error_sink) ret = self.sheerka.ret( self.name, diff --git a/src/parsers/DefConceptParser.py b/src/parsers/DefConceptParser.py index c513ff5..66f1189 100644 --- a/src/parsers/DefConceptParser.py +++ b/src/parsers/DefConceptParser.py @@ -85,7 +85,7 @@ class DefConceptParser(BaseCustomGrammarParser): self.parser_input.next_token() node = self.parse_def_concept() - body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node) + body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink) ret = sheerka.ret(self.name, not self.has_error, body) self.log_result(context, parser_input.as_text(), ret) diff --git a/src/parsers/DefRuleParser.py b/src/parsers/DefRuleParser.py index ac4516f..c85ae36 100644 --- a/src/parsers/DefRuleParser.py +++ b/src/parsers/DefRuleParser.py @@ -71,7 +71,7 @@ class DefRuleParser(BaseCustomGrammarParser): self.parser_input.next_token() node = self.parse_def_rule() - body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node) + body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node, self.error_sink) ret = sheerka.ret(self.name, not self.has_error, body) self.log_result(context, parser_input.as_text(), ret) diff --git a/src/parsers/ExactConceptParser.py b/src/parsers/ExactConceptParser.py index f8a5b91..9b4aad3 100644 --- a/src/parsers/ExactConceptParser.py +++ b/src/parsers/ExactConceptParser.py @@ -4,10 +4,10 @@ from core.concept import VARIABLE_PREFIX from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind from core.utils import str_concept -from parsers.BaseParser import BaseParser +from parsers.BaseParser import BaseParser, BaseParserInputParser -class ExactConceptParser(BaseParser): +class ExactConceptParser(BaseParserInputParser): """ Tries to recognize a single concept """ @@ -15,7 +15,7 @@ class ExactConceptParser(BaseParser): MAX_WORDS_SIZE = 6 def __init__(self, max_word_size=None, **kwargs): - BaseParser.__init__(self, "ExactConcept", 80) + BaseParserInputParser.__init__(self, "ExactConcept", 80) self.max_word_size = max_word_size def parse(self, context, parser_input: ParserInput): diff --git a/src/parsers/ExpressionParser.py b/src/parsers/ExpressionParser.py index 70d6668..2891eaa 100644 --- a/src/parsers/ExpressionParser.py +++ b/src/parsers/ExpressionParser.py @@ -2,7 +2,7 @@ from core.builtin_concepts_ids import BuiltinConcepts from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind from core.utils import get_text_from_tokens -from parsers.BaseParser import BaseExprParser +from parsers.BaseParser import BaseExprParser, ErrorSink from parsers.FunctionParser import FunctionParser from parsers.LogicalOperatorParser import LogicalOperatorParser from parsers.RelationalOperatorParser import RelationalOperatorParser @@ -18,9 +18,10 @@ class ExpressionParser(BaseExprParser): def __init__(self, **kwargs): super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False) - self.logical_parser = LogicalOperatorParser() - self.relational_parser = RelationalOperatorParser() + self.variable_parser = VariableOrNamesParser() self.function_parser = FunctionParser() + self.relational_parser = RelationalOperatorParser() + self.logical_parser = LogicalOperatorParser(expr_parser=self.variable_parser) def parse(self, context, parser_input: ParserInput): """ @@ -40,61 +41,109 @@ class ExpressionParser(BaseExprParser): False, sheerka.new(BuiltinConcepts.IS_EMPTY)) - if not self.reset_parser(context, parser_input): - return self.sheerka.ret( + error_sink = ErrorSink() + if not self.reset_parser_input(parser_input, error_sink): + return context.sheerka.ret( self.name, False, - context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink)) - self.parser_input.next_token() - - node = self.parse_input() + node = self.parse_input(context, parser_input, error_sink) if isinstance(node, ParenthesisNode): node = node.node - value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node) + value = self.get_return_value_body(context.sheerka, parser_input.as_text(), node, node, error_sink.sink) - ret = self.sheerka.ret( + ret = context.sheerka.ret( self.name, - not self.has_error, + not error_sink.has_error, value) return ret - def parse_input(self): - pos = self.parser_input.pos - for parser in []: # [self.logical_parser, self.relational_parser, self.function_parser]: - self.parser_input.seek(pos) # reset position - if parser.reset_parser(self.context, self.parser_input): - res = parser.parse_input() - if res and not parser.has_error: - return res + def parse_input(self, context, parser_input, error_sink): + pos = parser_input.pos + for parser in [self.logical_parser, + self.variable_parser]: # [self.logical_parser, self.relational_parser, self.function_parser]: + parser_input.seek(pos) # reset position + res = parser.parse_input(context, parser_input, error_sink) + if res and not error_sink.has_error: + return res + return None + + +class VariableOrNamesParser(BaseExprParser): + NAME = "VariableOrNames" + + def __init__(self, **kwargs): + super().__init__(VariableOrNamesParser.NAME, 60, False, yield_eof=False) + + def parse(self, context, parser_input: ParserInput): + """ + :param context: + :param parser_input: + :return: + """ + + if not isinstance(parser_input, ParserInput): + return None + + context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name) + sheerka = context.sheerka + + if parser_input.is_empty(): + return context.sheerka.ret(self.name, + False, + sheerka.new(BuiltinConcepts.IS_EMPTY)) + + error_sink = ErrorSink() + if not self.reset_parser_input(parser_input, error_sink): + return context.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink)) + + node = self.parse_input(context, parser_input, error_sink) + if isinstance(node, ParenthesisNode): + node = node.node + + value = self.get_return_value_body(context.sheerka, parser_input.as_text(), node, node, error_sink.sink) + + ret = context.sheerka.ret( + self.name, + not error_sink.has_error, + value) + + return ret + + def parse_input(self, context, parser_input, error_sink): # try to recognize a VariableNode dots_found = [] - for i, token in enumerate(self.parser_input.as_tokens()): + pos = parser_input.pos + for i, token in enumerate(parser_input.as_tokens()): if token.type == TokenKind.DOT: dots_found.append(i) continue if not (token.type == TokenKind.WHITESPACE or token.type == TokenKind.IDENTIFIER and token.value.isidentifier()): - return NameExprNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens()) + return NameExprNode(parser_input.start, parser_input.end, parser_input.as_tokens()) if len(dots_found) == 0: - return VariableNode(pos, self.parser_input.end, self.parser_input.as_tokens(), self.parser_input.as_text()) + return VariableNode(pos, parser_input.end, parser_input.as_tokens(), parser_input.as_text()) parts = [] current_dot_pos = pos for dot_found in dots_found: - parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: dot_found])) + parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: dot_found])) current_dot_pos = dot_found + 1 # do not forget the trailing part - parts.append(get_text_from_tokens(self.parser_input.tokens[current_dot_pos: self.parser_input.end + 1])) + parts.append(get_text_from_tokens(parser_input.tokens[current_dot_pos: parser_input.end + 1])) - return VariableNode(self.parser_input.start, - self.parser_input.end, - self.parser_input.as_tokens(), + return VariableNode(parser_input.start, + parser_input.end, + parser_input.as_tokens(), parts[0], *parts[1:]) diff --git a/src/parsers/FunctionParser.py b/src/parsers/FunctionParser.py index 6c222cb..2ec340c 100644 --- a/src/parsers/FunctionParser.py +++ b/src/parsers/FunctionParser.py @@ -7,7 +7,8 @@ from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind from core.utils import get_n_clones from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode -from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser +from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser, \ + BaseParserInputParser from parsers.BnfNodeParser import BnfNodeParser from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.RuleParser import RuleParser @@ -26,7 +27,7 @@ class FunctionParserNode(Node): pass -class FunctionParser(BaseExprParser): +class FunctionParser(BaseParserInputParser): """ The parser will be used to parse func(x, y, z) where x, y and z can be source code, concepts or other functions diff --git a/src/parsers/LogicalOperatorParser.py b/src/parsers/LogicalOperatorParser.py index 1feeb81..44a1b98 100644 --- a/src/parsers/LogicalOperatorParser.py +++ b/src/parsers/LogicalOperatorParser.py @@ -7,7 +7,7 @@ from core.sheerka.services.sheerka_service import FailedToCompileError from core.tokenizer import TokenKind, Tokenizer, Keywords from core.utils import get_text_from_tokens from parsers.BaseNodeParser import UnrecognizedTokensNode -from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser +from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser, ErrorSink from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \ ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode @@ -76,6 +76,7 @@ class LogicalOperatorParser(BaseExprParser): self.and_tokens = list(Tokenizer(" and ", yield_eof=False)) self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False)) self.not_tokens = list(Tokenizer("not ", yield_eof=False)) + self.expr_parser = kwargs.get("expr_parser", None) @staticmethod def clean_parenthesis_nodes(nodes): @@ -101,144 +102,161 @@ class LogicalOperatorParser(BaseExprParser): False, sheerka.new(BuiltinConcepts.IS_EMPTY)) - if not self.reset_parser(context, parser_input): - return self.sheerka.ret( + error_sink = ErrorSink() + if not self.reset_parser_input(parser_input, error_sink): + return context.sheerka.ret( self.name, False, - context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink)) - self.parser_input.next_token() - tree = self.parse_input() - token = self.parser_input.token + tree = self.parse_input(context, parser_input, error_sink) + + token = parser_input.token if token and token.type != TokenKind.EOF: - self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [])) + error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [])) if isinstance(tree, ParenthesisNode): tree = tree.node - value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), tree, tree) - ret = self.sheerka.ret( - self.name, - not self.has_error, - value) + value = self.get_return_value_body(context.sheerka, + parser_input.as_text(), + tree, + tree, + error_sink.sink) + + ret = context.sheerka.ret(self.name, + not error_sink.has_error, + value) return ret - def parse_input(self): - return self.parse_or() + def parse_input(self, context, parser_input, error_sink): + return self.parse_or(context, parser_input, error_sink) - def parse_or(self): - start = self.parser_input.pos - expr = self.parse_and() - token = self.parser_input.token + def parse_or(self, context, parser_input, error_sink): + start = parser_input.pos + expr = self.parse_and(context, parser_input, error_sink) + token = parser_input.token if token.type != TokenKind.IDENTIFIER or token.value != "or": return expr parts = [expr] while token.type == TokenKind.IDENTIFIER and token.value == "or": - self.parser_input.next_token() - expr = self.parse_and() + parser_input.next_token() + expr = self.parse_and(context, parser_input, error_sink) if expr is None: - self.add_error(UnexpectedEofParsingError("When parsing 'or'")) - end = self.parser_input.pos + error_sink.add_error(UnexpectedEofParsingError("When parsing 'or'")) + end = parser_input.pos self.clean_parenthesis_nodes(parts) - return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts) + return OrNode(start, end, parser_input.tokens[start: end + 1], *parts) parts.append(expr) - token = self.parser_input.token + token = parser_input.token end = parts[-1].end self.clean_parenthesis_nodes(parts) - return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts) + return OrNode(start, end, parser_input.tokens[start: end + 1], *parts) - def parse_and(self): - start = self.parser_input.pos - expr = self.parse_not() - token = self.parser_input.token + def parse_and(self, context, parser_input, error_sink): + start = parser_input.pos + expr = self.parse_not(context, parser_input, error_sink) + token = parser_input.token if token.type != TokenKind.IDENTIFIER or token.value != "and": return expr parts = [expr] while token.type == TokenKind.IDENTIFIER and token.value == "and": - self.parser_input.next_token() - expr = self.parse_not() + parser_input.next_token() + expr = self.parse_not(context, parser_input, error_sink) if expr is None: - self.add_error(UnexpectedEofParsingError("When parsing 'and'")) - end = self.parser_input.pos + error_sink.add_error(UnexpectedEofParsingError("When parsing 'and'")) + end = parser_input.pos self.clean_parenthesis_nodes(parts) - return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts) + return AndNode(start, end, parser_input.tokens[start: end + 1], *parts) parts.append(expr) - token = self.parser_input.token + token = parser_input.token end = parts[-1].end self.clean_parenthesis_nodes(parts) - return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts) + return AndNode(start, end, parser_input.tokens[start: end + 1], *parts) - def parse_not(self): - token = self.parser_input.token - start = self.parser_input.pos + def parse_not(self, context, parser_input, error_sink): + token = parser_input.token + start = parser_input.pos if token.type == TokenKind.IDENTIFIER and token.value == "not": - self.parser_input.next_token() - parsed = self.parse_not() + parser_input.next_token() + parsed = self.parse_not(context, parser_input, error_sink) node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed return NotNode(start, parsed.end, - self.parser_input.tokens[start: parsed.end + 1], + parser_input.tokens[start: parsed.end + 1], node) else: - return self.parse_names() + return self.parse_names(context, parser_input, error_sink) - def parse_names(self): + def parse_names(self, context, parser_input, error_sink): def stop(): return token.type == TokenKind.EOF or \ paren_count == 0 and token.type == TokenKind.RPAR or \ token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not") - token = self.parser_input.token + token = parser_input.token if token.type == TokenKind.EOF: return None if token.type == TokenKind.LPAR: - start = self.parser_input.pos - self.parser_input.next_token() - expr = self.parse_or() - token = self.parser_input.token + start = parser_input.pos + parser_input.next_token() + expr = self.parse_or(context, parser_input, error_sink) + token = parser_input.token if token.type != TokenKind.RPAR: - self.error_sink.append( + error_sink.add_error( UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR])) return expr - end = self.parser_input.pos - self.parser_input.next_token() + end = parser_input.pos + parser_input.next_token() return ParenthesisNode(start, end, None, expr) - buffer = [] paren_count = 0 last_paren = None - start = self.parser_input.pos + start = parser_input.pos + end = parser_input.pos + last_is_whitespace = False while not stop(): - buffer.append(token) + last_is_whitespace = token.type == TokenKind.WHITESPACE + end += 1 if token.type == TokenKind.LPAR: last_paren = token paren_count += 1 if token.type == TokenKind.RPAR: paren_count -= 1 - self.parser_input.next_token(False) - token = self.parser_input.token + parser_input.next_token(False) + token = parser_input.token - if len(buffer) == 0: + if last_is_whitespace: + end -= 1 + + if start == end: if token.type != TokenKind.RPAR: - self.error_sink.append(LeftPartNotFoundError()) + error_sink.add_error(LeftPartNotFoundError()) return None if paren_count != 0: - self.error_sink.append(ParenthesisMismatchError(last_paren)) + error_sink.add_error(ParenthesisMismatchError(last_paren)) return None - if buffer[-1].type == TokenKind.WHITESPACE: - buffer.pop() - - end = start + len(buffer) - 1 - return NameExprNode(start, end, buffer) + if self.expr_parser: + new_parsing_input = ParserInput( + None, + tokens=parser_input.tokens, + length=parser_input.length, + start=start, + end=end - 1, + yield_oef=False).reset() + new_parsing_input.next_token() + return self.expr_parser.parse_input(context, new_parsing_input, error_sink) + else: + return NameExprNode(start, end - 1, parser_input.tokens[start:end]) def compile_conjunctions(self, context, conjunctions, who): """ diff --git a/src/parsers/PythonParser.py b/src/parsers/PythonParser.py index 97a9783..22864b0 100644 --- a/src/parsers/PythonParser.py +++ b/src/parsers/PythonParser.py @@ -6,7 +6,7 @@ import core.utils from core.builtin_concepts import BuiltinConcepts from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind -from parsers.BaseParser import BaseParser, Node, ParsingError +from parsers.BaseParser import BaseParser, Node, ParsingError, BaseParserInputParser log = logging.getLogger(__name__) @@ -107,7 +107,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor): self.names.add(node.id) -class PythonParser(BaseParser): +class PythonParser(BaseParserInputParser): """ Parse Python scripts """ @@ -116,7 +116,7 @@ class PythonParser(BaseParser): def __init__(self, **kwargs): - BaseParser.__init__(self, PythonParser.NAME, 50) + BaseParserInputParser.__init__(self, PythonParser.NAME, 50) self.source = kwargs.get("source", "") def parse(self, context, parser_input: ParserInput): diff --git a/src/parsers/PythonWithConceptsParser.py b/src/parsers/PythonWithConceptsParser.py index 222b269..724b92d 100644 --- a/src/parsers/PythonWithConceptsParser.py +++ b/src/parsers/PythonWithConceptsParser.py @@ -2,13 +2,13 @@ from core.builtin_concepts import BuiltinConcepts from core.builtin_helpers import CreateObjectIdentifiers from parsers.BaseNodeParser import ConceptNode, RuleNode, VariableNode from parsers.BaseNodeParser import SourceCodeWithConceptNode -from parsers.BaseParser import BaseParser +from parsers.BaseParser import BaseParser, BaseParserInputParser from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser unrecognized_nodes_parser = UnrecognizedNodeParser() -class PythonWithConceptsParser(BaseParser): +class PythonWithConceptsParser(BaseParserInputParser): def __init__(self, **kwargs): super().__init__("PythonWithConcepts", 20) diff --git a/src/parsers/RelationalOperatorParser.py b/src/parsers/RelationalOperatorParser.py index 2b64282..2bcd0e7 100644 --- a/src/parsers/RelationalOperatorParser.py +++ b/src/parsers/RelationalOperatorParser.py @@ -4,12 +4,12 @@ from core.builtin_concepts_ids import BuiltinConcepts from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute from core.tokenizer import TokenKind, Token from core.utils import get_text_from_tokens -from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser +from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser, BaseParserInputParser from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \ ParenthesisNode, LeftPartNotFoundError -class RelationalOperatorParser(BaseExprParser): +class RelationalOperatorParser(BaseParserInputParser): """ Parses xxx (== | > | < | >= | <= | != | in | not in) yyy Nothing else @@ -53,7 +53,7 @@ class RelationalOperatorParser(BaseExprParser): if isinstance(node, ParenthesisNode): node = node.node - value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node) + value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node, self.error_sink) ret = self.sheerka.ret( self.name, diff --git a/src/parsers/RuleParser.py b/src/parsers/RuleParser.py index 12bb114..f1e97b4 100644 --- a/src/parsers/RuleParser.py +++ b/src/parsers/RuleParser.py @@ -2,7 +2,7 @@ from core.builtin_concepts import BuiltinConcepts from core.rule import Rule from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind -from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError +from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError, BaseParserInputParser class RuleNotFoundError(ParsingError): @@ -14,7 +14,7 @@ class RuleNotFoundError(ParsingError): return f"RuleNotFoundError(id={self.id}, key={self.key}" -class RuleParser(BaseParser): +class RuleParser(BaseParserInputParser): """ Tries to recognize rules """ @@ -22,7 +22,7 @@ class RuleParser(BaseParser): NAME = "Rule" def __init__(self, **kwargs): - BaseParser.__init__(self, RuleParser.NAME, 80) + BaseParserInputParser.__init__(self, RuleParser.NAME, 80) def parse(self, context, parser_input: ParserInput): """ diff --git a/src/parsers/UnrecognizedNodeParser.py b/src/parsers/UnrecognizedNodeParser.py index 99a6969..718cd66 100644 --- a/src/parsers/UnrecognizedNodeParser.py +++ b/src/parsers/UnrecognizedNodeParser.py @@ -4,7 +4,7 @@ import core.utils from core.builtin_concepts import BuiltinConcepts from core.builtin_helpers import only_successful, get_lexer_nodes, update_compiled from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode -from parsers.BaseParser import BaseParser, ParsingError +from parsers.BaseParser import BaseParser, ParsingError, BaseParserInputParser from parsers.BnfNodeParser import BnfNodeParser from parsers.SequenceNodeParser import SequenceNodeParser from parsers.SyaNodeParser import SyaNodeParser @@ -22,7 +22,7 @@ class CannotParseError(ParsingError): unrecognized: UnrecognizedTokensNode -class UnrecognizedNodeParser(BaseParser): +class UnrecognizedNodeParser(BaseParserInputParser): """ This parser comes after the other NodeParsers (Atom, Bnf or Sya) It will try to resolve all UnrecognizedTokensNode. diff --git a/tests/parsers/test_BaseParser.py b/tests/parsers/test_BaseParser.py index 3365fee..b4d3848 100644 --- a/tests/parsers/test_BaseParser.py +++ b/tests/parsers/test_BaseParser.py @@ -1,7 +1,7 @@ import pytest from core.tokenizer import Tokenizer -from parsers.BaseParser import BaseParser +from parsers.BaseParser import BaseParser, BaseParserInputParser @pytest.mark.parametrize("tokens, expected", [ @@ -17,4 +17,4 @@ from parsers.BaseParser import BaseParser (list(Tokenizer(" a ", yield_eof=False)), (1, 1)), ]) def test_i_can_get_tokens_boundaries(tokens, expected): - assert BaseParser.get_tokens_boundaries(tokens) == expected + assert BaseParserInputParser.get_tokens_boundaries(tokens) == expected diff --git a/tests/parsers/test_ExpressionParser.py b/tests/parsers/test_ExpressionParser.py index 08db492..e7bd1ef 100644 --- a/tests/parsers/test_ExpressionParser.py +++ b/tests/parsers/test_ExpressionParser.py @@ -3,10 +3,11 @@ import pytest from core.builtin_concepts_ids import BuiltinConcepts from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer +from parsers.BaseParser import ErrorSink from parsers.ExpressionParser import ExpressionParser from parsers.expressions import VariableNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR +from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR, AND class TestExpressionParser(TestUsingMemoryBasedSheerka): @@ -17,9 +18,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): def input_parser_with_source(self, source): sheerka, context, parser = self.init_parser() - parser.reset_parser(context, ParserInput(source)) - parser.parser_input.next_token() - return sheerka, context, parser + error_sink = ErrorSink() + parser_input = ParserInput(source) + parser.reset_parser_input(parser_input, error_sink) + return sheerka, context, parser, parser_input, error_sink def test_i_can_detect_empty_expression(self): sheerka, context, parser = self.init_parser() @@ -29,17 +31,18 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) @pytest.mark.parametrize("expression, expected", [ - ("var1 + var 2", EXPR("var1 + var 2")), + ("var1 + var2", EXPR("var1 + var2")), ("variable", VAR("variable")), ("var.attr", VAR("var.attr")), + ("var1 and var2", AND(VAR("var1"), VAR("var2"))) ]) def test_i_can_parse_input(self, expression, expected): - sheerka, context, parser = self.input_parser_with_source(expression) + sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression) expected = get_expr_node_from_test_node(expression, expected) - parsed = parser.parse_input() + parsed = parser.parse_input(context, parser_input, error_sink) - assert not parser.has_error + assert not error_sink.has_error assert parsed == expected @pytest.mark.parametrize("expression", [ @@ -47,10 +50,10 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): "var . attr1 . attr2", ]) def test_i_can_parse_variable(self, expression): - sheerka, context, parser = self.input_parser_with_source(expression) - parsed = parser.parse_input() + sheerka, context, parser, parser_input, error_sink = self.input_parser_with_source(expression) + parsed = parser.parse_input(context, parser_input, error_sink) - assert not parser.has_error + assert not error_sink.has_error assert isinstance(parsed, VariableNode) assert parsed.name == "var" assert parsed.attributes == ["attr1", "attr2"] @@ -60,9 +63,9 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): expression = "do not care var1 + var2 do not care either" parser_input = ParserInput("text", list(Tokenizer(expression, yield_eof=False)), start=6, end=10) + error_sink = ErrorSink() + parser.reset_parser_input(parser_input, error_sink) + parsed = parser.parse_input(context, parser_input, error_sink) - parser.reset_parser(context, parser_input) - parsed = parser.parse_input() - - assert not parser.has_error + assert not error_sink.has_error assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2"))