from dataclasses import dataclass
from typing import Optional, Union

from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.global_symbols import ErrorObj
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Token, LexerError


class ErrorSink:
    """Simple ordered accumulator for errors raised while parsing."""

    def __init__(self):
        self.sink = []  # collected error objects, in insertion order

    def __repr__(self):
        return f"Errors({self.sink})"

    def add_error(self, error):
        """Record *error* in the sink."""
        self.sink.append(error)

    def clear(self):
        """Drop all recorded errors."""
        self.sink.clear()

    @property
    def has_error(self):
        """True when at least one error has been recorded."""
        return len(self.sink) > 0


@dataclass()
class Node:
    """Base class for all parser nodes."""
    pass


@dataclass()
class ParsingError(Node, ErrorObj):
    """Base class for errors produced while parsing."""
    pass


@dataclass()
class UnexpectedTokenParsingError(ParsingError):
    """Error collected when the parser meets a token it did not expect."""

    message: str
    token: Union[Token, str]
    expected_tokens: list

    def __eq__(self, other):
        if id(other) == id(self):
            return True
        if not isinstance(other, UnexpectedTokenParsingError):
            return False
        if self.message != other.message:
            return False
        # When the other side stores its token as a plain string, compare
        # this token's textual representation instead of the Token object.
        to_compare = self.token.repr_value if isinstance(other.token, str) else self.token
        if to_compare != other.token:
            return False
        return self.expected_tokens == other.expected_tokens

    def __hash__(self):
        # BUG FIX: expected_tokens is a list, which is unhashable — the
        # original hash((..., self.expected_tokens)) raised TypeError on
        # every call. Hash a tuple of its items instead, keeping hashing
        # consistent with __eq__.
        return hash((self.message, self.token, tuple(self.expected_tokens)))


@dataclass()
class UnexpectedEofParsingError(ParsingError):
    """Error collected when the input ends before parsing is complete."""

    message: Optional[str] = None

    def __repr__(self):
        return f"UnexpectedEofParsingError({self.message})"


class BaseParser:
    """Common base for all parsers: naming, priority, enable flag, and the
    standard packaging of a parse result as a concept."""

    # Namespace prefix prepended to every parser's short name.
    PREFIX = "parsers."

    def __init__(self, name, priority: int, enabled=True, yield_eof=False, hints=None):
        """
        :param name: short parser name; full name becomes PREFIX + name
        :param priority: ordering priority of this parser
        :param enabled: whether this parser participates in parsing
        :param yield_eof: whether the token stream should emit an EOF token
        :param hints: Dictionary context_hint: priority. When not null,
                      priority is taken from it
        """
        # Per-parser loggers were initialised here originally; logging is
        # currently disabled (see the no-op log hooks below).
        self.name = BaseParser.get_name(name)
        self.short_name = name
        self.priority = priority
        self.enabled = enabled
        self.yield_eof = yield_eof
        self.hints = hints

    def __eq__(self, other):
        # Parsers are equal when they are instances of the same class and
        # carry the same full name.
        if not isinstance(other, self.__class__):
            return False
        return self.name == other.name

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return self.name

    def log_result(self, context, source, ret):
        """Debug-logging hook for a single parse result (logging disabled)."""
        pass

    def log_multiple_results(self, context, source, list_of_ret):
        """Debug-logging hook for multiple parse results (logging disabled)."""
        pass

    def get_return_value_body(self, sheerka, source, parsed, try_parse, errors):
        """
        All parsers must return their result in a standard way.

        :param sheerka: concept factory used to build the result
        :param source: the raw input that was parsed
        :param parsed: the parse product, or None when nothing was recognized
        :param try_parse: the try-parse product forwarded on success
        :param errors: errors collected while parsing
        :return: a NOT_FOR_ME, ERROR, or PARSER_RESULT concept
        """
        # A single Concept in the error list is returned verbatim.
        if len(errors) == 1 and isinstance(errors[0], Concept):
            return errors[0]
        if len(errors):
            if parsed is None:
                # Nothing was recognized at all: this input is not for us.
                return sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=source, reason=errors)
            # Partially recognized, but with errors.
            return sheerka.new(BuiltinConcepts.ERROR, body=errors)
        return sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=source,
                           body=parsed, try_parsed=try_parse)

    @staticmethod
    def get_name(name):
        """Return the full parser name: PREFIX + short name."""
        return BaseParser.PREFIX + name


class BaseParserInputParser(BaseParser):
    """
    Base parser for stateful parser where context, parser input, and error
    sink are part of the class
    """

    def __init__(self, name, priority: int, enabled=True, yield_eof=False):
        super(BaseParserInputParser, self).__init__(name, priority, enabled, yield_eof)
        self.error_sink = []  # errors collected during the current parse
        self.context: Optional[ExecutionContext] = None
        self.sheerka = None
        self.parser_input: Optional[ParserInput] = None
        # NOTE: yield_eof is already stored by BaseParser.__init__; the
        # redundant re-assignment that was here has been removed.

    def reset_parser(self, context, parser_input: ParserInput):
        """
        Bind this parser to a new context/input and clear previous errors.

        :param context: execution context providing the sheerka factory
        :param parser_input: input whose token stream will be reset
        :return: False when the input could not be tokenized, True otherwise
        """
        self.context = context
        self.sheerka = context.sheerka
        self.parser_input = parser_input
        self.error_sink.clear()
        try:
            self.parser_input.reset(self.yield_eof)
        except LexerError as e:
            # Do not advance the token stream: there is no valid stream yet.
            self.add_error(e, False)
            return False
        return True

    def parse(self, context, parser_input):
        """Override in subclasses: perform the actual parse."""
        pass

    def add_error(self, error, next_token=True):
        """
        Record *error* and (by default) skip the offending token so parsing
        can continue.

        :return: the error, enabling `return self.add_error(...)` usage
        """
        self.error_sink.append(error)
        if next_token:
            self.parser_input.next_token()
        return error

    @property
    def has_error(self):
        """True when at least one error was collected."""
        return len(self.error_sink) > 0

    @staticmethod
    def get_input_as_lexer_nodes(parser_input, expected_parser=None):
        """
        Extract the lexer node from the parser_input

        :param parser_input:
        :param expected_parser: returns the nodes if the parent parser is the
                                expected one
        :return: a non-empty list of LexerNode, or None when the input does
                 not qualify
        """
        if not isinstance(parser_input, ParserResultConcept):
            return None
        if expected_parser and parser_input.parser != expected_parser:
            return None
        from parsers.BaseNodeParser import LexerNode
        if isinstance(parser_input.value, list):
            if len(parser_input.value) == 0:
                return None
            # Every element must be a LexerNode for the list to qualify.
            for node in parser_input.value:
                if not isinstance(node, LexerNode):
                    return None
            return parser_input.value
        else:
            if not isinstance(parser_input.value, LexerNode):
                return None
            return [parser_input.value]

    @staticmethod
    def get_tokens_boundaries(tokens):
        """
        Returns the first and the last valid index of the tokens.
        A valid index is a token that is not a whitespace nor an EOF.

        :param tokens: token list; may be None or empty
        :return: None when tokens is None; otherwise (start, end) inclusive
                 indices. When no valid token exists, end < start.
        """
        if tokens is None:
            return None
        if len(tokens) == 0:
            return 0, 0
        if tokens[0].type == TokenKind.EOF:
            return 0, 0
        start = 1 if tokens[0].type == TokenKind.WHITESPACE else 0
        end = len(tokens) - 1
        # BUG FIX: bound the backward scan. Without `end >= start`, a list
        # made entirely of whitespace/EOF tokens drove `end` negative;
        # Python's negative indexing then wrapped to the back of the list,
        # re-scanning it until an IndexError was raised.
        while end >= start and tokens[end].type in (TokenKind.WHITESPACE, TokenKind.EOF):
            end -= 1
        return start, end