Files
Sheerka-Old/src/parsers/BaseParser.py
T
kodjo 54e5681c5a Fixed #109 : Mix python and concept. List comprehension
Fixed #110 : SheerkaDebugManager: add list_debug_settings
Fixed #111 : SheerkaDebugManager: Implement ListDebugLogger
Fixed #112 : SyaNodeParser: rewrite this parser
Fixed #113 : Sheerka.: Add enable_parser_caching to disable parsers caching
Fixed #114 : SyaNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #115 : BnfNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #116 : SequenceNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #117 : ResolveMultiplePluralAmbiguityEvaluator: Resolve Multiple plural ambiguity
2021-09-06 11:51:50 +02:00

256 lines
7.2 KiB
Python

from dataclasses import dataclass
from typing import Union
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.global_symbols import ErrorObj
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Token, LexerError
class ErrorSink:
    """Ordered collection of the errors gathered during an operation."""

    def __init__(self):
        # Errors are stored in the order they were added.
        self.sink = list()

    def __repr__(self):
        return f"Errors({self.sink})"

    def add_error(self, error):
        """Append ``error`` at the end of the sink."""
        self.sink.append(error)

    def clear(self):
        """Empty the sink in place (the underlying list object is kept)."""
        del self.sink[:]

    @property
    def has_error(self):
        """True when at least one error has been recorded."""
        return bool(self.sink)
@dataclass
class Node:
    """Field-less dataclass used as the root of the node classes of this module."""
@dataclass
class ParsingError(Node, ErrorObj):
    """
    Base class of the parsing errors declared in this module.
    It is both a Node and an ErrorObj and adds no field of its own.
    """
@dataclass()
class UnexpectedTokenParsingError(ParsingError):
    """
    Parsing error describing a token that was not expected.

    ``token`` may hold either a Token or a plain string. When exactly one of
    the two compared errors holds a string, the Token side is compared through
    its ``repr_value``.
    """
    message: str
    token: Union[Token, str]
    expected_tokens: list

    @staticmethod
    def _token_key(token):
        """
        Return the string form of a token: a string is returned as is,
        anything else is replaced by its ``repr_value`` when it has one.
        """
        if isinstance(token, str):
            return token
        return getattr(token, "repr_value", token)

    def __eq__(self, other):
        """
        Two errors are equal when message, token and expected tokens match.
        :param other:
        :return: True / False (never NotImplemented)
        """
        if other is self:
            return True

        if not isinstance(other, UnexpectedTokenParsingError):
            return False

        if self.message != other.message:
            return False

        mine, theirs = self.token, other.token
        # Only fall back to the string form when the two sides are of different
        # kinds. The previous code read self.token.repr_value whenever
        # other.token was a string, which crashed when self.token was a string
        # too, and never matched a string on the left with a Token on the right.
        if isinstance(mine, str) != isinstance(theirs, str):
            mine, theirs = self._token_key(mine), self._token_key(theirs)
        if mine != theirs:
            return False

        return self.expected_tokens == other.expected_tokens

    def __hash__(self):
        """
        Hash consistent with __eq__.
        :return:
        """
        # expected_tokens is a list, which is not hashable: hash a tuple copy.
        expected = self.expected_tokens
        expected_key = tuple(expected) if expected is not None else None
        # The token is hashed through its string form so that a Token and its
        # string representation, which compare equal, share the same hash.
        # NOTE(review): assumes equal Tokens expose equal repr_value - confirm.
        return hash((self.message, self._token_key(self.token), expected_key))
@dataclass
class UnexpectedEofParsingError(ParsingError):
    """Parsing error carrying a single, optional, message."""
    message: str = None

    def __repr__(self):
        # Short representation: class name followed by the message only.
        return f"UnexpectedEofParsingError({self.message})"
class BaseParser:
    """
    Root class of the parsers.
    Holds the identity of the parser (names, priority, flags) and the
    standard way of building the body of a parser return value.
    """
    PREFIX = "parsers."

    def __init__(self, name, priority: int, enabled=True, yield_eof=False, hints=None):
        """
        :param name: short name; the full name is PREFIX + name
        :param priority:
        :param enabled:
        :param yield_eof:
        :param hints: Dictionary context_hint: priority. When not null, priority is taken from it
        """
        # Logger based attributes (log, init_log, verbose_log) used to be
        # created here; they are currently disabled.
        self.short_name = name
        self.name = BaseParser.get_name(name)
        self.priority, self.enabled = priority, enabled
        self.yield_eof, self.hints = yield_eof, hints

    def __eq__(self, other):
        # Parsers are identified by their full name only.
        return isinstance(other, self.__class__) and self.name == other.name

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return self.name

    def log_result(self, context, source, ret):
        """
        Logging hook for a single result. Currently a no-op.
        The disabled implementation logged through context.log either
        "Recognized '{source}' as {value}" or "Failed to recognize '{source}'".
        """
        return None

    def log_multiple_results(self, context, source, list_of_ret):
        """
        Logging hook for a list of results. Currently a no-op.
        The disabled implementation logged through context.log one line per
        recognized value.
        """
        return None

    def get_return_value_body(self, sheerka, source, parsed, try_parse, errors):
        """
        Builds the body of the return value, the same way for all the parsers
        :param sheerka: used to create the concepts (sheerka.new)
        :param source:
        :param parsed:
        :param try_parse:
        :param errors: sized collection of errors
        :return: the single error when it is already a Concept,
                 NOT_FOR_ME when there are errors and nothing was parsed,
                 ERROR when there are errors and something was parsed,
                 PARSER_RESULT otherwise
        """
        nb_errors = len(errors)

        if nb_errors == 0:
            return sheerka.new(BuiltinConcepts.PARSER_RESULT,
                               parser=self,
                               source=source,
                               body=parsed,
                               try_parsed=try_parse)

        # A lone error that is already a Concept is returned untouched.
        if nb_errors == 1 and isinstance(errors[0], Concept):
            return errors[0]

        if parsed is not None:
            return sheerka.new(BuiltinConcepts.ERROR,
                               body=errors)

        return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                           body=source,
                           reason=errors)

    @staticmethod
    def get_name(name):
        """Returns the full name of a parser from its short name."""
        return f"{BaseParser.PREFIX}{name}" if not isinstance(name, str) else BaseParser.PREFIX + name
class BaseParserInputParser(BaseParser):
    """
    Base parser for stateful parser where context, parser input, and error sink are part of the class
    """

    def __init__(self, name, priority: int, enabled=True, yield_eof=False, hints=None):
        """
        :param name:
        :param priority:
        :param enabled:
        :param yield_eof: forwarded to parser_input.reset() by reset_parser()
        :param hints: forwarded as is to BaseParser
        """
        # yield_eof is stored by BaseParser.__init__, no need to set it again
        super().__init__(name, priority, enabled, yield_eof, hints)
        self.error_sink = []
        self.context: ExecutionContext = None
        self.sheerka = None
        self.parser_input: ParserInput = None

    def reset_parser(self, context, parser_input: ParserInput):
        """
        Binds the parser to a new context and a new input, and clears the errors
        :param context:
        :param parser_input:
        :return: False when resetting the input raises a LexerError
                 (the error is recorded in the error sink), True otherwise
        """
        self.context = context
        self.sheerka = context.sheerka
        self.parser_input = parser_input
        self.error_sink.clear()

        try:
            self.parser_input.reset(self.yield_eof)
        except LexerError as e:
            # the input is not usable: record the error without moving forward
            self.add_error(e, False)
            return False

        return True

    def parse(self, context, parser_input):
        """
        Does nothing in this base class
        :param context:
        :param parser_input:
        :return:
        """
        pass

    def add_error(self, error, next_token=True):
        """
        Records an error
        :param error:
        :param next_token: when True, the parser input is moved to its next token
        :return: the error itself
        """
        self.error_sink.append(error)
        if next_token:
            self.parser_input.next_token()
        return error

    @property
    def has_error(self):
        """True when at least one error was recorded"""
        return len(self.error_sink) > 0

    @staticmethod
    def get_input_as_lexer_nodes(parser_input, expected_parser=None):
        """
        Extract the lexer node from the parser_input
        :param parser_input:
        :param expected_parser: returns the nodes if the parent parser is the expected one
        :return: a non empty list of LexerNode, or None when the input does not qualify
        """
        if not isinstance(parser_input, ParserResultConcept):
            return None

        if expected_parser and parser_input.parser != expected_parser:
            return None

        # imported locally, as in the original code
        # NOTE(review): presumably to avoid a circular import - confirm
        from parsers.BaseNodeParser import LexerNode

        value = parser_input.value
        if isinstance(value, list):
            # an empty list, or a list holding anything but LexerNode, is rejected
            if len(value) == 0:
                return None
            if not all(isinstance(node, LexerNode) for node in value):
                return None
            return value

        if not isinstance(value, LexerNode):
            return None
        return [value]

    @staticmethod
    def get_tokens_boundaries(tokens):
        """
        Returns the first and the last valid index of the tokens
        a valid index is a token that is neither a whitespace nor an EOF
        :param tokens:
        :return: None when tokens is None,
                 (0, 0) when there is no valid token at all
                 (empty list, EOF right after the optional leading whitespaces),
                 (start, end) otherwise
        """
        if tokens is None:
            return None

        nb_tokens = len(tokens)

        # skip all the leading whitespaces, so that start is a valid index
        start = 0
        while start < nb_tokens and tokens[start].type == TokenKind.WHITESPACE:
            start += 1

        # Nothing valid: same answer as for an empty list.
        # The previous backward scan had no lower bound: with only whitespace / EOF
        # tokens it wrapped around through negative indexes, then raised IndexError.
        if start == nb_tokens or tokens[start].type == TokenKind.EOF:
            return 0, 0

        # tokens[start] is valid, so the backward scan stops at start at the latest
        end = nb_tokens - 1
        while tokens[end].type in (TokenKind.WHITESPACE, TokenKind.EOF):
            end -= 1

        return start, end