Implemented some enhancement requests
This commit is contained in:
+13
-208
@@ -35,14 +35,6 @@ class Node:
|
||||
pass
|
||||
|
||||
|
||||
@dataclass()
class NopNode(Node):
    """AST node representing a no-operation; carries no data of its own."""

    def __repr__(self):
        # Compact form so debug dumps of a tree stay readable.
        return "nop"
||||
class NotInitializedNode(Node):
    """Marker node for a value that has not been initialized yet."""
@@ -51,12 +43,12 @@ class NotInitializedNode(Node):
|
||||
|
||||
|
||||
@dataclass()
class ParsingError(Node, ErrorObj):
    # Base marker type for parsing errors; concrete error nodes subclass it.
    # (Renamed from ErrorNode in this change — confirm no stale references.)
    pass
||||
@dataclass()
|
||||
class UnexpectedTokenErrorNode(ErrorNode):
|
||||
class UnexpectedTokenParsingError(ParsingError):
|
||||
message: str
|
||||
token: Union[Token, str]
|
||||
expected_tokens: list
|
||||
@@ -65,7 +57,7 @@ class UnexpectedTokenErrorNode(ErrorNode):
|
||||
if id(other) == id(self):
|
||||
return True
|
||||
|
||||
if not isinstance(other, UnexpectedTokenErrorNode):
|
||||
if not isinstance(other, UnexpectedTokenParsingError):
|
||||
return False
|
||||
|
||||
if self.message != other.message:
|
||||
@@ -82,8 +74,8 @@ class UnexpectedTokenErrorNode(ErrorNode):
|
||||
|
||||
|
||||
@dataclass()
class UnexpectedEofParsingError(ParsingError):
    """Parsing error produced when the token stream ends unexpectedly."""

    # Optional human-readable description; annotation widened from the
    # original `str = None`, which contradicted its own default.
    message: Union[str, None] = None
class BaseParser:
|
||||
@@ -214,46 +206,16 @@ class BaseParser:
|
||||
|
||||
return parser_input.value
|
||||
|
||||
@staticmethod
|
||||
def manage_eof(lst, strip_eof):
|
||||
if strip_eof:
|
||||
if len(lst) and lst[-1].type == TokenKind.EOF:
|
||||
lst.pop()
|
||||
return lst
|
||||
|
||||
if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
|
||||
lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
|
||||
return lst
|
||||
|
||||
# @staticmethod
|
||||
# def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
|
||||
# """
|
||||
# Create the source code, from the list of token
|
||||
# :param tokens: list of tokens
|
||||
# :param custom_switcher: to override the behaviour (the return value) of some token
|
||||
# :param tracker: keep track of the original token value when custom switched
|
||||
# :return:
|
||||
# """
|
||||
# if tokens is None:
|
||||
# return ""
|
||||
# res = ""
|
||||
# def manage_eof(lst, strip_eof):
|
||||
# if strip_eof:
|
||||
# if len(lst) and lst[-1].type == TokenKind.EOF:
|
||||
# lst.pop()
|
||||
# return lst
|
||||
#
|
||||
# if not hasattr(tokens, "__iter__"):
|
||||
# tokens = [tokens]
|
||||
#
|
||||
# switcher = {
|
||||
# # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
|
||||
# }
|
||||
#
|
||||
# if custom_switcher:
|
||||
# switcher.update(custom_switcher)
|
||||
#
|
||||
# for token in tokens:
|
||||
# value = switcher.get(token.type, lambda t: t.str_value)(token)
|
||||
# res += value
|
||||
# if tracker is not None and token.type in custom_switcher:
|
||||
# tracker[value] = token.value
|
||||
# return res
|
||||
# if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
|
||||
# lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
|
||||
# return lst
|
||||
|
||||
@staticmethod
|
||||
def get_tokens_boundaries(tokens):
|
||||
@@ -302,160 +264,3 @@ class BaseParser:
|
||||
@staticmethod
|
||||
def get_name(name):
|
||||
return BaseParser.PREFIX + name
|
||||
|
||||
|
||||
class BaseTokenizerIterParser(BaseParser):
    """Parser base that draws its tokens from a Tokenizer iterator."""

    def __init__(self, name, priority, parse_word=False, none_on_eof=True):
        super().__init__(name, priority)
        self.lexer_iter = None   # iterator over the Tokenizer output
        self._current = None     # token currently under the cursor
        self.context: ExecutionContext = None
        self.text = None         # raw source text being parsed
        self.sheerka = None      # taken from context in reset_parser

        self.parse_word = parse_word      # forwarded to Tokenizer
        self.none_on_eof = none_on_eof    # current becomes None once EOF is read

    def reset_parser(self, context, text):
        """Re-arm the parser on new input and advance to the first token."""
        self.context = context
        self.sheerka = context.sheerka

        self.text = text
        self.lexer_iter = iter(Tokenizer(text, self.parse_word))
        self._current = None

        # Prime the cursor so get_token() is immediately valid.
        self.next_token()

    def add_error(self, error, next_token=True):
        """Record *error* in the sink; optionally step past the bad token.

        Returns the error so callers can `return self.add_error(...)`.
        """
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        # Current token, or None after EOF / exhaustion.
        return self._current

    def next_token(self, skip_whitespace=True):
        """Advance the cursor; return False when the stream is exhausted.

        :param skip_whitespace: also consume WHITESPACE/NEWLINE tokens
        :return: True if a usable token is now current, False otherwise
        """
        try:
            self._current = next(self.lexer_iter)

            if self.none_on_eof and self._current.type == TokenKind.EOF:
                self._current = None
                return False

            if skip_whitespace:
                # NOTE(review): an EOF reached while skipping whitespace is
                # only caught via StopIteration below, not by the
                # none_on_eof branch above — confirm this is intended.
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None
            return False

        return True
|
||||
class BaseSplitIterParser(BaseParser):
    """Parser base that tokenizes input itself by splitting on a fixed set
    of separators: whitespace is eaten, punctuation is kept as tokens."""

    def __init__(self, name, priority, none_on_eof=False):
        super().__init__(name, priority)
        self._current = None     # token currently under the cursor
        self.context: ExecutionContext = None
        self.text = None         # raw source text being parsed
        self.sheerka = None      # taken from context in reset_parser
        self.iter_split = None   # iterator produced by split()
        # Separators that end a word and are silently consumed.
        self.split_and_eat_tokens = (" ", "\n", "\t")
        # Separators that end a word AND are emitted as their own token.
        self.split_and_keep_tokens = ("=", ")", "(", ",")
        self.split_tokens = self.split_and_eat_tokens + self.split_and_keep_tokens

        self.none_on_eof = none_on_eof  # current token is set to None when EOF is hit

    def parse_word(self, c, index, line, column):
        """Consume one word starting at character *c* / position *index*.

        Handles backslash escapes; an unescaped quote switches the
        terminator set to that quote character alone (quoted word).

        :return: (word, index, line, column) positioned after the word
        """
        end = self.split_tokens
        escaped = False
        buffer = ""

        while escaped or c not in end:
            if not escaped and c == "\\":
                escaped = True
            elif not escaped and c in ("'", '"'):
                # Entering a quoted section: only the matching quote ends it.
                end = [c]
            else:
                buffer += c
                escaped = False

            index, column = index + 1, column + 1
            if index == len(self.text):
                break
            c = self.text[index]

            if c == "\n":
                line += 1
                column = 0

        # 'not in' (rather than 'in'): skip past closing quotes and eaten
        # separators, but leave kept separators for split() to emit.
        if c not in self.split_and_keep_tokens:
            index, column = index + 1, column + 1

        return buffer, index, line, column

    def split(self):
        """Yield tokens for self.text; always terminates with an EOF token."""
        index = 0
        line = 1
        column = 1

        while index < len(self.text):
            c = self.text[index]

            if c == "=":
                # One-character lookahead distinguishes '==' from '='.
                if index + 1 < len(self.text) and self.text[index + 1] == "=":
                    yield Token(TokenKind.EQUALSEQUALS, "==", index, line, column)
                    index, column = index + 2, column + 2
                else:
                    yield Token(TokenKind.EQUALS, "=", index, line, column)
                    index, column = index + 1, column + 1
            elif c == ")":
                yield Token(TokenKind.RPAR, ")", index, line, column)
                index, column = index + 1, column + 1
            elif c == "(":
                yield Token(TokenKind.LPAR, "(", index, line, column)
                index, column = index + 1, column + 1
            elif c == ",":
                yield Token(TokenKind.COMMA, ",", index, line, column)
                index, column = index + 1, column + 1
            else:

                buffer, end_index, end_line, end_column = self.parse_word(c, index, line, column)
                if buffer:
                    # Whitespace-only runs yield an empty buffer: no token.
                    yield Token(TokenKind.WORD, buffer, index, line, column)
                index, line, column = end_index, end_line, end_column

        yield Token(TokenKind.EOF, "<eof>", index, line, column)

    def reset_parser(self, context, text):
        """Re-arm the parser on new input (context may be None)."""
        self.context = context
        self.sheerka = context.sheerka if context else None

        self.text = text
        self._current = None
        self.iter_split = iter(self.split())

    def add_error(self, error, next_token=True):
        """Record *error* in the sink; optionally step past the bad token.

        Returns the error so callers can `return self.add_error(...)`.
        """
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        # Current token, or None after EOF (when none_on_eof) / exhaustion.
        return self._current

    def next_token(self):
        """Advance the cursor; return False at EOF or stream exhaustion."""
        try:
            self._current = next(self.iter_split)
            if self._current.type == TokenKind.EOF:
                if self.none_on_eof:
                    self._current = None
                return False
        except StopIteration:
            self._current = None
            return False

        return True
||||
Reference in New Issue
Block a user