Implemented some enhancement requests
This commit is contained in:
+13
-208
@@ -35,14 +35,6 @@ class Node:
|
||||
pass
|
||||
|
||||
|
||||
@dataclass()
class NopNode(Node):
    """AST node representing a no-operation; carries no data of its own."""

    def __repr__(self):
        # Compact form so debug dumps of a tree stay readable.
        return "nop"
||||
class NotInitializedNode(Node):
    """Marker node for a value that has not been initialized yet."""
@@ -51,12 +43,12 @@ class NotInitializedNode(Node):
|
||||
|
||||
|
||||
@dataclass()
class ParsingError(Node, ErrorObj):
    # Base marker type for parsing errors; concrete error nodes subclass it.
    # (Renamed from ErrorNode in this change — confirm no stale references.)
    pass
||||
@dataclass()
|
||||
class UnexpectedTokenErrorNode(ErrorNode):
|
||||
class UnexpectedTokenParsingError(ParsingError):
|
||||
message: str
|
||||
token: Union[Token, str]
|
||||
expected_tokens: list
|
||||
@@ -65,7 +57,7 @@ class UnexpectedTokenErrorNode(ErrorNode):
|
||||
if id(other) == id(self):
|
||||
return True
|
||||
|
||||
if not isinstance(other, UnexpectedTokenErrorNode):
|
||||
if not isinstance(other, UnexpectedTokenParsingError):
|
||||
return False
|
||||
|
||||
if self.message != other.message:
|
||||
@@ -82,8 +74,8 @@ class UnexpectedTokenErrorNode(ErrorNode):
|
||||
|
||||
|
||||
@dataclass()
class UnexpectedEofParsingError(ParsingError):
    """Parsing error produced when the token stream ends unexpectedly."""

    # Optional human-readable description; annotation widened from the
    # original `str = None`, which contradicted its own default.
    message: Union[str, None] = None
class BaseParser:
|
||||
@@ -214,46 +206,16 @@ class BaseParser:
|
||||
|
||||
return parser_input.value
|
||||
|
||||
@staticmethod
|
||||
def manage_eof(lst, strip_eof):
|
||||
if strip_eof:
|
||||
if len(lst) and lst[-1].type == TokenKind.EOF:
|
||||
lst.pop()
|
||||
return lst
|
||||
|
||||
if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
|
||||
lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
|
||||
return lst
|
||||
|
||||
# @staticmethod
|
||||
# def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
|
||||
# """
|
||||
# Create the source code, from the list of token
|
||||
# :param tokens: list of tokens
|
||||
# :param custom_switcher: to override the behaviour (the return value) of some token
|
||||
# :param tracker: keep track of the original token value when custom switched
|
||||
# :return:
|
||||
# """
|
||||
# if tokens is None:
|
||||
# return ""
|
||||
# res = ""
|
||||
# def manage_eof(lst, strip_eof):
|
||||
# if strip_eof:
|
||||
# if len(lst) and lst[-1].type == TokenKind.EOF:
|
||||
# lst.pop()
|
||||
# return lst
|
||||
#
|
||||
# if not hasattr(tokens, "__iter__"):
|
||||
# tokens = [tokens]
|
||||
#
|
||||
# switcher = {
|
||||
# # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
|
||||
# }
|
||||
#
|
||||
# if custom_switcher:
|
||||
# switcher.update(custom_switcher)
|
||||
#
|
||||
# for token in tokens:
|
||||
# value = switcher.get(token.type, lambda t: t.str_value)(token)
|
||||
# res += value
|
||||
# if tracker is not None and token.type in custom_switcher:
|
||||
# tracker[value] = token.value
|
||||
# return res
|
||||
# if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
|
||||
# lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
|
||||
# return lst
|
||||
|
||||
@staticmethod
|
||||
def get_tokens_boundaries(tokens):
|
||||
@@ -302,160 +264,3 @@ class BaseParser:
|
||||
@staticmethod
|
||||
def get_name(name):
|
||||
return BaseParser.PREFIX + name
|
||||
|
||||
|
||||
class BaseTokenizerIterParser(BaseParser):
    """Parser base that draws its tokens from a Tokenizer iterator."""

    def __init__(self, name, priority, parse_word=False, none_on_eof=True):
        super().__init__(name, priority)
        self.lexer_iter = None   # iterator over the Tokenizer output
        self._current = None     # token currently under the cursor
        self.context: ExecutionContext = None
        self.text = None         # raw source text being parsed
        self.sheerka = None      # taken from context in reset_parser

        self.parse_word = parse_word      # forwarded to Tokenizer
        self.none_on_eof = none_on_eof    # current becomes None once EOF is read

    def reset_parser(self, context, text):
        """Re-arm the parser on new input and advance to the first token."""
        self.context = context
        self.sheerka = context.sheerka

        self.text = text
        self.lexer_iter = iter(Tokenizer(text, self.parse_word))
        self._current = None

        # Prime the cursor so get_token() is immediately valid.
        self.next_token()

    def add_error(self, error, next_token=True):
        """Record *error* in the sink; optionally step past the bad token.

        Returns the error so callers can `return self.add_error(...)`.
        """
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        # Current token, or None after EOF / exhaustion.
        return self._current

    def next_token(self, skip_whitespace=True):
        """Advance the cursor; return False when the stream is exhausted.

        :param skip_whitespace: also consume WHITESPACE/NEWLINE tokens
        :return: True if a usable token is now current, False otherwise
        """
        try:
            self._current = next(self.lexer_iter)

            if self.none_on_eof and self._current.type == TokenKind.EOF:
                self._current = None
                return False

            if skip_whitespace:
                # NOTE(review): an EOF reached while skipping whitespace is
                # only caught via StopIteration below, not by the
                # none_on_eof branch above — confirm this is intended.
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None
            return False

        return True
|
||||
class BaseSplitIterParser(BaseParser):
    """Parser base that tokenizes input itself by splitting on a fixed set
    of separators: whitespace is eaten, punctuation is kept as tokens."""

    def __init__(self, name, priority, none_on_eof=False):
        super().__init__(name, priority)
        self._current = None     # token currently under the cursor
        self.context: ExecutionContext = None
        self.text = None         # raw source text being parsed
        self.sheerka = None      # taken from context in reset_parser
        self.iter_split = None   # iterator produced by split()
        # Separators that end a word and are silently consumed.
        self.split_and_eat_tokens = (" ", "\n", "\t")
        # Separators that end a word AND are emitted as their own token.
        self.split_and_keep_tokens = ("=", ")", "(", ",")
        self.split_tokens = self.split_and_eat_tokens + self.split_and_keep_tokens

        self.none_on_eof = none_on_eof  # current token is set to None when EOF is hit

    def parse_word(self, c, index, line, column):
        """Consume one word starting at character *c* / position *index*.

        Handles backslash escapes; an unescaped quote switches the
        terminator set to that quote character alone (quoted word).

        :return: (word, index, line, column) positioned after the word
        """
        end = self.split_tokens
        escaped = False
        buffer = ""

        while escaped or c not in end:
            if not escaped and c == "\\":
                escaped = True
            elif not escaped and c in ("'", '"'):
                # Entering a quoted section: only the matching quote ends it.
                end = [c]
            else:
                buffer += c
                escaped = False

            index, column = index + 1, column + 1
            if index == len(self.text):
                break
            c = self.text[index]

            if c == "\n":
                line += 1
                column = 0

        # 'not in' (rather than 'in'): skip past closing quotes and eaten
        # separators, but leave kept separators for split() to emit.
        if c not in self.split_and_keep_tokens:
            index, column = index + 1, column + 1

        return buffer, index, line, column

    def split(self):
        """Yield tokens for self.text; always terminates with an EOF token."""
        index = 0
        line = 1
        column = 1

        while index < len(self.text):
            c = self.text[index]

            if c == "=":
                # One-character lookahead distinguishes '==' from '='.
                if index + 1 < len(self.text) and self.text[index + 1] == "=":
                    yield Token(TokenKind.EQUALSEQUALS, "==", index, line, column)
                    index, column = index + 2, column + 2
                else:
                    yield Token(TokenKind.EQUALS, "=", index, line, column)
                    index, column = index + 1, column + 1
            elif c == ")":
                yield Token(TokenKind.RPAR, ")", index, line, column)
                index, column = index + 1, column + 1
            elif c == "(":
                yield Token(TokenKind.LPAR, "(", index, line, column)
                index, column = index + 1, column + 1
            elif c == ",":
                yield Token(TokenKind.COMMA, ",", index, line, column)
                index, column = index + 1, column + 1
            else:

                buffer, end_index, end_line, end_column = self.parse_word(c, index, line, column)
                if buffer:
                    # Whitespace-only runs yield an empty buffer: no token.
                    yield Token(TokenKind.WORD, buffer, index, line, column)
                index, line, column = end_index, end_line, end_column

        yield Token(TokenKind.EOF, "<eof>", index, line, column)

    def reset_parser(self, context, text):
        """Re-arm the parser on new input (context may be None)."""
        self.context = context
        self.sheerka = context.sheerka if context else None

        self.text = text
        self._current = None
        self.iter_split = iter(self.split())

    def add_error(self, error, next_token=True):
        """Record *error* in the sink; optionally step past the bad token.

        Returns the error so callers can `return self.add_error(...)`.
        """
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        # Current token, or None after EOF (when none_on_eof) / exhaustion.
        return self._current

    def next_token(self):
        """Advance the cursor; return False at EOF or stream exhaustion."""
        try:
            self._current = next(self.iter_split)
            if self._current.type == TokenKind.EOF:
                if self.none_on_eof:
                    self._current = None
                return False
        except StopIteration:
            self._current = None
            return False

        return True
||||
Reference in New Issue
Block a user