First version of explain. Creating a new parser was the wrong approach; it needs to be reimplemented.

2020-04-17 17:24:57 +02:00
parent 6c7c529016
commit d6ea2461a8
43 changed files with 2679 additions and 162 deletions
@@ -1,11 +1,12 @@
+import logging
 from dataclasses import dataclass

+import core.utils
 from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
 from core.concept import Concept
-from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
+from core.sheerka.ExecutionContext import ExecutionContext
 from core.sheerka_logger import get_logger
-import core.utils
-import logging
+from core.tokenizer import TokenKind, Keywords, Token, Tokenizer


@dataclass()
@@ -65,6 +66,11 @@ class UnexpectedTokenErrorNode(ErrorNode):
        return hash((self.message, self.token, self.expected_tokens))


+@dataclass()
+class UnexpectedEof(ErrorNode):
+    """Error node that carries only a human-readable message.
+
+    NOTE(review): judging by the name, this is presumably reported when the
+    input ends before a construct is complete; the code that creates it is
+    not visible here -- confirm against callers.
+    """
+    message: str  # description of the error
+
+
 class BaseParser:
    PREFIX = "parsers."

@@ -203,3 +209,160 @@ class BaseParser:
            value = switcher.get(token.type, lambda t: t.value)(token)
            res += value
        return res
+
+
+class BaseTokenizerIterParser(BaseParser):
+    """Base class for parsers that pull tokens one at a time from ``Tokenizer``.
+
+    ``reset_parser`` must be called before parsing: it creates the token
+    iterator and loads the first token. ``get_token`` returns the current
+    token and ``next_token`` advances to the following one.
+    """
+
+    def __init__(self, name, priority, parse_word=False, none_on_eof=True):
+        """Initialise the parser state.
+
+        :param name: passed through to ``BaseParser.__init__``.
+        :param priority: passed through to ``BaseParser.__init__``.
+        :param parse_word: passed as the second argument to ``Tokenizer``.
+        :param none_on_eof: when true, the current token is set to ``None``
+            (instead of the EOF token) once the end of input is reached.
+        """
+        super().__init__(name, priority)
+        self.lexer_iter = None  # token iterator, created by reset_parser()
+        self._current = None  # token returned by get_token()
+        self.context: ExecutionContext = None
+        self.text = None
+        self.sheerka = None
+
+        self.parse_word = parse_word
+        self.none_on_eof = none_on_eof
+
+    def reset_parser(self, context, text):
+        """Start parsing ``text``: rebuild the token iterator and load the first token.
+
+        :param context: execution context; its ``sheerka`` attribute is cached
+            on the parser.
+        :param text: source text handed to ``Tokenizer``.
+        """
+        self.context = context
+        self.sheerka = context.sheerka
+
+        self.text = text
+        self.lexer_iter = iter(Tokenizer(text, self.parse_word))
+        self._current = None
+
+        self.next_token()
+
+    def add_error(self, error, next_token=True):
+        """Append ``error`` to ``self.error_sink`` and return it.
+
+        :param error: the error object to record.
+        :param next_token: when true (default), also advance to the next token.
+        :return: the ``error`` that was passed in.
+        """
+        self.error_sink.append(error)
+        if next_token:
+            self.next_token()
+        return error
+
+    def get_token(self) -> Token:
+        """Return the current token (``None`` before parsing or after the input is exhausted)."""
+        return self._current
+
+    def next_token(self, skip_whitespace=True):
+        """Advance to the next token.
+
+        :param skip_whitespace: when true (default), WHITESPACE and NEWLINE
+            tokens are skipped.
+        :return: ``False`` when the token iterator is exhausted, or when the
+            EOF token is reached and ``none_on_eof`` is set (the current token
+            is ``None`` in both cases); ``True`` otherwise.
+        """
+        try:
+            self._current = next(self.lexer_iter)
+
+            if skip_whitespace:
+                while self._current.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
+                    self._current = next(self.lexer_iter)
+
+            # The EOF check must run AFTER whitespace skipping: otherwise an
+            # EOF token that follows trailing whitespace is reported as a
+            # regular token even though none_on_eof is set.
+            if self.none_on_eof and self._current.type == TokenKind.EOF:
+                self._current = None
+                return False
+        except StopIteration:
+            self._current = None
+            return False
+
+        return True
+
+
+class BaseSplitIterParser(BaseParser):
+    """Base class for parsers that split the text with a small hand-written lexer.
+
+    The text is cut into WORD tokens separated by blanks, plus dedicated
+    tokens for ``=``, ``==``, ``(``, ``)`` and ``,``. Quotes group characters
+    (including separators) into a single word and a backslash escapes the
+    next character.
+
+    ``reset_parser`` does not load the first token: call ``next_token`` once
+    before the first ``get_token``.
+    """
+
+    def __init__(self, name, priority, none_on_eof=False):
+        """Initialise the parser state.
+
+        :param name: passed through to ``BaseParser.__init__``.
+        :param priority: passed through to ``BaseParser.__init__``.
+        :param none_on_eof: when true, the current token is set to ``None``
+            (instead of the EOF token) when EOF is hit.
+        """
+        super().__init__(name, priority)
+        self._current = None  # token returned by get_token()
+        self.context: ExecutionContext = None
+        self.text = None
+        self.sheerka = None
+        self.iter_split = None  # token generator, created by reset_parser()
+        # separators that are dropped once a word has been read
+        self.split_and_eat_tokens = (" ", "\n", "\t")
+        # separators that end a word and are emitted as tokens of their own
+        self.split_and_keep_tokens = ("=", ")", "(", ",")
+        self.split_tokens = self.split_and_eat_tokens + self.split_and_keep_tokens
+
+        self.none_on_eof = none_on_eof  # current token is set to None when EOF is hit
+
+    def parse_word(self, c, index, line, column):
+        """Read one word of ``self.text`` starting at ``index``.
+
+        :param c: the character at ``index``.
+        :param index: position in ``self.text`` where the word starts.
+        :param line: line number of ``index``.
+        :param column: column number of ``index``.
+        :return: ``(buffer, index, line, column)`` where ``buffer`` is the word
+            (quotes and escaping backslashes removed, possibly empty) and the
+            other values give the position where scanning must resume.
+        """
+        end = self.split_tokens
+        in_quote = False
+        escaped = False
+        reached_end = False
+        chars = []
+
+        while escaped or c not in end:
+            if not escaped and c == "\\":
+                escaped = True
+            elif not escaped and not in_quote and c in ("'", '"'):
+                # Opening quote: from now on only the same quote ends the word.
+                # A different quote character inside the quoted section is
+                # plain text and must not re-target the terminator.
+                end = (c,)
+                in_quote = True
+            else:
+                chars.append(c)
+                escaped = False
+                if c == "\n":
+                    # newline kept inside the word (quoted or escaped)
+                    line += 1
+                    column = 0
+
+            index, column = index + 1, column + 1
+            if index == len(self.text):
+                reached_end = True
+                break
+            c = self.text[index]
+
+        buffer = "".join(chars)
+
+        if reached_end:
+            # The whole text was consumed: ``c`` is the last character of the
+            # word, not a terminator, so there is nothing left to skip.
+            return buffer, index, line, column
+
+        if c == "\n":
+            line += 1
+            column = 0
+
+        if c not in self.split_and_keep_tokens:  # 'not in' rather than 'in' so that a closing quote is skipped too
+            index, column = index + 1, column + 1
+
+        return buffer, index, line, column
+
+    def split(self):
+        """Yield the tokens of ``self.text``, ending with an EOF token.
+
+        NOTE(review): an empty quoted string (``''``) produces an empty buffer
+        and therefore no WORD token -- confirm this is intended.
+        """
+        single_char_kinds = {
+            ")": TokenKind.RPAR,
+            "(": TokenKind.LPAR,
+            ",": TokenKind.COMMA,
+        }
+
+        index = 0
+        line = 1
+        column = 1
+
+        while index < len(self.text):
+            c = self.text[index]
+
+            if c == "=":
+                if index + 1 < len(self.text) and self.text[index + 1] == "=":
+                    yield Token(TokenKind.EQUALSEQUALS, "==", index, line, column)
+                    index, column = index + 2, column + 2
+                else:
+                    yield Token(TokenKind.EQUALS, "=", index, line, column)
+                    index, column = index + 1, column + 1
+            elif c in single_char_kinds:
+                yield Token(single_char_kinds[c], c, index, line, column)
+                index, column = index + 1, column + 1
+            else:
+                # A word, or a blank (which gives an empty buffer and is skipped).
+                buffer, end_index, end_line, end_column = self.parse_word(c, index, line, column)
+                if buffer:
+                    yield Token(TokenKind.WORD, buffer, index, line, column)
+                index, line, column = end_index, end_line, end_column
+
+        yield Token(TokenKind.EOF, "<eof>", index, line, column)
+
+    def reset_parser(self, context, text):
+        """Start parsing ``text``: rebuild the token generator.
+
+        The first token is NOT loaded; the current token stays ``None`` until
+        ``next_token`` is called.
+
+        :param context: execution context (may be ``None``); its ``sheerka``
+            attribute is cached on the parser when a context is given.
+        :param text: source text to split.
+        """
+        self.context = context
+        self.sheerka = context.sheerka if context else None
+
+        self.text = text
+        self._current = None
+        self.iter_split = iter(self.split())
+
+    def add_error(self, error, next_token=True):
+        """Append ``error`` to ``self.error_sink`` and return it.
+
+        :param error: the error object to record.
+        :param next_token: when true (default), also advance to the next token.
+        :return: the ``error`` that was passed in.
+        """
+        self.error_sink.append(error)
+        if next_token:
+            self.next_token()
+        return error
+
+    def get_token(self) -> Token:
+        """Return the current token (``None`` until ``next_token`` has been called)."""
+        return self._current
+
+    def next_token(self):
+        """Advance to the next token.
+
+        :return: ``False`` when EOF is reached or the generator is exhausted,
+            ``True`` otherwise. On EOF the current token is the EOF token, or
+            ``None`` when ``none_on_eof`` is set.
+        """
+        try:
+            self._current = next(self.iter_split)
+            if self._current.type == TokenKind.EOF:
+                if self.none_on_eof:
+                    self._current = None
+                return False
+        except StopIteration:
+            self._current = None
+            return False
+
+        return True