First version of explain. Creating a new parser was the wrong approach; it needs to be reimplemented.

2020-04-17 17:24:57 +02:00
parent 6c7c529016
commit d6ea2461a8
43 changed files with 2679 additions and 162 deletions
@@ -46,6 +46,8 @@ class TokenKind(Enum):
    TILDE = "tilde"  # ~
    UNDERSCORE = "underscore"  # _
    DEGREE = "degree"  # °
+    WORD = "word"
+    EQUALSEQUALS = "=="


@dataclass()
@@ -99,12 +101,13 @@ class Tokenizer:

    KEYWORDS = set(x.value for x in Keywords)

-    def __init__(self, text):
+    def __init__(self, text, parse_word=False):
        self.text = text
        self.text_len = len(text)
        self.column = 1
        self.line = 1
        self.i = 0
+        self.parse_word = parse_word

    def __iter__(self):

@@ -175,9 +178,14 @@ class Tokenizer:
                self.i += 1
                self.column += 1
            elif c == "=":
-                yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
-                self.i += 1
-                self.column += 1
+                if self.i + 1 < self.text_len and self.text[self.i + 1] == "=":
+                    yield Token(TokenKind.EQUALSEQUALS, "==", self.i, self.line, self.column)
+                    self.i += 2
+                    self.column += 2
+                else:
+                    yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
+                    self.i += 1
+                    self.column += 1
            elif c == " " or c == "\t":
                whitespace = self.eat_whitespace(self.i)
                yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
@@ -270,6 +278,11 @@ class Tokenizer:
                yield Token(TokenKind.CONCEPT, (name, id), self.i, self.line, self.column)
                self.i += length + 2
                self.column += length + 2
+            elif self.parse_word and (c.isalpha() or c.isdigit()):
+                word = self.eat_word(self.i)
+                yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
+                self.i += len(word)
+                self.column += len(word)
            elif c.isalpha() or c == "_":
                identifier = self.eat_identifier(self.i)
                token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
@@ -419,3 +432,16 @@ class Tokenizer:
                             1 if lines_count > 0 else start_column + len(result))

        return result, lines_count
+
+    def eat_word(self, start):
+        result = self.text[start]
+        i = start + 1
+        while i < self.text_len:
+            c = self.text[i]
+            if c.isalpha() or c.isdigit():
+                result += c
+                i += 1
+            else:
+                break
+
+        return result