First version of explain. Creating a new parser was the wrong approach; this needs to be reimplemented.

This commit is contained in:
2020-04-17 17:24:57 +02:00
parent 6c7c529016
commit d6ea2461a8
43 changed files with 2679 additions and 162 deletions
+30 -4
View File
@@ -46,6 +46,8 @@ class TokenKind(Enum):
TILDE = "tilde" # ~
UNDERSCORE = "underscore" # _
DEGREE = "degree" # °
WORD = "word" # run of letters/digits; only emitted when the tokenizer's parse_word flag is set
EQUALSEQUALS = "==" # ==
@dataclass()
@@ -99,12 +101,13 @@ class Tokenizer:
KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text):
def __init__(self, text, parse_word=False):
self.text = text
self.text_len = len(text)
self.column = 1
self.line = 1
self.i = 0
self.parse_word = parse_word
def __iter__(self):
@@ -175,9 +178,14 @@ class Tokenizer:
self.i += 1
self.column += 1
elif c == "=":
yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
self.i += 1
self.column += 1
if self.i + 1 < self.text_len and self.text[self.i + 1] == "=":
yield Token(TokenKind.EQUALSEQUALS, "==", self.i, self.line, self.column)
self.i += 2
self.column += 2
else:
yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == " " or c == "\t":
whitespace = self.eat_whitespace(self.i)
yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
@@ -270,6 +278,11 @@ class Tokenizer:
yield Token(TokenKind.CONCEPT, (name, id), self.i, self.line, self.column)
self.i += length + 2
self.column += length + 2
elif self.parse_word and (c.isalpha() or c.isdigit()):
word = self.eat_word(self.i)
yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
self.i += len(word)
self.column += len(word)
elif c.isalpha() or c == "_":
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
@@ -419,3 +432,16 @@ class Tokenizer:
1 if lines_count > 0 else start_column + len(result))
return result, lines_count
def eat_word(self, start):
    """
    Return the word that begins at index ``start`` of ``self.text``.

    The character at ``start`` is always part of the word (the caller is
    expected to have checked it already); the word then extends over every
    following character that is a letter or a digit, and stops at the first
    character that is neither, or at the end of the text.

    :param start: index in ``self.text`` of the first character of the word
    :return: the word as a string; an empty string when ``start`` is at or
        past the end of the text
    """
    text = self.text
    end = start + 1

    # Only locate the end of the word here; the result is cut out with a single
    # slice instead of being grown one character at a time.
    while end < self.text_len:
        c = text[end]
        if not (c.isalpha() or c.isdigit()):
            break
        end += 1

    return text[start:end]