First version of explain. Creating a new parser was a wrong approach. Need to reimplement
This commit is contained in:
+30
-4
@@ -46,6 +46,8 @@ class TokenKind(Enum):
|
||||
TILDE = "tilde" # ~
|
||||
UNDERSCORE = "underscore" # _
|
||||
DEGREE = "degree" # °
|
||||
WORD = "word"
|
||||
EQUALSEQUALS = "=="
|
||||
|
||||
|
||||
@dataclass()
|
||||
@@ -99,12 +101,13 @@ class Tokenizer:
|
||||
|
||||
KEYWORDS = set(x.value for x in Keywords)
|
||||
|
||||
def __init__(self, text):
|
||||
def __init__(self, text, parse_word=False):
|
||||
self.text = text
|
||||
self.text_len = len(text)
|
||||
self.column = 1
|
||||
self.line = 1
|
||||
self.i = 0
|
||||
self.parse_word = parse_word
|
||||
|
||||
def __iter__(self):
|
||||
|
||||
@@ -175,9 +178,14 @@ class Tokenizer:
|
||||
self.i += 1
|
||||
self.column += 1
|
||||
elif c == "=":
|
||||
yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
|
||||
self.i += 1
|
||||
self.column += 1
|
||||
if self.i + 1 < self.text_len and self.text[self.i + 1] == "=":
|
||||
yield Token(TokenKind.EQUALSEQUALS, "==", self.i, self.line, self.column)
|
||||
self.i += 2
|
||||
self.column += 2
|
||||
else:
|
||||
yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
|
||||
self.i += 1
|
||||
self.column += 1
|
||||
elif c == " " or c == "\t":
|
||||
whitespace = self.eat_whitespace(self.i)
|
||||
yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
|
||||
@@ -270,6 +278,11 @@ class Tokenizer:
|
||||
yield Token(TokenKind.CONCEPT, (name, id), self.i, self.line, self.column)
|
||||
self.i += length + 2
|
||||
self.column += length + 2
|
||||
elif self.parse_word and (c.isalpha() or c.isdigit()):
|
||||
word = self.eat_word(self.i)
|
||||
yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
|
||||
self.i += len(word)
|
||||
self.column += len(word)
|
||||
elif c.isalpha() or c == "_":
|
||||
identifier = self.eat_identifier(self.i)
|
||||
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
|
||||
@@ -419,3 +432,16 @@ class Tokenizer:
|
||||
1 if lines_count > 0 else start_column + len(result))
|
||||
|
||||
return result, lines_count
|
||||
|
||||
def eat_word(self, start):
|
||||
result = self.text[start]
|
||||
i = start + 1
|
||||
while i < self.text_len:
|
||||
c = self.text[i]
|
||||
if c.isalpha() or c.isdigit():
|
||||
result += c
|
||||
i += 1
|
||||
else:
|
||||
break
|
||||
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user