Added first version of DebugManager. Implemented draft of the rule engine.
This commit is contained in:
+98
-26
@@ -9,13 +9,14 @@ class TokenKind(Enum):
|
||||
KEYWORD = "keyword"
|
||||
IDENTIFIER = "identifier"
|
||||
CONCEPT = "concept"
|
||||
RULE = "rule"
|
||||
STRING = "string"
|
||||
NUMBER = "number"
|
||||
TRUE = "true"
|
||||
FALSE = "false"
|
||||
LPAR = "lpar"
|
||||
RPAR = "rpar"
|
||||
LBRACKET = "lbrace"
|
||||
LBRACKET = "lbracket"
|
||||
RBRACKET = "rbracket"
|
||||
LBRACE = "lbrace"
|
||||
RBRACE = "rbrace"
|
||||
@@ -49,7 +50,7 @@ class TokenKind(Enum):
|
||||
WORD = "word"
|
||||
EQUALSEQUALS = "=="
|
||||
VAR_DEF = "__var__"
|
||||
REGEX = "r'xxx' or r\"xxx\" or r:xxx: or r|xxx| or r/xxx/"
|
||||
REGEX = "r'xxx' or r\"xxx\" or r|xxx| or r/xxx/"
|
||||
|
||||
|
||||
@dataclass()
|
||||
@@ -65,18 +66,7 @@ class Token:
|
||||
_repr_value: str = field(default=None, repr=False, compare=False, hash=None)
|
||||
|
||||
def __repr__(self):
|
||||
if self.type == TokenKind.IDENTIFIER:
|
||||
value = str(self.value)
|
||||
elif self.type == TokenKind.WHITESPACE:
|
||||
value = "<ws!>" if self.value == "" else "<tab>" if self.value[0] == "\t" else "<ws>"
|
||||
elif self.type == TokenKind.NEWLINE:
|
||||
value = "<nl>"
|
||||
elif self.type == TokenKind.EOF:
|
||||
value = "<EOF>"
|
||||
else:
|
||||
value = self.value
|
||||
|
||||
return f"Token({value})"
|
||||
return f"Token({self.repr_value})"
|
||||
|
||||
@property
|
||||
def strip_quote(self):
|
||||
@@ -102,9 +92,15 @@ class Token:
|
||||
if self.type == TokenKind.EOF:
|
||||
self._repr_value = "<EOF>"
|
||||
elif self.type == TokenKind.WHITESPACE:
|
||||
self._repr_value = "<ws>"
|
||||
self._repr_value = "<ws!>" if self.value == "" else "<tab>" if self.value[0] == "\t" else "<ws>"
|
||||
elif self.type == TokenKind.NEWLINE:
|
||||
self._repr_value = "<nl>"
|
||||
elif self.type == TokenKind.CONCEPT:
|
||||
from core.utils import str_concept
|
||||
self._repr_value = str_concept(self.value)
|
||||
elif self.type == TokenKind.RULE:
|
||||
from core.utils import str_concept
|
||||
self._repr_value = str_concept(self.value, prefix="r:")
|
||||
else:
|
||||
self._repr_value = self.str_value
|
||||
return self._repr_value
|
||||
@@ -121,6 +117,9 @@ class Token:
|
||||
elif self.type == TokenKind.CONCEPT:
|
||||
from core.utils import str_concept
|
||||
return str_concept(self.value)
|
||||
elif self.type == TokenKind.RULE:
|
||||
from core.utils import str_concept
|
||||
return str_concept(self.value, prefix="r:")
|
||||
else:
|
||||
return str(self.value)
|
||||
|
||||
@@ -192,18 +191,18 @@ class Tokenizer:
|
||||
self.column += 1
|
||||
elif c == "_":
|
||||
from core.concept import VARIABLE_PREFIX
|
||||
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
|
||||
identifier = self.eat_identifier(self.i)
|
||||
yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
|
||||
self.i += len(identifier)
|
||||
self.column += len(identifier)
|
||||
elif self.i + 7 < self.text_len and \
|
||||
if self.i + 7 < self.text_len and \
|
||||
self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \
|
||||
self.text[self.i + 7].isdigit():
|
||||
number = self.eat_number(self.i + 7)
|
||||
yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column)
|
||||
self.i += 7 + len(number)
|
||||
self.column += 7 + len(number)
|
||||
elif self.i + 1 < self.text_len and (self.text[self.i + 1].isalpha() or self.text[self.i + 1] == "_"):
|
||||
identifier = self.eat_identifier(self.i)
|
||||
yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
|
||||
self.i += len(identifier)
|
||||
self.column += len(identifier)
|
||||
else:
|
||||
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
|
||||
self.i += 1
|
||||
@@ -341,7 +340,12 @@ class Tokenizer:
|
||||
yield Token(TokenKind.CONCEPT, (name, id), self.i, self.line, self.column)
|
||||
self.i += length + 2
|
||||
self.column += length + 2
|
||||
elif c == "r" and self.i + 1 < self.text_len and self.text[self.i + 1] in "'\":|/":
|
||||
elif c == "r" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":":
|
||||
name, id, length = self.eat_concept(self.i + 2, self.line, self.column + 2)
|
||||
yield Token(TokenKind.RULE, (name, id), self.i, self.line, self.column)
|
||||
self.i += length + 2
|
||||
self.column += length + 2
|
||||
elif c == "r" and self.i + 1 < self.text_len and self.text[self.i + 1] in "'\"|/":
|
||||
string, newlines, column_index = self.eat_string(self.i + 1, self.line, self.column)
|
||||
yield Token(TokenKind.REGEX, string, self.i, self.line, self.column) # quotes are kept
|
||||
self.i += len(string) + 1
|
||||
@@ -368,10 +372,10 @@ class Tokenizer:
|
||||
self.i += len(string)
|
||||
self.column = column_index # 1 if newlines > 0 else self.column + len(string)
|
||||
self.line += newlines
|
||||
elif c == "_":
|
||||
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
|
||||
self.i += 1
|
||||
self.column += 1
|
||||
# elif c == "_":
|
||||
# yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
|
||||
# self.i += 1
|
||||
# self.column += 1
|
||||
else:
|
||||
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
|
||||
|
||||
@@ -518,3 +522,71 @@ class Tokenizer:
|
||||
break
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class IterParser:
    """Base for parsers that walk a ``Tokenizer`` stream one token at a time.

    The parser keeps the current token in ``self.token`` and supports
    lookahead through ``the_token_after``. Tokens read ahead of the current
    position are kept, in stream order, in ``self.tokens_after`` and are
    always consumed before anything new is pulled from the tokenizer.

    Attributes:
        source: the text handed to ``Tokenizer``.
        iterator: iterator over the tokens produced by ``Tokenizer(source)``.
        tokens_after: tokens already pulled for lookahead, not yet consumed.
        token: the current token (``None`` until ``next_token`` is called).
        error_sink: initialised to ``None``; not used by this class itself.
    """

    def __init__(self, source):
        self.source = source
        self.iterator = iter(Tokenizer(source))
        self.tokens_after = []
        self.token = None
        self.error_sink = None

    def _pull(self):
        """Return the next pending token: buffered lookahead first, then the iterator.

        Raises:
            StopIteration: when both the buffer and the iterator are exhausted.
        """
        if self.tokens_after:
            return self.tokens_after.pop(0)
        return next(self.iterator)

    def next_token(self, skip_whitespace=True):
        """Advance ``self.token`` to the next token.

        Args:
            skip_whitespace: when true, WHITESPACE and NEWLINE tokens are
                skipped and ``self.token`` ends up on the first other token.

        Returns:
            ``True`` if ``self.token`` is now a token other than EOF,
            ``False`` on EOF or when the token stream is exhausted.
        """
        try:
            self.token = self._pull()
            if skip_whitespace:
                # Skipping must also drain the lookahead buffer; reading the
                # iterator directly here would jump over buffered tokens and
                # hand them back later, out of order.
                while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                    self.token = self._pull()
        except StopIteration:
            return False

        return self.token.type != TokenKind.EOF

    def the_token_after(self, skip_whitespace=True):
        """Peek at the token following the current one without consuming it.

        Peeking is idempotent: calling this repeatedly without an intervening
        ``next_token`` returns the same token.

        Args:
            skip_whitespace: when true, WHITESPACE and NEWLINE tokens are
                looked past (they stay buffered for ``next_token``).

        Returns:
            The upcoming token, or a placeholder EOF token (all fields ``-1``)
            when the stream ends before one is found.
        """
        skipped_kinds = (TokenKind.WHITESPACE, TokenKind.NEWLINE)

        # Answer from tokens that were already read ahead, if possible.
        for pending in self.tokens_after:
            if not skip_whitespace or pending.type not in skipped_kinds:
                return pending

        try:
            while True:
                pending = next(self.iterator)
                # Everything pulled here is buffered so next_token still sees it.
                self.tokens_after.append(pending)
                if not skip_whitespace or pending.type not in skipped_kinds:
                    return pending
        except StopIteration:
            return Token(TokenKind.EOF, -1, -1, -1, -1)
|
||||
|
||||
|
||||
# @dataclass
|
||||
# class PropDef:
|
||||
# prop: str
|
||||
# index: int
|
||||
#
|
||||
#
|
||||
# class SimpleExpressionParser(IterParser):
|
||||
# def __init__(self, source):
|
||||
# super().__init__(source)
|
||||
# self.properties = []
|
||||
#
|
||||
# def parse(self):
|
||||
#
|
||||
# prop, index, key = None, None, None
|
||||
# while self.next_token():
|
||||
# if self.token.type == TokenKind.DOT:
|
||||
# self.properties.append(PropDef(prop, index, key))
|
||||
# prop, index, key = None, None, None
|
||||
# continue
|
||||
#
|
||||
# if self.token.type == TokenKind.LBRACKET:
|
||||
# index = self.parse_index()
|
||||
# elif self.token.type == TokenKind.LBRACE:
|
||||
# key = self.parse_key()
|
||||
# else:
|
||||
# prop = self.token.value
|
||||
#
|
||||
# if prop is not None:
|
||||
# self.properties.append(PropDef(prop, index, key))
|
||||
#
|
||||
# def parse_i
|
||||
|
||||
Reference in New Issue
Block a user