First version of explain. Creating a new parser was the wrong approach; it needs to be reimplemented.

2020-04-17 17:24:57 +02:00
parent 6c7c529016
commit d6ea2461a8
43 changed files with 2679 additions and 162 deletions
+166 -3
@@ -1,11 +1,12 @@
import logging
from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka_logger import get_logger
@dataclass()
@@ -65,6 +66,11 @@ class UnexpectedTokenErrorNode(ErrorNode):
        return hash((self.message, self.token, self.expected_tokens))


@dataclass()
class UnexpectedEof(ErrorNode):
    message: str


class BaseParser:
    PREFIX = "parsers."
@@ -203,3 +209,160 @@ class BaseParser:
        value = switcher.get(token.type, lambda t: t.value)(token)
        res += value
        return res

class BaseTokenizerIterParser(BaseParser):
    def __init__(self, name, priority, parse_word=False, none_on_eof=True):
        super().__init__(name, priority)
        self.lexer_iter = None
        self._current = None
        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None
        self.parse_word = parse_word
        self.none_on_eof = none_on_eof

    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka
        self.text = text
        self.lexer_iter = iter(Tokenizer(text, self.parse_word))
        self._current = None
        # Advance immediately so the first get_token() already has a token.
        self.next_token()

    def add_error(self, error, next_token=True):
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self._current

    def next_token(self, skip_whitespace=True):
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                    self._current = next(self.lexer_iter)
            # EOF is checked after whitespace skipping, so trailing whitespace
            # cannot hide the end of input.
            if self.none_on_eof and self._current.type == TokenKind.EOF:
                self._current = None
                return False
        except StopIteration:
            self._current = None
            return False
        return True
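
As a rough sketch of how a subclass might drive this token loop (hypothetical and not part of this commit: the EchoParser name, the priority value, and the parse method are invented; only reset_parser, get_token, and next_token come from the class above):

class EchoParser(BaseTokenizerIterParser):
    def __init__(self):
        # Priority 0 is an arbitrary placeholder.
        super().__init__("echo", priority=0)

    def parse(self, context, text):
        # context must expose .sheerka, as reset_parser dereferences it.
        # reset_parser already advances to the first token.
        self.reset_parser(context, text)
        words = []
        # With none_on_eof=True (the default), get_token() returns None at EOF.
        while self.get_token() is not None:
            words.append(self.get_token().value)
            self.next_token()
        return words
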
class BaseSplitIterParser(BaseParser):
    def __init__(self, name, priority, none_on_eof=False):
        super().__init__(name, priority)
        self._current = None
        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None
        self.iter_split = None
        # Characters that end a word: whitespace is consumed, punctuation is
        # kept and emitted as its own token.
        self.split_and_eat_tokens = (" ", "\n", "\t")
        self.split_and_keep_tokens = ("=", ")", "(", ",")
        self.split_tokens = self.split_and_eat_tokens + self.split_and_keep_tokens
        self.none_on_eof = none_on_eof  # current token is set to None when EOF is hit

    def parse_word(self, c, index, line, column):
        end = self.split_tokens
        escaped = False
        buffer = ""
        while escaped or c not in end:
            if not escaped and c == "\\":
                escaped = True
            elif not escaped and c in ("'", '"'):
                # An opening quote: from here on, only the matching quote ends the word.
                end = [c]
            else:
                buffer += c
                escaped = False
            index, column = index + 1, column + 1
            if index == len(self.text):
                break
            c = self.text[index]
            if c == "\n":
                line += 1
                column = 0
        if c not in self.split_and_keep_tokens:
            # Consume the terminator unless it must be emitted as its own token:
            # whitespace and closing quotes are eaten here, while '=', '(', ')'
            # and ',' are left for split() to tokenize.
            index, column = index + 1, column + 1
        return buffer, index, line, column
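
    # Illustration (not from the original diff): when parse_word starts at the
    # opening quote of
    #   "a, b"
    # it switches end to the matching quote, so the comma and space inside the
    # quotes land in the buffer instead of splitting it. The result is the word
    # 'a, b', and the closing quote itself is eaten because '"' is not in
    # split_and_keep_tokens.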
    def split(self):
        index = 0
        line = 1
        column = 1
        while index < len(self.text):
            c = self.text[index]
            if c == "=":
                if index + 1 < len(self.text) and self.text[index + 1] == "=":
                    yield Token(TokenKind.EQUALSEQUALS, "==", index, line, column)
                    index, column = index + 2, column + 2
                else:
                    yield Token(TokenKind.EQUALS, "=", index, line, column)
                    index, column = index + 1, column + 1
            elif c == ")":
                yield Token(TokenKind.RPAR, ")", index, line, column)
                index, column = index + 1, column + 1
            elif c == "(":
                yield Token(TokenKind.LPAR, "(", index, line, column)
                index, column = index + 1, column + 1
            elif c == ",":
                yield Token(TokenKind.COMMA, ",", index, line, column)
                index, column = index + 1, column + 1
            else:
                buffer, end_index, end_line, end_column = self.parse_word(c, index, line, column)
                if buffer:
                    yield Token(TokenKind.WORD, buffer, index, line, column)
                index, line, column = end_index, end_line, end_column
        yield Token(TokenKind.EOF, "<eof>", index, line, column)
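
    # Illustration (not from the original diff): for the text
    #   name = greet(x, "a b")
    # split() yields roughly
    #   WORD 'name', EQUALS '=', WORD 'greet', LPAR '(', WORD 'x',
    #   COMMA ',', WORD 'a b', RPAR ')', EOF '<eof>'
    # with index/line/column recorded on every token; plain whitespace is
    # swallowed by parse_word and never produces a token.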
    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka if context else None
        self.text = text
        self._current = None
        self.iter_split = iter(self.split())

    def add_error(self, error, next_token=True):
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self._current

    def next_token(self):
        try:
            self._current = next(self.iter_split)
            if self._current.type == TokenKind.EOF:
                if self.none_on_eof:
                    self._current = None
                return False
        except StopIteration:
            self._current = None
            return False
        return True
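
And a quick way to see the split tokenizer itself at work (again a hypothetical snippet, not part of the diff: it instantiates the base class directly, assumes BaseParser.__init__ takes only name and priority as the super() calls above suggest, and passes context=None, which this reset_parser tolerates):

parser = BaseSplitIterParser("demo", priority=0)
parser.reset_parser(None, 'result = add(1, 2)')
# Unlike the tokenizer-based class, this reset_parser does not pre-fetch,
# so next_token() drives the whole loop; it returns False at EOF.
while parser.next_token():
    token = parser.get_token()
    print(token.type, repr(token.value))
# Expected output, roughly: WORD 'result', EQUALS '=', WORD 'add',
# LPAR '(', WORD '1', COMMA ',', WORD '2', RPAR ')'.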