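"""Cursor-style access to the token stream produced by parsers.tokenizer.Tokenizer.

ParserInput tokenizes a text (or wraps pre-computed tokens, see from_tokens) and
lets callers walk the token list with next_token / check_next_token, rewind with
reset / seek, and fork the cursor with clone.
"""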

from common.utils import get_text_from_tokens
from parsers.tokenizer import Token, TokenKind, Tokenizer

class ParserInput:

    def __init__(self, text):
        self.original_text = text

        self.all_tokens: list = None
        self.exception: Exception = None
        self.pos: int = None
        self.end: int = None
        self.token: Token = None

    def init(self) -> bool:
        """Tokenize original_text; returns True on success, False if the tokenizer raised."""
        try:
            # The EOF token is always produced, but it is only yielded when yield_eof is set.
            self.all_tokens = list(Tokenizer(self.original_text, yield_eof=True))
            self.pos = -1
            self.end = len(self.all_tokens)
            return True
        except Exception as ex:
            self.all_tokens = None
            self.exception = ex
            return False

    def as_text(self, custom_switcher=None, tracker=None):
        if self.all_tokens is None:
            raise Exception("You must call init() first!")

        return get_text_from_tokens(self.all_tokens, custom_switcher, tracker)

    def reset(self):
        if self.exception:
            raise self.exception

        self.pos = -1

    def next_token(self, skip_whitespace=True) -> bool:
        """Advance to the next token; returns False once EOF / the end of input is reached."""
        self.pos += 1

        if self.pos >= self.end:
            self.token = self.all_tokens[-1]
            return False

        self.token = self.all_tokens[self.pos]

        if skip_whitespace:
            while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                self.pos += 1
                if self.pos >= self.end:  # guards token lists without a trailing EOF token
                    self.token = self.all_tokens[-1]
                    return False

                self.token = self.all_tokens[self.pos]

        if self.token.type == TokenKind.EOF:
            return False

        return True

    def check_next_token(self, skip_whitespace=True):
        """
        Returns the token after the current one, without advancing the cursor
        Never returns None (returns the EOF token instead)
        """
        my_pos = self.pos + 1
        if my_pos >= self.end:
            return self.all_tokens[-1]

        if skip_whitespace:
            while self.all_tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                my_pos += 1
                if my_pos >= self.end:
                    return self.all_tokens[-1]

        return self.all_tokens[my_pos]

    def seek(self, pos):
        """
        Move the token offset to position pos
        :param pos: absolute index into the token list
        :return: True if pos is a valid position, False otherwise
        """
        if pos < 0 or pos >= self.end:
            self.token = None
            return False

        self.pos = pos
        self.token = self.all_tokens[self.pos]
        return True

    def clone(self):
        """Returns a copy of this ParserInput; the token list is shared, not copied."""
        res = ParserInput(self.original_text)
        res.all_tokens = self.all_tokens
        res.exception = self.exception
        res.pos = self.pos
        res.end = self.end
        res.token = self.token

        return res

    @staticmethod
    def from_tokens(tokens, text=None):
        """
        Returns a ParserInput built from already computed tokens; init() is not needed
        :param tokens: pre-computed token list
        :param text: original text; rebuilt from the tokens when omitted
        :return: a ready-to-use ParserInput
        """
        res = ParserInput(None)
        res.all_tokens = tokens
        res.original_text = text or get_text_from_tokens(tokens)
        res.pos = -1
        res.end = len(res.all_tokens)
        return res

    def __repr__(self):
        return f"ParserInput('{self.original_text}', len={len(self.all_tokens)})"
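

# Usage sketch (illustrative addition, not part of the original module): walks the
# token stream of a small input. Assumes parsers.tokenizer.Tokenizer accepts plain
# text such as the sample string below.
if __name__ == "__main__":
    demo = ParserInput("a + b")
    if demo.init():
        while demo.next_token():
            print(demo.pos, demo.token)
        # Rewind and peek at the first significant token without consuming it.
        demo.reset()
        print("lookahead:", demo.check_next_token())
    else:
        print("tokenization failed:", demo.exception)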