Fixed #30 : Add variable support in BNF concept definition
Fixed #31 : Add regex support in BNF Concept
Fixed #33 : Do not memorize object during restore
This commit is contained in:
@@ -22,7 +22,7 @@ class ParserInput:
|
||||
Helper class that tokenizes the input once and for all
|
||||
"""
|
||||
|
||||
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
|
||||
def __init__(self, text, tokens=None, length=None, start=None, end=None, yield_oef=True):
|
||||
self.text = text
|
||||
self.tokens = tokens or None
|
||||
if self.tokens:
|
||||
@@ -38,13 +38,13 @@ class ParserInput:
|
||||
last_token.line,
|
||||
last_token.column + 1)]
|
||||
|
||||
self.length = None # to be computed in reset()
|
||||
self.length = length # to be computed (again) in reset()
|
||||
self.yield_oef = yield_oef
|
||||
|
||||
self.start = start or 0
|
||||
if end:
|
||||
self.original_end = end + 1
|
||||
self.end = self.original_end
|
||||
self.original_end = end # forced index of the last token
|
||||
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
|
||||
else:
|
||||
self.original_end = self.end = None
|
||||
|
||||
@@ -61,30 +61,43 @@ class ParserInput:
|
||||
return f"ParserInput({from_tokens}'{self.text}')"
|
||||
|
||||
def reset(self, yield_oef=None):
|
||||
|
||||
def _get_end_from_yield_eof(_length, _yield_oef):
|
||||
return _length - 1 if _yield_oef else _length - 2
|
||||
|
||||
if yield_oef is None:
|
||||
yield_oef = self.yield_oef
|
||||
|
||||
# make sure tokens is correctly initialized
|
||||
if self.tokens is None:
|
||||
# the EOF token is always produced here, but it will not be yielded unless yield_oef is set.
|
||||
self.tokens = list(Tokenizer(self.text, yield_eof=True))
|
||||
|
||||
self.length = len(self.tokens)
|
||||
|
||||
if self.original_end is None:
|
||||
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
|
||||
self.end = _get_end_from_yield_eof(self.length, yield_oef)
|
||||
else:
|
||||
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
|
||||
self.end = self.original_end if self.original_end < self.length else \
|
||||
_get_end_from_yield_eof(self.length, yield_oef)
|
||||
|
||||
self.pos = self.start - 1
|
||||
self.token = None
|
||||
return self
|
||||
|
||||
def as_text(self, custom_switcher=None, tracker=None):
|
||||
if not self.tokens or self.end is None:
|
||||
# as_text is requested before reset().
|
||||
# It means that we want the original text
|
||||
return self.text
|
||||
|
||||
if custom_switcher is None:
|
||||
if self.sub_text:
|
||||
return self.sub_text
|
||||
if self.start == 0 and self.end == self.length:
|
||||
if self.start == 0 and self.end == self.length - 1:
|
||||
self.sub_text = self.text
|
||||
return self.sub_text
|
||||
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end])
|
||||
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end + 1])
|
||||
return self.sub_text
|
||||
else:
|
||||
return core.utils.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)
|
||||
@@ -92,16 +105,16 @@ class ParserInput:
|
||||
def as_tokens(self):
|
||||
if self.sub_tokens:
|
||||
return self.sub_tokens
|
||||
if self.start == 0 and self.end == self.length:
|
||||
if self.start == 0 and self.end == self.length - 1:
|
||||
self.sub_tokens = self.tokens
|
||||
return self.sub_tokens
|
||||
self.sub_tokens = self.tokens[self.start:self.end]
|
||||
self.sub_tokens = self.tokens[self.start:self.end + 1]
|
||||
return self.sub_tokens
|
||||
|
||||
def next_token(self, skip_whitespace=True):
|
||||
self.pos += 1
|
||||
|
||||
if self.pos >= self.end:
|
||||
if self.pos > self.end:
|
||||
return False
|
||||
|
||||
self.token = self.tokens[self.pos]
|
||||
@@ -111,11 +124,11 @@ class ParserInput:
|
||||
if skip_whitespace:
|
||||
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
|
||||
self.pos += 1
|
||||
if self.pos == self.end:
|
||||
if self.pos > self.end:
|
||||
return False
|
||||
self.token = self.tokens[self.pos]
|
||||
|
||||
return self.pos < self.end
|
||||
return self.pos <= self.end
|
||||
|
||||
def the_token_after(self, skip_whitespace=True):
|
||||
"""
|
||||
@@ -123,13 +136,13 @@ class ParserInput:
|
||||
Never returns None (returns a Token of kind TokenKind.EOF instead)
|
||||
"""
|
||||
my_pos = self.pos + 1
|
||||
if my_pos >= self.end:
|
||||
if my_pos > self.end:
|
||||
return Token(TokenKind.EOF, "", -1, -1, -1)
|
||||
|
||||
if skip_whitespace:
|
||||
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
|
||||
my_pos += 1
|
||||
if my_pos == self.end:
|
||||
if my_pos > self.end:
|
||||
return Token(TokenKind.EOF, "", -1, -1, -1)
|
||||
|
||||
return self.tokens[my_pos]
|
||||
@@ -140,7 +153,7 @@ class ParserInput:
|
||||
:param pos:
|
||||
:return: True if pos is a valid position, False otherwise
|
||||
"""
|
||||
if pos < 0 or pos >= self.end:
|
||||
if pos < 0 or pos > self.end:
|
||||
self.token = None
|
||||
return False
|
||||
|
||||
@@ -355,10 +368,10 @@ class SheerkaExecute(BaseService):
|
||||
if pi is NotFound: # when CacheManager.cache_only is True
|
||||
pi = ParserInput(text)
|
||||
self.pi_cache.put(text, pi)
|
||||
return ParserInput(text, pi.tokens) # new instance, but no need to tokenize the text again
|
||||
return ParserInput(text, tokens=pi.tokens, length=pi.length) # new instance, but no need to tokenize the text again
|
||||
|
||||
key = text or core.utils.get_text_from_tokens(tokens)
|
||||
pi = ParserInput(key, tokens)
|
||||
pi = ParserInput(key, tokens=tokens, length=len(tokens))
|
||||
self.pi_cache.put(key, pi)
|
||||
return pi
|
||||
|
||||
|
||||
Reference in New Issue
Block a user