Introduced ParserInput
This commit is contained in:
@@ -226,28 +226,28 @@ def only_parsers_results(context, return_values):
|
||||
parents=return_values)
|
||||
|
||||
|
||||
def parse_unrecognized(context, tokens, parsers):
|
||||
def parse_unrecognized(context, source, parsers):
|
||||
"""
|
||||
Try to recognize concepts or code from tokens using the given parsers
|
||||
Try to recognize concepts or code from source using the given parsers
|
||||
:param context:
|
||||
:param tokens:
|
||||
:param source:
|
||||
:param parsers:
|
||||
:return:
|
||||
"""
|
||||
steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
|
||||
sheerka = context.sheerka
|
||||
|
||||
with context.push(desc=f"Parsing unrecognized '{tokens}'") as sub_context:
|
||||
with context.push(desc=f"Parsing unrecognized '{source}'") as sub_context:
|
||||
# disable all parsers but the following ones
|
||||
sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False)
|
||||
for parser in parsers:
|
||||
sub_context.add_preprocess(BaseParser.PREFIX + parser, enabled=True)
|
||||
|
||||
sub_context.add_inputs(source=tokens)
|
||||
sub_context.add_inputs(source=source)
|
||||
to_parse = sheerka.ret(
|
||||
context.who,
|
||||
True,
|
||||
sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
|
||||
sheerka.new(BuiltinConcepts.USER_INPUT, body=source))
|
||||
res = sheerka.execute(sub_context, to_parse, steps)
|
||||
sub_context.add_values(return_values=res)
|
||||
|
||||
|
||||
@@ -1,23 +1,178 @@
|
||||
import core.utils
|
||||
from cache.Cache import Cache
|
||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
||||
from core.sheerka.services.sheerka_service import BaseService
|
||||
from core.tokenizer import Tokenizer, TokenKind, Keywords, Token
|
||||
|
||||
NO_MATCH = "** No Match **"
|
||||
|
||||
|
||||
class ParserInput:
    """
    Helper class that tokenizes the input once for all

    It keeps the raw text together with its token list (built lazily by
    reset() when no tokens were supplied) and exposes a cursor (pos / token)
    over an optional window of the token list.
    """

    def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
        """
        :param text: the raw source text
        :param tokens: already computed tokens for text (optional)
        :param start: index of the first token of the window (defaults to 0)
        :param end: index of the last token of the window, inclusive
                    (defaults to the whole token list)
        :param yield_oef: when True, an EOF token is exposed by next_token()
        """
        self.text = text
        self.tokens = tokens or None
        self.length = len(tokens) if tokens else None
        self.yield_oef = yield_oef

        self.start = start or 0
        # 'end' is inclusive for the caller but stored exclusive, so that it can
        # be used directly as a slice bound. Compare against None (not
        # truthiness), otherwise end=0 (a window holding only the first token)
        # would silently be turned into "no end at all".
        self.end = end + 1 if end is not None else None

        # lazily computed views of the window, see as_text() and as_tokens()
        self.sub_text = None
        self.sub_tokens = None

        # cursor state, only meaningful after reset()
        self.pos = None
        self.token = None

        self.from_tokens = tokens is not None

    def __repr__(self):
        from_tokens = "from_tokens" if self.from_tokens else ""
        return f"ParserInput({from_tokens}'{self.text}')"

    def reset(self, yield_oef=True):
        """
        Tokenize the text if needed and put the cursor just before the window
        :param yield_oef: see __init__
        :return: self, to allow chaining
        """
        if self.tokens is None:
            self.tokens = list(Tokenizer(self.text))
            self.length = len(self.tokens)

        if self.end is None:
            self.end = self.length

        self.yield_oef = yield_oef
        # one step before the first token: next_token() increments first
        self.pos = self.start - 1
        self.token = None
        return self

    def as_text(self, custom_switcher=None, tracker=None):
        """
        Text of the window
        :param custom_switcher: see get_text_from_tokens (the result is not cached when given)
        :param tracker: see get_text_from_tokens
        :return:
        """
        if custom_switcher is not None:
            return self.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)

        if self.sub_text:
            return self.sub_text

        if self.start == 0 and self.end == self.length:
            # the window is the whole input, no need to rebuild the text
            self.sub_text = self.text
        else:
            self.sub_text = self.get_text_from_tokens(self.tokens[self.start:self.end])
        return self.sub_text

    def as_tokens(self):
        """
        Tokens of the window
        :return:
        """
        if self.sub_tokens:
            return self.sub_tokens

        if self.start == 0 and self.end == self.length:
            # the window is the whole input, share the list rather than copy it
            self.sub_tokens = self.tokens
        else:
            self.sub_tokens = self.tokens[self.start:self.end]
        return self.sub_tokens

    def next_token(self, skip_whitespace=True):
        """
        Move the cursor to the next token of the window (reset() must have been called)
        :param skip_whitespace: when True, WHITESPACE and NEWLINE tokens are skipped
        :return: True when self.token is a token to process, False when the window is exhausted
        """
        self.pos += 1

        while self.pos < self.end:
            self.token = self.tokens[self.pos]
            if not (skip_whitespace and self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)):
                break
            self.pos += 1
        else:
            # Window exhausted, either immediately or while skipping whitespace.
            # Both cases now behave the same way: a synthetic EOF is exposed
            # when yield_oef is set.
            if self.yield_oef:
                self.token = Token(TokenKind.EOF, "", -1, -1, -1)
            return False

        # A real EOF token is hidden when yield_oef is off, whether or not
        # whitespace was skipped to reach it.
        if self.token.type == TokenKind.EOF and not self.yield_oef:
            return False

        return True

    def is_empty(self):
        """
        :return: True when the text is blank, or when the window is empty or
                 made of a single WHITESPACE token
        """
        if self.text.strip() == "":
            return True

        if self.end == self.start:
            return True

        # tokens may still be None when reset() has not been called yet
        if self.tokens is not None \
                and self.end \
                and self.end == self.start + 1 \
                and self.tokens[self.start].type == TokenKind.WHITESPACE:
            return True
        return False

    @staticmethod
    def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
        """
        Create the source code, from the list of token
        :param tokens: list of tokens (a single object that is not iterable is accepted as well)
        :param custom_switcher: to override the behaviour (the return value) of some token
        :param tracker: keep track of the original token value when custom switched
        :return:
        """
        if tokens is None:
            return ""

        if not hasattr(tokens, "__iter__"):
            tokens = [tokens]

        switcher = {
            TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
            TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
        }

        if custom_switcher:
            switcher.update(custom_switcher)

        def raw_value(t):
            return t.value

        # tracking only makes sense when some tokens can be custom switched;
        # this also avoids 'in None' when a tracker is given without a switcher
        track = tracker is not None and bool(custom_switcher)

        parts = []
        for token in tokens:
            value = switcher.get(token.type, raw_value)(token)
            parts.append(value)
            if track and token.type in custom_switcher:
                tracker[value] = token.value
        return "".join(parts)
|
||||
|
||||
|
||||
class SheerkaExecute(BaseService):
|
||||
"""
|
||||
Manage the execution of a process flow
|
||||
"""
|
||||
|
||||
NAME = "Execute"
|
||||
PARSERS_INPUTS_ENTRY = "ParserInput" # entry for admin or internal variables
|
||||
|
||||
def __init__(self, sheerka):
    """
    :param sheerka: forwarded as is to the base service constructor
    """
    super().__init__(sheerka)

    # cache of ParserInput, created by initialize()
    self.pi_cache = None
|
||||
|
||||
def initialize(self):
    """
    Bind execute() on sheerka, then create the ParserInput cache and
    register it in the cache manager under PARSERS_INPUTS_ENTRY
    """
    sheerka = self.sheerka
    sheerka.bind_service_method(self.execute)

    # a missing key is resolved by building a ParserInput from the key itself
    parser_inputs = Cache(default=lambda key: ParserInput(key), max_size=20)
    self.pi_cache = parser_inputs
    sheerka.cache_manager.register_cache(self.PARSERS_INPUTS_ENTRY, parser_inputs, False)
|
||||
|
||||
def get_parser_input(self, text, tokens=None):
    """
    Returns new or existing parser input
    :param text: the text to parse, or an already built ParserInput (returned as is)
    :param tokens: already computed tokens (optional); when text is empty,
                   the cache key is the text rebuilt from these tokens
    :return: the ParserInput cached for this text, created and cached if needed
    """

    if isinstance(text, ParserInput):
        return text

    # Use the same key for the lookup and for the insertion. The lookup used
    # to probe 'text' while the insertion used the text rebuilt from the
    # tokens, so entries created from tokens only could never be found again.
    if tokens is None:
        key = text
    else:
        key = text or ParserInput.get_text_from_tokens(tokens)

    pi = None
    if tokens is None or self.pi_cache.has(key):
        pi = self.pi_cache.get(key)

    # pi is None either when nothing is cached for the given tokens, or when
    # the cache did not build the default value
    # (when CacheManager.cache_only is True)
    if pi is None:
        pi = ParserInput(key, tokens)
        self.pi_cache.put(key, pi)
    return pi
|
||||
|
||||
def call_parsers(self, context, return_values):
|
||||
|
||||
# return_values must be a list
|
||||
@@ -56,7 +211,7 @@ class SheerkaExecute(BaseService):
|
||||
|
||||
for return_value in inputs_for_this_group:
|
||||
|
||||
to_parse = return_value.body.body \
|
||||
to_parse = self.get_parser_input(return_value.body.body) \
|
||||
if self.sheerka.isinstance(return_value.body, BuiltinConcepts.USER_INPUT) \
|
||||
else return_value.body
|
||||
|
||||
|
||||
@@ -104,6 +104,9 @@ class Token:
|
||||
else:
|
||||
return str(self.value)
|
||||
|
||||
def clone(self):
    """
    :return: a new Token built from the type, value, index, line and column of this one
    """
    kind, value = self.type, self.value
    return Token(kind, value, self.index, self.line, self.column)
|
||||
|
||||
|
||||
@dataclass()
|
||||
class LexerError(Exception):
|
||||
|
||||
Reference in New Issue
Block a user