Fixed #30 : Add variable support in BNF concept definition

Fixed #31 : Add regex support in BNF Concept
Fixed #33 : Do not memorize object during restore
This commit is contained in:
2021-02-24 17:23:03 +01:00
parent cac2dad17f
commit 646c428edb
32 changed files with 2107 additions and 360 deletions
+10
View File
@@ -199,6 +199,16 @@ class ExecutionContext:
self._push = None
def add_preprocess(self, name, **kwargs):
"""
PreProcess items are used during the parsing and the evaluation of the ReturnValueConcept
Using them, you can tweak the behaviour of the parser and the evaluator (you can disable them, for instance)
example :
context.add_preprocess(BaseEvaluator.get_name("priority15"), enabled=False)
context.add_preprocess(BaseEvaluator.get_name("all_priority15"), priority=99)
:param name:
:param kwargs:
:return:
"""
preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS)
preprocess.set_value("preprocess_name", name)
for k, v in kwargs.items():
+8 -1
View File
@@ -734,7 +734,7 @@ class Sheerka(Concept):
if not isinstance(obj, Concept):
return True
return obj.key not in (BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)
return obj.key not in (None, BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)
@staticmethod
def isinstance(a, b):
@@ -879,6 +879,13 @@ class Sheerka(Concept):
return concept
@staticmethod
def deepdiff(a, b):
    """Compute, print and return the deep difference between *a* and *b*, ignoring ordering."""
    from deepdiff import DeepDiff
    difference = DeepDiff(a, b, ignore_order=True)
    print(difference)
    return difference
def to_profile():
sheerka = Sheerka()
@@ -1,5 +1,6 @@
import re
from dataclasses import dataclass
from typing import Set
from typing import Set, List, Union
import core.utils
from cache.Cache import Cache
@@ -12,9 +13,10 @@ from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, Built
from core.builtin_helpers import ensure_concept, ensure_bnf
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
VARIABLE_PREFIX
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED, NoFirstToken
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind
from parsers.BnfNodeParser import RegExDef
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@@ -98,6 +100,8 @@ class SheerkaConceptManager(BaseService):
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"
CONCEPTS_BY_REGEX_ENTRY = "ConceptManager:Concepts_By_Regex"
CONCEPTS_BNF_DEFINITIONS_ENTRY = "ConceptManager:Concepts_BNF_Definitions"
def __init__(self, sheerka):
@@ -105,6 +109,7 @@ class SheerkaConceptManager(BaseService):
self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"}
self.allowed_meta = {attr for attr in vars(ConceptMetadata) if
not attr.startswith("_") and attr not in self.forbidden_meta}
self.compiled_concepts_by_regex = []
def initialize(self):
self.sheerka.bind_service_method(self.create_new_concept, True)
@@ -119,6 +124,7 @@ class SheerkaConceptManager(BaseService):
self.sheerka.bind_service_method(self.get_by_id, False, visible=False)
self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_regex, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_bnf_definitions, False, visible=False)
self.sheerka.bind_service_method(self.clear_bnf_definition, True, visible=False)
@@ -145,6 +151,9 @@ class SheerkaConceptManager(BaseService):
cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False)
cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_REGEX_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BY_REGEX_ENTRY, cache)
cache = Cache().auto_configure(self.CONCEPTS_BNF_DEFINITIONS_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, cache, persist=False)
@@ -158,6 +167,14 @@ class SheerkaConceptManager(BaseService):
res = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
self.sheerka.om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
# init the regular expression
self.sheerka.om.get(self.CONCEPTS_BY_REGEX_ENTRY, None)
from_db = self.sheerka.om.current_cache_manager().copy(self.CONCEPTS_BY_REGEX_ENTRY)
concepts_by_first_regex = {RegExDef().deserialize(k): v for k, v in from_db.items()}
res = self.compile_concepts_by_first_regex(context, concepts_by_first_regex)
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(res.body)
def initialize_builtin_concepts(self):
"""
Initializes the builtin concepts
@@ -201,9 +218,9 @@ class SheerkaConceptManager(BaseService):
concept.init_key()
init_bnf_ret_value = None
ontology = sheerka.om
om = sheerka.om
if ontology.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
if om.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept)
return sheerka.ret(
self.NAME,
@@ -220,33 +237,44 @@ class SheerkaConceptManager(BaseService):
except Exception as ex:
return sheerka.ret(self.NAME, False, ex.args[0])
# compute new concepts_by_first_keyword
init_ret_value = self.compute_concepts_by_first_token(context, [concept], True)
# compute first token and/or first regex
init_ret_value = self.compute_concepts_by_first_item(context, [concept], True)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
by_first_keyword, by_first_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
init_ret_value = self.resolve_concepts_by_first_keyword(context, by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile regex
compile_ret = self.compile_concepts_by_first_regex(context, by_first_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# if everything is fine
freeze_concept_attrs(concept)
concept.freeze_definition_hash()
ontology.add_concept(concept)
ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.add_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in by_first_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
# allow search by definition when definition relevant
ontology.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
om.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
# update references
for ref in self.compute_references(concept):
ontology.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# TODO : this line seems to be useless
# The grammar is never reset
@@ -286,7 +314,7 @@ class SheerkaConceptManager(BaseService):
# }
#
sheerka = self.sheerka
cache_manager = self.sheerka.om
om = self.sheerka.om
if not to_add and not to_remove:
return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept)))
@@ -301,23 +329,19 @@ class SheerkaConceptManager(BaseService):
if res is not None:
return res
# To update concept by first keyword
# first remove the old references
keywords = self.get_first_tokens(sheerka, concept) # keyword of the old concept
concepts_by_first_keyword = cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
for keyword in keywords:
try:
concepts_by_first_keyword[keyword].remove(concept.id)
if len(concepts_by_first_keyword[keyword]) == 0:
del concepts_by_first_keyword[keyword]
except KeyError: # only occurs in unit tests when concepts are created without create_new()
pass
# To update concept by first keyword and first regex
# first remove old first token and first regex entries
concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
# and then update
init_ret_value = self.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
init_ret_value = self.compute_concepts_by_first_item(context,
[new_concept],
False,
concepts_by_first_keyword,
concepts_by_regex)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
concepts_by_first_keyword, concepts_by_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context,
@@ -327,18 +351,30 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# update concept that referenced the old concept and clear old references
self.update_references(context, concept, new_concept, to_add)
for ref in self.compute_references(concept):
cache_manager.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
om.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# compute new references
for ref in self.compute_references(new_concept):
cache_manager.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
cache_manager.update_concept(concept, new_concept)
cache_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
# everything is ok, update the caches
om.update_concept(concept, new_concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
# everything seems to be fine. Update the list of attributes
# Caution. Must be done AFTER update_concept()
@@ -349,6 +385,7 @@ class SheerkaConceptManager(BaseService):
if modify_source:
self._update_concept(context, concept, to_add, to_remove)
# KSI 2021-02-16 publish the modification of the concept only when someone needs it
ret = sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=new_concept))
return ret
@@ -362,17 +399,44 @@ class SheerkaConceptManager(BaseService):
# TODO : resolve concept first
sheerka = context.sheerka
refs = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if not sheerka.is_known(concept):
return sheerka.ret(self.NAME, False, sheerka.err(ConceptNotFound(concept)))
om = sheerka.om
refs = om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if refs is not NotFound:
refs_instances = [sheerka.new_from_template(c, c.key) for c in [self.get_by_id(ref) for ref in refs]]
return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(refs_instances)))
try:
sheerka.om.remove_concept(concept)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
except ConceptNotFound as ex:
return sheerka.ret(self.NAME, False, sheerka.err(ex))
concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# everything seems fine. I can commit the modification and remove
om.remove_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
def set_attr(self, concept, attribute, value):
"""
@@ -497,7 +561,7 @@ class SheerkaConceptManager(BaseService):
if c.id == concept_id:
return c
metadata = [(index_name, key), ("id", concept_id)] if concept_id else (index_name, key)
metadata = {index_name: key, "id": concept_id} if concept_id else {index_name: key}
return self.sheerka.get_unknown(metadata)
def update_references(self, context, concept, modified_concept=None, modifications=None):
@@ -663,12 +727,39 @@ class SheerkaConceptManager(BaseService):
concept.get_metadata().key = None
if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
concept.set_bnf(None)
ensure_bnf(context, concept)
ensure_bnf(context, concept, update_bnf_for_cached_concept=False)
concept.init_key()
return
def _remove_concept_first_token_and_first_regex(self, concept):
    """
    Remove *concept* from the first-token and first-regex lookup tables.

    Works on copies of the cached dictionaries (and of the id lists inside
    them) so nothing is committed here: the caller persists the returned
    maps only once the whole operation succeeds.
    :param concept: concept whose id must be removed from both indexes
    :return: tuple (concepts_by_first_keyword, concepts_by_regex) where the
             regex map keys are deserialized back into RegExDef instances
    """
    keywords_or_regex = self.get_first_items(self.sheerka, concept)  # first items of the old concept
    concepts_by_first_keyword = self.sheerka.om.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    concepts_by_regex = self.sheerka.om.copy(self.CONCEPTS_BY_REGEX_ENTRY)
    for item in keywords_or_regex:
        try:
            if isinstance(item, RegExDef):
                # regex entries are stored under their serialized form
                serialized = item.serialize()
                copy = concepts_by_regex[serialized].copy()
                copy.remove(concept.id)
                if len(copy) == 0:
                    del concepts_by_regex[serialized]
                else:
                    concepts_by_regex[serialized] = copy
            else:
                copy = concepts_by_first_keyword[item].copy()
                copy.remove(concept.id)
                if len(copy) == 0:
                    del concepts_by_first_keyword[item]
                else:
                    concepts_by_first_keyword[item] = copy
        except KeyError:  # only occurs in unit tests when concepts are created without create_new()
            pass
        # NOTE(review): list.remove() raises ValueError (not KeyError) when the id
        # is absent from the list — confirm that case cannot happen here.
    return concepts_by_first_keyword, {RegExDef().deserialize(k): v for k, v in concepts_by_regex.items()}
@staticmethod
def get_first_tokens(sheerka, concept):
"""
@@ -677,6 +768,30 @@ class SheerkaConceptManager(BaseService):
:param concept:
:return:
"""
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
bnf_visitor.visit(concept.get_bnf())
return [t for t in bnf_visitor.first_tokens if t is not NoFirstToken]
else:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
return [keyword]
return None
@staticmethod
def get_first_items(sheerka, concept) -> List[Union[str, RegExDef]]:
"""
Get all the first items needed by the concept
An item can either be a token or a regular expression
:param sheerka:
:param concept:
:return: List of string (if it's token or RegExDef if it's the definition of a regex)
"""
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
@@ -692,6 +807,55 @@ class SheerkaConceptManager(BaseService):
return None
@staticmethod
def compute_concepts_by_first_item(context,
                                   concepts,
                                   use_sheerka=False,
                                   previous_first_keywords=None,
                                   previous_first_regex=None):
    """
    Build the two lookup maps used by the parser:
    one mapping the first expected token of a concept to concept ids,
    one mapping the first expected regular expression to concept ids
    (i.e. the dictionaries that go into CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    and CONCEPTS_BY_REGEX_ENTRY).
    :param context:
    :param concepts: list of concepts to parse
    :param use_sheerka: if True, start from the maps currently cached in sheerka
    :param previous_first_keywords: existing first-token map to extend (ignored when use_sheerka is True)
    :param previous_first_regex: existing first-regex map to extend (ignored when use_sheerka is True)
    :return: a ret whose body is the tuple (first-token map, first-regex map)
    """
    sheerka = context.sheerka
    if use_sheerka:
        previous_first_keywords = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
        serialized_regex = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY)
        previous_first_regex = {RegExDef().deserialize(k): v for k, v in serialized_regex.items()}
    else:
        previous_first_keywords = previous_first_keywords or {}
        previous_first_regex = previous_first_regex or {}
    for concept in concepts:
        items = SheerkaConceptManager.get_first_items(sheerka, concept)
        if items is None:
            # a concept without any first item cannot be parsed
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))
        for item in items:
            target = previous_first_regex if isinstance(item, RegExDef) else previous_first_keywords
            target.setdefault(item, []).append(concept.id)
    # 'uniquify' the id lists of both maps
    for mapping in (previous_first_keywords, previous_first_regex):
        for key in mapping:
            mapping[key] = core.utils.make_unique(mapping[key])
    return sheerka.ret("BaseNodeParser", True, (previous_first_keywords, previous_first_regex))
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
"""
@@ -812,6 +976,19 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def compile_concepts_by_first_regex(context, concepts_by_first_regex):
    """
    Compile every RegExDef key of *concepts_by_first_regex* into a pattern.
    :param context:
    :param concepts_by_first_regex: map of RegExDef -> list of concept ids
    :return: a successful ret whose body is a list of
             (compiled pattern, concept ids) pairs, or a failed ret
             carrying the exception if any expression does not compile
    """
    compiled = []
    try:
        for regex_def, concept_ids in concepts_by_first_regex.items():
            flags = RegExDef.compile_flags(regex_def.ignore_case, regex_def.multiline, regex_def.explicit_flags)
            compiled.append((re.compile(regex_def.to_match, flags), concept_ids))
    except Exception as ex:  # e.g. re.error on a malformed expression
        return context.sheerka.ret("BaseNodeParser", False, ex)
    return context.sheerka.ret("BaseNodeParser", True, compiled)
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
"""
Tries to find if there are concepts that match the value of the token
@@ -853,5 +1030,19 @@ class SheerkaConceptManager(BaseService):
return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id)
def get_concepts_by_first_regex(self, expr, pos):
    """
    Run every registered compiled regular expression against *expr* at *pos*
    and collect the concepts attached to the expressions that match.
    :param expr: the text being parsed
    :param pos: position in *expr* where the match must start
    :return: list of matching concepts (possibly empty)
    """
    matching = []
    for pattern, concept_ids in self.compiled_concepts_by_regex:
        if pattern.match(expr, pos):
            matching += [self.sheerka.get_by_id(cid) for cid in concept_ids]
    return matching
def get_concepts_bnf_definitions(self):
    """Return the raw dictionary backing the BNF-definitions cache entry."""
    cache_manager = self.sheerka.om.current_cache_manager()
    return cache_manager.caches[self.CONCEPTS_BNF_DEFINITIONS_ENTRY].cache
@@ -239,14 +239,17 @@ class ConsoleDebugLogger(BaseDebugLogger):
:param kwargs:
:return:
"""
raw = kwargs.pop('raw', None)
if not self.debug_manager.compute_debug_concept(self.context,
self.service_name,
self.method_name,
concept.id,
self.debug_id):
return
raw = kwargs.pop('raw', None)
color = kwargs.pop('color', None)
str_vars = raw if raw else pp.pformat(kwargs) if kwargs else ""
if color:
str_vars = CCM[color] + str_vars + CCM['reset']
text = " - " + text if text is not None else ""
colon = ": " if str_vars else ""
str_text = f"{CCM['cyan']}..concept#{concept.id}{text}{colon} {CCM['reset']}"
@@ -5,6 +5,7 @@ from core.builtin_helpers import expect_one, only_successful, evaluate, ensure_c
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, AllConceptParts, \
concept_part_value
from core.global_symbols import NotInit
from core.rule import Rule
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.sheerka_service import BaseService
@@ -421,6 +422,9 @@ class SheerkaEvaluateConcept(BaseService):
else:
return evaluated
elif isinstance(to_resolve, Rule):
raise NotImplementedError() # how to resolve rules ?
# otherwise, execute all return values to find out what is the value
else:
# update short term memory with current concept variables
+31 -18
View File
@@ -22,7 +22,7 @@ class ParserInput:
Helper class that tokenizes the input once for all
"""
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
def __init__(self, text, tokens=None, length=None, start=None, end=None, yield_oef=True):
self.text = text
self.tokens = tokens or None
if self.tokens:
@@ -38,13 +38,13 @@ class ParserInput:
last_token.line,
last_token.column + 1)]
self.length = None # to be computed in reset()
self.length = length # to be computed (again) in reset()
self.yield_oef = yield_oef
self.start = start or 0
if end:
self.original_end = end + 1
self.end = self.original_end
self.original_end = end # forced index of the last token
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
else:
self.original_end = self.end = None
@@ -61,30 +61,43 @@ class ParserInput:
return f"ParserInput({from_tokens}'{self.text}')"
def reset(self, yield_oef=None):
def _get_end_from_yield_eof(_length, _yield_oef):
return _length - 1 if _yield_oef else _length - 2
if yield_oef is None:
yield_oef = self.yield_oef
# make sure tokens is correctly initialized
if self.tokens is None:
# the eof is forced here, but it will not be yielded unless requested.
self.tokens = list(Tokenizer(self.text, yield_eof=True))
self.length = len(self.tokens)
if self.original_end is None:
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
self.end = _get_end_from_yield_eof(self.length, yield_oef)
else:
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
self.end = self.original_end if self.original_end < self.length else \
_get_end_from_yield_eof(self.length, yield_oef)
self.pos = self.start - 1
self.token = None
return self
def as_text(self, custom_switcher=None, tracker=None):
if not self.tokens or self.end is None:
# as_text is requested before reset().
# It means that we want the original text
return self.text
if custom_switcher is None:
if self.sub_text:
return self.sub_text
if self.start == 0 and self.end == self.length:
if self.start == 0 and self.end == self.length - 1:
self.sub_text = self.text
return self.sub_text
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end])
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end + 1])
return self.sub_text
else:
return core.utils.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)
@@ -92,16 +105,16 @@ class ParserInput:
def as_tokens(self):
if self.sub_tokens:
return self.sub_tokens
if self.start == 0 and self.end == self.length:
if self.start == 0 and self.end == self.length - 1:
self.sub_tokens = self.tokens
return self.sub_tokens
self.sub_tokens = self.tokens[self.start:self.end]
self.sub_tokens = self.tokens[self.start:self.end + 1]
return self.sub_tokens
def next_token(self, skip_whitespace=True):
self.pos += 1
if self.pos >= self.end:
if self.pos > self.end:
return False
self.token = self.tokens[self.pos]
@@ -111,11 +124,11 @@ class ParserInput:
if skip_whitespace:
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self.pos += 1
if self.pos == self.end:
if self.pos > self.end:
return False
self.token = self.tokens[self.pos]
return self.pos < self.end
return self.pos <= self.end
def the_token_after(self, skip_whitespace=True):
"""
@@ -123,13 +136,13 @@ class ParserInput:
Never returns None (returns TokenKind.EOF instead)
"""
my_pos = self.pos + 1
if my_pos >= self.end:
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
if skip_whitespace:
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
my_pos += 1
if my_pos == self.end:
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
return self.tokens[my_pos]
@@ -140,7 +153,7 @@ class ParserInput:
:param pos:
:return: True is pos is a valid position False otherwise
"""
if pos < 0 or pos >= self.end:
if pos < 0 or pos > self.end:
self.token = None
return False
@@ -355,10 +368,10 @@ class SheerkaExecute(BaseService):
if pi is NotFound: # when CacheManager.cache_only is True
pi = ParserInput(text)
self.pi_cache.put(text, pi)
return ParserInput(text, pi.tokens) # new instance, but no need to tokenize the text again
return ParserInput(text, tokens=pi.tokens, length=pi.length) # new instance, but no need to tokenize the text again
key = text or core.utils.get_text_from_tokens(tokens)
pi = ParserInput(key, tokens)
pi = ParserInput(key, tokens=tokens, length=len(tokens))
self.pi_cache.put(key, pi)
return pi
+1 -1
View File
@@ -144,7 +144,7 @@ class SheerkaMemory(BaseService):
:param concept:
:return:
"""
if self.sheerka.during_initialisation:
if self.sheerka.during_initialisation or self.sheerka.during_restore:
return
self.registration[key] = concept