Fixed #30 : Add variable support in BNF concept definition

Fixed #31 : Add regex support in BNF Concept
Fixed #33 : Do not memorize object during restore
This commit is contained in:
2021-02-24 17:23:03 +01:00
parent cac2dad17f
commit 646c428edb
32 changed files with 2107 additions and 360 deletions
@@ -1,5 +1,6 @@
import re
from dataclasses import dataclass
from typing import Set
from typing import Set, List, Union
import core.utils
from cache.Cache import Cache
@@ -12,9 +13,10 @@ from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, Built
from core.builtin_helpers import ensure_concept, ensure_bnf
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
VARIABLE_PREFIX
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED, NoFirstToken
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind
from parsers.BnfNodeParser import RegExDef
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@@ -98,6 +100,8 @@ class SheerkaConceptManager(BaseService):
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"
CONCEPTS_BY_REGEX_ENTRY = "ConceptManager:Concepts_By_Regex"
CONCEPTS_BNF_DEFINITIONS_ENTRY = "ConceptManager:Concepts_BNF_Definitions"
def __init__(self, sheerka):
@@ -105,6 +109,7 @@ class SheerkaConceptManager(BaseService):
self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"}
self.allowed_meta = {attr for attr in vars(ConceptMetadata) if
not attr.startswith("_") and attr not in self.forbidden_meta}
self.compiled_concepts_by_regex = []
def initialize(self):
self.sheerka.bind_service_method(self.create_new_concept, True)
@@ -119,6 +124,7 @@ class SheerkaConceptManager(BaseService):
self.sheerka.bind_service_method(self.get_by_id, False, visible=False)
self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_regex, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_bnf_definitions, False, visible=False)
self.sheerka.bind_service_method(self.clear_bnf_definition, True, visible=False)
@@ -145,6 +151,9 @@ class SheerkaConceptManager(BaseService):
cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False)
cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_REGEX_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BY_REGEX_ENTRY, cache)
cache = Cache().auto_configure(self.CONCEPTS_BNF_DEFINITIONS_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, cache, persist=False)
@@ -158,6 +167,14 @@ class SheerkaConceptManager(BaseService):
res = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
self.sheerka.om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
# init the regular expression
self.sheerka.om.get(self.CONCEPTS_BY_REGEX_ENTRY, None)
from_db = self.sheerka.om.current_cache_manager().copy(self.CONCEPTS_BY_REGEX_ENTRY)
concepts_by_first_regex = {RegExDef().deserialize(k): v for k, v in from_db.items()}
res = self.compile_concepts_by_first_regex(context, concepts_by_first_regex)
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(res.body)
def initialize_builtin_concepts(self):
"""
Initializes the builtin concepts
@@ -201,9 +218,9 @@ class SheerkaConceptManager(BaseService):
concept.init_key()
init_bnf_ret_value = None
ontology = sheerka.om
om = sheerka.om
if ontology.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
if om.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept)
return sheerka.ret(
self.NAME,
@@ -220,33 +237,44 @@ class SheerkaConceptManager(BaseService):
except Exception as ex:
return sheerka.ret(self.NAME, False, ex.args[0])
# compute new concepts_by_first_keyword
init_ret_value = self.compute_concepts_by_first_token(context, [concept], True)
# compute first token and/or first regex
init_ret_value = self.compute_concepts_by_first_item(context, [concept], True)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
by_first_keyword, by_first_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
init_ret_value = self.resolve_concepts_by_first_keyword(context, by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile regex
compile_ret = self.compile_concepts_by_first_regex(context, by_first_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# if everything is fine
freeze_concept_attrs(concept)
concept.freeze_definition_hash()
ontology.add_concept(concept)
ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.add_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in by_first_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
# allow search by definition when definition relevant
ontology.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
om.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
# update references
for ref in self.compute_references(concept):
ontology.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# TODO : this line seems to be useless
# The grammar is never reset
@@ -286,7 +314,7 @@ class SheerkaConceptManager(BaseService):
# }
#
sheerka = self.sheerka
cache_manager = self.sheerka.om
om = self.sheerka.om
if not to_add and not to_remove:
return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept)))
@@ -301,23 +329,19 @@ class SheerkaConceptManager(BaseService):
if res is not None:
return res
# To update concept by first keyword
# first remove the old references
keywords = self.get_first_tokens(sheerka, concept) # keyword of the old concept
concepts_by_first_keyword = cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
for keyword in keywords:
try:
concepts_by_first_keyword[keyword].remove(concept.id)
if len(concepts_by_first_keyword[keyword]) == 0:
del concepts_by_first_keyword[keyword]
except KeyError: # only occurs in unit tests when concepts are created without create_new()
pass
# To update concept by first keyword and first regex
# first remove old first token and first regex entries
concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
# and then update
init_ret_value = self.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
init_ret_value = self.compute_concepts_by_first_item(context,
[new_concept],
False,
concepts_by_first_keyword,
concepts_by_regex)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
concepts_by_first_keyword, concepts_by_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context,
@@ -327,18 +351,30 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# update concept that referenced the old concept and clear old references
self.update_references(context, concept, new_concept, to_add)
for ref in self.compute_references(concept):
cache_manager.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
om.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# compute new references
for ref in self.compute_references(new_concept):
cache_manager.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
cache_manager.update_concept(concept, new_concept)
cache_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
# everything is ok, update the caches
om.update_concept(concept, new_concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
# everything seems to be fine. Update the list of attributes
# Caution. Must be done AFTER update_concept()
@@ -349,6 +385,7 @@ class SheerkaConceptManager(BaseService):
if modify_source:
self._update_concept(context, concept, to_add, to_remove)
# KSI 2021-02-16 publish the modification of the concept only when someone needs it
ret = sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=new_concept))
return ret
@@ -362,17 +399,44 @@ class SheerkaConceptManager(BaseService):
# TODO : resolve concept first
sheerka = context.sheerka
refs = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if not sheerka.is_known(concept):
return sheerka.ret(self.NAME, False, sheerka.err(ConceptNotFound(concept)))
om = sheerka.om
refs = om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if refs is not NotFound:
refs_instances = [sheerka.new_from_template(c, c.key) for c in [self.get_by_id(ref) for ref in refs]]
return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(refs_instances)))
try:
sheerka.om.remove_concept(concept)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
except ConceptNotFound as ex:
return sheerka.ret(self.NAME, False, sheerka.err(ex))
concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# everything seems fine. I can commit the modification and remove
om.remove_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
def set_attr(self, concept, attribute, value):
"""
@@ -497,7 +561,7 @@ class SheerkaConceptManager(BaseService):
if c.id == concept_id:
return c
metadata = [(index_name, key), ("id", concept_id)] if concept_id else (index_name, key)
metadata = {index_name: key, "id": concept_id} if concept_id else {index_name: key}
return self.sheerka.get_unknown(metadata)
def update_references(self, context, concept, modified_concept=None, modifications=None):
@@ -663,12 +727,39 @@ class SheerkaConceptManager(BaseService):
concept.get_metadata().key = None
if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
concept.set_bnf(None)
ensure_bnf(context, concept)
ensure_bnf(context, concept, update_bnf_for_cached_concept=False)
concept.init_key()
return
def _remove_concept_first_token_and_first_regex(self, concept):
    """
    Remove every first-token and first-regex entry pointing to *concept*.

    Works on copies of the cached maps so the caches themselves are only
    written back once the caller has validated the whole modification.

    :param concept: the concept whose first-item entries must be removed
    :return: the tuple (concepts_by_first_keyword, concepts_by_regex) with the
        concept's entries removed; regex keys are deserialized RegExDef instances
    """
    keywords_or_regex = self.get_first_items(self.sheerka, concept)  # first items of the old concept
    concepts_by_first_keyword = self.sheerka.om.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    concepts_by_regex = self.sheerka.om.copy(self.CONCEPTS_BY_REGEX_ENTRY)
    for item in keywords_or_regex:
        # Pick the map (and key) the item belongs to: regexes are keyed by
        # their serialized form, plain tokens by the token string itself.
        if isinstance(item, RegExDef):
            target, key = concepts_by_regex, item.serialize()
        else:
            target, key = concepts_by_first_keyword, item
        try:
            # copy the id list so the cached original is never mutated
            remaining = target[key].copy()
            remaining.remove(concept.id)
            if remaining:
                target[key] = remaining
            else:
                del target[key]
        except KeyError:  # only occurs in unit tests when concepts are created without create_new()
            pass
    return concepts_by_first_keyword, {RegExDef().deserialize(k): v for k, v in concepts_by_regex.items()}
@staticmethod
def get_first_tokens(sheerka, concept):
"""
@@ -677,6 +768,30 @@ class SheerkaConceptManager(BaseService):
:param concept:
:return:
"""
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
bnf_visitor.visit(concept.get_bnf())
return [t for t in bnf_visitor.first_tokens if t is not NoFirstToken]
else:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
return [keyword]
return None
@staticmethod
def get_first_items(sheerka, concept) -> List[Union[str, RegExDef]]:
"""
Get all the first items needed by the concept.
An item can be either a token or a regular expression.
:param sheerka:
:param concept:
:return: list of items — a string for a token, or a RegExDef for the definition of a regex
"""
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
@@ -692,6 +807,55 @@ class SheerkaConceptManager(BaseService):
return None
@staticmethod
def compute_concepts_by_first_item(context,
                                   concepts,
                                   use_sheerka=False,
                                   previous_first_keywords=None,
                                   previous_first_regex=None):
    """
    Build the two dispatch maps:
    one mapping a first token (str) to the ids of the concepts that start with it,
    one mapping a first RegExDef to the ids of the concepts that start with it —
    i.e. the dictionaries stored under CONCEPTS_BY_FIRST_KEYWORD_ENTRY and
    CONCEPTS_BY_REGEX_ENTRY.

    :param context: execution context (provides context.sheerka)
    :param concepts: list of concepts to parse
    :param use_sheerka: if True, the previous maps are read from sheerka's
        ontology and the previous_* parameters are ignored
    :param previous_first_keywords: existing first-token map to extend
        (mutated in place when provided)
    :param previous_first_regex: existing first-regex map to extend
        (mutated in place when provided)
    :return: a ret() whose body is the tuple (first-keyword map, first-regex map)
    """
    sheerka = context.sheerka
    if use_sheerka:
        # start from the persisted maps; regex keys are stored serialized
        previous_first_keywords = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
        previous_first_regex = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY)
        previous_first_regex = {RegExDef().deserialize(k): v for k, v in previous_first_regex.items()}
    else:
        previous_first_keywords = previous_first_keywords or {}
        previous_first_regex = previous_first_regex or {}
    for concept in concepts:
        items = SheerkaConceptManager.get_first_items(sheerka, concept)
        if items is None:
            # no first token found for a concept ?
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))
        for item in items:
            # a first item is either a RegExDef or a plain token string
            if isinstance(item, RegExDef):
                previous_first_regex.setdefault(item, []).append(concept.id)
            else:
                previous_first_keywords.setdefault(item, []).append(concept.id)
    # 'uniquify' the lists
    for k, v in previous_first_keywords.items():
        previous_first_keywords[k] = core.utils.make_unique(v)
    for k, v in previous_first_regex.items():
        previous_first_regex[k] = core.utils.make_unique(v)
    return sheerka.ret("BaseNodeParser", True, (previous_first_keywords, previous_first_regex))
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
"""
@@ -812,6 +976,19 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def compile_concepts_by_first_regex(context, concepts_by_first_regex):
    """
    Compile every RegExDef key of *concepts_by_first_regex* into a re pattern.

    :param context: execution context (provides context.sheerka)
    :param concepts_by_first_regex: map of RegExDef -> list of concept ids
    :return: a ret() whose body is a list of (compiled pattern, concept ids)
        tuples, or a failed ret() carrying the exception if compilation fails
    """
    compiled = []
    try:
        for regex_def, concept_ids in concepts_by_first_regex.items():
            flags = RegExDef.compile_flags(regex_def.ignore_case,
                                           regex_def.multiline,
                                           regex_def.explicit_flags)
            compiled.append((re.compile(regex_def.to_match, flags), concept_ids))
    except Exception as ex:
        return context.sheerka.ret("BaseNodeParser", False, ex)
    return context.sheerka.ret("BaseNodeParser", True, compiled)
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
"""
Tries to find if there are concepts that match the value of the token
@@ -853,5 +1030,19 @@ class SheerkaConceptManager(BaseService):
return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id)
def get_concepts_by_first_regex(self, expr, pos):
    """
    Go through all the declared regular expressions and collect the concepts
    whose compiled regex matches *expr* starting at position *pos*.

    :param expr: the text to match against
    :param pos: position in *expr* where the match must start
    :return: list of matching concepts (possibly empty)
    """
    matching = []
    for pattern, concept_ids in self.compiled_concepts_by_regex:
        if pattern.match(expr, pos):
            matching.extend(self.sheerka.get_by_id(cid) for cid in concept_ids)
    return matching
def get_concepts_bnf_definitions(self):
    """Return the raw dict backing the BNF-definitions cache (not a copy — callers must not mutate it)."""
    return self.sheerka.om.current_cache_manager().caches[self.CONCEPTS_BNF_DEFINITIONS_ENTRY].cache
@@ -239,14 +239,17 @@ class ConsoleDebugLogger(BaseDebugLogger):
:param kwargs:
:return:
"""
raw = kwargs.pop('raw', None)
if not self.debug_manager.compute_debug_concept(self.context,
self.service_name,
self.method_name,
concept.id,
self.debug_id):
return
raw = kwargs.pop('raw', None)
color = kwargs.pop('color', None)
str_vars = raw if raw else pp.pformat(kwargs) if kwargs else ""
if color:
str_vars = CCM[color] + str_vars + CCM['reset']
text = " - " + text if text is not None else ""
colon = ": " if str_vars else ""
str_text = f"{CCM['cyan']}..concept#{concept.id}{text}{colon} {CCM['reset']}"
@@ -5,6 +5,7 @@ from core.builtin_helpers import expect_one, only_successful, evaluate, ensure_c
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, AllConceptParts, \
concept_part_value
from core.global_symbols import NotInit
from core.rule import Rule
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.sheerka_service import BaseService
@@ -421,6 +422,9 @@ class SheerkaEvaluateConcept(BaseService):
else:
return evaluated
elif isinstance(to_resolve, Rule):
raise NotImplementedError() # how to resolve rules ?
# otherwise, execute all return values to find out what is the value
else:
# update short term memory with current concept variables
+31 -18
View File
@@ -22,7 +22,7 @@ class ParserInput:
Helper class that tokenizes the input once for all
"""
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
def __init__(self, text, tokens=None, length=None, start=None, end=None, yield_oef=True):
self.text = text
self.tokens = tokens or None
if self.tokens:
@@ -38,13 +38,13 @@ class ParserInput:
last_token.line,
last_token.column + 1)]
self.length = None # to be computed in reset()
self.length = length # to be computed (again) in reset()
self.yield_oef = yield_oef
self.start = start or 0
if end:
self.original_end = end + 1
self.end = self.original_end
self.original_end = end # forced index of the last token
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
else:
self.original_end = self.end = None
@@ -61,30 +61,43 @@ class ParserInput:
return f"ParserInput({from_tokens}'{self.text}')"
def reset(self, yield_oef=None):
def _get_end_from_yield_eof(_length, _yield_oef):
return _length - 1 if _yield_oef else _length - 2
if yield_oef is None:
yield_oef = self.yield_oef
# make sure tokens is correctly initialized
if self.tokens is None:
# the EOF token is always produced here, but it will not be yielded unless requested.
self.tokens = list(Tokenizer(self.text, yield_eof=True))
self.length = len(self.tokens)
if self.original_end is None:
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
self.end = _get_end_from_yield_eof(self.length, yield_oef)
else:
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
self.end = self.original_end if self.original_end < self.length else \
_get_end_from_yield_eof(self.length, yield_oef)
self.pos = self.start - 1
self.token = None
return self
def as_text(self, custom_switcher=None, tracker=None):
if not self.tokens or self.end is None:
# as_text is requested before reset().
# It means that we want the original text
return self.text
if custom_switcher is None:
if self.sub_text:
return self.sub_text
if self.start == 0 and self.end == self.length:
if self.start == 0 and self.end == self.length - 1:
self.sub_text = self.text
return self.sub_text
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end])
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end + 1])
return self.sub_text
else:
return core.utils.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)
@@ -92,16 +105,16 @@ class ParserInput:
def as_tokens(self):
if self.sub_tokens:
return self.sub_tokens
if self.start == 0 and self.end == self.length:
if self.start == 0 and self.end == self.length - 1:
self.sub_tokens = self.tokens
return self.sub_tokens
self.sub_tokens = self.tokens[self.start:self.end]
self.sub_tokens = self.tokens[self.start:self.end + 1]
return self.sub_tokens
def next_token(self, skip_whitespace=True):
self.pos += 1
if self.pos >= self.end:
if self.pos > self.end:
return False
self.token = self.tokens[self.pos]
@@ -111,11 +124,11 @@ class ParserInput:
if skip_whitespace:
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self.pos += 1
if self.pos == self.end:
if self.pos > self.end:
return False
self.token = self.tokens[self.pos]
return self.pos < self.end
return self.pos <= self.end
def the_token_after(self, skip_whitespace=True):
"""
@@ -123,13 +136,13 @@ class ParserInput:
Never returns None (returns TokenKind.EOF instead)
"""
my_pos = self.pos + 1
if my_pos >= self.end:
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
if skip_whitespace:
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
my_pos += 1
if my_pos == self.end:
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
return self.tokens[my_pos]
@@ -140,7 +153,7 @@ class ParserInput:
:param pos:
:return: True is pos is a valid position False otherwise
"""
if pos < 0 or pos >= self.end:
if pos < 0 or pos > self.end:
self.token = None
return False
@@ -355,10 +368,10 @@ class SheerkaExecute(BaseService):
if pi is NotFound: # when CacheManager.cache_only is True
pi = ParserInput(text)
self.pi_cache.put(text, pi)
return ParserInput(text, pi.tokens) # new instance, but no need to tokenize the text again
return ParserInput(text, tokens=pi.tokens, length=pi.length) # new instance, but no need to tokenize the text again
key = text or core.utils.get_text_from_tokens(tokens)
pi = ParserInput(key, tokens)
pi = ParserInput(key, tokens=tokens, length=len(tokens))
self.pi_cache.put(key, pi)
return pi
+1 -1
View File
@@ -144,7 +144,7 @@ class SheerkaMemory(BaseService):
:param concept:
:return:
"""
if self.sheerka.during_initialisation:
if self.sheerka.during_initialisation or self.sheerka.during_restore:
return
self.registration[key] = concept