Files
Sheerka-Old/src/core/sheerka/services/SheerkaConceptManager.py
T
kodjo 821dbed189 Fixed #3: Added sheerka.resolve_rule()
Fixed #5: Refactored SheerkaComparisonManager
Fixed #6: Sya parser no longer works after restart
2021-01-15 07:11:04 +01:00

840 lines
33 KiB
Python

from dataclasses import dataclass
from typing import Set
import core.utils
from cache.Cache import Cache
from cache.CacheManager import ConceptNotFound
from cache.DictionaryCache import DictionaryCache
from cache.ListIfNeededCache import ListIfNeededCache
from cache.SetCache import SetCache
from core.builtin_concepts import ErrorConcept
from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, BuiltinUnique
from core.builtin_helpers import ensure_concept, ensure_bnf
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
VARIABLE_PREFIX
from core.error import ErrorObj
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
# Dotted path, presumably of the base node parser class.
# NOTE(review): not referenced anywhere in the visible part of this file - confirm it is still used.
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@dataclass
class ChickenAndEggError(Exception):
    """
    Signals a circular reference found while resolving the first keywords of concepts
    """
    concepts: Set[str]  # ids of the concepts involved in the recursion
@dataclass
class NoFirstTokenError(ErrorObj):
    """
    When no first token can be found for a concept
    """
    concept: Concept  # concept for which no first token was found
    key: str  # key of that concept
@dataclass
class NoModificationFound(ErrorObj):
    """
    When a modification is requested but nothing would actually change
    """
    concept: Concept  # concept that was about to be modified
    attrs: dict = None  # requested values that are already set, when known
@dataclass
class ForbiddenAttribute(ErrorObj):
    """
    When the attribute to modify is one that must never be modified
    """
    attr: str  # name of the forbidden attribute
@dataclass
class UnknownAttribute(ErrorObj):
    """
    When the attribute to modify does not exist
    """
    attr: str  # name of the unknown attribute
@dataclass
class CannotRemoveMeta(ErrorObj):
    """
    When the removal of a metadata attribute is requested
    (ConceptMeta is a class, attributes cannot be removed from it)
    """
    attrs: dict  # the metadata whose removal was requested
@dataclass
class ValueNotFound(ErrorObj):
    """
    When the value to remove does not exist (although the props/variable itself exists)
    """
    item: str  # name of the props/variable
    value: object  # value(s) whose removal was requested
@dataclass
class ConceptIsReferenced(ErrorObj):
    """
    When the concept to remove is still referenced by other concept(s)
    """
    references: list  # the concepts that reference the concept to remove
# Service in charge of the concepts: creation, modification, removal and lookup,
# together with the caches used to index them (the *_ENTRY constants below are the cache names).
class SheerkaConceptManager(BaseService):
NAME = "ConceptManager"
BUILTIN_CONCEPTS_IDS = "Builtins_Concepts_IDs" # sequential key for builtin concepts
USER_CONCEPTS_IDS = "User_Concepts_IDs" # sequential key for user defined concepts
CONCEPTS_IDS_ENTRY = "ConceptManager:Concepts_IDs" # NOTE(review): not referenced in the visible part of this file
CONCEPTS_BY_ID_ENTRY = "ConceptManager:Concepts_By_ID" # to store all the concepts
CONCEPTS_BY_KEY_ENTRY = "ConceptManager:Concepts_By_Key" # concepts indexed by their key
CONCEPTS_BY_NAME_ENTRY = "ConceptManager:Concepts_By_Name" # concepts indexed by their name
CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash" # store hash of concepts definitions (not values)
CONCEPTS_REFERENCES_ENTRY = "ConceptManager:Concepts_References" # tracks references between concepts
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword" # first keyword -> ids of the concepts
RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword" # same, once references to other concepts are resolved
def __init__(self, sheerka):
    """
    Declares which ConceptMetadata attributes may or may not be modified
    :param sheerka: forwarded to the BaseService constructor
    """
    super().__init__(sheerka)
    forbidden = {"is_builtin", "key", "id", "props", "variables"}
    self.forbidden_meta = forbidden
    # every public attribute of ConceptMetadata that is not forbidden
    self.allowed_meta = {
        name
        for name in vars(ConceptMetadata)
        if not name.startswith("_") and name not in forbidden
    }
def initialize(self):
    """
    Binds the methods of the service to sheerka and registers the caches it relies on
    :return: None
    """
    bind = self.sheerka.bind_service_method

    # methods bound with True as second argument
    for method in (self.create_new_concept,
                   self.modify_concept,
                   self.remove_concept,
                   self.set_id_if_needed,
                   self.set_attr):
        bind(method, True)

    bind(self.get_attr, False)

    # methods bound with False and visible=False
    for method in (self.get_by_key,
                   self.get_by_name,
                   self.get_by_hash,
                   self.get_by_id,
                   self.is_not_a_variable,
                   self.get_concepts_by_first_token):
        bind(method, False, visible=False)

    # caches that index the concepts: (entry, cache class, function giving the index key)
    register_concept_cache = self.sheerka.om.register_concept_cache
    concept_caches = (
        (self.CONCEPTS_BY_ID_ENTRY, Cache, lambda c: c.id),
        (self.CONCEPTS_BY_KEY_ENTRY, ListIfNeededCache, lambda c: c.key),
        (self.CONCEPTS_BY_NAME_ENTRY, ListIfNeededCache, lambda c: c.name),
        (self.CONCEPTS_BY_HASH_ENTRY, ListIfNeededCache, lambda c: c.get_definition_hash()),
    )
    for entry, cache_class, index_key in concept_caches:
        register_concept_cache(entry, cache_class().auto_configure(entry), index_key, True)

    # other caches
    references_cache = SetCache().auto_configure(self.CONCEPTS_REFERENCES_ENTRY)
    self.sheerka.om.register_cache(self.CONCEPTS_REFERENCES_ENTRY, references_cache)

    first_keyword_cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    self.sheerka.om.register_cache(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, first_keyword_cache)

    # the resolved version is registered with persist=False
    resolved_cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, resolved_cache, persist=False)
def initialize_deferred(self, context, is_first_time):
    """
    Seeds the sequence of user concept ids on first start, then rebuilds the resolved
    concepts by first keyword from the stored concepts by first keyword
    :param context: forwarded to resolve_concepts_by_first_keyword()
    :param is_first_time: True when the id sequence must be seeded
    :return: None
    """
    om = self.sheerka.om
    if is_first_time:
        om.put(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS, 1000)

    # initialize the dictionary of first tokens
    first_keyword_entry = self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    om.get(first_keyword_entry, None)  # to init the cache with the values from sdp
    by_first_keyword = om.current_cache_manager().copy(first_keyword_entry)
    resolved = self.resolve_concepts_by_first_keyword(context, by_first_keyword)
    om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved.body)
def initialize_builtin_concepts(self):
"""
Initializes the builtin concepts
For every key of AllBuiltinConcepts, an instance is obtained (sheerka itself, an instance of the
class found in sheerka.builtin_cache, or a plain Concept). It is then either added to the
ontology (when no concept with this key is found) or updated from the one that was found.
:return: dictionary mapping each builtin key to the id of its concept
"""
builtin_concepts_ids = {}
for key in AllBuiltinConcepts:
# choose the instance: sheerka itself, a dedicated builtin class, or a generic Concept
concept = self.sheerka if key == BuiltinConcepts.SHEERKA \
else self.sheerka.builtin_cache[str(key)]() if str(key) in self.sheerka.builtin_cache \
else Concept(key, True, False, key)
if key in BuiltinUnique:
concept.get_metadata().is_unique = True
# NOTE(review): the indentation of this file was lost - confirm whether is_evaluated
# is set for every builtin concept or only for the BuiltinUnique ones
concept.get_metadata().is_evaluated = True
from_db = self.sheerka.om.get(self.CONCEPTS_BY_KEY_ENTRY, concept.get_metadata().key)
if from_db is NotFound:
# self.init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
# unknown concept: give it an id from the builtin sequence and store it
self.set_id_if_needed(concept, True)
self.sheerka.om.add_concept(concept)
else:
# self.init_log.debug(f"Found concept '{from_db}' in db. Updating.")
concept.update_from(from_db)
builtin_concepts_ids[key] = concept.id
return builtin_concepts_ids
def create_new_concept(self, context, concept: Concept):
    """
    Adds a new concept to the system

    The concept is refused when a concept with the same definition hash already exists, when its
    bnf definition cannot be computed or when the dictionaries of first keywords cannot be built.
    Otherwise it is stored, the first keyword caches and the references are updated and
    EVENT_CONCEPT_CREATED is published.
    :param context: forwarded to the helpers and to sheerka.publish()
    :param concept: concept to add
    :return: sheerka.ret(...) holding a NEW_CONCEPT concept on success, a failed sheerka.ret(...) otherwise
    """
    ensure_concept(concept)

    sheerka = self.sheerka

    concept.init_key()
    init_bnf_ret_value = None  # never assigned afterwards, see the TODO near the end

    ontology = sheerka.om

    if ontology.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
        error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept)
        return sheerka.ret(
            self.NAME,
            False,
            sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept),
            error.args[0])

    # set id before saving in db
    sheerka.set_id_if_needed(concept, False)

    # check if the bnf definition is correctly computed
    try:
        ensure_bnf(context, concept)
    except Exception as ex:
        # an exception raised without argument has an empty 'args': report the exception itself
        # rather than failing with an IndexError that would hide the real error
        return sheerka.ret(self.NAME, False, ex.args[0] if ex.args else ex)

    # compute new concepts_by_first_keyword
    init_ret_value = self.compute_concepts_by_first_token(context, [concept], True)
    if not init_ret_value.status:
        return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
    concepts_by_first_keyword = init_ret_value.body

    # computes resolved concepts_by_first_keyword
    init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
    if not init_ret_value.status:
        return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
    resolved_concepts_by_first_keyword = init_ret_value.body

    # if everything is fine
    freeze_concept_attrs(concept)
    concept.freeze_definition_hash()

    ontology.add_concept(concept)
    ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
    ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)

    metadata = concept.get_metadata()
    if metadata.definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
        # allow search by definition when definition relevant
        ontology.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)

    # update references
    for ref in self.compute_references(concept):
        ontology.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)

    # TODO : this line seems to be useless
    # The grammar is never reset
    # (init_bnf_ret_value is always None here, so the cache is never cleared)
    if concept.get_bnf() and init_bnf_ret_value is not None and init_bnf_ret_value.status:
        sheerka.cache_manager.clear(sheerka.CONCEPTS_GRAMMARS_ENTRY)

    # publish the new concept
    sheerka.publish(context, EVENT_CONCEPT_CREATED, concept)

    # process the return if needed
    return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
def modify_concept(self, context, concept, to_add=None, to_remove=None, modify_source=False):
    """
    Modify the definition of a concept

    The modifications are applied to a new instance created from the given concept, then the
    caches (concepts, first keywords, references) are updated with that new instance.

    to_add is a dictionary
    to_add = {
        'meta'     : {<key, value>} of metadata to update,
        'props'    : {<key, value>} of properties to add/update,
        'variables': {<key, value>} of variables to add/update,
    }
    if the <key> already exists, the entry is updated, otherwise a new value is created
    for props, if the <key> already exists, a new entry is added to the set

    to_remove = {
        'props'    : {<key, [value]>} entries to remove. 'value' can be a list or a single entry
        'variables': [<key>] list of keys to remove
    }

    :param context:
    :param concept: concept to modify
    :param to_add: meta, props or variables to add/update
    :param to_remove: props or variables to remove
    :param modify_source: when True, the concept given in parameter is updated as well
    :return: sheerka.ret(...) holding a NEW_CONCEPT concept whose body is the modified instance,
             or a failed sheerka.ret(...)
    """
    sheerka = self.sheerka
    om = self.sheerka.om

    if not (to_add or to_remove):
        return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept)))

    if not sheerka.om.exists(self.CONCEPTS_BY_ID_ENTRY, concept.id):
        return sheerka.ret(self.NAME, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept))

    # modify the metadata. Almost all ConceptMetadata attributes except variables and props
    modified = sheerka.new_from_template(concept, concept.key)  # reload from cache or database ?
    failure = self._update_concept(context, modified, to_add, to_remove)
    if failure is not None:
        return failure

    # To update concept by first keyword
    # first remove the old references
    old_keywords = self.get_first_tokens(sheerka, concept)  # keyword of the old concept
    by_first_keyword = om.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    for old_keyword in old_keywords:
        try:
            by_first_keyword[old_keyword].remove(concept.id)
            if len(by_first_keyword[old_keyword]) == 0:
                del by_first_keyword[old_keyword]
        except KeyError:  # only occurs in unit tests when concepts are created without create_new()
            pass

    # and then update
    computed = self.compute_concepts_by_first_token(context, [modified], False, by_first_keyword)
    if not computed.status:
        return sheerka.ret(self.NAME, False, ErrorConcept(computed.value))
    by_first_keyword = computed.body

    # computes resolved concepts_by_first_keyword
    resolved = self.resolve_concepts_by_first_keyword(context,
                                                      by_first_keyword,
                                                      {modified.id: modified})
    if not resolved.status:
        return sheerka.ret(self.NAME, False, ErrorConcept(resolved.value))
    resolved_by_first_keyword = resolved.body

    # update concept that referenced the old concept and clear old references
    self.update_references(context, concept, modified, to_add)
    for old_ref in self.compute_references(concept):
        om.delete(self.CONCEPTS_REFERENCES_ENTRY, old_ref, concept.id)

    # compute new references
    for new_ref in self.compute_references(modified):
        om.put(self.CONCEPTS_REFERENCES_ENTRY, new_ref, modified.id)

    om.update_concept(concept, modified)
    om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_first_keyword)
    om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_by_first_keyword)

    # everything seems to be fine. Update the list of attributes
    # Caution. Must be done AFTER update_concept()
    freeze_concept_attrs(modified)

    # TODO : update when definition_type = DEFINITION_TYPE_DEF : have a look at update_references() below
    # TODO : Update concepts grammars : have a look at update_references() below

    if modify_source:
        self._update_concept(context, concept, to_add, to_remove)

    return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=modified))
def remove_concept(self, context, concept):
    """
    Remove a concept, unless it is referenced by other concept(s)
    :param context: gives access to sheerka
    :param concept: concept to remove
    :return: sheerka.ret(...) with SUCCESS, or a failure holding ConceptIsReferenced
             or the ConceptNotFound exception
    """
    sheerka = context.sheerka

    referencing_ids = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
    if referencing_ids is not NotFound:
        # refuse the removal and report new instances of the referencing concepts
        referencing = [self.get_by_id(ref_id) for ref_id in referencing_ids]
        instances = [sheerka.new_from_template(ref, ref.key) for ref in referencing]
        return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(instances)))

    try:
        sheerka.om.remove_concept(concept)
        return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
    except ConceptNotFound as not_found:
        return sheerka.ret(self.NAME, False, sheerka.err(not_found))
def set_attr(self, concept, attribute, value):
    """
    Modifies an attribute of a concept (concept.values)
    :param concept: concept to update
    :param attribute: name of the attribute, or a Concept whose str_id is used as the name
    :param value: value to set
    :return: sheerka.ret(...) with SUCCESS
    """
    ensure_concept(concept)

    if isinstance(attribute, Concept):
        attr_name = attribute.str_id
    else:
        attr_name = attribute

    concept.set_value(attr_name, value)
    success = self.sheerka.new(BuiltinConcepts.SUCCESS)
    return self.sheerka.ret(self.NAME, True, success)
def get_attr(self, concept, attribute):
    """
    Returns the attribute of a concept (concept.values)
    :param concept: concept to read. When it is not a success, it is returned as is
    :param attribute: name of the attribute, or a Concept whose str_id is used as the name
    :return: the value of the attribute, or a NOT_FOUND concept when the value equals NotInit
    """
    if not self.sheerka.is_success(concept):
        return concept

    # The check used to be called without any argument, so nothing was validated.
    # It is done after the pass-through above so that failures are still returned untouched.
    ensure_concept(concept)

    attr = attribute.str_id if isinstance(attribute, Concept) else attribute
    if (value := concept.get_value(attr)) == NotInit:
        return self.sheerka.new(BuiltinConcepts.NOT_FOUND, body={"#concept": concept, "#attr": attribute})

    return value
def set_id_if_needed(self, obj: Concept, is_builtin: bool):
    """
    Set the id of the concept when it does not have one yet
    :param obj: concept to update
    :param is_builtin: True to take the id from the builtin sequence, False for the user one
    :return: None
    """
    metadata = obj.get_metadata()
    if metadata.id is None:
        sequence_key = self.BUILTIN_CONCEPTS_IDS if is_builtin else self.USER_CONCEPTS_IDS
        metadata.id = str(self.sheerka.om.get(self.sheerka.OBJECTS_IDS_ENTRY, sequence_key))
        # self.log.debug(f"Setting id '{obj.metadata.id}' to concept '{obj.metadata.name}'.")
def get_by_key(self, concept_key, concept_id=None):
    """
    Looks for concept(s) by key
    :param concept_key: key to look for. A BuiltinConcepts member is converted to str first
    :param concept_id: id of the concept to keep when several concepts share the key
    :return: what internal_get() returns
    """
    if isinstance(concept_key, BuiltinConcepts):
        concept_key = str(concept_key)
    return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id)
def get_by_name(self, concept_name, concept_id=None):
    """
    Looks for concept(s) by name
    :param concept_name: name to look for
    :param concept_id: id of the concept to keep when several concepts share the name
    :return: what internal_get() returns
    """
    cache_name = self.CONCEPTS_BY_NAME_ENTRY
    return self.internal_get("name", concept_name, cache_name, concept_id)
def get_by_hash(self, concept_hash, concept_id=None):
    """
    Looks for concept(s) by definition hash
    :param concept_hash: hash to look for
    :param concept_id: id of the concept to keep when several concepts share the hash
    :return: what internal_get() returns
    """
    cache_name = self.CONCEPTS_BY_HASH_ENTRY
    return self.internal_get("hash", concept_hash, cache_name, concept_id)
def get_by_id(self, concept_id):
    """
    Looks for a concept by id
    :param concept_id: id to look for
    :return: what internal_get() returns
    """
    cache_name = self.CONCEPTS_BY_ID_ENTRY
    return self.internal_get("id", concept_id, cache_name, None)
def has_id(self, concept_id):
    """
    Tells whether a concept with this id exists in cache
    It does not search in the remote repository
    :param concept_id: id to look for, None is never found
    :return: False when concept_id is None, otherwise the answer of the current cache manager
    """
    if concept_id is not None:
        cache_manager = self.sheerka.om.current_cache_manager()
        return cache_manager.has(self.CONCEPTS_BY_ID_ENTRY, concept_id)

    return False
def has_key(self, concept_key):
    """
    Tells whether concept(s) with this key exist in cache
    It does not search in the remote repository
    :param concept_key: key to look for
    :return: the answer of the current cache manager
    """
    cache_manager = self.sheerka.om.current_cache_manager()
    return cache_manager.has(self.CONCEPTS_BY_KEY_ENTRY, concept_key)
def has_name(self, concept_name):
    """
    Tells whether concept(s) with this name exist in cache
    It does not search in the remote repository
    :param concept_name: name to look for
    :return: the answer of the current cache manager
    """
    cache_manager = self.sheerka.om.current_cache_manager()
    return cache_manager.has(self.CONCEPTS_BY_NAME_ENTRY, concept_name)
def has_hash(self, concept_hash):
    """
    Tells whether concept(s) with this hash exist in cache
    It does not search in the remote repository
    :param concept_hash: hash to look for
    :return: the answer of the current cache manager
    """
    cache_manager = self.sheerka.om.current_cache_manager()
    return cache_manager.has(self.CONCEPTS_BY_HASH_ENTRY, concept_hash)
def internal_get(self, index_name, key, cache_name, concept_id=None):
    """
    Tries to find an entry
    :param index_name: name of the index (ex by_id, by_key...), only used to describe what was not found
    :param key: index value
    :param cache_name: name of the cache (ex Concepts_By_ID...)
    :param concept_id: id of the concept to keep, in case where there are multiple results
    :return: an ErrorConcept when key is None, the entry (or the item of the entry with the
             requested id) when found, otherwise what sheerka.get_unknown() returns
    """
    if key is None:
        return ErrorConcept(f"Concept '{key}' is undefined.")

    found = self.sheerka.om.get(cache_name, key)
    if found is not NotFound:
        # no filter requested, or a single result: return it as is
        if concept_id is None or not hasattr(found, "__iter__"):
            return found

        for candidate in found:
            if candidate.id == concept_id:
                return candidate

    # nothing found (or no item with the requested id)
    if concept_id:
        metadata = [(index_name, key), ("id", concept_id)]
    else:
        metadata = (index_name, key)
    return self.sheerka.get_unknown(metadata)
def update_references(self, context, concept, modified_concept=None, modifications=None):
"""
Updates all the concepts that reference concept
For each referencing concept, its grammar entry is deleted. When the name of the concept
has changed, the bnf definition of the referencing concept is rewritten with the new name.
:param context: not used in this method
:param concept: concept before modification
:param modified_concept: concept after modification, None when only the grammar entries must be deleted
:param modifications: same dictionary as the 'to_add' of modify_concept(), used to detect a name change
:return: None
"""
refs = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if refs is NotFound:
return
for concept_id in refs:
# remove the grammar entry so that it can be recreated
self.sheerka.om.delete(self.sheerka.CONCEPTS_GRAMMARS_ENTRY, concept_id)
# reset the bnf definition if needed
if modified_concept:
# only concepts present in cache are updated (has_id() does not search the remote repository)
if self.has_id(concept_id):
to_update = self.get_by_id(concept_id)
metadata = to_update.get_metadata()
if metadata.definition_type == DEFINITION_TYPE_BNF and self._name_has_changed(modifications):
# replace every identifier equal to the old name by the new name
tokens = list(Tokenizer(metadata.definition))
modified = False
for i, token in enumerate(tokens):
if token.type == TokenKind.IDENTIFIER and token.value == concept.name:
# work on a clone, the original token is left untouched
clone = token.clone()
clone.value = modified_concept.name
tokens[i] = clone
modified = True
# NOTE(review): the indentation of this file was lost - confirm that both statements
# below are only executed when at least one token was replaced
if modified:
to_update.get_metadata().definition = core.utils.get_text_from_tokens(tokens)
to_update.set_bnf(None)
def compute_references(self, concept):
    """
    Lists the concepts used by the bnf definition of the given concept
    We need to keep a track of them, so that if one of these are modified,
    we can modify the current concept accordingly
    :param concept: concept to inspect
    :return: set of ids of the referenced concepts (empty when the concept is not defined by a bnf)
    """
    refs = set()

    if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
        from parsers.BnfNodeParser import BnfNodeConceptExpressionVisitor
        visitor = BnfNodeConceptExpressionVisitor()
        visitor.visit(concept.get_bnf())

        for referenced in visitor.references:
            # a reference given as a string is a concept key
            if isinstance(referenced, str):
                referenced = self.sheerka.get_by_key(referenced)
            refs.add(referenced.id)

    return refs
def is_not_a_variable(self, name):
    """
    Tells whether the name is unknown to the cache of concepts by name
    :param name: name to look for
    :return: True when no entry is found for this name, False otherwise
    """
    by_name = self.sheerka.om.get(self.sheerka.CONCEPTS_BY_NAME_ENTRY, name)
    return by_name is NotFound
@staticmethod
def _name_has_changed(to_add):
if to_add is None or "meta" not in to_add:
return False
return "name" in to_add["meta"]
@staticmethod
def _definition_has_changed(to_add):
if to_add is None or "meta" not in to_add:
return False
return "definition" in to_add["meta"]
def _update_concept(self, context, concept, to_add, to_remove):
"""
Applies the modifications to the concept, in place
The concept may already be partially modified when an error is returned.
:param context: gives access to sheerka, also forwarded to ensure_bnf()
:param concept: concept to update
:param to_add: {'meta': {...}, 'props': {...}, 'variables': {...}} entries to add/update
:param to_remove: {'props': {...}, 'variables': [...]} entries to remove
:return: None when everything went fine, otherwise a failed sheerka.ret(...) holding
ForbiddenAttribute, UnknownAttribute, NoModificationFound, CannotRemoveMeta or ValueNotFound
"""
sheerka = context.sheerka
same_values = {}
if to_add:
if "meta" in to_add:
# All modifications must be allowed
metadata = concept.get_metadata()
for k, v in to_add["meta"].items():
if k in self.forbidden_meta:
return sheerka.ret(self.NAME, False, sheerka.err(ForbiddenAttribute(k)))
# getattr() raises AttributeError when the metadata has no such attribute
try:
if getattr(metadata, k) == v:
same_values[k] = v
else:
setattr(metadata, k, v)
except AttributeError:
return sheerka.ret(self.NAME, False, sheerka.err(UnknownAttribute(k)))
# every requested metadata already has the requested value
# NOTE(review): this returns even when 'props' or 'variables' are also requested - confirm it is intended
if same_values == to_add["meta"]:
return sheerka.ret(self.NAME, False,
sheerka.err(NoModificationFound(concept, same_values)))
if "props" in to_add:
for k, v in to_add["props"].items():
concept.add_prop(k, v)
if "variables" in to_add:
for k, v in to_add["variables"].items():
# update existing or add new
# NOTE(review): the indentation of this file was lost - the 'else' below presumably
# belongs to the 'for' (executed when no existing variable matched)
for i, var in enumerate(concept.get_metadata().variables):
if var[0] == k:
concept.get_metadata().variables[i] = (k, v)
break
else:
concept.def_var(k, v)
if to_remove:
if "meta" in to_remove:
return sheerka.ret(self.NAME, False, sheerka.err(CannotRemoveMeta(to_remove["meta"])))
if "props" in to_remove:
for k, v in to_remove["props"].items():
if k not in concept.get_metadata().props:
return sheerka.ret(self.NAME, False, sheerka.err(UnknownAttribute(k)))
props = concept.get_metadata().props[k]
# remove() raises KeyError when the value is missing (props[k] is presumably a set)
try:
if isinstance(v, (set, list, tuple)):
for item in v:
props.remove(item)
else:
props.remove(v)
# remove empty sets
if len(props) == 0:
del concept.get_metadata().props[k]
except KeyError:
return sheerka.ret(self.NAME, False, sheerka.err(ValueNotFound(k, v)))
if "variables" in to_remove:
variables_to_remove = []
for k in to_remove["variables"]:
# NOTE(review): as above, the 'else' below presumably belongs to the 'for'
# (executed when no variable has this name)
for var_def in concept.get_metadata().variables:
if var_def[0] == k:
variables_to_remove.append(var_def)
delattr(concept, var_def[0])
break
else:
return sheerka.ret(self.NAME, False, sheerka.err(UnknownAttribute(k)))
core.utils.remove_list_from_list(concept.get_metadata().variables, variables_to_remove)
# the key is reset before init_key() is called below, presumably so that it is recomputed
concept.get_metadata().key = None
# NOTE(review): the indentation of this file was lost - confirm whether ensure_bnf() is only
# called when the definition of a bnf concept has changed
if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
concept.set_bnf(None)
ensure_bnf(context, concept)
concept.init_key()
return
@staticmethod
def get_first_tokens(sheerka, concept):
    """
    Gives the first token(s) expected by a concept
    :param sheerka: given to the bnf visitor
    :param concept: concept to inspect
    :return: the first tokens found by BnfNodeFirstTokenVisitor when the concept has a bnf,
             otherwise a list with the first word of the key that is not a variable,
             or None when there is no such word
    """
    if not concept.get_bnf():
        # first word of the key which is not a variable
        words = (word for word in concept.key.split() if not word.startswith(VARIABLE_PREFIX))
        first_word = next(words, None)
        return None if first_word is None else [first_word]

    from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
    visitor = BnfNodeFirstTokenVisitor(sheerka)
    visitor.visit(concept.get_bnf())
    return visitor.first_tokens
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
    """
    Create the map describing the first token expected by a concept
    eg the dictionary that goes into CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    :param context: gives access to sheerka
    :param concepts: lists of concepts to parse
    :param use_sheerka: if True, start from a copy of the entries known by sheerka
    :param previous_entries: entries to start from when use_sheerka is False (updated in place when not empty)
    :return: sheerka.ret(...) whose body is the dictionary {first token: [concept ids]},
             or a failure holding NoFirstTokenError
    """
    sheerka = context.sheerka

    if use_sheerka:
        by_first_token = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    else:
        by_first_token = previous_entries or {}

    for concept in concepts:
        first_tokens = SheerkaConceptManager.get_first_tokens(sheerka, concept)
        if first_tokens is None:
            # no first token found for a concept ?
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))

        for first_token in first_tokens:
            by_first_token.setdefault(first_token, []).append(concept.id)

    # 'uniquify' the lists
    for first_token, concept_ids in by_first_token.items():
        by_first_token[first_token] = core.utils.make_unique(concept_ids)

    return sheerka.ret("BaseNodeParser", True, by_first_token)
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword, modified_concepts=None):
"""
From a dictionary of first tokens, create another dictionary where all references to other concepts
are resolved
for example, from entries
{
'c:|1001:' : 'c:|1002:' # which means that the concept 1002 starts with the concept 1001
'foo': 'c:|1001:'
}
It will create
{
'foo': ['c:|1001:', 'c:|1002:'],
}
This dictionary is supposed to go into RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY
:param context: gives access to sheerka, also used to log the recursions found
:param concepts_by_first_keyword: dictionary to resolve (not modified)
:param modified_concepts: {id: concept} concepts to use instead of the ones known by sheerka
:return: sheerka.ret(...) whose body is the resolved dictionary
"""
sheerka = context.sheerka
res = {}
def get_by_id(c_id):
# concepts being modified take precedence over the ones known by sheerka
if modified_concepts and c_id in modified_concepts:
return modified_concepts[c_id]
return sheerka.get_by_id(c_id)
def resolve_concepts(concept_str):
# 'already_seen' is the set assigned by the main loop below before each top level call
c_key, c_id = core.utils.unstr_concept(concept_str)
if c_id in already_seen:
# the error is returned (not raised) so that the caller can carry on with the other keywords
return ChickenAndEggError(already_seen)
already_seen.add(c_id)
resolved = set()
to_resolve = set()
chicken_and_egg = set()
concept = get_by_id(c_id)
if sheerka.isaset(context, concept):
concepts = sheerka.get_set_elements(context, concept)
else:
concepts = [concept]
# NOTE(review): the indentation of this file was lost - confirm whether the loop over
# 'to_resolve' and the clear() are executed for each concept or once after all of them
for concept in concepts:
ensure_bnf(context, concept) # need to make sure that it cannot fail
# NOTE(review): get_first_tokens() can return None, which cannot be iterated - confirm it cannot happen here
keywords = SheerkaConceptManager.get_first_tokens(sheerka, concept)
for keyword in keywords:
# keywords starting with "c:|" are references to other concepts
(to_resolve if keyword.startswith("c:|") else resolved).add(keyword)
for concept_to_resolve_str in to_resolve:
# this 'res' is local to resolve_concepts(), it is not the result dictionary
res = resolve_concepts(concept_to_resolve_str)
if isinstance(res, ChickenAndEggError):
chicken_and_egg |= res.concepts
else:
resolved |= res
to_resolve.clear()
# only raise when the recursion prevented finding any keyword
if len(resolved) == 0 and len(chicken_and_egg) > 0:
raise ChickenAndEggError(chicken_and_egg)
else:
return resolved
for k, v in concepts_by_first_keyword.items():
if k.startswith("c:|"):
try:
already_seen = set()
resolved_keywords = resolve_concepts(k)
for resolved in resolved_keywords:
res.setdefault(resolved, []).extend(v)
except ChickenAndEggError as ex:
context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
# same set object as ex.concepts: adding the parents also changes len(ex.concepts) used below
concepts_in_recursion = ex.concepts
# make sure to have all the parents
for parent in v:
concepts_in_recursion.add(parent)
# NOTE(review): the indentation of this file was lost - confirm that the loop below
# is executed once, after all the parents are added
for concept_id in concepts_in_recursion:
# make sure we keep the longest chain
old = sheerka.chicken_and_eggs.get(concept_id)
if old is NotFound or len(old) < len(ex.concepts):
sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)
else:
# plain keyword: nothing to resolve
res.setdefault(k, []).extend(v)
# 'uniquify' the lists
for k, v in res.items():
res[k] = core.utils.make_unique(v)
return sheerka.ret("BaseNodeParser", True, res)
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
    """
    Tries to find if there are concepts that match the value of the token
    Caution: when no concept is registered for the token, what 'custom' returned is given back as is
    :param token: token to look for
    :param to_keep: predicate to tell if the concept is eligible
    :param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name
    :param to_map: when given, called with (concept, parser, sheerka) to transform each eligible concept
    :param strip_quotes: Remove quotes from strings
    :param parser: If needed, parser which requested the concepts
    :return: None for a whitespace token or when nothing is found and 'custom' is not given,
             otherwise the list of concepts found, without duplicate
    """
    if token.type == TokenKind.WHITESPACE:
        return None

    name = token.value
    if token.type == TokenKind.STRING and strip_quotes:
        name = name[1:-1]

    extra_concepts = custom(name) if custom else []  # to get extra concepts using an alternative method

    concepts_ids = self.sheerka.om.get(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, name)
    if concepts_ids is NotFound:
        return extra_concepts if custom else None

    kept = []
    for concept_id in concepts_ids:
        candidate = self.sheerka.get_by_id(concept_id)
        if to_keep(candidate):
            kept.append(to_map(candidate, parser, self.sheerka) if to_map else candidate)

    # mapped items may wrap the concept in a 'concept' attribute
    return core.utils.make_unique(kept + extra_concepts,
                                  lambda c: c.concept.id if hasattr(c, "concept") else c.id)