# Source: Sheerka-Old/src/core/sheerka/services/SheerkaConceptManager.py
# (file viewer header: 1296 lines, 53 KiB, Python)

import re
from dataclasses import dataclass
from typing import Set, List, Union
import core.utils
from cache.Cache import Cache
from cache.CacheManager import ConceptNotFound
from cache.DictionaryCache import DictionaryCache
from cache.ListIfNeededCache import ListIfNeededCache
from cache.SetCache import SetCache
from core.builtin_concepts import ErrorConcept
from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, BuiltinUnique
from core.builtin_helpers import ensure_concept, ensure_bnf
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
VARIABLE_PREFIX
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED, NoFirstToken, \
EVENT_CONCEPT_MODIFIED, CONCEPT_COMPARISON_CONTEXT
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind
from parsers.BnfNodeParser import RegExDef
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
# Dotted path of the base node parser class, kept as a string.
# NOTE(review): not referenced in the visible part of this file - confirm it is still used.
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@dataclass
class ChickenAndEggError(Exception):
    """
    Circular dependency detected while resolving concepts by first keyword:
    a concept id was met a second time during the resolution.
    """
    # ids already visited when the cycle was detected
    concepts: Set[str]
@dataclass
class NoFirstTokenError(ErrorObj):
    """
    No first token (or first regular expression) could be computed for a concept
    """
    # concept for which no first item was found
    concept: Concept
    # key of that concept
    key: str
@dataclass
class NoModificationFound(ErrorObj):
    """
    Trying to modify a concept without actually changing anything
    """
    # concept that was supposed to be modified
    concept: Concept
    # requested metadata values that are identical to the current ones (None when nothing was requested)
    attrs: dict = None
@dataclass
class ForbiddenAttribute(ErrorObj):
    """
    When trying to modify an attribute that must not be modified
    """
    # name of the protected attribute
    attr: str
@dataclass
class UnknownAttribute(ErrorObj):
    """
    When trying to modify or remove an attribute that does not exist
    """
    # name of the missing attribute
    attr: str
@dataclass
class CannotRemoveMeta(ErrorObj):
    """
    When trying to remove a metadata attribute (ConceptMeta is a class, you cannot remove an attribute from it)
    """
    # the 'meta' entries that were requested for removal
    attrs: dict
@dataclass
class ValueNotFound(ErrorObj):
    """
    When trying to remove a value that does not exist (but the props/variable exists)
    """
    # name of the property holding the values
    item: str
    # value (or collection of values) that was requested for removal
    value: object
@dataclass
class ConceptIsReferenced(ErrorObj):
    """
    When trying to remove a concept that is referenced by other concept(s)
    """
    # the concepts that reference the one being removed
    references: list
class SheerkaConceptManager(BaseService):
    """
    Service in charge of the concepts: creation, modification, removal, lookups
    (by id, key, name, hash) and the bookkeeping of the related caches.
    """
    # name under which the service methods are bound on sheerka
    NAME = "ConceptManager"

    BUILTIN_CONCEPTS_IDS = "Builtins_Concepts_IDs"  # sequential key for builtin concepts
    USER_CONCEPTS_IDS = "User_Concepts_IDs"  # sequential key for user defined concepts

    # names of the cache entries
    CONCEPTS_IDS_ENTRY = "ConceptManager:Concepts_IDs"
    CONCEPTS_BY_ID_ENTRY = "ConceptManager:Concepts_By_ID"  # to store all the concepts
    CONCEPTS_BY_KEY_ENTRY = "ConceptManager:Concepts_By_Key"
    CONCEPTS_BY_NAME_ENTRY = "ConceptManager:Concepts_By_Name"
    CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash"  # store hash of concepts definitions (not values)
    CONCEPTS_REFERENCES_ENTRY = "ConceptManager:Concepts_References"  # tracks references between concepts
    CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
    RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"
    CONCEPTS_BY_REGEX_ENTRY = "ConceptManager:Concepts_By_Regex"
    CONCEPTS_BNF_DEFINITIONS_ENTRY = "ConceptManager:Concepts_BNF_Definitions"
def __init__(self, sheerka):
    """
    Create the service (order 11) and compute which metadata attributes may be modified.
    :param sheerka:
    """
    super().__init__(sheerka, order=11)

    # metadata attributes that modify_concept() must never touch
    protected = {"is_builtin", "key", "id", "props", "variables"}
    # every public attribute declared on ConceptMetadata
    public_meta = {name for name in vars(ConceptMetadata) if not name.startswith("_")}

    self.forbidden_meta = protected
    self.allowed_meta = public_meta - protected
    self.compiled_concepts_by_regex = []
def initialize(self):
    """
    Bind the service methods on sheerka, then register all the caches used by the manager
    :return:
    """
    bind = self.sheerka.bind_service_method

    # (method, boolean given as third positional argument, extra keyword arguments)
    hidden = {"visible": False}
    bindings = (
        (self.create_new_concept, True, {}),
        (self.modify_concept, True, {}),
        (self.remove_concept, True, {}),
        (self.set_id_if_needed, True, {}),
        (self.set_attr, True, {}),
        (self.get_attr, False, {}),
        (self.smart_get_attr, False, {}),
        (self.set_property, True, {"as_name": "set_prop"}),
        (self.get_property, False, {"as_name": "get_prop"}),
        (self.get_by_key, False, hidden),
        (self.get_by_name, False, hidden),
        (self.get_by_hash, False, hidden),
        (self.get_by_id, False, hidden),
        (self.is_not_a_concept_name, False, hidden),
        (self.is_a_concept_name, False, hidden),
        (self.get_concepts_by_first_token, False, hidden),
        (self.get_concepts_by_first_regex, False, hidden),
        (self.get_concepts_bnf_definitions, False, hidden),
        (self.clear_bnf_definition, True, hidden),
        (self.set_precedence, True, {}),
    )
    for method, flag, options in bindings:
        bind(self.NAME, method, flag, **options)

    om = self.sheerka.om

    # caches indexed by a value computed from the concept: (entry, cache class, key getter)
    concept_caches = (
        (self.CONCEPTS_BY_ID_ENTRY, Cache, lambda c: c.id),
        (self.CONCEPTS_BY_KEY_ENTRY, ListIfNeededCache, lambda c: c.key),
        (self.CONCEPTS_BY_NAME_ENTRY, ListIfNeededCache, lambda c: c.name),
        (self.CONCEPTS_BY_HASH_ENTRY, ListIfNeededCache, lambda c: c.get_definition_hash()),
    )
    for entry, cache_type, key_getter in concept_caches:
        om.register_concept_cache(entry, cache_type().auto_configure(entry), key_getter, True)

    # other caches: (entry, cache class, extra keyword arguments)
    not_persisted = {"persist": False}
    plain_caches = (
        (self.CONCEPTS_REFERENCES_ENTRY, SetCache, {}),
        (self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, DictionaryCache, {}),
        (self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, DictionaryCache, not_persisted),
        (self.CONCEPTS_BY_REGEX_ENTRY, DictionaryCache, {}),
        (self.CONCEPTS_BNF_DEFINITIONS_ENTRY, Cache, not_persisted),
    )
    for entry, cache_type, options in plain_caches:
        om.register_cache(entry, cache_type().auto_configure(entry), **options)
def initialize_deferred(self, context, is_first_time):
    """
    Second initialization step: seed the user concept ids on first run, then rebuild
    the resolved first keywords and the compiled first regular expressions
    :param context:
    :param is_first_time: True when the user concept id sequence must be created
    :return:
    """
    om = self.sheerka.om

    if is_first_time:
        om.put(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS, 1000)

    # first tokens
    om.get(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, None)  # to init the cache with the values from sdp
    first_keywords = om.current_cache_manager().copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    resolved = self.resolve_concepts_by_first_keyword(context, first_keywords)
    om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved.body)

    # regular expressions : the keys are stored serialized
    om.get(self.CONCEPTS_BY_REGEX_ENTRY, None)
    serialized_entries = om.current_cache_manager().copy(self.CONCEPTS_BY_REGEX_ENTRY)
    by_first_regex = {}
    for serialized, concept_ids in serialized_entries.items():
        by_first_regex[RegExDef().deserialize(serialized)] = concept_ids
    compiled = self.compile_concepts_by_first_regex(context, by_first_regex)

    self.compiled_concepts_by_regex.clear()
    self.compiled_concepts_by_regex.extend(compiled.body)
def initialize_builtin_concepts(self):
    """
    Initializes the builtin concepts
    Concepts that are not yet stored get an id and are added,
    the others are updated from their stored version
    :return: dictionary {builtin key: concept id}
    """
    builtin_concepts_ids = {}
    for key in AllBuiltinConcepts:
        # sheerka itself for SHEERKA, else the class registered in builtin_cache, else a plain Concept
        concept = self.sheerka if key == BuiltinConcepts.SHEERKA \
            else self.sheerka.builtin_cache[str(key)]() if str(key) in self.sheerka.builtin_cache \
            else Concept(key, True, False, key)
        if key in BuiltinUnique:
            concept.get_metadata().is_unique = True
            # NOTE(review): the original indentation was lost; it is assumed that this line
            # belongs to the 'BuiltinUnique' branch - confirm
            concept.get_hints().is_evaluated = True
        from_db = self.sheerka.om.get(self.CONCEPTS_BY_KEY_ENTRY, concept.get_metadata().key)
        if from_db is NotFound:
            # self.init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
            self.set_id_if_needed(concept, True)
            self.sheerka.om.add_concept(concept)
        else:
            # self.init_log.debug(f"Found concept '{from_db}' in db. Updating.")
            concept.update_from(from_db)
        builtin_concepts_ids[key] = concept.id
    return builtin_concepts_ids
def create_new_concept(self, context, concept: Concept):
    """
    Adds a new concept to the system
    :param context:
    :param concept: concept to add
    :return: sheerka return value. On success, its body is a NEW_CONCEPT concept wrapping the concept.
             On failure, it carries CONCEPT_ALREADY_DEFINED (same definition hash already known),
             the error raised while computing the bnf, or the failure of recompute_first_items()
    """
    ensure_concept(concept)
    sheerka = self.sheerka
    concept.init_key()
    om = sheerka.om

    # the same definition must not be registered twice
    if om.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
        error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept)
        return sheerka.ret(
            self.NAME,
            False,
            sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept),
            error.args[0])

    # set id before saving in db
    sheerka.set_id_if_needed(concept, False)

    # check if the bnf definition is correctly computed
    try:
        ensure_bnf(context, concept)
    except Exception as ex:
        # an exception raised without argument has an empty 'args' : report the exception itself
        return sheerka.ret(self.NAME, False, ex.args[0] if ex.args else ex)

    # recompute concepts by first tokens and concept by first regex
    update_items_res = self.recompute_first_items(context, None, [concept])
    if not update_items_res.status:
        return update_items_res
    by_first_keyword, by_first_regex, resolved_by_first_keyword, compiled_by_first_regex = update_items_res.body

    # if everything is fine
    freeze_concept_attrs(concept)
    concept.freeze_definition_hash()
    om.add_concept(concept)
    self.update_first_items_caches(context,
                                   by_first_keyword,
                                   by_first_regex,
                                   resolved_by_first_keyword,
                                   compiled_by_first_regex)

    if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
        # allow search by definition when definition relevant
        om.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)

    # update references
    for ref in self.compute_references(concept):
        om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)

    # publish the new concept
    sheerka.publish(context, EVENT_CONCEPT_CREATED, concept)
    return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
def modify_concept(self, context, concept, to_add=None, to_remove=None, modify_source=False):
    """
    Modify the definition of a concept
    The modifications are applied on a new instance created from the concept,
    the concept given in parameter is only updated when modify_source is True
    :param context:
    :param concept: concept to modify
    :param to_add: meta, props or variables to add/update
    :param to_remove: props or variables to remove
    :param modify_source: update or not the concept given in parameter
    :return: sheerka return value. On success, its body is a NEW_CONCEPT concept wrapping the modified concept
    """
    # to_add is a dictionary
    # to_add = {
    #   'meta' : {<key>: <value>} of metadata to update,
    #   'props' : {<key>: <value>} of properties to add/update,
    #   'variables': {<key>: <value>} of variables to add/update,
    # }
    # for variables, if the <key> already exists, the entry is updated, otherwise a new value is created
    # for props, if the <key> already exists, a new entry is added to the set
    #
    # to_remove = {
    #   'props' : {<key>: [value]} entries to remove. 'value' can be a list or a single entry
    #   'variables': [<key>] list of keys to remove
    # }
    #
    sheerka = self.sheerka
    om = self.sheerka.om

    # nothing requested
    if not to_add and not to_remove:
        return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept)))

    # only a concept that is already registered can be modified
    if not sheerka.om.exists(self.CONCEPTS_BY_ID_ENTRY, concept.id):
        return sheerka.ret(self.NAME, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept))

    # modify the metadata. Almost all ConceptMetadata attributes except variables and props
    new_concept = sheerka.new_from_template(concept, concept.key)  # reload from cache or database ?
    res = self._update_concept(context, new_concept, to_add, to_remove)
    if res is not None:
        # _update_concept() only returns a value when the modification is refused
        return res

    # recompute concepts by first tokens and concept by first regex
    update_items_res = self.recompute_first_items(context, concept, [new_concept])
    if not update_items_res.status:
        return update_items_res
    by_first_keyword, by_first_regex, resolved_by_first_keyword, compiled_by_first_regex = update_items_res.body

    # update concepts that referenced the old concept and clear old references
    self.update_references(context, concept, new_concept, to_add)
    for ref in self.compute_references(concept):
        om.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)

    # compute new references
    for ref in self.compute_references(new_concept):
        om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)

    # everything is ok, update the caches
    om.update_concept(concept, new_concept)
    self.update_first_items_caches(context,
                                   by_first_keyword,
                                   by_first_regex,
                                   resolved_by_first_keyword,
                                   compiled_by_first_regex)

    # everything seems to be fine. Update the list of attributes
    # Caution. Must be done AFTER update_concept()
    freeze_concept_attrs(new_concept)

    # TODO : update when definition_type = DEFINITION_TYPE_DEF : have a look at update_references() below
    # TODO : Update concepts grammars : have a look at update_references() below
    if modify_source:
        # replay the same modifications on the instance given by the caller (the result is ignored)
        self._update_concept(context, concept, to_add, to_remove)

    sheerka.publish(context, EVENT_CONCEPT_MODIFIED, {"old": concept, "new": new_concept})
    return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=new_concept))
def remove_concept(self, context, concept):
    """
    Remove a concept.
    The removal is refused when the concept is unknown or still referenced by other concepts
    :param context:
    :param concept: concept to remove
    :return: sheerka return value (SUCCESS when the concept is removed)
    """
    # TODO : resolve concept first
    sheerka = context.sheerka
    if not sheerka.is_known(concept):
        return sheerka.ret(self.NAME, False, sheerka.err(ConceptNotFound(concept)))

    om = sheerka.om
    referencing_ids = om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
    if referencing_ids is not NotFound:
        # report new instances of the concepts that still use this one
        templates = [self.get_by_id(ref_id) for ref_id in referencing_ids]
        instances = [sheerka.new_from_template(template, template.key) for template in templates]
        return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(instances)))

    # recompute concepts by first tokens and concept by first regex
    recomputed = self.recompute_first_items(context, concept, None)
    if not recomputed.status:
        return recomputed
    keywords, regexes, resolved_keywords, compiled_regexes = recomputed.body

    # everything seems fine. I can commit the modification and remove
    om.remove_concept(concept)
    self.update_first_items_caches(context, keywords, regexes, resolved_keywords, compiled_regexes)

    sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
    return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
def set_attr(self, context, concept, attribute, value):
    """
    Modifies an attribute of a concept (concept.values)
    When the attribute already has a different value, the new value is appended
    (the attribute becomes a list). Setting the same value again is a no-op.
    When EVAL_GLOBAL_TRUTH_REQUESTED is in the context, the definition of the concept
    is also updated through modify_concept()
    :param context:
    :param concept:
    :param attribute: attribute name, or a concept (its str_id is then used as name)
    :param value:
    :return: sheerka return value
    """
    ensure_concept(concept)
    attr = attribute.str_id if isinstance(attribute, Concept) else attribute

    # in global truth mode the reference is the registered concept, not the given instance
    if context.in_context(BuiltinConcepts.EVAL_GLOBAL_TRUTH_REQUESTED):
        old_value = self.sheerka.get_by_id(concept.id).get_value(attr)
    else:
        old_value = concept.get_value(attr)

    # Caution, creating a list when a set_attr is called for the second time is not always
    # what is expected
    if old_value is not NotInit:
        if old_value == value:
            return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
        if isinstance(old_value, list):
            # the existing list is extended in place
            old_value.append(value)
            value = old_value
        else:
            value = [old_value, value]

    if context.in_context(BuiltinConcepts.EVAL_GLOBAL_TRUTH_REQUESTED):
        to_add = {"variables": {attr: value}}
        res = self.sheerka.modify_concept(context, concept, to_add, modify_source=True)
        concept.set_value(attr, value)
        return res
    else:
        concept.set_value(attr, value)
        return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
def get_attr(self, concept, attribute):
    """
    Returns the attribute of a concept
    :param concept: concept to read. It is returned as is when sheerka.is_success() rejects it
    :param attribute: attribute name, or a concept (its str_id is then used as name)
    :return: the value, or a NOT_FOUND concept when the attribute is not initialized
    """
    # NOTE(review): ensure_concept() is called without argument here while set_attr()
    # passes the concept - confirm whether the concept was meant to be checked
    ensure_concept()
    if not self.sheerka.is_success(concept):
        return concept

    attr = attribute.str_id if isinstance(attribute, Concept) else attribute
    value = concept.get_value(attr)
    # identity test, like everywhere else in this class : '==' would call the __eq__ of the value
    if value is NotInit:
        return self.sheerka.new(BuiltinConcepts.NOT_FOUND, body={"#concept": concept, "#attr": attribute})
    return value
def smart_get_attr(self, concept, attribute):
    """
    Returns the attribute of a concept. When there is no direct match and the attribute
    is a concept, the variables of the concept whose name refers to a related concept are used
    :param concept:
    :param attribute: attribute name or concept
    :return: the value, a list of values, or a NOT_FOUND concept
    """
    sheerka = self.sheerka

    def dig_for(candidate, wanted):
        """
        Follow the chain of bodies of candidate until a body that is an instance of wanted is found
        :param candidate:
        :param wanted:
        :return: the matching body, None otherwise
        """
        while isinstance(candidate, Concept) and candidate.body is not NotInit:
            if sheerka.isinstance(candidate.body, wanted):
                return candidate.body
            candidate = candidate.body
        return None

    def not_found():
        return sheerka.new(BuiltinConcepts.NOT_FOUND, body={"#concept": concept, "#attr": attribute})

    def variables_naming_a_concept():
        """
        Yield (resolved concept, variable value) for the variables whose name designates a known concept
        """
        for name, var_value in concept.variables().items():
            c_key, c_id = core.utils.unstr_concept(name)
            if c_id is None:
                continue
            resolved = sheerka.fast_resolve((c_key, c_id), return_new=False)
            if resolved is not None:
                yield resolved, var_value

    def single_or_all(items):
        return items[0] if len(items) == 1 else items

    ensure_concept()
    if not sheerka.is_success(concept):
        return concept

    # the direct access wins
    value = self.get_attr(concept, attribute)
    if not sheerka.isinstance(value, BuiltinConcepts.NOT_FOUND):
        return value

    if not isinstance(attribute, Concept):
        return not_found()

    # try to be smart
    result = []

    # first look in children
    for named_concept, var_value in variables_naming_a_concept():
        if not sheerka.isa(attribute, named_concept):
            continue
        candidates = var_value if hasattr(var_value, "__iter__") else [var_value]
        for candidate in candidates:
            found = dig_for(candidate, attribute)
            if found is not None:
                result.append(found)
    if len(result) > 0:
        return single_or_all(result)

    # then try the ancestors
    for named_concept, var_value in variables_naming_a_concept():
        if not sheerka.isa(named_concept, attribute):
            continue
        if isinstance(var_value, list):
            result.extend(var_value)
        else:
            result.append(var_value)
    if len(result) > 0:
        return single_or_all(result)

    return not_found()
def get_property(self, concept, prop):
    """
    Read a property of a concept
    :param concept: concept to read. It is returned as is when sheerka.is_success() rejects it
    :param prop: property name. A builtin concept is replaced by its key
    :return: the value of the property, or a NOT_FOUND concept when get_prop() gives None
    """
    ensure_concept()
    if not self.sheerka.is_success(concept):
        return concept

    is_builtin_concept = isinstance(prop, Concept) and prop.get_metadata().is_builtin
    if is_builtin_concept:
        prop = prop.key

    value = concept.get_prop(prop)
    if value is not None:
        return value
    return self.sheerka.new(BuiltinConcepts.NOT_FOUND, body={"#concept": concept, "#prop": prop})
def set_property(self, context, concept, prop, value, all_concepts=False):
    """
    Write a property of a concept. The concept given in parameter is modified
    :param context:
    :param concept: concept to update. It is returned as is when sheerka.is_success() rejects it
    :param prop: property name. A builtin concept is replaced by its key
    :param value:
    :param all_concepts: if True, the definition of the concept is updated (through modify_concept())
    :return: sheerka return value
    """
    ensure_concept()
    if not self.sheerka.is_success(concept):
        return concept

    is_builtin_concept = isinstance(prop, Concept) and prop.get_metadata().is_builtin
    if is_builtin_concept:
        prop = prop.key

    if not all_concepts:
        # only this instance is concerned
        concept.set_prop(prop, value)
        return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))

    new_props = {prop: value}
    return self.modify_concept(context, concept, to_add={"props": new_props}, modify_source=True)
def set_id_if_needed(self, obj: Concept, is_builtin: bool):
    """
    Give an id to the concept when it does not have one yet
    :param obj: concept to update
    :param is_builtin: selects the entry (builtin or user defined concepts) the id is read from
    :return:
    """
    metadata = obj.get_metadata()
    if metadata.id is None:
        sequence_key = self.BUILTIN_CONCEPTS_IDS if is_builtin else self.USER_CONCEPTS_IDS
        metadata.id = str(self.sheerka.om.get(self.sheerka.OBJECTS_IDS_ENTRY, sequence_key))
        # self.log.debug(f"Setting id '{obj.metadata.id}' to concept '{obj.metadata.name}'.")
def get_by_key(self, concept_key, concept_id=None):
    """
    Look for concept(s) by key
    :param concept_key: key, or BuiltinConcepts member (its str() is then used)
    :param concept_id: id used to pick one concept when several share the key
    :return: see internal_get()
    """
    if isinstance(concept_key, BuiltinConcepts):
        concept_key = str(concept_key)
    return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id)
def get_by_name(self, concept_name, concept_id=None):
    """
    Look for concept(s) by name
    :param concept_name:
    :param concept_id: id used to pick one concept when several share the name
    :return: see internal_get()
    """
    cache_name = self.CONCEPTS_BY_NAME_ENTRY
    return self.internal_get("name", concept_name, cache_name, concept_id)
def get_by_hash(self, concept_hash, concept_id=None):
    """
    Look for concept(s) by definition hash
    :param concept_hash:
    :param concept_id: id used to pick one concept when several share the hash
    :return: see internal_get()
    """
    cache_name = self.CONCEPTS_BY_HASH_ENTRY
    return self.internal_get("hash", concept_hash, cache_name, concept_id)
def get_by_id(self, concept_id):
    """
    Look for a concept by id
    :param concept_id:
    :return: see internal_get()
    """
    cache_name = self.CONCEPTS_BY_ID_ENTRY
    return self.internal_get("id", concept_id, cache_name, None)
def has_id(self, concept_id):
    """
    Tells if a concept with this id is in cache
    The remote repository is not searched
    :param concept_id: None always gives False
    :return:
    """
    return concept_id is not None and \
        self.sheerka.om.current_cache_manager().has(self.CONCEPTS_BY_ID_ENTRY, concept_id)
def has_key(self, concept_key):
    """
    Tells if concept(s) with this key are in cache
    The remote repository is not searched
    :param concept_key:
    :return:
    """
    cache_manager = self.sheerka.om.current_cache_manager()
    return cache_manager.has(self.CONCEPTS_BY_KEY_ENTRY, concept_key)
def has_name(self, concept_name):
    """
    Tells if concept(s) with this name are in cache
    The remote repository is not searched
    :param concept_name:
    :return:
    """
    cache_manager = self.sheerka.om.current_cache_manager()
    return cache_manager.has(self.CONCEPTS_BY_NAME_ENTRY, concept_name)
def has_hash(self, concept_hash):
    """
    Tells if concept(s) with this hash are in cache
    The remote repository is not searched
    :param concept_hash:
    :return:
    """
    cache_manager = self.sheerka.om.current_cache_manager()
    return cache_manager.has(self.CONCEPTS_BY_HASH_ENTRY, concept_hash)
def internal_get(self, index_name, key, cache_name, concept_id=None):
    """
    Look for an entry in one of the concept caches
    :param index_name: name of the index (ex by_id, by_key...)
    :param key: index value
    :param cache_name: name of the cache (ex Concepts_By_ID...)
    :param concept_id: id of the wanted concept, used when there are multiple results
    :return: the entry found, an ErrorConcept when key is None, the 'unknown' concept otherwise
    """
    if key is None:
        return ErrorConcept(f"Concept '{key}' is undefined.")

    found = self.sheerka.om.get(cache_name, key)
    if found is not NotFound:
        # no way (or no need) to choose : give back what was found
        if concept_id is None or not hasattr(found, "__iter__"):
            return found

        for candidate in found:
            if candidate.id == concept_id:
                return candidate

    # nothing matches
    metadata = {index_name: key}
    if concept_id:
        metadata["id"] = concept_id
    return self.sheerka.get_unknown(metadata)
def update_references(self, context, concept, modified_concept=None, modifications=None):
    """
    Updates all the concepts that reference concept
    For each of them, the cached bnf definition entry is deleted. When the concept was renamed,
    the name is also replaced in the textual bnf definition of the referencing concept
    :param context:
    :param concept: Old version of the concept
    :param modified_concept: new version of the concept
    :param modifications: what are the modification (same format as 'to_add' in modify_concept())
    :return: None, or the failed result of recompute_first_items()
    """
    refs = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
    if refs is NotFound:
        # nobody references this concept
        return

    # NOTE(review): the original indentation was lost; the nesting below is reconstructed - confirm
    for concept_id in refs:
        # remove the grammar entry so that it can be recreated
        self.sheerka.om.delete(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, concept_id)
        to_update = self.get_by_id(concept_id)
        metadata = to_update.get_metadata()

        # reset the bnf definition if needed
        if modified_concept:
            if self.has_id(concept_id):  # reset only if the bnf definition is in cache
                if metadata.definition_type == DEFINITION_TYPE_BNF and self._name_has_changed(modifications):
                    # replace the old name by the new one in the textual definition
                    tokens = list(Tokenizer(metadata.definition))
                    modified = False
                    for i, token in enumerate(tokens):
                        if token.type == TokenKind.IDENTIFIER and token.value == concept.name:
                            clone = token.clone()
                            clone.value = modified_concept.name
                            tokens[i] = clone
                            modified = True
                    if modified:
                        to_update.get_metadata().definition = core.utils.get_text_from_tokens(tokens)
                to_update.set_bnf(None)

        # update concept_by_first_token
        if metadata.definition_type == DEFINITION_TYPE_BNF:
            # recompute concepts by first tokens and concept by first regex
            # NOTE(review): 'concept' is given both as the old and the new concept,
            # 'to_update' may have been intended - confirm
            res = self.recompute_first_items(context, concept, [concept])
            if not res.status:
                # NOTE(review): modify_concept() ignores the value returned by this method
                return res
            by_first_keyword, by_first_regex, resolved_by_first_keyword, compiled_by_first_regex = res.body
            self.update_first_items_caches(context,
                                           by_first_keyword,
                                           by_first_regex,
                                           resolved_by_first_keyword,
                                           compiled_by_first_regex)
def compute_references(self, concept):
    """
    Collect the ids of the concepts used by the bnf definition of the concept.
    Keeping track of them allows to update the concept when one of them is modified
    :param concept:
    :return: set of concept ids (empty when the concept is not defined by a bnf)
    """
    if concept.get_metadata().definition_type != DEFINITION_TYPE_BNF:
        return set()

    from parsers.BnfNodeParser import BnfNodeConceptExpressionVisitor
    visitor = BnfNodeConceptExpressionVisitor()
    visitor.visit(concept.get_bnf())

    referenced_ids = set()
    for referenced in visitor.references:
        # a reference given as a string is a concept key
        if isinstance(referenced, str):
            referenced = self.sheerka.get_by_key(referenced)
        referenced_ids.add(referenced.id)
    return referenced_ids
def is_not_a_concept_name(self, name):
    """
    Tells if no concept is registered under this name
    :param name:
    :return: True when the lookup by name gives NotFound
    """
    by_name = self.sheerka.om.get(self.sheerka.CONCEPTS_BY_NAME_ENTRY, name)
    return by_name is NotFound
def is_a_concept_name(self, name):
    """
    Tells if at least one concept is registered under this name
    :param name:
    :return: True when the lookup by name gives something else than NotFound
    """
    by_name = self.sheerka.om.get(self.sheerka.CONCEPTS_BY_NAME_ENTRY, name)
    return by_name is not NotFound
def clear_bnf_definition(self, concept_id=None):
    """
    Forget the cached bnf definition of one concept, or of all of them
    :param concept_id: id of the concept. When falsy, the whole entry is cleared
    :return:
    """
    om = self.sheerka.om
    if not concept_id:
        om.clear(self.CONCEPTS_BNF_DEFINITIONS_ENTRY)
        return
    om.delete(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, concept_id)
def set_precedence(self, context, *concepts):
    """
    Set the precedence order when parsing concept with SyaNodeParser
    The first concept in the list have the highest priority
    Each concept is declared greater than the next one; the first failure stops the chain
    :param context:
    :param concepts: at least two concepts, from the highest to the lowest priority
    :return: an error when less than two concepts are given, the first failed
             set_is_greater_than() result, or a SUCCESS concept
    """
    if len(concepts) < 2:
        return self.sheerka.err("Not enough elements")

    # only the first element is checked, as before
    ensure_concept(concepts[0])

    # consecutive pairs. A StopIteration escaping from set_is_greater_than() is no longer swallowed
    for higher, lower in zip(concepts, concepts[1:]):
        ret = self.sheerka.set_is_greater_than(context,
                                               BuiltinConcepts.PRECEDENCE,
                                               higher,
                                               lower,
                                               CONCEPT_COMPARISON_CONTEXT)
        if not ret.status:
            return ret

    return self.sheerka.new(BuiltinConcepts.SUCCESS)
@staticmethod
def _name_has_changed(to_add):
if to_add is None or "meta" not in to_add:
return False
return "name" in to_add["meta"]
@staticmethod
def _definition_has_changed(to_add):
if to_add is None or "meta" not in to_add:
return False
return "definition" in to_add["meta"]
def _update_concept(self, context, concept, to_add, to_remove):
    """
    Apply the modifications on the concept (in place)
    :param context:
    :param concept: concept to update
    :param to_add: meta, props and variables to add/update (see modify_concept())
    :param to_remove: props and variables to remove (see modify_concept())
    :return: None when the modifications are applied, a failed sheerka return value otherwise.
             Caution, modifications applied before the failure are not rolled back
    """
    sheerka = context.sheerka
    same_values = {}  # requested metadata that already have the requested value
    if to_add:
        if "meta" in to_add:
            # All modifications must be allowed
            metadata = concept.get_metadata()
            for k, v in to_add["meta"].items():
                if k in self.forbidden_meta:
                    return sheerka.ret(self.NAME, False, sheerka.err(ForbiddenAttribute(k)))
                try:
                    if getattr(metadata, k) == v:
                        same_values[k] = v
                    else:
                        setattr(metadata, k, v)
                except AttributeError:
                    return sheerka.ret(self.NAME, False, sheerka.err(UnknownAttribute(k)))
            # every requested metadata already has the requested value
            if same_values == to_add["meta"]:
                return sheerka.ret(self.NAME, False,
                                   sheerka.err(NoModificationFound(concept, same_values)))
        if "props" in to_add:
            for k, v in to_add["props"].items():
                concept.set_prop(k, v)
        if "variables" in to_add:
            for k, v in to_add["variables"].items():
                # update existing or add new
                for i, var in enumerate(concept.get_metadata().variables):
                    if var[0] == k:
                        concept.get_metadata().variables[i] = (k, v)
                        break
                else:
                    # no variable with this name
                    concept.def_var(k, v)
    if to_remove:
        if "meta" in to_remove:
            return sheerka.ret(self.NAME, False, sheerka.err(CannotRemoveMeta(to_remove["meta"])))
        if "props" in to_remove:
            for k, v in to_remove["props"].items():
                if k not in concept.get_metadata().props:
                    return sheerka.ret(self.NAME, False, sheerka.err(UnknownAttribute(k)))
                props = concept.get_metadata().props[k]
                try:
                    if isinstance(v, (set, list, tuple)):
                        for item in v:
                            props.remove(item)
                    else:
                        props.remove(v)
                    # remove empty sets
                    if len(props) == 0:
                        del concept.get_metadata().props[k]
                except KeyError:
                    # NOTE(review): only KeyError is caught, which presumes that the values
                    # of a property are kept in a set - confirm
                    return sheerka.ret(self.NAME, False, sheerka.err(ValueNotFound(k, v)))
        if "variables" in to_remove:
            variables_to_remove = []
            for k in to_remove["variables"]:
                for var_def in concept.get_metadata().variables:
                    if var_def[0] == k:
                        variables_to_remove.append(var_def)
                        delattr(concept, var_def[0])
                        break
                else:
                    # no variable with this name
                    return sheerka.ret(self.NAME, False, sheerka.err(UnknownAttribute(k)))
            core.utils.remove_list_from_list(concept.get_metadata().variables, variables_to_remove)
            if concept.get_all_attributes():
                core.utils.remove_list_from_list(concept.get_all_attributes(), [v[0] for v in variables_to_remove])

    # the key is reset, it is initialized again by init_key() below
    concept.get_metadata().key = None
    if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
        # the bnf must be computed again from the new definition
        concept.set_bnf(None)
        ensure_bnf(context, concept)
    # NOTE(review): the original indentation was lost; it is assumed that the parameters
    # are recomputed whatever the modification, not only when the bnf changes - confirm
    self.recompute_concept_parameters(context, concept)
    concept.init_key()
    return
def _remove_concept_first_token_and_first_regex(self, concept):
    """
    Compute what the 'by first keyword' and 'by first regex' entries become
    once the concept is removed. Copies are updated, not the caches
    :param concept: concept to remove
    :return: tuple (concepts by first keyword, concepts by first regex with deserialized keys)
    """

    def discard_concept(entries, key):
        # work on a copy of the list of ids. An entry left without id is deleted
        remaining = entries[key].copy()
        remaining.remove(concept.id)
        if len(remaining) == 0:
            del entries[key]
        else:
            entries[key] = remaining

    first_items = self.get_first_items(self.sheerka, concept)  # keyword of the old concept
    by_first_keyword = self.sheerka.om.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    by_regex = self.sheerka.om.copy(self.CONCEPTS_BY_REGEX_ENTRY)

    for first_item in first_items:
        try:
            if isinstance(first_item, RegExDef):
                # regular expressions are stored under their serialized form
                discard_concept(by_regex, first_item.serialize())
            else:
                discard_concept(by_first_keyword, first_item)
        except KeyError:  # only occurs in unit tests when concepts are created without create_new()
            pass

    deserialized_by_regex = {RegExDef().deserialize(k): v for k, v in by_regex.items()}
    return by_first_keyword, deserialized_by_regex
@staticmethod
def get_first_tokens(sheerka, concept):
    """
    Get the first tokens of a concept
    With a bnf, they come from BnfNodeFirstTokenVisitor (NoFirstToken excluded),
    otherwise it is the first word of the key that is not a variable
    :param sheerka:
    :param concept:
    :return: list of tokens, None when the key only contains variables
    """
    if not concept.get_bnf():
        words = (word for word in concept.key.split() if not word.startswith(VARIABLE_PREFIX))
        first_word = next(words, None)
        return None if first_word is None else [first_word]

    from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
    visitor = BnfNodeFirstTokenVisitor(sheerka)
    visitor.visit(concept.get_bnf())
    return [token for token in visitor.first_tokens if token is not NoFirstToken]
@staticmethod
def get_first_items(sheerka, concept) -> List[Union[str, RegExDef]]:
    """
    Get the first items expected by the concept
    With a bnf, they are the first tokens found by BnfNodeFirstTokenVisitor,
    otherwise it is the first word of the key that is not a variable
    :param sheerka:
    :param concept:
    :return: List of string (if it's token or RegExDef if it's the definition of a regex),
             None when the key only contains variables
    """
    if not concept.get_bnf():
        words = (word for word in concept.key.split() if not word.startswith(VARIABLE_PREFIX))
        first_word = next(words, None)
        return None if first_word is None else [first_word]

    from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
    visitor = BnfNodeFirstTokenVisitor(sheerka)
    visitor.visit(concept.get_bnf())
    return visitor.first_tokens
@staticmethod
def compute_concepts_by_first_item(context,
                                   concepts,
                                   use_sheerka=False,
                                   previous_first_keywords=None,
                                   previous_first_regex=None):
    """
    Build the two maps that go into CONCEPTS_BY_FIRST_KEYWORD_ENTRY and CONCEPTS_BY_REGEX_ENTRY :
    first token -> ids of the concepts, and first regular expression -> ids of the concepts
    :param context:
    :param concepts: lists of concepts to parse
    :param use_sheerka: if True, the maps start from copies of the entries known by sheerka
    :param previous_first_keywords: starting map for the tokens (only read when use_sheerka is False)
    :param previous_first_regex: starting map for the regular expressions (only read when use_sheerka is False)
    :return: sheerka return value. On success, its body is the tuple (by first keyword, by first regex).
             It fails with NoFirstTokenError on the first concept without first item
    """
    sheerka = context.sheerka

    if use_sheerka:
        by_keyword = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
        serialized_by_regex = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY)
        by_regex = {RegExDef().deserialize(k): v for k, v in serialized_by_regex.items()}
    else:
        by_keyword = previous_first_keywords or {}
        by_regex = previous_first_regex or {}

    for concept in concepts:
        first_items = SheerkaConceptManager.get_first_items(sheerka, concept)
        if first_items is None:
            # no first token found for a concept ?
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))

        for first_item in first_items:
            target = by_regex if isinstance(first_item, RegExDef) else by_keyword
            target.setdefault(first_item, []).append(concept.id)

    # 'uniquify' the lists
    for mapping in (by_keyword, by_regex):
        for entry in list(mapping):
            mapping[entry] = core.utils.make_unique(mapping[entry])

    return sheerka.ret("BaseNodeParser", True, (by_keyword, by_regex))
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
    """
    Build the map 'first token -> list of concept ids',
    eg the dictionary that goes into CONCEPTS_BY_FIRST_KEYWORD_ENTRY

    :param context: execution context; only context.sheerka is used
    :param concepts: concepts to add to the map
    :param use_sheerka: if True, start from a copy of the map held by sheerka.om
        and ignore previous_entries
    :param previous_entries: starting map when use_sheerka is False.
        A non empty dictionary is updated in place
    :return: sheerka.ret(...) whose body is the map, or a failed sheerka.ret(...) carrying
        a NoFirstTokenError for the first concept that has no first token
    """
    sheerka = context.sheerka

    if use_sheerka:
        by_first_token = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    else:
        by_first_token = previous_entries or {}

    for concept in concepts:
        first_tokens = SheerkaConceptManager.get_first_tokens(sheerka, concept)
        if first_tokens is None:
            # no first token found for a concept ?
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))

        for first_token in first_tokens:
            by_first_token.setdefault(first_token, []).append(concept.id)

    # 'uniquify' the lists
    for first_token, concept_ids in by_first_token.items():
        by_first_token[first_token] = core.utils.make_unique(concept_ids)

    return sheerka.ret("BaseNodeParser", True, by_first_token)
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword, modified_concepts=None):
    """
    From a dictionary of first tokens, create another dictionary where all the keys
    that reference another concept ('c:|<id>:') are replaced by the plain keywords
    this concept can start with.

    Schematically, from the entries
    {
        'c:|1001:': 'c:|1002:',  # which means that the concept 1002 starts with the concept 1001
        'foo': 'c:|1001:'
    }
    it will create
    {
        'foo': ['c:|1001:', 'c:|1002:'],
    }

    When the resolution of a key only leads back to concepts that are already being resolved,
    the key is dropped from the result, the situation is logged with context.log and the
    concepts involved are recorded in sheerka.chicken_and_eggs.

    :param context: execution context (context.sheerka and context.log are used)
    :param concepts_by_first_keyword: map 'first token -> list of concept ids' to resolve
    :param modified_concepts: optional map 'concept id -> concept' that takes precedence
        over sheerka.get_by_id() when a concept has to be loaded
    :return: sheerka.ret(...) whose body is the resolved map (lists without duplicates)
    """
    sheerka = context.sheerka
    resolved_entries = {}

    def get_by_id(c_id):
        # concepts given by the caller win over what sheerka currently knows
        if modified_concepts and c_id in modified_concepts:
            return modified_concepts[c_id]
        return sheerka.get_by_id(c_id)

    def resolve_concepts(concept_str):
        # 'already_seen' is rebound by the main loop below before every top level call
        _, c_id = core.utils.unstr_concept(concept_str)
        if c_id in already_seen:
            # returned (not raised): the caller decides whether the cycle is fatal
            return ChickenAndEggError(already_seen)
        already_seen.add(c_id)

        plain_keywords = set()
        pending = set()
        cyclic = set()

        concept = get_by_id(c_id)
        if sheerka.isaset(context, concept):
            members = sheerka.get_set_elements(context, concept)
        else:
            members = [concept]

        for member in members:
            if sheerka.isaset(context, member):
                pending.add(member.str_id)
                continue

            ensure_bnf(context, member)  # need to make sure that it cannot fail
            for keyword in SheerkaConceptManager.get_first_tokens(sheerka, member):
                if keyword.startswith("c:|"):
                    pending.add(keyword)
                else:
                    plain_keywords.add(keyword)

        for pending_str in pending:
            outcome = resolve_concepts(pending_str)
            if isinstance(outcome, ChickenAndEggError):
                cyclic |= outcome.concepts
            else:
                plain_keywords |= outcome

        # a cycle is only an error when nothing else could be resolved
        if not plain_keywords and cyclic:
            raise ChickenAndEggError(cyclic)
        return plain_keywords

    for k, concept_ids in concepts_by_first_keyword.items():
        if not k.startswith("c:|"):
            # plain keyword, nothing to resolve
            resolved_entries.setdefault(k, []).extend(concept_ids)
            continue

        try:
            already_seen = set()
            for keyword in resolve_concepts(k):
                resolved_entries.setdefault(keyword, []).extend(concept_ids)
        except ChickenAndEggError as ex:
            context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
            concepts_in_recursion = ex.concepts

            # make sure to have all the parents
            concepts_in_recursion.update(concept_ids)

            for concept_id in concepts_in_recursion:
                # make sure we keep the longest chain
                old = sheerka.chicken_and_eggs.get(concept_id)
                if old is NotFound or len(old) < len(ex.concepts):
                    sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)

    # 'uniquify' the lists
    for keyword, concept_ids in resolved_entries.items():
        resolved_entries[keyword] = core.utils.make_unique(concept_ids)

    return sheerka.ret("BaseNodeParser", True, resolved_entries)
@staticmethod
def compile_concepts_by_first_regex(context, concepts_by_first_regex):
    """
    Compile every regular expression definition used as a key of the map

    :param context: execution context; only context.sheerka is used
    :param concepts_by_first_regex: map 'regex definition -> list of concept ids'.
        The keys must expose to_match, ignore_case, multiline and explicit_flags
    :return: sheerka.ret(...) whose body is a list of tuples (compiled pattern, list of concept ids),
        or a failed sheerka.ret(...) carrying the exception raised during the compilation
    """
    compiled = []
    try:
        for regex_def, concept_ids in concepts_by_first_regex.items():
            flags = RegExDef.compile_flags(regex_def.ignore_case,
                                           regex_def.multiline,
                                           regex_def.explicit_flags)
            pattern = re.compile(regex_def.to_match, flags)
            compiled.append((pattern, concept_ids))
    except Exception as error:
        # whatever went wrong, the error is reported to the caller rather than raised
        return context.sheerka.ret("BaseNodeParser", False, error)
    else:
        return context.sheerka.ret("BaseNodeParser", True, compiled)
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
    """
    Tries to find the concepts whose first keyword matches the value of the token

    :param token: token to look up (its type and value are used)
    :param to_keep: predicate to tell if the concept is eligible
    :param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name
    :param to_map: optional callable, invoked as to_map(concept, parser, sheerka),
        whose result replaces the concept in the returned list
    :param strip_quotes: for string tokens, drop the first and the last character of the value
    :param parser: If needed, parser which requested the concepts (only forwarded to to_map)
    :return: None for a whitespace token.
        When the name is not found in the cache: the result of custom, or None if there is no custom.
        Otherwise the eligible concepts followed by the custom ones, without duplicates
    """
    if token.type == TokenKind.WHITESPACE:
        return None

    name = token.value
    if token.type == TokenKind.STRING and strip_quotes:
        name = token.value[1:-1]

    # to get extra concepts using an alternative method
    custom_concepts = custom(name) if custom else []

    concepts_ids = self.sheerka.om.get(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, name)
    if concepts_ids is NotFound:
        return custom_concepts if custom else None

    def concept_id_of(item):
        # mapped items may wrap the concept in a 'concept' attribute
        return item.concept.id if hasattr(item, "concept") else item.id

    eligible = []
    for concept_id in concepts_ids:
        candidate = self.sheerka.get_by_id(concept_id)
        if to_keep(candidate):
            eligible.append(to_map(candidate, parser, self.sheerka) if to_map else candidate)

    return core.utils.make_unique(eligible + custom_concepts, concept_id_of)
def get_concepts_by_first_regex(self, expr, pos):
    """
    Try every compiled regular expression against the expression
    and collect the concepts of those that match

    :param expr: text to match
    :param pos: position in expr where the match must start
    :return: list of the concepts (loaded with sheerka.get_by_id) attached to the matching expressions
    """
    matching_concepts = []
    for pattern, concept_ids in self.compiled_concepts_by_regex:
        if not pattern.match(expr, pos):
            continue
        for concept_id in concept_ids:
            matching_concepts.append(self.sheerka.get_by_id(concept_id))
    return matching_concepts
def get_concepts_bnf_definitions(self):
    """
    Give access to the content of the bnf definitions cache

    :return: the 'cache' attribute of the CONCEPTS_BNF_DEFINITIONS_ENTRY cache
        of the current cache manager (the object itself, no copy is made here)
    """
    cache_manager = self.sheerka.om.current_cache_manager()
    bnf_definitions = cache_manager.caches[self.CONCEPTS_BNF_DEFINITIONS_ENTRY]
    return bnf_definitions.cache
def recompute_first_items(self, context, old_concept, new_concepts):
    """
    Recompute
        concepts by first items
        resolved concepts by first items
        concepts by first regex
        compiled concepts by first regex
    The result is only returned, this method does not store it
    (update_first_items_caches() is the method that writes these four values)

    Three situations are handled:
        old_concept only               : the old concept is removed
        old_concept and new_concepts   : the old concept is removed, then the new ones are added
        no old_concept                 : the new concepts are added to what sheerka currently holds.
                                         When new_concepts is empty or None, the current entries
                                         are simply recomputed (it used to fail with an
                                         UnboundLocalError)

    :param context: execution context; context.sheerka is used
    :param old_concept: concept to remove, or a falsy value
    :param new_concepts: concepts to add, or a falsy value
    :return: sheerka.ret(...) whose body is the tuple
        (by_first_keyword, by_first_regex, resolved_by_first_keyword, compiled_by_first_regex),
        or a failed sheerka.ret(...) wrapping the error of the failing step in an ErrorConcept
    """
    sheerka = context.sheerka

    # only set when existing concepts are replaced, so that the resolution
    # uses the new versions rather than the ones known by sheerka
    modified_concepts = None

    if old_concept:
        # remove
        by_first_keyword, by_first_regex = self._remove_concept_first_token_and_first_regex(old_concept)

        if new_concepts:
            # and then update
            init_ret_value = self.compute_concepts_by_first_item(context,
                                                                 new_concepts,
                                                                 False,
                                                                 by_first_keyword,
                                                                 by_first_regex)
            if not init_ret_value.status:
                return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
            by_first_keyword, by_first_regex = init_ret_value.body
            modified_concepts = {new_concept.id: new_concept for new_concept in new_concepts}
    else:
        # only update, starting from what sheerka holds.
        # 'or []' also covers new_concepts=None, which cannot be iterated
        init_ret_value = self.compute_concepts_by_first_item(context, new_concepts or [], True)
        if not init_ret_value.status:
            return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
        by_first_keyword, by_first_regex = init_ret_value.body

    # computes resolved concepts_by_first_keyword
    init_ret_value = self.resolve_concepts_by_first_keyword(context, by_first_keyword, modified_concepts)
    if not init_ret_value.status:
        return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
    resolved_by_first_keyword = init_ret_value.body

    # compile new regex
    compile_ret = self.compile_concepts_by_first_regex(context, by_first_regex)
    if not compile_ret.status:
        return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
    compiled_by_first_regex = compile_ret.body

    return sheerka.ret(self.NAME,
                       True,
                       (by_first_keyword, by_first_regex, resolved_by_first_keyword, compiled_by_first_regex))
def update_first_items_caches(self,
                              context,
                              by_first_keyword,
                              by_first_regex,
                              resolved_by_first_keyword,
                              compiled_by_first_regex):
    """
    Store the four 'first items' structures

    :param context: execution context; context.sheerka.om receives the three dictionaries
    :param by_first_keyword: stored under CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    :param by_first_regex: stored under CONCEPTS_BY_REGEX_ENTRY, once every key
        is replaced by the result of its serialize() method
    :param resolved_by_first_keyword: stored under RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    :param compiled_by_first_regex: new content of self.compiled_concepts_by_regex
        (the existing list object is kept, only its content is replaced)
    :return: None
    """
    object_manager = context.sheerka.om
    object_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_first_keyword)
    object_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_by_first_keyword)

    # the regex definitions are not stored as is, but in their serialized form
    serialized_by_first_regex = {}
    for regex_def, concept_ids in by_first_regex.items():
        serialized_by_first_regex[regex_def.serialize()] = concept_ids
    object_manager.put(self.CONCEPTS_BY_REGEX_ENTRY, False, serialized_by_first_regex)

    # update the compiled regex
    compiled = self.compiled_concepts_by_regex
    compiled.clear()
    compiled.extend(compiled_by_first_regex)
@staticmethod
def recompute_concept_parameters(context, concept):
    """
    Rebuild the 'parameters' list of the metadata of the concept

    The list is emptied, then filled with
        the variables of the concept that appear as a token of its definition
        (when the definition type is DEFINITION_TYPE_DEF) or of its name (otherwise),
        in the order of the tokens
        the parameter variables found in the bnf of the concept, if it has one,
        when they are not already listed

    :param context: execution context, given to the bnf visitor
    :param concept: concept to update (its metadata is modified in place)
    """
    metadata = concept.get_metadata()
    metadata.parameters.clear()

    if metadata.definition_type == DEFINITION_TYPE_DEF:
        text_to_parse = metadata.definition
    else:
        text_to_parse = metadata.name
    tokens = list(Tokenizer(text_to_parse, yield_eof=False))

    # all variables that appear in the name are concept parameters
    if tokens:
        variable_names = [variable[0] for variable in metadata.variables]
        metadata.parameters.extend(token.value for token in tokens if token.value in variable_names)

    # for bnf concept, use the visitor to extract parameters
    if concept.get_bnf():
        from evaluators.DefConceptEvaluator import ConceptOrRuleVariableVisitor, ParameterVariable
        visitor = ConceptOrRuleVariableVisitor(context)
        visitor.visit(concept.get_bnf())

        for variable in visitor.variables:
            if isinstance(variable, ParameterVariable) and variable.name not in metadata.parameters:
                metadata.parameters.append(variable.name)