import hashlib import logging from dataclasses import dataclass from typing import Literal from caching.Cache import Cache from caching.FastCache import FastCache from caching.ListCache import ListCache from caching.ListIfNeededCache import ListIfNeededCache from common.global_symbols import NotFound, NotInit, VARIABLE_PREFIX from common.utils import get_logger_name, unstr_concept from core.BuiltinConcepts import BuiltinConcepts from core.ExecutionContext import ExecutionContext from core.ReturnValue import ReturnValue from core.concept import Concept, ConceptDefaultPropsAttrs, ConceptMetadata, DefinitionType from core.error import ErrorContext, ErrorObj from parsers.parser_utils import strip_tokens from parsers.tokenizer import TokenKind, Tokenizer from services.BaseService import BaseService PROPERTIES_FOR_DIGEST = ("name", "key", "definition", "definition_type", "is_builtin", "is_unique", "where", "pre", "post", "body", "ret", "desc", "bound_body", "autouse", "props", "variables", "parameters") @dataclass class ConceptAlreadyDefined(ErrorObj): concept: ConceptMetadata already_defined_id: str def get_error_msg(self) -> str: return f"Concept {self.concept.name}, is already defined (id={self.already_defined_id})" @dataclass class InvalidBnf(ErrorObj): bnf: str def get_error_msg(self) -> str: return f"Invalid bnf '{self.bnf}'" @dataclass class NoFirstItemError(ErrorObj): pass @dataclass class ConceptRef: concept: Concept def __eq__(self, other): if not isinstance(other, ConceptRef): return False return self.concept.id == other.concept.id def __hash__(self): return hash(self.concept.id) class ConceptManager(BaseService): """ The service is used for the administration of concepts You can define new concept, modify or delete them There are also function to help retrieve them easily (like first token cache) Already instantiated concepts are managed by the SheerkaMemory service, not here """ NAME = "ConceptManager" USER_CONCEPTS_IDS = "User_Concepts_IDs" # incremented everytime a new concept is created CONCEPTS_BY_ID_ENTRY = "ConceptManager:Concepts_By_ID" # to store all the concepts CONCEPTS_BY_KEY_ENTRY = "ConceptManager:Concepts_By_Key" CONCEPTS_BY_NAME_ENTRY = "ConceptManager:Concepts_By_Name" CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash" CONCEPT_BY_FIRST_TOKEN_IN_KEY = "ConceptManager:Concepts_By_First_Token_In_Key" CONCEPT_BY_FIRST_TOKEN_IN_NAME = "ConceptManager:Concepts_By_First_Token_In_Name" def __init__(self, sheerka): super().__init__(sheerka, order=11) self.log = logging.getLogger(get_logger_name(__name__)) self.init_log = logging.getLogger(get_logger_name("init." + __name__)) self.bnf_expr_cache = FastCache() def initialize(self): self.init_log.debug(f"Initializing ConceptManager, order={self.order}") self.sheerka.bind_service_method(self.NAME, self.define_new_concept, True) self.sheerka.bind_service_method(self.NAME, self.new, True) self.sheerka.bind_service_method(self.NAME, self.newn, True) self.sheerka.bind_service_method(self.NAME, self.newi, True) self.sheerka.bind_service_method(self.NAME, self.get_by_name, False) self.sheerka.bind_service_method(self.NAME, self.get_by_id, False) self.sheerka.bind_service_method(self.NAME, self.get_by_key, False) self.sheerka.bind_service_method(self.NAME, self.get_by_digest, False) self.sheerka.bind_service_method(self.NAME, self.is_a_concept_name, False) self.sheerka.bind_service_method(self.NAME, self.get_metadatas_from_first_token, False) register_concept_cache = self.sheerka.om.register_concept_cache # Cache of concept metadata, organized by id cache = Cache().auto_configure(self.CONCEPTS_BY_ID_ENTRY) register_concept_cache(self.CONCEPTS_BY_ID_ENTRY, cache, lambda c: c.id, True) cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_KEY_ENTRY) register_concept_cache(self.CONCEPTS_BY_KEY_ENTRY, cache, lambda c: c.key, True) cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_NAME_ENTRY) register_concept_cache(self.CONCEPTS_BY_NAME_ENTRY, cache, lambda c: c.name, True) cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_HASH_ENTRY) register_concept_cache(self.CONCEPTS_BY_HASH_ENTRY, cache, lambda c: c.digest, True) cache = ListCache().auto_configure(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY) self.sheerka.om.register_cache(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY, cache) cache = ListCache().auto_configure(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME) self.sheerka.om.register_cache(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME, cache) def initialize_deferred(self, context, is_first_time): if is_first_time: self.sheerka.om.put(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS, 1000) _ = self._create_builtin_concept _(1, BuiltinConcepts.SHEERKA, desc="Sheerka") _(2, BuiltinConcepts.NEW_CONCEPT, desc="On new concept creation", variables=("metadata",)) _(3, BuiltinConcepts.UNKNOWN_CONCEPT, desc="Unknown concept", variables=("requested",)) _(4, BuiltinConcepts.USER_INPUT, desc="Any external input", variables=("command",)) _(5, BuiltinConcepts.PARSER_INPUT, desc="tokenized input", variables=("pi",)) _(6, BuiltinConcepts.PYTHON_CODE, desc="python code", variables=("pf",)) # pf for PythonFragment _(7, BuiltinConcepts.PARSER_RESULT, desc="parser result", variables=("result",)) _(8, BuiltinConcepts.INVALID_CONCEPT, desc="invalid concept", variables=("concept_id", "reason")) _(9, BuiltinConcepts.EVALUATION_ERROR, desc="evaluation error", variables=("concept", "reason")) self.init_log.debug('%s builtin concepts created', len(self.sheerka.om.current_cache_manager().concept_caches)) def define_new_concept(self, context: ExecutionContext, name: str, is_builtin: bool = False, # is the concept defined Sheerka is_unique: bool = False, # is the concept a singleton body: str = "", # return value of the concept where: str = "", # condition to recognize variables in name pre: str = "", # list of preconditions before calling the main function post: str = "", # list of post conditions after calling the main function ret: str = "", # variable to return when a concept is recognized definition: str = "", # regex used to define the concept definition_type: DefinitionType = DefinitionType.DEFAULT, autouse: bool = False, # indicate if the concept must be automatically evaluated bound_body: str = None, # desc: str = "", # possible description for the concept props: dict = None, # hashmap of default properties variables: list = None, # list of concept variables(tuple), with their default values parameters: set = None # list of variables that are part of the name of the concept ) -> ReturnValue: """ Adds the definition of a new concept :return: :rtype: """ concept_key = self.create_concept_key(name, definition, variables) concept_id = "waiting for id" metadata = ConceptMetadata( concept_id, name, concept_key, is_builtin, is_unique, body, where, pre, post, ret, definition, DefinitionType.DEFAULT if definition_type is None else definition_type, desc, autouse, bound_body, {} if props is None else props, [] if variables is None else variables, set() if parameters is None else parameters) digest = self.compute_metadata_digest(metadata) if self.sheerka.om.exists_in_current(self.CONCEPTS_BY_HASH_ENTRY, digest): already_defined = self.sheerka.om.get(self.CONCEPTS_BY_HASH_ENTRY, digest) error = ErrorContext(self.NAME, context, ConceptAlreadyDefined(metadata, already_defined.id)) return ReturnValue(self.NAME, False, error) metadata.digest = digest metadata.all_attrs = self.compute_all_attrs(variables) # bnf_expr = None # if definition_type == DefinitionType.BNF: # try: # bnf_expr = self.compute_concept_bnf(definition) # except InvalidBnf as ex: # error = ErrorContext(self.NAME, context, ex) # return ReturnValue(self.NAME, False, error) first_token_by_key = self._get_concept_first_token(concept_key) if first_token_by_key is None: return ReturnValue(self.NAME, False, self.newn(BuiltinConcepts.INVALID_CONCEPT, concept_id=concept_id, reason=NoFirstItemError())) first_token_by_name = self._get_concept_first_token(name) if first_token_by_name is None: return ReturnValue(self.NAME, False, self.newn(BuiltinConcepts.INVALID_CONCEPT, concept_id=concept_id, reason=NoFirstItemError())) # at this point everything is fine. let's get the id and save everything om = self.sheerka.om metadata.id = str(self.sheerka.om.get(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS)) om.add_concept(metadata) # add the first token to the om.put(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY, first_token_by_key, metadata.id) if first_token_by_name != first_token_by_key: om.put(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME, first_token_by_name, metadata.id) # self.update_first_items_caches(context, first_item_res) # if bnf_expr: # self.bnf_expr_cache.put(metadata.id, bnf_expr) # # update references # for ref in self.compute_references(bnf_expr): # om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, metadata.id) return ReturnValue(self.NAME, True, self.newn(BuiltinConcepts.NEW_CONCEPT, metadata=metadata)) def newn(self, concept_name: str, **kwargs): """ new_by_name Creates and returns an instance of a new concept by its name :param concept_name: :type concept_name: :param kwargs: :type kwargs: :return: :rtype: """ metadata = self.get_by_name(concept_name) if metadata is NotFound: return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=concept_name) if isinstance(metadata, list): return [self._inner_new(m, **kwargs) for m in metadata] return self._inner_new(metadata, **kwargs) def newi(self, concept_id: str, **kwargs): """ new_by_id Creates and returns an instance of a new concept by its id :param concept_id: :type concept_id: :param kwargs: :type kwargs: :return: :rtype: """ metadata = self.get_by_id(concept_id) if metadata is NotFound: return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=f"#{concept_id}") return self._inner_new(metadata, **kwargs) def new(self, identifier, **kwargs): """ Try to resolve the instantiation of a concept :param identifier: :type identifier: :param kwargs: :type kwargs: :return: :rtype: """ if isinstance(identifier, (ConceptMetadata, Concept)): return self._inner_new(identifier.get_metadata(), **kwargs) if isinstance(identifier, ConceptRef): # first, try the digest resolved_identifier = identifier.concept.get_definition_digest() metadata = self.get_by_digest(resolved_identifier) if metadata is NotFound: # used the same method that was used when the concept was first recognized match identifier.concept.get_runtime_info().info["resolution_method"]: case "id": resolved_identifier = f"#{identifier.concept.id}" metadata = self.get_by_id(resolved_identifier) case "key": resolved_identifier = identifier.concept.key metadata = self.get_by_key(resolved_identifier) case _: resolved_identifier = identifier.concept.name metadata = self.get_by_name(resolved_identifier) if metadata is NotFound: return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=resolved_identifier) else: return [self.new(item, **kwargs) for item in metadata] if \ isinstance(metadata, list) else self._inner_new(metadata, **kwargs) if isinstance(identifier, list): return [self.new(item, **kwargs) for item in identifier] if (tmp := unstr_concept(identifier)) != (None, None): # manage c:name#id: identifier = tmp if isinstance(identifier, tuple): return self.newi(identifier[1], **kwargs) if identifier[1] else self.newn(identifier[0], **kwargs) if isinstance(identifier, str): return self.newn(identifier, **kwargs) # failed to instantiate the concept return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=identifier) def get_by_name(self, key: str): """ Returns a concept metadata, using its name :param key: :type key: :return: NotFound if not found :rtype: """ return self.sheerka.om.get(self.CONCEPTS_BY_NAME_ENTRY, key) def get_by_id(self, concept_id: str): """ Returns a concept metadata, using its name :param concept_id: :type concept_id: :return: NotFound if not found :rtype: """ return self.sheerka.om.get(self.CONCEPTS_BY_ID_ENTRY, concept_id) def get_by_key(self, key: str): """ Returns a concept metadata, using its name :param key: :type key: :return: NotFound if not found :rtype: """ return self.sheerka.om.get(self.CONCEPTS_BY_KEY_ENTRY, key) def get_by_digest(self, digest: str): """ Returns a concept metadata, using its digest :param digest: :type digest: :return: NotFound if not found :rtype: """ return self.sheerka.om.get(self.CONCEPTS_BY_HASH_ENTRY, digest) def get_all_concepts(self): return list(sorted(self.sheerka.om.list(self.CONCEPTS_BY_ID_ENTRY), key=lambda item: int(item.id))) def get_metadatas_from_first_token(self, attr: Literal["key", "name"], token: str): """ Get the list of the concepts that start with token :param attr: "key" or "name" :type attr: :param token: :type token: :return: :rtype: """ cache_name = self.CONCEPT_BY_FIRST_TOKEN_IN_NAME if attr == "name" else self.CONCEPT_BY_FIRST_TOKEN_IN_KEY concepts_ids = self.sheerka.om.get(cache_name, token) if concepts_ids is NotFound: return [] return [self.get_by_id(c_id) for c_id in concepts_ids] def is_a_concept_name(self, name): return self.sheerka.om.exists(self.CONCEPTS_BY_NAME_ENTRY, name) @staticmethod def compute_metadata_digest(metadata: ConceptMetadata): """ Compute once for all the digest of the definition of a concept :param metadata: :type metadata: :return: :rtype: """ as_dict = {p: getattr(metadata, p) for p in PROPERTIES_FOR_DIGEST} return hashlib.sha256(f"{as_dict}".encode("utf-8")).hexdigest() @staticmethod def compute_all_attrs(variables: tuple | None): """ Compute the list of available attributes for a concept :param variables: :return: :rtype: """ all_attrs = ConceptDefaultPropsAttrs.copy() if variables: all_attrs += [k for k, v in variables] return tuple(all_attrs) @staticmethod def compute_concept_bnf(definition): pass @staticmethod def create_concept_key(name: str, definition: str | None, variables: tuple | None): """ Creates the key from the definition :param name: :type name: :param definition: :type definition: :param variables: :type variables: :return: :rtype: """ definition_to_use = definition or name tokens = list(Tokenizer(definition_to_use, yield_eof=False)) if variables is None or len(strip_tokens(tokens, True)) == 1: variables_to_use = [] else: variables_to_use = [k for k, v in variables] parts = [] for token in tokens: if token.type == TokenKind.WHITESPACE: continue if token.value in variables_to_use: parts.append(VARIABLE_PREFIX + str(variables_to_use.index(token.value))) else: parts.append(token.value) return " ".join(parts) def _create_builtin_concept(self, concept_id: int, name: str, desc: str, variables: tuple = ()): variables_to_use = tuple((k, NotInit) for k in variables) concept_key = self.create_concept_key(name, None, variables_to_use) metadata = ConceptMetadata( str(concept_id), name, concept_key, True, False, "", "", "", "", "", "", DefinitionType.DEFAULT, desc, False, variables[0] if variables else "", {}, variables_to_use, variables, ) metadata.digest = self.compute_metadata_digest(metadata) metadata.all_attrs = self.compute_all_attrs(variables_to_use) self.sheerka.om.add_concept(metadata) @staticmethod def _get_concept_first_token(concept_key): """ Return the list of tokens that consist of the first par of a concept key >>> assert _get_concept_first_token("I am a concept") == "I" >>> assert _get_concept_first_token("__var__1 multiplied by __var__2") == "multiplied" :param concept_key: :type concept_key: :return: :rtype: """ keywords = concept_key.split() # trim first variables res = [] for keyword in keywords: if keyword.startswith(VARIABLE_PREFIX): continue return keyword return None @staticmethod def _inner_new(_metadata_def: ConceptMetadata, **kwargs): concept = Concept(_metadata_def) for k, v in kwargs.items(): concept.set_value(k, v) if kwargs: # if an attribute is set, the concept is considered as evaluated concept.get_runtime_info().is_evaluated = True return concept