512 lines
20 KiB
Python
512 lines
20 KiB
Python
import hashlib
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Literal
|
|
|
|
from caching.Cache import Cache
|
|
from caching.FastCache import FastCache
|
|
from caching.ListCache import ListCache
|
|
from caching.ListIfNeededCache import ListIfNeededCache
|
|
from common.global_symbols import NotFound, NotInit, VARIABLE_PREFIX
|
|
from common.utils import get_logger_name, unstr_concept
|
|
from core.BuiltinConcepts import BuiltinConcepts
|
|
from core.ExecutionContext import ExecutionContext
|
|
from core.ReturnValue import ReturnValue
|
|
from core.concept import Concept, ConceptDefaultPropsAttrs, ConceptMetadata, DefinitionType
|
|
from core.error import ErrorContext, ErrorObj
|
|
from parsers.parser_utils import strip_tokens
|
|
from parsers.tokenizer import TokenKind, Tokenizer
|
|
from services.BaseService import BaseService
|
|
|
|
# Names of the ConceptMetadata attributes that take part in the definition digest.
# Order matters: ConceptManager.compute_metadata_digest builds a dict by iterating
# this tuple and hashes its string form, so reordering would change every digest.
PROPERTIES_FOR_DIGEST = (
    "name",
    "key",
    "definition",
    "definition_type",
    "is_builtin",
    "is_unique",
    "where",
    "pre",
    "post",
    "body",
    "ret",
    "desc",
    "bound_body",
    "autouse",
    "props",
    "variables",
    "parameters",
)
|
|
|
|
|
|
@dataclass
class ConceptAlreadyDefined(ErrorObj):
    """
    Error reported when a concept with the same definition digest already exists.
    """

    # metadata of the concept whose definition was rejected
    concept: ConceptMetadata
    # id of the concept that already holds the same definition
    already_defined_id: str

    def get_error_msg(self) -> str:
        """
        Builds the human-readable message for this error
        :return: the message, naming the rejected concept and the existing id
        :rtype: str
        """
        return f"Concept {self.concept.name}, is already defined (id={self.already_defined_id})"
|
|
|
|
|
|
@dataclass
class InvalidBnf(ErrorObj):
    """
    Error describing a BNF definition that could not be accepted.
    """

    # the offending BNF text, echoed verbatim in the error message
    bnf: str

    def get_error_msg(self) -> str:
        """
        Builds the human-readable message for this error
        :return: the message, quoting the invalid BNF
        :rtype: str
        """
        return f"Invalid bnf '{self.bnf}'"
|
|
|
|
|
|
@dataclass
class NoFirstItemError(ErrorObj):
    """
    Error used as the 'reason' of an INVALID_CONCEPT when no first token
    (a keyword that is not a variable) can be found in a concept key or name.
    It carries no data of its own.
    """
|
|
|
|
|
|
@dataclass
class ConceptRef:
    """
    Reference to a concept.

    Two references are equal when the concepts they wrap have the same id;
    the hash is derived from that id as well, so equality and hashing agree.
    """

    # the referenced concept; only its id takes part in comparisons
    concept: Concept

    def __eq__(self, other):
        # anything that is not a ConceptRef is simply "not equal"
        return isinstance(other, ConceptRef) and self.concept.id == other.concept.id

    def __hash__(self):
        concept_id = self.concept.id
        return hash(concept_id)
|
|
|
|
|
|
class ConceptManager(BaseService):
|
|
"""
|
|
The service is used for the administration of concepts
|
|
You can define new concept, modify or delete them
|
|
|
|
There are also function to help retrieve them easily (like first token cache)
|
|
Already instantiated concepts are managed by the SheerkaMemory service, not here
|
|
"""
|
|
|
|
NAME = "ConceptManager"
|
|
|
|
USER_CONCEPTS_IDS = "User_Concepts_IDs" # incremented everytime a new concept is created
|
|
CONCEPTS_BY_ID_ENTRY = "ConceptManager:Concepts_By_ID" # to store all the concepts
|
|
CONCEPTS_BY_KEY_ENTRY = "ConceptManager:Concepts_By_Key"
|
|
CONCEPTS_BY_NAME_ENTRY = "ConceptManager:Concepts_By_Name"
|
|
CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash"
|
|
|
|
CONCEPT_BY_FIRST_TOKEN_IN_KEY = "ConceptManager:Concepts_By_First_Token_In_Key"
|
|
CONCEPT_BY_FIRST_TOKEN_IN_NAME = "ConceptManager:Concepts_By_First_Token_In_Name"
|
|
|
|
def __init__(self, sheerka):
|
|
super().__init__(sheerka, order=11)
|
|
|
|
self.log = logging.getLogger(get_logger_name(__name__))
|
|
self.init_log = logging.getLogger(get_logger_name("init." + __name__))
|
|
self.bnf_expr_cache = FastCache()
|
|
|
|
def initialize(self):
|
|
self.init_log.debug(f"Initializing ConceptManager, order={self.order}")
|
|
self.sheerka.bind_service_method(self.NAME, self.define_new_concept, True)
|
|
self.sheerka.bind_service_method(self.NAME, self.new, True)
|
|
self.sheerka.bind_service_method(self.NAME, self.newn, True)
|
|
self.sheerka.bind_service_method(self.NAME, self.newi, True)
|
|
self.sheerka.bind_service_method(self.NAME, self.get_by_name, False)
|
|
self.sheerka.bind_service_method(self.NAME, self.get_by_id, False)
|
|
self.sheerka.bind_service_method(self.NAME, self.get_by_key, False)
|
|
self.sheerka.bind_service_method(self.NAME, self.get_by_digest, False)
|
|
self.sheerka.bind_service_method(self.NAME, self.is_a_concept_name, False)
|
|
self.sheerka.bind_service_method(self.NAME, self.get_metadatas_from_first_token, False)
|
|
|
|
register_concept_cache = self.sheerka.om.register_concept_cache
|
|
|
|
# Cache of concept metadata, organized by id
|
|
cache = Cache().auto_configure(self.CONCEPTS_BY_ID_ENTRY)
|
|
register_concept_cache(self.CONCEPTS_BY_ID_ENTRY, cache, lambda c: c.id, True)
|
|
|
|
cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_KEY_ENTRY)
|
|
register_concept_cache(self.CONCEPTS_BY_KEY_ENTRY, cache, lambda c: c.key, True)
|
|
|
|
cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_NAME_ENTRY)
|
|
register_concept_cache(self.CONCEPTS_BY_NAME_ENTRY, cache, lambda c: c.name, True)
|
|
|
|
cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_HASH_ENTRY)
|
|
register_concept_cache(self.CONCEPTS_BY_HASH_ENTRY, cache, lambda c: c.digest, True)
|
|
|
|
cache = ListCache().auto_configure(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY)
|
|
self.sheerka.om.register_cache(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY, cache)
|
|
|
|
cache = ListCache().auto_configure(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME)
|
|
self.sheerka.om.register_cache(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME, cache)
|
|
|
|
def initialize_deferred(self, context, is_first_time):
|
|
if is_first_time:
|
|
self.sheerka.om.put(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS, 1000)
|
|
|
|
_ = self._create_builtin_concept
|
|
_(1, BuiltinConcepts.SHEERKA, desc="Sheerka")
|
|
_(2, BuiltinConcepts.NEW_CONCEPT, desc="On new concept creation", variables=("metadata",))
|
|
_(3, BuiltinConcepts.UNKNOWN_CONCEPT, desc="Unknown concept", variables=("requested",))
|
|
_(4, BuiltinConcepts.USER_INPUT, desc="Any external input", variables=("command",))
|
|
_(5, BuiltinConcepts.PARSER_INPUT, desc="tokenized input", variables=("pi",))
|
|
_(6, BuiltinConcepts.PYTHON_CODE, desc="python code", variables=("pf",)) # pf for PythonFragment
|
|
_(7, BuiltinConcepts.PARSER_RESULT, desc="parser result", variables=("result",))
|
|
_(8, BuiltinConcepts.INVALID_CONCEPT, desc="invalid concept", variables=("concept_id", "reason"))
|
|
_(9, BuiltinConcepts.EVALUATION_ERROR, desc="evaluation error", variables=("concept", "reason"))
|
|
|
|
self.init_log.debug('%s builtin concepts created',
|
|
len(self.sheerka.om.current_cache_manager().concept_caches))
|
|
|
|
def define_new_concept(self,
|
|
context: ExecutionContext,
|
|
name: str,
|
|
is_builtin: bool = False, # is the concept defined Sheerka
|
|
is_unique: bool = False, # is the concept a singleton
|
|
body: str = "", # return value of the concept
|
|
where: str = "", # condition to recognize variables in name
|
|
pre: str = "", # list of preconditions before calling the main function
|
|
post: str = "", # list of post conditions after calling the main function
|
|
ret: str = "", # variable to return when a concept is recognized
|
|
definition: str = "", # regex used to define the concept
|
|
definition_type: DefinitionType = DefinitionType.DEFAULT,
|
|
autouse: bool = False, # indicate if the concept must be automatically evaluated
|
|
bound_body: str = None, #
|
|
desc: str = "", # possible description for the concept
|
|
props: dict = None, # hashmap of default properties
|
|
variables: list = None, # list of concept variables(tuple), with their default values
|
|
parameters: set = None # list of variables that are part of the name of the concept
|
|
) -> ReturnValue:
|
|
"""
|
|
Adds the definition of a new concept
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
concept_key = self.create_concept_key(name, definition, variables)
|
|
concept_id = "waiting for id"
|
|
|
|
metadata = ConceptMetadata(
|
|
concept_id,
|
|
name,
|
|
concept_key,
|
|
is_builtin,
|
|
is_unique,
|
|
body,
|
|
where,
|
|
pre,
|
|
post,
|
|
ret,
|
|
definition,
|
|
DefinitionType.DEFAULT if definition_type is None else definition_type,
|
|
desc,
|
|
autouse,
|
|
bound_body,
|
|
{} if props is None else props,
|
|
[] if variables is None else variables,
|
|
set() if parameters is None else parameters)
|
|
|
|
digest = self.compute_metadata_digest(metadata)
|
|
if self.sheerka.om.exists_in_current(self.CONCEPTS_BY_HASH_ENTRY, digest):
|
|
already_defined = self.sheerka.om.get(self.CONCEPTS_BY_HASH_ENTRY, digest)
|
|
error = ErrorContext(self.NAME, context, ConceptAlreadyDefined(metadata, already_defined.id))
|
|
return ReturnValue(self.NAME, False, error)
|
|
|
|
metadata.digest = digest
|
|
metadata.all_attrs = self.compute_all_attrs(variables)
|
|
|
|
# bnf_expr = None
|
|
# if definition_type == DefinitionType.BNF:
|
|
# try:
|
|
# bnf_expr = self.compute_concept_bnf(definition)
|
|
# except InvalidBnf as ex:
|
|
# error = ErrorContext(self.NAME, context, ex)
|
|
# return ReturnValue(self.NAME, False, error)
|
|
|
|
first_token_by_key = self._get_concept_first_token(concept_key)
|
|
if first_token_by_key is None:
|
|
return ReturnValue(self.NAME, False, self.newn(BuiltinConcepts.INVALID_CONCEPT,
|
|
concept_id=concept_id,
|
|
reason=NoFirstItemError()))
|
|
|
|
first_token_by_name = self._get_concept_first_token(name)
|
|
if first_token_by_name is None:
|
|
return ReturnValue(self.NAME, False, self.newn(BuiltinConcepts.INVALID_CONCEPT,
|
|
concept_id=concept_id,
|
|
reason=NoFirstItemError()))
|
|
|
|
# at this point everything is fine. let's get the id and save everything
|
|
om = self.sheerka.om
|
|
metadata.id = str(self.sheerka.om.get(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS))
|
|
om.add_concept(metadata)
|
|
|
|
# add the first token to the
|
|
om.put(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY, first_token_by_key, metadata.id)
|
|
if first_token_by_name != first_token_by_key:
|
|
om.put(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME, first_token_by_name, metadata.id)
|
|
|
|
# self.update_first_items_caches(context, first_item_res)
|
|
# if bnf_expr:
|
|
# self.bnf_expr_cache.put(metadata.id, bnf_expr)
|
|
# # update references
|
|
# for ref in self.compute_references(bnf_expr):
|
|
# om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, metadata.id)
|
|
|
|
return ReturnValue(self.NAME, True, self.newn(BuiltinConcepts.NEW_CONCEPT, metadata=metadata))
|
|
|
|
def newn(self, concept_name: str, **kwargs):
|
|
"""
|
|
new_by_name
|
|
Creates and returns an instance of a new concept by its name
|
|
:param concept_name:
|
|
:type concept_name:
|
|
:param kwargs:
|
|
:type kwargs:
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
metadata = self.get_by_name(concept_name)
|
|
if metadata is NotFound:
|
|
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=concept_name)
|
|
|
|
if isinstance(metadata, list):
|
|
return [self._inner_new(m, **kwargs) for m in metadata]
|
|
|
|
return self._inner_new(metadata, **kwargs)
|
|
|
|
def newi(self, concept_id: str, **kwargs):
|
|
"""
|
|
new_by_id
|
|
Creates and returns an instance of a new concept by its id
|
|
:param concept_id:
|
|
:type concept_id:
|
|
:param kwargs:
|
|
:type kwargs:
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
metadata = self.get_by_id(concept_id)
|
|
if metadata is NotFound:
|
|
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=f"#{concept_id}")
|
|
return self._inner_new(metadata, **kwargs)
|
|
|
|
def new(self, identifier, **kwargs):
|
|
"""
|
|
Try to resolve the instantiation of a concept
|
|
:param identifier:
|
|
:type identifier:
|
|
:param kwargs:
|
|
:type kwargs:
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
if isinstance(identifier, (ConceptMetadata, Concept)):
|
|
return self._inner_new(identifier.get_metadata(), **kwargs)
|
|
|
|
if isinstance(identifier, ConceptRef):
|
|
# first, try the digest
|
|
resolved_identifier = identifier.concept.get_definition_digest()
|
|
metadata = self.get_by_digest(resolved_identifier)
|
|
if metadata is NotFound:
|
|
# used the same method that was used when the concept was first recognized
|
|
match identifier.concept.get_runtime_info().info["resolution_method"]:
|
|
case "id":
|
|
resolved_identifier = f"#{identifier.concept.id}"
|
|
metadata = self.get_by_id(resolved_identifier)
|
|
case "key":
|
|
resolved_identifier = identifier.concept.key
|
|
metadata = self.get_by_key(resolved_identifier)
|
|
case _:
|
|
resolved_identifier = identifier.concept.name
|
|
metadata = self.get_by_name(resolved_identifier)
|
|
|
|
if metadata is NotFound:
|
|
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=resolved_identifier)
|
|
else:
|
|
return [self.new(item, **kwargs) for item in metadata] if \
|
|
isinstance(metadata, list) else self._inner_new(metadata, **kwargs)
|
|
|
|
if isinstance(identifier, list):
|
|
return [self.new(item, **kwargs) for item in identifier]
|
|
|
|
if (tmp := unstr_concept(identifier)) != (None, None):
|
|
# manage c:name#id:
|
|
identifier = tmp
|
|
|
|
if isinstance(identifier, tuple):
|
|
return self.newi(identifier[1], **kwargs) if identifier[1] else self.newn(identifier[0], **kwargs)
|
|
|
|
if isinstance(identifier, str):
|
|
return self.newn(identifier, **kwargs)
|
|
|
|
# failed to instantiate the concept
|
|
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=identifier)
|
|
|
|
def get_by_name(self, key: str):
|
|
"""
|
|
Returns a concept metadata, using its name
|
|
:param key:
|
|
:type key:
|
|
:return: NotFound if not found
|
|
:rtype:
|
|
"""
|
|
return self.sheerka.om.get(self.CONCEPTS_BY_NAME_ENTRY, key)
|
|
|
|
def get_by_id(self, concept_id: str):
|
|
"""
|
|
Returns a concept metadata, using its name
|
|
:param concept_id:
|
|
:type concept_id:
|
|
:return: NotFound if not found
|
|
:rtype:
|
|
"""
|
|
return self.sheerka.om.get(self.CONCEPTS_BY_ID_ENTRY, concept_id)
|
|
|
|
def get_by_key(self, key: str):
|
|
"""
|
|
Returns a concept metadata, using its name
|
|
:param key:
|
|
:type key:
|
|
:return: NotFound if not found
|
|
:rtype:
|
|
"""
|
|
return self.sheerka.om.get(self.CONCEPTS_BY_KEY_ENTRY, key)
|
|
|
|
def get_by_digest(self, digest: str):
|
|
"""
|
|
Returns a concept metadata, using its digest
|
|
:param digest:
|
|
:type digest:
|
|
:return: NotFound if not found
|
|
:rtype:
|
|
"""
|
|
return self.sheerka.om.get(self.CONCEPTS_BY_HASH_ENTRY, digest)
|
|
|
|
def get_all_concepts(self):
|
|
return list(sorted(self.sheerka.om.list(self.CONCEPTS_BY_ID_ENTRY), key=lambda item: int(item.id)))
|
|
|
|
def get_metadatas_from_first_token(self, attr: Literal["key", "name"], token: str):
|
|
"""
|
|
Get the list of the concepts that start with token
|
|
:param attr: "key" or "name"
|
|
:type attr:
|
|
:param token:
|
|
:type token:
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
cache_name = self.CONCEPT_BY_FIRST_TOKEN_IN_NAME if attr == "name" else self.CONCEPT_BY_FIRST_TOKEN_IN_KEY
|
|
concepts_ids = self.sheerka.om.get(cache_name, token)
|
|
if concepts_ids is NotFound:
|
|
return []
|
|
|
|
return [self.get_by_id(c_id) for c_id in concepts_ids]
|
|
|
|
def is_a_concept_name(self, name):
|
|
return self.sheerka.om.exists(self.CONCEPTS_BY_NAME_ENTRY, name)
|
|
|
|
@staticmethod
|
|
def compute_metadata_digest(metadata: ConceptMetadata):
|
|
"""
|
|
Compute once for all the digest of the definition of a concept
|
|
:param metadata:
|
|
:type metadata:
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
as_dict = {p: getattr(metadata, p) for p in PROPERTIES_FOR_DIGEST}
|
|
return hashlib.sha256(f"{as_dict}".encode("utf-8")).hexdigest()
|
|
|
|
@staticmethod
|
|
def compute_all_attrs(variables: tuple | None):
|
|
"""
|
|
Compute the list of available attributes for a concept
|
|
:param variables:
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
all_attrs = ConceptDefaultPropsAttrs.copy()
|
|
if variables:
|
|
all_attrs += [k for k, v in variables]
|
|
|
|
return tuple(all_attrs)
|
|
|
|
@staticmethod
|
|
def compute_concept_bnf(definition):
|
|
pass
|
|
|
|
@staticmethod
|
|
def create_concept_key(name: str, definition: str | None, variables: tuple | None):
|
|
"""
|
|
Creates the key from the definition
|
|
:param name:
|
|
:type name:
|
|
:param definition:
|
|
:type definition:
|
|
:param variables:
|
|
:type variables:
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
|
|
definition_to_use = definition or name
|
|
tokens = list(Tokenizer(definition_to_use, yield_eof=False))
|
|
|
|
if variables is None or len(strip_tokens(tokens, True)) == 1:
|
|
variables_to_use = []
|
|
else:
|
|
variables_to_use = [k for k, v in variables]
|
|
|
|
parts = []
|
|
for token in tokens:
|
|
if token.type == TokenKind.WHITESPACE:
|
|
continue
|
|
if token.value in variables_to_use:
|
|
parts.append(VARIABLE_PREFIX + str(variables_to_use.index(token.value)))
|
|
else:
|
|
parts.append(token.value)
|
|
|
|
return " ".join(parts)
|
|
|
|
def _create_builtin_concept(self, concept_id: int, name: str, desc: str, variables: tuple = ()):
|
|
variables_to_use = tuple((k, NotInit) for k in variables)
|
|
concept_key = self.create_concept_key(name, None, variables_to_use)
|
|
metadata = ConceptMetadata(
|
|
str(concept_id),
|
|
name,
|
|
concept_key,
|
|
True,
|
|
False,
|
|
"",
|
|
"",
|
|
"",
|
|
"",
|
|
"",
|
|
"",
|
|
DefinitionType.DEFAULT,
|
|
desc,
|
|
False,
|
|
variables[0] if variables else "",
|
|
{},
|
|
variables_to_use,
|
|
variables,
|
|
)
|
|
metadata.digest = self.compute_metadata_digest(metadata)
|
|
metadata.all_attrs = self.compute_all_attrs(variables_to_use)
|
|
self.sheerka.om.add_concept(metadata)
|
|
|
|
@staticmethod
|
|
def _get_concept_first_token(concept_key):
|
|
"""
|
|
Return the list of tokens that consist of the first par of a concept key
|
|
>>> assert _get_concept_first_token("I am a concept") == "I"
|
|
>>> assert _get_concept_first_token("__var__1 multiplied by __var__2") == "multiplied"
|
|
:param concept_key:
|
|
:type concept_key:
|
|
:return:
|
|
:rtype:
|
|
"""
|
|
keywords = concept_key.split()
|
|
# trim first variables
|
|
res = []
|
|
for keyword in keywords:
|
|
if keyword.startswith(VARIABLE_PREFIX):
|
|
continue
|
|
|
|
return keyword
|
|
|
|
return None
|
|
|
|
@staticmethod
|
|
def _inner_new(_metadata_def: ConceptMetadata, **kwargs):
|
|
concept = Concept(_metadata_def)
|
|
for k, v in kwargs.items():
|
|
concept.set_value(k, v)
|
|
|
|
if kwargs:
|
|
# if an attribute is set, the concept is considered as evaluated
|
|
concept.get_runtime_info().is_evaluated = True
|
|
|
|
return concept
|