Fixed #20: I can parse simple concepts

This commit is contained in:
2023-07-09 18:08:47 +02:00
parent ba397b0b72
commit 57f9ce2bbb
44 changed files with 2462 additions and 149 deletions
+131 -18
View File
@@ -1,9 +1,11 @@
import hashlib
import logging
from dataclasses import dataclass
from typing import Literal
from caching.Cache import Cache
from caching.FastCache import FastCache
from caching.ListCache import ListCache
from caching.ListIfNeededCache import ListIfNeededCache
from common.global_symbols import NotFound, NotInit, VARIABLE_PREFIX
from common.utils import get_logger_name, unstr_concept
@@ -41,10 +43,24 @@ class InvalidBnf(ErrorObj):
@dataclass
class FirstItemError(ErrorObj):
class NoFirstItemError(ErrorObj):
pass
@dataclass
class ConceptRef:
    """
    Wrapper around a concept.

    Equality and hashing only look at the id of the wrapped concept, so two
    references are interchangeable as soon as their concepts share the same id.
    """
    concept: Concept

    def __eq__(self, other):
        # anything that is not a ConceptRef never matches
        return isinstance(other, ConceptRef) and self.concept.id == other.concept.id

    def __hash__(self):
        # must stay consistent with __eq__: same concept id, same hash
        return hash(self.concept.id)
class ConceptManager(BaseService):
"""
The service is used for the administration of concepts
@@ -60,7 +76,10 @@ class ConceptManager(BaseService):
CONCEPTS_BY_ID_ENTRY = "ConceptManager:Concepts_By_ID" # to store all the concepts
CONCEPTS_BY_KEY_ENTRY = "ConceptManager:Concepts_By_Key"
CONCEPTS_BY_NAME_ENTRY = "ConceptManager:Concepts_By_Name"
CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash" # sto
CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash"
CONCEPT_BY_FIRST_TOKEN_IN_KEY = "ConceptManager:Concepts_By_First_Token_In_Key"
CONCEPT_BY_FIRST_TOKEN_IN_NAME = "ConceptManager:Concepts_By_First_Token_In_Name"
def __init__(self, sheerka):
super().__init__(sheerka, order=11)
@@ -78,7 +97,9 @@ class ConceptManager(BaseService):
self.sheerka.bind_service_method(self.NAME, self.get_by_name, False)
self.sheerka.bind_service_method(self.NAME, self.get_by_id, False)
self.sheerka.bind_service_method(self.NAME, self.get_by_key, False)
self.sheerka.bind_service_method(self.NAME, self.get_by_digest, False)
self.sheerka.bind_service_method(self.NAME, self.is_a_concept_name, False)
self.sheerka.bind_service_method(self.NAME, self.get_metadatas_from_first_token, False)
register_concept_cache = self.sheerka.om.register_concept_cache
@@ -95,6 +116,12 @@ class ConceptManager(BaseService):
cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_HASH_ENTRY)
register_concept_cache(self.CONCEPTS_BY_HASH_ENTRY, cache, lambda c: c.digest, True)
cache = ListCache().auto_configure(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY)
self.sheerka.om.register_cache(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY, cache)
cache = ListCache().auto_configure(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME)
self.sheerka.om.register_cache(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME, cache)
def initialize_deferred(self, context, is_first_time):
if is_first_time:
self.sheerka.om.put(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS, 1000)
@@ -102,12 +129,13 @@ class ConceptManager(BaseService):
_ = self._create_builtin_concept
_(1, BuiltinConcepts.SHEERKA, desc="Sheerka")
_(2, BuiltinConcepts.NEW_CONCEPT, desc="On new concept creation", variables=("metadata",))
_(3, BuiltinConcepts.UNKNOWN_CONCEPT, desc="Unknown concept", variables=("requested_name", "requested_id"))
_(3, BuiltinConcepts.UNKNOWN_CONCEPT, desc="Unknown concept", variables=("requested",))
_(4, BuiltinConcepts.USER_INPUT, desc="Any external input", variables=("command",))
_(5, BuiltinConcepts.PARSER_INPUT, desc="tokenized input", variables=("pi",))
_(6, BuiltinConcepts.PYTHON_CODE, desc="python code", variables=("pf",)) # pf for PythonFragment
_(7, BuiltinConcepts.INVALID_CONCEPT, desc="invalid concept", variables=("concept_id", "reason"))
_(8, BuiltinConcepts.EVALUATION_ERROR, desc="evaluation error", variables=("concept", "reason"))
_(7, BuiltinConcepts.PARSER_RESULT, desc="parser result", variables=("result",))
_(8, BuiltinConcepts.INVALID_CONCEPT, desc="invalid concept", variables=("concept_id", "reason"))
_(9, BuiltinConcepts.EVALUATION_ERROR, desc="evaluation error", variables=("concept", "reason"))
self.init_log.debug('%s builtin concepts created',
len(self.sheerka.om.current_cache_manager().concept_caches))
@@ -129,7 +157,7 @@ class ConceptManager(BaseService):
desc: str = "", # possible description for the concept
props: dict = None, # hashmap of default properties
variables: list = None, # list of concept variables(tuple), with their default values
parameters: list = None # list of variables that are part of the name of the concept
parameters: set = None # list of variables that are part of the name of the concept
) -> ReturnValue:
"""
Adds the definition of a new concept
@@ -151,14 +179,13 @@ class ConceptManager(BaseService):
post,
ret,
definition,
definition_type,
DefinitionType.DEFAULT if definition_type is None else definition_type,
desc,
autouse,
bound_body,
props or {},
variables or (),
parameters or (),
)
{} if props is None else props,
[] if variables is None else variables,
set() if parameters is None else parameters)
digest = self.compute_metadata_digest(metadata)
if self.sheerka.om.exists_in_current(self.CONCEPTS_BY_HASH_ENTRY, digest):
@@ -177,15 +204,28 @@ class ConceptManager(BaseService):
# error = ErrorContext(self.NAME, context, ex)
# return ReturnValue(self.NAME, False, error)
# try:
# first_item_res = self.recompute_first_items(context, None, [metadata])
# except FirstItemError as ex:
# return ReturnValue(self.NAME, False, ex)
first_token_by_key = self._get_concept_first_token(concept_key)
if first_token_by_key is None:
return ReturnValue(self.NAME, False, self.newn(BuiltinConcepts.INVALID_CONCEPT,
concept_id=concept_id,
reason=NoFirstItemError()))
first_token_by_name = self._get_concept_first_token(name)
if first_token_by_name is None:
return ReturnValue(self.NAME, False, self.newn(BuiltinConcepts.INVALID_CONCEPT,
concept_id=concept_id,
reason=NoFirstItemError()))
# at this point everything is fine. let's get the id and save everything
om = self.sheerka.om
metadata.id = str(self.sheerka.om.get(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS))
om.add_concept(metadata)
# index the concept id by the first token of its key, and of its name when it differs
om.put(self.CONCEPT_BY_FIRST_TOKEN_IN_KEY, first_token_by_key, metadata.id)
if first_token_by_name != first_token_by_key:
om.put(self.CONCEPT_BY_FIRST_TOKEN_IN_NAME, first_token_by_name, metadata.id)
# self.update_first_items_caches(context, first_item_res)
# if bnf_expr:
# self.bnf_expr_cache.put(metadata.id, bnf_expr)
@@ -208,7 +248,7 @@ class ConceptManager(BaseService):
"""
metadata = self.get_by_name(concept_name)
if metadata is NotFound:
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested_name=concept_name)
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=concept_name)
if isinstance(metadata, list):
return [self._inner_new(m, **kwargs) for m in metadata]
@@ -228,7 +268,7 @@ class ConceptManager(BaseService):
"""
metadata = self.get_by_id(concept_id)
if metadata is NotFound:
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested_id=concept_id)
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=f"#{concept_id}")
return self._inner_new(metadata, **kwargs)
def new(self, identifier, **kwargs):
@@ -244,6 +284,29 @@ class ConceptManager(BaseService):
if isinstance(identifier, (ConceptMetadata, Concept)):
return self._inner_new(identifier.get_metadata(), **kwargs)
if isinstance(identifier, ConceptRef):
# first, try the digest
resolved_identifier = identifier.concept.get_definition_digest()
metadata = self.get_by_digest(resolved_identifier)
if metadata is NotFound:
# use the same method that was used when the concept was first recognized
match identifier.concept.get_runtime_info().info["resolution_method"]:
case "id":
resolved_identifier = f"#{identifier.concept.id}"
metadata = self.get_by_id(resolved_identifier)
case "key":
resolved_identifier = identifier.concept.key
metadata = self.get_by_key(resolved_identifier)
case _:
resolved_identifier = identifier.concept.name
metadata = self.get_by_name(resolved_identifier)
if metadata is NotFound:
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=resolved_identifier)
else:
return [self.new(item, **kwargs) for item in metadata] if \
isinstance(metadata, list) else self._inner_new(metadata, **kwargs)
if isinstance(identifier, list):
return [self.new(item, **kwargs) for item in identifier]
@@ -257,7 +320,8 @@ class ConceptManager(BaseService):
if isinstance(identifier, str):
return self.newn(identifier, **kwargs)
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested_name=identifier)
# failed to instantiate the concept
return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested=identifier)
def get_by_name(self, key: str):
"""
@@ -289,9 +353,36 @@ class ConceptManager(BaseService):
"""
return self.sheerka.om.get(self.CONCEPTS_BY_KEY_ENTRY, key)
def get_by_digest(self, digest: str):
    """
    Look up a concept metadata from its digest

    The lookup is delegated to the object manager, under CONCEPTS_BY_HASH_ENTRY
    :param digest: digest to search for
    :type digest: str
    :return: whatever the object manager holds for this digest, NotFound if not found
    :rtype:
    """
    object_manager = self.sheerka.om
    return object_manager.get(self.CONCEPTS_BY_HASH_ENTRY, digest)
def get_all_concepts(self):
    """
    Return every item listed under CONCEPTS_BY_ID_ENTRY, ordered by numeric id

    Ids are converted with int() for the ordering, so "2" comes before "10"
    :return: a new sorted list
    :rtype: list
    """
    all_concepts = self.sheerka.om.list(self.CONCEPTS_BY_ID_ENTRY)
    # sorted() already returns a new list, wrapping it in list() is redundant
    return sorted(all_concepts, key=lambda item: int(item.id))
def get_metadatas_from_first_token(self, attr: Literal["key", "name"], token: str):
    """
    List the concept metadatas registered under a given first token

    :param attr: "name" to search the first-token-in-name cache, anything else searches
                 the first-token-in-key cache
    :type attr: str
    :param token: first token to look for
    :type token: str
    :return: one get_by_id() result per id stored for the token, an empty list when the
             token is unknown
    :rtype: list
    """
    if attr == "name":
        cache_name = self.CONCEPT_BY_FIRST_TOKEN_IN_NAME
    else:
        cache_name = self.CONCEPT_BY_FIRST_TOKEN_IN_KEY

    concepts_ids = self.sheerka.om.get(cache_name, token)
    if concepts_ids is NotFound:
        # nothing was ever registered for this token
        return []

    # the caches only hold ids, resolve each of them
    return list(map(self.get_by_id, concepts_ids))
def is_a_concept_name(self, name):
    """
    Tell whether name is known under CONCEPTS_BY_NAME_ENTRY

    :param name: name to check
    :return: the result of the object manager exists() check
    """
    is_known = self.sheerka.om.exists(self.CONCEPTS_BY_NAME_ENTRY, name)
    return is_known
@@ -385,6 +476,28 @@ class ConceptManager(BaseService):
metadata.all_attrs = self.compute_all_attrs(variables_to_use)
self.sheerka.om.add_concept(metadata)
@staticmethod
def _get_concept_first_token(concept_key):
    """
    Return the first token of a concept key that is not a variable

    The key is split on whitespace and every token starting with VARIABLE_PREFIX is skipped
    >>> assert ConceptManager._get_concept_first_token("I am a concept") == "I"
    >>> assert ConceptManager._get_concept_first_token("__var__1 multiplied by __var__2") == "multiplied"

    :param concept_key: key (or name) of a concept
    :type concept_key: str
    :return: the first non variable token, None when the key is empty or only made of variables
    :rtype: str | None
    """
    # generator + next() stops at the first match, exactly like the former loop/return
    non_variable_tokens = (keyword for keyword in concept_key.split()
                           if not keyword.startswith(VARIABLE_PREFIX))
    return next(non_variable_tokens, None)
@staticmethod
def _inner_new(_metadata_def: ConceptMetadata, **kwargs):
concept = Concept(_metadata_def)