Fixed #12
Fixed #13
Fixed #14
This commit is contained in:
2023-05-08 17:50:28 +02:00
parent 21a397861a
commit e41094f908
95 changed files with 12168 additions and 260 deletions
+442
View File
@@ -0,0 +1,442 @@
from threading import RLock
from common.global_symbols import NotFound
from common.utils import sheerka_deepcopy
MAX_INITIALIZED_KEY = 100
class BaseCache:
    """
    An in memory FIFO cache object.
    When max_size is reached, the first element that was put is removed.
    When you put the same key twice, the previous element is overridden.

    Subclasses define the storage policy by overriding _put/_update/_delete;
    every public method is serialized through a reentrant lock.
    """
    def __init__(self, max_size: int = None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
        """
        :param max_size: maximum logical size before evict() is triggered (None = unbounded)
        :param default: default value returned when a key is not found; may be a callable
        :param extend_exists: callable used by exists() to search in the remote repository
        :param alt_sdp_get: how to get the value when called by alt_sdp
        :param sdp: current instance of SheerkaDataProvider
        """
        self._cache = {}
        self._max_size = max_size
        self._default = default  # default value to return when key is not found. It can be a callable of key
        self._extend_exists = extend_exists  # search in remote
        self._alt_sdp_get = alt_sdp_get  # How to get the value when called by alt_sdp
        self._sdp = sdp  # current instance of SheerkaDataProvider
        self._lock = RLock()
        self._current_size = 0  # logical size: collection values count per element (see _get)
        self._initialized_keys = set()  # to keep the list of the keys already requested (using get())
        self._is_cleared = False  # indicate that clear() was called
        self.to_add = set()  # keys added/changed since the last reset_events(), consumed at commit time
        self.to_remove = set()  # keys deleted since the last reset_events(), consumed at commit time
        # Explanation on _initialized_keys:
        # every time you try to get an item, its key is added to _initialized_keys.
        # If the item is found it ends up cached; if not, the recorded key prevents
        # asking the repository again for the same missing key (see _get).

    def __len__(self):
        """
        Return the number of items in the cache
        :return: the logical size (NOT the number of keys, see __repr__)
        """
        with self._lock:
            return self._current_size

    def __contains__(self, key: str):
        with self._lock:
            return key in self._cache

    def __iter__(self):
        with self._lock:
            # iterate over a snapshot of the keys; note that, as a generator,
            # the lock is held across yields for the whole iteration
            keys = self._cache.copy()
            yield from keys

    def __next__(self):
        # always returns the first key: a fresh iterator is created on each call
        return next(iter(self._cache))

    def __repr__(self):
        return f"{self.__class__.__name__}(size={self._current_size}, #keys={len(self._cache)})"

    def configure(self, max_size: int = None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
        """
        Override only the settings that are explicitly provided
        (None / NotFound act as "keep the current value" sentinels).
        :return: self, for chaining
        """
        if max_size is not None:
            self._max_size = max_size
        if default is not NotFound:
            self._default = default
        if extend_exists is not None:
            self._extend_exists = extend_exists
        if alt_sdp_get is not None:
            self._alt_sdp_get = alt_sdp_get
        if sdp is not None:
            self._sdp = sdp
        return self

    def auto_configure(self, cache_name: str):
        """
        Convenient way to configure the cache: delegate default lookup,
        existence check and alternate lookup to the data provider under cache_name.
        :param cache_name:
        :return: self, for chaining
        """
        self._default = lambda sdp, key: sdp.get(cache_name, key)
        self._extend_exists = lambda sdp, key: sdp.exists(cache_name, key)
        self._alt_sdp_get = lambda sdp, key: sdp.alt_get(cache_name, key)  # by default, same than get
        return self

    def disable_default(self):
        # replace the default lookup by one that always misses, keeping the
        # callable's arity consistent with whether an sdp is configured
        self._default = (lambda sdp, key: NotFound) if self._sdp else (lambda key: NotFound)

    def put(self, key: str, value: object, alt_sdp=None):
        """
        Add a new entry in cache.
        The size is only incremented when the subclass _put reports a new slot.
        :param key:
        :param value:
        :param alt_sdp:
        :return:
        """
        with self._lock:
            if self._put(key, value, alt_sdp):
                self._current_size += 1

    def get(self, key: str, alt_sdp=None):
        """
        Retrieve an entry from the cache.
        If the entry does not exist, will use the 'default' value or delegate.
        :param key:
        :param alt_sdp: if not found in cache._sdp, look in other repositories
        :return: the value, or NotFound
        """
        with self._lock:
            return self._get(key, alt_sdp)

    def alt_get(self, key: str):
        """
        Alternate way to get an entry, from concept cache.
        This is mainly used for IncCache, in order to get the value without increasing it.
        Used for another cache, it must return the value from key WITHOUT modifying the state of the cache.
        :param key:
        :return:
        """
        with self._lock:
            return self._alt_get(key)

    def get_all(self):
        """
        Retrieve all items already in cache.
        This method does not fetch in the remote repository.
        :return: a view over the cached values
        """
        with self._lock:
            return self._cache.values()

    def inner_get(self, key: str):
        # raw access, no lock, no default handling; raises KeyError on a miss
        return self._cache[key]

    def update(self, old_key: str, old_value: object, new_key: str, new_value: object, alt_sdp=None):
        """
        Update an entry in the cache
        :param old_key: key of the previous version of the entry
        :param old_value: previous version of the entry
        :param new_key: key of the entry
        :param new_value: new value
        :param alt_sdp: if not found in cache._sdp, look in other repositories
        :return:
        """
        with self._lock:
            self._update(old_key, old_value, new_key, new_value, alt_sdp)

    def delete(self, key: str, value: object = None, alt_sdp=None):
        # returns True when the deletion happened, False when the key was unknown
        with self._lock:
            try:
                self._sync(key)
                self._delete(key, value, alt_sdp)
                return True
            except KeyError:
                return False

    def populate(self, populate_function: callable, get_key_function: callable, reset_events=False):
        """
        Initialise the cache with a bunch of data
        :param populate_function: iterable that produces items (takes the sdp when one is configured)
        :param get_key_function: how to compute the key of an item
        :param reset_events: if TRUE, to_add/to_remove events are not updated
        :return:
        """
        with self._lock:
            if reset_events:
                # snapshot the event sets so the puts below leave no trace
                to_add_copy = self.to_add.copy()
                to_remove_copy = self.to_remove.copy()
            for item in (populate_function(self._sdp) if self._sdp else populate_function()):
                self.put(get_key_function(item), item)
            if reset_events:
                self.to_add = to_add_copy
                self.to_remove = to_remove_copy

    def force_value(self, key: str, value: object):
        """
        Force a value into a key without raising any event.
        NOTE: bypasses _put, so _current_size is not adjusted.
        """
        with self._lock:
            self._cache[key] = value

    def remove_initialized_key(self, key: str):
        """
        When a value is requested by alt_sdp, we should not keep track of the request
        as the outcome is not known.
        Raises KeyError if the key was never requested.
        """
        with self._lock:
            self._initialized_keys.remove(key)

    def has(self, key: str):
        """
        Return True if the key is in the cache.
        Never uses extend_exists.
        :param key:
        :return:
        """
        with self._lock:
            return key in self._cache

    def exists(self, key: str):
        """
        Return True if the key is in the cache.
        Can use extend_exists to query the remote repository.
        :param key:
        :return:
        """
        with self._lock:
            if key in self._cache:
                return True
            if self._extend_exists:
                return self._extend_exists(self._sdp, key) if self._sdp else self._extend_exists(key)
            else:
                return False

    def evict(self, nb_items: int):
        """
        Remove nb_items from the cache, using the replacement policy
        (FIFO: dict insertion order, skipping keys with uncommitted events).
        :return: the number of entries actually removed
        """
        with self._lock:
            # never try to remove more than the current logical size
            nb_items = self._current_size if self._current_size < nb_items else nb_items
            to_remove = []
            iter_cache = iter(self._cache)
            try:
                while nb_items > 0:
                    key = next(iter_cache)
                    if key in self.to_add or key in self.to_remove:
                        continue  # cannot remove an item that is not yet committed
                    else:
                        to_remove.append(key)
                        nb_items -= 1
            except StopIteration:
                pass
            for key in to_remove:
                del (self._cache[key])
                try:
                    self._initialized_keys.remove(key)
                except KeyError:
                    pass
            # NOTE(review): each evicted key decrements the size by 1, while
            # list/set values were counted per element in _get — verify for
            # caches whose entries are collections
            self._current_size -= len(to_remove)
            return len(to_remove)

    def evict_by_key(self, predicate: callable):
        """
        Remove entries whose key matches the predicate.
        :param predicate:
        :return: the number of entries removed
        """
        to_delete = []
        with self._lock:
            for key in self._cache:
                if predicate(key):
                    to_delete.append(key)
            for key in to_delete:
                del (self._cache[key])
                try:
                    self._initialized_keys.remove(key)
                except KeyError:
                    pass
            # NOTE(review): same 1-per-key size decrement caveat as evict()
            self._current_size -= len(to_delete)
            return len(to_delete)

    def clear(self, set_is_cleared: bool = True):
        # drops everything: entries, size, initialized keys and pending events
        with self._lock:
            # Seems that remote sdp is not correctly updated
            self._cache.clear()
            self._current_size = 0
            self._initialized_keys.clear()
            self.to_add.clear()
            self.to_remove.clear()
            if set_is_cleared:
                self._is_cleared = True

    def dump(self):
        # snapshot of the state, suitable for init_from_dump()
        with self._lock:
            return {
                "current_size": self._current_size,
                "cache": self._cache.copy()
            }

    def copy(self):
        # shallow copy of the underlying dictionary
        with self._lock:
            return self._cache.copy()

    def init_from_dump(self, dump: dict):
        # restore the state captured by dump(); events and initialized keys are untouched
        with self._lock:
            self._current_size = dump["current_size"]
            self._cache = dump["cache"].copy()
            return self

    def reset_events(self):
        # forget pending to_add/to_remove events and the cleared flag (used after commit)
        with self._lock:
            self.to_add.clear()
            self.to_remove.clear()
            self._is_cleared = False

    def reset_initialized_keys(self):
        """
        Use when an ontology is put back. Reset all the previous requests as alt_sdp is a new one.
        """
        with self._lock:
            self._initialized_keys.clear()

    def is_cleared(self):
        with self._lock:
            return self._is_cleared

    def clone(self):
        # fresh, empty cache of the same concrete type sharing only the settings
        # NOTE(review): settings are read without holding the lock — confirm intended
        return type(self)(self._max_size, self._default, self._extend_exists, self._alt_sdp_get, self._sdp)

    def test_only_reset(self):
        """
        Clears the cache, but does not set is_cleared to True.
        It's a convenient way to clear the cache without altering alt_sdp behaviour.
        """
        self.clear(set_is_cleared=False)

    def _sync(self, *keys):
        # KSI 2020-12-29. Do not try to use alt_sdp here.
        # Sync must only sync with the current sdp.
        for key in keys:
            if key not in self._initialized_keys and callable(self._default):
                # to keep sync with the remote repo if needed;
                # first check self._initialized_keys to prevent infinite loop
                self.get(key)

    def _add_to_add(self, key: str):
        """
        Adds the key to the list of recently added keys.
        Keeps to_add and to_remove mutually exclusive.
        :param key:
        :return:
        """
        self.to_add.add(key)
        try:
            self.to_remove.remove(key)
        except KeyError:
            pass

    def _add_to_remove(self, key: str):
        """
        Adds the key to the list of recently removed keys.
        Keeps to_add and to_remove mutually exclusive.
        :param key:
        :return:
        """
        self.to_remove.add(key)
        try:
            self.to_add.remove(key)
        except KeyError:
            pass

    def _get(self, key: str, alt_sdp=None):
        # core lookup: cache hit, else default provider, else alt_sdp
        try:
            value = self._cache[key]
        except KeyError:
            if len(self._initialized_keys) == MAX_INITIALIZED_KEY:
                # bound the memory used to remember past misses
                self._initialized_keys.clear()
            if callable(self._default):
                if key in self._initialized_keys:
                    # it means that we have already asked the repository
                    return NotFound
                simple_copy = True
                # first, tries to use the default value
                value = self._default(self._sdp, key) if self._sdp else self._default(key)
                if value is NotFound and alt_sdp and not self._is_cleared:
                    # try in the alternate (Sheerka) Data Provider
                    value = self._alt_sdp_get(alt_sdp, key)
                    simple_copy = False  # in that case, make sure to make a deep copy
                if value is not NotFound:
                    value = value if simple_copy else sheerka_deepcopy(value)
                    self._cache[key] = value
                    # update _current_size: collections count per element
                    if isinstance(value, (list, set)):
                        self._current_size += len(value)
                    else:
                        self._current_size += 1
                    if self._max_size and self._current_size > self._max_size:
                        self.evict(self._current_size - self._max_size)
                # else 'value' remains NotFound
            else:
                value = self._default
            self._initialized_keys.add(key)
        return value

    def _alt_get(self, key: str):
        return self._get(key)  # by default, point to _get

    def _put(self, key: str, value: object, alt_sdp):
        """
        To be defined in subclass.
        :param key:
        :param value:
        :param alt_sdp:
        :return: truthy when a new slot was used, so put() can bump the size
        """
        pass

    def _update(self, old_key: str, old_value: object, new_key: str, new_value: object, alt_sdp):
        """
        To be defined in subclass.
        :param old_key:
        :param old_value:
        :param new_key:
        :param new_value:
        :param alt_sdp:
        :return:
        """
        pass

    def _delete(self, key: str, value: object, alt_sdp):
        # to be defined in subclass; delete() converts KeyError into False
        raise NotImplementedError("_delete BaseCache")
+40
View File
@@ -0,0 +1,40 @@
from caching.BaseCache import BaseCache
from common.global_symbols import Removed
class Cache(BaseCache):
    """
    An in memory FIFO cache object.
    When max_size is reached, the first element that was put is removed.
    When you put the same key twice, the previous element is overridden.
    """
    def _put(self, key: str, value: object, alt_sdp):
        # returns True only when the key is new, so BaseCache.put bumps the size once
        res = key not in self._cache
        self._cache[key] = value
        self._add_to_add(key)
        return res

    def _update(self, old_key: str, old_value: object, new_key: str, new_value: object, alt_sdp):
        self._cache[new_key] = new_value
        self._add_to_add(new_key)
        if new_key != old_key:
            self._sync(old_key)
            if not self._is_cleared and alt_sdp and self._extend_exists and self._extend_exists(alt_sdp, old_key):
                # the old key still exists remotely: keep a Removed tombstone so
                # commit() propagates the removal instead of silently dropping it
                self._cache[old_key] = Removed
                self._add_to_add(old_key)
                # +1 for the new entry added above; the tombstone reuses the old slot
                self._current_size += 1
            else:
                del (self._cache[old_key])
                self._add_to_remove(old_key)

    def _delete(self, key: str, value: object, alt_sdp):
        if not self._is_cleared and alt_sdp and self._extend_exists and self._extend_exists(alt_sdp, key):
            # key exists remotely: leave a tombstone instead of removing it
            self._cache[key] = Removed
            self._add_to_add(key)
            # do not decrease self._current_size as 'Removed' takes one slot
        else:
            del (self._cache[key])
            self._add_to_remove(key)
            self._current_size -= 1
+342
View File
@@ -0,0 +1,342 @@
from dataclasses import dataclass, field
from threading import RLock
from typing import Callable
from caching.BaseCache import BaseCache
from common.global_symbols import NotFound
from core.concept import Concept, ConceptMetadata
@dataclass
class MultipleEntryError(Exception):
    """
    Exception raised when trying to alter an entry with multiple elements
    without giving the origin of the element
    """
    key: str  # key of the offending cache entry
@dataclass
class ConceptNotFound(Exception):
    """
    Thrown when you try to remove a concept that is not found
    """
    concept: object  # the concept (metadata) that could not be located
@dataclass
class CacheDefinition:
    """Bundles a cache instance with how the CacheManager must use it."""
    cache: BaseCache  # the underlying cache instance
    use_ref: bool  # forwarded to transaction.add() at commit time
    get_key: Callable[[ConceptMetadata], str] | None = field(repr=False)  # key extractor for concept caches (None for plain caches)
    persist: bool = True  # when False, commit() skips this cache
class CacheManager:
    """
    Single class to manage all the caches
    """
    def __init__(self, sdp=None):
        """
        Manager for all the caches.
        Makes the link between the caches and the SheerkaDataProvider.
        :param sdp: current instance of SheerkaDataProvider (optional)
        """
        self.sdp = sdp
        self.caches: dict[str, CacheDefinition] = {}  # dict {cache_name: CacheDefinition}
        self.concept_caches = []  # names of the caches registered via register_concept_cache, in order
        self.is_dirty = False  # to indicate that the value of a cache has changed
        self._lock = RLock()

    def register_concept_cache(self, name, cache, get_key, use_ref):
        """
        Special caches to manage concept definitions.
        They store concept metadata, using a specific index.
        For example, you may declare an index to store metadata by id, by key or by whatever you need.
        It's a convenient way to manage indexes for concept definitions.
        Underneath, these are simple `Cache` objects.
        :param name:
        :param cache:
        :param get_key: how to compute this index's key from a ConceptMetadata
        :param use_ref:
        :return:
        """
        with self._lock:
            if self.sdp:
                cache.configure(sdp=self.sdp)
            self.caches[name] = CacheDefinition(cache, use_ref, get_key)
            self.concept_caches.append(name)

    def register_cache(self, name, cache, persist=True, use_ref=False):
        """
        Register a plain (non-concept) cache.
        :param name:
        :param cache:
        :param persist: when False, commit() will skip this cache
        :param use_ref:
        :return:
        """
        with self._lock:
            if self.sdp:
                cache.configure(sdp=self.sdp)
            self.caches[name] = CacheDefinition(cache, use_ref, None, persist)

    def add_concept(self, metadata: ConceptMetadata, alt_sdp=None):
        """
        We need multiple indexes to retrieve a concept,
        so the new concept is dispatched into every concept cache.
        :param metadata:
        :param alt_sdp: if not found in self.sdp, look in other repositories
        :return:
        """
        with self._lock:
            for name in self.concept_caches:
                cache_def = self.caches[name]
                key = cache_def.get_key(metadata)
                if key is None:
                    raise KeyError("")
                cache_def.cache.put(key, metadata, alt_sdp)
            self.is_dirty = True

    def update_concept(self, old: ConceptMetadata, new: ConceptMetadata, alt_sdp=None):
        """
        Update a concept in every concept cache.
        :param old: old version of the concept
        :param new: new version of the concept
        :param alt_sdp: if not found in self.sdp, look in other repositories
        :return:
        """
        with self._lock:
            for cache_name in self.concept_caches:
                cache_def = self.caches[cache_name]
                old_key = cache_def.get_key(old)
                new_key = cache_def.get_key(new)
                cache_def.cache.update(old_key, old, new_key, new, alt_sdp=alt_sdp)
            self.is_dirty = True

    def remove_concept(self, concept: ConceptMetadata, alt_sdp=None):
        """
        Remove a concept from all caches.
        :param concept:
        :param alt_sdp: if not found in self.sdp, look in other repositories
        :return:
        :raises ConceptNotFound: when the concept is in neither the reference cache nor alt_sdp
        """
        with self._lock:
            # the first concept cache must be the one where all concepts are unique,
            # e.g. it has to be the concept-by-id cache
            ref_cache_def = self.caches[self.concept_caches[0]]
            concept_id = ref_cache_def.get_key(concept)
            ref_concept = ref_cache_def.cache.get(concept_id)
            if ref_concept is NotFound and alt_sdp:
                ref_concept = alt_sdp.get(self.concept_caches[0], concept_id)
            if ref_concept is NotFound:
                raise ConceptNotFound(concept)
            for cache_name in self.concept_caches:
                cache_def = self.caches[cache_name]
                key = cache_def.get_key(ref_concept)
                cache_def.cache.delete(key, ref_concept, alt_sdp=alt_sdp)
            self.is_dirty = True

    def get(self, cache_name, key, alt_sdp=None):
        """
        Get an entry from the named cache.
        :param cache_name:
        :param key:
        :param alt_sdp: if not found in self.sdp, look in other repositories
        :return:
        """
        with self._lock:
            return self.caches[cache_name].cache.get(key, alt_sdp)

    def alt_get(self, cache_name, key):
        """
        Alternate way to get an entry.
        This is mainly used for IncCache, in order to get the value without increasing it.
        :param cache_name:
        :param key:
        :return:
        """
        with self._lock:
            return self.caches[cache_name].cache.alt_get(key)

    def put(self, cache_name, key, value, alt_sdp=None):
        """
        Add an entry to the named cache.
        :param cache_name:
        :param key:
        :param value:
        :param alt_sdp: if not found in self.sdp, look in other repositories
        :return:
        """
        with self._lock:
            self.caches[cache_name].cache.put(key, value, alt_sdp)
            self.is_dirty = True

    def delete(self, cache_name, key, value=None, alt_sdp=None):
        """
        Delete an entry from the named cache.
        Only marks the manager dirty when the deletion actually happened.
        :param cache_name:
        :param key:
        :param value:
        :param alt_sdp: if not found in self.sdp, look in other repositories
        :return:
        """
        with self._lock:
            if self.caches[cache_name].cache.delete(key, value, alt_sdp):
                self.is_dirty = True

    def get_inner_cache(self, cache_name) -> BaseCache:
        """
        Return the BaseCache object registered under cache_name.
        :param cache_name:
        :return:
        """
        with self._lock:
            return self.caches[cache_name].cache

    def copy(self, cache_name) -> dict:
        """
        Get a copy of the content of the whole cache as a dictionary.
        :param cache_name:
        :return:
        """
        return self.caches[cache_name].cache.copy()

    def populate(self, cache_name, populate_function, get_key_function, reset_events=False):
        """
        Populate a specific cache with a bunch of items.
        :param cache_name:
        :param populate_function: how to get the items
        :param get_key_function: how to get the key, out of an item
        :param reset_events: reset to_add and to_remove events after populate
        :return:
        """
        with self._lock:
            self.caches[cache_name].cache.populate(populate_function, get_key_function, reset_events)

    def force_value(self, cache_name, key, value):
        """
        Update the content of the cache, but does not raise any event.
        """
        with self._lock:
            self.caches[cache_name].cache.force_value(key, value)

    def remove_initialized_key(self, cache_name, key):
        """
        Forget that `key` was already requested on the named cache
        (see BaseCache.remove_initialized_key).
        """
        with self._lock:
            self.caches[cache_name].cache.remove_initialized_key(key)

    def has(self, cache_name, key):
        """
        True if the value is in cache only. Never tries to look in a remote repository.
        :param cache_name:
        :param key:
        :return:
        """
        with self._lock:
            return self.caches[cache_name].cache.has(key)

    def exists(self, cache_name, key):
        """
        True if the value is in cache.
        If not found, may search in a remote repository.
        :param cache_name:
        :param key:
        :return:
        """
        with self._lock:
            return self.caches[cache_name].cache.exists(key)

    def commit(self, context):
        """
        Persist all the caches into a physical persistence storage,
        inside a single sdp transaction, then reset the per-cache events.
        :param context:
        :return:
        """
        def update_full_serialisation(items, value):
            # Take care, infinite recursion is not handled !!
            if isinstance(items, (list, set, tuple)):
                for item in items:
                    update_full_serialisation(item, value)
            elif isinstance(items, dict):
                for values in items.values():
                    update_full_serialisation(values, value)
            elif isinstance(items, Concept):
                items.get_metadata().full_serialization = value
        with self._lock:
            with self.sdp.get_transaction(context.event.get_digest()) as transaction:
                for cache_name, cache_def in self.caches.items():
                    if not cache_def.persist:
                        continue
                    if cache_def.cache.is_cleared():
                        transaction.clear(cache_name)
                    for key in cache_def.cache.to_remove:
                        transaction.remove(cache_name, key)
                    for key in cache_def.cache.to_add:
                        if key == "*self*":
                            # sentinel raised by DictionaryCache: persist the whole content at once
                            transaction.add(cache_name, None, cache_def.cache.dump()["cache"])
                        else:
                            to_save = cache_def.cache.inner_get(key)
                            # temporarily flag contained Concepts for full serialization
                            update_full_serialisation(to_save, True)
                            transaction.add(cache_name, key, to_save, cache_def.use_ref)
                            update_full_serialisation(to_save, False)
                    cache_def.cache.reset_events()
            self.is_dirty = False

    def clear(self, cache_name=None, set_is_cleared=True):
        # clear one cache when cache_name is given, otherwise all of them
        with self._lock:
            if cache_name:
                self.caches[cache_name].cache.clear(set_is_cleared)
            else:
                for cache_def in self.caches.values():
                    cache_def.cache.clear(set_is_cleared)

    def dump(self):
        """
        For test purpose, dumps the whole content of the cache manager.
        :return:
        """
        with self._lock:
            res = {}
            for cache_name, cache_def in self.caches.items():
                res[cache_name] = cache_def.cache.dump()
            return res

    def init_from_dump(self, dump):
        # restore each known cache from a dump(); unknown cache names are ignored
        with self._lock:
            for cache_name, content in dump.items():
                if cache_name in self.caches:
                    self.caches[cache_name].cache.init_from_dump(content)
            return self

    def reset(self):
        """For unit test speed enhancement"""
        self.clear()
        self.caches.clear()
        self.concept_caches.clear()
        self.is_dirty = False
+88
View File
@@ -0,0 +1,88 @@
from caching.BaseCache import MAX_INITIALIZED_KEY
from caching.Cache import BaseCache
from common.global_symbols import NotFound
class DictionaryCache(BaseCache):
    """
    Kind of all-or-nothing dictionary database.
    You can get the values key by key,
    but when you want to put, you must put the whole database.
    For this reason, alt_sdp is not supported. The top ontology layer contains the whole database.
    """
    def auto_configure(self, cache_name):
        """
        Convenient way to configure the cache
        :param cache_name:
        :return: self, for chaining
        """
        self._default = lambda sdp, key: sdp.get(cache_name)  # retrieve the whole entry
        self._extend_exists = None  # not used
        self._alt_sdp_get = None  # not used
        return self

    def _get(self, key, alt_sdp=None):
        """
        Management of the default is different:
        a miss reloads the WHOLE dictionary from the default provider.
        :param key:
        :return: the value, or NotFound
        """
        try:
            return self._cache[key]
        except KeyError:
            if key in self._initialized_keys:
                # already asked the remote once: do not ask again
                return NotFound
            if len(self._initialized_keys) == MAX_INITIALIZED_KEY:
                # bound the memory used to remember past misses
                self._initialized_keys.clear()
            self._initialized_keys.add(key)
            if callable(self._default):
                default_values = self._default(self._sdp, key) if self._sdp else self._default(key)
            else:
                default_values = self._default
            if isinstance(default_values, dict):
                self._cache.update(default_values)  # update the whole cache dictionary to resync with remote sdp
                self._count_items()
            return self._cache[key] if key in self._cache else NotFound

    def _put(self, key, value, alt_sdp):
        """
        Adds a whole dictionary
        :param key: True to append, False to reset
        :param value: dictionary
        :param alt_sdp: NOT SUPPORTED as the values from alt_sdp must be retrieved and computed BEFORE the put
        :return: always False, so BaseCache.put does not bump the size (handled by _count_items)
        """
        if not isinstance(key, bool):
            raise KeyError
        if not isinstance(value, dict):
            raise ValueError
        if key:  # update the current cache
            if self._cache is None:
                self._cache = value.copy()
            else:
                self._cache.update(value)
        else:  # reset the current cache
            self._cache = value
        self._count_items()
        # special meaning for to_add: "*self*" tells commit() to persist the whole content
        self._add_to_add("*self*")
        return False

    def _delete(self, key, value, alt_sdp):
        raise NotImplementedError("_delete DictionaryCache")

    def _count_items(self):
        # recompute _current_size from scratch: sized containers count per
        # element (strings count as scalars), everything else counts for 1
        self._current_size = 0
        for v in self._cache.values():
            self._current_size += len(v) if hasattr(v, "__len__") and not isinstance(v, str) else 1
+71
View File
@@ -0,0 +1,71 @@
from common.global_symbols import NotFound
class FastCache:
    """
    Simplest LRU cache (eviction order is by insertion/overwrite, refreshed on put).

    Keeps at most max_size entries; when a NEW key would overflow the cache,
    the oldest key is evicted. `calls` counts cache hits per key.

    Fixes over the previous version:
    - putting an EXISTING key while the cache was full used to evict the
      oldest entry anyway, needlessly shrinking the cache;
    - `calls` entries are now pruned on eviction, evict_by_key and clear,
      so the hit-counter dict can no longer grow without bound.
    """
    def __init__(self, max_size=256, default=None):
        """
        :param max_size: maximum number of entries kept
        :param default: optional callable(key) used to compute a missing value
        """
        self.max_size = max_size
        self.cache = {}
        self.lru = []  # keys ordered from oldest to most recently put
        self.default = default
        self.calls = {}  # per-key hit counter

    def __contains__(self, item):
        return self.has(item)

    def __iter__(self):
        yield from self.cache

    def __next__(self):
        # always returns the first key: a fresh iterator is created on each call
        return next(iter(self.cache))

    def __len__(self):
        return len(self.cache)

    def put(self, key, value):
        """Insert or overwrite `key`; evicts the oldest entry only when needed."""
        if key in self.cache:
            # overwrite: no eviction needed, just refresh the LRU position
            self.lru.remove(key)
        elif len(self.cache) == self.max_size:
            # cache full and key is new: evict the oldest entry
            evicted = self.lru.pop(0)
            del self.cache[evicted]
            self.calls.pop(evicted, None)  # keep the hit counter in sync
        self.cache[key] = value
        self.lru.append(key)
        self.calls[key] = 0

    def has(self, key):
        return key in self.cache

    def get(self, key):
        """Return the cached value, computing it via `default` on a miss; NotFound otherwise."""
        try:
            res = self.cache[key]
            self.calls[key] += 1
            return res
        except KeyError:
            if self.default:
                value = self.default(key)
                self.put(key, value)
                return value
            return NotFound

    def evict_by_key(self, predicate):
        """Remove every entry whose key matches the predicate."""
        to_remove = [k for k in self.cache if predicate(k)]
        for k in to_remove:
            self.lru.remove(k)
            del self.cache[k]
            self.calls.pop(k, None)

    def copy(self):
        return self.cache.copy()

    def clear(self):
        self.cache.clear()
        self.lru.clear()
        self.calls.clear()
+24
View File
@@ -0,0 +1,24 @@
from caching.Cache import Cache
from common.global_symbols import NotFound, Removed
class IncCache(Cache):
    """
    Increment the value of the key every time it's accessed.
    A missing or Removed entry starts counting from 0.
    """
    def _get(self, key, alt_sdp=None):
        # counter semantics: every read bumps the stored value by one
        # and persists the new total before returning it
        value = super()._get(key, alt_sdp=alt_sdp)
        if value in (NotFound, Removed):
            value = 0
        value += 1
        self._put(key, value, alt_sdp)
        return value

    def _put(self, key, value, alt_sdp):
        self._cache[key] = value
        self._add_to_add(key)
        # NOTE(review): unlike Cache._put this always returns True, so the public
        # put() bumps _current_size even for an existing key — confirm intended
        return True

    def _alt_get(self, key):
        return super()._get(key)  # point to parent, not to self: read without incrementing
+66
View File
@@ -0,0 +1,66 @@
from caching.Cache import BaseCache
from common.global_symbols import NotFound, Removed
from common.utils import sheerka_deepcopy
class ListCache(BaseCache):
    """
    An in memory FIFO cache object.
    When max_size is reached, the first element that was put is removed.
    Items of this cache are lists: putting twice under the same key appends.
    """
    def _put(self, key, value, alt_sdp):
        # always returns True, so BaseCache.put counts one slot per appended value
        if key in self._cache:
            self._cache[key].append(value)
        else:
            self._sync(key)
            if key not in self._cache and alt_sdp and not self._is_cleared:
                previous = self._alt_sdp_get(alt_sdp, key)
                if previous not in (NotFound, Removed):
                    # keep a private copy: alt_sdp data must not be mutated
                    self._cache[key] = sheerka_deepcopy(previous)
            if key in self._cache:
                self._cache[key].append(value)
            else:
                self._cache[key] = [value]
        self._add_to_add(key)
        return True

    def _update(self, old_key, old_value, new_key, new_value, alt_sdp):
        self._sync(old_key, new_key)
        if old_key not in self._cache and alt_sdp and not self._is_cleared:
            # no value found in local cache or remote repository:
            # use the values from alt_sdp
            previous = self._alt_sdp_get(alt_sdp, old_key)
            if previous in (NotFound, Removed):
                raise KeyError(old_key)
            self._cache[old_key] = sheerka_deepcopy(previous)
            self._current_size += len(previous)
        if old_key != new_key:
            self._cache[old_key].remove(old_value)
            if len(self._cache[old_key]) == 0:
                # NOTE(review): unlike Cache._update, _extend_exists is not
                # checked for None before the call — verify it is always
                # configured for ListCache
                if not self._is_cleared and alt_sdp and self._extend_exists(alt_sdp, old_key):
                    # old key still exists remotely: keep a Removed tombstone
                    self._cache[old_key] = Removed
                    self._add_to_add(old_key)
                    self._current_size += 1
                else:
                    del (self._cache[old_key])
                    self._add_to_remove(old_key)
            else:
                self._add_to_add(old_key)
            self._put(new_key, new_value, alt_sdp)
            self._add_to_add(new_key)
        else:
            for i in range(len(self._cache[new_key])):
                if self._cache[new_key][i] == old_value:
                    self._cache[new_key][i] = new_value  # avoid add and remove in dict
                    break  # only the first one is affected
            self._add_to_add(new_key)
+134
View File
@@ -0,0 +1,134 @@
from caching.Cache import BaseCache
from common.global_symbols import NotFound, Removed
from common.utils import sheerka_deepcopy
class ListIfNeededCache(BaseCache):
    """
    An in memory FIFO cache object.
    When max_size is reached, the first element that was put is removed.
    When you put the same key twice, the entry becomes a list of two elements:
    single values are stored bare, multiple values are stored in a list.

    Fixes over the previous version of _delete:
    - size accounting when replacing an entry with the Removed tombstone now
      matches the convention used by Cache._delete and SetCache._delete
      (the tombstone occupies exactly one slot);
    - events are recorded through _add_to_add/_add_to_remove so the to_add
      and to_remove sets stay mutually exclusive (the old code used raw
      set.add, leaving stale keys in the opposite set).
    """
    def _put(self, key, value, alt_sdp):
        """
        Append `value` under `key`, promoting the entry to a list when a
        second value arrives. Always returns True so BaseCache.put counts it.
        """
        if key in self._cache:
            if isinstance(self._cache[key], list):
                self._cache[key].append(value)
            else:
                # a Removed tombstone is replaced; a bare value becomes a pair
                self._cache[key] = value if self._cache[key] is Removed else [self._cache[key], value]
        else:
            self._sync(key)
            if key not in self._cache and alt_sdp and not self._is_cleared:
                previous = self._alt_sdp_get(alt_sdp, key)
                if previous not in (NotFound, Removed):
                    # keep a private copy: alt_sdp data must not be mutated
                    self._cache[key] = sheerka_deepcopy(previous)
            if key in self._cache:
                if isinstance(self._cache[key], list):
                    self._cache[key].append(value)
                else:
                    self._cache[key] = value if self._cache[key] is Removed else [self._cache[key], value]
            else:
                self._cache[key] = value
        self._add_to_add(key)
        return True

    def _update(self, old_key, old_value, new_key, new_value, alt_sdp):
        self._sync(old_key, new_key)
        if old_key not in self._cache and alt_sdp and not self._is_cleared:
            # no value found in local cache or remote repository:
            # use the values from alt_sdp
            previous = self._alt_sdp_get(alt_sdp, old_key)
            if previous in (NotFound, Removed):
                raise KeyError(old_key)
            self._cache[old_key] = sheerka_deepcopy(previous)
            self._current_size += len(previous) if isinstance(previous, list) else 1
        if old_key != new_key:
            if isinstance(self._cache[old_key], list):
                self._cache[old_key].remove(old_value)
                if len(self._cache[old_key]) == 1:
                    # collapse a one-element list back to a bare value
                    self._cache[old_key] = self._cache[old_key][0]
                self._add_to_add(old_key)
            else:
                if not self._is_cleared and alt_sdp and self._extend_exists(alt_sdp, old_key):
                    # old key still exists remotely: keep a Removed tombstone
                    self._cache[old_key] = Removed
                    self._add_to_add(old_key)
                    self._current_size += 1
                else:
                    del (self._cache[old_key])
                    self._add_to_remove(old_key)
            self._put(new_key, new_value, alt_sdp)
            self._add_to_add(new_key)
        else:
            if isinstance(self._cache[new_key], list):
                for i in range(len(self._cache[new_key])):
                    if self._cache[new_key][i] == old_value:
                        self._cache[new_key][i] = new_value  # avoid add and remove in dict
                        break
            else:
                self._cache[new_key] = new_value
            self._add_to_add(new_key)

    def _delete(self, key, value, alt_sdp):
        if value is None:
            # Remove the whole key
            if not self._is_cleared and alt_sdp and self._extend_exists(alt_sdp, key):
                # key exists remotely: leave a Removed tombstone (one slot)
                if key in self._cache:
                    previous = self._cache[key]
                    if isinstance(previous, list):
                        # a list of n items collapses into the single tombstone
                        self._current_size -= len(previous) - 1
                    # a bare value replaced by Removed: size unchanged
                else:
                    self._current_size += 1  # the tombstone occupies a brand-new slot
                self._cache[key] = Removed
                self._add_to_add(key)
            else:
                previous = self._cache[key]
                self._current_size -= len(previous) if isinstance(previous, list) else 1
                del self._cache[key]
                self._add_to_remove(key)
        else:
            # Remove a single value
            try:
                previous = self._cache[key]
                if isinstance(previous, list):
                    previous.remove(value)
                    # collapse a one-element list back to a bare value
                    self._cache[key] = previous[0] if len(previous) == 1 else previous
                    self._current_size -= 1
                    self._add_to_add(key)
                else:
                    if previous == value:
                        # I am about to delete the entry
                        if not self._is_cleared and alt_sdp and self._extend_exists(alt_sdp, key):
                            self._cache[key] = Removed
                            self._add_to_add(key)
                            # do not decrease size, as the value is replaced by 'Removed'
                        else:
                            del self._cache[key]
                            self._current_size -= 1
                            self._add_to_remove(key)
            except KeyError as ex:
                # key not cached locally: try to rebuild the entry from alt_sdp
                previous = self._alt_sdp_get(alt_sdp, key) if not self._is_cleared and alt_sdp else NotFound
                if previous in (NotFound, Removed):
                    raise ex
                if isinstance(previous, list):
                    previous = sheerka_deepcopy(previous)
                    previous.remove(value)  # raises an exception if value is not in the list
                    self._cache[key] = previous[0] if len(previous) == 1 else previous
                    # the remaining elements now occupy slots in this cache
                    self._current_size += len(previous)
                    self._add_to_add(key)
                else:
                    if previous == value:
                        self._cache[key] = Removed
                        self._add_to_add(key)
                        self._current_size += 1  # the tombstone occupies a new slot
        return True
+122
View File
@@ -0,0 +1,122 @@
from caching.Cache import BaseCache
from common.global_symbols import NotFound, Removed
from common.utils import sheerka_deepcopy
class SetCache(BaseCache):
"""
An in memory FIFO cache object
When the max_size is reach the first element that was put is removed
You can use the same key multiple times, but the elements under this key will be unique
When there are multiple elements, a python set is used
>> self.put('key', 'value1')
>> assert {'value1'} == self.get('key')
>> self.put('key', 'value2')
>> assert {'value1', 'value2'} == self.get('key')
"""
def _put(self, key, value, alt_sdp):
if key in self._cache:
if self._cache[key] is Removed:
self._cache[key] = {value}
elif value in self._cache[key]:
return False
else:
self._cache[key].add(value)
else:
self._sync(key)
if key not in self._cache and alt_sdp and not self._is_cleared:
previous = self._alt_sdp_get(alt_sdp, key)
if previous not in (NotFound, Removed):
self._cache[key] = sheerka_deepcopy(previous)
if key in self._cache:
if self._cache[key] == Removed:
self._cache[key] = {value}
else:
self._cache[key].add(value)
else:
self._cache[key] = {value}
self._add_to_add(key)
return True
def _update(self, old_key, old_value, new_key, new_value, alt_sdp):
self._sync(old_key, new_key)
if old_key not in self._cache and alt_sdp and not self._is_cleared:
# no value found in local cache or remote repository
# Use the values from alt_sdp
previous = self._alt_sdp_get(alt_sdp, old_key)
if previous in (NotFound, Removed):
raise KeyError(old_key)
self._cache[old_key] = sheerka_deepcopy(previous)
self._current_size += len(previous)
if old_key != new_key:
if isinstance(self._cache[old_key], set):
self._cache[old_key].remove(old_value)
if len(self._cache[old_key]) == 0:
if not self._is_cleared and alt_sdp and self._extend_exists(alt_sdp, old_key):
self._cache[old_key] = Removed
self._add_to_add(old_key)
self._current_size += 1
else:
del (self._cache[old_key])
self._add_to_remove(old_key)
else:
self._add_to_add(old_key)
self._put(new_key, new_value, alt_sdp)
self._add_to_add(new_key)
else:
self._cache[new_key].remove(old_value)
self._put(new_key, new_value, alt_sdp)
self._add_to_add(new_key)
def _delete(self, key, value, alt_sdp):
    """
    Delete a value (or a whole key when `value` is None) from the cache.

    A key that still exists in the remote repository (`_extend_exists`) is
    not dropped but replaced by the `Removed` sentinel, so the deletion can
    later be propagated; otherwise the entry is removed locally.

    :param key: cache key
    :param value: value to remove, or None to remove the whole key
    :param alt_sdp: alternate data provider used to fetch previous values
    :return: True (always; failures raise)
    :raises KeyError: when neither the cache nor alt_sdp holds the value
    """
    if value is None:
        if not self._is_cleared and alt_sdp and self._extend_exists(alt_sdp, key):
            # the ternary binds to the right-hand side:
            # += (1 - len(...)) when the key is cached, else += 1,
            # i.e. the whole set is replaced by one Removed marker
            self._current_size += 1 - len(self._cache[key]) if key in self._cache else 1
            self._cache[key] = Removed
            self._add_to_add(key)
        else:
            # NOTE(review): raises KeyError when `key` is absent, and
            # TypeError when the entry is the Removed sentinel — confirm
            # callers guarantee a cached set here.
            self._current_size -= len(self._cache[key])
            del self._cache[key]
            self._add_to_remove(key)
    else:
        try:
            self._cache[key].remove(value)
            if len(self._cache[key]) == 0:
                if not self._is_cleared and alt_sdp and self._extend_exists(alt_sdp, key):
                    self._cache[key] = Removed
                    self._add_to_add(key)
                    # self._current_size -= 1 # Do not decrease size, as it's replaced by 'Removed'
                else:
                    del self._cache[key]
                    self._add_to_remove(key)
                    self._current_size -= 1
            else:
                self._add_to_add(key)
                self._current_size -= 1
        except KeyError as ex:
            # key (or value) not cached locally: fall back to alt_sdp
            previous = self._alt_sdp_get(alt_sdp, key) if not self._is_cleared and alt_sdp else NotFound
            if previous in (NotFound, Removed):
                raise ex
            previous = sheerka_deepcopy(previous)
            previous.remove(value)  # will raise a KeyError if value is not in the set
            if len(previous) == 0:
                self._cache[key] = Removed
                self._current_size += 1
            else:
                self._cache[key] = previous
                self._current_size += len(previous)
            self._add_to_add(key)
    return True
View File
+18 -3
View File
@@ -30,6 +30,7 @@ class SheerkaClient:
self.url = f"{self.hostname}:{self.port}" if self.port else f"{self.hostname}"
self.history_file = path.abspath(path.join(path.expanduser("~"), ".sheerka", "history.txt"))
self.token = None
self.user = None
def init_folder(self):
root_path = path.dirname(self.history_file)
@@ -56,7 +57,9 @@ class SheerkaClient:
form_data = {"username": username, "password": password}
res = requests.post(token_url, data=form_data)
if res:
self.token = res.json()["access_token"]
as_json = res.json()
self.token = as_json["access_token"]
self.user = {"first_name": as_json["first_name"], "last_name": as_json["last_name"]}
return TestResponse(True, f"Connected as {username}")
else:
self.token = None
@@ -65,6 +68,14 @@ class SheerkaClient:
self.token = None
return TestResponse(False, str(ex))
def help(self):
self.print_info("Basic commands:")
self.print_info(" clear")
self.print_info(" connect")
if not self.token:
self.print_error("You are not connected.")
self.print_info("You should use the command `connect('username', 'password'`)")
def run(self):
while True:
try:
@@ -80,6 +91,10 @@ class SheerkaClient:
prompt_toolkit.shortcuts.clear()
continue
if _in == "help":
self.help()
continue
# allow reconnection
m = connect_regex.match(_in)
if m:
@@ -92,9 +107,9 @@ class SheerkaClient:
# Call Sheerka
if self.token:
headers = {"Authorization": f"Bearer {self.token}"}
response = requests.post(f"{self.url}/echo/{_in}", headers=headers)
response = requests.post(f"{self.url}/command/{_in}", headers=headers)
else:
response = requests.post(f"{self.url}/echo/{_in}")
response = requests.post(f"{self.url}/command/{_in}")
# read the response from the server
if response:
View File
+66
View File
@@ -0,0 +1,66 @@
# Event-topic identifiers: the constant names spell out the meaning, the
# short string codes are the values actually published.
EVENT_CONCEPT_PRECEDENCE_MODIFIED = "evt_cp_m"
EVENT_RULE_PRECEDENCE_MODIFIED = "evt_rp_m"
EVENT_CONTEXT_DISPOSED = "evt_ctx_d"
EVENT_USER_INPUT_EVALUATED = "evt_ui_e"
EVENT_CONCEPT_CREATED = "evt_c_c"
EVENT_CONCEPT_MODIFIED = "evt_c_m"
EVENT_CONCEPT_DELETED = "evt_c_d"
EVENT_CONCEPT_ID_DELETED = "evt_c_id_d"
EVENT_RULE_CREATED = "evt_r_c"
EVENT_RULE_DELETED = "evt_r_d"
EVENT_RULE_ID_DELETED = "evt_r_id_d"
EVENT_ONTOLOGY_CREATED = "evt_o_c"
EVENT_ONTOLOGY_DELETED = "evt_o_d"
# Prefix marking variable names (presumably to avoid clashes with user names)
VARIABLE_PREFIX = "__var__"
class CustomType:
    """
    Base class for the sentinel ("custom") types used in Sheerka.

    A custom type has exactly one instance across the application and
    carries a semantic meaning, in the same spirit as the builtin ``None``
    singleton.
    """

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return self.value

    def __eq__(self, other):
        if not isinstance(other, CustomType):
            return False
        return other.value == self.value

    def __hash__(self):
        return hash(self.value)


class NotInitType(CustomType):
    """Sentinel: an attribute exists but was never initialized."""

    def __init__(self):
        super().__init__("**NotInit**")


class NotFoundType(CustomType):
    """Sentinel: an entry was not found in the cache nor in the sdp."""

    def __init__(self):
        super().__init__("**NotFound**")


class RemovedType(CustomType):
    """Sentinel: an entry was deleted locally but may still exist remotely."""

    def __init__(self):
        super().__init__("**Removed**")


class NoFirstTokenType(CustomType):
    """Sentinel: no first token is available."""

    def __init__(self):
        super().__init__("**NoFirstToken**")


# The application-wide singletons
NotInit = NotInitType()
NotFound = NotFoundType()
Removed = RemovedType()
NoFirstToken = NoFirstTokenType()
+293
View File
@@ -0,0 +1,293 @@
import importlib
import pkgutil
from copy import deepcopy
from common.global_symbols import CustomType
def get_class(qname):
    """
    Load an attribute (typically a class) from its fully qualified name.

    ``__import__("a.b")`` returns the top-level package ``a``, so after the
    import we walk down every dotted component after the first one.

    :param qname: fully qualified name, e.g. ``"package.module.ClassName"``
    :return: the resolved attribute
    """
    dotted = qname.split('.')
    obj = __import__('.'.join(dotted[:-1]))
    for name in dotted[1:]:
        obj = getattr(obj, name)
    return obj
def get_module(qname):
    """
    Load a module from its fully qualified name.

    :param qname: dotted module path, e.g. ``"package.subpackage.module"``
    :return: the module object
    """
    mod = __import__(qname)
    # __import__ returns the top-level package; descend to the leaf module
    for part in qname.split('.')[1:]:
        mod = getattr(mod, part)
    return mod
def get_full_qualified_name(obj):
    """
    Return the fully qualified name (module + class name) of *obj*.

    Accepts both classes and instances; builtins are reported without the
    ``builtins`` module prefix.

    :param obj: a class or an instance
    :return: e.g. ``"collections.OrderedDict"``, or ``"int"`` for builtins
    """
    # classes (whose metaclass is exactly `type`) are described directly,
    # anything else through its class
    cls = obj if obj.__class__ == type else obj.__class__
    module = cls.__module__
    if module is None or module == str.__class__.__module__:
        return cls.__name__  # avoid reporting __builtin__
    return module + '.' + cls.__name__
def get_logger_name(name):
    """
    Shorten a dotted logger name to its last component.

    :param name: dotted name (usually ``__name__``), or None
    :return: the part after the final dot, or None when *name* is None
    """
    return None if name is None else name.rpartition(".")[-1]
def import_module_and_sub_module(module_name):
    """
    Import *module_name* and every module one level below it.

    :param module_name: fully qualified package name
    """
    # Resolve the (possibly dotted) package; __import__ returns the
    # top-level package, so walk down the remaining components.
    package = __import__(module_name)
    for component in module_name.split('.')[1:]:
        package = getattr(package, component)
    for _, sub_name, _ in pkgutil.iter_modules(package.__path__, module_name + "."):
        importlib.import_module(sub_name)
def get_sub_classes(package_name, base_class):
    """
    Collect every (transitive) subclass of *base_class* defined in *package_name*.

    :param package_name: module-path prefix used to filter the result
    :param base_class: a class object, or its fully qualified name
    :return: list of subclasses whose module starts with *package_name*
    """
    if isinstance(base_class, str):
        module_name, _, class_name = base_class.rpartition('.')
        base_class = getattr(importlib.import_module(module_name), class_name)
    found = set(base_class.__subclasses__())
    for direct in base_class.__subclasses__():
        found.update(get_sub_classes(package_name, direct))
    # limit to the classes of the package
    return [cls for cls in found if cls.__module__.startswith(package_name)]
def sheerka_deepcopy(obj):
    """
    Internal implementation of deepcopy that will eventually handle
    Concept's circular references.

    Custom sentinel types are returned as-is (they are singletons),
    containers are copied recursively, Concepts are rebuilt via
    `copy_concept`, everything else falls back to `copy.deepcopy`.

    :param obj: object to copy
    :return: the copy
    :raises Exception: when an actual circular Concept reference is met
        (not supported yet)
    """
    from core.concept import Concept
    # maps id(original concept) -> its copy; the '_##_REF_##_' marker flags
    # a concept whose copy is still being built, i.e. a circular reference
    already_seen = {}

    def copy_concept(c: Concept):
        id_c = id(c)
        if id_c in already_seen:
            ref = already_seen[id_c]
            if ref == '_##_REF_##_':
                raise Exception("Circular Ref not managed yet!")
            else:
                return ref
        already_seen[id_c] = '_##_REF_##_'
        cls = type(c)
        instance = cls()
        # update the metadata
        for prop_name, prop_value in vars(c.get_metadata()).items():
            if prop_name != "props":
                setattr(instance.get_metadata(), prop_name, prop_value)
            else:
                setattr(instance.get_metadata(), prop_name, sheerka_deepcopy(prop_value))
        # update the values
        # NOTE(review): values are copied by reference (no sheerka_deepcopy
        # here) — confirm this shallow copy is intended
        for prop_name, prop_value in c.values().items():
            setattr(instance, prop_name, prop_value)
        already_seen[id_c] = instance
        return instance

    if isinstance(obj, CustomType):
        return obj
    elif isinstance(obj, dict):
        res = {sheerka_deepcopy(k): sheerka_deepcopy(v) for k, v in obj.items()}
        return res
    elif isinstance(obj, list):
        return [sheerka_deepcopy(item) for item in obj]
    elif isinstance(obj, set):
        return {sheerka_deepcopy(item) for item in obj}
    elif isinstance(obj, tuple):
        return tuple((sheerka_deepcopy(item) for item in obj))
    elif isinstance(obj, Concept):
        return copy_concept(obj)
    else:
        return deepcopy(obj)
def str_concept(t, drop_name=None, prefix="c:"):
    """
    Serialize the (key, id) identifiers of a concept.

    The pair may come from a tuple or from a Concept-like object (anything
    exposing ``name`` and ``id``). Key and id are separated by a hash sign
    and a missing part is simply omitted.

    NOTE: the previous docstring said the parts were "separated by a pipe"
    and expected ``"r:key|id:"`` — the implementation (and the counterpart
    ``unstr_concept``) actually use ``#``; the examples below reflect the
    real output.

    >>> assert str_concept(("key", "id")) == "c:key#id:"
    >>> assert str_concept((None, "id")) == "c:#id:"
    >>> assert str_concept(("key", None)) == "c:key:"
    >>> assert str_concept((None, None)) == ""
    >>> assert str_concept(("key", "id"), prefix='r:') == "r:key#id:"

    :param t: a (key, id) tuple, or an object with ``name``/``id`` attributes
    :param drop_name: True if we only want the id (and not the key)
    :param prefix: leading marker, e.g. ``"c:"`` for concepts, ``"r:"`` for rules
    :return: the serialized identifier, or "" when both parts are None
    """
    if isinstance(t, tuple):
        name, id_ = t[0], t[1]
    else:
        name, id_ = t.name, t.id
    if name is None and id_ is None:
        return ""
    result = prefix if (name is None or drop_name) else prefix + name
    if id_:
        result += "#" + id_
    return result + ":"
def unstr_concept(concept_repr, prefix='c:'):
    """
    Parse a string produced by ``str_concept`` back into a (key, id) tuple.

    >>> assert unstr_concept("c:key:") == ("key", None)
    >>> assert unstr_concept("c:key#id:") == ("key", "id")
    >>> assert unstr_concept("c:#id:") == (None, "id")
    >>> assert unstr_concept("c:key#:") == ("key", None)
    >>> assert unstr_concept("r:key#id:", prefix='r:') == ("key", "id")

    Any malformed input yields ``(None, None)``.

    NOTE: the previous doctests expected the *strings* ``"None"`` for the
    missing parts; the implementation returns the ``None`` singleton, as
    documented above.

    :param concept_repr: serialized concept identifier
    :param prefix: expected leading marker
    :return: (key, id) tuple; missing parts are None
    """
    if not (concept_repr and
            isinstance(concept_repr, str) and
            concept_repr.startswith(prefix) and
            concept_repr.endswith(":")):
        return None, None
    # start scanning right after the prefix (was hard-coded to 2, which
    # silently broke prefixes of any other length)
    i = len(prefix)
    length = len(concept_repr)
    key = ""
    while i < length:
        c = concept_repr[i]
        i += 1
        if c in (":", "#"):
            break
        key += c
    else:
        # nothing after the prefix
        return None, None
    if c == ":":
        # no id part: valid only when the ':' closes the string
        return key if key != "" and i == length else None, None
    c_id = ""
    while i < length:
        c = concept_repr[i]
        i += 1
        if c == ":":
            break
        c_id += c
    else:
        return None, None
    if i != length:
        # trailing garbage after the closing ':'
        return None, None
    return key if key != "" else None, c_id if c_id != "" else None
def compute_hash(obj):
    """
    Best-effort hash for arbitrary (possibly unhashable) objects.

    Lists/tuples are hashed element-wise, sets on their sorted elements
    (making the result order-independent), dicts fall back to ``repr``.
    Anything that still cannot be hashed (e.g. a set of non-comparable
    elements) yields 0 instead of raising.

    :param obj: any object
    :return: an int hash, or 0 when no hash can be computed
    """
    try:
        if isinstance(obj, (list, tuple)):
            return hash(tuple(compute_hash(o) for o in obj))
        if isinstance(obj, set):
            # sorted(obj) replaces the former sorted(list(obj)) — same
            # result, without the intermediate list
            return hash(tuple(compute_hash(o) for o in sorted(obj)))
        if isinstance(obj, dict):
            return hash(repr(obj))
        return hash(obj)
    except Exception:
        # was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt
        return 0
def decode_enum(enum_repr: str):
    """
    Try to turn a ``"module.ClassName.MEMBER"`` string back into an enum member.

    Everything up to the last dot is resolved with :func:`get_class`, the
    remainder is looked up as the member name. Any failure yields None.

    :param enum_repr: the enum's string representation
    :return: the enum member, or None when *enum_repr* cannot be decoded
    """
    if not (enum_repr and isinstance(enum_repr, str)):
        return None
    try:
        idx = enum_repr.rindex(".")
        # A trailing dot means there is no member name. The previous check
        # compared idx against len(enum_repr), a value rindex can never
        # return, so "X." slipped through and blew up later.
        if idx == len(enum_repr) - 1:
            return None
        cls = get_class(enum_repr[:idx])
        return cls[enum_repr[idx + 1:]]
    except (ValueError, TypeError, KeyError, ImportError, AttributeError):
        # ValueError: no dot at all; KeyError: unknown member name;
        # ImportError/AttributeError: the class path cannot be resolved
        # (previously these last three escaped to the caller)
        return None
def to_dict(items, get_attr):
    """
    Group *items* into a dictionary keyed by ``get_attr(item)``.

    Items that share a key are merged into a list, preserving input order.

    :param items: iterable of items to group
    :param get_attr: callable extracting the grouping key from one item
    :return: dict mapping key -> list of items
    """
    grouped = {}
    for element in items:
        key = get_attr(element)
        if key in grouped:
            grouped[key].append(element)
        else:
            grouped[key] = [element]
    return grouped
+7
View File
@@ -0,0 +1,7 @@
class BuiltinConcepts:
    """
    Names of the concepts built into Sheerka itself.

    The double-underscore prefix presumably keeps them from clashing with
    user-defined concept names.
    """
    SHEERKA = "__SHEERKA"
    NEW_CONCEPT = "__NEW_CONCEPT"
    UNKNOWN_CONCEPT = "__UNKNOWN_CONCEPT"
    USER_INPUT = "__USER_INPUT"
    PARSER_INPUT = "__PARSER_INPUT"
+35
View File
@@ -0,0 +1,35 @@
from common.utils import compute_hash
from core.ExecutionContext import ExecutionContext
class SheerkaException(Exception):
    """Base exception for Sheerka-specific errors."""
    pass
class ErrorContext:
    """
    This class represents the (error) result of a data flow processing.

    ``value`` typically carries the exception that interrupted the flow
    (see e.g. its use in Sheerka.initialize).
    """

    def __init__(self, who: str, context: ExecutionContext, value: object = None):
        self.who = who  # component that reported the error
        self.context = context  # execution context in which the error occurred
        self.value = value  # error payload (typically the exception)
        # NOTE(review): never assigned anywhere in this class — confirm usage
        self.parents = None

    def __repr__(self):
        return f"Error(who={self.who}, context_id={self.context.long_id}, value={self.value})"

    def __eq__(self, other):
        # identity fast-path
        if id(self) == id(other):
            return True
        if not isinstance(other, ErrorContext):
            return False
        # `parents` is not compared, consistently with __hash__ below
        return self.who == other.who and \
            self.context.id == other.context.id and \
            self.value == other.value

    def __hash__(self):
        # compute_hash makes unhashable values (lists, dicts, ...) usable here
        return hash((self.who, self.context.id, compute_hash(self.value)))
+69
View File
@@ -0,0 +1,69 @@
import hashlib
from datetime import datetime
class Event(object):
    """
    Something that modifies the state of the system.

    An event is identified by a sha256 digest computed lazily from its
    author, date, message and parent digests, and frozen afterwards.
    """

    def __init__(self, message="", user_id="", date=None, parents=None):
        self.user_id: str = user_id  # id of the user that triggers the modification
        self.date: datetime | None = date or datetime.now()  # when
        self.message: str = message  # user input or whatever that modifies the system
        self.parents: list[str] = parents  # digest(s) of the parent(s) of this event
        self._digest: str | None = None  # lazily computed digest of the event

    def __str__(self):
        return f"{self.date.strftime('%d/%m/%Y %H:%M:%S')} {self.message}"

    def __repr__(self):
        return f"{self.get_digest()[:12]} {self.message}"

    def get_digest(self):
        """
        Return the sha256 digest of the event, computing and caching it on
        first use.

        :return: sha256 hex digest (or a short fake digest in unit tests)
        """
        if self._digest:
            return self._digest
        if self.user_id == "":
            # only possible during the unit tests: keep digests cheap and
            # predictable instead of hashing
            prefix = "TEST::"
            self._digest = self.message[len(prefix):] if self.message.startswith(prefix) else "xxx"
            return self._digest
        if not isinstance(self.message, str):
            raise NotImplementedError(f"message={self.message}")
        payload = f"Event:{self.user_id}{self.date}{self.message}{self.parents}"
        self._digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()
        return self._digest

    def to_dict(self):
        return self.__dict__

    def from_dict(self, as_dict):
        self.user_id = as_dict["user_id"]
        self.date = datetime.fromisoformat(as_dict["date"])
        self.message = as_dict["message"]
        self.parents = as_dict["parents"]
        self._digest = as_dict["_digest"]  # freeze the digest

    def __eq__(self, other):
        if self is other:
            return True
        if not isinstance(other, Event):
            return False
        mine = (self.user_id, self.date, self.message, self.parents)
        theirs = (other.user_id, other.date, other.message, other.parents)
        return mine == theirs

    def __hash__(self):
        return hash(self.get_digest())
+180 -1
View File
@@ -1,2 +1,181 @@
from __future__ import annotations
import time
from core.Event import Event
class ExecutionContextActions:
    """Labels describing what an ExecutionContext is currently doing."""
    TESTING = "Testing"
    INIT_SHEERKA = "Init Sheerka"
    EVALUATE_USER_INPUT = "Evaluate user input"
    EVALUATING_STEP = "Evaluating step"
    EVALUATING_ITERATION = "Evaluating iteration"
    BEFORE_PARSING = "Before parsing"
    PARSING = "Parsing"
    AFTER_PARSING = "After parsing"
    BEFORE_EVALUATION = "Before evaluation"
    EVALUATION = "Evaluation"
    AFTER_EVALUATION = "After Evaluation"
class ContextHint:
    """Hints that can be attached to an execution context to steer processing."""
    REDUCE_CONCEPTS = "Reduce Concepts"  # to tell the process to only keep the meaningful results
ids = {}  # next execution-context id, per event digest


def get_next_id(event_digest):
    """
    Return the next sequential execution-context id for the given event.

    The first context of an event gets 0, each following call with the
    same digest increments by one.

    NOTE(review): the module-level ``ids`` map grows without bound and is
    not thread-safe — confirm both are acceptable for the callers.

    :param event_digest: digest identifying the event
    :return: the next id (int)
    """
    ids[event_digest] = ids.get(event_digest, -1) + 1
    return ids[event_digest]
class ExecutionContext:
    """
    To keep track of the execution of a request.

    Contexts form a tree: each `push()` creates a child sharing the same
    event and global hints, and receiving a *copy* of the protected hints.
    Note that the protected hints are working correctly only if the hint
    is added BEFORE the creation of the child.

    (A stray ``pass`` statement used to sit between the class header and
    this docstring, which demoted it to a dead string expression — removed.)
    """

    def __init__(self,
                 who: str,
                 event: Event,
                 sheerka,
                 action: ExecutionContextActions,
                 action_context: object,
                 desc: str = None,
                 logger=None,
                 global_hints=None,
                 protected_hints=None,
                 parent: ExecutionContext = None):
        """
        :param who: who is asking
        :param event: what was the (original) trigger
        :param sheerka: the Sheerka engine instance
        :param action: one of the ExecutionContextActions labels
        :param action_context: payload associated with the action
        :param desc: human description of what is going on
        :param logger: logger to use (inherited by children)
        :param global_hints: hints shared by the whole context tree
        :param protected_hints: hints copied (not shared) into this context
        :param parent: enclosing context, if any
        """
        self._id = get_next_id(event.get_digest())
        self._parent = parent
        self._children = []
        self._start = 0  # when the execution starts (to measure elapsed time)
        self._stop = 0  # when the execution stops (to measure elapsed time)
        self._logger = logger
        self.who = who  # who is asking
        self.event = event  # what was the (original) trigger
        self.sheerka = sheerka  # sheerka
        self.action = action
        self.action_context = action_context
        self.desc = desc  # human description of what is going on
        self.private_hints = set()
        # copy: a child must not see hints added to the parent afterwards
        self.protected_hints = set() if protected_hints is None else protected_hints.copy()
        # shared (not copied) across the whole tree
        self.global_hints = set() if global_hints is None else global_hints
        self.inputs = {}  # what were the parameters of the execution context
        self.values = {}  # what was produced by the execution context

    def __repr__(self):
        msg = f"ExecutionContext(who={self.who}, id={self._id}, action={self.action}, context={self.action_context}"
        if self.desc:
            msg += f", desc='{self.desc}'"
        msg += ")"
        return msg

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if not isinstance(other, ExecutionContext):
            return False
        return self.long_id == other.long_id

    def __hash__(self):
        return hash(self.long_id)

    @property
    def long_id(self):
        """Globally unique id: event digest + per-event sequence number."""
        return f"{self.event.get_digest()}:{self._id}"

    @property
    def id(self):
        return self._id

    @property
    def elapsed(self):
        """Elapsed nanoseconds; live value while the context is still open."""
        if self._start == 0:
            return 0
        return (self._stop if self._stop > 0 else time.time_ns()) - self._start

    @property
    def elapsed_str(self):
        """Human-readable elapsed time (ms below one second, else seconds)."""
        nano_sec = self.elapsed
        dt = nano_sec / 1e6
        return f"{dt} ms" if dt < 1000 else f"{dt / 1000} s"

    def add_inputs(self, **kwargs):
        """
        When entering/stacking an ExecutionContext, record the variables
        that are worth tracing.

        :return: self (fluent)
        """
        self.inputs.update(kwargs)
        return self

    def add_values(self, **kwargs):
        """
        When popping from an ExecutionContext, record the variables that
        are worth tracing.

        :return: self (fluent)
        """
        self.values.update(kwargs)
        return self

    def push(self,
             who: str,
             action: ExecutionContextActions,
             action_context: object,
             desc: str = None,
             logger=None):
        """
        Create (and register) a child context for a sub-step.

        :return: the new child ExecutionContext
        """
        child = ExecutionContext(
            who,
            self.event,
            self.sheerka,
            action,
            action_context,
            desc,
            logger or self._logger,
            self.global_hints,
            self.protected_hints,
            self
        )
        self._children.append(child)
        return child

    def get_children(self, level=-1):
        """
        Recursively look for children.

        :param level: maximum depth; any negative value means unlimited
        :return: generator of descendant contexts (depth-first)
        """
        for child in self._children:
            yield child
            if level != 1:
                yield from child.get_children(level - 1)

    def __enter__(self):
        self._start = time.time_ns()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._stop = time.time_ns()
+32
View File
@@ -0,0 +1,32 @@
from typing import Any
from common.utils import compute_hash
class ReturnValue:
    """
    This class represents the result of a data flow processing.
    """

    def __init__(self, who: str = None, status: bool = None, value: Any = None, parents: list = None):
        self.who = who  # producer of the value
        self.status = status  # success flag
        self.value = value  # payload
        self.parents = parents  # upstream results, if tracked

    def __repr__(self):
        return f"ReturnValue(who={self.who}, status={self.status}, value={self.value})"

    def __eq__(self, other):
        # identity fast-path
        if self is other:
            return True
        if not isinstance(other, ReturnValue):
            return False
        # `parents` is not part of the comparison, matching __hash__
        return self.who == other.who \
            and self.status == other.status \
            and self.value == other.value

    def __hash__(self):
        # compute_hash makes unhashable payloads usable here
        return hash((self.who, self.status, compute_hash(self.value)))
+323 -1
View File
@@ -1,2 +1,324 @@
import inspect
import logging
import sys
from dataclasses import dataclass
from operator import attrgetter
from os import path
from typing import Callable
from caching.Cache import Cache
from caching.IncCache import IncCache
from common.utils import get_logger_name, get_sub_classes, import_module_and_sub_module
from core.BuiltinConcepts import BuiltinConcepts
from core.ErrorContext import ErrorContext
from core.Event import Event
from core.ExecutionContext import ContextHint, ExecutionContext, ExecutionContextActions
from core.ReturnValue import ReturnValue
from core.concept import Concept, ConceptMetadata
from ontologies.SheerkaOntologyManager import SheerkaOntologyManager
from server.authentication import User
# Ordered pipeline of actions applied when evaluating a user input
# (see Sheerka.evaluate_user_input, which passes it to self.execute).
EXECUTE_STEPS = [
    ExecutionContextActions.BEFORE_PARSING,
    ExecutionContextActions.PARSING,
    ExecutionContextActions.AFTER_PARSING,
    ExecutionContextActions.BEFORE_EVALUATION,
    ExecutionContextActions.EVALUATION,
    ExecutionContextActions.AFTER_EVALUATION
]
@dataclass
class SheerkaConfig:
    """Runtime configuration flags of the Sheerka engine."""
    # After each execution, persist the whole execution flow as a file.
    # This file will be used by the debugger.
    save_execution_context: bool = True
@dataclass
class SheerkaMethod:
    """
    Wrapper to a sheerka method, to indicate if it's safe to call
    (i.e. whether calling it can modify the engine state).
    """
    name: str  # name under which the method is exposed
    service: str  # owning service
    method: Callable  # the bound method itself
    has_side_effect: bool  # True when the call can update Sheerka's state

    def __repr__(self):
        return self.name

    def __hash__(self):
        # identity is (name, service); the dataclass-generated __eq__ still
        # compares all fields, so equal objects share this hash
        return hash((self.name, self.service))
class Sheerka:
    pass
    # NOTE(review): the `pass` above looks like a leftover placeholder
    # (harmless here since it precedes plain class attributes) — safe to delete.

    # registry keys for the engine-level caches (see initialize_caching)
    OBJECTS_IDS_ENTRY = "Objects_Ids"
    CHICKEN_AND_EGG_CONCEPTS_ENTRY = "Chicken_And_Egg_Concepts"

    def __init__(self):
        """
        Engine of the so called Sheerka
        """
        self.name = "Sheerka"
        self.om: SheerkaOntologyManager = None
        self.config = SheerkaConfig()
        self.during_initialisation = False
        self.log = logging.getLogger(get_logger_name(__name__))
        self.init_log = logging.getLogger(get_logger_name("init." + __name__))
        self.services = {}  # sheerka plugins
        self.evaluators = {}  # cache for evaluators
        self.sheerka_methods = {}  # name -> SheerkaMethod
        self.methods_with_context = set()  # only the names, the method is defined in sheerka_methods
        self.global_context_hints = set()  # hints propagated to every ExecutionContext

    def bind_service_method(self, service_name, bound_method, can_modify_state, as_name=None, visible=True):
        """
        Bind service method to sheerka instance for ease of use ?
        :param service_name:
        :param bound_method:
        :param can_modify_state: Can update the state of Sheerka => can produce side_effect
        :param as_name: give another name to the method
        :param visible: make the method visible to Sheerka
        :return:
        """
        if as_name is None:
            as_name = bound_method.__name__
        if visible:
            # a leading "context" parameter flags methods needing the ExecutionContext
            signature = inspect.signature(bound_method)
            if len(signature.parameters) > 0 and list(signature.parameters.keys())[0] == "context":
                self.methods_with_context.add(as_name)
            self.sheerka_methods[as_name] = SheerkaMethod(as_name, service_name, bound_method, can_modify_state)
        setattr(self, bound_method.__name__, bound_method)

    def initialize(self, root_folder: str = None, **kwargs):
        """
        Starting Sheerka
        Loads the current configuration
        Notes that when it's the first time, it also creates the needed working folders
        :param root_folder: root configuration folder
        :return: ReturnValue(Success or Error)
        """
        if root_folder is None:
            root_folder = path.abspath(path.join(path.expanduser("~"), ".sheerka"))
        self.initialize_logging(False, root_folder)
        self.config.save_execution_context = kwargs.get("save_execution_context", self.config.save_execution_context)
        try:
            self.init_log.info("Starting Sheerka")
            self.during_initialisation = True
            # from sheerkapickle.sheerka_handlers import initialize_pickle_handlers
            # initialize_pickle_handlers()
            self.om = SheerkaOntologyManager(self, root_folder)
            # self.builtin_cache, self.builtin_cache_by_class_name = self.get_builtins_classes_as_dict()
            self.initialize_caching()
            self.initialize_evaluators()
            self.initialize_services()
            # self.initialize_builtin_evaluators()
            # self.om.init_subscriptions()
            event = Event("Initializing Sheerka.", user_id=self.name)
            self.om.save_event(event)
            with ExecutionContext(self.name,
                                  event,
                                  self,
                                  ExecutionContextActions.INIT_SHEERKA,
                                  None,
                                  desc="Initializing Sheerka.") as exec_context:
                if self.om.current_sdp().first_time:
                    self.first_time_initialisation(exec_context)
                self.initialize_services_deferred(exec_context, self.om.current_sdp().first_time)
                res = ReturnValue(self.name, True, self.get_startup_config())
                exec_context.add_values(return_values=res)
                if self.om.is_dirty():
                    self.om.commit(exec_context)
                if self.config.save_execution_context:
                    self.om.save_execution_context(exec_context, is_admin=True)
            # append the other ontologies if needed
            self.om.freeze()
            self.initialize_ontologies(exec_context)
            # self.init_log.debug(f"Sheerka successfully initialized")
        except IOError as e:
            # NOTE(review): if the IOError is raised before the `with` block
            # above, `exec_context` is unbound here and this line raises a
            # NameError — consider initializing exec_context = None first.
            res = ReturnValue(self.name, False, ErrorContext(self.name, exec_context, e))
        finally:
            self.during_initialisation = False
        return res

    @staticmethod
    def initialize_logging(is_debug, root_folder):
        """
        Configure the root logger (stdout handler, colored ERROR level).

        NOTE(review): `root_folder` is unused here — confirm whether a file
        handler was intended.
        """
        if is_debug:
            # log_format = "%(asctime)s %(name)s"
            log_format = "[%(levelname)s] [%(name)s]"
            log_format += " %(message)s"
            log_level = logging.DEBUG
        else:
            log_format = "%(message)s"
            log_level = logging.INFO
        logging.basicConfig(format=log_format, level=log_level, handlers=[logging.StreamHandler(sys.stdout)])
        logging.addLevelName(logging.ERROR, f"\033[1;41m%s\033[1;0m{logging.getLevelName(logging.ERROR)}")

    def initialize_ontologies(self, context):
        """Push every previously loaded ontology (except the last one) back on the stack."""
        ontologies = self.om.current_sdp().load_ontologies()
        if not ontologies:
            return
        for ontology_name in list(reversed(ontologies))[1:]:
            self.om.push_ontology(ontology_name, False)
        # self.initialize_services_deferred(context, False)

    def first_time_initialisation(self, context):
        """Hook executed on the very first start (currently does nothing)."""
        pass
        # self.record_var(context, self.name, "save_execution_context", self.save_execution_context)

    def initialize_caching(self):
        """Create and register the engine-level caches with the ontology manager."""
        cache = IncCache().auto_configure(self.OBJECTS_IDS_ENTRY)
        self.om.register_cache(self.OBJECTS_IDS_ENTRY, cache)
        cache = Cache().auto_configure(self.CHICKEN_AND_EGG_CONCEPTS_ENTRY)
        self.om.register_cache(self.CHICKEN_AND_EGG_CONCEPTS_ENTRY, cache, persist=False)

    def initialize_services(self):
        """
        Introspect to find services and bind them
        :return:
        """
        self.init_log.info("Initializing services")
        import_module_and_sub_module('core.services')
        base_class = "core.services.BaseService.BaseService"
        services = [service(self) for service in get_sub_classes("core.services", base_class)]
        # services declare an `order` attribute driving initialization order
        services.sort(key=attrgetter("order"))
        for service in services:
            if hasattr(service, "initialize"):
                service.initialize()
            self.services[service.NAME] = service
        self.init_log.info(f"{len(services)} service(s) found.")

    def initialize_services_deferred(self, context, is_first_time):
        """
        Initialize part of services that may take some time or that need the execution context
        :return:
        """
        self.init_log.debug(f"Initializing services (deferred, {is_first_time=})")
        for service in self.services.values():
            if hasattr(service, "initialize_deferred"):
                service.initialize_deferred(context, is_first_time)

    def initialize_evaluators(self):
        """Discover and instantiate every evaluator class of the `evaluators` package."""
        self.init_log.info("Initializing evaluators")
        base_class1 = "evaluators.base_evaluator.OneReturnValueEvaluator"
        base_class2 = "evaluators.base_evaluator.AllReturnValuesEvaluator"
        import_module_and_sub_module('evaluators')
        evaluators = [evaluator() for evaluator in get_sub_classes("evaluators", base_class1)] + \
                     [evaluator() for evaluator in get_sub_classes("evaluators", base_class2)]
        self.evaluators = {e.NAME: e for e in evaluators}
        self.init_log.info(f"{len(evaluators)} evaluator(s) found.")

    def bind_services_methods(self):
        # init methods
        # self.bind_service_method(self.name, self.test, False)
        # self.bind_service_method(self.name, self.test_using_context, False)
        # self.bind_service_method(self.name, self.test_dict, False)
        # self.bind_service_method(self.name, self.test_error, False)
        # self.bind_service_method(self.name, self.is_sheerka, False)
        # self.bind_service_method(self.name, self.objvalue, False)
        pass

    def get_startup_config(self):
        """
        Return a dictionary with current configuration, used for initialization
        :return:
        :rtype:
        """
        return {
            "config": self.config.__dict__
        }

    def publish(self, context, topic, data=None):
        """
        To be removed as it must be part of the EventManager service
        :param context:
        :type context:
        :param topic:
        :type topic:
        :param data:
        :type data:
        :return:
        :rtype:
        """
        pass

    def evaluate_user_input(self, command: str, user: User):
        """
        Evaluate one user command inside a fresh ExecutionContext and
        commit the resulting state changes if any.

        NOTE(review): `self.newn` and `self.execute` are not defined in this
        class — presumably bound at runtime via bind_service_method; confirm.
        """
        self.log.info("Processing '%s' from '%s'", command, user.email)
        event = Event(command, user_id=user.email)
        self.om.save_event(event)
        with ExecutionContext(user.email,
                              event,
                              self,
                              ExecutionContextActions.EVALUATE_USER_INPUT,
                              command,
                              desc=f"Evaluating '{command}'",
                              global_hints=self.global_context_hints.copy()) as exec_context:
            user_input = ReturnValue(self.name, True, self.newn(BuiltinConcepts.USER_INPUT, command=command))
            exec_context.private_hints.add(ContextHint.REDUCE_CONCEPTS)
            # KSI : 2023-04-30
            # (translated) The `execute` method and the whole
            # SheerkaProcessUserInput class are still missing here.
            exec_context.add_inputs(user_input=user_input)
            ret = self.execute(exec_context, [user_input], EXECUTE_STEPS)
            exec_context.add_values(return_values=ret)
            if self.om.is_dirty():
                self.om.commit(exec_context)
        return ret

    def isinstance(self, a, b):
        """
        Returns true if 'a' is a concept of type 'b'
        Note that this function can be moved into ConceptManager
        I keep it here for quick access
        (inside the body, `isinstance(...)` still resolves to the builtin —
        this method only shadows the name as an attribute of Sheerka)
        :param a:
        :type a:
        :param b:
        :type b:
        :return:
        :rtype:
        """
        if not isinstance(a, Concept):
            return False
        if isinstance(b, (Concept, ConceptMetadata)):
            return a.id == b.id
        if b.startswith("c:#"):
            # "c:#<id>:" string form (see str_concept): strip prefix and trailing ':'
            return a.id == b[3:-1]
        return a.key == b
View File
+201
View File
@@ -0,0 +1,201 @@
from dataclasses import dataclass
from common.global_symbols import NotFound, NotInit
class ConceptDefaultProps:
    """
    Lists the metadata (property names) that can contain some code.
    """
    WHERE = "#where#"  # condition to recognize variables in the name
    PRE = "#pre#"  # preconditions before calling the main function
    POST = "#post#"  # post conditions after calling the main function
    BODY = "#body#"  # main method / value of the concept
    RET = "#ret#"  # variable to return when a concept is recognized


# All code-carrying property values, collected from the class above
# (the underscore filter drops __doc__ & co.)
DefaultProps = [v for k, v in ConceptDefaultProps.__dict__.items() if not k.startswith("_")]
class DefinitionType:
    """How a concept's definition string must be interpreted."""
    DEFAULT = "Default"  # regex-based definition
    BNF = "Bnf"  # BNF grammar definition
@dataclass
class ConceptMetadata:
    """
    Static information of the Concept.

    The last two fields default to None (digest computed elsewhere,
    all_attrs cached lazily).
    """
    id: str  # unique identifier for a concept. The id will never be modified (but the key can)
    name: str
    key: str
    is_builtin: bool
    is_unique: bool
    body: str  # main method, can also be the value of the concept
    where: str  # condition to recognize variables in name
    pre: str  # list of preconditions before calling the main function
    post: str  # list of post conditions after calling the main function
    ret: str  # variable to return when a concept is recognized
    definition: str  # regex used to define the concept
    definition_type: DefinitionType  # definition can be done with something else than regex
    desc: str  # possible description for the concept
    autouse: bool  # indicates if eval must be automatically called on the concept once validated
    bound_body: str  # which property must be considered to have the default value for the concept
    props: dict  # hashmap of properties, values
    variables: tuple  # list of concept variables(tuple), with their default values
    parameters: tuple  # list of variables that are part of the name of the concept
    digest: str = None
    all_attrs: tuple = None
@dataclass
class ConceptRuntimeInfo:
    """
    Dynamic information of the Concept.

    These flags are related to one *instance* of the concept, as opposed to
    the static ConceptMetadata.
    """
    is_evaluated: bool = False  # True once the concept went through sheerka.eval_concept()
    need_validation: bool = False  # True if the properties of the concept need to be validated
    recognized_by: str = None  # RECOGNIZED_BY_ID, RECOGNIZED_BY_NAME, RECOGNIZED_BY_KEY (from Sheerka.py)

    def copy(self):
        """Return an independent copy of this runtime info."""
        return ConceptRuntimeInfo(self.is_evaluated, self.need_validation, self.recognized_by)
class Concept:
    """
    Default concept object.

    A concept is the base object of our universe — everything is a concept.
    Static information lives in ConceptMetadata, per-instance flags in
    ConceptRuntimeInfo.
    """

    def __init__(self, metadata: ConceptMetadata):
        self._metadata: ConceptMetadata = metadata
        self._compiled = {}  # cached ast for the where, pre, post and body parts and variables
        self._compiled_context_hints = {}  # context hints to use when evaluating compiled
        self._bnf = None  # compiled bnf expression
        self._runtime_info = ConceptRuntimeInfo()  # runtime settings for the concept
        self._all_attrs = None  # instance-level override of metadata.all_attrs

    def __repr__(self):
        text = f"({self._metadata.id}){self._metadata.name}"
        if self._metadata.pre:
            text += f", #pre={self._metadata.pre}"
        # only the plain attributes, not the "#..." code-carrying properties
        for attr in [attr for attr in self.all_attrs() if not attr.startswith("#")]:
            text += f", {attr}={self.get_value(attr)}"
        return text

    def __eq__(self, other):
        # I don't want this test to be part of the recursion,
        # so let's just get rid of it first
        if not isinstance(other, Concept):
            return False
        # I chose to use an iterative algorithm in order to be able to spot
        # circular references without inner functions.
        # I also think that it's a better approach for a function that can
        # be massively called.
        stack = [self, other]
        id_self = id(self)
        while stack:
            right = stack.pop()
            left = stack.pop()
            if id(left) == id(right):
                return True
            # 1. in order for two concepts to be equal, they must have the same definition
            # 2. They must have the same properties and variables
            if left.get_definition_digest() != right.get_definition_digest():
                return False
            if left.all_attrs() != right.all_attrs():
                return False
            for attr in left.all_attrs():
                value = left.get_value(attr)
                other_value = right.get_value(attr)
                if isinstance(value, Concept) and isinstance(other_value, Concept):
                    if id(value) == id_self or id(other_value) == id_self:
                        # infinite recursion detected
                        pass
                    else:
                        stack.extend([value, other_value])
                else:
                    if value != other_value:
                        return False
        return True

    def __hash__(self):
        # hash() must return an int; `digest` is annotated as a str, so
        # returning it directly (as before) raised TypeError — hash it.
        return hash(self._metadata.digest)

    @property
    def id(self):
        return self._metadata.id

    @property
    def name(self):
        return self._metadata.name

    @property
    def key(self):
        return self._metadata.key

    @property
    def body(self):
        return self.get_value(ConceptDefaultProps.BODY)

    @property
    def str_id(self):
        """Serialized identifier, preferring the id over the name."""
        return f"c:#{self.id}:" if self.id else f"c:{self.name}:"

    def get_definition_digest(self):
        return self._metadata.digest

    def all_attrs(self):
        """Attribute names of this concept (instance override, else metadata)."""
        if self._all_attrs is None:
            return self._metadata.all_attrs
        return self._all_attrs

    def get_metadata(self) -> ConceptMetadata:
        return self._metadata

    def set_value(self, name: str, value: object):
        """
        Set the resolved value of a metadata or a variable (not the metadata itself).

        Keeps `bound_body` and the BODY property in sync, in both directions.

        :param name: attribute name
        :param value: resolved value
        :return: self (fluent)
        """
        setattr(self, name, value)
        if name == self._metadata.bound_body:
            setattr(self, ConceptDefaultProps.BODY, value)
        elif self._metadata.bound_body and name == ConceptDefaultProps.BODY:
            setattr(self, self._metadata.bound_body, value)
        return self

    def get_value(self, name: str):
        """
        Get the resolved value of a metadata.

        :param name: attribute name
        :return: the value; NotInit when the attribute is declared but not
            yet set, NotFound when it is not declared at all
        """
        try:
            return getattr(self, name)
        except AttributeError:
            return NotInit if name in self.all_attrs() else NotFound

    def get_runtime_info(self):
        return self._runtime_info
-31
View File
@@ -1,31 +0,0 @@
class CustomType:
    """
    Base class for custom types used in Sheerka.
    A custom type has exactly one instance across the application and carries
    a semantic meaning, just like the builtin singleton ``None`` does.
    """
    def __init__(self, value):
        self.value = value
    def __repr__(self):
        return self.value
    def __eq__(self, other):
        # Only another CustomType wrapping the same value compares equal
        if not isinstance(other, CustomType):
            return False
        return self.value == other.value
    def __hash__(self):
        # Consistent with __eq__: equal wrappers hash like their value
        return hash(self.value)
class NotFoundType(CustomType):
    """
    Used when an entry is not found in a Cache or in the sdp.

    Compare with ``is NotFound`` (module-level singleton below).
    """
    def __init__(self):
        # Python 3 zero-argument super() — equivalent to the explicit form
        super().__init__("**NotFound**")
NotFound = NotFoundType()  # module-level singleton: always compare with `is NotFound`
+49
View File
@@ -0,0 +1,49 @@
from common.global_symbols import NotFound
from common.utils import sheerka_deepcopy
from core.Sheerka import Sheerka
class BaseService:
    """
    Base class for services.

    Concrete services define a class-level NAME and may override
    state_properties() to list the attributes that take part in
    push_state()/pop_state().
    """
    def __init__(self, sheerka: Sheerka, order=999):
        self.sheerka = sheerka
        self.order = order  # initialisation order: the lowest is initialized first
    def initialize(self):
        """
        Adds cache or bind methods
        :return:
        """
        pass
    def state_properties(self):
        """Names of the attributes saved/restored by push_state()/pop_state()."""
        pass
    def push_state(self, context):
        """
        Use variable Manager to store the state of the service
        """
        for name in self.state_properties() or ():
            snapshot = sheerka_deepcopy(getattr(self, name))
            self.sheerka.record_var(context, self.NAME, name, snapshot)
    def pop_state(self):
        """
        Use Variable Manager to restore the state of a service
        :return:
        """
        for name in self.state_properties() or ():
            value = self.sheerka.load_var(self.NAME, name)
            if value is not NotFound:
                setattr(self, name, value)
    def store_var(self, context, var_name):
        """
        Store/record the value of an attribute
        """
        self.sheerka.record_var(context, self.NAME, var_name, getattr(self, var_name))
+343
View File
@@ -0,0 +1,343 @@
import hashlib
import logging
from dataclasses import dataclass
from caching.Cache import Cache
from caching.FastCache import FastCache
from caching.ListIfNeededCache import ListIfNeededCache
from common.global_symbols import NotFound, NotInit, VARIABLE_PREFIX
from common.utils import get_logger_name
from core.BuiltinConcepts import BuiltinConcepts
from core.ErrorContext import ErrorContext, SheerkaException
from core.ExecutionContext import ExecutionContext
from core.ReturnValue import ReturnValue
from core.concept import Concept, ConceptMetadata, DefaultProps, DefinitionType
from core.services.BaseService import BaseService
from parsers.tokenizer import TokenKind, Tokenizer, strip_tokens
# Metadata fields that participate in a concept's definition digest
# (see ConceptManager.compute_metadata_digest); `id` is deliberately excluded
# so the digest only depends on the definition itself.
PROPERTIES_FOR_DIGEST = ("name", "key",
                         "definition", "definition_type",
                         "is_builtin", "is_unique",
                         "where", "pre", "post", "body", "ret",
                         "desc", "bound_body", "autouse", "props", "variables", "parameters")
@dataclass
class ConceptAlreadyDefined(SheerkaException):
    """Raised when a new concept has the same definition digest as an existing one."""
    concept: ConceptMetadata  # the metadata that was being defined
    already_defined_id: str  # id of the concept that already owns this digest
@dataclass
class InvalidBnf(SheerkaException):
    """Raised when a BNF concept definition cannot be compiled."""
    bnf: str  # the offending bnf expression
@dataclass
class FirstItemError(SheerkaException):
    """Raised when the first-items caches cannot be recomputed for a new concept."""
    pass
class ConceptManager(BaseService):
    """
    The service is used for the administration of concepts.
    You can define new concepts, modify or delete them.
    There are also functions to help retrieve them easily (like first token cache).
    Already instantiated concepts are managed by the Memory service.
    """
    NAME = "ConceptManager"
    USER_CONCEPTS_IDS = "User_Concepts_IDs"  # counter entry used to allocate new concept ids
    CONCEPTS_BY_ID_ENTRY = "ConceptManager:Concepts_By_ID"  # to store all the concepts
    CONCEPTS_BY_KEY_ENTRY = "ConceptManager:Concepts_By_Key"  # index by concept key
    CONCEPTS_BY_NAME_ENTRY = "ConceptManager:Concepts_By_Name"  # index by concept name
    CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash"  # index by definition digest
    def __init__(self, sheerka):
        super().__init__(sheerka, order=11)
        self.log = logging.getLogger(get_logger_name(__name__))
        self.init_log = logging.getLogger(get_logger_name("init." + __name__))
        self.bnf_expr_cache = FastCache()  # concept id -> compiled bnf expression
    def initialize(self):
        """Bind the public service methods and register the concept caches."""
        self.init_log.debug(f"Initializing ConceptManager, order={self.order}")
        self.sheerka.bind_service_method(self.NAME, self.define_new_concept, True)
        self.sheerka.bind_service_method(self.NAME, self.newn, True)
        self.sheerka.bind_service_method(self.NAME, self.newi, True)
        register_concept_cache = self.sheerka.om.register_concept_cache
        # Cache of concept metadata, organized by id
        cache = Cache().auto_configure(self.CONCEPTS_BY_ID_ENTRY)
        register_concept_cache(self.CONCEPTS_BY_ID_ENTRY, cache, lambda c: c.id, True)
        # key, name and digest may be shared by several concepts, hence ListIfNeededCache
        cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_KEY_ENTRY)
        register_concept_cache(self.CONCEPTS_BY_KEY_ENTRY, cache, lambda c: c.key, True)
        cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_NAME_ENTRY)
        register_concept_cache(self.CONCEPTS_BY_NAME_ENTRY, cache, lambda c: c.name, True)
        cache = ListIfNeededCache().auto_configure(self.CONCEPTS_BY_HASH_ENTRY)
        register_concept_cache(self.CONCEPTS_BY_HASH_ENTRY, cache, lambda c: c.digest, True)
    def initialize_deferred(self, context, is_first_time):
        """Create the builtin concepts (first run only) and seed the id counter."""
        if is_first_time:
            self.sheerka.om.put(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS, 1000)
            _ = self._create_builtin_concept  # local alias to keep the lines below short
            _(1, BuiltinConcepts.SHEERKA, desc="Sheerka")
            _(2, BuiltinConcepts.NEW_CONCEPT, desc="On new concept creation", variables=("metadata",))
            _(3, BuiltinConcepts.UNKNOWN_CONCEPT, desc="Unknown concept", variables=("requested_name", "requested_id"))
            _(4, BuiltinConcepts.USER_INPUT, desc="Any external input", variables=("command",))
            _(5, BuiltinConcepts.PARSER_INPUT, desc="tokenized input", variables=("pi",))
        self.init_log.debug('%s builtin concepts created',
                            len(self.sheerka.om.current_cache_manager().concept_caches))
    def define_new_concept(self, context: ExecutionContext,
                           name: str,
                           is_builtin: bool = False,  # is the concept defined by Sheerka itself
                           is_unique: bool = False,  # is the concept a singleton
                           body: str = "",  # return value of the concept
                           where: str = "",  # condition to recognize variables in name
                           pre: str = "",  # list of preconditions before calling the main function
                           post: str = "",  # list of post conditions after calling the main function
                           ret: str = "",  # variable to return when a concept is recognized
                           definition: str = "",  # regex used to define the concept
                           definition_type: DefinitionType = DefinitionType.DEFAULT,
                           autouse: bool = False,  # indicate if the concept must be automatically evaluated
                           bound_body: str = None,  # attribute mirrored into the concept body (see Concept.set_value)
                           desc: str = "",  # possible description for the concept
                           props: dict = None,  # hashmap of default properties
                           variables: list = None,  # list of concept variables(tuple), with their default values
                           parameters: list = None  # list of variables that are part of the name of the concept
                           ) -> ReturnValue:
        """
        Adds the definition of a new concept.
        :return: a failed ReturnValue when an identical definition already exists,
            otherwise a successful one wrapping a NEW_CONCEPT instance
        :rtype: ReturnValue
        """
        concept_key = self.create_concept_key(name, definition, variables)
        concept_id = "waiting for id"  # placeholder until the real id is allocated below
        metadata = ConceptMetadata(
            concept_id,
            name,
            concept_key,
            is_builtin,
            is_unique,
            body,
            where,
            pre,
            post,
            ret,
            definition,
            definition_type,
            desc,
            autouse,
            bound_body,
            props or {},
            variables or (),
            parameters or (),
        )
        # Reject duplicate definitions (same digest) before touching any cache
        digest = self.compute_metadata_digest(metadata)
        if self.sheerka.om.exists_in_current(self.CONCEPTS_BY_HASH_ENTRY, digest):
            already_defined = self.sheerka.om.get(self.CONCEPTS_BY_HASH_ENTRY, digest)
            error = ErrorContext(self.NAME, context, ConceptAlreadyDefined(metadata, already_defined.id))
            return ReturnValue(self.NAME, False, error)
        metadata.digest = digest
        metadata.all_attrs = self.compute_all_attrs(variables)
        # bnf_expr = None
        # if definition_type == DefinitionType.BNF:
        #     try:
        #         bnf_expr = self.compute_concept_bnf(definition)
        #     except InvalidBnf as ex:
        #         error = ErrorContext(self.NAME, context, ex)
        #         return ReturnValue(self.NAME, False, error)
        # try:
        #     first_item_res = self.recompute_first_items(context, None, [metadata])
        # except FirstItemError as ex:
        #     return ReturnValue(self.NAME, False, ex)
        # at this point everything is fine. let's get the id and save everything
        om = self.sheerka.om
        # NOTE(review): the counter entry is only read here; presumably om.get/put
        # increments USER_CONCEPTS_IDS elsewhere — confirm
        metadata.id = str(self.sheerka.om.get(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS))
        om.add_concept(metadata)
        # self.update_first_items_caches(context, first_item_res)
        # if bnf_expr:
        #     self.bnf_expr_cache.put(metadata.id, bnf_expr)
        #     # update references
        #     for ref in self.compute_references(bnf_expr):
        #         om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, metadata.id)
        return ReturnValue(self.NAME, True, self.newn(BuiltinConcepts.NEW_CONCEPT, metadata=metadata))
    def newn(self, concept_name: str, **kwargs):
        """
        new_by_name
        Creates and returns an instance of a new concept by its name.
        Falls back to an UNKNOWN_CONCEPT instance when the name is unknown.
        :param concept_name: name of the concept to instantiate
        :param kwargs: initial attribute values for the new instance
        :return: a Concept, or a list of Concepts when several share the name
        :rtype: Concept | list[Concept]
        """
        metadata = self.get_by_name(concept_name)
        if metadata is NotFound:
            return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested_name=concept_name)
        if isinstance(metadata, list):
            return [self._inner_new(m, **kwargs) for m in metadata]
        return self._inner_new(metadata, **kwargs)
    def newi(self, concept_id: str, **kwargs):
        """
        new_by_id
        Creates and returns an instance of a new concept by its id.
        Falls back to an UNKNOWN_CONCEPT instance when the id is unknown.
        :param concept_id: id of the concept to instantiate
        :param kwargs: initial attribute values for the new instance
        :return: a Concept instance
        :rtype: Concept
        """
        metadata = self.get_by_id(concept_id)
        if metadata is NotFound:
            return self._inner_new(self.get_by_name(BuiltinConcepts.UNKNOWN_CONCEPT), requested_id=concept_id)
        return self._inner_new(metadata, **kwargs)
    def get_by_name(self, key: str):
        """
        Returns a concept metadata, using its name.
        :param key: the concept name
        :return: metadata, a list of metadata, or NotFound
        """
        return self.sheerka.om.get(self.CONCEPTS_BY_NAME_ENTRY, key)
    def get_by_id(self, concept_id: str):
        """
        Returns a concept metadata, using its id.
        :param concept_id: the concept id
        :return: metadata or NotFound
        """
        return self.sheerka.om.get(self.CONCEPTS_BY_ID_ENTRY, concept_id)
    def get_by_key(self, key: str):
        """
        Returns a concept metadata, using its key.
        :param key: the concept key (see create_concept_key)
        :return: metadata, a list of metadata, or NotFound
        """
        return self.sheerka.om.get(self.CONCEPTS_BY_KEY_ENTRY, key)
    @staticmethod
    def compute_metadata_digest(metadata: ConceptMetadata):
        """
        Compute once and for all the digest of the definition of a concept.
        :param metadata: the concept metadata to digest
        :return: sha256 hex digest of the definition fields (id excluded)
        :rtype: str
        """
        as_dict = {p: getattr(metadata, p) for p in PROPERTIES_FOR_DIGEST}
        return hashlib.sha256(f"{as_dict}".encode("utf-8")).hexdigest()
    @staticmethod
    def compute_all_attrs(variables: tuple | None):
        """
        Compute the list of available attributes for a concept.
        :param variables: (name, default) pairs declared for the concept
        :return: default properties followed by the variable names
        :rtype: tuple
        """
        all_attrs = DefaultProps.copy()
        if variables:
            all_attrs += [k for k, v in variables]
        return tuple(all_attrs)
    @staticmethod
    def compute_concept_bnf(definition):
        # Placeholder: BNF compilation is not implemented yet
        pass
    @staticmethod
    def create_concept_key(name: str, definition: str | None, variables: tuple | None):
        """
        Creates the key from the definition.
        Tokenizes the definition (or the name when there is none) and replaces
        every token that matches a declared variable with an indexed placeholder.
        :param name: concept name (fallback when definition is empty)
        :param definition: textual definition to tokenize
        :param variables: (name, default) pairs; names become placeholders
        :return: normalized, space-joined key
        :rtype: str
        """
        definition_to_use = definition or name
        tokens = list(Tokenizer(definition_to_use, yield_eof=False))
        # Single-token definitions keep their literal form (no placeholders)
        if variables is None or len(strip_tokens(tokens, True)) == 1:
            variables_to_use = []
        else:
            variables_to_use = [k for k, v in variables]
        parts = []
        for token in tokens:
            if token.type == TokenKind.WHITESPACE:
                continue
            if token.value in variables_to_use:
                parts.append(VARIABLE_PREFIX + str(variables_to_use.index(token.value)))
            else:
                parts.append(token.value)
        return " ".join(parts)
    def _create_builtin_concept(self, concept_id: int, name: str, desc: str, variables: tuple = ()):
        """Register one builtin concept with a fixed id (used by initialize_deferred)."""
        variables_to_use = tuple((k, NotInit) for k in variables)
        concept_key = self.create_concept_key(name, None, variables_to_use)
        # Positional arguments follow the same order as in define_new_concept
        metadata = ConceptMetadata(
            str(concept_id),
            name,
            concept_key,
            True,  # is_builtin
            False,  # is_unique
            "",  # body
            "",  # where
            "",  # pre
            "",  # post
            "",  # ret
            "",  # definition
            DefinitionType.DEFAULT,
            desc,
            False,  # autouse
            variables[0] if variables else "",  # bound_body: first declared variable
            {},  # props
            variables_to_use,
            variables,
        )
        metadata.digest = self.compute_metadata_digest(metadata)
        metadata.all_attrs = self.compute_all_attrs(variables_to_use)
        self.sheerka.om.add_concept(metadata)
    @staticmethod
    def _inner_new(_metadata_def: ConceptMetadata, **kwargs):
        """Instantiate a Concept from metadata and apply the initial values."""
        concept = Concept(_metadata_def)
        for k, v in kwargs.items():
            concept.set_value(k, v)
        return concept
+223
View File
@@ -0,0 +1,223 @@
from dataclasses import dataclass
from common.utils import to_dict
from core.ExecutionContext import ExecutionContext, ExecutionContextActions
from core.ReturnValue import ReturnValue
from core.services.BaseService import BaseService
from evaluators.base_evaluator import AllReturnValuesEvaluator, BaseEvaluator, OneReturnValueEvaluator
@dataclass
class EvaluationPlan:
    """Evaluators of one step, grouped by priority."""
    sorted_priorities: list[int]  # list of available priorities
    evaluators: dict[int, list[BaseEvaluator]]  # priority -> evaluators to run
class SheerkaEngine(BaseService):
    """
    This service is used to process user input.
    It is responsible to parse and evaluate the information.
    It also holds the rule engine.
    """
    NAME = "Engine"
    def __init__(self, sheerka):
        super().__init__(sheerka, order=15)
        self.execution_plan = None  # { ExecutionContextActions : { priority : [evaluators] }}
        self.no_evaluation_plan = EvaluationPlan([], {})  # reused when a step has no evaluators
    def initialize(self):
        """Build the execution plan from the registered evaluators and bind execute()."""
        self.execution_plan = self.compute_execution_plan(self.sheerka.evaluators.values())
        self.sheerka.bind_service_method(self.NAME, self.execute, True)
    def call_evaluators(self,
                        context: ExecutionContext,
                        return_values: list[ReturnValue],
                        step: ExecutionContextActions):
        """
        Calls all evaluators defined for a given step.
        Runs them priority by priority, and repeats the whole pass until a
        fixed point is reached (an iteration that changes nothing).
        :param context: execution context the iterations are pushed onto
        :param return_values: current list of ReturnValue to process
        :param step: the step whose evaluators must run
        :return: the (possibly rewritten) list of return values
        :rtype: list[ReturnValue]
        """
        plan = self.get_evaluation_plan(context, step)
        iteration = 0
        while True:
            with context.push(self.NAME,
                              ExecutionContextActions.EVALUATING_ITERATION,
                              {"step": step, "iteration": iteration},
                              desc=f"iteration #{iteration}") as iteration_context:
                # Snapshot used to detect whether this iteration changed anything
                simple_digest = return_values.copy()
                iteration_context.add_inputs(return_values=simple_digest)
                for priority in plan.sorted_priorities:
                    return_values_copy = return_values.copy()
                    new_return_values = {}  # replaced item -> its replacements
                    return_values_to_delete = set()  # items consumed by an evaluator
                    for evaluator in plan.evaluators[priority]:
                        sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
                        with iteration_context.push(self.NAME,
                                                    step,
                                                    {"step": step,
                                                     "iteration": iteration,
                                                     "evaluator": evaluator.name},
                                                    desc=sub_context_desc) as evaluator_context:
                            evaluator_context.add_inputs(return_values=return_values_copy)
                            # process evaluators that work on one simple return value at a time
                            if isinstance(evaluator, OneReturnValueEvaluator):
                                self.call_one_return_value_evaluator(evaluator_context,
                                                                     evaluator,
                                                                     return_values_copy,
                                                                     new_return_values,
                                                                     return_values_to_delete)
                            # process evaluators that work on all return values
                            else:
                                self.call_all_return_values_evaluator(evaluator_context,
                                                                      evaluator,
                                                                      return_values_copy,
                                                                      new_return_values,
                                                                      return_values_to_delete)
                    # Recreate the new return_value
                    # Try to keep the order of what replaces what
                    return_values = []
                    for item in return_values_copy:
                        if item not in return_values_to_delete:
                            return_values.append(item)
                        if item in new_return_values:
                            return_values.extend(new_return_values[item])
                iteration_context.add_values(return_values=return_values.copy())
                iteration += 1
                if simple_digest == return_values:
                    # I can use a variable like 'has_changed', but I think that this comparison is explicit
                    # It explains that I stay in the loop if something was modified
                    break
        return return_values
    def execute(self,
                context: ExecutionContext,
                return_values: list[ReturnValue],
                steps: list[ExecutionContextActions]):
        """
        Runs the processing engine on the return_values, one step at a time.
        :param context: execution context the steps are pushed onto
        :param return_values: initial list of ReturnValue
        :param steps: ordered list of steps to run
        :return: the final list of return values
        :rtype: list[ReturnValue]
        """
        for step in steps:
            copy = return_values.copy()
            with context.push(self.NAME, ExecutionContextActions.EVALUATING_STEP, {"step": step}) as sub_context:
                sub_context.add_inputs(return_values=copy)
                return_values = self.call_evaluators(sub_context, return_values, step)
                sub_context.add_values(return_values=return_values)
                sub_context.add_values(has_changed=(copy != return_values))
        return return_values
    def get_evaluation_plan(self, context: ExecutionContext, step: ExecutionContextActions) -> EvaluationPlan:
        """Return the plan for a step (highest priority first), or an empty plan."""
        if step not in self.execution_plan:
            return self.no_evaluation_plan
        evaluators = self.execution_plan[step]
        # NOTE(review): priorities are re-sorted on every call — could be cached
        return EvaluationPlan(sorted(evaluators.keys(), reverse=True), evaluators)
    @staticmethod
    def call_one_return_value_evaluator(context: ExecutionContext,
                                        evaluator: OneReturnValueEvaluator,
                                        return_values: list[ReturnValue],
                                        new_return_values: dict[ReturnValue, list[ReturnValue]],
                                        return_values_to_delete: set[ReturnValue]):
        """
        Run a per-item evaluator on every return value that matches.
        :param context: evaluator execution context (receives a trace)
        :param evaluator: the evaluator to run
        :param return_values: items to test (not mutated here)
        :param new_return_values: out: replaced item -> produced items
        :param return_values_to_delete: out: items consumed by the evaluator
        """
        context_trace = []
        for item in return_values:
            debug = {"item": item}
            context_trace.append(debug)
            m = evaluator.matches(context, item)
            debug["match"] = m.status
            if m.status:
                result = evaluator.eval(context, m.obj, item)
                return_values_to_delete.update(result.eaten)
                new_return_values.setdefault(item, []).extend(result.new)
                debug["new"] = result.new
                debug["eaten"] = result.eaten
        context.add_values(evaluation=context_trace)
    @staticmethod
    def call_all_return_values_evaluator(context: ExecutionContext,
                                         evaluator: AllReturnValuesEvaluator,
                                         return_values: list[ReturnValue],
                                         new_return_values: dict[ReturnValue, list[ReturnValue]],
                                         return_values_to_delete: set[ReturnValue]):
        """
        Run a whole-list evaluator once on the complete set of return values.
        :param context: evaluator execution context (receives a trace)
        :param evaluator: the evaluator to run
        :param return_values: items to test (not mutated here)
        :param new_return_values: out: replaced item -> produced items
        :param return_values_to_delete: out: items consumed by the evaluator
        """
        debug = {}
        m = evaluator.matches(context, return_values)
        debug["match"] = m.status
        if m.status:
            result = evaluator.eval(context, m.obj, return_values)
            return_values_to_delete.update(result.eaten)
            # NOTE(review): assumes eval() produced at least one value with a parent — confirm
            new_return_values.setdefault(result.new[0].parents[0], []).extend(result.new)
            debug["new"] = result.new
            debug["eaten"] = result.eaten
        context.add_values(evaluation=debug)
    @staticmethod
    def compute_execution_plan(evaluators):
        """Group the enabled evaluators by step, then by priority."""
        evaluators = [e for e in evaluators if e.enabled]
        by_step = to_dict(evaluators, lambda e: e.step)
        for k, v in by_step.items():
            by_step[k] = to_dict(v, lambda e: e.priority)
        return by_step
View File
-32
View File
@@ -1,32 +0,0 @@
def get_class(qname):
    """
    Loads an object (class, function, ...) from its fully qualified name.

    :param qname: dotted path, e.g. "package.module.ClassName"
    :return: the resolved object
    :raises ImportError: when the module part cannot be imported
    :raises AttributeError: when the final attribute does not exist
    """
    import importlib
    # Everything before the last dot is the module, the rest is the attribute
    module_name, _, attr_name = qname.rpartition('.')
    module = importlib.import_module(module_name)
    return getattr(module, attr_name)
def get_full_qualified_name(obj):
    """
    Returns the full qualified name of a class (including its module name).

    Accepts either a class or an instance.  Uses isinstance(obj, type) rather
    than `obj.__class__ == type` so classes built with a custom metaclass
    (e.g. ABCMeta) are still recognized as classes instead of being reported
    as an instance of their metaclass.

    :param obj: a class or any instance
    :return: "module.Name", or just "Name" for builtins
    """
    cls = obj if isinstance(obj, type) else obj.__class__
    module = cls.__module__
    if module is None or module == str.__class__.__module__:
        return cls.__name__  # Avoid reporting __builtin__
    return module + '.' + cls.__name__
+30
View File
@@ -0,0 +1,30 @@
from core.BuiltinConcepts import BuiltinConcepts
from core.ErrorContext import ErrorContext
from core.ExecutionContext import ExecutionContext, ExecutionContextActions
from core.ReturnValue import ReturnValue
from evaluators.base_evaluator import EvaluatorEvalResult, EvaluatorMatchResult, OneReturnValueEvaluator
from parsers.ParserInput import ParserInput
class CreateParserInput(OneReturnValueEvaluator):
    """Turns a successful USER_INPUT return value into a PARSER_INPUT concept."""
    NAME = "CreateParserInput"
    def __init__(self):
        super().__init__(self.NAME, ExecutionContextActions.BEFORE_EVALUATION, 50)
    def matches(self, context: ExecutionContext, return_value: ReturnValue) -> EvaluatorMatchResult:
        # Guard clauses: only successful USER_INPUT values are interesting
        if not return_value.status:
            return EvaluatorMatchResult(False)
        if not context.sheerka.isinstance(return_value.value, BuiltinConcepts.USER_INPUT):
            return EvaluatorMatchResult(False)
        return EvaluatorMatchResult(True)
    def eval(self, context, evaluator_context, return_value):
        parser_input = ParserInput(return_value.value.get_value("command"))
        if not parser_input.init():
            # Tokenization failed: replace the input with an error result
            error = ErrorContext(self.NAME, context, parser_input)
            failure = ReturnValue(self.NAME, False, error, parents=[return_value])
            return EvaluatorEvalResult([failure], [return_value])
        concept = context.sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=parser_input)
        success = ReturnValue(self.NAME, True, concept, parents=[return_value])
        return EvaluatorEvalResult([success], [return_value])
View File
+73
View File
@@ -0,0 +1,73 @@
from dataclasses import dataclass
from core.ExecutionContext import ExecutionContext, ExecutionContextActions
from core.ReturnValue import ReturnValue
@dataclass
class EvaluatorMatchResult:
    """Outcome of BaseEvaluator.matches(): did it match, plus an optional payload."""
    status: bool
    obj: object = None  # evaluator-specific context forwarded to eval()
@dataclass
class EvaluatorEvalResult:
    """Outcome of BaseEvaluator.eval(): produced and consumed return values."""
    new: list[ReturnValue] = None  # return values produced by the evaluator
    eaten: list[ReturnValue] = None  # return values consumed (to be removed)
class BaseEvaluator:
    """
    Base class to evaluate ReturnValues.

    Identity is the (name, priority, step, enabled) quadruple; __eq__ and
    __hash__ are kept consistent with each other.
    """
    def __init__(self, name, step: ExecutionContextActions, priority: int, enabled=True):
        self.name = name
        self.step = step
        self.priority = priority
        self.enabled = enabled
    def __repr__(self):
        return f"{self.name} ({self.priority})"
    def __eq__(self, other):
        if not isinstance(other, BaseEvaluator):
            return False
        mine = (self.name, self.priority, self.step, self.enabled)
        theirs = (other.name, other.priority, other.step, other.enabled)
        return mine == theirs
    def __hash__(self):
        return hash((self.name, self.priority, self.step, self.enabled))
class OneReturnValueEvaluator(BaseEvaluator):
    """
    Evaluate one specific return value.
    Subclasses implement matches()/eval(); the engine calls matches() on each
    ReturnValue and eval() only on the ones that matched.
    """
    def matches(self, context: ExecutionContext,
                return_value: ReturnValue) -> EvaluatorMatchResult:
        """Tell whether this evaluator applies to *return_value*."""
        pass
    def eval(self, context: ExecutionContext,
             evaluation_context: object,
             return_value: ReturnValue) -> EvaluatorEvalResult:
        """Process *return_value*; *evaluation_context* is the matches() payload (obj)."""
        pass
class AllReturnValuesEvaluator(BaseEvaluator):
    """
    Evaluates the group of ReturnValues as a whole.
    matches()/eval() receive the complete list instead of a single item.
    """
    def matches(self, context: ExecutionContext,
                return_values: list[ReturnValue]) -> EvaluatorMatchResult:
        """Tell whether this evaluator applies to the list of return values."""
        pass
    def eval(self, context: ExecutionContext,
             evaluation_context: object,
             return_values: list[ReturnValue]) -> EvaluatorEvalResult:
        """Process the list; *evaluation_context* is the matches() payload (obj)."""
        pass
+37
View File
@@ -0,0 +1,37 @@
class OntologyManagerFrozen(Exception):
    """
    Raised when you try to add a cache manager while the ontology manager is frozen.
    """
    pass
class OntologyManagerNotFrozen(Exception):
    """
    Raised when you try to push or pop a cache manager while the ontology manager is not frozen.
    """
    pass
class OntologyManagerCannotPopLatest(Exception):
    """
    Raised when you try to pop the last remaining (root) cache manager.
    """
    pass
class OntologyAlreadyExists(Exception):
    """
    Raised when the ontology exists AND is not the top layer.
    """
    def __init__(self, name):
        # Forward to Exception so str(exc) and exc.args carry the name
        super().__init__(name)
        self.name = name
class OntologyNotFound(Exception):
    """
    During revert(), when the requested ontology does not exist.
    """
    def __init__(self, ontology):
        # Forward to Exception so str(exc) and exc.args carry the ontology
        super().__init__(ontology)
        self.ontology = ontology
+534
View File
@@ -0,0 +1,534 @@
from caching.Cache import Cache
from caching.CacheManager import CacheManager
from caching.DictionaryCache import DictionaryCache
from caching.SetCache import SetCache
from common.global_symbols import EVENT_CONCEPT_ID_DELETED, \
EVENT_RULE_ID_DELETED, NotFound, \
Removed
from ontologies.Exceptions import OntologyAlreadyExists, OntologyManagerCannotPopLatest, OntologyManagerFrozen, \
OntologyManagerNotFrozen, OntologyNotFound
from sdp.sheerkaDataProvider import SheerkaDataProvider
class AlternateSdp:
    """
    Other Cache managers that are linked together.

    Lets a lookup fall through from the most recent ontology layer to the
    oldest one; get() and alt_get() share the same walking logic.
    """
    def __init__(self, ontologies):
        # Snapshot names and cache managers in layer order
        self.names = [o.name for o in ontologies]
        self.cache_managers = [o.cache_manager for o in ontologies]
    def _first_hit(self, cache_name: str, key: str, use_alt: bool):
        """
        Return the first value found while walking the layers (NotFound on miss).
        On a miss in a non-terminal layer, the layer is told to forget that the
        key was requested, so a later put in that layer is not shadowed.
        """
        last = len(self.cache_managers) - 1
        for i, cache_manager in enumerate(self.cache_managers):
            getter = cache_manager.alt_get if use_alt else cache_manager.get
            value = getter(cache_name, key)
            if value is not NotFound:
                return value
            if i != last:
                # forget that the key was requested
                cache_manager.remove_initialized_key(cache_name, key)
        return NotFound
    def get(self, cache_name: str, key: str):
        """Look the key up through every layer using the regular get()."""
        return self._first_hit(cache_name, key, use_alt=False)
    def alt_get(self, cache_name: str, key: str):
        """Look the key up through every layer using alt_get()."""
        return self._first_hit(cache_name, key, use_alt=True)
    def exists(self, cache_name: str, key: str):
        """True as soon as one layer knows the key."""
        return any(cm.exists(cache_name, key) for cm in self.cache_managers)
class Ontology:
    """
    An Ontology is
    * a CacheManager (which is linked to a sdp)
    * and a way to get to the next Ontologies (as AlternateSdp)

    Identity is the (name, depth) pair; __eq__ and __hash__ agree.
    """
    def __init__(self, name, depth, cache_manager: CacheManager, alt_sdp: AlternateSdp | None):
        self.name = name
        self.depth = depth
        self.cache_manager = cache_manager
        self.alt_sdp = alt_sdp
        self.concepts_attributes = None  # optional saved sheerka state (see manager)
    def __repr__(self):
        return f"Ontology('{self.name}')"
    def __eq__(self, other):
        if not isinstance(other, Ontology):
            return False
        return (self.name, self.depth) == (other.name, other.depth)
    def __hash__(self):
        return hash((self.name, self.depth))
class SheerkaOntologyManager:
    """
    Manages the stack of ontology layers (index 0 is the most recent layer).
    Each layer owns a CacheManager; the manager itself keeps its own internal
    cache manager to track which concepts/rules belong to which layer.
    """
    ROOT_ONTOLOGY_NAME = "__default__"
    SELF_CACHE_MANAGER = "__ontology_manager__"  # cache to store SheerkaOntologyManager info
    CONCEPTS_BY_ONTOLOGY_ENTRY = "ConceptsByOntologyEntry"
    RULES_BY_ONTOLOGY_ENTRY = "RulesByOntologyEntry"
    ONTOLOGY_BY_CONCEPT_ENTRY = "OntologyByConceptEntry"
    ONTOLOGY_BY_RULE_ENTRY = "OntologyByRuleEntry"
    def __init__(self, sheerka, root_folder):
        self.sheerka = sheerka
        self.root_folder = root_folder
        self.frozen = False  # set by freeze(); guards cache registration vs push/pop
        # We create the first cache manager
        # For the time being there is no AltSdp yet!
        ref_cache_manager = CacheManager(sdp=SheerkaDataProvider(root_folder, self.sheerka))
        self.ontologies = [Ontology(self.ROOT_ONTOLOGY_NAME, 0, ref_cache_manager, None)]
        # Ontology manager also needs to keep track of its own stuff
        # So we create a separate sdp
        internal_sdp = SheerkaDataProvider(root_folder, self.sheerka, self.SELF_CACHE_MANAGER)
        self.internal_cache_manager = CacheManager(sdp=internal_sdp)
        # add cache to track all the concepts defined under a given ontology
        # key : name of the ontology
        # value : set of all concept ids
        cache = SetCache(max_size=None).auto_configure(self.CONCEPTS_BY_ONTOLOGY_ENTRY)
        self.internal_cache_manager.register_cache(self.CONCEPTS_BY_ONTOLOGY_ENTRY, cache)
        # add cache to track all the rules defined under a given ontology
        # key : name of the ontology
        # value : set of all rule ids
        cache = SetCache(max_size=None).auto_configure(self.RULES_BY_ONTOLOGY_ENTRY)
        self.internal_cache_manager.register_cache(self.RULES_BY_ONTOLOGY_ENTRY, cache)
        # add cache to track where the concept is created (under which ontology)
        # key : concept id
        # value : name of the ontology
        cache = Cache(max_size=None).auto_configure(self.ONTOLOGY_BY_CONCEPT_ENTRY)
        self.internal_cache_manager.register_cache(self.ONTOLOGY_BY_CONCEPT_ENTRY, cache)
        # add cache to track where the rule is created (under which ontology)
        # key : rule id
        # value : name of the ontology
        cache = Cache(max_size=None).auto_configure(self.ONTOLOGY_BY_RULE_ENTRY)
        self.internal_cache_manager.register_cache(self.ONTOLOGY_BY_RULE_ENTRY, cache)
@property
def ontologies_names(self):
return [o.name for o in self.ontologies]
    def freeze(self):
        """
        Once frozen, you can no longer register caches.
        It's to ensure consistency between ontologies:
        the same caches must exist for all ontologies.
        If an information is not found in a cache manager, we can request the
        parent ontology — which is possible only if the same cache exists in
        the parent ontology too.
        To ensure that:
        1. You `register_cache()` all the caches
        2. You call `freeze()`
        You won't be able to `add` or `push` new ontologies if `freeze()` was not called.
        :return: self (fluent)
        :rtype: SheerkaOntologyManager
        """
        self.frozen = True
        return self
    def push_ontology(self, name) -> Ontology:
        """
        Add an ontology layer on top of the stack.
        :param name: name of the layer
        :return: the newly created Ontology
        :raises OntologyManagerNotFrozen: when freeze() was not called yet
        """
        if not self.frozen:
            raise OntologyManagerNotFrozen()
        # pseudo clone cache manager: same cache layout as the current top, fresh clones
        cache_manager = CacheManager(sdp=self.get_sdp(name))
        for cache_name, cache_def in self.current_cache_manager().caches.items():
            clone = cache_def.cache.clone()
            if cache_name in self.current_cache_manager().concept_caches:
                cache_manager.register_concept_cache(cache_name, clone, cache_def.get_key, cache_def.use_ref)
            else:
                cache_manager.register_cache(cache_name, clone, cache_def.persist, cache_def.use_ref)
            # Dictionary cache special treatment
            if isinstance(clone, DictionaryCache):
                clone.put(False, cache_def.cache.copy())  # only a shadow (shallow?) copy for now — TODO confirm
                clone.reset_events()
        # the alt sdp is built from the existing layers BEFORE the new one is inserted
        alt_sdp = AlternateSdp(self.ontologies)
        new_ontology = Ontology(name, len(self.ontologies), cache_manager, alt_sdp)
        self.ontologies.insert(0, new_ontology)
        return new_ontology
    def pop_ontology(self, context):
        """
        Remove the top ontology layer.
        Publishes a deletion event for every concept and rule the layer owned,
        then drops the corresponding internal tracking entries.
        :param context: execution context used for publishing the events
        :return: the popped Ontology
        :raises OntologyManagerNotFrozen: when freeze() was not called yet
        :raises OntologyManagerCannotPopLatest: when only the root layer remains
        """
        if not self.frozen:
            raise OntologyManagerNotFrozen()
        if len(self.ontologies) == 1:
            raise OntologyManagerCannotPopLatest()
        # remove concepts and rules tracking for the ontology to pop
        ontology_name = self.current_ontology().name
        concepts = self.internal_cache_manager.get(self.CONCEPTS_BY_ONTOLOGY_ENTRY, ontology_name)
        if concepts is not NotFound:
            for concept in concepts:
                self.sheerka.publish(context, EVENT_CONCEPT_ID_DELETED, concept)
                self.internal_cache_manager.delete(self.ONTOLOGY_BY_CONCEPT_ENTRY, concept)
            self.internal_cache_manager.delete(self.CONCEPTS_BY_ONTOLOGY_ENTRY, ontology_name)
        rules = self.internal_cache_manager.get(self.RULES_BY_ONTOLOGY_ENTRY, ontology_name)
        if rules is not NotFound:
            for rule in rules:
                self.sheerka.publish(context, EVENT_RULE_ID_DELETED, rule)
                self.internal_cache_manager.delete(self.ONTOLOGY_BY_RULE_ENTRY, rule)
            self.internal_cache_manager.delete(self.RULES_BY_ONTOLOGY_ENTRY, ontology_name)
        return self.ontologies.pop(0)
    def add_ontology(self, ontology: Ontology):
        """
        Put back a previously created (popped) ontology on top of the stack.
        :param ontology: the layer to reinstall
        :return: self (fluent)
        :raises OntologyManagerNotFrozen: when freeze() was not called yet
        """
        if not self.frozen:
            raise OntologyManagerNotFrozen()
        # rebuild the fall-through chain from the CURRENT layers, before inserting
        ontology.alt_sdp = AlternateSdp(self.ontologies)
        self.ontologies.insert(0, ontology)
        # the layer's caches must forget which keys were already requested
        for cache_def in ontology.cache_manager.caches.values():
            cache_def.cache.reset_initialized_keys()
        return self
    def revert_ontology(self, context, ontology) -> Ontology:
        """
        Pop every ontology until the requested one is found.
        The requested one is also popped.
        :param context: execution context forwarded to pop_ontology()
        :param ontology: the layer to revert to (it is removed too)
        :return: the new top ontology
        :rtype: Ontology
        :raises OntologyNotFound: when the requested layer is not in the stack
        """
        if ontology not in self.ontologies:
            raise OntologyNotFound(ontology)
        while self.current_ontology() != ontology:
            self.pop_ontology(context)
        # pop the requested layer itself
        self.pop_ontology(context)
        return self.current_ontology()
def get_ontology(self, name=None):
"""
Return the first ontology with the corresponding name
When no name is given, return the top ontology
"""
if name is None:
return self.ontologies[0]
for ontology in self.ontologies:
if ontology.name == name:
return ontology
raise KeyError(name)
def save_ontologies_names(self):
    """Persist the list of ontology names through the current sdp."""
    self.current_sdp().save_ontologies(self.ontologies_names)
def already_on_top(self, name):
    """
    Tell whether the ontology called *name* already sits on top of the stack.

    :param name: ontology name to check
    :return: True when it is the top ontology, False when the name is unknown
    :raises OntologyAlreadyExists: when the name exists lower in the stack
    """
    top_matches = self.ontologies[0].name == name
    if top_matches:
        return True
    if name in self.ontologies_names:
        # known, but buried below the top: that is an error for the caller
        raise OntologyAlreadyExists(name)
    return False
def record_sheerka_state(self):
    """
    Hook letting the current ontology keep extra information.

    Currently a no-op; see the commented-out sketch below.
    """
    pass
    # # TODO persist these information ?
    # self.current_ontology().concepts_attributes = copy_concepts_attrs()
def reset_sheerka_state(self):
    """
    Counterpart of record_sheerka_state(); restores the recorded state.

    Currently a no-op; see the commented-out sketch below.
    """
    pass
    # if self.current_ontology().concepts_attributes is not None:
    #     load_concepts_attrs(self.current_ontology().concepts_attributes)
def current_cache_manager(self) -> CacheManager:
    """Return the cache manager of the top (current) ontology."""
    return self.ontologies[0].cache_manager
def current_sdp(self) -> SheerkaDataProvider:
    """Return the data provider of the top (current) ontology."""
    return self.ontologies[0].cache_manager.sdp
def current_ontology(self) -> Ontology:
    """Return the ontology at the top of the stack."""
    return self.ontologies[0]
def register_concept_cache(self, name, cache, get_key, use_ref):
    """
    Register a concept cache along with how to compute its key.

    :param name: cache entry name
    :param cache: the cache instance to register
    :param get_key: callable that derives the cache key from a concept
    :param use_ref: whether values are stored by reference
    :raises OntologyManagerFrozen: when the manager is already frozen
    :return: the cache manager's registration result
    """
    if self.frozen:
        raise OntologyManagerFrozen
    return self.current_cache_manager().register_concept_cache(name, cache, get_key, use_ref)
def register_cache(self, name, cache, persist=True, use_ref=False):
    """
    Register a plain cache along with how to compute its key.

    :param name: cache entry name
    :param cache: the cache instance to register
    :param persist: whether the cache content is persisted on commit
    :param use_ref: whether values are stored by reference
    :raises OntologyManagerFrozen: when the manager is already frozen
    :return: the cache manager's registration result
    """
    if self.frozen:
        raise OntologyManagerFrozen
    return self.current_cache_manager().register_cache(name, cache, persist, use_ref)
def add_concept(self, concept):
    """
    Add a new concept to the current ontology.

    The concept is dispatched into multiple caches (several indexes are
    needed to retrieve it), then the internal concept<->ontology tracking
    entries are updated.

    :param concept: the concept to add
    :return:
    """
    self.current_cache_manager().add_concept(concept, self.ontologies[0].alt_sdp)
    # update internal states
    self._on_concept_created(concept)
def update_concept(self, old, new):
    """
    Update a concept: replace *old* by *new* in every cache, and refresh
    the internal concept<->ontology tracking (delete then re-create).

    :param old: old version of the concept
    :param new: new version of the concept
    :return:
    """
    self.current_cache_manager().update_concept(old, new, self.ontologies[0].alt_sdp)
    # update internal states
    self._on_concept_deleted(old)
    self._on_concept_created(new)
def remove_concept(self, concept):
    """
    Remove a concept from all caches and drop its internal tracking entries.

    :param concept: the concept to remove
    :return:
    """
    self.current_cache_manager().remove_concept(concept, self.ontologies[0].alt_sdp)
    # update internal states
    self._on_concept_deleted(concept)
def get(self, cache_name, key):
    """
    Browse the ontology layers for *key* under the entry *cache_name*.

    When a value is found in a lower layer the top-level cache is updated
    by the cache manager. A 'Removed' tombstone is reported as NotFound.

    :param cache_name: cache entry name
    :param key: key to look up
    :return: the value, or NotFound
    """
    top = self.ontologies[0]
    found = self.current_cache_manager().get(cache_name, key, top.alt_sdp)
    if found is Removed:
        return NotFound
    return found
def exists(self, cache_name, key):
    """
    Tell whether any ontology layer defines *key* under the entry
    *cache_name*.

    :param cache_name: cache entry name
    :param key: key to look up
    :return: True when at least one layer has the key, else False
    """
    return any(layer.cache_manager.exists(cache_name, key) for layer in self.ontologies)
def exists_in_current(self, cache_name, key):
    """
    Tell whether *key* exists in the current (top) ontology only.

    :param cache_name: cache entry name
    :param key: key to look up
    :return: True when the top layer has the key, else False
    """
    return self.current_cache_manager().exists(cache_name, key)
def list(self, entry, cache_only=False):
    """
    Return every value stored under *entry*, merged across all ontology
    layers (see get_all for the merge rules).

    :param entry: cache name / sdp entry
    :param cache_only: when True, skip the remote sdp lookups
    :return: list of values
    """
    return [*self.get_all(entry, cache_only).values()]
def get_all(self, entry, cache_only=False):
    """
    Return every <key, value> pair stored under *entry*, merged across all
    ontology layers.

    Layers are walked bottom-up: remote (sdp) values are collected first,
    then overridden by the layer's in-memory cache, so the top ontology
    always wins. A 'Removed' tombstone drops the key from the result.

    Bug fix: tombstones are removed with res.pop(k, None) instead of
    'del res[k]'. A tombstone for a key no lower layer provided used to
    raise KeyError; in the cache loop that KeyError was silently swallowed
    by the 'except KeyError' guarding get_inner_cache(), truncating the
    merge for that layer.

    :param entry: cache name / sdp entry
    :param cache_only: Do not fetch data from remote sdp
    :return: dict of merged key/value pairs
    """
    res = {}
    for ontology in reversed(self.ontologies):
        if not cache_only:
            # get values from sdp
            values = ontology.cache_manager.sdp.get(entry)
            if values is Removed:
                res.clear()
            elif values is not NotFound:
                for k, v in values.items():
                    if v is Removed:
                        res.pop(k, None)
                    else:
                        res[k] = v
        # override with the values from cache
        try:
            cache = ontology.cache_manager.get_inner_cache(entry)
            if cache.is_cleared():
                res.clear()
            for k in cache:
                v = cache.alt_get(k)  # Do not use get(), because of IncCache()
                if v is Removed:
                    res.pop(k, None)
                else:
                    res[k] = v
        except KeyError:
            # no inner cache registered for this entry in this layer
            pass
    return res
def put(self, cache_name, key, value):
    """
    Store *value* under *key* in the current ontology's cache *cache_name*.

    :param cache_name: cache entry name
    :param key: key to store under
    :param value: value to store
    :return: the cache manager's put result
    """
    return self.current_cache_manager().put(cache_name, key, value, self.ontologies[0].alt_sdp)
def delete(self, cache_name, key, value=None):
    """
    Delete an entry from the current ontology's cache *cache_name*.

    :param cache_name: cache entry name
    :param key: key to delete
    :param value: optional value (forwarded to the cache manager)
    :return: the cache manager's delete result
    """
    return self.current_cache_manager().delete(cache_name, key, value, self.ontologies[0].alt_sdp)
def populate(self, cache_name, populate_function, get_key_function, reset_events=False, all_ontologies=False):
    """
    Fill a specific cache with a batch of items.

    :param cache_name: cache to populate
    :param populate_function: callable producing the items
    :param get_key_function: callable deriving the key from an item
    :param reset_events: reset the to_add and to_remove events afterwards
    :param all_ontologies: also populate every lower ontology layer
    :return:
    """
    targets = [self.current_cache_manager()]
    if all_ontologies:
        targets.extend(layer.cache_manager for layer in self.ontologies[1:])
    for manager in targets:
        manager.populate(cache_name, populate_function, get_key_function, reset_events)
def copy(self, cache_name):
    """
    Return a copy of the content of *cache_name* in the top ontology layer.

    :param cache_name: registered cache name
    :return: a copy of the cache content
    """
    return self.current_cache_manager().caches[cache_name].cache.copy()
def commit(self, context):
    """
    Persist all caches into physical storage: internal tracking caches
    first, then the current ontology's caches.

    :param context: execution context
    :return: the current cache manager's commit result
    """
    self.internal_cache_manager.commit(context)
    return self.current_cache_manager().commit(context)
def clear(self, cache_name=None):
    """
    Clear *cache_name* in the current ontology (presumably all caches
    when None — confirm against CacheManager.clear).
    """
    return self.current_cache_manager().clear(cache_name)
def get_sdp(self, name=None):
    """
    Return a SheerkaDataProvider: a new instance for *name*, or the
    current one when no name is given.

    NOTE(review): the truthiness test means name="" also falls back to
    the current sdp — confirm this is intended.
    """
    if name:
        return SheerkaDataProvider(self.root_folder, self.sheerka, name)
    else:
        return self.current_sdp()
def save_event(self, event):
    """Persist *event* through the current sdp."""
    return self.current_sdp().save_event(event)
def save_execution_context(self, execution_context, is_admin):
    """Persist *execution_context* through the current sdp."""
    return self.current_sdp().save_execution_context(execution_context, is_admin)
def is_dirty(self):
    """Tell whether the current ontology has uncommitted changes."""
    return self.current_cache_manager().is_dirty
def _on_concept_created(self, concept):
    """Track the new concept in both concept<->ontology internal indexes."""
    self.internal_cache_manager.put(self.CONCEPTS_BY_ONTOLOGY_ENTRY, self.current_ontology().name, concept.id)
    self.internal_cache_manager.put(self.ONTOLOGY_BY_CONCEPT_ENTRY, concept.id, self.current_ontology().name)
def _on_concept_deleted(self, concept):
    """
    Drop the concept from both concept<->ontology internal indexes.

    NOTE(review): when the concept is not tracked, ontology_name is the
    NotFound sentinel and is passed to delete() as a key — presumably a
    harmless no-op; confirm against the cache manager's delete().
    """
    ontology_name = self.internal_cache_manager.get(self.ONTOLOGY_BY_CONCEPT_ENTRY, concept.id)
    self.internal_cache_manager.delete(self.CONCEPTS_BY_ONTOLOGY_ENTRY, ontology_name, concept.id)
    self.internal_cache_manager.delete(self.ONTOLOGY_BY_CONCEPT_ENTRY, concept.id)
def _on_rule_created(self, rule):
    """Track the new rule in both rule<->ontology internal indexes."""
    self.internal_cache_manager.put(self.RULES_BY_ONTOLOGY_ENTRY, self.current_ontology().name, rule.id)
    self.internal_cache_manager.put(self.ONTOLOGY_BY_RULE_ENTRY, rule.id, self.current_ontology().name)
def _on_rule_deleted(self, rule):
    """Drop the rule from both rule<->ontology internal indexes."""
    ontology_name = self.internal_cache_manager.get(self.ONTOLOGY_BY_RULE_ENTRY, rule.id)
    self.internal_cache_manager.delete(self.RULES_BY_ONTOLOGY_ENTRY, ontology_name, rule.id)
    self.internal_cache_manager.delete(self.ONTOLOGY_BY_RULE_ENTRY, rule.id)
View File
+19
View File
@@ -0,0 +1,19 @@
from parsers.tokenizer import Tokenizer
class ParserInput:
    """
    Holds a text to parse together with its tokenized form.

    init() must be called before use; on tokenizer failure, the exception
    is kept in self.exception and self.tokens stays None.
    """

    def __init__(self, text, yield_oef=True):
        # NOTE(review): 'yield_oef' looks like a typo for 'yield_eof', but the
        # parameter name is part of the public interface — renaming it would
        # break keyword callers.
        self.original_text = text
        self.yield_oef = yield_oef  # whether the consumer wants the EOF token
        self.tokens = None          # list of Token once init() succeeds
        self.exception = None       # tokenizer error when init() fails

    def init(self) -> bool:
        """
        Tokenize the original text.

        :return: True on success; False when tokenization raised (the
                 exception is stored in self.exception)
        """
        try:
            # EOF is always forced here; consumers decide whether to yield it
            self.tokens = list(Tokenizer(self.original_text, yield_eof=True))
            return True
        except Exception as ex:
            self.tokens = None
            self.exception = ex
            return False
View File
+569
View File
@@ -0,0 +1,569 @@
from dataclasses import dataclass, field
from enum import Enum
from common.global_symbols import VARIABLE_PREFIX
class TokenKind(Enum):
    """
    Every kind of lexical token the Tokenizer can produce.

    Bug fix: EURO previously had the value "dollar", which duplicated
    DOLLAR's value — Python's Enum turns duplicate values into aliases,
    so TokenKind.EURO *was* TokenKind.DOLLAR and euro tokens were emitted
    with type DOLLAR. EURO now has its own value.
    """
    EOF = "eof"
    WHITESPACE = "whitespace"
    NEWLINE = "newline"
    IDENTIFIER = "identifier"
    CONCEPT = "concept"
    RULE = "rule"
    EXPR = "expression"
    STRING = "string"
    NUMBER = "number"
    LPAR = "lpar"
    RPAR = "rpar"
    LBRACKET = "lbracket"
    RBRACKET = "rbracket"
    LBRACE = "lbrace"
    RBRACE = "rbrace"
    PLUS = "plus"
    MINUS = "minus"
    STAR = "star"
    SLASH = "slash"
    PERCENT = "percent"
    COMMA = "comma"  # ,
    SEMICOLON = "semicolon"  # ;
    COLON = "colon"  # :
    DOT = "dot"  # .
    QMARK = "qmark"
    VBAR = "vbar"
    AMPER = "amper"
    EQUALS = "="
    AT = "at"
    BACK_QUOTE = "bquote"  # `
    BACK_SLASH = "bslash"  # \
    CARAT = "carat"  # ^
    DOLLAR = "dollar"  # $
    EURO = "euro"  # € (was "dollar": an accidental alias of DOLLAR)
    STERLING = "steling"  # £ — NOTE(review): value misspells "sterling"; kept in case it is persisted
    EMARK = "emark"  # !
    GREATER = "greater"  # >
    LESS = "less"  # <
    HASH = "HASH"  # # — NOTE(review): only upper-cased value; kept in case it is persisted
    TILDE = "tilde"  # ~
    UNDERSCORE = "underscore"  # _
    DEGREE = "degree"  # °
    QUOTE = "'"  # '
    WORD = "word"
    EQUALSEQUALS = "=="
    STARSTAR = "**"
    SLASHSLASH = "//"
    VAR_DEF = "concept variable"  # __var__
    REGEX = "regex"  # r'xxx' or r"xxx" or r|xxx| or r/xxx/ but not r:xxx: which means rules
@dataclass()
class Token:
    """
    One lexical token: its kind, raw value, and position in the source text.

    Bug fix: the lazy caches below were checked with a truthiness test
    ('if self._strip_quote:'), so an empty-string result was recomputed on
    every access; they are now checked against None.
    """
    type: TokenKind
    value: object
    index: int   # absolute character offset in the source
    line: int    # 1-based line number
    column: int  # 1-based column number
    # lazily-computed caches, excluded from repr/compare
    _strip_quote: str = field(default=None, repr=False, compare=False, hash=None)
    _str_value: str = field(default=None, repr=False, compare=False, hash=None)
    _repr_value: str = field(default=None, repr=False, compare=False, hash=None)

    def __repr__(self):
        return f"Token({self.repr_value})"

    @property
    def strip_quote(self):
        """The value without surrounding quotes (STRING/REGEX), else the raw value."""
        if self._strip_quote is not None:
            return self._strip_quote
        if self.type in (TokenKind.STRING, TokenKind.REGEX):
            self._strip_quote = self.value[1:-1]
        else:
            self._strip_quote = self.value
        return self._strip_quote

    @property
    def str_value(self):
        """Cached str form of the token, quotes kept."""
        if self._str_value is not None:
            return self._str_value
        self._str_value = self.to_str(False)
        return self._str_value

    @property
    def repr_value(self):
        """Cached human-friendly form used by __repr__ (e.g. <EOF>, <ws>, <nl>)."""
        if self._repr_value is not None:
            return self._repr_value
        if self.type == TokenKind.EOF:
            self._repr_value = "<EOF>"
        elif self.type == TokenKind.WHITESPACE:
            self._repr_value = "<!ws>" if self.value == "" else "<tab>" if self.value[0] == "\t" else "<ws>"
        elif self.type == TokenKind.NEWLINE:
            self._repr_value = "<nl>"
        elif self.type == TokenKind.CONCEPT:
            self._repr_value = str_concept(self.value)
        elif self.type == TokenKind.RULE:
            self._repr_value = str_concept(self.value, prefix="r:")
        else:
            self._repr_value = self.str_value
        return self._repr_value

    @staticmethod
    def is_whitespace(token):
        """True when *token* is a (non-None) WHITESPACE token."""
        return token and token.type == TokenKind.WHITESPACE

    def to_str(self, strip_quote):
        """
        Render the token as a string.

        :param strip_quote: drop surrounding quotes for STRING tokens
        """
        if strip_quote and self.type == TokenKind.STRING:
            return self.value[1:-1]
        elif self.type == TokenKind.CONCEPT:
            return str_concept(self.value)
        elif self.type == TokenKind.RULE:
            return str_concept(self.value, prefix="r:")
        elif self.type == TokenKind.REGEX:
            return "r" + self.value
        else:
            return str(self.value)

    def clone(self):
        """Return a copy of this token (lazy caches not carried over)."""
        return Token(self.type, self.value, self.index, self.line, self.column)
@dataclass()
class LexerError(Exception):
    """
    Raised by the Tokenizer when the input text cannot be tokenized.

    NOTE(review): as a dataclass the generated __init__ replaces
    Exception.__init__, so Exception.args stays empty (str(e) is "") and
    the dataclass repr carries the details — confirm this is intended.
    """
    message: str  # human-readable description of the problem
    text: str     # the offending text fragment
    index: int    # absolute offset where lexing failed
    line: int     # 1-based line of the failure
    column: int   # 1-based column of the failure
class Keywords(Enum):
    """Reserved words recognized by the parsers."""
    DEF = "def"
    CONCEPT = "concept"
    RULE = "rule"
    FROM = "from"
    BNF = "bnf"
    AS = "as"
    WHERE = "where"
    PRE = "pre"
    POST = "post"
    ISA = "isa"
    RET = "ret"
    WHEN = "when"
    PRINT = "print"
    THEN = "then"
    AUTO_EVAL = "auto_eval"
    DEF_VAR = "def_var"
class Tokenizer:
    """
    Iterable lexer: iterating an instance yields the Tokens of *text*.
    """

    def __init__(self, text, yield_eof=True, parse_word=False):
        """
        :param text: the source text to tokenize
        :param yield_eof: when True, a final EOF token is yielded
        :param parse_word: when True, alphanumeric runs are yielded as WORD
                           tokens instead of IDENTIFIER/NUMBER tokens
        """
        self.text = text
        self.text_len = len(text)
        self.column = 1  # 1-based column of the next character
        self.line = 1    # 1-based line of the next character
        self.i = 0       # absolute index of the next character
        self.yield_eof = yield_eof
        self.parse_word = parse_word
def __iter__(self):
    """
    Yield the Tokens of self.text one by one.

    self.i is the absolute index, self.line/self.column the 1-based
    coordinates of the token being emitted; every branch advances them
    past the consumed characters. A final EOF token is yielded when
    self.yield_eof is set.
    """
    while self.i < self.text_len:
        c = self.text[self.i]
        if c == "+":
            # '+' immediately followed by a digit is a signed number
            if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
                number = self.eat_number(self.i)
                yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
                self.i += len(number)
                self.column += len(number)
            else:
                yield Token(TokenKind.PLUS, "+", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
        elif c == "-":
            # '-' immediately followed by a digit is a signed number
            if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
                number = self.eat_number(self.i)
                yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
                self.i += len(number)
                self.column += len(number)
            else:
                yield Token(TokenKind.MINUS, "-", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
        elif c == "_":
            # "__var__<digits>" is a concept-variable definition
            # (assumes len(VARIABLE_PREFIX) == 7 — TODO confirm)
            if self.i + 7 < self.text_len and \
                    self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \
                    self.text[self.i + 7].isdigit():
                number = self.eat_number(self.i + 7)
                yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column)
                self.i += 7 + len(number)
                self.column += 7 + len(number)
            elif self.i + 1 < self.text_len and (self.text[self.i + 1].isalpha() or self.text[self.i + 1] == "_"):
                # '_' starting an identifier
                identifier = self.eat_identifier(self.i)
                yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
                self.i += len(identifier)
                self.column += len(identifier)
            else:
                yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
        elif c == "/":
            if self.i + 1 < self.text_len and self.text[self.i + 1] == "/":
                yield Token(TokenKind.SLASHSLASH, "//", self.i, self.line, self.column)
                self.i += 2
                self.column += 2
            else:
                yield Token(TokenKind.SLASH, "/", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
        elif c == "*":
            if self.i + 1 < self.text_len and self.text[self.i + 1] == "*":
                yield Token(TokenKind.STARSTAR, "**", self.i, self.line, self.column)
                self.i += 2
                self.column += 2
            else:
                yield Token(TokenKind.STAR, "*", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
        elif c == "{":
            yield Token(TokenKind.LBRACE, "{", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "}":
            yield Token(TokenKind.RBRACE, "}", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "(":
            yield Token(TokenKind.LPAR, "(", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == ")":
            yield Token(TokenKind.RPAR, ")", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "[":
            yield Token(TokenKind.LBRACKET, "[", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "]":
            yield Token(TokenKind.RBRACKET, "]", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "=":
            if self.i + 1 < self.text_len and self.text[self.i + 1] == "=":
                yield Token(TokenKind.EQUALSEQUALS, "==", self.i, self.line, self.column)
                self.i += 2
                self.column += 2
            else:
                yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
        elif c == " " or c == "\t":
            whitespace = self.eat_whitespace(self.i)
            yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
            self.i += len(whitespace)
            self.column += len(whitespace)
        elif c == ",":
            yield Token(TokenKind.COMMA, ",", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == ".":
            yield Token(TokenKind.DOT, ".", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == ";":
            yield Token(TokenKind.SEMICOLON, ";", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == ":":
            yield Token(TokenKind.COLON, ":", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "?":
            yield Token(TokenKind.QMARK, "?", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "|":
            yield Token(TokenKind.VBAR, "|", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "&":
            yield Token(TokenKind.AMPER, "&", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "<":
            yield Token(TokenKind.LESS, "<", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == ">":
            yield Token(TokenKind.GREATER, ">", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "!":
            yield Token(TokenKind.EMARK, "!", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "`":
            yield Token(TokenKind.BACK_QUOTE, "`", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "\\":
            yield Token(TokenKind.BACK_SLASH, "\\", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "^":
            yield Token(TokenKind.CARAT, "^", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "$":
            yield Token(TokenKind.DOLLAR, "$", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "":
            # NOTE(review): this compares a one-character string against ""
            # and can never match; given TokenKind.EURO the '€' literal was
            # probably lost in an encoding pass — confirm against the
            # original source and restore the euro sign here and below.
            yield Token(TokenKind.EURO, "", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "£":
            yield Token(TokenKind.STERLING, "£", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "#":
            yield Token(TokenKind.HASH, "#", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "°":
            yield Token(TokenKind.DEGREE, "°", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "~":
            yield Token(TokenKind.TILDE, "~", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "%":
            yield Token(TokenKind.PERCENT, "%", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "\n" or c == "\r":
            # a CRLF/LFCR pair counts as one NEWLINE token and one line
            newline = self.eat_newline(self.i)
            yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
            self.i += len(newline)
            self.column = 1
            self.line += 1
        elif c == "c" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":":
            # concept reference: c:key#id:
            name, id, length = self.eat_concept(self.i + 2, self.line, self.column + 2)
            yield Token(TokenKind.CONCEPT, (name, id), self.i, self.line, self.column)
            self.i += length + 2
            self.column += length + 2
        elif c == "r" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":":
            # rule reference: r:key#id:
            name, id, length = self.eat_concept(self.i + 2, self.line, self.column + 2)
            yield Token(TokenKind.RULE, (name, id), self.i, self.line, self.column)
            self.i += length + 2
            self.column += length + 2
        elif c == "r" and self.i + 1 < self.text_len and self.text[self.i + 1] in "'\"|/":
            # regex literal: r'xxx', r"xxx", r|xxx| or r/xxx/
            string, newlines, column_index = self.eat_string(self.i + 1, self.line, self.column)
            yield Token(TokenKind.REGEX, string, self.i, self.line, self.column)  # quotes are kept
            self.i += len(string) + 1
            self.column = column_index  # 1 if newlines > 0 else self.column + len(string)
            self.line += newlines
        elif self.parse_word and (c.isalpha() or c.isdigit()):
            word = self.eat_word(self.i)
            yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
            self.i += len(word)
            self.column += len(word)
        elif c.isalpha():
            identifier = self.eat_identifier(self.i)
            yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
            self.i += len(identifier)
            self.column += len(identifier)
        elif c.isdigit():
            number = self.eat_number(self.i)
            yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
            self.i += len(number)
            self.column += len(number)
        elif c == "'" and self.i > 0 and self.text[self.i - 1] != " ":
            # an apostrophe glued to the previous character (e.g. "l'arbre")
            # is a QUOTE token, not a string opener
            yield Token(TokenKind.QUOTE, "'", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == "'" or c == '"':
            string, newlines, column_index = self.eat_string(self.i, self.line, self.column)
            yield Token(TokenKind.STRING, string, self.i, self.line, self.column)  # quotes are kept
            self.i += len(string)
            self.column = column_index  # 1 if newlines > 0 else self.column + len(string)
            self.line += newlines
        else:
            raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
    if self.yield_eof:
        yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_concept(self, start, line, column):
    """
    Consume a concept reference of the form key[#id]: starting at *start*
    (just after the 'c:'/'r:' prefix).

    :param start: index right after the prefix
    :param line: line of the token start (for error reports)
    :param column: column of the token start (for error reports)
    :return: (key, id, consumed_length); key or id may be None
    :raises LexerError: on a newline inside the name, a missing ending
                        colon, or when neither key nor id is present
    """
    key, id, buffer = None, None, ""
    i = start
    processing_key = True
    while i < self.text_len:
        c = self.text[i]
        if c == "\n":
            raise LexerError(f"New line in concept name", self.text[start:i], i, line, column + i - start)
        if c == ":":
            # closing colon: flush the buffer into whichever part we were reading
            if processing_key:
                key = buffer if buffer else None
            else:
                id = buffer if buffer else None
            i += 1  # eat the colon
            break
        if c == "#":
            # '#' switches from the key part to the id part
            key = buffer if buffer else None
            buffer = ""
            processing_key = False
            i += 1
            continue
        buffer += c
        i += 1
    else:
        # loop exhausted the text without hitting the closing colon
        raise LexerError(f"Missing ending colon", self.text[start:i], i, line, column + i - start)
    if (key, id) == (None, None):
        raise LexerError(f"Concept identifiers not found", "", start, line, column)
    return key, id, i - start
def eat_whitespace(self, start):
    """
    Consume the run of blanks/tabs beginning at *start* (assumed to be
    a whitespace character).

    :param start: index of the first whitespace character
    :return: the consumed whitespace substring
    """
    end = start + 1
    while end < self.text_len and self.text[end] in (" ", "\t"):
        end += 1
    return self.text[start:end]
def eat_newline(self, start):
    """
    Consume a newline at *start*, folding a mixed CRLF/LFCR pair into a
    single two-character result.

    :param start: index of a '\\n' or '\\r' character
    :return: "\\n", "\\r", or the two-character mixed pair
    """
    first = self.text[start]
    if start + 1 == self.text_len:
        return first
    second = self.text[start + 1]
    # a mixed pair (one of each) is consumed as a whole
    if {first, second} == {"\n", "\r"}:
        return first + second
    return first
def eat_identifier(self, start):
    """
    Consume an identifier: letters, digits, '_' and '-' (the first
    character at *start* is assumed valid and always included).

    :param start: index of the first identifier character
    :return: the consumed identifier
    """
    end = start + 1
    while end < self.text_len:
        ch = self.text[end]
        if not (ch.isalpha() or ch == "_" or ch == "-" or ch.isdigit()):
            break
        end += 1
    return self.text[start:end]
def eat_number(self, start):
    """
    Consume a number: digits and dots. The character at *start* is always
    included (callers may pass a leading sign). No validation is done on
    the number of dots.

    :param start: index of the first character of the number
    :return: the consumed number as a string
    """
    end = start + 1
    while end < self.text_len and (self.text[end].isdigit() or self.text[end] == "."):
        end += 1
    return self.text[start:end]
def eat_string(self, start_index, start_line, start_column):
    """
    Consume a quoted string starting at *start_index* (quotes are kept in
    the result). Newlines inside the string are tracked so the caller can
    update its line/column counters.

    Bug fix: the escape flag used to be *set* on every backslash instead
    of toggled, so an escaped backslash ("\\\\") wrongly escaped the
    closing quote that followed it; it now toggles.

    :param start_index: index of the opening quote in self.text
    :param start_line: line of the opening quote (for error reports)
    :param start_column: column of the opening quote
    :return: (string_with_quotes, newline_count, column_after_string)
    :raises LexerError: when the closing quote is missing
    """
    quote = self.text[start_index]
    result = self.text[start_index]
    lines_count = 0
    column_index = start_column + 1
    i = start_index + 1
    escape = False
    while i < self.text_len:
        c = self.text[i]
        result += c
        i += 1
        column_index += 1
        if c == "\n":
            lines_count += 1
            column_index = 1
        if c == "\\":
            # toggle, so that an escaped backslash does not escape
            # the character that follows it
            escape = not escape
        elif c == quote and not escape:
            break
        else:
            escape = False
    if result[-1] != quote or len(result) == 1:
        raise LexerError("Missing Trailing quote", result, i, start_line + lines_count,
                         1 if lines_count > 0 else start_column + len(result))
    return result, lines_count, column_index
def eat_word(self, start):
    """
    Consume a word: an uninterrupted run of letters and digits (no space).
    The character at *start* is always included.

    :param start: index of the first character of the word
    :return: the consumed word
    """
    end = start + 1
    while end < self.text_len and (self.text[end].isalpha() or self.text[end].isdigit()):
        end += 1
    return self.text[start:end]
def strip_tokens(tokens, strip_eof=False):
    """
    Return *tokens* without leading/trailing WHITESPACE and NEWLINE tokens
    (and, optionally, trailing EOF tokens).

    :param tokens: list of Token, or None
    :param strip_eof: also strip trailing EOF tokens
    :return: the trimmed sub-list, [] when only blanks, None when tokens is None
    """
    if tokens is None:
        return None
    blanks = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
    total = len(tokens)
    first = 0
    while first < total and tokens[first].type in blanks:
        first += 1
    if first == total:
        return []
    trailing = blanks + (TokenKind.EOF,) if strip_eof else blanks
    last = total - 1
    while last > 0 and tokens[last].type in trailing:
        last -= 1
    return tokens[first: last + 1]
View File
+24
View File
@@ -0,0 +1,24 @@
# How to serialize ?
## General rule
- 1 byte : type of object code
- int : version of the encoder
- data : can be the json representation of the object
### Current supported types
- J : Json object
- P : pickle
- E : events
- S : state
- X : executionContext
- M : context metadata
Not verified yet! (Note: the letter codes 'M' and 'X' below collide with 'M : context metadata' and 'X : executionContext' above — one of each pair must be reassigned before use.)
- C : concept (with history management)
- D : concept definitions (no history management)
- R : executionContext ('R' stands for Result or ReturnValue, no history management)
- O : ServiceObj (from pickle)
- M : MemoryObject (using SheerkaPickle)
- X : Rule (from sheerkaPickle, 'X' stands for nothing, I am running out of meaningful letters)
- T : CustomType
+9 -72
View File
@@ -8,9 +8,10 @@ from os import path
from threading import RLock
from typing import Callable
from common.utils import get_logger_name
from core.Event import Event
from core.ExecutionContext import ExecutionContext
from core.Sheerka import Sheerka
from core.global_symbols import NotFound
from common.global_symbols import NotFound
from sdp.sheerkaDataProviderIO import SheerkaDataProviderIO
from sdp.sheerkaSerializer import Serializer, SerializerContext
@@ -27,72 +28,6 @@ def json_default_converter(o):
return o.isoformat()
class Event(object):
"""
Class that represents something that modifies the state of the system
"""
def __init__(self, message="", user_id="", date=None, parents=None):
self.user_id: str = user_id # id of the user that triggers the modification
self.date: datetime | None = date or datetime.now() # when
self.message: str = message # user input or whatever that modifies the system
self.parents: list[str] = parents # digest(s) of the parent(s) of this event
self._digest: str | None = None # digest of the event
def __str__(self):
return f"{self.date.strftime('%d/%m/%Y %H:%M:%S')} {self.message}"
def __repr__(self):
return f"{self.get_digest()[:12]} {self.message}"
def get_digest(self):
"""
Returns the digest of the event
:return: sha256 of the event
"""
if self._digest:
return self._digest
if self.user_id == "":
# only possible during the unit test
# We use this little trick to speed up the unit test
self._digest = self.message[6:] if self.message.startswith("TEST::") else "xxx"
return self._digest
if not isinstance(self.message, str):
raise NotImplementedError(f"message={self.message}")
to_hash = f"Event:{self.user_id}{self.date}{self.message}{self.parents}".encode("utf-8")
self._digest = hashlib.sha256(to_hash).hexdigest()
return self._digest
def to_dict(self):
return self.__dict__
def from_dict(self, as_dict):
self.user_id = as_dict["user_id"]
self.date = datetime.fromisoformat(as_dict["date"])
self.message = as_dict["message"]
self.parents = as_dict["parents"]
self._digest = as_dict["_digest"] # freeze the digest
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, Event):
return (self.user_id == other.user_id and
self.date == other.date and
self.message == other.message and
self.parents == other.parents)
return False
def __hash__(self):
return hash(self.get_digest())
class State:
"""
Class that represents the state of the system (dictionary of all known entries)
@@ -118,7 +53,7 @@ class SheerkaDataProviderTransaction:
Note that Transaction within Transaction is not supported
"""
def __init__(self, sdp, event):
def __init__(self, sdp, event: Event):
self.sdp: SheerkaDataProvider = sdp
self.event: Event = event
self.state: State | None = None
@@ -181,6 +116,7 @@ class SheerkaDataProviderTransaction:
pass
def __exit__(self, exc_type, exc_val, exc_tb):
# TODO: Manage exception in order to rollback
self.state.parents = [] if self.snapshot is None else [self.snapshot]
self.state.events = [self.event_digest]
self.state.date = datetime.now()
@@ -207,10 +143,11 @@ class SheerkaDataProvider:
REF_PREFIX = "##REF##:"
def __init__(self, root=None, sheerka=None, name="__default__"):
self.log = logging.getLogger(__name__)
self.init_log = logging.getLogger("init." + __name__)
self.init_log.debug("Initializing sdp.")
self.log = logging.getLogger(get_logger_name(__name__))
self.init_log = logging.getLogger(get_logger_name("init." + __name__))
self.init_log.debug(f"Initializing sdp '{name}'")
from core.Sheerka import Sheerka
self.sheerka: Sheerka = sheerka
self.io = SheerkaDataProviderIO.get(root)
self.first_time = self.io.first_time
+4 -2
View File
@@ -3,13 +3,15 @@ import logging
import os
from os import path
from common.utils import get_logger_name
class SheerkaDataProviderIO:
def __init__(self, root):
self.root = root
self.log = logging.getLogger(__name__)
self.init_log = logging.getLogger("init." + __name__)
self.log = logging.getLogger(get_logger_name(__name__))
self.init_log = logging.getLogger(get_logger_name("init." + __name__))
def exists(self, file_path):
pass
+50 -28
View File
@@ -7,9 +7,12 @@ import struct
from dataclasses import dataclass
from enum import Enum
from core.utils import get_class, get_full_qualified_name
from common.global_symbols import CustomType, NotFound, NotInit, Removed
from common.utils import get_class, get_full_qualified_name, get_logger_name
# https://stackoverflow.com/questions/15721363/preserve-python-tuples-with-json
def json_default_converter(o):
"""
Default formatter for json
@@ -24,7 +27,7 @@ def json_default_converter(o):
if isinstance(o, Enum):
return o.name
raise Exception(f"Cannot serialize object '{o}', class='{o.__class__.__name__}'")
# raise Exception(f"Cannot serialize object '{o}', class='{o.__class__.__name__}'")
# In debug mode, just
#
# with open("json_encoding_error.txt", "a") as f:
@@ -47,21 +50,23 @@ class Serializer:
HISTORY = "##history##"
def __init__(self):
self.log = logging.getLogger(__name__)
self.init_log = logging.getLogger("init." + __name__)
self.log = logging.getLogger(get_logger_name(__name__))
self.init_log = logging.getLogger(get_logger_name("init." + __name__))
self.init_log.debug("Initializing serializers")
self._cache = []
# add builtin serializers
self.register(EventSerializer())
self.register(StateSerializer())
self.register(ExecutionContextSerializer())
self.register(ConceptMetadataSerializer())
self.register(CustomTypeSerializer())
# self.register(ConceptSerializer())
# self.register(DictionarySerializer())
# self.register(ExecutionContextSerializer())
# self.register(MemoryObjectSerializer()) # before ServiceObjSerializer
# self.register(ServiceObjSerializer())
# self.register(RuleSerializer())
# self.register(CustomTypeSerializer())
#
def register(self, serializer):
"""
@@ -204,12 +209,35 @@ class JsonSerializer(BaseSerializer):
return obj
class CustomTypeSerializer(BaseSerializer):
    """
    Serializer for the CustomType sentinel singletons (NotInit, NotFound,
    Removed): only the sentinel's string value is written to the stream
    (type code 'T', version 1).
    """

    def __init__(self):
        BaseSerializer.__init__(self, "T", 1)

    def matches(self, obj):
        """Handle CustomType instances only."""
        return isinstance(obj, CustomType)

    def dump(self, stream, obj, context):
        """Write the sentinel's value, utf-8 encoded, and rewind the stream."""
        stream.write(obj.value.encode("utf-8"))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        """Map the stored value back to the corresponding singleton."""
        value = stream.read().decode("utf-8")
        if value == NotInit.value:
            return NotInit
        elif value == NotFound.value:
            return NotFound
        elif value == Removed.value:
            return Removed
        # bug fix: 'raise NotImplemented(...)' raised a TypeError, because
        # NotImplemented is a constant (not callable, not an exception type)
        raise NotImplementedError(f"CustomTypeSerializer.load({value})")
class EventSerializer(BaseSerializer):
def __init__(self):
BaseSerializer.__init__(self, "E", 1)
def matches(self, obj):
return get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
return get_full_qualified_name(obj) == "core.Event.Event"
def dump(self, stream, obj, context):
stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8"))
@@ -231,6 +259,21 @@ class StateSerializer(PickleSerializer):
"S",
1)
class ExecutionContextSerializer(JsonSerializer):
    """Json-based serializer for ExecutionContext objects (type code 'X', version 1)."""
    CLASS_NAME = "core.ExecutionContext.ExecutionContext"

    def __init__(self):
        # matches by fully-qualified class name to avoid importing the class here
        super().__init__(lambda obj: get_full_qualified_name(obj) == self.CLASS_NAME, "X", 1)
class ConceptMetadataSerializer(PickleSerializer):
    """Pickle-based serializer for ConceptMetadata objects (type code 'M', version 1)."""
    CLASS_NAME = "core.concept.ConceptMetadata"

    def __init__(self):
        # matches by fully-qualified class name to avoid importing the class here
        super().__init__(lambda obj: get_full_qualified_name(obj) == self.CLASS_NAME, "M", 1)
#
#
#
# class JsonSerializer(BaseSerializer):
@@ -359,24 +402,3 @@ class StateSerializer(PickleSerializer):
# super().__init__(lambda obj: isinstance(obj, Rule), "X", 1)
#
#
# class CustomTypeSerializer(BaseSerializer):
# def __init__(self):
# BaseSerializer.__init__(self, "T", 1)
#
# def matches(self, obj):
# return isinstance(obj, CustomType)
#
# def dump(self, stream, obj, context):
# stream.write(obj.value.encode("utf-8"))
# stream.seek(0)
# return stream
#
# def load(self, stream, context):
# value = stream.read().decode("utf-8")
# if value == NotInit.value:
# return NotInit
# elif value == NotFound.value:
# return NotFound
# elif value == Removed.value:
# return Removed
# raise NotImplemented(f"CustomTypeSerializer.load({value})")
View File
+4 -2
View File
@@ -19,7 +19,8 @@ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
fake_users_db = {
"kodjo": {
"username": "kodjo",
"full_name": "Kodjo Sossouvi",
"firstname": "Kodjo",
"lastname": "Sossouvi",
"email": "kodjo.sossouvi@gmail.com",
"hashed_password": "$2b$12$fb9jW7QUZ9KIEAAtVmWMEOGtehKy9FafUr7Zfrsb3ZMhsBbzZs7SC", # password is kodjo
"disabled": False,
@@ -42,7 +43,8 @@ class User(BaseModel):
"""
username: str
email: str | None = None
full_name: str | None = None
firstname: str | None = None
lastname: str | None = None
disabled: bool | None = None
+40 -1
View File
@@ -4,12 +4,32 @@ import uvicorn
from fastapi import Depends, FastAPI, HTTPException
from fastapi.security import OAuth2PasswordRequestForm
from starlette import status
from starlette.middleware.cors import CORSMiddleware
from constants import CLIENT_OPERATION_QUIT, EXIT_COMMANDS, SHEERKA_PORT
from core.Sheerka import Sheerka
from server.authentication import ACCESS_TOKEN_EXPIRE_MINUTES, User, authenticate_user, create_access_token, \
fake_users_db, get_current_active_user
app = FastAPI()

# Browser origins allowed to call this API (CORS): local dev front-end ports.
origins = [
    "http://localhost:56426",
    "http://localhost:5173",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# NEED TO FIND A WAY TO INIT SHEERKA within the __name__ == "__main__" section
# As of now, if we do that, sheerka is not properly initialized using the command
# 'uvicorn server.main:app' from the shell
# NOTE(review): module-level init means Sheerka starts as an import side effect;
# confirm that is acceptable for tooling/tests that import this module.
sheerka = Sheerka()
sheerka.initialize()
@app.get("/")
@@ -34,7 +54,10 @@ async def login(form_data: OAuth2PasswordRequestForm = Depends()):
access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
access_token = create_access_token(data={"sub": user.username}, expires_delta=access_token_expires)
return {"access_token": access_token, "token_type": "bearer"}
return {"first_name": user.firstname,
"last_name": user.lastname,
"access_token": access_token,
"token_type": "bearer"}
@app.post("/echo/{message}", status_code=status.HTTP_200_OK, response_model=dict)
@@ -62,5 +85,21 @@ async def echo(message: str, current_user: User = Depends(get_current_active_use
}
@app.post("/command/{message}", status_code=status.HTTP_200_OK, response_model=dict)
async def command(message: str, current_user: User = Depends(get_current_active_user)) -> dict:
    """Evaluate an authenticated user's command through Sheerka.

    Exit commands short-circuit with a quit instruction for the client;
    anything else is forwarded to the Sheerka evaluator.
    """
    if message in EXIT_COMMANDS:
        return {
            "status": True,
            "response": "Take care.",
            "command": CLIENT_OPERATION_QUIT
        }

    result = sheerka.evaluate_user_input(message, current_user)
    first = result[0]
    return {
        "status": first.status,
        "response": first.value.body,
        "command": None
    }
if __name__ == "__main__":
    # Launched directly (not via the uvicorn CLI): serve on the configured port.
    uvicorn.run("server.main:app", port=SHEERKA_PORT, log_level="info")
+7
View File
@@ -0,0 +1,7 @@
from .sheerkaplicker import encode
from .sheerkaunpickler import decode

# Public API of the sheerkapickle package.
__all__ = ('encode', 'decode')

# register built-in handlers
# Importing the handlers module populates the global handler registry as a
# side effect; __import__ is used to make that side effect explicit.
__import__('sheerkapickle.handlers', level=0)
+197
View File
@@ -0,0 +1,197 @@
import re
import threading
import uuid
from sheerkapickle import utils
class ToReduce:
    """Pairs a predicate with a value extractor: objects matching `predicate`
    are replaced by `get_value(obj)` instead of being fully serialized."""

    def __init__(self, predicate, get_value):
        self.predicate, self.get_value = predicate, get_value
class SheerkaRegistry(object):
    """Maps classes (and their fully qualified names) to serialization handlers."""

    def __init__(self):
        self._handlers = {}       # type or qualified name -> handler
        self._base_handlers = {}  # type -> handler, consulted for subclasses

    def get(self, cls_or_name: str, default=None):
        """Return the handler registered for ``cls_or_name``.

        Looks up by type reference or fully qualified name.  When no direct
        match exists and ``cls_or_name`` is a type, handlers registered with
        ``base=True`` are consulted for a matching base class.

        :param cls_or_name: the type or its fully qualified name
        :param default: value returned when no matching handler is found
        """
        handler = self._handlers.get(cls_or_name)
        if handler is not None:
            return handler
        # fall back to base-class handlers when given an actual type
        if utils.is_type(cls_or_name):
            for base_cls, base_handler in self._base_handlers.items():
                if issubclass(cls_or_name, base_cls):
                    return base_handler
        return default

    def register(self, cls, handler: 'BaseHandler' = None, base: bool = False):
        """Register the custom handler for a class.

        Can also be used as a decorator by omitting the ``handler`` argument::

            @handlers.register(Foo, base=True)
            class FooHandler(BaseHandler):
                pass

        :param cls: the custom object class to handle
        :param handler: the handler class (if None, a decorator is returned)
        :param base: whether the handler also applies to all subclasses
        """
        if handler is None:
            def _decorator(handler_cls):
                self.register(cls, handler=handler_cls, base=base)
                return handler_cls

            return _decorator

        if not utils.is_type(cls):
            raise TypeError('{!r} is not a class/type'.format(cls))
        # store both the name and the actual type for the ugly cases like
        # _sre.SRE_Pattern that cannot be loaded back directly
        qualified = utils.importable_name(cls)
        self._handlers[qualified] = handler
        self._handlers[cls] = handler
        if base:
            # only the actual type is needed for subclass checking
            self._base_handlers[cls] = handler

    def unregister(self, cls):
        """Remove any handler registered for ``cls`` (no-op when absent)."""
        self._handlers.pop(cls, None)
        self._handlers.pop(utils.importable_name(cls), None)
        self._base_handlers.pop(cls, None)
# Module-level default registry and its convenience aliases.
registry = SheerkaRegistry()
register = registry.register
unregister = registry.unregister
get = registry.get
class BaseHandler(object):
    """Base class for custom (de)serialization handlers.

    A handler knows how to flatten one registered type into a json-friendly
    dict and how to rebuild an instance from that dict.  Subclasses implement
    flatten()/new()/restore().
    """

    def __init__(self, sheerka, context):
        """
        Initialize a new handler to handle a registered type.

        :Parameters:
          - `sheerka`: current Sheerka instance
          - `context`: reference to pickler/unpickler
        """
        self.sheerka = sheerka
        self.context = context

    def __call__(self, sheerka, context):
        """This permits registering either Handler instances or classes:
        calling a class constructs a handler, calling an instance re-targets it.

        :Parameters:
          - `sheerka`: current Sheerka instance
          - `context`: reference to pickler/unpickler
        """
        self.sheerka = sheerka
        self.context = context
        return self

    def flatten(self, obj: object, data: dict):
        """
        Flatten `obj` into a json-friendly form and write result to `data`.

        :param object obj: The object to be serialized.
        :param dict data: A partially filled dictionary which will contain the
            json-friendly representation of `obj` once this method has
            finished.
        """
        raise NotImplementedError('You must implement flatten() in %s' %
                                  self.__class__)

    def new(self, data: dict):
        """Create a blank instance of the handled type, to be filled by restore()."""
        raise NotImplementedError('You must implement new() in %s' %
                                  self.__class__)

    def restore(self, data: dict, instance: object):
        """
        Restore an object of the registered type from the json-friendly
        representation `data` into `instance` and return it.
        """
        raise NotImplementedError('You must implement restore() in %s' %
                                  self.__class__)

    @classmethod
    def handles(handler_cls, cls: type):
        """
        Register this handler for the given class. Suitable as a decorator,
        e.g.::

            @MyCustomHandler.handles
            class MyCustomClass:
                def __reduce__(self):
                    ...
        """
        # Fixed: the classmethod's first parameter was named `self`, which was
        # misleading (it receives the handler class); `cls` was annotated as
        # `str` although it receives a type.
        registry.register(cls, handler_cls)
        return cls
class RegexHandler(BaseHandler):
    """Flatten compiled regular-expression objects via their pattern string."""

    def flatten(self, obj, data):
        """Store only the source pattern; flags are not preserved."""
        data['pattern'] = obj.pattern
        return data

    def new(self, data):
        """Recompile the stored pattern into a fresh regex object."""
        return re.compile(data['pattern'])

    def restore(self, data, instance):
        # new() already rebuilt the complete object; nothing left to fill in
        return instance
# Register for the compiled-pattern type; the type is derived from an instance
# because it lacked a stable public name in older Python versions.
RegexHandler.handles(type(re.compile('')))
class UUIDHandler(BaseHandler):
    """Serialize uuid.UUID objects via their 32-character hex form."""

    def flatten(self, obj, data):
        data['hex'] = obj.hex
        return data

    def new(self, data):
        """Rebuild the UUID directly from its hex digits."""
        return uuid.UUID(data['hex'])

    def restore(self, data, instance):
        # fully reconstructed by new()
        return instance
# uuid.UUID instances are serialized via their hex representation.
UUIDHandler.handles(uuid.UUID)
class LockHandler(BaseHandler):
    """Serialize threading.Lock objects, preserving their locked state."""

    def flatten(self, obj, data):
        data['locked'] = obj.locked()
        return data

    def new(self, data):
        """Create a fresh lock and re-acquire it when the original was held."""
        fresh = threading.Lock()
        if data.get('locked', False):
            fresh.acquire()
        return fresh

    def restore(self, data, instance):
        # new() already produced the final lock object
        return instance
# threading.Lock() is a factory function, not a class, so the concrete lock
# type is obtained from a throwaway instance before registering.
_lock = threading.Lock()
LockHandler.handles(_lock.__class__)
+240
View File
@@ -0,0 +1,240 @@
from common.global_symbols import NotInit
from core.concept import Concept
from sheerkapickle.handlers import BaseHandler, registry
# Reference objects used by ConceptHandler to compute serialization deltas:
# only attributes/values differing from a pristine Concept are written.
default_concept = Concept()
default_concept_values = default_concept.values()

# JSON keys carrying the identity of a serialized concept / rule.
CONCEPT_ID = "concept/id"
RULE_ID = "rule/id"
class ConceptHandler(BaseHandler):
    """(De)serializes Concept objects as a delta against a reference concept.

    Only metadata properties and values that differ from the reference are
    written, keeping the payload small.  The reference is either a pristine
    default Concept (full serialization) or the concept currently registered
    under the same id in Sheerka.
    """

    def flatten(self, obj: Concept, data):
        pickler = self.context
        sheerka = self.sheerka
        if obj.get_metadata().full_serialization:
            # diff against a pristine Concept -> effectively serialize everything
            ref = default_concept
            ref_values = default_concept_values
        else:
            # diff against the live concept with the same id
            ref = sheerka.get_by_id(obj.id, allow_dynamic=True)
            ref_values = ref.values()
        data[CONCEPT_ID] = obj.id
        # transform metadata
        # NOTE(review): CONCEPT_PROPERTIES_TO_SERIALIZE is not defined in this
        # view — confirm it is imported/defined earlier in the module.
        for name in CONCEPT_PROPERTIES_TO_SERIALIZE:
            value = getattr(obj.get_metadata(), name)
            ref_value = getattr(ref.get_metadata(), name)
            if value != ref_value:
                # "variables" holds tuples, which are not json-friendly: store as lists
                value_to_use = [list(t) for t in value] if name == "variables" else value
                data["meta." + name] = pickler.flatten(value_to_use)
        # # transform values
        for name, value in obj.values().items():
            if name not in ref_values or value != ref_values[name]:
                if "values" not in data:
                    data["values"] = []
                data["values"].append((pickler.flatten(name), pickler.flatten(value)))
        return data

    def new(self, data):
        """Reattach to (or create) the concept with the serialized id."""
        sheerka = self.sheerka
        if CONCEPT_ID in data:
            return sheerka.new((None, data[CONCEPT_ID]), allow_dynamic=True)
        else:
            return Concept()

    def restore(self, data, instance):
        """Apply the serialized metadata/value deltas onto `instance`."""
        pickler = self.context
        for key, value in data.items():
            if key.startswith("_sheerka/") or key == CONCEPT_ID:
                # bookkeeping tags are handled by the unpickler itself
                continue
            resolved_value = pickler.restore(data[key])
            if key.startswith("meta."):
                # get metadata
                resolved_prop = key[5:]  # strip the "meta." prefix
                if resolved_prop == "variables":
                    for prop_name, prop_value in resolved_value:
                        instance.def_var(prop_name, prop_value)
                else:
                    setattr(instance.get_metadata(), resolved_prop, resolved_value)
            elif key == "values":
                # get properties
                for prop_name, prop_value in resolved_value:
                    instance.set_value(prop_name, NotInit if prop_value is None else prop_value)
            else:
                raise Exception("Sanity check as it's not possible yet")
        # instance.freeze_definition_hash()
        return instance
class UserInputHandler(ConceptHandler):
    """(De)serializes UserInputConcept: identity, user name and raw text."""

    def flatten(self, obj: UserInputConcept, data):
        data[CONCEPT_ID] = (obj.key, obj.id)
        data["user_name"] = obj.user_name
        # normalize the input text regardless of its in-memory representation
        if isinstance(obj.text, list):
            text = core.utils.get_text_from_tokens(obj.text)
        elif isinstance(obj.text, ParserInput):
            text = obj.text.as_text()
        else:
            text = obj.text
        data["text"] = text
        return data

    def new(self, data):
        # blank instance; __init__ is replayed in restore()
        return UserInputConcept.__new__(UserInputConcept)

    def restore(self, data, instance):
        instance.__init__(data["text"], data["user_name"])
        instance.get_metadata().key = data[CONCEPT_ID][0]
        instance.get_metadata().id = data[CONCEPT_ID][1]
        instance.freeze_definition_hash()
        return instance
class ReturnValueHandler(BaseHandler):
    """(De)serializes ReturnValueConcept, reducing `who` to a short string tag."""

    def flatten(self, obj: ReturnValueConcept, data):
        pickler = self.context
        data[CONCEPT_ID] = (obj.key, obj.id)
        # encode the origin as a compact string depending on its kind
        if isinstance(obj.who, Concept):
            who = f"c:{obj.who.id}:"
        elif isinstance(obj.who, (BaseParser, BaseEvaluator)):
            who = obj.who.name
        else:
            who = obj.who
        data["who"] = who
        data["status"] = obj.status
        data["value"] = pickler.flatten(obj.value)
        if obj.parents:
            data["parents"] = pickler.flatten(obj.parents)
        return data

    def new(self, data):
        # blank instance; __init__ is replayed in restore()
        return ReturnValueConcept.__new__(ReturnValueConcept)

    def restore(self, data, instance):
        pickler = self.context
        instance.__init__(data["who"], data["status"], pickler.restore(data["value"]))
        if "parents" in data:
            instance.parents = pickler.restore(data["parents"])
        instance.get_metadata().key = data[CONCEPT_ID][0]
        instance.get_metadata().id = data[CONCEPT_ID][1]
        instance.freeze_definition_hash()
        return instance
class SheerkaHandler(ConceptHandler):
    """Maps any serialized Sheerka reference back to the single live instance
    (flatten/restore behavior is inherited from ConceptHandler)."""

    def new(self, data):
        # never reconstruct Sheerka: reuse the instance driving this (un)pickler
        return self.sheerka
class ExecutionContextHandler(BaseHandler):
    """(De)serializes ExecutionContext; `who`/`action`/`action_context` are
    stored as display strings, the remaining properties are flattened
    recursively."""

    def flatten(self, obj, data):
        pickler = self.context
        for prop in CONTEXT_PROPERTIES_TO_SERIALIZE:
            value = getattr(obj, prop)
            # Fixed: str() was previously applied before the None check, so a
            # None who/action/action_context was serialized as the string
            # "None" instead of being skipped like every other empty property.
            if value is None:
                continue
            if prop in ("who", "action", "action_context"):
                value = str(value)
            data[prop] = pickler.flatten(value)
        return data

    def new(self, data):
        # minimal context; the remaining properties are filled in restore()
        return ExecutionContext(data["who"], None, self.sheerka, BuiltinConcepts.NOP, None)

    def restore(self, data, instance):
        pickler = self.context
        for prop in CONTEXT_PROPERTIES_TO_SERIALIZE:
            # "who" was already consumed by new(); absent props keep defaults
            if prop not in data or prop == "who":
                continue
            setattr(instance, prop, pickler.restore(data[prop]))
        return instance
class RuleContextHandler(BaseHandler):
    """(De)serializes Rule objects from their metadata fields."""

    def flatten(self, obj, data):
        meta = obj.metadata
        data[RULE_ID] = obj.id
        data["name"] = meta.name
        data["predicate"] = meta.predicate
        data["action_type"] = meta.action_type
        data["action"] = meta.action
        return data

    def new(self, data):
        """Rebuild the rule directly from its serialized metadata."""
        return Rule(data["action_type"], data["name"], data["predicate"], data["action"])

    def restore(self, data, instance):
        # new() rebuilt everything except the original id
        instance.metadata.id = data[RULE_ID]
        return instance
class PythonNodeHandler(BaseHandler):
    """(De)serializes PythonNode via its source text, AST string and objects."""

    def flatten(self, obj, data):
        pickler = self.context
        data["source"] = obj.source
        data["ast_str"] = obj.ast_str
        data["objects"] = pickler.flatten(obj.objects)
        return data

    def new(self, data):
        # blank node; __init__ is replayed in restore()
        return PythonNode.__new__(PythonNode)

    def restore(self, data, instance):
        pickler = self.context
        instance.__init__(data["source"], objects=pickler.restore(data["objects"]))
        instance.ast_str = data["ast_str"]
        return instance
class DefRuleNodeHandler(BaseHandler):
    """(De)serializes DefRuleNode (and the DefExec/DefFormat variants) via the
    only two fields flatten() stores: `tokens` and `name`."""

    def flatten(self, obj, data):
        pickler = self.context
        data["tokens"] = pickler.flatten(obj.tokens)
        data["name"] = pickler.flatten(obj.name)
        return data

    def new(self, data):
        # blank instance; fields are filled in restore()
        return DefRuleNode.__new__(DefRuleNode)

    def restore(self, data, instance):
        pickler = self.context
        # Fixed: the previous restore() was copy-pasted from PythonNodeHandler
        # and called instance.__init__(data["source"], objects=...), but
        # flatten() never stores "source"/"objects" for rule nodes, so every
        # load raised KeyError.  Restore only what was actually serialized.
        # NOTE(review): attributes normally set by DefRuleNode.__init__ other
        # than tokens/name are not restored here — confirm none are required.
        instance.tokens = pickler.restore(data["tokens"])
        instance.name = pickler.restore(data["name"])
        return instance
def initialize_pickle_handlers():
    """Register every Sheerka pickle handler with the global registry
    (base=True: each handler also applies to subclasses)."""
    pairs = (
        (Concept, ConceptHandler),
        (UserInputConcept, UserInputHandler),
        (ReturnValueConcept, ReturnValueHandler),
        (Sheerka, SheerkaHandler),
        (ExecutionContext, ExecutionContextHandler),
        (Rule, RuleContextHandler),
        (PythonNode, PythonNodeHandler),
        (DefRuleNode, DefRuleNodeHandler),
        (DefExecRuleNode, DefRuleNodeHandler),  # TODO: fix inheritance that does not work
        (DefFormatRuleNode, DefRuleNodeHandler),  # TODO: fix inheritance that does not work
    )
    for cls, handler in pairs:
        registry.register(cls, handler, True)
+163
View File
@@ -0,0 +1,163 @@
import json
from logging import Logger
from common.utils import get_full_qualified_name, str_concept
from core.concept import Concept
from sheerkapickle import utils, tags, handlers
def encode(sheerka, obj):
    """Serialize `obj` to a JSON string using a fresh SheerkaPickler."""
    return json.dumps(SheerkaPickler(sheerka).flatten(obj))
class ToReduce:
    """Pairs a predicate with a value extractor: objects matching `predicate`
    are replaced by `get_value(obj)` instead of being flattened.

    NOTE(review): this class is duplicated in sheerkapickle.handlers —
    consider sharing a single definition.
    """

    def __init__(self, predicate, get_value):
        self.predicate, self.get_value = predicate, get_value
class SheerkaPickler:
    """
    Json sheerkapickle
    Inspired by jsonpickle (https://github.com/jsonpickle/jsonpickle)
    which failed to work in my environment

    Repeated/cyclic objects are emitted once and referenced afterwards by
    {tags.ID: n}; `ids`/`objs` hold the bookkeeping for that numbering.
    """

    def __init__(self, sheerka):
        # id(obj) -> sequential reference id of already flattened objects
        self.ids = {}
        # flattened objects in emission order; also keeps them alive so that
        # their id() stays valid for the lifetime of this pickler
        self.objs = []
        # last assigned reference id (first object gets 0)
        self.id_count = -1
        self.sheerka = sheerka
        # reductions: objects matching a predicate are replaced by a simple value
        self.to_reduce = []
        self.to_reduce.append(ToReduce(lambda o: isinstance(o, Logger), lambda o: None))
        # from parsers.BaseParser import BaseParser
        # from evaluators.BaseEvaluator import BaseEvaluator
        from ontologies.SheerkaOntologyManager import Ontology
        # from core.sheerka.Sheerka import SheerkaMethod
        # self.to_reduce.append(ToReduce(lambda o: isinstance(o, (BaseParser, BaseEvaluator)), lambda o: o.name))
        # self.to_reduce.append(ToReduce(lambda o: isinstance(o, ParserInput), lambda o: o.as_text()))
        self.to_reduce.append(ToReduce(lambda o: isinstance(o, Ontology), lambda o: f"__ONTOLOGY:{o.name}__"))
        # self.to_reduce.append(ToReduce(lambda o: isinstance(o, SheerkaMethod), lambda o: o.name))

    def flatten(self, obj):
        """Convert `obj` into a json-friendly structure (dict/list/primitive).

        NOTE(review): an Enum whose members subclass int or str is matched by
        is_primitive() before is_enum() is reached — confirm this is intended.
        """
        if utils.is_to_discard(obj):
            return str(obj)
        if utils.is_primitive(obj):
            return obj
        if utils.is_custom_type(obj):
            return self._flatten_custom_type(obj)
        if utils.is_type(obj):
            return str(obj)
        if utils.is_tuple(obj):
            return {tags.TUPLE: [self.flatten(v) for v in obj]}
        if utils.is_set(obj):
            return {tags.SET: [self.flatten(v) for v in obj]}
        if utils.is_list(obj):
            return [self.flatten(v) for v in obj]
        if utils.is_dictionary(obj):
            return self._flatten_dict(obj)
        if utils.is_enum(obj):
            return self._flatten_enum(obj)
        if utils.is_object(obj):
            return self._flatten_obj_instance(obj)
        raise Exception(f"Cannot flatten '{obj}'")

    def _flatten_dict(self, obj):
        """Flatten a dict; keys are coerced to json-safe strings."""
        data = {}
        for k, v in obj.items():
            if k is None:
                k_str = "null"
            elif utils.is_enum(k):
                k_str = get_full_qualified_name(k) + "." + k.name
            elif isinstance(k, Concept):
                k_str = str_concept(k)
            else:
                k_str = k
            data[k_str] = self.flatten(v)
        return data

    def _flatten_enum(self, obj):
        """Flatten an enum member as "module.Class.MEMBER" (reference-tracked)."""
        # check if the object was already seen
        exists, _id = self.exist(obj)
        if exists:
            return {tags.ID: _id}
        else:
            # assign the next reference id and remember the object
            self.id_count = self.id_count + 1
            self.ids[id(obj)] = self.id_count
            self.objs.append(obj)
            data = {}
            class_name = get_full_qualified_name(obj)
            data[tags.ENUM] = class_name + "." + obj.name
            return data

    def _flatten_obj_instance(self, obj):
        """Flatten an arbitrary object: reduction, back-reference, registered
        handler, or generic __dict__ walk — in that order."""
        for reduce in self.to_reduce:
            if reduce.predicate(obj):
                return reduce.get_value(obj)
        # check if the object was already seen
        exists, _id = self.exist(obj)
        if exists:
            return {tags.ID: _id}
        else:
            self.id_count = self.id_count + 1
            self.ids[id(obj)] = self.id_count
            self.objs.append(obj)
            # flatten
            data = {}
            cls = obj.__class__ if hasattr(obj, '__class__') else type(obj)
            class_name = utils.importable_name(cls)
            data[tags.OBJECT] = class_name
            handler = handlers.get(class_name)
            if handler is not None:
                # registered handlers take full control of the payload
                return handler(self.sheerka, self).flatten(obj, data)
            if hasattr(obj, "__dict__"):
                # default behavior: flatten every instance attribute
                for k, v in obj.__dict__.items():
                    data[k] = self.flatten(v)
            return data

    def _flatten_custom_type(self, obj):
        """Flatten a CustomType sentinel by its `value` (reference-tracked)."""
        # check if the object was already seen
        exists, _id = self.exist(obj)
        if exists:
            return {tags.ID: _id}
        else:
            self.id_count = self.id_count + 1
            self.ids[id(obj)] = self.id_count
            self.objs.append(obj)
            return {tags.CUSTOM: obj.value}

    def exist(self, obj):
        """Return (True, ref_id) if `obj` was already flattened, else (False, None)."""
        try:
            v = self.ids[id(obj)]
            return True, v
        except KeyError:
            return False, None

    # def exist(self, obj):
    #     for k, v in self.ids.items():
    #         if k == id(obj):
    #             return True, v
    #
    #     return False, None
+129
View File
@@ -0,0 +1,129 @@
import json
from common.global_symbols import NoFirstToken, NotFound, NotInit, Removed
from common.utils import decode_enum, get_class, unstr_concept
from sheerkapickle import tags, utils, handlers
def decode(sheerka, obj):
    """Deserialize a JSON string produced by encode() back into objects."""
    parsed = json.loads(obj)
    return SheerkaUnpickler(sheerka).restore(parsed)
class SheerkaUnpickler:
    """Rebuilds objects from the json-friendly structures produced by
    SheerkaPickler.

    `objs` mirrors the pickler's emission order so that a {tags.ID: n}
    back-reference resolves to the n-th restored object; only the restore
    paths that the pickler reference-tracks (objects, enums, custom
    sentinels) may append to it.
    """

    def __init__(self, sheerka):
        self.sheerka = sheerka
        # restored objects, indexed by reference id
        self.objs = []

    def restore(self, obj):
        """Dispatch on the sheerka tag (or the container type) and rebuild."""
        if has_tag(obj, tags.ID):
            return self._restore_id(obj)
        if has_tag(obj, tags.TUPLE):
            return self._restore_tuple(obj)
        if has_tag(obj, tags.CUSTOM):
            return self._restore_custom(obj)
        if has_tag(obj, tags.SET):
            return self._restore_set(obj)
        if has_tag(obj, tags.ENUM):
            return self._restore_enum(obj)
        if has_tag(obj, tags.OBJECT):
            return self._restore_obj(obj)
        if utils.is_list(obj):
            return self._restore_list(obj)
        if utils.is_dictionary(obj):
            return self._restore_dict(obj)
        # primitives pass through unchanged
        return obj

    def _restore_list(self, obj):
        return [self.restore(v) for v in obj]

    def _restore_tuple(self, obj):
        return tuple([self.restore(v) for v in obj[tags.TUPLE]])

    def _restore_custom(self, obj):
        """Map a serialized CustomType value back to its singleton sentinel."""
        if obj[tags.CUSTOM] == NotInit.value:
            instance = NotInit
        elif obj[tags.CUSTOM] == NotFound.value:
            instance = NotFound
        elif obj[tags.CUSTOM] == Removed.value:
            instance = Removed
        elif obj[tags.CUSTOM] == NoFirstToken.value:
            instance = NoFirstToken
        else:
            raise KeyError(f"unknown {obj[tags.CUSTOM]}")
        # keep the reference table aligned with the pickler's numbering
        self.objs.append(instance)
        return instance

    def _restore_set(self, obj):
        return set([self.restore(v) for v in obj[tags.SET]])

    def _restore_enum(self, obj):
        instance = decode_enum(obj[tags.ENUM])
        self.objs.append(instance)
        return instance

    def _restore_dict(self, obj):
        data = {}
        for k, v in obj.items():
            resolved_key = self._resolve_key(k)
            data[resolved_key] = self.restore(v)
        return data

    def _restore_id(self, obj):
        """Resolve a back-reference to an already restored object.

        NOTE(review): an out-of-range reference id silently yields None —
        confirm this is preferred over raising.
        """
        try:
            return self.objs[obj[tags.ID]]
        except IndexError:
            pass

    def _restore_obj(self, obj):
        """Rebuild a tagged object, via its registered handler when one exists."""
        handler = handlers.get(obj[tags.OBJECT])
        if handler:
            handler = handler(self.sheerka, self)
            instance = handler.new(obj)
            # register before restore() so cyclic references can resolve
            self.objs.append(instance)
            instance = handler.restore(obj, instance)
        else:
            # KSI 202011: Hack because Property is removed
            # To suppress asap
            if obj[tags.OBJECT] == "core.concept.Property":
                return self.restore(obj["value"])
            cls = get_class(obj[tags.OBJECT])
            instance = cls.__new__(cls)
            self.objs.append(instance)
            # generic path: set every serialized attribute directly
            for k, v in obj.items():
                if k == tags.OBJECT:
                    continue
                value = self.restore(v)
                setattr(instance, k, value)
        return instance

    def _resolve_key(self, key):
        """Reverse _flatten_dict's key coercion: "null", concept keys, enums."""
        if key == "null":
            return None
        concept_key, concept_id = unstr_concept(key)
        if concept_key is not None:
            return self.sheerka.new((concept_key, concept_id)) if concept_id else self.sheerka.new(concept_key)
        as_enum = decode_enum(key)
        if as_enum is not None:
            return as_enum
        return key
def has_tag(obj, tag):
    """True when `obj` is a plain dict carrying the given sheerka tag key."""
    if type(obj) is not dict:
        return False
    return tag in obj
+6
View File
@@ -0,0 +1,6 @@
# JSON keys used to tag non-primitive values in the serialized stream.
# The "_sheerka/" prefix avoids collisions with real attribute names.
ID = "_sheerka/id"          # back-reference to an already emitted object
TUPLE = "_sheerka/tuple"    # tuple payload (JSON has no tuple type)
SET = "_sheerka/set"        # set payload
OBJECT = "_sheerka/obj"     # fully qualified class name of an object
ENUM = "_sheerka/enum"      # "module.Class.MEMBER" enum reference
CUSTOM = "_sheerka/custom"  # CustomType sentinel value
+101
View File
@@ -0,0 +1,101 @@
import base64
import types
from enum import Enum
from common.global_symbols import CustomType
# Types considered "a type" and json-native scalar types, respectively.
class_types = (type,)
PRIMITIVES = (str, bool, type(None), int, float)


def is_type(obj):
    """Return True when `obj` is itself a type (metaclass instances included)."""
    # "isinstance" rather than an identity check so metaclasses match too
    return isinstance(obj, class_types)


def is_enum(obj):
    """Return True when `obj` is an enum member."""
    return isinstance(obj, Enum)


def is_custom_type(obj):
    """Return True when `obj` is one of the CustomType sentinels."""
    return isinstance(obj, CustomType)


def is_object(obj):
    """Return True for plain object instances (excludes types, functions, generators)."""
    excluded = (type,
                types.FunctionType,
                types.BuiltinFunctionType,
                types.GeneratorType)
    return isinstance(obj, object) and not isinstance(obj, excluded)


def is_to_discard(obj):
    """Return True for runtime-only values that should never be serialized."""
    return isinstance(obj, (types.GeneratorType, types.CodeType))


def is_primitive(obj):
    """Return True for json-native scalars (str, bool, None, int, float)."""
    return isinstance(obj, PRIMITIVES)


def is_dictionary(obj):
    """Return True when `obj` is a dict."""
    return isinstance(obj, dict)


def is_list(obj):
    """Return True when `obj` is a list."""
    return isinstance(obj, list)


def is_set(obj):
    """Return True when `obj` is a set."""
    return isinstance(obj, set)


def is_bytes(obj):
    """Return True when `obj` is a bytes object."""
    return isinstance(obj, bytes)


def is_tuple(obj):
    """Return True when `obj` is a tuple."""
    return isinstance(obj, tuple)


def is_class(obj):
    """Return True when `obj` is a class."""
    return isinstance(obj, type)
def b64encode(data):
    """
    Encode binary data to ascii text in base64. Data must be bytes.
    """
    encoded = base64.b64encode(data)
    return encoded.decode('ascii')
def translate_module_name(module):
    """Rename builtin modules to a consistent, modern module name.

    Python 2's `__builtin__` and `exceptions` both map to `builtins`, so
    references serialized under either legacy name can be loaded uniformly
    (`builtins` is a superset of the old `exceptions` module).
    See untranslate_module_name() for the reverse operation.
    """
    renames = {'__builtin__': 'builtins', 'exceptions': 'builtins'}
    return renames.get(module, module)


def importable_name(cls):
    """
    Fully qualified name (prefixed by builtin when needed)
    """
    # prefer __qualname__ so nested classes keep their outer path
    qualname = getattr(cls, '__qualname__', cls.__name__)
    return '{}.{}'.format(translate_module_name(cls.__module__), qualname)