Implemented SheerkaOntology

This commit is contained in:
2021-01-11 15:36:03 +01:00
parent e3c2adb533
commit e26c83a825
119 changed files with 6876 additions and 2002 deletions
+132 -27
View File
@@ -1,5 +1,8 @@
from threading import RLock
from core.global_symbols import NotFound, Removed
from core.utils import sheerka_deepcopy
MAX_INITIALIZED_KEY = 100
@@ -10,14 +13,17 @@ class BaseCache:
When you put the same key twice, the previous element is overridden
"""
def __init__(self, max_size=None, default=None, extend_exists=None):
def __init__(self, max_size=None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
self._cache = {}
self._max_size = max_size
self._default = default # default value to return when key is not found. It can be a callable of key
self._extend_exists = extend_exists # search in remote
self._alt_sdp_get = alt_sdp_get # How to get the value when called by alt_sdp
self._sdp = sdp # current instance of SheerkaDataProvider
self._lock = RLock()
self._current_size = 0
self._initialized_keys = set() # to keep the list of the keys already requested (using get())
self._is_cleared = False # indicate that clear() was called
self.to_add = set()
self.to_remove = set()
@@ -49,42 +55,75 @@ class BaseCache:
def __repr__(self):
return f"{self.__class__.__name__}(size={self._current_size}, #keys={len(self._cache)})"
def configure(self, max_size=None, default=None, extend_exists=None):
def configure(self, max_size=None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
if max_size is not None:
self._max_size = max_size
if default is not None:
if default is not NotFound:
self._default = default
if extend_exists is not None:
self._extend_exists = extend_exists
def disable_default(self):
self._default = None
if alt_sdp_get is not None:
self._alt_sdp_get = alt_sdp_get
def put(self, key, value):
if sdp is not None:
self._sdp = sdp
return self
def auto_configure(self, cache_name):
"""
Convenient way to configure the cache
:param cache_name:
:return:
"""
self._default = lambda sdp, key: sdp.get(cache_name, key)
self._extend_exists = lambda sdp, key: sdp.exists(cache_name, key)
self._alt_sdp_get = lambda sdp, key: sdp.alt_get(cache_name, key) # by default, same than get
return self
def disable_default(self):
self._default = (lambda sdp, key: NotFound) if self._sdp else (lambda key: NotFound)
def put(self, key, value, alt_sdp=None):
"""
Add a new entry in cache
:param key:
:param value:
:param alt_sdp:
:return:
"""
with self._lock:
if self._max_size and self._current_size >= self._max_size:
self.evict(self._max_size - self._current_size + 1)
if self._put(key, value):
if self._put(key, value, alt_sdp):
self._current_size += 1
def get(self, key):
def get(self, key, alt_sdp=None):
"""
Retrieve an entry from the cache
If the entry does not exist, will use the 'default' value or delegate
:param key:
:param alt_sdp: if not found in cache._sdp, look in other repositories
:return:
"""
with self._lock:
return self._get(key)
return self._get(key, alt_sdp)
def alt_get(self, key):
"""
Alternate way to get an entry, from concept cache
This is mainly used for IncCache, in order to get the value without increasing it
If used for another cache, it must return the value for the key WITHOUT modifying the state of the cache
:param key:
:return:
"""
with self._lock:
return self._alt_get(key)
def get_all(self):
"""
@@ -98,36 +137,63 @@ class BaseCache:
def inner_get(self, key):
return self._cache[key]
def update(self, old_key, old_value, new_key, new_value):
def update(self, old_key, old_value, new_key, new_value, alt_sdp=None):
"""
Update an entry in the cache
:param old_key: key of the previous version of the entry
:param old_value: previous version of the entry
:param new_key: key of the entry
:param new_value: new value
:param alt_sdp: alternate data provider to query when the key is not found (presumably — confirm against get())
:return:
"""
with self._lock:
self._update(old_key, old_value, new_key, new_value)
self._update(old_key, old_value, new_key, new_value, alt_sdp)
def delete(self, key, value=None):
def delete(self, key, value=None, alt_sdp=None):
with self._lock:
try:
self._delete(key, value)
self._sync(key)
self._delete(key, value, alt_sdp)
return True
except KeyError:
pass
return False
def populate(self, populate_function, get_key_function):
def populate(self, populate_function, get_key_function, reset_events=False):
"""
Initialise the cache with a bunch of data
:param populate_function:
:param get_key_function:
:param reset_events:
:return:
"""
with self._lock:
for item in populate_function():
if reset_events:
to_add_copy = self.to_add.copy()
to_remove_copy = self.to_remove.copy()
for item in (populate_function(self._sdp) if self._sdp else populate_function()):
self.put(get_key_function(item), item)
if reset_events:
self.to_add = to_add_copy
self.to_remove = to_remove_copy
def force_value(self, key, value):
"""
Force a value into a key without raising any event
"""
with self._lock:
self._cache[key] = value
def remove_initialized_key(self, key):
"""
When a value is requested via alt_sdp, we should not keep track of the request,
as the outcome is not known
"""
with self._lock:
self._initialized_keys.remove(key)
def has(self, key):
"""
Return True if the key is in the cache
@@ -149,7 +215,10 @@ class BaseCache:
if key in self._cache:
return True
return self._extend_exists(key) if self._extend_exists else False
if self._extend_exists:
return self._extend_exists(self._sdp, key) if self._sdp else self._extend_exists(key)
else:
return False
def evict(self, nb_items):
"""
@@ -195,13 +264,16 @@ class BaseCache:
return len(to_delete)
def clear(self):
def clear(self, set_is_cleared=True):
with self._lock:
# Seems that remote sdp is not correctly updated
self._cache.clear()
self._current_size = 0
self._initialized_keys.clear()
self.to_add.clear()
self.to_remove.clear()
if set_is_cleared:
self._is_cleared = True
def dump(self):
with self._lock:
@@ -225,9 +297,32 @@ class BaseCache:
self.to_add.clear()
self.to_remove.clear()
def reset_initialized_keys(self):
"""
Use when an ontology is put back. Reset all the previous requests as alt_sdp is a new one
"""
with self._lock:
self._initialized_keys.clear()
def is_cleared(self):
with self._lock:
return self._is_cleared
def clone(self):
return type(self)(self._max_size, self._default, self._extend_exists, self._alt_sdp_get, self._sdp)
def test_only_reset(self):
"""
Clears the cache, but does not set is_cleared to True
It's a convenient way to clear the cache without altering alt_sdp behaviour
"""
self.clear(set_is_cleared=False)
def _sync(self, *keys):
# KSI 2020-12-29. Do not try to use alt_sdp here
# Sync must only sync with the current sdp
for key in keys:
if key not in self._initialized_keys and self._default:
if key not in self._initialized_keys and callable(self._default):
# to keep sync with the remote repo is needed
# first check self._initialized_keys to prevent infinite loop
self.get(key)
@@ -246,7 +341,7 @@ class BaseCache:
except KeyError:
pass
def _get(self, key):
def _get(self, key, alt_sdp=None):
try:
value = self._cache[key]
except KeyError:
@@ -254,11 +349,18 @@ class BaseCache:
self._initialized_keys.clear()
if callable(self._default):
if key in self._initialized_keys:
return None
# it means that we have already asked the repository
return NotFound
value = self._default(key)
if value is not None:
self._cache[key] = value
simple_copy = True
value = self._default(self._sdp, key) if self._sdp else self._default(key)
if value is NotFound and alt_sdp and not self._is_cleared:
value = self._alt_sdp_get(alt_sdp, key)
simple_copy = False
if value is not NotFound:
self._cache[key] = value if simple_copy else sheerka_deepcopy(value)
value = self._cache[key]
# update _current_size
if isinstance(value, (list, set)):
@@ -271,11 +373,14 @@ class BaseCache:
return value
def _put(self, key, value):
def _alt_get(self, key):
return self._get(key) # by default, point to _get
def _put(self, key, value, alt_sdp):
pass
def _update(self, old_key, old_value, new_key, new_value):
def _update(self, old_key, old_value, new_key, new_value, alt_sdp):
pass
def _delete(self, key, value):
def _delete(self, key, value, alt_sdp):
raise NotImplementedError()