from threading import RLock

from core.global_symbols import NotFound
from core.utils import sheerka_deepcopy

MAX_INITIALIZED_KEY = 100


class BaseCache:
    """
    An in-memory FIFO cache object
    When max_size is reached, the first element that was put is removed
    When you put the same key twice, the previous element is overwritten
    """

    def __init__(self, max_size=None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
        self._cache = {}
        self._max_size = max_size
        self._default = default  # default value to return when key is not found. It can be a callable of key
        self._extend_exists = extend_exists  # search in remote
        self._alt_sdp_get = alt_sdp_get  # how to get the value when called by alt_sdp
        self._sdp = sdp  # current instance of SheerkaDataProvider
        self._lock = RLock()
        self._current_size = 0
        self._initialized_keys = set()  # to keep the list of the keys already requested (using get())
        self._is_cleared = False  # indicates that clear() was called

        self.to_add = set()
        self.to_remove = set()

        # Explanation on _initialized_keys:
        # every time you try to get an item, its key is added to _initialized_keys.
        # If the item is found, the entry is in the cache and later get() calls hit
        # it directly. If it is not found, the key's presence in _initialized_keys
        # tells _get() that the repository was already asked, so it returns
        # NotFound without querying it a second time.
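
    # Illustrative usage sketch of the _initialized_keys memoization (the name
    # missing_key is hypothetical; this assumes a callable default is configured):
    #
    #   cache.get(missing_key)  # queries the repository, records the key, returns NotFound
    #   cache.get(missing_key)  # returns NotFound immediately, without a second query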

    def __len__(self):
        """
        Return the number of items in the cache
        :return:
        """
        with self._lock:
            return self._current_size

    def __contains__(self, key):
        with self._lock:
            return key in self._cache

    def __iter__(self):
        with self._lock:
            # copy the keys under the lock, then yield outside it so the lock
            # is not held while the consumer iterates
            keys = self._cache.copy()
        yield from keys

    def __next__(self):
        # returns the oldest key (dicts preserve insertion order)
        return next(iter(self._cache))

    def __repr__(self):
        return f"{self.__class__.__name__}(size={self._current_size}, #keys={len(self._cache)})"

    def configure(self, max_size=None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
        if max_size is not None:
            self._max_size = max_size

        if default is not NotFound:
            self._default = default

        if extend_exists is not None:
            self._extend_exists = extend_exists

        if alt_sdp_get is not None:
            self._alt_sdp_get = alt_sdp_get

        if sdp is not None:
            self._sdp = sdp

        return self

    def auto_configure(self, cache_name):
        """
        Convenient way to configure the cache
        :param cache_name:
        :return:
        """
        self._default = lambda sdp, key: sdp.get(cache_name, key)
        self._extend_exists = lambda sdp, key: sdp.exists(cache_name, key)
        self._alt_sdp_get = lambda sdp, key: sdp.alt_get(cache_name, key)  # by default, same as get

        return self

    def disable_default(self):
        self._default = (lambda sdp, key: NotFound) if self._sdp else (lambda key: NotFound)
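
    # Illustrative sketch of wiring a cache to a data provider with
    # auto_configure. The provider object sdp and the cache name "concepts" are
    # assumptions for illustration; auto_configure only requires that sdp
    # expose get(), exists() and alt_get().
    #
    #   cache = BaseCache(max_size=1000, sdp=sdp).auto_configure("concepts")
    #   value = cache.get(key)     # on a miss, falls back to sdp.get("concepts", key)
    #   known = cache.exists(key)  # may delegate to sdp.exists("concepts", key)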

    def put(self, key, value, alt_sdp=None):
        """
        Add a new entry in cache
        :param key:
        :param value:
        :param alt_sdp:
        :return:
        """
        with self._lock:
            if self._put(key, value, alt_sdp):
                self._current_size += 1

    def get(self, key, alt_sdp=None):
        """
        Retrieve an entry from the cache
        If the entry does not exist, will use the 'default' value or delegate
        :param key:
        :param alt_sdp: if not found in cache._sdp, look in other repositories
        :return:
        """
        with self._lock:
            return self._get(key, alt_sdp)

    def alt_get(self, key):
        """
        Alternate way to get an entry, from the concept cache
        This is mainly used for IncCache, in order to get the value without increasing it
        If used for another cache, it must return the value for the key WITHOUT modifying the state of the cache
        :param key:
        :return:
        """
        with self._lock:
            return self._alt_get(key)
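
    # Illustrative sketch of the lookup chain behind get(). On a miss, _get()
    # first calls the configured default (against self._sdp when one is set);
    # if the value is still NotFound and an alt_sdp was passed, it asks the
    # alternate provider through _alt_sdp_get. The name other_sdp is an
    # assumption for illustration.
    #
    #   value = cache.get(key)                     # cache, then default/_sdp
    #   value = cache.get(key, alt_sdp=other_sdp)  # cache, then _sdp, then other_sdp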

    def get_all(self):
        """
        Retrieve all items already in cache
        This method does not fetch from the remote repository
        :return:
        """
        with self._lock:
            return self._cache.values()

    def inner_get(self, key):
        return self._cache[key]

    def update(self, old_key, old_value, new_key, new_value, alt_sdp=None):
        """
        Update an entry in the cache
        :param old_key: key of the previous version of the entry
        :param old_value: previous version of the entry
        :param new_key: key of the entry
        :param new_value: new value
        :param alt_sdp: alternate repository, forwarded to _update
        :return:
        """
        with self._lock:
            self._update(old_key, old_value, new_key, new_value, alt_sdp)

    def delete(self, key, value=None, alt_sdp=None):
        with self._lock:
            try:
                # load the entry from the current sdp first, so _delete also
                # sees a value that was not yet in the local cache
                self._sync(key)
                self._delete(key, value, alt_sdp)
                return True
            except KeyError:
                return False

    def populate(self, populate_function, get_key_function, reset_events=False):
        """
        Initialise the cache with a bunch of data
        :param populate_function:
        :param get_key_function:
        :param reset_events:
        :return:
        """
        with self._lock:
            if reset_events:
                # save the pending events so those generated by the puts below are discarded
                to_add_copy = self.to_add.copy()
                to_remove_copy = self.to_remove.copy()

            for item in (populate_function(self._sdp) if self._sdp else populate_function()):
                self.put(get_key_function(item), item)

            if reset_events:
                self.to_add = to_add_copy
                self.to_remove = to_remove_copy
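
    # Illustrative sketch of bulk-loading a cache. The names load_all and
    # item.key are assumptions for illustration; populate_function receives
    # the sdp when one is configured.
    #
    #   cache.populate(
    #       populate_function=lambda sdp: sdp.load_all(),  # yields the items to cache
    #       get_key_function=lambda item: item.key,        # derives each item's key
    #       reset_events=True,                             # discard to_add/to_remove events
    #   )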

    def force_value(self, key, value):
        """
        Force a value into a key without raising any event
        """
        with self._lock:
            self._cache[key] = value

    def remove_initialized_key(self, key):
        """
        When a value is requested by alt_sdp, we should not keep track of
        the request, as the outcome is not known
        """
        with self._lock:
            self._initialized_keys.remove(key)

    def has(self, key):
        """
        Return True if the key is in the cache
        Never uses extend_exists
        :param key:
        :return:
        """
        with self._lock:
            return key in self._cache

    def exists(self, key):
        """
        Return True if the key is in the cache
        Can use extend_exists
        :param key:
        :return:
        """
        with self._lock:
            if key in self._cache:
                return True

            if self._extend_exists:
                return self._extend_exists(self._sdp, key) if self._sdp else self._extend_exists(key)
            else:
                return False
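
    # Illustrative sketch of the difference: has() only inspects the local
    # dict, while exists() may also ask the remote repository through the
    # configured extend_exists callable.
    #
    #   cache.has(key)     # True only for keys already held in memory
    #   cache.exists(key)  # may return True for keys that only exist remotely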

    def evict(self, nb_items):
        """
        Remove nb_items from the cache, using the replacement policy
        :return:
        """
        with self._lock:
            nb_items = min(nb_items, self._current_size)
            to_remove = []
            iter_cache = iter(self._cache)
            try:
                while nb_items > 0:
                    key = next(iter_cache)
                    if key in self.to_add or key in self.to_remove:
                        continue  # cannot remove an item that is not yet committed
                    else:
                        to_remove.append(key)
                        nb_items -= 1
            except StopIteration:
                pass

            for key in to_remove:
                del self._cache[key]
                self._initialized_keys.discard(key)

            self._current_size -= len(to_remove)

            return len(to_remove)
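
    # Illustrative sketch: keys listed in to_add or to_remove are pending
    # commits, so evict() skips them and removes the oldest committed keys.
    #
    #   removed = cache.evict(10)  # returns how many entries were actually evicted
    #   cache.evict(len(cache))    # evicts everything that is safe to drop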

    def evict_by_key(self, predicate):
        """
        Remove entries that match the predicate
        :param predicate:
        :return:
        """
        to_delete = []
        with self._lock:
            for key in self._cache:
                if predicate(key):
                    to_delete.append(key)

            for key in to_delete:
                del self._cache[key]
                self._initialized_keys.discard(key)

            self._current_size -= len(to_delete)

            return len(to_delete)

    def clear(self, set_is_cleared=True):
        with self._lock:
            # Seems that the remote sdp is not correctly updated
            self._cache.clear()
            self._current_size = 0
            self._initialized_keys.clear()
            self.to_add.clear()
            self.to_remove.clear()
            if set_is_cleared:
                self._is_cleared = True

    def dump(self):
        with self._lock:
            return {
                "current_size": self._current_size,
                "cache": self._cache.copy()
            }

    def copy(self):
        with self._lock:
            return self._cache.copy()

    def init_from_dump(self, dump):
        with self._lock:
            self._current_size = dump["current_size"]
            self._cache = dump["cache"].copy()
            return self
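
    # Illustrative sketch: dump() and init_from_dump() round-trip the cache
    # content, e.g. to hand a snapshot to another cache instance.
    #
    #   snapshot = cache.dump()  # {"current_size": ..., "cache": {...}}
    #   restored = type(cache)().init_from_dump(snapshot)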

    def reset_events(self):
        with self._lock:
            self.to_add.clear()
            self.to_remove.clear()

    def reset_initialized_keys(self):
        """
        Used when an ontology is put back. Resets all the previous requests, as alt_sdp is a new one
        """
        with self._lock:
            self._initialized_keys.clear()

    def is_cleared(self):
        with self._lock:
            return self._is_cleared

    def clone(self):
        return type(self)(self._max_size, self._default, self._extend_exists, self._alt_sdp_get, self._sdp)

    def test_only_reset(self):
        """
        Clears the cache, but does not set is_cleared to True

        It's a convenient way to clear the cache without altering alt_sdp behaviour
        """
        self.clear(set_is_cleared=False)

    def _sync(self, *keys):
        # KSI 2020-12-29. Do not try to use alt_sdp here:
        # _sync must only sync with the current sdp
        for key in keys:
            if key not in self._initialized_keys and callable(self._default):
                # a get() is needed to stay in sync with the remote repo;
                # checking self._initialized_keys first prevents an infinite loop
                self.get(key)

    def _add_to_add(self, key):
        self.to_add.add(key)
        # adding a key cancels any pending removal of the same key
        self.to_remove.discard(key)

    def _add_to_remove(self, key):
        self.to_remove.add(key)
        # removing a key cancels any pending addition of the same key
        self.to_add.discard(key)

    def _get(self, key, alt_sdp=None):
        try:
            value = self._cache[key]
        except KeyError:
            # bound the size of _initialized_keys; clearing it only costs at
            # most one extra repository query per key
            if len(self._initialized_keys) == MAX_INITIALIZED_KEY:
                self._initialized_keys.clear()
            if callable(self._default):
                if key in self._initialized_keys:
                    # it means that we have already asked the repository
                    return NotFound

                simple_copy = True
                value = self._default(self._sdp, key) if self._sdp else self._default(key)
                if value is NotFound and alt_sdp and not self._is_cleared:
                    value = self._alt_sdp_get(alt_sdp, key)
                    simple_copy = False

                if value is not NotFound:
                    # values coming from alt_sdp are deep-copied before being cached
                    value = value if simple_copy else sheerka_deepcopy(value)
                    self._cache[key] = value

                    # update _current_size
                    if isinstance(value, (list, set)):
                        self._current_size += len(value)
                    else:
                        self._current_size += 1

                    if self._max_size and self._current_size > self._max_size:
                        self.evict(self._current_size - self._max_size)
            else:
                value = self._default
            self._initialized_keys.add(key)

        return value

    def _alt_get(self, key):
        return self._get(key)  # by default, points to _get

    def _put(self, key, value, alt_sdp):
        # subclass hook: should return a truthy value when a new entry was
        # actually added, so that put() can update _current_size
        pass

    def _update(self, old_key, old_value, new_key, new_value, alt_sdp):
        # subclass hook
        pass

    def _delete(self, key, value, alt_sdp):
        raise NotImplementedError("_delete BaseCache")
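

# Minimal self-test sketch: BaseCache defers storage to the _put/_update/_delete
# hooks, so a tiny dict-backed subclass is enough to exercise put(), evict()
# and delete(). The subclass name DemoCache is hypothetical and only exists
# for this illustration.
if __name__ == "__main__":
    class DemoCache(BaseCache):
        def _put(self, key, value, alt_sdp):
            is_new = key not in self._cache
            self._cache[key] = value
            return is_new  # put() only grows _current_size on a truthy return

        def _update(self, old_key, old_value, new_key, new_value, alt_sdp):
            del self._cache[old_key]
            self._cache[new_key] = new_value

        def _delete(self, key, value, alt_sdp):
            del self._cache[key]  # a KeyError here makes delete() return False

    cache = DemoCache(max_size=2)
    cache.disable_default()  # cache misses now resolve to NotFound
    cache.put("a", 1)
    cache.put("b", 2)
    cache.put("c", 3)  # put() itself does not evict; _get() enforces max_size
    cache.evict(1)  # FIFO: "a", the oldest entry, is removed
    assert not cache.has("a") and cache.has("c")
    assert cache.delete("b") and not cache.delete("b")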