Files
Sheerka-Old/src/cache/BaseCache.py
T
kodjo 945807b375 Fixed #72 : Exception when get_results(id=10)
Fixed #74 : Keyword parameters are no longer recognized when a concept that redefines equality is created
Fixed #118 : RecursionError: maximum recursion depth exceeded
Fixed #119 : PreventCircularReferenceEvaluator
Fixed #121 : Plural are not updated when new elements are added
Fixed #123 : BaseCache : Values in cache can be evicted before being committed
Fixed #105 : TOO_MANY_ERROR is not the relevant error when results are filtered
2021-09-09 10:57:01 +02:00

397 lines
12 KiB
Python

from threading import RLock
from core.global_symbols import NotFound
from core.utils import sheerka_deepcopy
# Upper bound on the number of keys remembered in _initialized_keys;
# when reached the tracking set is flushed (see _get)
MAX_INITIALIZED_KEY = 100
class BaseCache:
    """
    An in-memory FIFO cache object.

    When max_size is reached, the first element that was put is removed.
    When you put the same key twice, the previous element is overridden.
    """

    def __init__(self, max_size=None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
        self._cache = {}  # key -> cached value
        self._max_size = max_size  # eviction threshold; None/0 disables eviction (see _get)
        self._default = default  # default value to return when key is not found. It can be a callable of key
        self._extend_exists = extend_exists  # search in remote
        self._alt_sdp_get = alt_sdp_get  # How to get the value when called by alt_sdp
        self._sdp = sdp  # current instance of SheerkaDataProvider
        self._lock = RLock()  # re-entrant: public methods can nest (e.g. _sync -> get) while locked
        self._current_size = 0  # accounted item count (lists/sets count per element, see _get)
        self._initialized_keys = set()  # to keep the list of the keys already requested (using get())
        self._is_cleared = False  # indicate that clear() was called
        self.to_add = set()  # keys with a pending, uncommitted 'add' event (protected from eviction)
        self.to_remove = set()  # keys with a pending, uncommitted 'remove' event
        # Explanation on _initialized_keys:
        # every time you try to get an item, its key is added to _initialized_keys.
        # If a later get() misses and the key is already in _initialized_keys, _get()
        # returns NotFound immediately instead of asking the repository a second
        # time (see _get) — this avoids repeated remote lookups for absent keys.
def __len__(self):
"""
Return the number of items in the cache
:return:
"""
with self._lock:
return self._current_size
def __contains__(self, key):
with self._lock:
return key in self._cache
def __iter__(self):
with self._lock:
keys = self._cache.copy()
yield from keys
    def __next__(self):
        # NOTE(review): a fresh iterator is built on every call, so this always
        # returns the FIRST key in insertion order and never advances; it raises
        # StopIteration only when the cache is empty.  It also runs without the
        # lock.  Presumably used as a "peek oldest" for the FIFO policy —
        # confirm with callers before changing.
        return next(iter(self._cache))
def __repr__(self):
return f"{self.__class__.__name__}(size={self._current_size}, #keys={len(self._cache)})"
def configure(self, max_size=None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
if max_size is not None:
self._max_size = max_size
if default is not NotFound:
self._default = default
if extend_exists is not None:
self._extend_exists = extend_exists
if alt_sdp_get is not None:
self._alt_sdp_get = alt_sdp_get
if sdp is not None:
self._sdp = sdp
return self
def auto_configure(self, cache_name):
"""
Convenient way to configure the cache
:param cache_name:
:return:
"""
self._default = lambda sdp, key: sdp.get(cache_name, key)
self._extend_exists = lambda sdp, key: sdp.exists(cache_name, key)
self._alt_sdp_get = lambda sdp, key: sdp.alt_get(cache_name, key) # by default, same than get
return self
def disable_default(self):
self._default = (lambda sdp, key: NotFound) if self._sdp else (lambda key: NotFound)
def put(self, key, value, alt_sdp=None):
"""
Add a new entry in cache
:param key:
:param value:
:param alt_sdp:
:return:
"""
with self._lock:
if self._put(key, value, alt_sdp):
self._current_size += 1
def get(self, key, alt_sdp=None):
"""
Retrieve an entry from the cache
If the entry does not exist, will use the 'default' value or delegate
:param key:
:param alt_sdp: if not found in cache._sdp, look in other repositories
:return:
"""
with self._lock:
return self._get(key, alt_sdp)
def alt_get(self, key):
"""
Alternate way to get an entry, from concept cache
This is mainly used for IncCache, in order to get the value without increasing it
It used for another cache, it must return the value from key WITHOUT modifying the state of the cache
:param key:
:return:
"""
with self._lock:
return self._alt_get(key)
def get_all(self):
"""
Retrieve all items already in cache
This method does not fetch in the remoter repository
:return:
"""
with self._lock:
return self._cache.values()
def inner_get(self, key):
return self._cache[key]
def update(self, old_key, old_value, new_key, new_value, alt_sdp=None):
"""
Update an entry in the cache
:param old_key: key of the previous version of the entry
:param old_value: previous version of the entry
:param new_key: key of the entry
:param new_value: new value
:param alt_sdp: new value
:return:
"""
with self._lock:
self._update(old_key, old_value, new_key, new_value, alt_sdp)
def delete(self, key, value=None, alt_sdp=None):
with self._lock:
try:
self._sync(key)
self._delete(key, value, alt_sdp)
return True
except KeyError:
return False
def populate(self, populate_function, get_key_function, reset_events=False):
"""
Initialise the cache with a bunch of data
:param populate_function:
:param get_key_function:
:param reset_events:
:return:
"""
with self._lock:
if reset_events:
to_add_copy = self.to_add.copy()
to_remove_copy = self.to_remove.copy()
for item in (populate_function(self._sdp) if self._sdp else populate_function()):
self.put(get_key_function(item), item)
if reset_events:
self.to_add = to_add_copy
self.to_remove = to_remove_copy
def force_value(self, key, value):
"""
Force a value into a key without raising any event
"""
with self._lock:
self._cache[key] = value
def remove_initialized_key(self, key):
"""
When a value is requested by alt_sdp, we should not keep track of the request
As the outcome is not known
"""
with self._lock:
self._initialized_keys.remove(key)
def has(self, key):
"""
Return True if the key is in the cache
Never use extend_exist
:param key:
:return:
"""
with self._lock:
return key in self._cache
def exists(self, key):
"""
Return True if the key is in the cache
Can use extend_exist
:param key:
:return:
"""
with self._lock:
if key in self._cache:
return True
if self._extend_exists:
return self._extend_exists(self._sdp, key) if self._sdp else self._extend_exists(key)
else:
return False
def evict(self, nb_items):
"""
Remove nb_items from the cache, using the replacement policy
:return:
"""
with self._lock:
nb_items = self._current_size if self._current_size < nb_items else nb_items
to_remove = []
iter_cache = iter(self._cache)
try:
while nb_items > 0:
key = next(iter_cache)
if key in self.to_add or key in self.to_remove:
continue # cannot remove an item that is not yet committed
else:
to_remove.append(key)
nb_items -= 1
except StopIteration:
pass
for key in to_remove:
del (self._cache[key])
try:
self._initialized_keys.remove(key)
except KeyError:
pass
self._current_size -= len(to_remove)
return len(to_remove)
def evict_by_key(self, predicate):
"""
Remove entries that matches the predicate
:param predicate:
:return:
"""
to_delete = []
with self._lock:
for key in self._cache:
if predicate(key):
to_delete.append(key)
for key in to_delete:
del (self._cache[key])
try:
self._initialized_keys.remove(key)
except KeyError:
pass
self._current_size -= len(to_delete)
return len(to_delete)
def clear(self, set_is_cleared=True):
with self._lock:
# Seems that remote sdp is not correctly updated
self._cache.clear()
self._current_size = 0
self._initialized_keys.clear()
self.to_add.clear()
self.to_remove.clear()
if set_is_cleared:
self._is_cleared = True
def dump(self):
with self._lock:
return {
"current_size": self._current_size,
"cache": self._cache.copy()
}
def copy(self):
with self._lock:
return self._cache.copy()
def init_from_dump(self, dump):
with self._lock:
self._current_size = dump["current_size"]
self._cache = dump["cache"].copy()
return self
def reset_events(self):
with self._lock:
self.to_add.clear()
self.to_remove.clear()
def reset_initialized_keys(self):
"""
Use when an ontology is put back. Reset all the previous requests as alt_sdp is a new one
"""
with self._lock:
self._initialized_keys.clear()
def is_cleared(self):
with self._lock:
return self._is_cleared
def clone(self):
return type(self)(self._max_size, self._default, self._extend_exists, self._alt_sdp_get, self._sdp)
def test_only_reset(self):
"""
Clears the cache, but does not set is_cleared to True
It's a convenient way to clear the cache without altering alt_sdp behaviour
"""
self.clear(set_is_cleared=False)
def _sync(self, *keys):
# KSI 2020-12-29. DO not try to use alt_sdp here
# Sync must only sync with the current sdp
for key in keys:
if key not in self._initialized_keys and callable(self._default):
# to keep sync with the remote repo is needed
# first check self._initialized_keys to prevent infinite loop
self.get(key)
def _add_to_add(self, key):
self.to_add.add(key)
try:
self.to_remove.remove(key)
except KeyError:
pass
def _add_to_remove(self, key):
self.to_remove.add(key)
try:
self.to_add.remove(key)
except KeyError:
pass
    def _get(self, key, alt_sdp=None):
        """
        Lookup with fallback chain: local cache -> default callback (current
        sdp) -> alt_sdp (deep-copied).  Returns NotFound — or the static
        default — on a definitive miss.  Caller must hold self._lock.
        """
        try:
            value = self._cache[key]
        except KeyError:
            # Bound the memory used by the request-tracking set; once the cap
            # is hit the whole history is dropped (missing keys may then be
            # asked again once).
            if len(self._initialized_keys) == MAX_INITIALIZED_KEY:
                self._initialized_keys.clear()
            if callable(self._default):
                if key in self._initialized_keys:
                    # it means that we have already asked the repository for
                    # this key and it was not found: do not ask again
                    return NotFound
                simple_copy = True
                value = self._default(self._sdp, key) if self._sdp else self._default(key)
                if value is NotFound and alt_sdp and not self._is_cleared:
                    # second chance through the alternate provider; its value is
                    # shared with another owner, hence the deep copy below
                    value = self._alt_sdp_get(alt_sdp, key)
                    simple_copy = False
                if value is not NotFound:
                    value = value if simple_copy else sheerka_deepcopy(value)
                    self._cache[key] = value
                    # update _current_size: collections count per element
                    if isinstance(value, (list, set)):
                        self._current_size += len(value)
                    else:
                        self._current_size += 1
                    if self._max_size and self._current_size > self._max_size:
                        self.evict(self._current_size - self._max_size)
            else:
                # static (non-callable) default: returned as-is, never cached
                value = self._default
            self._initialized_keys.add(key)
        return value
def _alt_get(self, key):
return self._get(key) # by default, point to _get
    def _put(self, key, value, alt_sdp):
        """
        Subclass hook for put().

        Must return a truthy value when a new slot was consumed so that put()
        increments _current_size.  The base implementation stores nothing and
        returns None (the size never grows).
        """
        pass
    def _update(self, old_key, old_value, new_key, new_value, alt_sdp):
        """Subclass hook for update(); the base implementation is a no-op."""
        pass
def _delete(self, key, value, alt_sdp):
raise NotImplementedError("_delete BaseCache")