from threading import RLock

from core.global_symbols import NotFound
from core.utils import sheerka_deepcopy

MAX_INITIALIZED_KEY = 100


class BaseCache:
    """
    An in-memory FIFO cache object.
    When max_size is reached, the first element that was put is removed.
    When you put the same key twice, the previous element is overridden.
    """

    def __init__(self, max_size=None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
        self._cache = {}
        self._max_size = max_size
        self._default = default  # default value to return when key is not found. It can be a callable of key
        self._extend_exists = extend_exists  # search in remote
        self._alt_sdp_get = alt_sdp_get  # how to get the value when called by alt_sdp
        self._sdp = sdp  # current instance of SheerkaDataProvider
        self._lock = RLock()
        self._current_size = 0
        self._initialized_keys = set()  # to keep the list of the keys already requested (using get())
        self._is_cleared = False  # indicates that clear() was called
        self.to_add = set()
        self.to_remove = set()

    # Explanation on _initialized_keys:
    # Every time you try to get an item, its key is added to _initialized_keys.
    # If the item is found, the entry is in the cache; if it is not, the key is
    # still recorded, so the next miss returns NotFound immediately instead of
    # asking the repository again (see _get and _sync).

    def __len__(self):
        """
        Return the number of items in the cache
        :return:
        """
        with self._lock:
            return self._current_size

    def __contains__(self, key):
        with self._lock:
            return key in self._cache

    def __iter__(self):
        with self._lock:
            keys = self._cache.copy()
            yield from keys

    def __next__(self):
        return next(iter(self._cache))

    def __repr__(self):
        return f"{self.__class__.__name__}(size={self._current_size}, #keys={len(self._cache)})"

    def configure(self, max_size=None, default=NotFound, extend_exists=None, alt_sdp_get=None, sdp=None):
        if max_size is not None:
            self._max_size = max_size
        if default is not NotFound:
            self._default = default
        if extend_exists is not None:
            self._extend_exists = extend_exists
        if alt_sdp_get is not None:
            self._alt_sdp_get = alt_sdp_get
        if sdp is not None:
            self._sdp = sdp
        return self

    def auto_configure(self, cache_name):
        """
        Convenient way to configure the cache
        :param cache_name:
        :return:
        """
        self._default = lambda sdp, key: sdp.get(cache_name, key)
        self._extend_exists = lambda sdp, key: sdp.exists(cache_name, key)
        self._alt_sdp_get = lambda sdp, key: sdp.alt_get(cache_name, key)  # by default, same as get
        return self
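    # Illustrative note (not from the original code): with an SDP attached, a
    # typical setup might look like
    #     cache = BaseCache(max_size=1000, sdp=sdp).auto_configure("products")
    # where "products" is a hypothetical cache name. On a miss, get() then
    # falls back to sdp.get("products", key) and exists() to
    # sdp.exists("products", key), exactly as wired in auto_configure above.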
    def disable_default(self):
        self._default = (lambda sdp, key: NotFound) if self._sdp else (lambda key: NotFound)

    def put(self, key, value, alt_sdp=None):
        """
        Add a new entry in the cache
        :param key:
        :param value:
        :param alt_sdp:
        :return:
        """
        with self._lock:
            if self._put(key, value, alt_sdp):
                self._current_size += 1

    def get(self, key, alt_sdp=None):
        """
        Retrieve an entry from the cache
        If the entry does not exist, will use the 'default' value or delegate
        :param key:
        :param alt_sdp: if not found in cache._sdp, look in other repositories
        :return:
        """
        with self._lock:
            return self._get(key, alt_sdp)

    def alt_get(self, key):
        """
        Alternate way to get an entry, from the concept cache
        This is mainly used for IncCache, in order to get the value without increasing it
        If used for another cache, it must return the value for the key WITHOUT modifying the state of the cache
        :param key:
        :return:
        """
        with self._lock:
            return self._alt_get(key)

    def get_all(self):
        """
        Retrieve all items already in the cache
        This method does not fetch from the remote repository
        :return:
        """
        with self._lock:
            return self._cache.values()

    def inner_get(self, key):
        return self._cache[key]

    def update(self, old_key, old_value, new_key, new_value, alt_sdp=None):
        """
        Update an entry in the cache
        :param old_key: key of the previous version of the entry
        :param old_value: previous version of the entry
        :param new_key: key of the entry
        :param new_value: new value
        :param alt_sdp: if not found in cache._sdp, look in other repositories
        :return:
        """
        with self._lock:
            self._update(old_key, old_value, new_key, new_value, alt_sdp)

    def delete(self, key, value=None, alt_sdp=None):
        with self._lock:
            try:
                self._sync(key)
                self._delete(key, value, alt_sdp)
                return True
            except KeyError:
                return False

    def populate(self, populate_function, get_key_function, reset_events=False):
        """
        Initialise the cache with a bunch of data
        :param populate_function:
        :param get_key_function:
        :param reset_events:
        :return:
        """
        with self._lock:
            if reset_events:
                to_add_copy = self.to_add.copy()
                to_remove_copy = self.to_remove.copy()
            for item in (populate_function(self._sdp) if self._sdp else populate_function()):
                self.put(get_key_function(item), item)
            if reset_events:
                self.to_add = to_add_copy
                self.to_remove = to_remove_copy
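    # Illustrative note (hypothetical names): without an SDP, populate expects
    # a zero-argument producer, e.g.
    #     cache.populate(lambda: load_items(), lambda item: item.key)
    # where load_items() yields the entries to preload and the second callable
    # derives the cache key of each item.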
    def force_value(self, key, value):
        """
        Force a value into a key without raising any event
        """
        with self._lock:
            self._cache[key] = value

    def remove_initialized_key(self, key):
        """
        When a value is requested by alt_sdp, we should not keep track of the request,
        as the outcome is not known
        """
        with self._lock:
            self._initialized_keys.remove(key)

    def has(self, key):
        """
        Return True if the key is in the cache
        Never uses extend_exists
        :param key:
        :return:
        """
        with self._lock:
            return key in self._cache

    def exists(self, key):
        """
        Return True if the key is in the cache
        Can use extend_exists
        :param key:
        :return:
        """
        with self._lock:
            if key in self._cache:
                return True
            if self._extend_exists:
                return self._extend_exists(self._sdp, key) if self._sdp else self._extend_exists(key)
            else:
                return False

    def evict(self, nb_items):
        """
        Remove nb_items from the cache, using the replacement policy
        :return:
        """
        with self._lock:
            nb_items = min(nb_items, self._current_size)
            to_remove = []
            iter_cache = iter(self._cache)
            try:
                while nb_items > 0:
                    key = next(iter_cache)
                    if key in self.to_add or key in self.to_remove:
                        continue  # cannot remove an item that is not yet committed
                    else:
                        to_remove.append(key)
                        nb_items -= 1
            except StopIteration:
                pass
            for key in to_remove:
                del self._cache[key]
                try:
                    self._initialized_keys.remove(key)
                except KeyError:
                    pass
            self._current_size -= len(to_remove)
            return len(to_remove)

    def evict_by_key(self, predicate):
        """
        Remove entries that match the predicate
        :param predicate:
        :return:
        """
        to_delete = []
        with self._lock:
            for key in self._cache:
                if predicate(key):
                    to_delete.append(key)
            for key in to_delete:
                del self._cache[key]
                try:
                    self._initialized_keys.remove(key)
                except KeyError:
                    pass
            self._current_size -= len(to_delete)
            return len(to_delete)

    def clear(self, set_is_cleared=True):
        with self._lock:
            # Seems that the remote sdp is not correctly updated
            self._cache.clear()
            self._current_size = 0
            self._initialized_keys.clear()
            self.to_add.clear()
            self.to_remove.clear()
            if set_is_cleared:
                self._is_cleared = True

    def dump(self):
        with self._lock:
            return {
                "current_size": self._current_size,
                "cache": self._cache.copy()
            }

    def copy(self):
        with self._lock:
            return self._cache.copy()

    def init_from_dump(self, dump):
        with self._lock:
            self._current_size = dump["current_size"]
            self._cache = dump["cache"].copy()
            return self

    def reset_events(self):
        with self._lock:
            self.to_add.clear()
            self.to_remove.clear()

    def reset_initialized_keys(self):
        """
        Use when an ontology is put back.
        Reset all the previous requests, as alt_sdp is a new one
        """
        with self._lock:
            self._initialized_keys.clear()

    def is_cleared(self):
        with self._lock:
            return self._is_cleared

    def clone(self):
        return type(self)(self._max_size, self._default, self._extend_exists, self._alt_sdp_get, self._sdp)

    def test_only_reset(self):
        """
        Clears the cache, but does not set is_cleared to True
        It's a convenient way to clear the cache without altering alt_sdp behaviour
        """
        self.clear(set_is_cleared=False)

    def _sync(self, *keys):
        # KSI 2020-12-29. Do not try to use alt_sdp here
        # Sync must only sync with the current sdp
        for key in keys:
            if key not in self._initialized_keys and callable(self._default):
                # to keep in sync with the remote repo if needed
                # first check self._initialized_keys to prevent an infinite loop
                self.get(key)

    def _add_to_add(self, key):
        self.to_add.add(key)
        try:
            self.to_remove.remove(key)
        except KeyError:
            pass

    def _add_to_remove(self, key):
        self.to_remove.add(key)
        try:
            self.to_add.remove(key)
        except KeyError:
            pass

    def _get(self, key, alt_sdp=None):
        try:
            value = self._cache[key]
        except KeyError:
            if len(self._initialized_keys) == MAX_INITIALIZED_KEY:
                self._initialized_keys.clear()
            if callable(self._default):
                if key in self._initialized_keys:
                    # it means that we have already asked the repository
                    return NotFound
                simple_copy = True
                value = self._default(self._sdp, key) if self._sdp else self._default(key)
                if value is NotFound and alt_sdp and not self._is_cleared:
                    value = self._alt_sdp_get(alt_sdp, key)
                    simple_copy = False
                if value is not NotFound:
                    value = value if simple_copy else sheerka_deepcopy(value)
                    self._cache[key] = value
                    # update _current_size
                    if isinstance(value, (list, set)):
                        self._current_size += len(value)
                    else:
                        self._current_size += 1
                    if self._max_size and self._current_size > self._max_size:
                        self.evict(self._current_size - self._max_size)
            else:
                value = self._default
        self._initialized_keys.add(key)
        return value

    def _alt_get(self, key):
        return self._get(key)  # by default, point to _get

    def _put(self, key, value, alt_sdp):
        pass

    def _update(self, old_key, old_value, new_key, new_value, alt_sdp):
        pass

    def _delete(self, key, value, alt_sdp):
        raise NotImplementedError("_delete BaseCache")
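

# --- Usage sketch (illustrative only) ---
# BaseCache leaves _put/_update/_delete to subclasses. _DemoCache below is a
# hypothetical minimal subclass written for demonstration; it is not part of
# this module's API, and real subclasses in this codebase may differ.
class _DemoCache(BaseCache):
    def _put(self, key, value, alt_sdp):
        is_new = key not in self._cache
        self._cache[key] = value
        self._add_to_add(key)
        return is_new  # put() increments _current_size only when this is truthy

    def _delete(self, key, value, alt_sdp):
        del self._cache[key]  # raises KeyError if absent, as delete() expects
        self._add_to_remove(key)
        self._current_size -= 1


if __name__ == "__main__":
    cache = _DemoCache(max_size=10, default=lambda key: NotFound)
    cache.put("a", 1)
    cache.put("b", 2)
    assert cache.get("a") == 1
    assert cache.get("missing") is NotFound  # the callable default is consulted
    assert not cache.has("missing")          # misses are not stored in the cache
    assert cache.delete("a")
    assert len(cache) == 1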