Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx

This commit is contained in:
2020-05-12 17:21:10 +02:00
parent 7d3a490bc5
commit 6e343ba996
110 changed files with 13865 additions and 7540 deletions
+241
View File
@@ -0,0 +1,241 @@
from threading import RLock
class BaseCache:
    """
    An in-memory FIFO cache.

    When ``max_size`` is reached the oldest entries (first put) are evicted.
    Putting the same key twice overrides the previous element.
    The ``_put`` / ``_update`` / ``_delete`` hooks (and optionally ``_get``)
    are meant to be specialized by subclasses; every public method takes a
    re-entrant lock so one instance can be shared between threads.
    """

    def __init__(self, max_size=None, default=None, extend_exists=None):
        self._cache = {}
        self._max_size = max_size
        # Default value returned when a key is not found; may be a callable
        # taking the key (its non-None result is then stored in the cache).
        self._default = default
        # Optional callable used by exists() to search a remote repository.
        self._extend_exists = extend_exists
        self._lock = RLock()
        self._current_size = 0
        # Keys that were read at least once (used by _sync to keep a remote
        # repository consistent).
        self._initialized_keys = set()
        # Pending change events, consumed externally (see reset_events()).
        self.to_add = set()
        self.to_remove = set()

    def __len__(self):
        """
        Return the number of items in the cache (elements, not keys: values
        loaded as lists/sets are counted element-wise, see _get()).
        :return:
        """
        with self._lock:
            return self._current_size

    def __contains__(self, key):
        with self._lock:
            return key in self._cache

    def __iter__(self):
        with self._lock:
            # Iterate over a snapshot so concurrent mutations are safe.
            keys = self._cache.copy()
            yield from keys

    def __next__(self):
        return next(iter(self._cache))

    def __repr__(self):
        return f"{self.__class__.__name__}(size={self._current_size}, #keys={len(self._cache)})"

    def configure(self, max_size=None, default=None, extend_exists=None):
        """
        Override any of the constructor parameters after creation.
        Arguments left to None keep the current setting.
        """
        if max_size is not None:
            self._max_size = max_size
        if default is not None:
            self._default = default
        if extend_exists is not None:
            self._extend_exists = extend_exists

    def disable_default(self):
        """Stop falling back to the default when a key is missing."""
        self._default = None

    def put(self, key, value):
        """
        Add a new entry in the cache, evicting the oldest entries first
        when the cache is at (or over) capacity.
        :param key:
        :param value:
        :return:
        """
        with self._lock:
            if self._max_size and self._current_size >= self._max_size:
                # Evict enough items to leave room for the new entry.
                # NOTE: the previous formula (max_size - current_size + 1)
                # went to zero or negative once the cache was over capacity,
                # and then evicted nothing.
                self.evict(self._current_size - self._max_size + 1)
            if self._put(key, value):
                self._current_size += 1

    def get(self, key):
        """
        Retrieve an entry from the cache.
        If the entry does not exist, will use the 'default' value or delegate.
        :param key:
        :return:
        """
        with self._lock:
            self._initialized_keys.add(key)
            return self._get(key)

    def inner_get(self, key):
        """Raw lookup without default handling; raises KeyError on a miss."""
        return self._cache[key]

    def update(self, old_key, old_value, new_key, new_value):
        """
        Update an entry in the cache.
        :param old_key: key of the previous version of the entry
        :param old_value: previous version of the entry
        :param new_key: key of the entry
        :param new_value: new value
        :return:
        """
        with self._lock:
            self._update(old_key, old_value, new_key, new_value)

    def delete(self, key, value=None):
        """Remove an entry from the cache; a missing key is silently ignored."""
        with self._lock:
            try:
                self._delete(key, value)
            except KeyError:
                pass

    def has(self, key):
        """
        Return True if the key is in the cache.
        Never uses extend_exists.
        :param key:
        :return:
        """
        with self._lock:
            return key in self._cache

    def exists(self, key):
        """
        Return True if the key is in the cache.
        Can use extend_exists to also search a remote repository.
        :param key:
        :return:
        """
        with self._lock:
            if key in self._cache:
                return True
            return self._extend_exists(key) if self._extend_exists else False

    def evict(self, nb_items):
        """
        Remove up to nb_items entries, oldest (insertion order) first.
        :return: the number of entries accounted as removed
        """
        with self._lock:
            # Clamp to [0, current_size]; a non-positive request removes nothing.
            nb_to_delete = max(0, min(nb_items, self._current_size))
            removed = 0
            while removed < nb_to_delete and self._cache:
                # Dicts preserve insertion order, so this is FIFO.
                key = next(iter(self._cache))
                del self._cache[key]
                self._initialized_keys.discard(key)
                removed += 1
            self._current_size -= nb_to_delete
            return nb_to_delete

    def clear(self):
        """Empty the cache and reset all bookkeeping."""
        with self._lock:
            self._cache.clear()
            self._current_size = 0
            self._initialized_keys.clear()
            self.to_add.clear()
            self.to_remove.clear()

    def dump(self):
        """Return a snapshot: the current size and a shallow copy of the content."""
        with self._lock:
            return {
                "current_size": self._current_size,
                "cache": self._cache.copy()
            }

    def copy(self):
        """Return a shallow copy of the underlying dictionary."""
        with self._lock:
            return self._cache.copy()

    def init_from(self, dump):
        """Restore the state produced by dump(); returns self for chaining."""
        with self._lock:
            self._current_size = dump["current_size"]
            self._cache = dump["cache"].copy()
            return self

    def reset_events(self):
        """Forget the pending to_add / to_remove change events."""
        with self._lock:
            self.to_add.clear()
            self.to_remove.clear()

    def _sync(self, *keys):
        # Force a get() on keys never read before so that, when a default
        # loader exists, the remote value is pulled in before being mutated.
        for key in keys:
            if key not in self._initialized_keys and self._default:
                self.get(key)

    def _add_to_add(self, key):
        # Record a pending addition; cancels any pending removal of the key.
        self.to_add.add(key)
        self.to_remove.discard(key)

    def _add_to_remove(self, key):
        # Record a pending removal; cancels any pending addition of the key.
        self.to_remove.add(key)
        self.to_add.discard(key)

    def _get(self, key):
        try:
            value = self._cache[key]
        except KeyError:
            if callable(self._default):
                value = self._default(key)
                if value is not None:
                    self._cache[key] = value
                    # Keep _current_size consistent: collections loaded from
                    # the default count element-wise.
                    if isinstance(value, (list, set)):
                        self._current_size += len(value)
                    else:
                        self._current_size += 1
            else:
                value = self._default
        return value

    def _put(self, key, value):
        # Storage hook; subclasses return True when the key count grew.
        pass

    def _update(self, old_key, old_value, new_key, new_value):
        # Update hook; overridden by subclasses.
        pass

    def _delete(self, key, value):
        # Deletion hook; subclasses must implement it.
        raise NotImplementedError()
+31
View File
@@ -0,0 +1,31 @@
from threading import RLock
from cache.BaseCache import BaseCache
class Cache(BaseCache):
    """
    In-memory FIFO cache with plain one-value-per-key storage.

    Reaching max_size evicts the oldest entry; putting an existing key
    simply overwrites its previous value.
    """

    def _put(self, key, value):
        # Report whether the key is brand new so BaseCache.put can grow
        # the size counter.
        is_new_key = key not in self._cache
        self._cache[key] = value
        self._add_to_add(key)
        return is_new_key

    def _update(self, old_key, old_value, new_key, new_value):
        self._cache[new_key] = new_value
        self._add_to_add(new_key)
        if old_key == new_key:
            return
        # The key changed: fetch the stale entry first (keeps a remote
        # repository in sync), then drop it.
        self._sync(old_key)
        del self._cache[old_key]
        self._add_to_remove(old_key)

    def _delete(self, key, value):
        del self._cache[key]
        self._add_to_remove(key)
+261
View File
@@ -0,0 +1,261 @@
from dataclasses import dataclass, field
from threading import RLock
from typing import Callable
from cache.Cache import Cache
from core.concept import Concept
class MultipleEntryError(Exception):
    """
    Exception raised when trying to alter an entry holding multiple
    elements without giving the origin of the element.
    """

    def __init__(self, key):
        # Forward the key to Exception so str(exc) and exc.args carry the
        # context (the previous implementation dropped it, yielding empty
        # error messages in logs and tracebacks).
        super().__init__(key)
        self.key = key
@dataclass
class CacheDefinition:
    # Registration record kept by CacheManager for each named cache.
    cache: Cache  # the cache instance holding the entries
    use_ref: bool  # forwarded to transaction.add() when persisting
    get_key: Callable[[Concept], str] = field(repr=False)  # computes a concept's key; None for non-concept caches
    persist: bool = True  # when False, CacheManager.commit() skips this cache
class CacheManager:
    """
    Single class to manage all the caches.

    Caches are registered under a name; "concept caches" additionally
    index every concept under a key computed by their get_key callable.
    """

    def __init__(self, cache_only):
        # When True, disable all remote access when a key is not found.
        self.cache_only = cache_only
        # name -> CacheDefinition
        self.caches = {}
        # Names of the caches registered through register_concept_cache().
        self.concept_caches = []
        # Set whenever a cache content changes; cleared by commit().
        self.is_dirty = False
        self._lock = RLock()

    def register_concept_cache(self, name, cache, get_key, use_ref):
        """
        Register a cache that indexes concepts, along with how to compute
        each concept's key.
        :param name: unique cache name
        :param cache: the cache instance
        :param get_key: callable computing a concept's key
        :param use_ref: forwarded to the persistence transaction on commit
        :return:
        """
        with self._lock:
            if self.cache_only:
                cache.disable_default()
            self.caches[name] = CacheDefinition(cache, use_ref, get_key)
            self.concept_caches.append(name)

    def register_cache(self, name, cache, persist=True, use_ref=False):
        """
        Register a plain (non-concept) cache.
        :param name: unique cache name
        :param cache: the cache instance
        :param persist: when False, commit() skips this cache
        :param use_ref: forwarded to the persistence transaction on commit
        :return:
        """
        with self._lock:
            if self.cache_only:
                cache.disable_default()
            self.caches[name] = CacheDefinition(cache, use_ref, None, persist)

    def add_concept(self, concept):
        """
        Dispatch a new concept into every concept cache; multiple indexes
        are needed to retrieve a concept.
        :param concept:
        :return:
        """
        with self._lock:
            for name in self.concept_caches:
                cache_def = self.caches[name]
                key = cache_def.get_key(concept)
                cache_def.cache.put(key, concept)
            self.is_dirty = True

    def update_concept(self, old, new):
        """
        Update a concept in every concept cache.

        The key may differ between the old and the new version; each
        cache's update() handles moving the entry.
        :param old: old version of the concept
        :param new: new version of the concept
        :return:
        """
        with self._lock:
            for cache_name in self.concept_caches:
                cache_def = self.caches[cache_name]
                old_key = cache_def.get_key(old)
                new_key = cache_def.get_key(new)
                cache_def.cache.update(old_key, old, new_key, new)
            self.is_dirty = True

    def get(self, cache_name, key):
        """
        Get an entry from the named cache.
        :param cache_name:
        :param key:
        :return:
        """
        with self._lock:
            return self.caches[cache_name].cache.get(key)

    def copy(self, cache_name):
        """
        Get a copy of the content of the whole cache as a dictionary.
        :param cache_name:
        :return:
        """
        # Take the manager lock like every other accessor so the snapshot
        # is consistent with concurrent mutations (it was previously the
        # only public method that skipped the lock).
        with self._lock:
            return self.caches[cache_name].cache.copy()

    def put(self, cache_name, key, value):
        """
        Add an entry to the named cache.
        :param cache_name:
        :param key:
        :param value:
        :return:
        """
        with self._lock:
            self.caches[cache_name].cache.put(key, value)
            self.is_dirty = True

    def delete(self, cache_name, key, value=None):
        """
        Delete an entry from the named cache.
        :param cache_name:
        :param key:
        :param value:
        :return:
        """
        with self._lock:
            self.caches[cache_name].cache.delete(key, value)
            self.is_dirty = True

    def has(self, cache_name, key):
        """
        True if the value is in cache only. Never looks in a remote repository.
        :param cache_name:
        :param key:
        :return:
        """
        with self._lock:
            return self.caches[cache_name].cache.has(key)

    def exists(self, cache_name, key):
        """
        True if the value is in cache.
        If not found, may search in a remote repository (unless cache_only).
        :param cache_name:
        :param key:
        :return:
        """
        if self.cache_only:
            return self.has(cache_name, key)
        with self._lock:
            return self.caches[cache_name].cache.exists(key)

    def commit(self, context):
        """
        Persist all the caches into a physical persistence storage.
        :param context: execution context providing the transaction factory
        :return:
        """
        def update_full_serialisation(items, value):
            # Recursively flag concepts for full serialization.
            # Take care, infinite recursion is not handled !!
            if isinstance(items, (list, set, tuple)):
                for item in items:
                    update_full_serialisation(item, value)
            elif isinstance(items, dict):
                for values in items.values():
                    update_full_serialisation(values, value)
            elif isinstance(items, Concept):
                items.metadata.full_serialization = value

        if self.cache_only:
            # Nothing to persist when remote access is disabled.
            return
        with self._lock:
            with context.sheerka.sdp.get_transaction(context.event.get_digest()) as transaction:
                for cache_name, cache_def in self.caches.items():
                    if not cache_def.persist:
                        continue
                    for key in cache_def.cache.to_remove:
                        transaction.remove(cache_name, key)
                    for key in cache_def.cache.to_add:
                        if key == "*self*":
                            # Marker emitted by DictionaryCache: persist the
                            # whole dictionary content under no key.
                            transaction.add(cache_name, None, cache_def.cache.dump()["cache"])
                        else:
                            to_save = cache_def.cache.inner_get(key)
                            # Flag for full serialization only for the
                            # duration of the write.
                            update_full_serialisation(to_save, True)
                            transaction.add(cache_name, key, to_save, cache_def.use_ref)
                            update_full_serialisation(to_save, False)
                    cache_def.cache.reset_events()
            self.is_dirty = False

    def clear(self, cache_name=None):
        """Clear one named cache, or every registered cache when no name is given."""
        with self._lock:
            if cache_name:
                self.caches[cache_name].cache.clear()
            else:
                for cache_def in self.caches.values():
                    cache_def.cache.clear()

    def dump(self):
        """Return a dict of cache name -> cache dump for every registered cache."""
        with self._lock:
            res = {}
            for cache_name, cache_def in self.caches.items():
                res[cache_name] = cache_def.cache.dump()
            return res

    def init_from(self, dump):
        """
        Restore registered caches from a dump(); unknown names are ignored.
        Returns self for chaining.
        """
        with self._lock:
            for cache_name, content in dump.items():
                if cache_name in self.caches:
                    self.caches[cache_name].cache.init_from(content)
            return self
+53
View File
@@ -0,0 +1,53 @@
from cache.BaseCache import BaseCache
class DictionaryCache(BaseCache):
    """
    A cache whose whole content is one dictionary: _put replaces or merges
    the entire dictionary, and a missed _get rebuilds it from the default.
    """

    def _get(self, key):
        """
        Management of the default is different: on a miss the whole
        dictionary is (re)built from the default before retrying.
        :param key:
        :return:
        """
        if key in self._cache:
            return self._cache[key]
        # Miss: rebuild the entire content from the default source.
        if callable(self._default):
            self._cache = self._default(key) or {}
        elif self._default:
            self._cache = self._default.copy()
        else:
            self._cache = {}
        self._count_items()
        return self._cache.get(key)

    def _put(self, key, value):
        """
        Store a whole dictionary.
        :param key: True to merge into the existing content, False to replace it
        :param value: the dictionary to store
        :return: always False (size is tracked by _count_items instead)
        """
        if not isinstance(key, bool):
            raise KeyError
        if not isinstance(value, dict):
            raise ValueError
        if not key:
            self._cache = value
        elif self._cache is None:
            self._cache = value.copy()
        else:
            self._cache.update(value)
        self._count_items()
        # Special marker meaning "persist the whole dictionary".
        self._add_to_add("*self*")
        return False

    def _count_items(self):
        # Size = sum of the lengths of sized, non-string values; anything
        # else (including strings) counts as one.
        total = 0
        for item in self._cache.values():
            if hasattr(item, "__len__") and not isinstance(item, str):
                total += len(item)
            else:
                total += 1
        self._current_size = total
+18
View File
@@ -0,0 +1,18 @@
from cache.Cache import Cache
class IncCache(Cache):
    """
    Counter cache: every access to a key increments its stored value and
    returns the incremented count.
    """

    def _get(self, key):
        previous = super()._get(key)
        # A missing key (or falsy stored value) starts counting from zero.
        current = (previous or 0) + 1
        self._put(key, current)
        return current

    def _put(self, key, value):
        self._add_to_add(key)
        self._cache[key] = value
        return True
+43
View File
@@ -0,0 +1,43 @@
from cache.Cache import BaseCache
class ListCache(BaseCache):
    """
    FIFO cache where every key maps to a list of values; putting the same
    key again appends to that list.
    """

    def _put(self, key, value):
        if key not in self._cache:
            # Pull any remote value first so the append lands on it.
            self._sync(key)
        if key in self._cache:
            self._cache[key].append(value)
        else:
            self._cache[key] = [value]
        self._add_to_add(key)
        return True

    def _update(self, old_key, old_value, new_key, new_value):
        self._sync(old_key, new_key)
        if old_key == new_key:
            # Same key: swap in place (avoids an add + remove event pair);
            # only the first matching element is affected.
            bucket = self._cache[new_key]
            for index, item in enumerate(bucket):
                if item == old_value:
                    bucket[index] = new_value
                    break
            self._add_to_add(new_key)
            return
        # Key changed: detach the value from the old bucket...
        self._cache[old_key].remove(old_value)
        if self._cache[old_key]:
            self._add_to_add(old_key)
        else:
            del self._cache[old_key]
            self._add_to_remove(old_key)
        # ... then re-insert it under the new key.
        self._put(new_key, new_value)
        self._add_to_add(new_key)
+56
View File
@@ -0,0 +1,56 @@
from cache.Cache import BaseCache
class ListIfNeededCache(BaseCache):
    """
    FIFO cache that stores a bare value for a key used once and silently
    upgrades to a list when the same key receives a second value.
    """

    def _put(self, key, value):
        if key not in self._cache:
            # Pull any remote value first so the new one merges with it.
            self._sync(key)
        if key in self._cache:
            current = self._cache[key]
            if isinstance(current, list):
                current.append(value)
            else:
                # Second value for this key: upgrade to a list.
                self._cache[key] = [current, value]
        else:
            self._cache[key] = value
        self._add_to_add(key)
        return True

    def _update(self, old_key, old_value, new_key, new_value):
        self._sync(old_key, new_key)
        if old_key == new_key:
            bucket = self._cache[new_key]
            if isinstance(bucket, list):
                # Only the first matching element is replaced.
                for index, item in enumerate(bucket):
                    if item == old_value:
                        bucket[index] = new_value
                        break
            else:
                self._cache[new_key] = new_value
            self._add_to_add(new_key)
            return
        # Key changed: detach the old value, then re-insert under new_key.
        previous = self._cache[old_key]
        if isinstance(previous, list):
            previous.remove(old_value)
            if previous:
                self._add_to_add(old_key)
            else:
                del self._cache[old_key]
                self._add_to_remove(old_key)
        else:
            del self._cache[old_key]
            self._add_to_remove(old_key)
        self._put(new_key, new_value)
        self._add_to_add(new_key)
+45
View File
@@ -0,0 +1,45 @@
from cache.Cache import BaseCache
class SetCache(BaseCache):
    """
    FIFO cache where the same key may be put multiple times, but the
    elements stored under one key stay unique: each key maps to a set.
    """

    def _put(self, key, value):
        if key in self._cache:
            bucket = self._cache[key]
            if value in bucket:
                # Already present: nothing stored, the size must not grow.
                return False
            bucket.add(value)
        else:
            self._sync(key)
            if key in self._cache:
                self._cache[key].add(value)
            else:
                self._cache[key] = {value}
        self._add_to_add(key)
        return True

    def _update(self, old_key, old_value, new_key, new_value):
        self._sync(old_key, new_key)
        if old_key == new_key:
            # Same key: swap the element inside its set.
            self._cache[new_key].remove(old_value)
            self._put(new_key, new_value)
            self._add_to_add(new_key)
            return
        # Key changed: detach the old value (only when the slot really
        # holds a set), then re-insert under the new key.
        bucket = self._cache[old_key]
        if isinstance(bucket, set):
            bucket.remove(old_value)
            if bucket:
                self._add_to_add(old_key)
            else:
                del self._cache[old_key]
                self._add_to_remove(old_key)
        self._put(new_key, new_value)
        self._add_to_add(new_key)
View File