Reworked sdp
This commit is contained in:
+209
-46
@@ -49,6 +49,18 @@ class Event(object):
|
||||
self.message = as_dict["message"]
|
||||
|
||||
|
||||
class ObjWithKey:
|
||||
"""
|
||||
Internal key value class to hold the key (and the value)
|
||||
when it is detected
|
||||
It's created to distinguish from {key, value}
|
||||
"""
|
||||
|
||||
def __init__(self, key, obj):
|
||||
self.key = key
|
||||
self.obj = obj
|
||||
|
||||
|
||||
class State:
|
||||
"""
|
||||
Class that represents the state of the system (dictionary of all known entries)
|
||||
@@ -62,25 +74,73 @@ class State:
|
||||
self.data = {}
|
||||
|
||||
def update(self, entry, obj, append=True):
|
||||
obj_to_use = {str(obj.get_key()): obj} if hasattr(obj, "get_key") else obj
|
||||
"""
|
||||
adds obj to entry
|
||||
:param entry:
|
||||
:param obj:
|
||||
:param append: if True, ducplicate keys will create lists
|
||||
:return:
|
||||
"""
|
||||
obj_to_use = {obj.key: obj.obj} if isinstance(obj, ObjWithKey) else obj
|
||||
|
||||
if entry not in self.data:
|
||||
self.data[entry] = obj_to_use
|
||||
elif isinstance(obj_to_use, dict):
|
||||
if append:
|
||||
|
||||
elif not append:
|
||||
if isinstance(obj_to_use, dict):
|
||||
self.data[entry].update(obj_to_use)
|
||||
else:
|
||||
self.data[entry] = obj_to_use
|
||||
|
||||
elif isinstance(self.data[entry], list):
|
||||
if append:
|
||||
self.data[entry].append(obj_to_use)
|
||||
else:
|
||||
self.data[entry] = obj_to_use
|
||||
self.data[entry].append(obj.obj if isinstance(obj, ObjWithKey) else obj) # do not use obj_to_use !
|
||||
|
||||
elif isinstance(obj_to_use, dict):
|
||||
for k in obj_to_use:
|
||||
if k not in self.data[entry]:
|
||||
self.data[entry][k] = obj_to_use[k]
|
||||
elif isinstance(self.data[entry][k], list):
|
||||
self.data[entry][k].append(obj_to_use[k])
|
||||
else:
|
||||
self.data[entry][k] = [self.data[entry][k], obj_to_use[k]]
|
||||
|
||||
elif isinstance(self.data[entry], dict):
|
||||
raise SheerkaDataProviderError(f"Cannot found key on '{obj}' while all other elements have.", obj)
|
||||
|
||||
else:
|
||||
if append:
|
||||
self.data[entry] = [self.data[entry], obj_to_use]
|
||||
else:
|
||||
self.data[entry] = obj_to_use
|
||||
self.data[entry] = [self.data[entry], obj_to_use]
|
||||
|
||||
def modify(self, entry, key, obj, obj_key):
|
||||
# if the key changes, make sure to remove the previous entry
|
||||
append = False
|
||||
if obj_key != key:
|
||||
self.remove(entry, lambda k, o: k == key) # modify from on object to another
|
||||
append = True
|
||||
|
||||
self.update(entry, ObjWithKey(obj_key, obj), append=append)
|
||||
|
||||
def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed):
|
||||
found = False
|
||||
to_remove = None
|
||||
for i in range(len(self.data[entry][key])):
|
||||
item, is_ref = load_ref_if_needed(self.data[entry][key][i])
|
||||
if not hasattr(item, "get_digest"):
|
||||
continue
|
||||
if item.get_digest() == obj_origin:
|
||||
obj = save_ref_if_needed(is_ref, obj)
|
||||
if obj_key == key:
|
||||
self.data[entry][key][i] = obj
|
||||
else:
|
||||
to_remove = i
|
||||
self.update(entry, ObjWithKey(obj_key, obj), append=True)
|
||||
found = True
|
||||
break
|
||||
|
||||
if not found:
|
||||
raise (SheerkaDataProviderError(f"Cannot modify '{entry}.{key}'. Item '{obj_origin}' not found.", obj))
|
||||
|
||||
if to_remove is not None:
|
||||
del self.data[entry][key][to_remove]
|
||||
|
||||
def remove(self, entry, filter):
|
||||
if filter is None:
|
||||
@@ -108,13 +168,29 @@ class State:
|
||||
return hashlib.sha256(as_json.encode("utf-8")).hexdigest()
|
||||
|
||||
def contains(self, entry, key):
|
||||
"""
|
||||
if key is None, returns True if entry exists
|
||||
if key has a value
|
||||
returns True if entry is an dict and contains key
|
||||
:param entry:
|
||||
:param key:
|
||||
:return:
|
||||
"""
|
||||
if entry not in self.data:
|
||||
return False
|
||||
if key is None:
|
||||
return entry in self.data
|
||||
if not isinstance(self.data[entry], dict):
|
||||
return False
|
||||
return key in self.data[entry]
|
||||
|
||||
|
||||
class SheerkaDataProviderError(Exception):
|
||||
def __init__(self, message, obj):
|
||||
Exception.__init__(self, message)
|
||||
self.obj = obj
|
||||
|
||||
|
||||
class SheerkaDataProvider:
|
||||
"""Manages the state of the system"""
|
||||
|
||||
@@ -124,6 +200,7 @@ class SheerkaDataProvider:
|
||||
CacheFolder = "cache"
|
||||
HeadFile = "HEAD"
|
||||
KeysFile = "keys"
|
||||
REF_PREFIX = "##REF##:"
|
||||
|
||||
def __init__(self, root=None):
|
||||
|
||||
@@ -137,37 +214,72 @@ class SheerkaDataProvider:
|
||||
self.serializer = Serializer()
|
||||
|
||||
def get_obj_path(self, object_type, digest):
|
||||
path.join(self.root, object_type, digest[:24], digest)
|
||||
return path.join(self.root, object_type, digest[:24], digest)
|
||||
|
||||
def add(self, event: Event, entry, obj):
|
||||
@staticmethod
|
||||
def get_obj_key(obj):
|
||||
"""
|
||||
Tries to find the key of an object
|
||||
Look for .key, .get_key()
|
||||
:param obj:
|
||||
:return: String version of that is found, None otherwise
|
||||
"""
|
||||
return str(obj.key) if hasattr(obj, "key") else str(obj.get_key()) if hasattr(obj, "get_key") else None
|
||||
|
||||
@staticmethod
|
||||
def get_stream_digest(stream):
|
||||
sha256_hash = hashlib.sha256()
|
||||
for byte_block in iter(lambda: stream.read(4096), b""):
|
||||
sha256_hash.update(byte_block)
|
||||
|
||||
stream.seek(0)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
@staticmethod
|
||||
def is_reference(obj):
|
||||
return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)
|
||||
|
||||
def add(self, event: Event, entry, obj, allow_multiple=True, use_ref=False):
|
||||
"""
|
||||
Adds obj to the entry 'entry'
|
||||
:param event: events that triggers the update of the state
|
||||
:param entry: entry of the state to update
|
||||
:param obj: obj to insert or add
|
||||
:return: new sha256 of the state
|
||||
:param allow_multiple: if set to true, the same key can be added several times.
|
||||
All entries will be put in a list
|
||||
:param use_ref: if True the actual object is saved under 'objects' folder,
|
||||
only a reference is saved in the state
|
||||
:return: (entry, key) to retrieve the object
|
||||
"""
|
||||
|
||||
event_digest = self.save_event(event)
|
||||
snapshot = self.get_snapshot()
|
||||
state = self.load_state(snapshot)
|
||||
|
||||
# check uniqueness, cannot add the same key twice
|
||||
obj_key = str(obj.get_key()) if hasattr(obj, "get_key") else None
|
||||
if state.contains(entry, obj_key):
|
||||
raise IndexError(f"{entry}.{obj_key}")
|
||||
elif isinstance(obj, dict):
|
||||
for k in obj:
|
||||
if state.contains(entry, k):
|
||||
raise IndexError(f"{entry}.{k}")
|
||||
# check uniqueness, cannot add the same key twice if allow_multiple == False
|
||||
key = self.get_obj_key(obj)
|
||||
if not allow_multiple:
|
||||
if isinstance(obj, dict):
|
||||
for k in obj:
|
||||
if state.contains(entry, k):
|
||||
raise IndexError(f"{entry}.{k}")
|
||||
else:
|
||||
if state.contains(entry, key):
|
||||
raise IndexError(f"{entry}.{key}" if key else entry)
|
||||
|
||||
state.parents = [] if snapshot is None else [snapshot]
|
||||
state.events = [event_digest]
|
||||
state.date = datetime.now()
|
||||
state.update(entry, obj if obj_key is None else {obj_key: obj})
|
||||
|
||||
if use_ref:
|
||||
digest = self.save_obj(obj)
|
||||
obj = ObjWithKey(key, self.REF_PREFIX + digest) if key else self.REF_PREFIX + digest
|
||||
|
||||
state.update(entry, obj if (isinstance(obj, ObjWithKey) or key is None) else ObjWithKey(key, obj))
|
||||
|
||||
new_snapshot = self.save_state(state)
|
||||
self.set_snapshot(new_snapshot)
|
||||
return new_snapshot
|
||||
return entry, key
|
||||
|
||||
def add_with_auto_key(self, event: Event, entry, obj):
|
||||
"""
|
||||
@@ -180,7 +292,8 @@ class SheerkaDataProvider:
|
||||
next_key = self.get_next_key(entry)
|
||||
if hasattr(obj, "set_key"):
|
||||
obj.set_key(next_key)
|
||||
return self.add(event, entry, {next_key: obj})
|
||||
self.add(event, entry, ObjWithKey(next_key, obj))
|
||||
return entry, next_key
|
||||
|
||||
def add_unique(self, event: Event, entry, obj):
|
||||
"""Add an entry and make sure it's unique"""
|
||||
@@ -198,14 +311,16 @@ class SheerkaDataProvider:
|
||||
|
||||
new_snapshot = self.save_state(state)
|
||||
self.set_snapshot(new_snapshot)
|
||||
return new_snapshot
|
||||
return entry, None
|
||||
|
||||
def set(self, event: Event, entry, obj):
|
||||
def set(self, event: Event, entry, obj, use_ref=False):
|
||||
"""
|
||||
Add or replace an element
|
||||
Add or replace an entry. The entry is reinitialized.
|
||||
If the previous value was dict, all keys are lost
|
||||
:param event:
|
||||
:param entry:
|
||||
:param obj:
|
||||
:param use_ref:
|
||||
:return:
|
||||
"""
|
||||
event_digest = self.save_event(event)
|
||||
@@ -215,22 +330,30 @@ class SheerkaDataProvider:
|
||||
state.parents = [] if snapshot is None else [snapshot]
|
||||
state.events = [event_digest]
|
||||
state.date = datetime.now()
|
||||
obj_key = str(obj.get_key()) if hasattr(obj, "get_key") else None
|
||||
state.update(entry, obj if obj_key is None else {obj_key: obj}, append=False)
|
||||
|
||||
key = self.get_obj_key(obj)
|
||||
obj = self.save_ref_if_needed(use_ref, obj)
|
||||
|
||||
state.data[entry] = obj if key is None else {key: obj}
|
||||
|
||||
new_snapshot = self.save_state(state)
|
||||
self.set_snapshot(new_snapshot)
|
||||
return new_snapshot
|
||||
return entry, key
|
||||
|
||||
def modify(self, event: Event, entry, key, obj):
|
||||
"""
|
||||
Updates an existing element when element are saved by key
|
||||
Replace an element
|
||||
If the key is not provided, has the same effect than set eg, the entry is reset
|
||||
:param event:
|
||||
:param entry:
|
||||
:param key: key of the object to update
|
||||
:param obj: new data
|
||||
:return:
|
||||
"""
|
||||
|
||||
if key is None:
|
||||
raise SheerkaDataProviderError("Key is mandatory.", None)
|
||||
|
||||
event_digest = self.save_event(event)
|
||||
snapshot = self.get_snapshot()
|
||||
state = self.load_state(snapshot)
|
||||
@@ -244,14 +367,24 @@ class SheerkaDataProvider:
|
||||
state.parents = [] if snapshot is None else [snapshot]
|
||||
state.events = [event_digest]
|
||||
state.date = datetime.now()
|
||||
if key is None:
|
||||
state.data[entry] = obj
|
||||
|
||||
# Gets obj original key, it will help to know if the key has changed
|
||||
obj_key = self.get_obj_key(obj) or key
|
||||
|
||||
if isinstance(state.data[entry][key], list):
|
||||
if not hasattr(obj, Serializer.ORIGIN):
|
||||
raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj))
|
||||
|
||||
obj_origin = getattr(obj, Serializer.ORIGIN)
|
||||
state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed)
|
||||
|
||||
else:
|
||||
state.update(entry, {key: obj})
|
||||
obj = self.save_ref_if_needed(self.is_reference(state.data[entry][key]), obj)
|
||||
state.modify(entry, key, obj, obj_key)
|
||||
|
||||
new_snapshot = self.save_state(state)
|
||||
self.set_snapshot(new_snapshot)
|
||||
return new_snapshot
|
||||
return entry, obj_key
|
||||
|
||||
def list(self, entry, filter=None):
|
||||
"""
|
||||
@@ -272,7 +405,7 @@ class SheerkaDataProvider:
|
||||
filter_to_use = (lambda k, o: True) if filter is None else filter
|
||||
for key, element in elements.items():
|
||||
if filter_to_use(key, element):
|
||||
yield element
|
||||
yield self.load_ref_if_needed(element)[0]
|
||||
else:
|
||||
# manage when no key is defined for the elements
|
||||
if not isinstance(elements, list) and not isinstance(elements, set):
|
||||
@@ -281,7 +414,7 @@ class SheerkaDataProvider:
|
||||
filter_to_use = (lambda o: True) if filter is None else filter
|
||||
for element in elements:
|
||||
if filter_to_use(element):
|
||||
yield element
|
||||
yield self.load_ref_if_needed(element)[0]
|
||||
|
||||
def remove(self, event: Event, entry, filter=None):
|
||||
"""
|
||||
@@ -325,7 +458,11 @@ class SheerkaDataProvider:
|
||||
if key is not None and key not in state.data[entry]:
|
||||
raise IndexError(f"{entry}.{key}")
|
||||
|
||||
return state.data[entry] if key is None else state.data[entry][key]
|
||||
item = state.data[entry] if key is None else state.data[entry][key]
|
||||
if isinstance(item, list):
|
||||
return [self.load_ref_if_needed(i)[0] for i in item]
|
||||
|
||||
return self.load_ref_if_needed(item)[0]
|
||||
|
||||
def get_safe(self, entry, key=None):
|
||||
"""
|
||||
@@ -343,7 +480,7 @@ class SheerkaDataProvider:
|
||||
if key is not None and key not in state.data[entry]:
|
||||
return None
|
||||
|
||||
return state.data[entry] if key is None else state.data[entry][key]
|
||||
return self.load_ref_if_needed(state.data[entry] if key is None else state.data[entry][key])[0]
|
||||
|
||||
def exists(self, entry):
|
||||
"""
|
||||
@@ -407,10 +544,9 @@ class SheerkaDataProvider:
|
||||
return self.serializer.deserialize(f, None)
|
||||
|
||||
def save_obj(self, obj):
|
||||
if hasattr(obj, "key") and hasattr(obj, "key_name") and obj.key is None:
|
||||
obj.key = self.get_next_key(obj.key_name)
|
||||
stream = self.serializer.serialize(obj, SerializerContext(user_name="kodjo"))
|
||||
digest = obj.get_digest() if hasattr(obj, "get_digest") else self.get_stream_digest(stream)
|
||||
|
||||
digest = obj.get_digest()
|
||||
target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
|
||||
if path.exists(target_path):
|
||||
return digest
|
||||
@@ -419,17 +555,44 @@ class SheerkaDataProvider:
|
||||
os.makedirs(path.dirname(target_path))
|
||||
|
||||
with open(target_path, "wb") as f:
|
||||
f.write(self.serializer.serialize(obj, SerializerContext("kodjo", digest)).read())
|
||||
f.write(stream.read())
|
||||
|
||||
return digest
|
||||
|
||||
def load_obj(self, digest):
|
||||
if digest is None:
|
||||
return State()
|
||||
return None
|
||||
|
||||
target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
|
||||
if not path.exists(target_path):
|
||||
return None
|
||||
|
||||
with open(target_path, "rb") as f:
|
||||
return self.serializer.deserialize(f, SerializerContext("kodjo", digest))
|
||||
obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
|
||||
|
||||
# set the origin of the object
|
||||
if not isinstance(obj, str):
|
||||
setattr(obj, Serializer.ORIGIN, digest)
|
||||
return obj
|
||||
|
||||
def load_ref_if_needed(self, obj):
|
||||
if not isinstance(obj, str):
|
||||
return obj, False
|
||||
if not obj.startswith(SheerkaDataProvider.REF_PREFIX):
|
||||
return obj, False
|
||||
|
||||
resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):])
|
||||
if resolved is None:
|
||||
return obj, False
|
||||
|
||||
return resolved, True
|
||||
|
||||
def save_ref_if_needed(self, save_ref, obj):
|
||||
if not save_ref:
|
||||
return obj
|
||||
|
||||
digest = self.save_obj(obj)
|
||||
return self.REF_PREFIX + digest
|
||||
|
||||
def get_cache_params(self, category, key):
|
||||
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
|
||||
|
||||
@@ -20,8 +20,8 @@ def json_default_converter(o):
|
||||
|
||||
@dataclass()
|
||||
class SerializerContext:
|
||||
user_name: str
|
||||
origin: str
|
||||
user_name: str = None
|
||||
origin: str = None
|
||||
|
||||
|
||||
class Serializer:
|
||||
@@ -37,7 +37,7 @@ class Serializer:
|
||||
|
||||
# add builtin serializers
|
||||
self.register(EventSerializer())
|
||||
self.register(PickleSerializer())
|
||||
self.register(StateSerializer())
|
||||
self.register(ConceptSerializer())
|
||||
|
||||
def register(self, serializer):
|
||||
@@ -205,11 +205,12 @@ class ObjectSerializer(BaseSerializer):
|
||||
|
||||
class PickleSerializer(BaseSerializer):
|
||||
|
||||
def __init__(self):
|
||||
BaseSerializer.__init__(self, "P", 1)
|
||||
def __init__(self, predicate, name="P", version=1):
|
||||
BaseSerializer.__init__(self, name, version)
|
||||
self.predicate = predicate
|
||||
|
||||
def match(self, obj):
|
||||
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
|
||||
return self.predicate(obj)
|
||||
|
||||
def dump(self, stream, obj, context):
|
||||
stream.write(pickle.dumps(obj))
|
||||
@@ -220,6 +221,12 @@ class PickleSerializer(BaseSerializer):
|
||||
return pickle.loads(stream.read())
|
||||
|
||||
|
||||
class StateSerializer(PickleSerializer):
|
||||
def __init__(self, ):
|
||||
PickleSerializer.__init__(self, lambda obj: BaseSerializer.get_full_qualified_name(
|
||||
obj) == "sdp.sheerkaDataProvider.State", "S", 1)
|
||||
|
||||
|
||||
class ConceptSerializer(ObjectSerializer):
|
||||
def __init__(self):
|
||||
ObjectSerializer.__init__(self, "core.concept.Concept", "C", 1)
|
||||
|
||||
Reference in New Issue
Block a user