Added DefaultParser
This commit is contained in:
+35
-1
@@ -1,5 +1,39 @@
|
||||
# How to serialize ?
|
||||
|
||||
## General rule
|
||||
- 1 byte : type of object code
|
||||
- int : version of the encoder
|
||||
- data : can be the json representation of the object
|
||||
- data : can be the json representation of the object
|
||||
|
||||
### Current supported types
|
||||
- E : events
|
||||
- O : object (with history management)
|
||||
- P : pickle
|
||||
|
||||
## How are concepts serialized?
|
||||
- get the id of the concept
|
||||
- get the hash of the concept −> it will be its unique key
|
||||
structure of the serialisation:
|
||||
```json
|
||||
{
|
||||
"id" : "id",
|
||||
"parent": <hash code of the previous version of the concept> or "",
|
||||
"name": <name of the concept>,
|
||||
"where": "",
|
||||
"pre": "",
|
||||
"post": "",
|
||||
"body": "",
|
||||
"desc": "",
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
## Idea to manage ObjectSerializer
|
||||
Problem:
|
||||
During serialization, there is no issue. The match() method is the only way to get the correct serializer.
|
||||
During deserialization, all Object serializers have type = 'O' and version = 1.
|
||||
So how to choose the correct one ?
|
||||
A possible solution would be to add the type of the object to deserialize to the saved stream
|
||||
--> SHA256 for every object. Too much data saved.
|
||||
The idea is to let the Serializer increment the version automatically (during registration) and to keep the mapping within sdp.state
|
||||
|
||||
|
||||
+41
-13
@@ -4,7 +4,7 @@ from datetime import datetime, date
|
||||
import hashlib
|
||||
import json
|
||||
import zlib
|
||||
from sdp.sheerkaSerializer import Serializer
|
||||
from sdp.sheerkaSerializer import Serializer, SerializerContext
|
||||
|
||||
|
||||
def json_default_converter(o):
|
||||
@@ -38,15 +38,15 @@ class Event(object):
|
||||
if not isinstance(self.message, str):
|
||||
raise NotImplementedError
|
||||
|
||||
return hashlib.sha256(f"{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
|
||||
return hashlib.sha256(f"Event:{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
|
||||
|
||||
def to_json(self):
|
||||
return json.dumps(self.__dict__, default=json_default_converter)
|
||||
def to_dict(self):
|
||||
return self.__dict__
|
||||
|
||||
def from_json(self, json_message):
|
||||
self.user = json_message["user"]
|
||||
self.date = datetime.fromisoformat(json_message["date"])
|
||||
self.message = json_message["message"]
|
||||
def from_dict(self, as_dict):
|
||||
self.user = as_dict["user"]
|
||||
self.date = datetime.fromisoformat(as_dict["date"])
|
||||
self.message = as_dict["message"]
|
||||
|
||||
|
||||
class State:
|
||||
@@ -120,6 +120,7 @@ class SheerkaDataProvider:
|
||||
|
||||
EventFolder = "events"
|
||||
StateFolder = "state"
|
||||
ObjectsFolder = "objects"
|
||||
CacheFolder = "cache"
|
||||
HeadFile = "HEAD"
|
||||
KeysFile = "keys"
|
||||
@@ -135,6 +136,9 @@ class SheerkaDataProvider:
|
||||
|
||||
self.serializer = Serializer()
|
||||
|
||||
def get_obj_path(self, object_type, digest):
    """
    Builds the path of the file that stores an object
    :param object_type: name of the folder under the root (e.g. EventFolder, StateFolder, ObjectsFolder)
    :param digest: digest of the object; its first 24 characters name the sub folder
    :return: <root>/<object_type>/<digest[:24]>/<digest>
    """
    # The path was previously computed and then dropped (no return),
    # so every caller received None.
    return path.join(self.root, object_type, digest[:24], digest)
|
||||
|
||||
def add(self, event: Event, entry, obj):
|
||||
"""
|
||||
Adds obj to the entry 'entry'
|
||||
@@ -366,7 +370,7 @@ class SheerkaDataProvider:
|
||||
os.makedirs(path.dirname(target_path))
|
||||
|
||||
with open(target_path, "wb") as f:
|
||||
f.write(self.serializer.serialize(event).read())
|
||||
f.write(self.serializer.serialize(event, None).read())
|
||||
|
||||
return digest
|
||||
|
||||
@@ -378,7 +382,7 @@ class SheerkaDataProvider:
|
||||
"""
|
||||
target_path = path.join(self.root, SheerkaDataProvider.EventFolder, digest[:24], digest)
|
||||
with open(target_path, "rb") as f:
|
||||
return self.serializer.deserialize(f)
|
||||
return self.serializer.deserialize(f, None)
|
||||
|
||||
def save_state(self, state: State):
|
||||
digest = state.get_digest()
|
||||
@@ -390,7 +394,7 @@ class SheerkaDataProvider:
|
||||
os.makedirs(path.dirname(target_path))
|
||||
|
||||
with open(target_path, "wb") as f:
|
||||
f.write(self.serializer.serialize(state).read())
|
||||
f.write(self.serializer.serialize(state, None).read())
|
||||
|
||||
return digest
|
||||
|
||||
@@ -400,7 +404,32 @@ class SheerkaDataProvider:
|
||||
|
||||
target_path = path.join(self.root, SheerkaDataProvider.StateFolder, digest[:24], digest)
|
||||
with open(target_path, "rb") as f:
|
||||
return self.serializer.deserialize(f)
|
||||
return self.serializer.deserialize(f, None)
|
||||
|
||||
def save_obj(self, obj):
    """
    Saves an object under the objects folder, using its digest as file name
    :param obj: object to save; must implement get_digest().
        When it has 'key' and 'key_name' attributes and key is None,
        a new key is requested from get_next_key() first
    :return: digest of the object
    """
    # The key is assigned before get_digest() is called
    if hasattr(obj, "key") and hasattr(obj, "key_name") and obj.key is None:
        obj.key = self.get_next_key(obj.key_name)

    digest = obj.get_digest()
    target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
    if path.exists(target_path):
        # A file already exists for this digest: nothing to write
        return digest

    # exist_ok=True replaces the former 'if not exists: makedirs' pair, which
    # could raise FileExistsError if the folder appeared between the two calls
    os.makedirs(path.dirname(target_path), exist_ok=True)

    with open(target_path, "wb") as f:
        # NOTE(review): the user name is hard-coded to "kodjo" - confirm where it should come from
        f.write(self.serializer.serialize(obj, SerializerContext("kodjo", digest)).read())

    return digest
|
||||
|
||||
def load_obj(self, digest):
    """
    Loads the object saved under the given digest
    :param digest: digest of the object to load, or None
    :return: the deserialized object, or a new State() when digest is None
    """
    if digest is not None:
        folder = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24])
        with open(path.join(folder, digest), "rb") as stream:
            return self.serializer.deserialize(stream, SerializerContext("kodjo", digest))

    # NOTE(review): same fallback as the state loader - confirm a State is wanted here
    return State()
|
||||
|
||||
def get_cache_params(self, category, key):
|
||||
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
|
||||
@@ -507,4 +536,3 @@ class SheerkaDataProvider:
|
||||
keys[entry] = value
|
||||
self.save_keys(keys)
|
||||
return str(value)
|
||||
|
||||
|
||||
+84
-26
@@ -3,6 +3,7 @@ import pickle
|
||||
import datetime
|
||||
import struct
|
||||
import io
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
def json_default_converter(o):
|
||||
@@ -17,15 +18,27 @@ def json_default_converter(o):
|
||||
return o.isoformat()
|
||||
|
||||
|
||||
@dataclass
class SerializerContext:
    """
    Additional information handed to a serializer when dumping or loading an object
    """
    # name of the user performing the operation
    user_name: str
    # NOTE(review): callers visible here pass the digest of the object - confirm intended meaning
    origin: str
|
||||
|
||||
|
||||
class Serializer:
|
||||
HEADER_FORMAT = "cH"
|
||||
USERNAME = "user_name" # key to store the user that has committed the snapshot
|
||||
MODIFICATION_DATE = "modification_date" #
|
||||
PARENTS = "parents"
|
||||
ORIGIN = "origin"
|
||||
HISTORY = "##history##"
|
||||
|
||||
def __init__(self):
|
||||
self._cache = []
|
||||
|
||||
# add builtin serializers
|
||||
self._cache.append(EventSerializer())
|
||||
self._cache.append(PickleSerializer())
|
||||
self.register(EventSerializer())
|
||||
self.register(PickleSerializer())
|
||||
self.register(ConceptSerializer())
|
||||
|
||||
def register(self, serializer):
|
||||
"""
|
||||
@@ -35,9 +48,10 @@ class Serializer:
|
||||
"""
|
||||
self._cache.append(serializer)
|
||||
|
||||
def serialize(self, obj):
|
||||
def serialize(self, obj, context):
|
||||
"""
|
||||
Get the stream representation of an object
|
||||
:param context:
|
||||
:param obj:
|
||||
:return:
|
||||
"""
|
||||
@@ -52,11 +66,12 @@ class Serializer:
|
||||
header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
|
||||
stream.write(header)
|
||||
|
||||
return serializer.dump(stream, obj)
|
||||
return serializer.dump(stream, obj, context)
|
||||
|
||||
def deserialize(self, stream):
|
||||
def deserialize(self, stream, context):
|
||||
"""
|
||||
Loads an object from its stream representation
|
||||
:param context:
|
||||
:param stream:
|
||||
:return:
|
||||
"""
|
||||
@@ -67,7 +82,7 @@ class Serializer:
|
||||
raise TypeError(f"Don't know how serializer name={header[0]}, version={header[1]}")
|
||||
|
||||
serializer = serializers[0]
|
||||
return serializer.load(stream)
|
||||
return serializer.load(stream, context)
|
||||
|
||||
|
||||
class BaseSerializer:
|
||||
@@ -82,8 +97,7 @@ class BaseSerializer:
|
||||
self.name = name
|
||||
self.version = version
|
||||
|
||||
@staticmethod
|
||||
def match(obj):
|
||||
def match(self, obj):
|
||||
"""
|
||||
Returns true if self can serialize obj
|
||||
:param obj:
|
||||
@@ -91,26 +105,32 @@ class BaseSerializer:
|
||||
"""
|
||||
pass
|
||||
|
||||
def dump(self, stream, obj):
|
||||
def dump(self, stream, obj, context):
|
||||
"""
|
||||
Returns the byte representation of how the object should be serialized
|
||||
|
||||
:param stream: to write to
|
||||
:param obj:
|
||||
:param obj: obj to serialize
|
||||
:param context: additional info needed to dump
|
||||
:return: stream of bytes
|
||||
"""
|
||||
pass
|
||||
|
||||
def load(self, stream):
|
||||
def load(self, stream, context):
|
||||
"""
|
||||
From a stream of bytes, create the object
|
||||
:param stream:
|
||||
:param context: additional info needed to load
|
||||
:return: object
|
||||
"""
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def get_class(kls):
|
||||
"""
|
||||
Loads a class from its fully qualified name (given as a string)
|
||||
:param kls:
|
||||
:return:
|
||||
"""
|
||||
parts = kls.split('.')
|
||||
module = ".".join(parts[:-1])
|
||||
m = __import__(module)
|
||||
@@ -120,6 +140,11 @@ class BaseSerializer:
|
||||
|
||||
@staticmethod
|
||||
def get_full_qualified_name(obj):
|
||||
"""
|
||||
Returns the fully qualified name of a class (including its module name)
|
||||
:param obj:
|
||||
:return:
|
||||
"""
|
||||
module = obj.__class__.__module__
|
||||
if module is None or module == str.__class__.__module__:
|
||||
return obj.__class__.__name__ # Avoid reporting __builtin__
|
||||
@@ -128,40 +153,73 @@ class BaseSerializer:
|
||||
|
||||
|
||||
class EventSerializer(BaseSerializer):
|
||||
@staticmethod
|
||||
def match(obj):
|
||||
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
|
||||
|
||||
def __init__(self):
|
||||
BaseSerializer.__init__(self, "E", 1)
|
||||
|
||||
def dump(self, stream, obj):
|
||||
stream.write(obj.to_json().encode("utf-8"))
|
||||
def match(self, obj):
|
||||
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
|
||||
|
||||
def dump(self, stream, obj, context):
|
||||
stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8"))
|
||||
stream.seek(0)
|
||||
return stream
|
||||
|
||||
def load(self, stream):
|
||||
def load(self, stream, context):
|
||||
json_stream = stream.read().decode("utf-8")
|
||||
json_message = json.loads(json_stream)
|
||||
as_dict = json.loads(json_stream)
|
||||
event = BaseSerializer.get_class("sdp.sheerkaDataProvider.Event")()
|
||||
event.from_json(json_message)
|
||||
event.from_dict(as_dict)
|
||||
return event
|
||||
|
||||
|
||||
class ObjectSerializer(BaseSerializer):
    """
    Json serializer for objects with history management.

    The handled class must implement to_dict() and from_dict(), and must be
    constructible without arguments (see load()).
    """

    def __init__(self, fully_qualified_name, name="O", version=1):
        """
        :param fully_qualified_name: module and class name of the objects handled by this serializer
        :param name: type code of the serializer
        :param version: version of the encoder
        """
        BaseSerializer.__init__(self, name, version)
        self.fully_qualified_name = fully_qualified_name

    def match(self, obj):
        """
        Returns true if the class of obj is the one handled by this serializer
        :param obj:
        :return:
        """
        return BaseSerializer.get_full_qualified_name(obj) == self.fully_qualified_name

    def dump(self, stream, obj, context):
        """
        Writes the json representation of obj, plus a history entry, to the stream
        :param stream: to write to
        :param obj: obj to serialize
        :param context: provides the user name stored in the history entry
        :return: the stream, rewound to position 0
        """
        # Work on a copy: to_dict() may return the object's own __dict__, and
        # updating it in place would add the history entry to the live object.
        as_json = dict(obj.to_dict())
        as_json[Serializer.HISTORY] = {
            Serializer.USERNAME: context.user_name,
            Serializer.MODIFICATION_DATE: datetime.datetime.now().isoformat(),
            # the parent is the 'origin' attribute of the object, when it has one
            Serializer.PARENTS: [getattr(obj, Serializer.ORIGIN)] if hasattr(obj, Serializer.ORIGIN) else [],
        }
        stream.write(json.dumps(as_json, default=json_default_converter).encode("utf-8"))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        """
        Creates the object from its json representation
        :param stream: to read from
        :param context: not used by this serializer
        :return: the object, with the saved history entry set as its '##history##' attribute
        """
        json_stream = stream.read().decode("utf-8")
        json_message = json.loads(json_stream)
        obj = BaseSerializer.get_class(self.fully_qualified_name)()
        obj.from_dict(json_message)
        setattr(obj, Serializer.HISTORY, json_message[Serializer.HISTORY])

        return obj
|
||||
|
||||
|
||||
class PickleSerializer(BaseSerializer):
|
||||
@staticmethod
|
||||
def match(obj):
|
||||
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
|
||||
|
||||
def __init__(self):
|
||||
BaseSerializer.__init__(self, "P", 1)
|
||||
|
||||
def dump(self, stream, obj):
|
||||
def match(self, obj):
|
||||
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
|
||||
|
||||
def dump(self, stream, obj, context):
|
||||
stream.write(pickle.dumps(obj))
|
||||
stream.seek(0)
|
||||
return stream
|
||||
|
||||
def load(self, stream):
|
||||
def load(self, stream, context):
|
||||
return pickle.loads(stream.read())
|
||||
|
||||
|
||||
class ConceptSerializer(ObjectSerializer):
    """
    ObjectSerializer for core.concept.Concept, using type code 'C' and version 1
    """

    def __init__(self):
        super().__init__(fully_qualified_name="core.concept.Concept", name="C", version=1)
|
||||
|
||||
Reference in New Issue
Block a user