Added DefaultParser

This commit is contained in:
2019-10-29 18:39:51 +01:00
parent 101319b8b6
commit 8107e149b9
18 changed files with 1581 additions and 376 deletions
+35 -1
View File
@@ -1,5 +1,39 @@
# How to serialize ?
## General rule
- 1 byte : type of object code
- int : version of the encoder
- data : can be the json representation of the object
- data : can be the json representation of the object
### Current supported types
- E : events
- O : object (with history management)
- P : pickle
## How concepts are serialized ?
- get the id of the concept
- get the hash of the concept > it will be its unique key
structure of the serialisation:
```json
{
"id" : "id",
"parent": <hash code of the previous version of the concept> or "",
"name": <name of the concept>,
"where": "",
"pre": "",
"post": "",
"body": "",
"desc": "",
...
}
```
## Idea to manage ObjectSerializer
Problem:
During serialization, there is no issue. The match() method is the only way to get the correct serializer.
During deserialization, all ObjectSerializer instances have type = 'O' and version = 1.
So how to choose the correct one ?
A possible solution will be to add the type of the object to deserialize to the saved stream
--> SHA256 for every object. Too much data saved.
The idea is to let the Serializer increment the version automatically (during registration) and to keep the mapping within sdp.state
+41 -13
View File
@@ -4,7 +4,7 @@ from datetime import datetime, date
import hashlib
import json
import zlib
from sdp.sheerkaSerializer import Serializer
from sdp.sheerkaSerializer import Serializer, SerializerContext
def json_default_converter(o):
@@ -38,15 +38,15 @@ class Event(object):
if not isinstance(self.message, str):
raise NotImplementedError
return hashlib.sha256(f"{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
return hashlib.sha256(f"Event:{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
def to_json(self):
return json.dumps(self.__dict__, default=json_default_converter)
def to_dict(self):
    """
    Gives the dictionary representation of the object
    :return: the attribute dictionary of the object itself (not a copy)
    """
    return vars(self)
def from_json(self, json_message):
self.user = json_message["user"]
self.date = datetime.fromisoformat(json_message["date"])
self.message = json_message["message"]
def from_dict(self, as_dict):
    """
    Sets user, date and message from their dictionary representation
    :param as_dict: dictionary with the keys "user", "date" (ISO formatted string) and "message"
    :return:
    """
    # (attribute, converter) pairs, applied in order; None means the value is stored as it is
    fields = (
        ("user", None),
        ("date", datetime.fromisoformat),
        ("message", None),
    )
    for attribute, convert in fields:
        raw = as_dict[attribute]
        setattr(self, attribute, raw if convert is None else convert(raw))
class State:
@@ -120,6 +120,7 @@ class SheerkaDataProvider:
EventFolder = "events"
StateFolder = "state"
ObjectsFolder = "objects"
CacheFolder = "cache"
HeadFile = "HEAD"
KeysFile = "keys"
@@ -135,6 +136,9 @@ class SheerkaDataProvider:
self.serializer = Serializer()
def get_obj_path(self, object_type, digest):
    """
    Builds the path of the file where an object is stored
    :param object_type: name of the folder, under the root, for this kind of object
    :param digest: digest of the object; its first 24 characters name the sub folder
    :return: <root>/<object_type>/<digest[:24]>/<digest>
    """
    # the path was computed but never returned, so callers always got None
    return path.join(self.root, object_type, digest[:24], digest)
def add(self, event: Event, entry, obj):
"""
Adds obj to the entry 'entry'
@@ -366,7 +370,7 @@ class SheerkaDataProvider:
os.makedirs(path.dirname(target_path))
with open(target_path, "wb") as f:
f.write(self.serializer.serialize(event).read())
f.write(self.serializer.serialize(event, None).read())
return digest
@@ -378,7 +382,7 @@ class SheerkaDataProvider:
"""
target_path = path.join(self.root, SheerkaDataProvider.EventFolder, digest[:24], digest)
with open(target_path, "rb") as f:
return self.serializer.deserialize(f)
return self.serializer.deserialize(f, None)
def save_state(self, state: State):
digest = state.get_digest()
@@ -390,7 +394,7 @@ class SheerkaDataProvider:
os.makedirs(path.dirname(target_path))
with open(target_path, "wb") as f:
f.write(self.serializer.serialize(state).read())
f.write(self.serializer.serialize(state, None).read())
return digest
@@ -400,7 +404,32 @@ class SheerkaDataProvider:
target_path = path.join(self.root, SheerkaDataProvider.StateFolder, digest[:24], digest)
with open(target_path, "rb") as f:
return self.serializer.deserialize(f)
return self.serializer.deserialize(f, None)
def save_obj(self, obj):
    """
    Saves obj under the objects folder, in a file named after its digest
    When obj has a 'key' attribute set to None and a 'key_name' attribute, a new key is requested first
    Nothing is written when a file already exists for the digest
    :param obj: object to save, it must provide get_digest()
    :return: digest of the object
    """
    if hasattr(obj, "key") and hasattr(obj, "key_name") and obj.key is None:
        obj.key = self.get_next_key(obj.key_name)

    digest = obj.get_digest()
    target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
    if path.exists(target_path):
        return digest

    # Serialize before touching the disk: if the serializer fails, no empty file is left behind
    # (an empty file would make the next save_obj believe that the object is already stored)
    # NOTE(review): the user name "kodjo" is hard coded
    content = self.serializer.serialize(obj, SerializerContext("kodjo", digest)).read()

    # exist_ok: do not fail if the folder appears between an existence check and its creation
    os.makedirs(path.dirname(target_path), exist_ok=True)
    with open(target_path, "wb") as f:
        f.write(content)

    return digest
def load_obj(self, digest):
    """
    Loads the object stored under the given digest
    :param digest: digest of the object to load; None gives a new State()
    :return: the deserialized object
    """
    if digest is None:
        return State()

    folder = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24])
    context = SerializerContext("kodjo", digest)
    with open(path.join(folder, digest), "rb") as stream:
        return self.serializer.deserialize(stream, context)
def get_cache_params(self, category, key):
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
@@ -507,4 +536,3 @@ class SheerkaDataProvider:
keys[entry] = value
self.save_keys(keys)
return str(value)
+84 -26
View File
@@ -3,6 +3,7 @@ import pickle
import datetime
import struct
import io
from dataclasses import dataclass
def json_default_converter(o):
@@ -17,15 +18,27 @@ def json_default_converter(o):
return o.isoformat()
@dataclass
class SerializerContext:
    """
    Additional information given to the serializers when dumping or loading an object
    """
    # name of the user, ObjectSerializer.dump() stores it in the history of the object
    user_name: str
    # the data provider passes the digest of the object here
    origin: str
class Serializer:
HEADER_FORMAT = "cH"
USERNAME = "user_name" # key to store the user that has committed the snapshot
MODIFICATION_DATE = "modification_date" #
PARENTS = "parents"
ORIGIN = "origin"
HISTORY = "##history##"
def __init__(self):
self._cache = []
# add builtin serializers
self._cache.append(EventSerializer())
self._cache.append(PickleSerializer())
self.register(EventSerializer())
self.register(PickleSerializer())
self.register(ConceptSerializer())
def register(self, serializer):
"""
@@ -35,9 +48,10 @@ class Serializer:
"""
self._cache.append(serializer)
def serialize(self, obj):
def serialize(self, obj, context):
"""
Get the stream representation of an object
:param context:
:param obj:
:return:
"""
@@ -52,11 +66,12 @@ class Serializer:
header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
stream.write(header)
return serializer.dump(stream, obj)
return serializer.dump(stream, obj, context)
def deserialize(self, stream):
def deserialize(self, stream, context):
"""
Loads an object from its stream representation
:param context:
:param stream:
:return:
"""
@@ -67,7 +82,7 @@ class Serializer:
raise TypeError(f"Don't know how serializer name={header[0]}, version={header[1]}")
serializer = serializers[0]
return serializer.load(stream)
return serializer.load(stream, context)
class BaseSerializer:
@@ -82,8 +97,7 @@ class BaseSerializer:
self.name = name
self.version = version
@staticmethod
def match(obj):
def match(self, obj):
"""
Returns true if self can serialize obj
:param obj:
@@ -91,26 +105,32 @@ class BaseSerializer:
"""
pass
def dump(self, stream, obj):
def dump(self, stream, obj, context):
"""
Returns the byte representation of how the object should be serialized
:param stream: to write to
:param obj:
:param obj: obj to serialize
:param context: additional info needed to dump
:return: stream of bytes
"""
pass
def load(self, stream):
def load(self, stream, context):
"""
From a stream of bytes, create the object
:param stream:
:param context: additional info needed to load
:return: object
"""
pass
@staticmethod
def get_class(kls):
"""
Loads a class from its string full qualified name
:param kls:
:return:
"""
parts = kls.split('.')
module = ".".join(parts[:-1])
m = __import__(module)
@@ -120,6 +140,11 @@ class BaseSerializer:
@staticmethod
def get_full_qualified_name(obj):
"""
Returns the full qualified name of a class (including its module name )
:param obj:
:return:
"""
module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
@@ -128,40 +153,73 @@ class BaseSerializer:
class EventSerializer(BaseSerializer):
@staticmethod
def match(obj):
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
def __init__(self):
BaseSerializer.__init__(self, "E", 1)
def dump(self, stream, obj):
stream.write(obj.to_json().encode("utf-8"))
def match(self, obj):
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
def dump(self, stream, obj, context):
stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8"))
stream.seek(0)
return stream
def load(self, stream):
def load(self, stream, context):
json_stream = stream.read().decode("utf-8")
json_message = json.loads(json_stream)
as_dict = json.loads(json_stream)
event = BaseSerializer.get_class("sdp.sheerkaDataProvider.Event")()
event.from_json(json_message)
event.from_dict(as_dict)
return event
class ObjectSerializer(BaseSerializer):
    """
    Serializes an object as json, together with a history entry
    The object must provide to_dict() and from_dict(), and its class must have a no-argument constructor
    """

    def __init__(self, fully_qualified_name, name="O", version=1):
        """
        :param fully_qualified_name: full qualified name of the class managed by this serializer
        :param name: name (type code) of the serializer
        :param version: version of the serializer
        """
        BaseSerializer.__init__(self, name, version)
        self.fully_qualified_name = fully_qualified_name

    def match(self, obj):
        """
        Returns true if the full qualified name of the class of obj is the one managed by self
        :param obj:
        :return:
        """
        return BaseSerializer.get_full_qualified_name(obj) == self.fully_qualified_name

    def dump(self, stream, obj, context):
        """
        Writes the json representation of obj, plus its history entry, to the stream
        :param stream: to write to
        :param obj: obj to serialize
        :param context: gives the user name stored in the history
        :return: the stream, rewound to its beginning
        """
        # Work on a copy: to_dict() may return the object's own __dict__, and adding the
        # history entry to it would create a '##history##' attribute on the object itself
        as_json = dict(obj.to_dict())
        as_json[Serializer.HISTORY] = {
            Serializer.USERNAME: context.user_name,
            Serializer.MODIFICATION_DATE: datetime.datetime.now().isoformat(),
            # the parent is the 'origin' attribute of the object, when it has one
            Serializer.PARENTS: [getattr(obj, Serializer.ORIGIN)] if hasattr(obj, Serializer.ORIGIN) else []
        }
        stream.write(json.dumps(as_json, default=json_default_converter).encode("utf-8"))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        """
        Creates the object from its json representation
        :param stream: to read from
        :param context: not used here
        :return: the object, with its history stored in the '##history##' attribute
        """
        json_stream = stream.read().decode("utf-8")
        json_message = json.loads(json_stream)

        obj = BaseSerializer.get_class(self.fully_qualified_name)()
        obj.from_dict(json_message)
        setattr(obj, Serializer.HISTORY, json_message[Serializer.HISTORY])
        return obj
class PickleSerializer(BaseSerializer):
@staticmethod
def match(obj):
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
def __init__(self):
BaseSerializer.__init__(self, "P", 1)
def dump(self, stream, obj):
def match(self, obj):
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
def dump(self, stream, obj, context):
stream.write(pickle.dumps(obj))
stream.seek(0)
return stream
def load(self, stream):
def load(self, stream, context):
return pickle.loads(stream.read())
class ConceptSerializer(ObjectSerializer):
    """
    Object serializer for core.concept.Concept, with the name "C" and the version 1
    """

    def __init__(self):
        super().__init__("core.concept.Concept", name="C", version=1)