Refactored ExecutionContext serialization (added sheerkapickle) and added History management

This commit is contained in:
2020-01-31 18:58:03 +01:00
parent fed0735eb9
commit b9afcba61f
31 changed files with 1546 additions and 518 deletions
+132
View File
@@ -0,0 +1,132 @@
import json
from logging import Logger
import core.utils
from core.concept import Concept
from sheerkapickle import utils, tags, handlers
def encode(sheerka, obj):
    """Serialize ``obj`` to a JSON string.

    The object is first flattened into JSON-friendly data by a fresh
    ``SheerkaPickler`` bound to ``sheerka``; that data is then dumped with
    ``json.dumps``.

    :param sheerka: object handed to the pickler (and from there to handlers)
    :param obj: the object to serialize
    :return: the JSON text
    """
    flattened = SheerkaPickler(sheerka).flatten(obj)
    return json.dumps(flattened)
class ToReduce:
    """Reduction rule: a predicate paired with a value extractor.

    ``predicate(obj)`` tells whether the rule applies to ``obj`` and
    ``get_value(obj)`` returns the value to emit in its place.
    """

    def __init__(self, predicate, get_value):
        # single tuple assignment; attribute creation order is unchanged
        self.predicate, self.get_value = predicate, get_value
class SheerkaPickler:
    """
    Json sheerkapickle
    Inspired by jsonpickle (https://github.com/jsonpickle/jsonpickle)
    which failed to work in my environment

    Turns an object graph into JSON-friendly primitives, lists and dicts.
    Every enum member or object instance that is emitted receives a
    sequential id (starting at 0) the first time it is met; later
    occurrences of the same object are emitted as ``{tags.ID: <id>}``.
    """

    def __init__(self, sheerka):
        """
        :param sheerka: object handed to the custom handlers
        """
        self.ids = {}  # id(obj) -> sequential id
        # Objects that received an id. Holding them here also keeps them
        # alive, so their id() cannot be reused while pickling.
        self.objs = []
        self.id_count = -1  # last id handed out; the first one will be 0
        self.sheerka = sheerka

        # Objects matching one of these rules are replaced by a plain value
        # and never receive an id.
        self.to_reduce = []
        self.to_reduce.append(ToReduce(lambda o: isinstance(o, Logger), lambda o: None))

        # NOTE(review): imported locally, presumably to avoid a circular
        # import at module load time -- confirm.
        from parsers.BaseParser import BaseParser
        from evaluators.BaseEvaluator import BaseEvaluator
        self.to_reduce.append(ToReduce(lambda o: isinstance(o, (BaseParser, BaseEvaluator)), lambda o: o.name))

    def flatten(self, obj):
        """
        Return the JSON-friendly representation of `obj`.

        :param obj: any value
        :return: a primitive, a list, or a dict
        :raises Exception: when no rule applies to `obj`
        """
        if utils.is_primitive(obj):
            return obj

        if utils.is_tuple(obj):
            return {tags.TUPLE: [self.flatten(v) for v in obj]}

        if utils.is_set(obj):
            return {tags.SET: [self.flatten(v) for v in obj]}

        if utils.is_list(obj):
            return [self.flatten(v) for v in obj]

        if utils.is_dictionary(obj):
            return self._flatten_dict(obj)

        if utils.is_enum(obj):
            return self._flatten_enum(obj)

        if utils.is_object(obj):
            return self._flatten_obj_instance(obj)

        raise Exception(f"Cannot flatten '{obj}'")

    def _flatten_dict(self, obj):
        """Flatten the values of a dict and convert special keys to strings."""
        data = {}
        for k, v in obj.items():
            if k is None:
                k_str = "null"
            elif utils.is_enum(k):
                k_str = core.utils.get_full_qualified_name(k) + "." + k.name
            elif isinstance(k, Concept):
                k_str = f":c:{k.key}:{k.id}:"
            else:
                # any other key is kept as it is
                k_str = k

            data[k_str] = self.flatten(v)

        return data

    def _flatten_enum(self, obj):
        """Flatten an enum member, or emit a reference if it was already seen."""
        exists, _id = self.exist(obj)
        if exists:
            return {tags.ID: _id}

        self._remember(obj)
        class_name = core.utils.get_full_qualified_name(obj)
        return {tags.ENUM: class_name + "." + obj.name}

    def _flatten_obj_instance(self, obj):
        """
        Flatten an object instance.

        Order of precedence: `to_reduce` rules, reference to an already seen
        object, registered handler, then the instance `__dict__`.

        :return: the flattened data, or None when the object has neither a
                 handler nor a `__dict__`
        """
        for reduce in self.to_reduce:
            if reduce.predicate(obj):
                return reduce.get_value(obj)

        # check if the object was already seen
        exists, _id = self.exist(obj)
        if exists:
            return {tags.ID: _id}

        cls = obj.__class__ if hasattr(obj, '__class__') else type(obj)
        class_name = utils.importable_name(cls)
        handler = handlers.get(class_name)

        if handler is None and not hasattr(obj, "__dict__"):
            # Nothing can be emitted for this object. It must NOT receive an
            # id: the restoring side only records objects it rebuilds, so a
            # consumed id here would shift every later reference.
            return None

        # register before flattening the content, so that cycles resolve to
        # a reference instead of recursing forever
        self._remember(obj)

        # flatten
        data = {tags.OBJECT: class_name}
        if handler is not None:
            return handler(self.sheerka, self).flatten(obj, data)

        for k, v in obj.__dict__.items():
            data[k] = self.flatten(v)
        return data

    def _remember(self, obj):
        """Give `obj` the next sequential id, record it, and return the id."""
        self.id_count = self.id_count + 1
        self.ids[id(obj)] = self.id_count
        self.objs.append(obj)
        return self.id_count

    def exist(self, obj):
        """
        Tell whether `obj` already received an id.

        :return: `(True, id)` when known, `(False, None)` otherwise
        """
        key = id(obj)
        if key in self.ids:
            return True, self.ids[key]
        return False, None
+105
View File
@@ -0,0 +1,105 @@
import json
import core.utils
from sheerkapickle import tags, utils, handlers
def decode(sheerka, obj):
    """Rebuild an object from its JSON text.

    :param sheerka: object handed to the unpickler (and from there to handlers)
    :param obj: JSON text, parsed with ``json.loads``
    :return: whatever ``SheerkaUnpickler.restore`` builds from the parsed data
    """
    unpickler = SheerkaUnpickler(sheerka)
    flattened = json.loads(obj)
    return unpickler.restore(flattened)
class SheerkaUnpickler:
    """
    Rebuilds objects from the JSON-friendly data produced by the pickler.

    Restored enum members and object instances are appended to `objs` in the
    order they are met; a `{tags.ID: n}` entry is resolved as `objs[n]`.
    """

    def __init__(self, sheerka):
        """
        :param sheerka: used to create concepts and handed to the handlers
        """
        self.sheerka = sheerka
        self.objs = []  # restored objects, indexed by their sequential id

    def restore(self, obj):
        """
        Restore a value from its flattened form.

        Tagged dicts are checked first, then lists and plain dicts; any
        other value is returned unchanged.
        """
        if has_tag(obj, tags.ID):
            return self._restore_id(obj)

        if has_tag(obj, tags.TUPLE):
            return self._restore_tuple(obj)

        if has_tag(obj, tags.SET):
            return self._restore_set(obj)

        if has_tag(obj, tags.ENUM):
            return self._restore_enum(obj)

        if has_tag(obj, tags.OBJECT):
            return self._restore_obj(obj)

        if utils.is_list(obj):
            return self._restore_list(obj)

        if utils.is_dictionary(obj):
            return self._restore_dict(obj)

        return obj

    def _restore_list(self, obj):
        return [self.restore(v) for v in obj]

    def _restore_tuple(self, obj):
        return tuple(self.restore(v) for v in obj[tags.TUPLE])

    def _restore_set(self, obj):
        return {self.restore(v) for v in obj[tags.SET]}

    def _restore_enum(self, obj):
        instance = core.utils.decode_enum(obj[tags.ENUM])
        self.objs.append(instance)
        return instance

    def _restore_dict(self, obj):
        data = {}
        for k, v in obj.items():
            resolved_key = self._resolve_key(k)
            data[resolved_key] = self.restore(v)

        return data

    def _restore_id(self, obj):
        """
        Resolve a reference to an already restored object.

        :return: the referenced object, or None when the id does not match
                 any restored object (kept lenient on purpose)
        """
        index = obj[tags.ID]
        # Explicit bounds check: a negative id must not silently pick an
        # object from the end of the list.
        if 0 <= index < len(self.objs):
            return self.objs[index]
        return None

    def _restore_obj(self, obj):
        """
        Restore an object instance, through its handler when one is
        registered for the stored class name, attribute by attribute
        otherwise.
        """
        handler = handlers.get(obj[tags.OBJECT])

        if handler:
            handler = handler(self.sheerka, self)
            instance = handler.new(obj)
            # registered before its content is restored, so that references
            # met while restoring the content can be resolved
            self.objs.append(instance)
            instance = handler.restore(obj, instance)
        else:
            cls = core.utils.get_class(obj[tags.OBJECT])
            # __new__ only: __init__ is deliberately not called
            instance = cls.__new__(cls)
            self.objs.append(instance)
            for k, v in obj.items():
                if k == tags.OBJECT:
                    continue

                value = self.restore(v)
                setattr(instance, k, value)

        return instance

    def _resolve_key(self, key):
        """
        Convert a dict key back: "null" gives None, an encoded concept gives
        a new concept, an encoded enum gives the enum member, and anything
        else is returned as it is.
        """
        if key == "null":
            return None

        concept_key, concept_id = core.utils.decode_concept(key)
        if concept_key is not None:
            if concept_id:
                return self.sheerka.new((concept_key, concept_id))
            return self.sheerka.new(concept_key)

        as_enum = core.utils.decode_enum(key)
        if as_enum is not None:
            return as_enum

        return key
def has_tag(obj, tag):
    """Return True when `obj` is exactly a `dict` (not a subclass) holding the key `tag`."""
    if type(obj) is not dict:
        return False
    return tag in obj
+7
View File
@@ -0,0 +1,7 @@
from .SheerkaPickler import encode
from .SheerkaUnpickler import decode
# Public API of the package.
__all__ = ('encode', 'decode')
# register built-in handlers
# (the module is imported for its side effects only: its module-level
# `handles(...)` calls do the registration)
__import__('sheerkapickle.handlers', level=0)
+233
View File
@@ -0,0 +1,233 @@
import datetime
import re
import threading
import uuid
from sheerkapickle import utils
class ToReduce:
    """Associates a predicate with the function giving the replacement value.

    `predicate(obj)` decides whether the rule applies; `get_value(obj)`
    computes the value to use instead of `obj`.
    """

    def __init__(self, predicate, get_value):
        # single tuple assignment; attribute creation order is unchanged
        self.predicate, self.get_value = predicate, get_value
class SheerkaRegistry(object):
    """Maps classes (and their fully qualified names) to handlers."""

    def __init__(self):
        self._handlers = {}  # class or importable name -> handler
        self._base_handlers = {}  # class -> handler also valid for subclasses

    def get(self, cls_or_name, default=None):
        """
        Look up a handler by type reference or by fully qualified name.

        When there is no direct match and a type was given, the handlers
        registered with base=True are searched for a base class of that type.

        :param cls_or_name: the type or its fully qualified name
        :param default: value returned when no matching handler is found
        """
        direct = self._handlers.get(cls_or_name)
        if direct is not None:
            return direct

        # attempt to find a base class
        if utils.is_type(cls_or_name):
            for base_cls, base_handler in self._base_handlers.items():
                if issubclass(cls_or_name, base_cls):
                    return base_handler

        return default

    def register(self, cls, handler=None, base=False):
        """Register a custom handler for a class

        :param cls: The custom object class to handle
        :param handler: The custom handler class (if
            None, a decorator wrapper is returned)
        :param base: Indicates whether the handler should
            be registered for all subclasses
        :raises TypeError: when `cls` is not a class/type

        This function can be also used as a decorator
        by omitting the `handler` argument::

            @sheerkapickle.handlers.register(Foo, base=True)
            class FooHandler(sheerkapickle.handlers.BaseHandler):
                pass
        """
        if handler is None:
            # decorator form: registration happens once the class is known
            def _register(handler_cls):
                self.register(cls, handler=handler_cls, base=base)
                return handler_cls

            return _register

        if not utils.is_type(cls):
            raise TypeError('{!r} is not a class/type'.format(cls))

        # store both the name and the actual type for the ugly cases like
        # _sre.SRE_Pattern that cannot be loaded back directly
        self._handlers[utils.importable_name(cls)] = handler
        self._handlers[cls] = handler

        if base:
            # only store the actual type for subclass checking
            self._base_handlers[cls] = handler

    def unregister(self, cls):
        """Forget every handler registered for `cls`; unknown classes are ignored."""
        for key in (cls, utils.importable_name(cls)):
            self._handlers.pop(key, None)
        self._base_handlers.pop(cls, None)
# Module-wide registry instance. The aliases below expose its bound
# methods as plain module-level functions (e.g. `handlers.get(...)`).
registry = SheerkaRegistry()
register = registry.register
unregister = registry.unregister
get = registry.get
class BaseHandler(object):
    """Base class of the handlers; subclasses implement flatten/new/restore."""

    def __init__(self, sheerka, context):
        """
        Initialize a new handler to handle a registered type.

        :Parameters:
          - `sheerka`: stored as-is for the subclasses
          - `context`: reference to pickler/unpickler
        """
        self.sheerka, self.context = sheerka, context

    def __call__(self, sheerka, context):
        """This permits registering either Handler instances or classes

        Calling an instance rebinds it and returns the instance itself, so
        it can be used wherever a handler class would be called.

        :Parameters:
          - `sheerka`: stored as-is for the subclasses
          - `context`: reference to pickler/unpickler
        """
        self.sheerka, self.context = sheerka, context
        return self

    def flatten(self, obj, data):
        """
        Flatten `obj` into a json-friendly form and write result to `data`.

        :param object obj: The object to be serialized.
        :param dict data: A partially filled dictionary which will contain the
            json-friendly representation of `obj` once this method has
            finished.
        :raises NotImplementedError: always, in this base class
        """
        message = 'You must implement flatten() in %s' % self.__class__
        raise NotImplementedError(message)

    def new(self, data):
        """
        Create the instance that `restore` will complete.

        :raises NotImplementedError: always, in this base class
        """
        message = 'You must implement new() in %s' % self.__class__
        raise NotImplementedError(message)

    def restore(self, data, instance):
        """
        Restore an object of the registered type from the json-friendly
        representation `data`, starting from `instance`, and return it.

        :raises NotImplementedError: always, in this base class
        """
        message = 'You must implement restore() in %s' % self.__class__
        raise NotImplementedError(message)

    @classmethod
    def handles(self, cls):
        """
        Register this handler for the given class. Suitable as a decorator,
        e.g.::

            @MyCustomHandler.handles
            class MyCustomClass:
                def __reduce__(self):
                    ...
        """
        # `self` is the handler class here (classmethod); `cls` is the class
        # to handle and is returned unchanged so decoration keeps the class.
        registry.register(cls, self)
        return cls
# class DatetimeHandler(BaseHandler):
# """Custom handler for datetime objects
#
# Datetime objects use __reduce__, and they generate binary strings encoding
# the payload. This handler encodes that payload to reconstruct the
# object.
#
# """
#
# def flatten(self, obj, data):
# pickler = self.context
# if not pickler.unpicklable:
# return str(obj)
# cls, args = obj.__reduce__()
# flatten = pickler.flatten
# payload = utils.b64encode(args[0])
# args = [payload] + [flatten(i, reset=False) for i in args[1:]]
# data['__reduce__'] = (flatten(cls, reset=False), args)
# return data
#
# def restore(self, data):
# cls, args = data['__reduce__']
# unpickler = self.context
# restore = unpickler.restore
# cls = restore(cls, reset=False)
# value = utils.b64decode(args[0])
# params = (value,) + tuple([restore(i, reset=False) for i in args[1:]])
# return cls.__new__(cls, *params)
#
#
# DatetimeHandler.handles(datetime.datetime)
# DatetimeHandler.handles(datetime.date)
# DatetimeHandler.handles(datetime.time)
class RegexHandler(BaseHandler):
    """Flatten _sre.SRE_Pattern (compiled regex) objects"""

    def flatten(self, obj, data):
        """Store the pattern source and its flags in `data` and return it."""
        data['pattern'] = obj.pattern
        # Without the flags, a pattern compiled with e.g. re.IGNORECASE
        # would come back as a different (case-sensitive) regex.
        data['flags'] = obj.flags
        return data

    def new(self, data):
        """Recompile the regex; data written without 'flags' is still accepted."""
        return re.compile(data['pattern'], data.get('flags', 0))

    def restore(self, data, instance):
        """Nothing left to restore: `new` already built the complete object."""
        return instance


RegexHandler.handles(type(re.compile('')))
class UUIDHandler(BaseHandler):
    """Serialize uuid.UUID objects"""

    def flatten(self, obj, data):
        """Store the hexadecimal form of the UUID under 'hex'."""
        hex_form = obj.hex
        data['hex'] = hex_form
        return data

    def new(self, data):
        """Rebuild the UUID from its stored hexadecimal form."""
        return uuid.UUID(hex=data['hex'])

    def restore(self, data, instance):
        """Nothing left to restore: `new` already built the complete object."""
        return instance


UUIDHandler.handles(uuid.UUID)
class LockHandler(BaseHandler):
    """Serialize threading.Lock objects"""

    def flatten(self, obj, data):
        """Store whether the lock is currently held under 'locked'."""
        is_held = obj.locked()
        data['locked'] = is_held
        return data

    def new(self, data):
        """Create a new lock and acquire it when it was stored as locked."""
        was_locked = data.get('locked', False)
        lock = threading.Lock()
        if was_locked:
            lock.acquire()
        return lock

    def restore(self, data, instance):
        """Nothing left to restore: `new` already built the complete object."""
        return instance


# threading.Lock is a factory function; the concrete class is taken from an
# actual lock instance.
_lock = threading.Lock()
LockHandler.handles(_lock.__class__)
+182
View File
@@ -0,0 +1,182 @@
from core.builtin_concepts import UserInputConcept, ReturnValueConcept, BuiltinConcepts
from core.sheerka.Sheerka import Sheerka
from evaluators.BaseEvaluator import BaseEvaluator
from parsers.BaseParser import BaseParser
from sheerkapickle.handlers import BaseHandler, registry
from core.concept import Concept, PROPERTIES_TO_SERIALIZE as CONCEPT_PROPERTIES_TO_SERIALIZE, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext, PROPERTIES_TO_SERIALIZE as CONTEXT_PROPERTIES_TO_SERIALIZE
# Reference concept used by ConceptHandler.flatten for concepts without an
# id: only the values that differ from this one are written.
default_concept = Concept()
# Key under which ConceptHandler / UserInputHandler store (key, id).
CONCEPT_ID = "concept/id"
class ConceptHandler(BaseHandler):
    """
    Handler for Concept: only the differences with a reference concept are
    written (the concept found by id when the concept has one, a default
    `Concept()` otherwise).
    """

    def flatten(self, obj: Concept, data):
        """
        Write into `data` what differs between `obj` and its reference.

        Metadata goes under "meta.<name>", values under the value of their
        ConceptParts key, properties as (name, value) pairs under "props".
        """
        pickler = self.context
        sheerka = self.sheerka

        if not obj.id:
            ref = default_concept
        else:
            ref = sheerka.get_by_id(obj.id)
            data[CONCEPT_ID] = (obj.key, obj.id)

        # transform metadata
        for prop in CONCEPT_PROPERTIES_TO_SERIALIZE:
            current = getattr(obj.metadata, prop)
            baseline = getattr(ref.metadata, prop)
            if current != baseline:
                data["meta." + prop] = pickler.flatten(current)

        # transform value
        for part, current in obj.values.items():
            baseline = ref.values[part] if part in ref.values else None
            if current != baseline:
                data[part.value] = pickler.flatten(current)

        # transform properties
        for name in obj.props:
            current = obj.props[name].value
            if name not in ref.props or current != ref.props[name].value:
                # the "props" list only appears when at least one property differs
                data.setdefault("props", []).append((name, pickler.flatten(current)))

        return data

    def new(self, data):
        """Create the concept from its stored (key, id), or a blank Concept when there is none."""
        if CONCEPT_ID in data:
            return self.sheerka.new(tuple(data[CONCEPT_ID]))
        return Concept()

    def restore(self, data, instance):
        """Apply the stored differences to `instance` and return it."""
        unpickler = self.context

        for key, raw_value in data.items():
            # serialization tags and the concept identity are not attributes
            if key.startswith("_sheerka/") or key == CONCEPT_ID:
                continue

            resolved_value = unpickler.restore(raw_value)

            if key.startswith("meta."):
                # get metadata
                meta_name = key[5:]
                if meta_name == "props":
                    for prop_name, prop_value in resolved_value:
                        instance.def_prop(prop_name, prop_value)
                else:
                    setattr(instance.metadata, meta_name, resolved_value)
            elif key == "props":
                # get properties
                for prop_name, prop_value in resolved_value:
                    instance.set_prop(prop_name, prop_value)
            else:
                # get value
                instance.set_metadata_value(ConceptParts(key), resolved_value)

        return instance
class UserInputHandler(BaseHandler):
    """Handler for UserInputConcept: stores the concept identity, the user name and the text."""

    def flatten(self, obj: UserInputConcept, data):
        """Write (key, id), "user_name" and "text" into `data` and return it."""
        data[CONCEPT_ID] = (obj.key, obj.id)
        data["user_name"] = obj.user_name
        if isinstance(obj.text, list):
            # a list is turned back into text by the parser helper
            data["text"] = BaseParser.get_text_from_tokens(obj.text)
        else:
            data["text"] = obj.text
        return data

    def new(self, data):
        """Create the concept with its text as body and its user name."""
        return self.sheerka.new(tuple(data[CONCEPT_ID]), body=data["text"], user_name=data["user_name"])

    def restore(self, data, instance):
        """Nothing left to restore: `new` already built the complete object."""
        return instance
class ReturnValueHandler(BaseHandler):
    """Handler for ReturnValueConcept: stores who, status, value and, when present, parents."""

    def flatten(self, obj: ReturnValueConcept, data):
        """Write "who", "status", "value" (and "parents" when truthy) into `data`."""
        pickler = self.context

        who = obj.who
        if isinstance(who, Concept):
            data["who"] = f"c:{who.id}:"
        elif isinstance(who, (BaseParser, BaseEvaluator)):
            data["who"] = who.name
        else:
            data["who"] = who

        data["status"] = obj.status
        data["value"] = pickler.flatten(obj.value)
        if obj.parents:
            data["parents"] = pickler.flatten(obj.parents)
        return data

    def new(self, data):
        """Create the return value with no value yet; `restore` fills it in."""
        return self.sheerka.ret(data["who"], data["status"], None)

    def restore(self, data, instance):
        """Restore the value and, when stored, the parents of `instance`."""
        unpickler = self.context
        instance.value = unpickler.restore(data["value"])
        if "parents" in data:
            instance.parents = unpickler.restore(data["parents"])
        return instance
# class BuiltinConceptsHandler(BaseHandler):
#
# def flatten(self, obj: BuiltinConcepts, data):
# return data
#
# def restore(self, obj):
# pass
class SheerkaHandler(BaseHandler):
    """
    Handler for the Sheerka instance: nothing of it is written, and the
    handler's own `sheerka` is used in its place when restoring.
    """

    def flatten(self, obj: Sheerka, data):
        """Return `data` untouched (it only holds what the caller put in it)."""
        return data

    def new(self, data):
        """Return the `sheerka` this handler was created with."""
        return self.sheerka

    def restore(self, data, instance):
        """Nothing to restore: `instance` is returned as it is."""
        return instance
class ExecutionContextHandler(BaseHandler):
    """Handler for ExecutionContext, limited to the properties listed for serialization."""

    def flatten(self, obj, data):
        """Write every listed property that is not None; "who" is written as its str()."""
        pickler = self.context

        for prop in CONTEXT_PROPERTIES_TO_SERIALIZE:
            value = getattr(obj, prop)
            if prop == "who":
                # always a string from here on, so it is never skipped as None
                value = str(value)
            if value is not None:
                data[prop] = pickler.flatten(value)

        return data

    def new(self, data):
        """Create the context from the stored "who"; the two other arguments are None."""
        return ExecutionContext(data["who"], None, None)

    def restore(self, data, instance):
        """Set every stored property on `instance`, except "who" which `new` already used."""
        unpickler = self.context

        for prop in CONTEXT_PROPERTIES_TO_SERIALIZE:
            if prop != "who" and prop in data:
                setattr(instance, prop, unpickler.restore(data[prop]))

        return instance
def initialize_pickle_handlers():
    """Register the Sheerka-specific handlers, each one as a base handler."""
    bindings = (
        (Concept, ConceptHandler),
        (UserInputConcept, UserInputHandler),
        (ReturnValueConcept, ReturnValueHandler),
        (Sheerka, SheerkaHandler),
        (ExecutionContext, ExecutionContextHandler),
    )
    for target_cls, handler_cls in bindings:
        registry.register(target_cls, handler_cls, True)
+5
View File
@@ -0,0 +1,5 @@
# Reserved dict keys marking special entries in the flattened data.
ID = "_sheerka/id"  # reference to an object already emitted
TUPLE = "_sheerka/tuple"  # list holding the items of a tuple
SET = "_sheerka/set"  # list holding the items of a set
OBJECT = "_sheerka/obj"  # importable class name of an object instance
ENUM = "_sheerka/enum"  # qualified name of an enum member
+85
View File
@@ -0,0 +1,85 @@
import base64
import types
from enum import Enum
# Types accepted by is_type().
class_types = (type,)
# Exact types accepted by is_primitive() (subclasses do not match).
PRIMITIVES = (str, bool, type(None), int, float)
def is_type(obj):
    """Returns True if obj is a reference to a type."""
    # "isinstance" rather than an identity test, so that classes built by a
    # metaclass are accepted as well
    accepted = class_types
    return isinstance(obj, accepted)
def is_enum(obj):
    """Returns True if obj is a member of an Enum (the Enum class itself is not)."""
    result = isinstance(obj, Enum)
    return result
def is_object(obj):
    """Returns True if obj is an object instance, i.e. anything but a
    type, a Python function or a builtin function."""
    # every value is an instance of `object`, so only the exclusions matter
    excluded = (type, types.FunctionType, types.BuiltinFunctionType)
    return not isinstance(obj, excluded)
def is_primitive(obj):
    """Returns True if the exact type of obj is one of PRIMITIVES."""
    kind = type(obj)
    return kind in PRIMITIVES
def is_dictionary(obj):
    """Returns True if obj is exactly a dict (subclasses do not match)."""
    kind = type(obj)
    return kind is dict
def is_list(obj):
    """Returns True if obj is exactly a list (subclasses do not match)."""
    kind = type(obj)
    return kind is list
def is_set(obj):
    """Returns True if obj is exactly a set (frozenset and subclasses do not match)."""
    kind = type(obj)
    return kind is set
def is_bytes(obj):
    """Returns True if obj is exactly a bytes object (bytearray does not match)."""
    kind = type(obj)
    return kind is bytes
def is_tuple(obj):
    """Returns True if obj is exactly a tuple (subclasses such as namedtuples do not match)."""
    kind = type(obj)
    return kind is tuple
def b64encode(data):
    """
    Encode binary data to ascii text in base64. Data must be bytes.

    :return: the base64 form, as a str
    """
    encoded = base64.b64encode(data)
    return encoded.decode('ascii')
def translate_module_name(module):
    """Rename builtin modules to a consistent module name.

    The Python 2 names `__builtin__` and `exceptions` are both mapped to
    `builtins`; `exceptions` goes there too because `builtins` contains
    everything `exceptions` offered. Any other name is returned unchanged.
    """
    lookup = {'__builtin__': 'builtins', 'exceptions': 'builtins'}
    return lookup.get(module, module)
def importable_name(cls):
    """
    Fully qualified name of a class, as "<module>.<name>", where the module
    goes through translate_module_name() and the name is `__qualname__`
    when available, `__name__` otherwise.
    """
    # `cls.__name__` is read up front, exactly like a getattr default would be
    fallback = cls.__name__
    name = getattr(cls, '__qualname__', fallback)
    module = translate_module_name(cls.__module__)
    return '{}.{}'.format(module, name)