336 lines
8.7 KiB
Python
336 lines
8.7 KiB
Python
import datetime
|
|
import hashlib
|
|
import io
|
|
import json
|
|
import logging
|
|
import os
|
|
import pickle
|
|
from threading import RLock
|
|
|
|
from core.serializer import Serializer, DebugSerializer
|
|
from core.utils import get_stream_digest
|
|
|
|
# Reserved dictionary keys used inside serialized snapshots. All of them
# start with "__" so DbEngine.get() can filter them out of user data.
TYPE_KEY = "__type__"  # presumably set by the serializer to tag object types — not referenced in this file, TODO confirm
TAG_PARENT = "__parent__"  # list holding the digest of the previous snapshot of the entry
TAG_USER = "__user_id__"  # id of the user who saved the snapshot
TAG_DATE = "__date__"  # save timestamp, formatted '%Y%m%d %H:%M:%S %z'
BUFFER_SIZE = 4096  # chunk size used when streaming pickled bytes to disk
FAKE_USER_ID = "FakeUserId"  # placeholder user id — no usage visible in this file, TODO confirm callers

logger = logging.getLogger(__name__)
|
|
|
|
|
|
class DbException(Exception):
    """Raised by DbEngine.load() when an entry has no known digest."""

    pass
|
|
|
|
|
|
class RefHelper:
    """
    Persist arbitrary Python objects as content-addressed pickle files.

    The storage path of each object is derived from the digest of its pickled
    bytes, through the ``get_obj_path`` callable supplied at construction.
    """

    def __init__(self, get_obj_path):
        # Callable mapping a digest -> file path where the object lives.
        self.get_obj_path = get_obj_path

    def save_ref(self, obj):
        """
        Pickle ``obj`` and store it under its content digest.

        :param obj: object to persist (must be picklable)
        :return: digest identifying the stored object
        """
        buffer = io.BytesIO()
        pickle.Pickler(buffer).dump(obj)

        digest = get_stream_digest(buffer)

        target_path = self.get_obj_path(digest)
        # exist_ok avoids the race between an existence check and the creation
        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        with open(target_path, "wb") as file:
            # the whole pickle is already in memory: write it in one shot
            # instead of re-reading the BytesIO chunk by chunk
            file.write(buffer.getbuffer())

        logger.debug(f"Saved object type '{type(obj).__name__}' with digest {digest}")
        return digest

    def load_ref(self, digest):
        """
        Load back an object previously stored with save_ref().

        :param digest: digest returned by save_ref()
        :return: the unpickled object
        """
        with open(self.get_obj_path(digest), 'rb') as file:
            return pickle.load(file)
|
|
|
|
|
|
class DbEngine:
|
|
"""
|
|
Personal implementation of DB engine
|
|
Inspire by the way git manage its files
|
|
Designed to keep history of the modifications
|
|
"""
|
|
ObjectsFolder = "objects" # group objects in the same folder
|
|
HeadFile = "head" # used to keep track the latest version of all entries
|
|
|
|
def __init__(self, root: str = None):
|
|
self.root = root or ".mytools_db"
|
|
self.serializer = Serializer(RefHelper(self._get_obj_path))
|
|
self.debug_serializer = DebugSerializer(RefHelper(self.debug_load))
|
|
self.lock = RLock()
|
|
|
|
def is_initialized(self):
|
|
"""
|
|
|
|
:return:
|
|
"""
|
|
return os.path.exists(self.root)
|
|
|
|
def init(self):
|
|
"""
|
|
Make sure that the DbEngine is properly initialized
|
|
:return:
|
|
"""
|
|
if not os.path.exists(self.root):
|
|
logger.debug(f"Creating root folder in {os.path.abspath(self.root)}.")
|
|
os.mkdir(self.root)
|
|
|
|
def save(self, user_id: str, entry: str, obj: object) -> str:
|
|
"""
|
|
Save a snapshot of an entry
|
|
:param user_id:
|
|
:param entry:
|
|
:param obj: snapshot to save
|
|
:return:
|
|
"""
|
|
with self.lock:
|
|
logger.info(f"Saving {user_id=}, {entry=}, {obj=}")
|
|
# prepare the data
|
|
as_dict = self._serialize(obj)
|
|
as_dict[TAG_PARENT] = [self._get_entry_digest(entry)]
|
|
as_dict[TAG_USER] = user_id
|
|
as_dict[TAG_DATE] = datetime.datetime.now().strftime('%Y%m%d %H:%M:%S %z')
|
|
|
|
# transform into a stream
|
|
as_str = json.dumps(as_dict, sort_keys=True, indent=4)
|
|
logger.debug(f"Serialized object : {as_str}")
|
|
byte_stream = as_str.encode("utf-8")
|
|
|
|
# compute the digest to know where to store it
|
|
digest = hashlib.sha256(byte_stream).hexdigest()
|
|
|
|
target_path = self._get_obj_path(digest)
|
|
if os.path.exists(target_path):
|
|
# the same object is already saved. Noting to do
|
|
return digest
|
|
|
|
# save the new value
|
|
if not os.path.exists(os.path.dirname(target_path)):
|
|
os.makedirs(os.path.dirname(target_path))
|
|
with open(target_path, "wb") as file:
|
|
file.write(byte_stream)
|
|
|
|
# update the head to remember where is the latest entry
|
|
self._update_head(entry, digest)
|
|
logger.debug(f"New head for entry '{entry}' is {digest}")
|
|
return digest
|
|
|
|
def load(self, user_id: str, entry, digest=None):
|
|
"""
|
|
Loads a snapshot
|
|
:param user_id:
|
|
:param entry:
|
|
:param digest:
|
|
:return:
|
|
"""
|
|
with self.lock:
|
|
logger.info(f"Loading {user_id=}, {entry=}, {digest=}")
|
|
|
|
digest_to_use = digest or self._get_entry_digest(entry)
|
|
logger.debug(f"Using digest {digest_to_use}.")
|
|
|
|
if digest_to_use is None:
|
|
raise DbException(entry)
|
|
|
|
target_file = self._get_obj_path(digest_to_use)
|
|
with open(target_file, 'r', encoding='utf-8') as file:
|
|
as_dict = json.load(file)
|
|
|
|
return self._deserialize(as_dict)
|
|
|
|
def put(self, user_id: str, entry, key: str, value: object):
|
|
"""
|
|
Save a specific record.
|
|
This will create a new snapshot is the record is new or different
|
|
|
|
You should not mix the usage of put_many() and save() as it's two different way to manage the db
|
|
:param user_id:
|
|
:param entry:
|
|
:param key:
|
|
:param value:
|
|
:return:
|
|
"""
|
|
with self.lock:
|
|
logger.info(f"Adding {user_id=}, {entry=}, {key=}, {value=}")
|
|
try:
|
|
entry_content = self.load(user_id, entry)
|
|
except DbException:
|
|
entry_content = {}
|
|
|
|
# Do not save if the entry is the same
|
|
if key in entry_content:
|
|
old_value = entry_content[key]
|
|
if old_value == value:
|
|
return False
|
|
|
|
entry_content[key] = value
|
|
self.save(user_id, entry, entry_content)
|
|
return True
|
|
|
|
def put_many(self, user_id: str, entry, items: list):
|
|
"""
|
|
Save a list of item as one single snapshot
|
|
A new snapshot will not be created if all the items already exist
|
|
|
|
You should not mix the usage of put_many() and save() as it's two different way to manage the db
|
|
:param user_id:
|
|
:param entry:
|
|
:param items:
|
|
:return:
|
|
"""
|
|
with self.lock:
|
|
logger.info(f"Adding many {user_id=}, {entry=}, {items=}")
|
|
try:
|
|
entry_content = self.load(user_id, entry)
|
|
except DbException:
|
|
entry_content = {}
|
|
|
|
is_dirty = False
|
|
for item in items:
|
|
key = item.get_key()
|
|
if key in entry_content and entry_content[key] == item:
|
|
continue
|
|
else:
|
|
entry_content[key] = item
|
|
is_dirty = True
|
|
|
|
if is_dirty:
|
|
self.save(user_id, entry, entry_content)
|
|
return True
|
|
|
|
return False
|
|
|
|
def exists(self, entry: str):
|
|
"""
|
|
Tells if an entry exist
|
|
:param user_id:
|
|
:param entry:
|
|
:return:
|
|
"""
|
|
with self.lock:
|
|
return self._get_entry_digest(entry) is not None
|
|
|
|
def get(self, user_id: str, entry: str, key: str | None = None, digest=None):
|
|
"""
|
|
Retrieve an item from the snapshot
|
|
:param user_id:
|
|
:param entry:
|
|
:param key:
|
|
:param digest:
|
|
:return:
|
|
"""
|
|
with self.lock:
|
|
logger.info(f"Getting {user_id=}, {entry=}, {key=}, {digest=}")
|
|
entry_content = self.load(user_id, entry, digest)
|
|
|
|
if key is None:
|
|
# return all items as list
|
|
return [v for k, v in entry_content.items() if not k.startswith("__")]
|
|
|
|
return entry_content[key]
|
|
|
|
def debug_head(self):
|
|
with self.lock:
|
|
head_path = os.path.join(self.root, self.HeadFile)
|
|
# load
|
|
try:
|
|
with open(head_path, 'r') as file:
|
|
head = json.load(file)
|
|
except FileNotFoundError:
|
|
head = {}
|
|
|
|
return head
|
|
|
|
def debug_load(self, digest):
|
|
with self.lock:
|
|
target_file = self._get_obj_path(digest)
|
|
with open(target_file, 'r', encoding='utf-8') as file:
|
|
as_dict = json.load(file)
|
|
|
|
return self.debug_serializer.deserialize(as_dict)
|
|
|
|
def _serialize(self, obj):
|
|
"""
|
|
Just call the serializer
|
|
:param obj:
|
|
:return:
|
|
"""
|
|
# serializer = Serializer(RefHelper(self._get_obj_path))
|
|
use_refs = getattr(obj, "use_refs")() if hasattr(obj, "use_refs") else None
|
|
return self.serializer.serialize(obj, use_refs)
|
|
|
|
def _deserialize(self, as_dict):
|
|
return self.serializer.deserialize(as_dict)
|
|
|
|
def _update_head(self, entry, digest):
|
|
"""
|
|
Actually dumps the snapshot in file system
|
|
:param entry:
|
|
:param digest:
|
|
:return:
|
|
"""
|
|
head_path = os.path.join(self.root, self.HeadFile)
|
|
# load
|
|
try:
|
|
with open(head_path, 'r') as file:
|
|
head = json.load(file)
|
|
except FileNotFoundError:
|
|
head = {}
|
|
|
|
# update
|
|
head[entry] = digest
|
|
|
|
# and save
|
|
with open(head_path, 'w') as file:
|
|
json.dump(head, file)
|
|
|
|
def _get_entry_digest(self, entry):
|
|
"""
|
|
Search for the latest digest, for a given entry
|
|
:param entry:
|
|
:return:
|
|
"""
|
|
head_path = os.path.join(self.root, self.HeadFile)
|
|
try:
|
|
with open(head_path, 'r') as file:
|
|
head = json.load(file)
|
|
return head[str(entry)]
|
|
|
|
except FileNotFoundError:
|
|
return None
|
|
except KeyError:
|
|
return None
|
|
|
|
def _get_head_path(self):
|
|
"""
|
|
Location of the Head file
|
|
:return:
|
|
"""
|
|
return os.path.join(self.root, self.HeadFile)
|
|
|
|
def _get_obj_path(self, digest):
|
|
"""
|
|
Location of objects
|
|
:param digest:
|
|
:return:
|
|
"""
|
|
return os.path.join(self.root, "objects", digest[:24], digest)
|