# MyManagingTools/src/core/dbengine.py
import datetime
import hashlib
import io
import json
import logging
import os
import pickle
from threading import RLock
from core.serializer import Serializer, DebugSerializer
from core.utils import get_stream_digest

TYPE_KEY = "__type__"
TAG_PARENT = "__parent__"
TAG_USER = "__user_id__"
TAG_DATE = "__date__"
BUFFER_SIZE = 4096
FAKE_USER_ID = "FakeUserId"

logger = logging.getLogger(__name__)


class DbException(Exception):
    pass


class RefHelper:
    """Content-addressed storage for pickled objects, used by the serializers."""

    def __init__(self, get_obj_path):
        self.get_obj_path = get_obj_path

    def save_ref(self, obj):
        """
        Pickle an object and store it under its digest.
        :param obj: object to pickle
        :return: digest identifying the stored object
        """
        buffer = io.BytesIO()
        pickler = pickle.Pickler(buffer)
        pickler.dump(obj)
        digest = get_stream_digest(buffer)
        target_path = self.get_obj_path(digest)
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        buffer.seek(0)
        with open(target_path, "wb") as file:
            while chunk := buffer.read(BUFFER_SIZE):
                file.write(chunk)
        logger.debug(f"Saved object type '{type(obj).__name__}' with digest {digest}")
        return digest

    def load_ref(self, digest):
        """
        Load a previously pickled object from its digest.
        :param digest: digest returned by save_ref()
        :return: the unpickled object
        """
        with open(self.get_obj_path(digest), "rb") as file:
            return pickle.load(file)
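
# Usage sketch (hypothetical, not part of the module): RefHelper only needs a
# mapping from digest to file path, so it can be exercised on its own:
#
#     helper = RefHelper(lambda d: os.path.join(".mytools_db", "objects", d[:24], d))
#     digest = helper.save_ref({"answer": 42})
#     assert helper.load_ref(digest) == {"answer": 42}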


class DbEngine:
    """
    Personal implementation of a DB engine.
    Inspired by the way git manages its files.
    Designed to keep a history of the modifications.
    """
    ObjectsFolder = "objects"  # groups objects in the same folder
    HeadFile = "head"  # used to keep track of the latest version of all entries

    def __init__(self, root: str | None = None):
        self.root = root or ".mytools_db"
        self.serializer = Serializer(RefHelper(self._get_obj_path))
        self.debug_serializer = DebugSerializer(RefHelper(self.debug_load))
        self.lock = RLock()
    def is_initialized(self):
        """
        :return: True if the root folder already exists
        """
        return os.path.exists(self.root)

    def init(self):
        """
        Make sure that the DbEngine is properly initialized
        :return:
        """
        if not os.path.exists(self.root):
            logger.debug(f"Creating root folder in {os.path.abspath(self.root)}.")
            os.mkdir(self.root)
    def save(self, user_id: str, entry: str, obj: object) -> str:
        """
        Save a snapshot of an entry
        :param user_id: id of the user performing the save
        :param entry: name of the entry
        :param obj: snapshot to save
        :return: digest of the saved snapshot
        """
        with self.lock:
            logger.info(f"Saving {user_id=}, {entry=}, {obj=}")
            # prepare the data
            as_dict = self._serialize(obj)
            as_dict[TAG_PARENT] = [self._get_entry_digest(entry)]
            as_dict[TAG_USER] = user_id
            # naive datetime: '%z' renders as an empty string here
            as_dict[TAG_DATE] = datetime.datetime.now().strftime('%Y%m%d %H:%M:%S %z')
            # transform into a stream
            as_str = json.dumps(as_dict, sort_keys=True, indent=4)
            logger.debug(f"Serialized object: {as_str}")
            byte_stream = as_str.encode("utf-8")
            # compute the digest to know where to store it
            digest = hashlib.sha256(byte_stream).hexdigest()
            target_path = self._get_obj_path(digest)
            if os.path.exists(target_path):
                # the same object is already saved. Nothing to do
                return digest
            # save the new value
            os.makedirs(os.path.dirname(target_path), exist_ok=True)
            with open(target_path, "wb") as file:
                file.write(byte_stream)
            # update the head to remember where the latest entry is
            self._update_head(entry, digest)
            logger.debug(f"New head for entry '{entry}' is {digest}")
            return digest
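    # For reference, a snapshot written by save() is a pretty-printed JSON
    # document whose bookkeeping tags sit next to the serialized payload,
    # along these lines (illustrative values):
    #
    #     {
    #         "__date__": "20240101 12:00:00 ",
    #         "__parent__": ["<digest of the previous snapshot, or null>"],
    #         "__user_id__": "alice",
    #         ...fields produced by Serializer...
    #     }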
    def load(self, user_id: str, entry, digest=None):
        """
        Loads a snapshot
        :param user_id: id of the user performing the load
        :param entry: name of the entry
        :param digest: specific snapshot to load; defaults to the latest one
        :return: the deserialized snapshot
        """
        with self.lock:
            logger.info(f"Loading {user_id=}, {entry=}, {digest=}")
            digest_to_use = digest or self._get_entry_digest(entry)
            logger.debug(f"Using digest {digest_to_use}.")
            if digest_to_use is None:
                raise DbException(entry)
            target_file = self._get_obj_path(digest_to_use)
            with open(target_file, 'r', encoding='utf-8') as file:
                as_dict = json.load(file)
            return self._deserialize(as_dict)
    def put(self, user_id: str, entry, key: str, value: object):
        """
        Save a specific record.
        This will create a new snapshot if the record is new or different.
        You should not mix the usage of put()/put_many() and save(), as they are two different ways to manage the db.
        :param user_id: id of the user performing the update
        :param entry: name of the entry
        :param key: key of the record inside the entry
        :param value: value of the record
        :return: True if a new snapshot was created, False otherwise
        """
        with self.lock:
            logger.info(f"Adding {user_id=}, {entry=}, {key=}, {value=}")
            try:
                entry_content = self.load(user_id, entry)
            except DbException:
                entry_content = {}
            # Do not save if the record is unchanged
            if key in entry_content:
                old_value = entry_content[key]
                if old_value == value:
                    return False
            entry_content[key] = value
            self.save(user_id, entry, entry_content)
            return True
    def put_many(self, user_id: str, entry, items: list):
        """
        Save a list of items as one single snapshot.
        A new snapshot will not be created if all the items already exist.
        You should not mix the usage of put()/put_many() and save(), as they are two different ways to manage the db.
        Items must expose a get_key() method; see the sketch after this method.
        :param user_id: id of the user performing the update
        :param entry: name of the entry
        :param items: list of items to store
        :return: True if a new snapshot was created, False otherwise
        """
        with self.lock:
            logger.info(f"Adding many {user_id=}, {entry=}, {items=}")
            try:
                entry_content = self.load(user_id, entry)
            except DbException:
                entry_content = {}
            is_dirty = False
            for item in items:
                key = item.get_key()
                # only mark dirty when the item is new or actually changed
                if key in entry_content and entry_content[key] == item:
                    continue
                entry_content[key] = item
                is_dirty = True
            if is_dirty:
                self.save(user_id, entry, entry_content)
                return True
            return False
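    # Items handed to put_many() are only required to expose get_key() and to
    # compare equal when unchanged. A minimal example (hypothetical):
    #
    #     @dataclasses.dataclass(frozen=True)
    #     class Task:
    #         task_id: str
    #         title: str
    #
    #         def get_key(self):
    #             return self.task_id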
    def exists(self, entry: str):
        """
        Tells if an entry exists
        :param entry: name of the entry
        :return: True if the entry has at least one snapshot
        """
        with self.lock:
            return self._get_entry_digest(entry) is not None
    def get(self, user_id: str, entry: str, key: str | None = None, digest=None):
        """
        Retrieve an item from a snapshot
        :param user_id: id of the user performing the read
        :param entry: name of the entry
        :param key: key of the record to retrieve; None to retrieve all records
        :param digest: specific snapshot to read from; defaults to the latest one
        :return: the requested record, or all records as a list when key is None
        """
        with self.lock:
            logger.info(f"Getting {user_id=}, {entry=}, {key=}, {digest=}")
            entry_content = self.load(user_id, entry, digest)
            if key is None:
                # return all items as a list, skipping the "__" bookkeeping tags
                return [v for k, v in entry_content.items() if not k.startswith("__")]
            return entry_content[key]
    def debug_head(self):
        """Return the raw content of the head file, for debugging."""
        with self.lock:
            head_path = self._get_head_path()
            # load
            try:
                with open(head_path, 'r') as file:
                    head = json.load(file)
            except FileNotFoundError:
                head = {}
            return head

    def debug_load(self, digest):
        """Load a raw snapshot through the debug serializer."""
        with self.lock:
            target_file = self._get_obj_path(digest)
            with open(target_file, 'r', encoding='utf-8') as file:
                as_dict = json.load(file)
            return self.debug_serializer.deserialize(as_dict)
    def _serialize(self, obj):
        """
        Just call the serializer
        :param obj: object to serialize
        :return: a JSON-compatible dict
        """
        use_refs = obj.use_refs() if hasattr(obj, "use_refs") else None
        return self.serializer.serialize(obj, use_refs)

    def _deserialize(self, as_dict):
        return self.serializer.deserialize(as_dict)
    def _update_head(self, entry, digest):
        """
        Point the head of an entry to the given digest
        :param entry: name of the entry
        :param digest: digest of its latest snapshot
        :return:
        """
        head_path = self._get_head_path()
        # load
        try:
            with open(head_path, 'r') as file:
                head = json.load(file)
        except FileNotFoundError:
            head = {}
        # update
        head[entry] = digest
        # and save
        with open(head_path, 'w') as file:
            json.dump(head, file)
    def _get_entry_digest(self, entry):
        """
        Search for the latest digest of a given entry
        :param entry: name of the entry
        :return: the digest, or None if the entry is unknown
        """
        try:
            with open(self._get_head_path(), 'r') as file:
                head = json.load(file)
            # json.dump() stringifies keys, so look the entry up as a string
            return head[str(entry)]
        except (FileNotFoundError, KeyError):
            return None

    def _get_head_path(self):
        """
        Location of the head file
        :return: path of the file tracking the latest digest of each entry
        """
        return os.path.join(self.root, self.HeadFile)
    def _get_obj_path(self, digest):
        """
        Location of an object, from its digest
        :param digest: digest of the object
        :return: path where the object is stored
        """
        # fan objects out into sub-folders named after a prefix of the digest
        return os.path.join(self.root, self.ObjectsFolder, digest[:24], digest)
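

if __name__ == "__main__":
    # Minimal smoke test, assuming Serializer/DebugSerializer can round-trip a
    # plain dict of JSON-compatible values (an assumption about core.serializer,
    # not something this module guarantees).
    import tempfile

    logging.basicConfig(level=logging.DEBUG)
    with tempfile.TemporaryDirectory() as tmp:
        db = DbEngine(root=os.path.join(tmp, ".mytools_db"))
        db.init()
        db.save(FAKE_USER_ID, "notes", {"title": "hello"})
        print(db.load(FAKE_USER_ID, "notes"))
        print(db.debug_head())  # e.g. {'notes': '<sha256 hex digest>'}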