First Working version. I can add table
This commit is contained in:
335
src/core/dbengine.py
Normal file
335
src/core/dbengine.py
Normal file
@@ -0,0 +1,335 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
from threading import RLock
|
||||
|
||||
from core.serializer import Serializer, DebugSerializer
|
||||
from core.utils import get_stream_digest
|
||||
|
||||
TYPE_KEY = "__type__"
|
||||
TAG_PARENT = "__parent__"
|
||||
TAG_USER = "__user_id__"
|
||||
TAG_DATE = "__date__"
|
||||
BUFFER_SIZE = 4096
|
||||
FAKE_USER_ID = "FakeUserId"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DbException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class RefHelper:
|
||||
def __init__(self, get_obj_path):
|
||||
self.get_obj_path = get_obj_path
|
||||
|
||||
def save_ref(self, obj):
|
||||
"""
|
||||
|
||||
:param obj:
|
||||
:return:
|
||||
"""
|
||||
buffer = io.BytesIO()
|
||||
pickler = pickle.Pickler(buffer)
|
||||
pickler.dump(obj)
|
||||
|
||||
digest = get_stream_digest(buffer)
|
||||
|
||||
target_path = self.get_obj_path(digest)
|
||||
if not os.path.exists(os.path.dirname(target_path)):
|
||||
os.makedirs(os.path.dirname(target_path))
|
||||
|
||||
buffer.seek(0)
|
||||
with open(self.get_obj_path(digest), "wb") as file:
|
||||
while chunk := buffer.read(BUFFER_SIZE):
|
||||
file.write(chunk)
|
||||
|
||||
logger.debug(f"Saved object type '{type(obj).__name__}' with digest {digest}")
|
||||
return digest
|
||||
|
||||
def load_ref(self, digest):
|
||||
"""
|
||||
|
||||
:param digest:
|
||||
:return:
|
||||
"""
|
||||
with open(self.get_obj_path(digest), 'rb') as file:
|
||||
return pickle.load(file)
|
||||
|
||||
|
||||
class DbEngine:
|
||||
"""
|
||||
Personal implementation of DB engine
|
||||
Inspire by the way git manage its files
|
||||
Designed to keep history of the modifications
|
||||
"""
|
||||
ObjectsFolder = "objects" # group objects in the same folder
|
||||
HeadFile = "head" # used to keep track the latest version of all entries
|
||||
|
||||
def __init__(self, root: str = None):
|
||||
self.root = root or ".mytools_db"
|
||||
self.serializer = Serializer(RefHelper(self._get_obj_path))
|
||||
self.debug_serializer = DebugSerializer(RefHelper(self.debug_load))
|
||||
self.lock = RLock()
|
||||
|
||||
def is_initialized(self):
|
||||
"""
|
||||
|
||||
:return:
|
||||
"""
|
||||
return os.path.exists(self.root)
|
||||
|
||||
def init(self):
|
||||
"""
|
||||
Make sure that the DbEngine is properly initialized
|
||||
:return:
|
||||
"""
|
||||
if not os.path.exists(self.root):
|
||||
logger.debug(f"Creating root folder in {os.path.abspath(self.root)}.")
|
||||
os.mkdir(self.root)
|
||||
|
||||
def save(self, user_id: str, entry: str, obj: object) -> str:
|
||||
"""
|
||||
Save a snapshot of an entry
|
||||
:param user_id:
|
||||
:param entry:
|
||||
:param obj: snapshot to save
|
||||
:return:
|
||||
"""
|
||||
with self.lock:
|
||||
logger.info(f"Saving {user_id=}, {entry=}, {obj=}")
|
||||
# prepare the data
|
||||
as_dict = self._serialize(obj)
|
||||
as_dict[TAG_PARENT] = [self._get_entry_digest(entry)]
|
||||
as_dict[TAG_USER] = user_id
|
||||
as_dict[TAG_DATE] = datetime.datetime.now().strftime('%Y%m%d %H:%M:%S %z')
|
||||
|
||||
# transform into a stream
|
||||
as_str = json.dumps(as_dict, sort_keys=True, indent=4)
|
||||
logger.debug(f"Serialized object : {as_str}")
|
||||
byte_stream = as_str.encode("utf-8")
|
||||
|
||||
# compute the digest to know where to store it
|
||||
digest = hashlib.sha256(byte_stream).hexdigest()
|
||||
|
||||
target_path = self._get_obj_path(digest)
|
||||
if os.path.exists(target_path):
|
||||
# the same object is already saved. Noting to do
|
||||
return digest
|
||||
|
||||
# save the new value
|
||||
if not os.path.exists(os.path.dirname(target_path)):
|
||||
os.makedirs(os.path.dirname(target_path))
|
||||
with open(target_path, "wb") as file:
|
||||
file.write(byte_stream)
|
||||
|
||||
# update the head to remember where is the latest entry
|
||||
self._update_head(entry, digest)
|
||||
logger.debug(f"New head for entry '{entry}' is {digest}")
|
||||
return digest
|
||||
|
||||
def load(self, user_id: str, entry, digest=None):
|
||||
"""
|
||||
Loads a snapshot
|
||||
:param user_id:
|
||||
:param entry:
|
||||
:param digest:
|
||||
:return:
|
||||
"""
|
||||
with self.lock:
|
||||
logger.info(f"Loading {user_id=}, {entry=}, {digest=}")
|
||||
|
||||
digest_to_use = digest or self._get_entry_digest(entry)
|
||||
logger.debug(f"Using digest {digest_to_use}.")
|
||||
|
||||
if digest_to_use is None:
|
||||
raise DbException(entry)
|
||||
|
||||
target_file = self._get_obj_path(digest_to_use)
|
||||
with open(target_file, 'r', encoding='utf-8') as file:
|
||||
as_dict = json.load(file)
|
||||
|
||||
return self._deserialize(as_dict)
|
||||
|
||||
def put(self, user_id: str, entry, key: str, value: object):
|
||||
"""
|
||||
Save a specific record.
|
||||
This will create a new snapshot is the record is new or different
|
||||
|
||||
You should not mix the usage of put_many() and save() as it's two different way to manage the db
|
||||
:param user_id:
|
||||
:param entry:
|
||||
:param key:
|
||||
:param value:
|
||||
:return:
|
||||
"""
|
||||
with self.lock:
|
||||
logger.info(f"Adding {user_id=}, {entry=}, {key=}, {value=}")
|
||||
try:
|
||||
entry_content = self.load(user_id, entry)
|
||||
except DbException:
|
||||
entry_content = {}
|
||||
|
||||
# Do not save if the entry is the same
|
||||
if key in entry_content:
|
||||
old_value = entry_content[key]
|
||||
if old_value == value:
|
||||
return False
|
||||
|
||||
entry_content[key] = value
|
||||
self.save(user_id, entry, entry_content)
|
||||
return True
|
||||
|
||||
def put_many(self, user_id: str, entry, items: list):
|
||||
"""
|
||||
Save a list of item as one single snapshot
|
||||
A new snapshot will not be created if all the items already exist
|
||||
|
||||
You should not mix the usage of put_many() and save() as it's two different way to manage the db
|
||||
:param user_id:
|
||||
:param entry:
|
||||
:param items:
|
||||
:return:
|
||||
"""
|
||||
with self.lock:
|
||||
logger.info(f"Adding many {user_id=}, {entry=}, {items=}")
|
||||
try:
|
||||
entry_content = self.load(user_id, entry)
|
||||
except DbException:
|
||||
entry_content = {}
|
||||
|
||||
is_dirty = False
|
||||
for item in items:
|
||||
key = item.get_key()
|
||||
if key in entry_content and entry_content[key] == item:
|
||||
continue
|
||||
else:
|
||||
entry_content[key] = item
|
||||
is_dirty = True
|
||||
|
||||
if is_dirty:
|
||||
self.save(user_id, entry, entry_content)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def exists(self, entry: str):
|
||||
"""
|
||||
Tells if an entry exist
|
||||
:param user_id:
|
||||
:param entry:
|
||||
:return:
|
||||
"""
|
||||
with self.lock:
|
||||
return self._get_entry_digest(entry) is not None
|
||||
|
||||
def get(self, user_id: str, entry: str, key: str | None = None, digest=None):
|
||||
"""
|
||||
Retrieve an item from the snapshot
|
||||
:param user_id:
|
||||
:param entry:
|
||||
:param key:
|
||||
:param digest:
|
||||
:return:
|
||||
"""
|
||||
with self.lock:
|
||||
logger.info(f"Getting {user_id=}, {entry=}, {key=}, {digest=}")
|
||||
entry_content = self.load(user_id, entry, digest)
|
||||
|
||||
if key is None:
|
||||
# return all items as list
|
||||
return [v for k, v in entry_content.items() if not k.startswith("__")]
|
||||
|
||||
return entry_content[key]
|
||||
|
||||
def debug_head(self):
|
||||
with self.lock:
|
||||
head_path = os.path.join(self.root, self.HeadFile)
|
||||
# load
|
||||
try:
|
||||
with open(head_path, 'r') as file:
|
||||
head = json.load(file)
|
||||
except FileNotFoundError:
|
||||
head = {}
|
||||
|
||||
return head
|
||||
|
||||
def debug_load(self, digest):
|
||||
with self.lock:
|
||||
target_file = self._get_obj_path(digest)
|
||||
with open(target_file, 'r', encoding='utf-8') as file:
|
||||
as_dict = json.load(file)
|
||||
|
||||
return self.debug_serializer.deserialize(as_dict)
|
||||
|
||||
def _serialize(self, obj):
|
||||
"""
|
||||
Just call the serializer
|
||||
:param obj:
|
||||
:return:
|
||||
"""
|
||||
# serializer = Serializer(RefHelper(self._get_obj_path))
|
||||
use_refs = getattr(obj, "use_refs")() if hasattr(obj, "use_refs") else None
|
||||
return self.serializer.serialize(obj, use_refs)
|
||||
|
||||
def _deserialize(self, as_dict):
|
||||
return self.serializer.deserialize(as_dict)
|
||||
|
||||
def _update_head(self, entry, digest):
|
||||
"""
|
||||
Actually dumps the snapshot in file system
|
||||
:param entry:
|
||||
:param digest:
|
||||
:return:
|
||||
"""
|
||||
head_path = os.path.join(self.root, self.HeadFile)
|
||||
# load
|
||||
try:
|
||||
with open(head_path, 'r') as file:
|
||||
head = json.load(file)
|
||||
except FileNotFoundError:
|
||||
head = {}
|
||||
|
||||
# update
|
||||
head[entry] = digest
|
||||
|
||||
# and save
|
||||
with open(head_path, 'w') as file:
|
||||
json.dump(head, file)
|
||||
|
||||
def _get_entry_digest(self, entry):
|
||||
"""
|
||||
Search for the latest digest, for a given entry
|
||||
:param entry:
|
||||
:return:
|
||||
"""
|
||||
head_path = os.path.join(self.root, self.HeadFile)
|
||||
try:
|
||||
with open(head_path, 'r') as file:
|
||||
head = json.load(file)
|
||||
return head[str(entry)]
|
||||
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
except KeyError:
|
||||
return None
|
||||
|
||||
def _get_head_path(self):
|
||||
"""
|
||||
Location of the Head file
|
||||
:return:
|
||||
"""
|
||||
return os.path.join(self.root, self.HeadFile)
|
||||
|
||||
def _get_obj_path(self, digest):
|
||||
"""
|
||||
Location of objects
|
||||
:param digest:
|
||||
:return:
|
||||
"""
|
||||
return os.path.join(self.root, "objects", digest[:24], digest)
|
||||
Reference in New Issue
Block a user