First working version. I can add a table

2025-05-10 16:55:52 +02:00
parent b708ef2c46
commit 2daff83e67
157 changed files with 17282 additions and 12 deletions

src/core/dbengine.py (new file, 335 lines added)
@@ -0,0 +1,335 @@
import datetime
import hashlib
import io
import json
import logging
import os
import pickle
from threading import RLock

from core.serializer import Serializer, DebugSerializer
from core.utils import get_stream_digest

TYPE_KEY = "__type__"
TAG_PARENT = "__parent__"
TAG_USER = "__user_id__"
TAG_DATE = "__date__"
BUFFER_SIZE = 4096
FAKE_USER_ID = "FakeUserId"

logger = logging.getLogger(__name__)
class DbException(Exception):
    pass


class RefHelper:
    def __init__(self, get_obj_path):
        self.get_obj_path = get_obj_path

    def save_ref(self, obj):
        """
        Pickle an object and store it under its content digest.
        :param obj: object to store
        :return: digest of the stored object
        """
        buffer = io.BytesIO()
        pickler = pickle.Pickler(buffer)
        pickler.dump(obj)
        digest = get_stream_digest(buffer)
        target_path = self.get_obj_path(digest)
        if not os.path.exists(os.path.dirname(target_path)):
            os.makedirs(os.path.dirname(target_path))
        buffer.seek(0)
        with open(target_path, "wb") as file:
            while chunk := buffer.read(BUFFER_SIZE):
                file.write(chunk)
        logger.debug(f"Saved object type '{type(obj).__name__}' with digest {digest}")
        return digest

    def load_ref(self, digest):
        """
        Load an object previously stored with save_ref().
        :param digest: digest of the object to load
        :return: the unpickled object
        """
        with open(self.get_obj_path(digest), 'rb') as file:
            return pickle.load(file)


class DbEngine:
    """
    Personal implementation of a DB engine.
    Inspired by the way git manages its files.
    Designed to keep a history of the modifications.
    """
    ObjectsFolder = "objects"  # group objects in the same folder
    HeadFile = "head"  # used to keep track of the latest version of all entries

    def __init__(self, root: str = None):
        self.root = root or ".mytools_db"
        self.serializer = Serializer(RefHelper(self._get_obj_path))
        self.debug_serializer = DebugSerializer(RefHelper(self.debug_load))
        self.lock = RLock()

    def is_initialized(self):
        """
        :return: True if the root folder already exists
        """
        return os.path.exists(self.root)

    def init(self):
        """
        Make sure that the DbEngine is properly initialized
        :return:
        """
        if not os.path.exists(self.root):
            logger.debug(f"Creating root folder in {os.path.abspath(self.root)}.")
            os.mkdir(self.root)
    def save(self, user_id: str, entry: str, obj: object) -> str:
        """
        Save a snapshot of an entry
        :param user_id:
        :param entry:
        :param obj: snapshot to save
        :return: digest of the saved snapshot
        """
        with self.lock:
            logger.info(f"Saving {user_id=}, {entry=}, {obj=}")
            # prepare the data
            as_dict = self._serialize(obj)
            as_dict[TAG_PARENT] = [self._get_entry_digest(entry)]
            as_dict[TAG_USER] = user_id
            as_dict[TAG_DATE] = datetime.datetime.now().strftime('%Y%m%d %H:%M:%S %z')
            # transform into a stream
            as_str = json.dumps(as_dict, sort_keys=True, indent=4)
            logger.debug(f"Serialized object : {as_str}")
            byte_stream = as_str.encode("utf-8")
            # compute the digest to know where to store it
            digest = hashlib.sha256(byte_stream).hexdigest()
            target_path = self._get_obj_path(digest)
            if os.path.exists(target_path):
                # the same object is already saved. Nothing to do
                return digest
            # save the new value
            if not os.path.exists(os.path.dirname(target_path)):
                os.makedirs(os.path.dirname(target_path))
            with open(target_path, "wb") as file:
                file.write(byte_stream)
            # update the head to remember where the latest snapshot of the entry is
            self._update_head(entry, digest)
            logger.debug(f"New head for entry '{entry}' is {digest}")
            return digest

    def load(self, user_id: str, entry, digest=None):
        """
        Load a snapshot
        :param user_id:
        :param entry:
        :param digest: specific snapshot to load; defaults to the latest one
        :return: the deserialized snapshot
        """
        with self.lock:
            logger.info(f"Loading {user_id=}, {entry=}, {digest=}")
            digest_to_use = digest or self._get_entry_digest(entry)
            logger.debug(f"Using digest {digest_to_use}.")
            if digest_to_use is None:
                raise DbException(entry)
            target_file = self._get_obj_path(digest_to_use)
            with open(target_file, 'r', encoding='utf-8') as file:
                as_dict = json.load(file)
            return self._deserialize(as_dict)
    def put(self, user_id: str, entry, key: str, value: object):
        """
        Save a single record.
        This will create a new snapshot if the record is new or different.
        You should not mix put()/put_many() with save(), as they are two different ways to manage the db.
        :param user_id:
        :param entry:
        :param key:
        :param value:
        :return: True if a new snapshot was created, False otherwise
        """
        with self.lock:
            logger.info(f"Adding {user_id=}, {entry=}, {key=}, {value=}")
            try:
                entry_content = self.load(user_id, entry)
            except DbException:
                entry_content = {}
            # Do not save if the record is unchanged
            if key in entry_content:
                old_value = entry_content[key]
                if old_value == value:
                    return False
            entry_content[key] = value
            self.save(user_id, entry, entry_content)
            return True

    def put_many(self, user_id: str, entry, items: list):
        """
        Save a list of items as one single snapshot.
        A new snapshot will not be created if all the items already exist.
        You should not mix put_many() with save(), as they are two different ways to manage the db.
        :param user_id:
        :param entry:
        :param items:
        :return: True if a new snapshot was created, False otherwise
        """
        with self.lock:
            logger.info(f"Adding many {user_id=}, {entry=}, {items=}")
            try:
                entry_content = self.load(user_id, entry)
            except DbException:
                entry_content = {}
            is_dirty = False
            for item in items:
                key = item.get_key()
                if key in entry_content and entry_content[key] == item:
                    continue
                entry_content[key] = item
                is_dirty = True
            if is_dirty:
                self.save(user_id, entry, entry_content)
                return True
            return False

    def exists(self, entry: str):
        """
        Tell whether an entry exists
        :param entry:
        :return: True if the entry has at least one snapshot
        """
        with self.lock:
            return self._get_entry_digest(entry) is not None
    def get(self, user_id: str, entry: str, key: str | None = None, digest=None):
        """
        Retrieve an item from a snapshot
        :param user_id:
        :param entry:
        :param key: key of the item to return; when None, all items are returned as a list
        :param digest: specific snapshot to read; defaults to the latest one
        :return:
        """
        with self.lock:
            logger.info(f"Getting {user_id=}, {entry=}, {key=}, {digest=}")
            entry_content = self.load(user_id, entry, digest)
            if key is None:
                # return all items as a list, skipping the internal "__" tags
                return [v for k, v in entry_content.items() if not k.startswith("__")]
            return entry_content[key]

    def debug_head(self):
        with self.lock:
            head_path = os.path.join(self.root, self.HeadFile)
            # load
            try:
                with open(head_path, 'r') as file:
                    head = json.load(file)
            except FileNotFoundError:
                head = {}
            return head

    def debug_load(self, digest):
        with self.lock:
            target_file = self._get_obj_path(digest)
            with open(target_file, 'r', encoding='utf-8') as file:
                as_dict = json.load(file)
            return self.debug_serializer.deserialize(as_dict)
    def _serialize(self, obj):
        """
        Just call the serializer
        :param obj:
        :return:
        """
        # let the object provide its use_refs() hint when it defines one
        use_refs = getattr(obj, "use_refs")() if hasattr(obj, "use_refs") else None
        return self.serializer.serialize(obj, use_refs)

    def _deserialize(self, as_dict):
        return self.serializer.deserialize(as_dict)

    def _update_head(self, entry, digest):
        """
        Record the digest of the latest snapshot of an entry in the head file
        :param entry:
        :param digest:
        :return:
        """
        head_path = os.path.join(self.root, self.HeadFile)
        # load
        try:
            with open(head_path, 'r') as file:
                head = json.load(file)
        except FileNotFoundError:
            head = {}
        # update
        head[entry] = digest
        # and save
        with open(head_path, 'w') as file:
            json.dump(head, file)
    def _get_entry_digest(self, entry):
        """
        Search for the latest digest of a given entry
        :param entry:
        :return: the digest, or None when the entry is unknown
        """
        head_path = os.path.join(self.root, self.HeadFile)
        try:
            with open(head_path, 'r') as file:
                head = json.load(file)
                return head[str(entry)]
        except FileNotFoundError:
            return None
        except KeyError:
            return None

    def _get_head_path(self):
        """
        Location of the head file
        :return:
        """
        return os.path.join(self.root, self.HeadFile)

    def _get_obj_path(self, digest):
        """
        Location of an object, derived from its digest
        :param digest:
        :return:
        """
        return os.path.join(self.root, self.ObjectsFolder, digest[:24], digest)
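
For context, here is a minimal usage sketch of the engine introduced by this commit. Everything below is illustrative: the /tmp/demo_db root, the "furniture" entry name and the record values are made up, and it assumes the Serializer in core.serializer (not part of this file) can round-trip plain dicts and drops the internal "__" bookkeeping tags on deserialization.

from core.dbengine import DbEngine, FAKE_USER_ID

# throwaway root folder so the sketch does not touch a real database (hypothetical path)
engine = DbEngine(root="/tmp/demo_db")
engine.init()

# save() stores a full snapshot of the entry as a JSON object under
# <root>/objects/<first 24 chars of the sha256>/<sha256> and returns that digest;
# the head file maps the entry name to this latest digest
digest = engine.save(FAKE_USER_ID, "furniture", {"table": {"legs": 4}})

# load() returns the latest snapshot, or a specific one when a digest is passed
snapshot = engine.load(FAKE_USER_ID, "furniture", digest=digest)

# put() rewrites a single record and only creates a new snapshot when the value changed
changed = engine.put(FAKE_USER_ID, "furniture", key="chair", value={"legs": 3})

# get() without a key returns every record, skipping the internal "__" tags
records = engine.get(FAKE_USER_ID, "furniture")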