first commit

commit 878064b140
2025-10-17 21:08:20 +02:00
20 changed files with 1871 additions and 0 deletions

216
.gitignore vendored Normal file

@@ -0,0 +1,216 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# poetry.lock
# poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# Redis
*.rdb
*.aof
*.pid
# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/
# ActiveMQ
activemq-data/
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# .idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# Streamlit
.streamlit/secrets.toml

8
.idea/.gitignore generated vendored Normal file

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

11
.idea/MyDbEngine.iml generated Normal file

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
</content>
<orderEntry type="jdk" jdkName="Python 3.12.3 WSL (Ubuntu-24.04): (/home/kodjo/.virtualenvs/MyDbEngine/bin/python)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

14
.idea/inspectionProfiles/Project_Default.xml generated Normal file

@@ -0,0 +1,14 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyInitNewSignatureInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<list>
<option value="bson" />
</list>
</option>
</inspection_tool>
</profile>
</component>

6
.idea/inspectionProfiles/profiles_settings.xml generated Normal file

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml generated Normal file

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.12.3 WSL (Ubuntu-24.04): (/home/kodjo/.virtualenvs/MyDbEngine/bin/python)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12.3 WSL (Ubuntu-24.04): (/home/kodjo/.virtualenvs/MyDbEngine/bin/python)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/MyDbEngine.iml" filepath="$PROJECT_DIR$/.idea/MyDbEngine.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml generated Normal file

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

187
Readme.md Normal file

@@ -0,0 +1,187 @@
# DbEngine
A lightweight, git-inspired database engine for Python that maintains complete history of all modifications.
## Overview
DbEngine is a personal implementation of a versioned database engine that stores snapshots of data changes over time. Each modification creates a new immutable snapshot, allowing you to track the complete history of your data.
## Key Features
- **Version Control**: Every change creates a new snapshot with a unique digest (SHA-256 hash)
- **History Tracking**: Access any previous version of your data
- **Multi-tenant Support**: Isolated data storage per tenant
- **Thread-safe**: Built-in locking mechanism for concurrent access
- **Git-inspired Architecture**: Objects are stored in a content-addressable format
- **Efficient Storage**: Identical objects are stored only once
## Architecture
The engine uses a file-based storage system with the following structure:
```
.mytools_db/
├── {tenant_id}/
│ ├── head # Points to latest version of each entry
│ └── objects/
│ └── {digest_prefix}/
│ └── {full_digest} # Actual object data
└── refs/ # Shared references
```
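As an illustrative sketch (not part of the library), here is how a snapshot's digest maps onto this layout; the 24-character prefix split mirrors `_get_obj_path` in `src/core/dbengine.py`, and the tenant name is a made-up example:
```python
import hashlib
import json
import os

# Snapshots are serialized to JSON, hashed with SHA-256, and stored
# under <root>/<tenant_id>/objects/<first 24 hex chars>/<full digest>.
payload = json.dumps({"name": "John", "age": 30}, sort_keys=True, indent=4).encode("utf-8")
digest = hashlib.sha256(payload).hexdigest()
path = os.path.join(".mytools_db", "my_company", "objects", digest[:24], digest)
print(path)
```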
## Installation
```python
from db_engine import DbEngine
# Initialize with default root
db = DbEngine()
# Or specify custom root directory
db = DbEngine(root="/path/to/database")
```
## Basic Usage
### Initialize Database for a Tenant
```python
tenant_id = "my_company"
db.init(tenant_id)
```
### Save Data
```python
# Save a complete object
user_id = "john_doe"
entry = "users"
data = {"name": "John", "age": 30}
digest = db.save(tenant_id, user_id, entry, data)
```
### Load Data
```python
# Load latest version
data = db.load(tenant_id, entry="users")
# Load specific version by digest
data = db.load(tenant_id, entry="users", digest="abc123...")
```
### Work with Individual Records
```python
# Add or update a single record
db.put(tenant_id, user_id, entry="users", key="john", value={"name": "John", "age": 30})
# Add or update multiple records at once
items = {
"john": {"name": "John", "age": 30},
"jane": {"name": "Jane", "age": 25}
}
db.put_many(tenant_id, user_id, entry="users", items=items)
# Get a specific record
user = db.get(tenant_id, entry="users", key="john")
# Get all records
all_users = db.get(tenant_id, entry="users")
```
### Check Existence
```python
if db.exists(tenant_id, entry="users"):
print("Entry exists")
```
### Access History
```python
# Get history of an entry (returns list of digests)
history = db.history(tenant_id, entry="users", max_items=10)
# Load a previous version
old_data = db.load(tenant_id, entry="users", digest=history[1])
```
## Metadata
Each snapshot automatically includes metadata:
- `__parent__`: Digest of the previous version
- `__user_id__`: User ID who made the change
- `__date__`: Timestamp of the change (format: `YYYYMMDD HH:MM:SS`)
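As a hedged sketch (reusing `db`, `tenant_id`, and `user_id` from the examples above), when the snapshot is a plain dictionary these fields are visible directly on the loaded object:
```python
digest = db.save(tenant_id, user_id, "users", {"john": {"name": "John", "age": 30}})
snapshot = db.load(tenant_id, entry="users", digest=digest)
print(snapshot["__parent__"], snapshot["__user_id__"], snapshot["__date__"])
```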
## API Reference
### Core Methods
#### `init(tenant_id: str)`
Initialize database structure for a tenant.
#### `save(tenant_id: str, user_id: str, entry: str, obj: object) -> str`
Save a complete snapshot. Returns the digest of the saved object.
#### `load(tenant_id: str, entry: str, digest: str = None) -> object`
Load a snapshot. If digest is None, loads the latest version.
#### `put(tenant_id: str, user_id: str, entry: str, key: str, value: object) -> bool`
Add or update a single record. Returns True if a new snapshot was created.
#### `put_many(tenant_id: str, user_id: str, entry: str, items: list | dict) -> bool`
Add or update multiple records. Returns True if a new snapshot was created.
#### `get(tenant_id: str, entry: str, key: str = None, digest: str = None) -> object`
Retrieve record(s). If key is None, returns all records as a list.
#### `exists(tenant_id: str, entry: str) -> bool`
Check if an entry exists.
#### `history(tenant_id: str, entry: str, digest: str = None, max_items: int = 1000) -> list`
Get the history chain of digests for an entry.
#### `get_digest(tenant_id: str, entry: str) -> str`
Get the current digest for an entry.
## Usage Patterns
### Pattern 1: Snapshot-based (using `save()`)
Best for saving complete states of complex objects.
```python
config = {"theme": "dark", "language": "en"}
db.save(tenant_id, user_id, "config", config)
```
### Pattern 2: Record-based (using `put()` / `put_many()`)
Best for managing collections of items incrementally.
```python
db.put(tenant_id, user_id, "settings", "theme", "dark")
db.put(tenant_id, user_id, "settings", "language", "en")
```
**Note**: Don't mix these patterns for the same entry, as they use different data structures.
## Thread Safety
DbEngine uses an `RLock` internally, so it is safe to call from multiple threads within a single process. It does not coordinate access across separate processes.
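For illustration, a minimal sketch of concurrent use (reusing `db`, `tenant_id`, and `user_id` from the examples above); each `put()` acquires the internal lock, so snapshots are written one at a time:
```python
import threading

def worker(i):
    # each call loads the latest snapshot, updates one key, and saves a new snapshot
    db.put(tenant_id, user_id, "counters", f"thread_{i}", {"value": i})

threads = [threading.Thread(target=worker, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```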
## Exceptions
- `DbException`: Raised for database-related errors (missing entries, invalid parameters, etc.)
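For example (a sketch, assuming `DbException` is importable from the same module as `DbEngine`):
```python
from db_engine import DbException  # assumed to sit next to DbEngine, as in the import above

try:
    db.load(tenant_id, entry="does_not_exist")
except DbException as exc:
    print(f"No such entry: {exc}")
```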
## Performance Considerations
- Objects are stored as JSON files
- Identical objects (same SHA-256) are stored only once
- History chains can become long; use `max_items` parameter to limit traversal
- File system performance impacts overall speed
## License
This is a personal implementation. Please check with the author for licensing terms.

16
main.py Normal file

@@ -0,0 +1,16 @@
# This is a sample Python script.
# Press Ctrl+F5 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
def print_hi(name):
# Use a breakpoint in the code line below to debug your script.
print(f'Hi, {name}') # Press F9 to toggle the breakpoint.
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
print_hi('PyCharm')
# See PyCharm help at https://www.jetbrains.com/help/pycharm/

5
requirements.txt Normal file

@@ -0,0 +1,5 @@
iniconfig==2.1.0
packaging==25.0
pluggy==1.6.0
Pygments==2.19.2
pytest==8.4.2

0
src/__init__.py Normal file

0
src/core/__init__.py Normal file

391
src/core/dbengine.py Normal file

@@ -0,0 +1,391 @@
import datetime
import hashlib
import io
import json
import logging
import os
import pickle
from threading import RLock
from core.serializer import Serializer
from core.utils import get_stream_digest
TYPE_KEY = "__type__"
TAG_PARENT = "__parent__"
TAG_USER = "__user_id__"
TAG_DATE = "__date__"
BUFFER_SIZE = 4096
logger = logging.getLogger(__name__)
class DbException(Exception):
pass
class RefHelper:
def __init__(self, get_ref_path):
self.get_ref_path = get_ref_path
def save_ref(self, obj):
"""
:param obj:
:return:
"""
buffer = io.BytesIO()
pickler = pickle.Pickler(buffer)
pickler.dump(obj)
digest = get_stream_digest(buffer)
target_path = self.get_ref_path(digest)
if not os.path.exists(os.path.dirname(target_path)):
os.makedirs(os.path.dirname(target_path))
buffer.seek(0)
with open(self.get_ref_path(digest), "wb") as file:
while chunk := buffer.read(BUFFER_SIZE):
file.write(chunk)
logger.debug(f"Saved object type '{type(obj).__name__}' with digest {digest}")
return digest
def load_ref(self, digest):
"""
:param digest:
:return:
"""
with open(self.get_ref_path(digest), 'rb') as file:
return pickle.load(file)
class DbEngine:
"""
Personal implementation of DB engine
Inspired by the way git manages its files
Designed to keep history of the modifications
"""
ObjectsFolder = "objects" # group objects in the same folder
HeadFile = "head" # used to keep track of the latest version of all entries
def __init__(self, root: str = None):
self.root = root or ".mytools_db"
self.lock = RLock()
def is_initialized(self, tenant_id: str):
"""
:return:
"""
return os.path.exists(self._get_user_root(tenant_id))
def init(self, tenant_id: str):
"""
Make sure that the DbEngine is properly initialized
:return:
"""
if not os.path.exists(self._get_user_root(tenant_id)):
logger.debug(f"Creating root folder in {os.path.abspath(self._get_user_root(tenant_id))}.")
os.makedirs(self._get_user_root(tenant_id))
def save(self, tenant_id: str, user_id: str, entry: str, obj: object) -> str:
"""
Save a snapshot of an entry
:param tenant_id:
:param user_id:
:param entry:
:param obj: snapshot to save
:return:
"""
with self.lock:
logger.info(f"Saving {tenant_id=}, {entry=}, {obj=}")
if not tenant_id:
raise DbException("tenant_id is None")
if not user_id:
raise DbException("user_id is None")
if not entry:
raise DbException("entry is None")
# prepare the data
as_dict = self._serialize(obj)
as_dict[TAG_PARENT] = [self._get_entry_digest(tenant_id, entry)]
as_dict[TAG_USER] = user_id
as_dict[TAG_DATE] = datetime.datetime.now().strftime('%Y%m%d %H:%M:%S %z')
# transform into a stream
as_str = json.dumps(as_dict, sort_keys=True, indent=4)
logger.debug(f"Serialized object : {as_str}")
byte_stream = as_str.encode("utf-8")
# compute the digest to know where to store it
digest = hashlib.sha256(byte_stream).hexdigest()
target_path = self._get_obj_path(tenant_id, digest)
if os.path.exists(target_path):
# the same object is already saved. Nothing to do
return digest
# save the new value
if not os.path.exists(os.path.dirname(target_path)):
os.makedirs(os.path.dirname(target_path))
with open(target_path, "wb") as file:
file.write(byte_stream)
# update the head to remember where the latest entry is
self._update_head(tenant_id, entry, digest)
logger.debug(f"New head for entry '{entry}' is {digest}")
return digest
def load(self, tenant_id: str, entry, digest=None):
"""
Loads a snapshot
:param tenant_id:
:param entry:
:param digest:
:return:
"""
with self.lock:
logger.info(f"Loading {tenant_id=}, {entry=}, {digest=}")
digest_to_use = digest or self._get_entry_digest(tenant_id, entry)
logger.debug(f"Using digest {digest_to_use}.")
if digest_to_use is None:
raise DbException(entry)
target_file = self._get_obj_path(tenant_id, digest_to_use)
with open(target_file, 'r', encoding='utf-8') as file:
as_dict = json.load(file)
return self._deserialize(as_dict)
def put(self, tenant_id: str, user_id, entry, key: str, value: object):
"""
Save a specific record.
This will create a new snapshot if the record is new or different.
You should not mix put()/put_many() and save() for the same entry, as they are two different ways to manage the db.
:param user_id:
:param tenant_id:
:param entry:
:param key:
:param value:
:return:
"""
with self.lock:
logger.info(f"Adding {tenant_id=}, {entry=}, {key=}, {value=}")
try:
entry_content = self.load(tenant_id, entry)
except DbException:
entry_content = {}
# Do not save if the entry is the same
if key in entry_content:
old_value = entry_content[key]
if old_value == value:
return False
entry_content[key] = value
self.save(tenant_id, user_id, entry, entry_content)
return True
def put_many(self, tenant_id: str, user_id, entry, items: list | dict):
"""
Save a list of items as a single snapshot
A new snapshot will not be created if all the items already exist
You should not mix the usage of put_many() and save() for the same entry, as they are two different ways to manage the db
:param tenant_id:
:param user_id:
:param entry:
:param items:
:return:
"""
with self.lock:
logger.info(f"Adding many {tenant_id=}, {entry=}, {items=}")
try:
entry_content = self.load(tenant_id, entry)
except DbException:
entry_content = {}
is_dirty = False
if isinstance(items, dict):
for key, item in items.items():
if key in entry_content and entry_content[key] == item:
continue
else:
entry_content[key] = item
is_dirty = True
else:
for item in items:
key = item.get_key()
if key in entry_content and entry_content[key] == item:
continue
else:
entry_content[key] = item
is_dirty = True
if is_dirty:
self.save(tenant_id, user_id, entry, entry_content)
return True
return False
def exists(self, tenant_id, entry: str):
"""
Tells if an entry exists
:param tenant_id:
:param entry:
:return:
"""
with self.lock:
return self._get_entry_digest(tenant_id, entry) is not None
def get(self, tenant_id: str, entry: str, key: str | None = None, digest=None):
"""
Retrieve an item from the snapshot
:param tenant_id:
:param entry:
:param key:
:param digest:
:return:
"""
with self.lock:
logger.info(f"Getting {tenant_id=}, {entry=}, {key=}, {digest=}")
entry_content = self.load(tenant_id, entry, digest)
if key is None:
# return all items as list
return [v for k, v in entry_content.items() if not k.startswith("__")]
try:
return entry_content[key]
except KeyError:
raise DbException(f"Key '{key}' not found in entry '{entry}'")
def history(self, tenant_id, entry, digest=None, max_items=1000):
"""
Gives the current digest and all its ancestors
:param tenant_id:
:param entry:
:param digest:
:param max_items:
:return:
"""
with self.lock:
logger.info(f"History for {tenant_id=}, {entry=}, {digest=}")
digest_to_use = digest or self._get_entry_digest(tenant_id, entry)
logger.debug(f"Using digest {digest_to_use}.")
count = 0
history = []
while True:
if count >= max_items or digest_to_use is None:
break
history.append(digest_to_use)
count += 1
try:
target_file = self._get_obj_path(tenant_id, digest_to_use)
with open(target_file, 'r', encoding='utf-8') as file:
as_dict = json.load(file)
digest_to_use = as_dict[TAG_PARENT][0]
except FileNotFoundError:
break
return history
def get_digest(self, tenant_id, entry):
return self._get_entry_digest(tenant_id, entry)
def _serialize(self, obj):
"""
Just call the serializer
:param obj:
:return:
"""
with self.lock:
serializer = Serializer(RefHelper(self._get_ref_path))
use_refs = getattr(obj, "use_refs")() if hasattr(obj, "use_refs") else None
return serializer.serialize(obj, use_refs)
def _deserialize(self, as_dict):
with self.lock:
serializer = Serializer(RefHelper(self._get_ref_path))
return serializer.deserialize(as_dict)
def _update_head(self, tenant_id, entry, digest):
"""
Update the head file so that the entry points to the latest digest
:param entry:
:param digest:
:return:
"""
head_path = os.path.join(self.root, tenant_id, self.HeadFile)
# load
try:
with open(head_path, 'r') as file:
head = json.load(file)
except FileNotFoundError:
head = {}
# update
head[entry] = digest
# and save
with open(head_path, 'w') as file:
json.dump(head, file)
def _get_user_root(self, tenant_id):
return os.path.join(self.root, tenant_id)
def _get_entry_digest(self, tenant_id, entry):
"""
Search for the latest digest, for a given entry
:param entry:
:return:
"""
head_path = os.path.join(self._get_user_root(tenant_id), self.HeadFile)
try:
with open(head_path, 'r') as file:
head = json.load(file)
return head[str(entry)]
except FileNotFoundError:
return None
except KeyError:
return None
def _get_head_path(self, tenant_id: str):
"""
Location of the Head file
:return:
"""
return os.path.join(self._get_user_root(tenant_id), self.HeadFile)
def _get_obj_path(self, tenant_id, digest):
"""
Location of objects
:param digest:
:return:
"""
return os.path.join(self._get_user_root(tenant_id), "objects", digest[:24], digest)
def _get_ref_path(self, digest):
"""
Location of references. They are not stored under a tenant folder; refs are shared.
:param digest:
:return:
"""
return os.path.join(self.root, "refs", digest[:24], digest)

59
src/core/handlers.py Normal file

@@ -0,0 +1,59 @@
# The complexity of some data types is delegated to dedicated handlers
import datetime
from core.utils import has_tag
TAG_SPECIAL = "__special__"
class BaseHandler:
def is_eligible_for(self, obj):
pass
def tag(self):
pass
def serialize(self, obj) -> dict:
pass
def deserialize(self, data: dict) -> object:
pass
class DateHandler(BaseHandler):
def is_eligible_for(self, obj):
return isinstance(obj, datetime.date)
def tag(self):
return "Date"
def serialize(self, obj):
return {
TAG_SPECIAL: self.tag(),
"year": obj.year,
"month": obj.month,
"day": obj.day,
}
def deserialize(self, data: dict) -> object:
return datetime.date(year=data["year"], month=data["month"], day=data["day"])
class Handlers:
def __init__(self, handlers_):
self.handlers = handlers_
def get_handler(self, obj):
if has_tag(obj, TAG_SPECIAL):
return [h for h in self.handlers if h.tag() == obj[TAG_SPECIAL]][0]
for h in self.handlers:
if h.is_eligible_for(obj):
return h
return None
handlers = Handlers([DateHandler()])

201
src/core/serializer.py Normal file

@@ -0,0 +1,201 @@
import copy
from core.handlers import handlers
from core.utils import has_tag, is_dictionary, is_list, is_object, is_set, is_tuple, is_primitive, importable_name, \
get_class, get_full_qualified_name, is_enum
TAG_ID = "__id__"
TAG_OBJECT = "__object__"
TAG_TUPLE = "__tuple__"
TAG_SET = "__set__"
TAG_REF = "__ref__"
TAG_ENUM = "__enum__"
class Serializer:
def __init__(self, ref_helper=None):
self.ref_helper = ref_helper
self.ids = {}
self.objs = []
self.id_count = 0
def serialize(self, obj, use_refs=None):
"""
From object to dictionary
:param obj:
:param use_refs: paths to store as refs (sometimes it's easier/quicker to fall back to pickle!)
:return:
"""
if use_refs:
use_refs = set("root." + path for path in use_refs)
return self._serialize(obj, use_refs or set(), "root")
def deserialize(self, obj: dict):
"""
From dictionary to object (or primitive)
:param obj:
:return:
"""
if has_tag(obj, TAG_REF):
return self.ref_helper.load_ref(obj[TAG_REF])
if has_tag(obj, TAG_ID):
return self._restore_id(obj)
if has_tag(obj, TAG_TUPLE):
return tuple([self.deserialize(v) for v in obj[TAG_TUPLE]])
if has_tag(obj, TAG_SET):
return set([self.deserialize(v) for v in obj[TAG_SET]])
if has_tag(obj, TAG_ENUM):
return self._deserialize_enum(obj)
if has_tag(obj, TAG_OBJECT):
return self._deserialize_obj_instance(obj)
if (handler := handlers.get_handler(obj)) is not None:
return handler.deserialize(obj)
if is_list(obj):
return [self.deserialize(v) for v in obj]
if is_dictionary(obj):
return {k: self.deserialize(v) for k, v in obj.items()}
return obj
def _serialize(self, obj, use_refs: set | None, path):
if use_refs is not None and path in use_refs:
digest = self.ref_helper.save_ref(obj)
return {TAG_REF: digest}
if is_primitive(obj):
return obj
if is_tuple(obj):
return {TAG_TUPLE: [self._serialize(v, use_refs, path) for v in obj]}
if is_set(obj):
return {TAG_SET: [self._serialize(v, use_refs, path) for v in obj]}
if is_list(obj):
return [self._serialize(v, use_refs, path) for v in obj]
if is_dictionary(obj):
return {k: self._serialize(v, use_refs, path) for k, v in obj.items()}
if is_enum(obj):
return self._serialize_enum(obj, use_refs, path)
if is_object(obj):
return self._serialize_obj_instance(obj, use_refs, path)
raise Exception(f"Cannot serialize '{obj}'")
def _serialize_enum(self, obj, use_refs: set | None, path):
# check if the object was already seen
if (seen := self._check_already_seen(obj)) is not None:
return seen
data = {}
class_name = get_full_qualified_name(obj)
data[TAG_ENUM] = class_name + "." + obj.name
return data
def _serialize_obj_instance(self, obj, use_refs: set | None, path):
# check if the object was already seen
if (seen := self._check_already_seen(obj)) is not None:
return seen
# try to manage use_refs
current_obj_use_refs = getattr(obj, "use_refs")() if hasattr(obj, "use_refs") else None
if current_obj_use_refs:
use_refs.update(f"{path}.{sub_path}" for sub_path in current_obj_use_refs)
if (handler := handlers.get_handler(obj)) is not None:
return handler.serialize(obj)
# flatten
data = {}
cls = obj.__class__ if hasattr(obj, '__class__') else type(obj)
class_name = importable_name(cls)
data[TAG_OBJECT] = class_name
if hasattr(obj, "__dict__"):
for k, v in obj.__dict__.items():
data[k] = self._serialize(v, use_refs, f"{path}.{k}")
return data
def _check_already_seen(self, obj):
_id = self._exist(obj)
if _id is not None:
return {TAG_ID: _id}
# else:
self.ids[id(obj)] = self.id_count
self.objs.append(obj)
self.id_count = self.id_count + 1
return None
def _deserialize_enum(self, obj):
cls_name, enum_name = obj[TAG_ENUM].rsplit(".", 1)
cls = get_class(cls_name)
obj = getattr(cls, enum_name)
self.objs.append(obj)
return obj
def _deserialize_obj_instance(self, obj):
cls = get_class(obj[TAG_OBJECT])
instance = cls.__new__(cls)
self.objs.append(instance)
for k, v in obj.items():
value = self.deserialize(v)
setattr(instance, k, value)
return instance
def _restore_id(self, obj):
try:
return self.objs[obj[TAG_ID]]
except IndexError:
pass
def _exist(self, obj):
try:
v = self.ids[id(obj)]
return v
except KeyError:
return None
class DebugSerializer(Serializer):
def __init__(self, ref_helper=None):
super().__init__(ref_helper)
def _deserialize_obj_instance(self, obj):
data = {TAG_OBJECT: obj[TAG_OBJECT]}
self.objs.append(data)
for k, v in obj.items():
value = self.deserialize(v)
data[k] = value
return data
def _deserialize_enum(self, obj):
cls_name, enum_name = obj[TAG_ENUM].rsplit(".", 1)
self.objs.append(enum_name)
return enum_name
def _restore_id(self, obj):
try:
return copy.deepcopy(self.objs[obj[TAG_ID]])
except IndexError:
pass

195
src/core/utils.py Normal file

@@ -0,0 +1,195 @@
import ast
import hashlib
import importlib
import types
from enum import Enum
PRIMITIVES = (str, bool, type(None), int, float)
def get_stream_digest(stream):
"""
Compute a SHA256 from a stream
:param stream:
:type stream:
:return:
:rtype:
"""
sha256_hash = hashlib.sha256()
stream.seek(0)
for byte_block in iter(lambda: stream.read(4096), b""):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
def has_tag(obj, tag):
"""
:param obj:
:param tag:
:return:
"""
return type(obj) is dict and tag in obj
def is_primitive(obj):
"""
:param obj:
:return:
"""
return isinstance(obj, PRIMITIVES)
def is_dictionary(obj):
"""
:param obj:
:return:
"""
return isinstance(obj, dict)
def is_list(obj):
"""
:param obj:
:return:
"""
return isinstance(obj, list)
def is_set(obj):
"""
:param obj:
:return:
"""
return isinstance(obj, set)
def is_tuple(obj):
"""
:param obj:
:return:
"""
return isinstance(obj, tuple)
def is_enum(obj):
return isinstance(obj, Enum)
def is_object(obj):
"""Returns True is obj is a reference to an object instance."""
return (isinstance(obj, object) and
not isinstance(obj, (type,
types.FunctionType,
types.BuiltinFunctionType,
types.GeneratorType)))
def get_full_qualified_name(obj):
"""
Returns the fully qualified name of a class (including its module name)
:param obj:
:return:
"""
if obj.__class__ == type:
module = obj.__module__
if module is None or module == str.__class__.__module__:
return obj.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__name__
else:
module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__class__.__name__
def importable_name(cls):
"""
Fully qualified name (prefixed by builtin when needed)
"""
# Use the fully-qualified name if available (Python >= 3.3)
name = getattr(cls, '__qualname__', cls.__name__)
# manage python 2
lookup = dict(__builtin__='builtins', exceptions='builtins')
module = lookup.get(cls.__module__, cls.__module__)
return f"{module}.{name}"
def get_class(qualified_class_name: str):
"""
Dynamically loads and returns a class type from its fully qualified name.
Note that the class is not instantiated.
:param qualified_class_name: Fully qualified name of the class (e.g., 'some.module.ClassName').
:return: The class object.
:raises ImportError: If the module cannot be imported.
:raises AttributeError: If the class cannot be resolved in the module.
"""
module_name, class_name = qualified_class_name.rsplit(".", 1)
try:
module = importlib.import_module(module_name)
except ModuleNotFoundError as e:
raise ImportError(f"Could not import module '{module_name}' for '{qualified_class_name}': {e}")
if not hasattr(module, class_name):
raise AttributeError(f"Component '{class_name}' not found in '{module.__name__}'.")
return getattr(module, class_name)
class UnreferencedNamesVisitor(ast.NodeVisitor):
"""
Try to find symbols that will be referenced by the AST.
These can be variable names, but also function names.
"""
def __init__(self):
self.names = set()
def get_names(self, node):
self.visit(node)
return self.names
def visit_Name(self, node):
self.names.add(node.id)
def visit_For(self, node: ast.For):
self.visit_selected(node, ["body", "orelse"])
def visit_selected(self, node, to_visit):
"""Called if no explicit visitor function exists for a node."""
for field in to_visit:
value = getattr(node, field)
if isinstance(value, list):
for item in value:
if isinstance(item, ast.AST):
self.visit(item)
elif isinstance(value, ast.AST):
self.visit(value)
def visit_Call(self, node: ast.Call):
self.visit_selected(node, ["args", "keywords"])
def visit_keyword(self, node: ast.keyword):
"""
Keyword nodes represent keyword arguments passed in a call,
e.g. fun(positional, key=value) or fun(**keywords)
:param node:
:type node:
:return:
:rtype:
"""
self.names.add(node.arg)
self.visit_selected(node, ["value"])

0
tests/__init__.py Normal file

273
tests/test_dbengine.py Normal file

@@ -0,0 +1,273 @@
import os.path
import shutil
import pytest
from core.dbengine import DbEngine, DbException, TAG_PARENT, TAG_USER, TAG_DATE
DB_ENGINE_ROOT = "TestDBEngineRoot"
FAKE_TENANT_ID = "FakeTenantId"
FAKE_USER_EMAIL = "fake_user@me.com"
class DummyObj:
def __init__(self, a, b, c):
self.a = a
self.b = b
self.c = c
def __eq__(self, other):
if id(self) == id(other):
return True
if not isinstance(other, DummyObj):
return False
return self.a == other.a and self.b == other.b and self.c == other.c
def __hash__(self):
return hash((self.a, self.b, self.c))
class DummyObjWithRef(DummyObj):
@staticmethod
def use_refs() -> set:
return {"c"}
class DummyObjWithKey(DummyObj):
def get_key(self) -> str:
return self.a
@pytest.fixture()
def engine():
if os.path.exists(DB_ENGINE_ROOT):
shutil.rmtree(DB_ENGINE_ROOT)
engine = DbEngine(DB_ENGINE_ROOT)
engine.init(FAKE_TENANT_ID)
yield engine
shutil.rmtree(DB_ENGINE_ROOT)
@pytest.fixture()
def dummy_obj():
return DummyObj(1, "a", False)
@pytest.fixture()
def dummy_obj2():
return DummyObj(2, "b", True)
@pytest.fixture()
def dummy_obj_with_ref():
data = {
'Key1': ['A', 'B', 'C'],
'Key2': ['X', 'Y', 'Z'],
'Percentage': [0.1, 0.2, 0.15],
}
return DummyObjWithRef(1, "a", data)
def test_i_can_test_init():
if os.path.exists(DB_ENGINE_ROOT):
shutil.rmtree(DB_ENGINE_ROOT)
engine = DbEngine(DB_ENGINE_ROOT)
assert not engine.is_initialized(FAKE_TENANT_ID)
engine.init(FAKE_TENANT_ID)
assert engine.is_initialized(FAKE_TENANT_ID)
def test_i_can_save_and_load(engine, dummy_obj):
digest = engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj)
res = engine.load(FAKE_TENANT_ID, "MyEntry")
assert digest is not None
assert isinstance(res, DummyObj)
assert res.a == dummy_obj.a
assert res.b == dummy_obj.b
assert res.c == dummy_obj.c
# check that the files are created
assert os.path.exists(os.path.join(DB_ENGINE_ROOT, FAKE_TENANT_ID, "objects"))
assert os.path.exists(os.path.join(DB_ENGINE_ROOT, FAKE_TENANT_ID, "head"))
def test_save_invalid_inputs(engine):
"""
Test save with invalid inputs.
"""
with pytest.raises(DbException):
engine.save(None, FAKE_USER_EMAIL, "InvalidEntry", DummyObj(1, 2, 3))
with pytest.raises(DbException):
engine.save(FAKE_TENANT_ID, None, "InvalidEntry", DummyObj(1, 2, 3))
with pytest.raises(DbException):
engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "", DummyObj(1, 2, 3))
with pytest.raises(DbException):
engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, None, DummyObj(1, 2, 3))
def test_i_can_save_using_ref(engine, dummy_obj_with_ref):
engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj_with_ref)
res = engine.load(FAKE_TENANT_ID, "MyEntry")
assert isinstance(res, DummyObjWithRef)
assert res.a == dummy_obj_with_ref.a
assert res.b == dummy_obj_with_ref.b
assert res.c == dummy_obj_with_ref.c
# check that the files are created
assert os.path.exists(os.path.join(DB_ENGINE_ROOT, FAKE_TENANT_ID, "objects"))
assert os.path.exists(os.path.join(DB_ENGINE_ROOT, FAKE_TENANT_ID, "head"))
assert os.path.exists(os.path.join(DB_ENGINE_ROOT, "refs"))
def test_refs_are_shared_across_users(engine, dummy_obj_with_ref):
engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj_with_ref)
engine.save("AnotherUserId", "AnotherUser", "AnotherMyEntry", dummy_obj_with_ref)
refs_path = os.path.join(DB_ENGINE_ROOT, "refs")
assert len(os.listdir(refs_path)) == 1
def test_metadata_are_correctly_set(engine, dummy_obj):
digest = engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", {"obj": dummy_obj})
as_dict = engine.load(FAKE_TENANT_ID, "MyEntry", digest)
assert as_dict[TAG_PARENT] == [None]
assert as_dict[TAG_USER] == FAKE_USER_EMAIL
assert as_dict[TAG_DATE] is not None
def test_i_can_track_parents(engine):
digest = engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", {"obj": DummyObj(1, "a", False)})
second_digest = engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", {"obj": DummyObj(1, "a", True)})
as_dict = engine.load(FAKE_TENANT_ID, "MyEntry", second_digest)
assert as_dict[TAG_PARENT] == [digest]
def test_i_can_put_and_get_one_object(engine, dummy_obj):
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
from_db = engine.get(FAKE_TENANT_ID, "MyEntry", "key1")
assert from_db == dummy_obj
def test_i_can_put_and_get_multiple_objects(engine, dummy_obj, dummy_obj2):
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key2", dummy_obj2)
from_db1 = engine.get(FAKE_TENANT_ID, "MyEntry", "key1")
from_db2 = engine.get(FAKE_TENANT_ID, "MyEntry", "key2")
assert from_db1 == dummy_obj
assert from_db2 == dummy_obj2
as_dict = engine.load(FAKE_TENANT_ID, "MyEntry")
assert "key1" in as_dict
assert "key2" in as_dict
assert as_dict["key1"] == dummy_obj
assert as_dict["key2"] == dummy_obj2
def test_i_automatically_replace_keys(engine, dummy_obj, dummy_obj2):
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj2)
from_db1 = engine.get(FAKE_TENANT_ID, "MyEntry", "key1")
assert from_db1 == dummy_obj2
all_items = engine.get(FAKE_TENANT_ID, "MyEntry")
assert all_items == [dummy_obj2]
def test_i_do_not_save_twice_when_the_entries_are_the_same(engine, dummy_obj):
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
entry_content = engine.load(FAKE_TENANT_ID, "MyEntry")
assert entry_content[TAG_PARENT] == [None]
# Save the same entry again
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
entry_content = engine.load(FAKE_TENANT_ID, "MyEntry")
assert entry_content[TAG_PARENT] == [None] # still no other parent
def test_i_can_put_many(engine):
dummy_obj = DummyObjWithKey("1", "a", True)
dummy_obj2 = DummyObjWithKey("2", "b", False)
engine.put_many(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", [dummy_obj, dummy_obj2])
from_db1 = engine.get(FAKE_TENANT_ID, "MyEntry", "1")
from_db2 = engine.get(FAKE_TENANT_ID, "MyEntry", "2")
assert from_db1 == dummy_obj
assert from_db2 == dummy_obj2
entry_content = engine.load(FAKE_TENANT_ID, "MyEntry")
assert entry_content[TAG_PARENT] == [None] # only one save was made
def test_put_many_save_only_if_necessary(engine):
dummy_obj = DummyObjWithKey("1", "a", True)
dummy_obj2 = DummyObjWithKey("2", "b", False)
engine.put_many(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", [dummy_obj, dummy_obj2])
engine.put_many(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", [dummy_obj, dummy_obj2])
entry_content = engine.load(FAKE_TENANT_ID, "MyEntry")
assert entry_content[TAG_PARENT] == [None] # Still None, nothing was saved
def test_i_can_retrieve_history_using_put(engine):
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", DummyObj(1, "a", False))
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", DummyObj(2, "a", False))
engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", DummyObj(3, "a", False))
history = engine.history(FAKE_TENANT_ID, "MyEntry")
assert len(history) == 3
v0 = engine.load(FAKE_TENANT_ID, "MyEntry", history[0])
v1 = engine.load(FAKE_TENANT_ID, "MyEntry", history[1])
v2 = engine.load(FAKE_TENANT_ID, "MyEntry", history[2])
assert v0["key1"] == DummyObj(3, "a", False)
assert v1["key1"] == DummyObj(2, "a", False)
assert v2["key1"] == DummyObj(1, "a", False)
assert v2[TAG_PARENT] == [None]
def test_i_can_retrieve_history_using_save(engine):
engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", {"key1": DummyObj(1, "a", False)})
engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", {"key1": DummyObj(2, "a", False)})
engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", {"key1": DummyObj(3, "a", False)})
history = engine.history(FAKE_TENANT_ID, "MyEntry")
assert len(history) == 3
v0 = engine.load(FAKE_TENANT_ID, "MyEntry", history[0])
v1 = engine.load(FAKE_TENANT_ID, "MyEntry", history[1])
v2 = engine.load(FAKE_TENANT_ID, "MyEntry", history[2])
assert v0["key1"] == DummyObj(3, "a", False)
assert v1["key1"] == DummyObj(2, "a", False)
assert v2["key1"] == DummyObj(1, "a", False)
assert v2[TAG_PARENT] == [None]

268
tests/test_serializer.py Normal file

@@ -0,0 +1,268 @@
import dataclasses
import datetime
import hashlib
import pickle
from enum import Enum
import pytest
from core.serializer import TAG_TUPLE, TAG_SET, Serializer, TAG_OBJECT, TAG_ID, TAG_REF
class Obj:
def __init__(self, a, b, c):
self.a = a
self.b = b
self.c = c
def __eq__(self, other):
if id(self) == id(other):
return True
if not isinstance(other, Obj):
return False
return self.a == other.a and self.b == other.b and self.c == other.c
def __hash__(self):
return hash((self.a, self.b, self.c))
class Obj2:
class InnerClass:
def __init__(self, x):
self.x = x
def __eq__(self, other):
if not isinstance(other, Obj2.InnerClass):
return False
return self.x == other.x
def __hash__(self):
return hash(self.x)
def __init__(self, a, b, x):
self.a = a
self.b = b
self.x = Obj2.InnerClass(x)
def __eq__(self, other):
if not isinstance(other, Obj2):
return False
return (self.a == other.a and
self.b == other.b and
self.x == other.x)
def __hash__(self):
return hash((self.a, self.b))
class ObjEnum(Enum):
A = 1
B = "second"
C = "last"
@dataclasses.dataclass
class DummyComplexClass:
prop1: str
prop2: Obj
prop3: ObjEnum
class DummyRefHelper:
"""
When something is too complicated to serialize, we just default to pickle
That is what this helper class is doing
"""
def __init__(self):
self.refs = {}
def save_ref(self, obj):
sha256_hash = hashlib.sha256()
pickled_data = pickle.dumps(obj)
sha256_hash.update(pickled_data)
digest = sha256_hash.hexdigest()
self.refs[digest] = pickled_data
return digest
def load_ref(self, digest):
return pickle.loads(self.refs[digest])
@pytest.mark.parametrize("obj, expected", [
(1, 1),
(3.14, 3.14),
("a string", "a string"),
(True, True),
(None, None),
([1, 3.14, "a string"], [1, 3.14, "a string"]),
((1, 3.14, "a string"), {TAG_TUPLE: [1, 3.14, "a string"]}),
({1}, {TAG_SET: [1]}),
({"a": "a", "b": 3.14, "c": True}, {"a": "a", "b": 3.14, "c": True}),
({1: "a", 2: 3.14, 3: True}, {1: "a", 2: 3.14, 3: True}),
([1, [3.14, "a string"]], [1, [3.14, "a string"]]),
([1, (3.14, "a string")], [1, {TAG_TUPLE: [3.14, "a string"]}]),
([], []),
])
def test_i_can_flatten_and_restore_primitives(obj, expected):
serializer = Serializer()
flatten = serializer.serialize(obj)
assert flatten == expected
decoded = serializer.deserialize(flatten)
assert decoded == obj
def test_i_can_flatten_and_restore_instances():
serializer = Serializer()
obj1 = Obj(1, "b", True)
obj2 = Obj(3.14, ("a", "b"), obj1)
flatten = serializer.serialize(obj2)
assert flatten == {TAG_OBJECT: 'tests.test_serializer.Obj',
'a': 3.14,
'b': {TAG_TUPLE: ['a', 'b']},
'c': {TAG_OBJECT: 'tests.test_serializer.Obj',
'a': 1,
'b': 'b',
'c': True}}
decoded = serializer.deserialize(flatten)
assert decoded == obj2
def test_i_can_flatten_and_restore_enum():
serializer = Serializer()
obj1 = ObjEnum.A
obj2 = ObjEnum.B
obj3 = ObjEnum.C
wrapper = {
"a": obj1,
"b": obj2,
"c": obj3,
"d": obj1
}
flatten = serializer.serialize(wrapper)
assert flatten == {'a': {'__enum__': 'tests.test_serializer.ObjEnum.A'},
'b': {'__enum__': 'tests.test_serializer.ObjEnum.B'},
'c': {'__enum__': 'tests.test_serializer.ObjEnum.C'},
'd': {'__id__': 0}}
decoded = serializer.deserialize(flatten)
assert decoded == wrapper
def test_i_can_flatten_and_restore_list_with_enum():
serializer = Serializer()
obj = [DummyComplexClass("a", Obj(1, "a", ObjEnum.A), ObjEnum.A),
DummyComplexClass("b", Obj(2, "b", ObjEnum.B), ObjEnum.B),
DummyComplexClass("c", Obj(3, "c", ObjEnum.C), ObjEnum.B)]
flatten = serializer.serialize(obj)
assert flatten == [{'__object__': 'tests.test_serializer.DummyComplexClass',
'prop1': 'a',
'prop2': {'__object__': 'tests.test_serializer.Obj',
'a': 1,
'b': 'a',
'c': {'__enum__': 'tests.test_serializer.ObjEnum.A'}},
'prop3': {'__id__': 2}},
{'__object__': 'tests.test_serializer.DummyComplexClass',
'prop1': 'b',
'prop2': {'__object__': 'tests.test_serializer.Obj',
'a': 2,
'b': 'b',
'c': {'__enum__': 'tests.test_serializer.ObjEnum.B'}},
'prop3': {'__id__': 5}},
{'__object__': 'tests.test_serializer.DummyComplexClass',
'prop1': 'c',
'prop2': {'__object__': 'tests.test_serializer.Obj',
'a': 3,
'b': 'c',
'c': {'__enum__': 'tests.test_serializer.ObjEnum.C'}},
'prop3': {'__id__': 5}}]
decoded = serializer.deserialize(flatten)
assert decoded == obj
def test_i_can_manage_circular_reference():
serializer = Serializer()
obj1 = Obj(1, "b", True)
obj1.c = obj1
flatten = serializer.serialize(obj1)
assert flatten == {TAG_OBJECT: 'tests.test_serializer.Obj',
'a': 1,
'b': 'b',
'c': {TAG_ID: 0}}
decoded = serializer.deserialize(flatten)
assert decoded.a == obj1.a
assert decoded.b == obj1.b
assert decoded.c == decoded
def test_i_can_use_refs_on_primitive():
serializer = Serializer(DummyRefHelper())
obj1 = Obj(1, "b", True)
flatten = serializer.serialize(obj1, ["c"])
assert flatten == {TAG_OBJECT: 'tests.test_serializer.Obj',
'a': 1,
'b': 'b',
'c': {TAG_REF: '112bda3b495d867b6a98c899fac7c25eb60ca4b6e6fe5ec7ab9299f93e8274bc'}}
decoded = serializer.deserialize(flatten)
assert decoded == obj1
def test_i_can_use_refs_on_path():
serializer = Serializer(DummyRefHelper())
obj1 = Obj(1, "b", True)
obj2 = Obj(1, "b", obj1)
flatten = serializer.serialize(obj2, ["c.b"])
assert flatten == {TAG_OBJECT: 'tests.test_serializer.Obj',
'a': 1,
'b': 'b',
'c': {TAG_OBJECT: 'tests.test_serializer.Obj',
'a': 1,
'b': {TAG_REF: '897f2e2b559dd876ad870c82283197b8cfecdf84736192ea6fb9ee5a5080a3a4'},
'c': True}}
decoded = serializer.deserialize(flatten)
assert decoded == obj2
def test_can_use_refs_when_circular_reference():
serializer = Serializer(DummyRefHelper())
obj1 = Obj(1, "b", True)
obj1.c = obj1
flatten = serializer.serialize(obj1, ["c"])
assert flatten == {TAG_OBJECT: 'tests.test_serializer.Obj',
'a': 1,
'b': 'b',
'c': {TAG_REF: "87b1980d83bd267e2c8cc2fbc435ba00349e45b736c40f3984f710ebb4495adc"}}
decoded = serializer.deserialize(flatten)
assert decoded.a == obj1.a
assert decoded.b == obj1.b
assert decoded.c == decoded
def test_i_can_serialize_date():
obj = datetime.date.today()
serializer = Serializer()
flatten = serializer.serialize(obj)
decoded = serializer.deserialize(flatten)
assert decoded == obj