first commit

This commit is contained in:
2025-10-17 21:08:20 +02:00
commit 878064b140
20 changed files with 1871 additions and 0 deletions

0
tests/__init__.py Normal file
View File

273
tests/test_dbengine.py Normal file
View File

@@ -0,0 +1,273 @@
import os.path
import shutil
import pytest
from core.dbengine import DbEngine, DbException, TAG_PARENT, TAG_USER, TAG_DATE
# Relative directory used as the engine root; the tests create and delete it.
DB_ENGINE_ROOT = "TestDBEngineRoot"
# Tenant identifier passed to the engine throughout this module.
FAKE_TENANT_ID = "FakeTenantId"
# User e-mail passed to the engine as the author of each save/put.
FAKE_USER_EMAIL = "fake_user@me.com"
class DummyObj:
    """Plain three-attribute object used as a payload in the engine tests.

    Instances compare equal when ``a``, ``b`` and ``c`` are all equal, and the
    hash is computed from the same three attributes.
    """

    def __init__(self, a, b, c):
        self.a, self.b, self.c = a, b, c

    def __eq__(self, other):
        # Same object: equal without looking at the attributes.
        if self is other:
            return True
        if isinstance(other, DummyObj):
            return (self.a == other.a
                    and self.b == other.b
                    and self.c == other.c)
        return False

    def __hash__(self):
        key = (self.a, self.b, self.c)
        return hash(key)
class DummyObjWithRef(DummyObj):
    """DummyObj variant exposing a ``use_refs`` static method.

    NOTE(review): presumably DbEngine reads ``use_refs`` to decide which
    attributes are stored under the shared "refs" directory -- confirm
    against core.dbengine.
    """

    @staticmethod
    def use_refs() -> set:
        """Return the set of attribute names declared as refs: ``{"c"}``."""
        ref_attributes = {"c"}
        return ref_attributes
class DummyObjWithKey(DummyObj):
    """DummyObj variant that exposes its ``a`` attribute through ``get_key``.

    The tests in this module build instances with string values for ``a``
    ("1", "2") and later retrieve them from the engine with those strings.
    """

    def get_key(self) -> str:
        """Return the ``a`` attribute (previously mis-annotated as ``set``)."""
        return self.a
@pytest.fixture()
def engine():
    """Yield a DbEngine rooted at DB_ENGINE_ROOT and initialised for FAKE_TENANT_ID.

    An existing DB_ENGINE_ROOT directory is deleted before the engine is
    created, and the directory is deleted again after the yield.
    """
    stale_root_present = os.path.exists(DB_ENGINE_ROOT)
    if stale_root_present:
        shutil.rmtree(DB_ENGINE_ROOT)

    db = DbEngine(DB_ENGINE_ROOT)
    db.init(FAKE_TENANT_ID)

    yield db

    # Teardown: drop everything written under the root.
    shutil.rmtree(DB_ENGINE_ROOT)
@pytest.fixture()
def dummy_obj():
    """Return a DummyObj with a=1, b="a", c=False."""
    return DummyObj(a=1, b="a", c=False)
@pytest.fixture()
def dummy_obj2():
    """Return a DummyObj with a=2, b="b", c=True."""
    return DummyObj(a=2, b="b", c=True)
@pytest.fixture()
def dummy_obj_with_ref():
    """Return a DummyObjWithRef whose ``c`` attribute is a dict of three lists."""
    table = {}
    table['Key1'] = ['A', 'B', 'C']
    table['Key2'] = ['X', 'Y', 'Z']
    table['Percentage'] = [0.1, 0.2, 0.15]
    return DummyObjWithRef(a=1, b="a", c=table)
def test_i_can_test_init():
    """Check that ``is_initialized`` is false before ``init`` and true after.

    This test does not use the ``engine`` fixture, so it manages the root
    directory itself: any leftover DB_ENGINE_ROOT is removed first, and the
    directory is removed again at the end so nothing is left on disk.
    """
    if os.path.exists(DB_ENGINE_ROOT):
        shutil.rmtree(DB_ENGINE_ROOT)

    try:
        engine = DbEngine(DB_ENGINE_ROOT)
        assert not engine.is_initialized(FAKE_TENANT_ID)

        engine.init(FAKE_TENANT_ID)
        assert engine.is_initialized(FAKE_TENANT_ID)
    finally:
        # Clean up even when an assertion fails; tolerate a missing directory.
        shutil.rmtree(DB_ENGINE_ROOT, ignore_errors=True)
def test_i_can_save_and_load(engine, dummy_obj):
    """Save a DummyObj under "MyEntry", load it back and compare attributes."""
    digest = engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj)
    loaded = engine.load(FAKE_TENANT_ID, "MyEntry")

    assert digest is not None
    assert isinstance(loaded, DummyObj)
    for attribute in ("a", "b", "c"):
        assert getattr(loaded, attribute) == getattr(dummy_obj, attribute)

    # check that the files are created
    tenant_dir = os.path.join(DB_ENGINE_ROOT, FAKE_TENANT_ID)
    for expected_child in ("objects", "head"):
        assert os.path.exists(os.path.join(tenant_dir, expected_child))
def test_save_invalid_inputs(engine):
    """
    Test save with invalid inputs.

    Each case below (missing tenant, missing user, empty entry, missing entry)
    must make ``engine.save`` raise DbException.
    """
    invalid_arguments = [
        (None, FAKE_USER_EMAIL, "InvalidEntry"),
        (FAKE_TENANT_ID, None, "InvalidEntry"),
        (FAKE_TENANT_ID, FAKE_USER_EMAIL, ""),
        (FAKE_TENANT_ID, FAKE_USER_EMAIL, None),
    ]
    for tenant_id, user_email, entry in invalid_arguments:
        with pytest.raises(DbException):
            engine.save(tenant_id, user_email, entry, DummyObj(1, 2, 3))
def test_i_can_save_using_ref(engine, dummy_obj_with_ref):
    """Save and reload a DummyObjWithRef, then check the expected paths exist."""
    engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj_with_ref)
    loaded = engine.load(FAKE_TENANT_ID, "MyEntry")

    assert isinstance(loaded, DummyObjWithRef)
    for attribute in ("a", "b", "c"):
        assert getattr(loaded, attribute) == getattr(dummy_obj_with_ref, attribute)

    # check that the files are created
    expected_paths = (
        os.path.join(DB_ENGINE_ROOT, FAKE_TENANT_ID, "objects"),
        os.path.join(DB_ENGINE_ROOT, FAKE_TENANT_ID, "head"),
        os.path.join(DB_ENGINE_ROOT, "refs"),
    )
    for path in expected_paths:
        assert os.path.exists(path)
def test_refs_are_share_across_users(engine, dummy_obj_with_ref):
    """Saving the same object for two tenants leaves a single item in "refs"."""
    engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj_with_ref)
    engine.save("AnotherUserId", "AnotherUser", "AnotherMyEntry", dummy_obj_with_ref)

    stored_refs = os.listdir(os.path.join(DB_ENGINE_ROOT, "refs"))
    assert len(stored_refs) == 1
def test_metadata_are_correctly_set(engine, dummy_obj):
    """A first save records no parent, the saving user and a non-None date."""
    digest = engine.save(
        FAKE_TENANT_ID,
        FAKE_USER_EMAIL,
        "MyEntry",
        {"obj": dummy_obj},
    )
    stored = engine.load(FAKE_TENANT_ID, "MyEntry", digest)

    assert stored[TAG_PARENT] == [None]
    assert stored[TAG_USER] == FAKE_USER_EMAIL
    assert stored[TAG_DATE] is not None
def test_i_can_track_parents(engine):
    """The second save of an entry lists the first digest as its parent."""
    first_digest = engine.save(
        FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", {"obj": DummyObj(1, "a", False)}
    )
    latest_digest = engine.save(
        FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", {"obj": DummyObj(1, "a", True)}
    )

    latest = engine.load(FAKE_TENANT_ID, "MyEntry", latest_digest)
    assert latest[TAG_PARENT] == [first_digest]
def test_i_can_put_and_get_one_object(engine, dummy_obj):
    """An object stored with ``put`` under a key is returned by ``get``."""
    engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)

    retrieved = engine.get(FAKE_TENANT_ID, "MyEntry", "key1")

    assert retrieved == dummy_obj
def test_i_can_put_and_get_multiple_objects(engine, dummy_obj, dummy_obj2):
    """Two keys put into one entry are readable via ``get`` and via ``load``."""
    engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
    engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key2", dummy_obj2)

    first = engine.get(FAKE_TENANT_ID, "MyEntry", "key1")
    second = engine.get(FAKE_TENANT_ID, "MyEntry", "key2")
    assert first == dummy_obj
    assert second == dummy_obj2

    whole_entry = engine.load(FAKE_TENANT_ID, "MyEntry")
    for key in ("key1", "key2"):
        assert key in whole_entry
    assert whole_entry["key1"] == dummy_obj
    assert whole_entry["key2"] == dummy_obj2
def test_i_automatically_replace_keys(engine, dummy_obj, dummy_obj2):
    """Putting twice under the same key keeps only the most recent object."""
    for payload in (dummy_obj, dummy_obj2):
        engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", payload)

    current = engine.get(FAKE_TENANT_ID, "MyEntry", "key1")
    assert current == dummy_obj2

    # ``get`` without a key is expected to return every stored item as a list.
    everything = engine.get(FAKE_TENANT_ID, "MyEntry")
    assert everything == [dummy_obj2]
def test_i_do_not_save_twice_when_the_entries_are_the_same(engine, dummy_obj):
    """Putting an identical object twice must not add a parent to the entry."""

    def put_then_load():
        engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
        return engine.load(FAKE_TENANT_ID, "MyEntry")

    after_first_put = put_then_load()
    assert after_first_put[TAG_PARENT] == [None]

    # Save the same entry again
    after_second_put = put_then_load()
    assert after_second_put[TAG_PARENT] == [None]  # still no other parent
def test_i_can_put_many(engine):
    """``put_many`` stores each object under its ``get_key()`` in one save."""
    first = DummyObjWithKey("1", "a", True)
    second = DummyObjWithKey("2", "b", False)
    engine.put_many(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", [first, second])

    loaded_first = engine.get(FAKE_TENANT_ID, "MyEntry", "1")
    loaded_second = engine.get(FAKE_TENANT_ID, "MyEntry", "2")
    assert loaded_first == first
    assert loaded_second == second

    whole_entry = engine.load(FAKE_TENANT_ID, "MyEntry")
    assert whole_entry[TAG_PARENT] == [None]  # only one save was made
def test_put_many_save_only_if_necessary(engine):
    """Repeating an identical ``put_many`` must not add a parent to the entry."""
    items = [
        DummyObjWithKey("1", "a", True),
        DummyObjWithKey("2", "b", False),
    ]
    engine.put_many(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", [items[0], items[1]])
    engine.put_many(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", [items[0], items[1]])

    whole_entry = engine.load(FAKE_TENANT_ID, "MyEntry")
    assert whole_entry[TAG_PARENT] == [None]  # Still None, nothing was saved
def test_i_can_retrieve_history_using_put(engine):
    """Three puts give a three-item history, most recent version first."""
    for value in (1, 2, 3):
        engine.put(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry", "key1",
                   DummyObj(value, "a", False))

    history = engine.history(FAKE_TENANT_ID, "MyEntry")
    assert len(history) == 3

    newest, middle, oldest = (
        engine.load(FAKE_TENANT_ID, "MyEntry", history[0]),
        engine.load(FAKE_TENANT_ID, "MyEntry", history[1]),
        engine.load(FAKE_TENANT_ID, "MyEntry", history[2]),
    )

    assert newest["key1"] == DummyObj(3, "a", False)
    assert middle["key1"] == DummyObj(2, "a", False)
    assert oldest["key1"] == DummyObj(1, "a", False)
    # The very first version has no parent.
    assert oldest[TAG_PARENT] == [None]
def test_i_can_retrieve_history_using_save(engine):
    """Three saves give a three-item history, most recent version first."""
    for value in (1, 2, 3):
        engine.save(FAKE_TENANT_ID, FAKE_USER_EMAIL, "MyEntry",
                    {"key1": DummyObj(value, "a", False)})

    history = engine.history(FAKE_TENANT_ID, "MyEntry")
    assert len(history) == 3

    newest, middle, oldest = (
        engine.load(FAKE_TENANT_ID, "MyEntry", history[0]),
        engine.load(FAKE_TENANT_ID, "MyEntry", history[1]),
        engine.load(FAKE_TENANT_ID, "MyEntry", history[2]),
    )

    assert newest["key1"] == DummyObj(3, "a", False)
    assert middle["key1"] == DummyObj(2, "a", False)
    assert oldest["key1"] == DummyObj(1, "a", False)
    # The very first version has no parent.
    assert oldest[TAG_PARENT] == [None]

268
tests/test_serializer.py Normal file
View File

@@ -0,0 +1,268 @@
import dataclasses
import datetime
import hashlib
import pickle
from enum import Enum
import pytest
from core.serializer import TAG_TUPLE, TAG_SET, Serializer, TAG_OBJECT, TAG_ID, TAG_REF
class Obj:
    """Plain three-attribute object used as a payload in the serializer tests.

    Instances compare equal when ``a``, ``b`` and ``c`` are all equal, and the
    hash is computed from the same three attributes.
    """

    def __init__(self, a, b, c):
        self.a, self.b, self.c = a, b, c

    def __eq__(self, other):
        # Same object: equal without looking at the attributes (this also
        # stops the comparison from recursing when ``c`` points back to self).
        if self is other:
            return True
        if isinstance(other, Obj):
            return (self.a == other.a
                    and self.b == other.b
                    and self.c == other.c)
        return False

    def __hash__(self):
        key = (self.a, self.b, self.c)
        return hash(key)
class Obj2:
    """Object with two plain attributes and a nested ``InnerClass`` instance."""

    class InnerClass:
        """Single-value wrapper compared and hashed by its ``x`` attribute."""

        def __init__(self, x):
            self.x = x

        def __eq__(self, other):
            if isinstance(other, Obj2.InnerClass):
                return self.x == other.x
            return False

        def __hash__(self):
            return hash(self.x)

    def __init__(self, a, b, x):
        self.a = a
        self.b = b
        # The raw ``x`` value is always wrapped in an InnerClass instance.
        self.x = Obj2.InnerClass(x)

    def __eq__(self, other):
        if isinstance(other, Obj2):
            return (self.a == other.a
                    and self.b == other.b
                    and self.x == other.x)
        return False

    def __hash__(self):
        # Only ``a`` and ``b`` take part in the hash.
        key = (self.a, self.b)
        return hash(key)
class ObjEnum(Enum):
    """Enum fixture with one integer-valued and two string-valued members."""

    A = 1
    B = "second"
    C = "last"
@dataclasses.dataclass
class DummyComplexClass:
    """Dataclass fixture holding a string, an ``Obj`` and an ``ObjEnum`` member."""

    prop1: str
    prop2: Obj
    prop3: ObjEnum
class DummyRefHelper:
    """
    When something is too complicated to serialize, we just default to pickle.
    That is what this helper class is doing.

    Pickled payloads are kept in the in-memory ``refs`` dict, keyed by the
    SHA-256 hex digest of the pickled bytes.
    """

    def __init__(self):
        self.refs = {}

    def save_ref(self, obj):
        """Pickle ``obj``, store the bytes under their digest, return the digest."""
        payload = pickle.dumps(obj)
        key = hashlib.sha256(payload).hexdigest()
        self.refs[key] = payload
        return key

    def load_ref(self, digest):
        """Unpickle and return the payload stored under ``digest``."""
        payload = self.refs[digest]
        return pickle.loads(payload)
@pytest.mark.parametrize("obj, expected", [
    (1, 1),
    (3.14, 3.14),
    ("a string", "a string"),
    (True, True),
    (None, None),
    ([1, 3.14, "a string"], [1, 3.14, "a string"]),
    ((1, 3.14, "a string"), {TAG_TUPLE: [1, 3.14, "a string"]}),
    ({1}, {TAG_SET: [1]}),
    ({"a": "a", "b": 3.14, "c": True}, {"a": "a", "b": 3.14, "c": True}),
    ({1: "a", 2: 3.14, 3: True}, {1: "a", 2: 3.14, 3: True}),
    ([1, [3.14, "a string"]], [1, [3.14, "a string"]]),
    ([1, (3.14, "a string")], [1, {TAG_TUPLE: [3.14, "a string"]}]),
    ([], []),
])
def test_i_can_flatten_and_restore_primitives(obj, expected):
    """Serialize ``obj``, compare with ``expected``, then round-trip it back."""
    codec = Serializer()

    flat = codec.serialize(obj)
    assert flat == expected

    restored = codec.deserialize(flat)
    assert restored == obj
def test_i_can_flatten_and_restore_instances():
    """An Obj nested inside another Obj is flattened recursively and restored."""
    codec = Serializer()
    inner = Obj(1, "b", True)
    outer = Obj(3.14, ("a", "b"), inner)

    expected_inner = {
        TAG_OBJECT: 'tests.test_serializer.Obj',
        'a': 1,
        'b': 'b',
        'c': True,
    }
    expected_outer = {
        TAG_OBJECT: 'tests.test_serializer.Obj',
        'a': 3.14,
        'b': {TAG_TUPLE: ['a', 'b']},
        'c': expected_inner,
    }

    flat = codec.serialize(outer)
    assert flat == expected_outer

    restored = codec.deserialize(flat)
    assert restored == outer
def test_i_can_flatten_and_restore_enum():
    """Enum members are flattened by qualified name; a repeat becomes an id entry."""
    codec = Serializer()
    wrapper = {
        "a": ObjEnum.A,
        "b": ObjEnum.B,
        "c": ObjEnum.C,
        "d": ObjEnum.A,  # same member as "a"
    }

    expected = {
        'a': {'__enum__': 'tests.test_serializer.ObjEnum.A'},
        'b': {'__enum__': 'tests.test_serializer.ObjEnum.B'},
        'c': {'__enum__': 'tests.test_serializer.ObjEnum.C'},
        'd': {'__id__': 0},
    }

    flat = codec.serialize(wrapper)
    assert flat == expected

    restored = codec.deserialize(flat)
    assert restored == wrapper
def test_i_can_flatten_and_restore_list_with_enum():
    """A list of dataclasses holding objects and enum members round-trips."""

    def flattened(prop1, number, enum_path, enum_ref):
        # Expected flat form of one DummyComplexClass item of the list below;
        # ``prop2.b`` carries the same string as ``prop1`` in every item.
        return {
            '__object__': 'tests.test_serializer.DummyComplexClass',
            'prop1': prop1,
            'prop2': {
                '__object__': 'tests.test_serializer.Obj',
                'a': number,
                'b': prop1,
                'c': {'__enum__': enum_path},
            },
            'prop3': {'__id__': enum_ref},
        }

    codec = Serializer()
    obj = [
        DummyComplexClass("a", Obj(1, "a", ObjEnum.A), ObjEnum.A),
        DummyComplexClass("b", Obj(2, "b", ObjEnum.B), ObjEnum.B),
        DummyComplexClass("c", Obj(3, "c", ObjEnum.C), ObjEnum.B),
    ]
    expected = [
        flattened('a', 1, 'tests.test_serializer.ObjEnum.A', 2),
        flattened('b', 2, 'tests.test_serializer.ObjEnum.B', 5),
        flattened('c', 3, 'tests.test_serializer.ObjEnum.C', 5),
    ]

    flat = codec.serialize(obj)
    assert flat == expected

    restored = codec.deserialize(flat)
    assert restored == obj
def test_i_can_manage_circular_reference():
    """An object referencing itself is flattened to an id entry and restored."""
    codec = Serializer()
    node = Obj(1, "b", True)
    node.c = node  # self-reference

    expected = {
        TAG_OBJECT: 'tests.test_serializer.Obj',
        'a': 1,
        'b': 'b',
        'c': {TAG_ID: 0},
    }

    flat = codec.serialize(node)
    assert flat == expected

    restored = codec.deserialize(flat)
    assert restored.a == node.a
    assert restored.b == node.b
    assert restored.c == restored
def test_i_can_use_refs_on_primitive():
    """Attribute "c" listed as a ref is replaced by a TAG_REF digest entry."""
    codec = Serializer(DummyRefHelper())
    source = Obj(1, "b", True)

    expected = {
        TAG_OBJECT: 'tests.test_serializer.Obj',
        'a': 1,
        'b': 'b',
        'c': {TAG_REF: '112bda3b495d867b6a98c899fac7c25eb60ca4b6e6fe5ec7ab9299f93e8274bc'},
    }

    flat = codec.serialize(source, ["c"])
    assert flat == expected

    restored = codec.deserialize(flat)
    assert restored == source
def test_i_can_use_refs_on_path():
    """The dotted path "c.b" turns only the nested ``b`` into a TAG_REF entry."""
    codec = Serializer(DummyRefHelper())
    inner = Obj(1, "b", True)
    outer = Obj(1, "b", inner)

    expected_inner = {
        TAG_OBJECT: 'tests.test_serializer.Obj',
        'a': 1,
        'b': {TAG_REF: '897f2e2b559dd876ad870c82283197b8cfecdf84736192ea6fb9ee5a5080a3a4'},
        'c': True,
    }
    expected_outer = {
        TAG_OBJECT: 'tests.test_serializer.Obj',
        'a': 1,
        'b': 'b',
        'c': expected_inner,
    }

    flat = codec.serialize(outer, ["c.b"])
    assert flat == expected_outer

    restored = codec.deserialize(flat)
    assert restored == outer
def test_can_use_refs_when_circular_reference():
    """A self-referencing attribute listed as a ref still round-trips."""
    codec = Serializer(DummyRefHelper())
    node = Obj(1, "b", True)
    node.c = node  # self-reference

    expected = {
        TAG_OBJECT: 'tests.test_serializer.Obj',
        'a': 1,
        'b': 'b',
        'c': {TAG_REF: "87b1980d83bd267e2c8cc2fbc435ba00349e45b736c40f3984f710ebb4495adc"},
    }

    flat = codec.serialize(node, ["c"])
    assert flat == expected

    restored = codec.deserialize(flat)
    assert restored.a == node.a
    assert restored.b == node.b
    assert restored.c == restored
def test_i_can_serialize_date():
    """Today's ``datetime.date`` survives a serialize/deserialize round trip."""
    today = datetime.date.today()
    codec = Serializer()

    restored = codec.deserialize(codec.serialize(today))

    assert restored == today