"""Tests for ``core.dbengine.DbEngine``.

Each test works against an on-disk database rooted at ``DB_ENGINE_ROOT``
(a directory relative to the current working directory) and removes it
afterwards so runs do not leak state into each other.
"""
import os.path
import shutil

import pandas as pd
import pytest

from core.dbengine import DbEngine, DbException, TAG_PARENT, TAG_USER, TAG_DATE

DB_ENGINE_ROOT = "TestDBEngineRoot"
FAKE_USER_ID = "FakeUserId"
FAKE_USER_EMAIL = "fake_user@me.com"


class DummyObj:
    """Plain three-attribute object used as a payload in the tests."""

    def __init__(self, a, b, c):
        self.a = a
        self.b = b
        self.c = c

    def __eq__(self, other):
        # Identity short-circuit, then attribute-wise comparison.
        if self is other:
            return True
        if not isinstance(other, DummyObj):
            return False
        return self.a == other.a and self.b == other.b and self.c == other.c

    def __hash__(self):
        return hash((self.a, self.b, self.c))


class DummyObjWithRef(DummyObj):
    """``DummyObj`` variant that exposes ``use_refs`` naming attribute ``c``."""

    @staticmethod
    def use_refs() -> set:
        """Return the attribute names declared as references (here ``{"c"}``)."""
        return {"c"}


class DummyObjWithKey(DummyObj):
    """``DummyObj`` variant that exposes ``get_key``, returning attribute ``a``."""

    def get_key(self) -> str:
        """Return ``self.a``; the tests below always build it with a string."""
        return self.a


def _remove_db_root():
    """Delete the on-disk test database directory if it exists."""
    if os.path.exists(DB_ENGINE_ROOT):
        shutil.rmtree(DB_ENGINE_ROOT)


def _assert_three_versions_newest_first(engine):
    """Check that "MyEntry" has three versions of "key1", latest write first."""
    history = engine.history(FAKE_USER_ID, "MyEntry")
    assert len(history) == 3

    v0 = engine.load(FAKE_USER_ID, "MyEntry", history[0])
    v1 = engine.load(FAKE_USER_ID, "MyEntry", history[1])
    v2 = engine.load(FAKE_USER_ID, "MyEntry", history[2])

    assert v0["key1"] == DummyObj(3, "a", False)
    assert v1["key1"] == DummyObj(2, "a", False)
    assert v2["key1"] == DummyObj(1, "a", False)

    # The oldest version is the first one written, so it has no parent.
    assert v2[TAG_PARENT] == [None]


@pytest.fixture()
def engine():
    """Yield a ``DbEngine`` initialized for ``FAKE_USER_ID`` on a clean root.

    The root directory is removed before the engine is created and again
    once the test is over, including when initialization itself fails.
    """
    _remove_db_root()
    db_engine = DbEngine(DB_ENGINE_ROOT)
    try:
        db_engine.init(FAKE_USER_ID)
        yield db_engine
    finally:
        _remove_db_root()


@pytest.fixture()
def dummy_obj():
    """Return ``DummyObj(1, "a", False)``."""
    return DummyObj(1, "a", False)


@pytest.fixture()
def dummy_obj2():
    """Return ``DummyObj(2, "b", True)``."""
    return DummyObj(2, "b", True)


@pytest.fixture()
def dummy_obj_with_ref():
    """Return a ``DummyObjWithRef`` whose ``c`` attribute is a small DataFrame."""
    data = {
        'Key1': ['A', 'B', 'C'],
        'Key2': ['X', 'Y', 'Z'],
        'Percentage': [0.1, 0.2, 0.15],
    }
    df = pd.DataFrame(data)
    return DummyObjWithRef(1, "a", df)


def test_i_can_test_init():
    """``is_initialized`` is false before ``init`` and true after it."""
    _remove_db_root()
    try:
        engine = DbEngine(DB_ENGINE_ROOT)
        assert not engine.is_initialized(FAKE_USER_ID)

        engine.init(FAKE_USER_ID)
        assert engine.is_initialized(FAKE_USER_ID)
    finally:
        # This test does not use the ``engine`` fixture, so it has to clean
        # up the directory itself.
        _remove_db_root()


def test_i_can_save_and_load(engine, dummy_obj):
    """A saved object is loaded back with the same attribute values."""
    digest = engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj)

    res = engine.load(FAKE_USER_ID, "MyEntry")

    assert digest is not None
    assert isinstance(res, DummyObj)

    assert res.a == dummy_obj.a
    assert res.b == dummy_obj.b
    assert res.c == dummy_obj.c

    # check that the files are created
    assert os.path.exists(os.path.join(DB_ENGINE_ROOT, FAKE_USER_ID, "objects"))
    assert os.path.exists(os.path.join(DB_ENGINE_ROOT, FAKE_USER_ID, "head"))


def test_save_invalid_inputs(engine):
    """
    Test save with invalid inputs.
    """
    with pytest.raises(DbException):
        engine.save(None, FAKE_USER_EMAIL, "InvalidEntry", DummyObj(1, 2, 3))

    with pytest.raises(DbException):
        engine.save(FAKE_USER_ID, None, "InvalidEntry", DummyObj(1, 2, 3))

    with pytest.raises(DbException):
        engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "", DummyObj(1, 2, 3))

    with pytest.raises(DbException):
        engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, None, DummyObj(1, 2, 3))


def test_i_can_save_using_ref(engine, dummy_obj_with_ref):
    """An object declaring ``use_refs`` round-trips and a ``refs`` directory exists."""
    engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj_with_ref)

    res = engine.load(FAKE_USER_ID, "MyEntry")
    assert isinstance(res, DummyObjWithRef)

    assert res.a == dummy_obj_with_ref.a
    assert res.b == dummy_obj_with_ref.b
    # DataFrames are compared through their dict form.
    assert res.c.to_dict() == dummy_obj_with_ref.c.to_dict()

    # check that the files are created
    assert os.path.exists(os.path.join(DB_ENGINE_ROOT, FAKE_USER_ID, "objects"))
    assert os.path.exists(os.path.join(DB_ENGINE_ROOT, FAKE_USER_ID, "head"))
    assert os.path.exists(os.path.join(DB_ENGINE_ROOT, "refs"))


def test_refs_are_share_across_users(engine, dummy_obj_with_ref):
    """Saving the same referenced object for two users leaves one entry in ``refs``."""
    engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj_with_ref)
    engine.save("AnotherUserId", "AnotherUser", "AnotherMyEntry", dummy_obj_with_ref)

    refs_path = os.path.join(DB_ENGINE_ROOT, "refs")
    assert len(os.listdir(refs_path)) == 1


def test_metadata_are_correctly_set(engine, dummy_obj):
    """A first save records no parent, the user email, and a date."""
    digest = engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", dummy_obj)

    as_dict = engine.debug_load(FAKE_USER_ID, digest)
    assert as_dict[TAG_PARENT] == [None]
    assert as_dict[TAG_USER] == FAKE_USER_EMAIL
    assert as_dict[TAG_DATE] is not None


def test_i_can_track_parents(engine):
    """A second save on the same entry records the first digest as its parent."""
    digest = engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", DummyObj(1, "a", False))
    second_digest = engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", DummyObj(1, "a", True))

    as_dict = engine.debug_load(FAKE_USER_ID, second_digest)

    assert as_dict[TAG_PARENT] == [digest]


def test_i_can_put_and_get_one_object(engine, dummy_obj):
    """An object stored with ``put`` under a key is returned by ``get`` for that key."""
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)

    from_db = engine.get(FAKE_USER_ID, "MyEntry", "key1")

    assert from_db == dummy_obj


def test_i_can_put_and_get_multiple_objects(engine, dummy_obj, dummy_obj2):
    """Two keys put into one entry are retrievable one by one and via ``load``."""
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key2", dummy_obj2)

    from_db1 = engine.get(FAKE_USER_ID, "MyEntry", "key1")
    from_db2 = engine.get(FAKE_USER_ID, "MyEntry", "key2")

    assert from_db1 == dummy_obj
    assert from_db2 == dummy_obj2

    as_dict = engine.load(FAKE_USER_ID, "MyEntry")

    assert "key1" in as_dict
    assert "key2" in as_dict
    assert as_dict["key1"] == dummy_obj
    assert as_dict["key2"] == dummy_obj2


def test_i_automatically_replace_keys(engine, dummy_obj, dummy_obj2):
    """Putting twice under the same key keeps only the latest object."""
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj2)

    from_db1 = engine.get(FAKE_USER_ID, "MyEntry", "key1")
    assert from_db1 == dummy_obj2

    # Without a key, ``get`` is expected to return every item of the entry.
    all_items = engine.get(FAKE_USER_ID, "MyEntry")
    assert all_items == [dummy_obj2]


def test_i_do_not_save_twice_when_the_entries_are_the_same(engine, dummy_obj):
    """Putting an identical object under the same key does not add a version."""
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)

    entry_content = engine.load(FAKE_USER_ID, "MyEntry")
    assert entry_content[TAG_PARENT] == [None]

    # Save the same entry again
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", dummy_obj)

    entry_content = engine.load(FAKE_USER_ID, "MyEntry")
    assert entry_content[TAG_PARENT] == [None]  # still no other parent


def test_i_can_put_many(engine):
    """``put_many`` stores each object under its ``get_key`` value in one save."""
    dummy_obj = DummyObjWithKey("1", "a", True)
    dummy_obj2 = DummyObjWithKey("2", "b", False)
    engine.put_many(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", [dummy_obj, dummy_obj2])

    from_db1 = engine.get(FAKE_USER_ID, "MyEntry", "1")
    from_db2 = engine.get(FAKE_USER_ID, "MyEntry", "2")

    assert from_db1 == dummy_obj
    assert from_db2 == dummy_obj2

    entry_content = engine.load(FAKE_USER_ID, "MyEntry")
    assert entry_content[TAG_PARENT] == [None]  # only one save was made


def test_put_many_save_only_if_necessary(engine):
    """Repeating ``put_many`` with the same objects does not add a version."""
    dummy_obj = DummyObjWithKey("1", "a", True)
    dummy_obj2 = DummyObjWithKey("2", "b", False)

    engine.put_many(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", [dummy_obj, dummy_obj2])
    engine.put_many(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", [dummy_obj, dummy_obj2])

    entry_content = engine.load(FAKE_USER_ID, "MyEntry")
    assert entry_content[TAG_PARENT] == [None]  # Still None, nothing was saved


def test_i_can_retrieve_history_using_put(engine):
    """Three successive ``put`` calls on one key give a three-version history."""
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", DummyObj(1, "a", False))
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", DummyObj(2, "a", False))
    engine.put(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", "key1", DummyObj(3, "a", False))

    _assert_three_versions_newest_first(engine)


def test_i_can_retrieve_history_using_save(engine):
    """Three successive ``save`` calls on one entry give a three-version history."""
    engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", {"key1": DummyObj(1, "a", False)})
    engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", {"key1": DummyObj(2, "a", False)})
    engine.save(FAKE_USER_ID, FAKE_USER_EMAIL, "MyEntry", {"key1": DummyObj(3, "a", False)})

    _assert_three_versions_newest_first(engine)