"""Tests for SheerkaDataProvider: events, states, entries, keys, references and cache."""
import hashlib
import pytest
import os
from os import path
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError, \
    SheerkaDataProviderDuplicateKeyError
from datetime import date, datetime
import shutil
import json
from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer, SerializerContext, PickleSerializer
import core.utils

# Directory every test runs in; resolved once, relative to the cwd at import time.
tests_root = path.abspath("../build/tests")
# Arbitrary event digest passed to provider calls that need one.
evt_digest = "3a571cb6034ef6fc8d7fe91948d0d29728eed74de02bac7968b0e9facca2c2d7"


def read_text_file(file_name):
    """Return the whole content of *file_name* as text."""
    with open(file_name, "r", encoding="utf-8") as f:
        return f.read()


def read_json_file(file_name):
    """Return the JSON document stored in *file_name*."""
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)


class ObjWithKey:
    """Test object exposing its key through ``get_key()`` (the key is ``a``)."""

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __eq__(self, obj):
        return isinstance(obj, ObjWithKey) and \
               self.a == obj.a and \
               self.b == obj.b

    def __repr__(self):
        return f"ObjWithKey({self.a}, {self.b})"

    def get_key(self):
        return self.a


class ObjSetKey:
    """Test object carrying a ``key`` attribute that can be assigned through ``set_key()``."""

    def __init__(self, value, key=None):
        self.value = value
        self.key = key

    def __eq__(self, obj):
        return isinstance(obj, ObjSetKey) and \
               self.key == obj.key and \
               self.value == obj.value

    def __repr__(self):
        return f"ObjSetKey({self.key}, {self.value})"

    def set_key(self, key):
        self.key = key


class ObjNoKey:
    """Hashable test object with neither a key nor a digest."""

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        return hash((self.a, self.b))

    def __eq__(self, obj):
        return isinstance(obj, ObjNoKey) and \
               self.a == obj.a and \
               self.b == obj.b

    def __repr__(self):
        return f"ObjNoKey({self.a}, {self.b})"


class ObjDumpJson:
    """Test object with a key, a sha256 digest and dict conversion helpers."""

    def __init__(self, key=None, value=None):
        self.key = key
        self.value = value

    def __eq__(self, obj):
        return isinstance(obj, ObjDumpJson) and \
               self.key == obj.key and \
               self.value == obj.value

    def __repr__(self):
        return f"ObjDumpJson({self.key}, {self.value})"

    def get_key(self):
        return self.key

    def get_digest(self):
        """
        Returns the digest of the object
        :return: hexa form of the sha256
        """
        return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest()

    def to_dict(self):
        return self.__dict__

    def from_dict(self, as_dict):
        self.value = as_dict["value"]
        self.key = as_dict["key"]


class ObjWithDigestNoKey:
    """Hashable test object exposing ``get_digest()`` but no key."""

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        return hash((self.a, self.b))

    def __eq__(self, obj):
        # Compare against this class (the check used to be copy-pasted from ObjNoKey,
        # so two equal instances never compared equal).
        return isinstance(obj, ObjWithDigestNoKey) and \
               self.a == obj.a and \
               self.b == obj.b

    def __repr__(self):
        return f"ObjWithDigestNoKey({self.a}, {self.b})"

    def get_digest(self):
        return str(self.a) + str(self.b)


class ObjWithDigestWithKey:
    """Hashable test object exposing both ``get_key()`` (``a``) and ``get_digest()``."""

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        return hash((self.a, self.b))

    def __eq__(self, obj):
        # Compare against this class (the check used to be copy-pasted from ObjNoKey,
        # so two equal instances never compared equal).
        return isinstance(obj, ObjWithDigestWithKey) and \
               self.a == obj.a and \
               self.b == obj.b

    def __repr__(self):
        return f"ObjWithDigestWithKey({self.a}, {self.b})"

    def get_key(self):
        return self.a

    def get_digest(self):
        return str(self.a) + str(self.b)


@pytest.fixture(autouse=True)
def init_test():
    """Run every test from a freshly emptied ``tests_root``, then restore the cwd."""
    if path.exists(tests_root):
        shutil.rmtree(tests_root)
    os.makedirs(tests_root, exist_ok=True)

    current_pwd = os.getcwd()
    os.chdir(tests_root)
    try:
        yield None
    finally:
        # Always go back, so a failure during teardown cannot leak the cwd change.
        os.chdir(current_pwd)


def test_i_can_init_the_data_provider():
    sdp = SheerkaDataProvider(".sheerka")

    assert sdp.root == path.abspath(path.join(tests_root, ".sheerka"))
    assert path.exists(path.join(tests_root, ".sheerka"))


def test_i_can_save_and_load_an_event():
    sdp = SheerkaDataProvider(".sheerka")
    event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo")

    # Named 'digest' so that it does not shadow the module-level evt_digest.
    digest = sdp.save_event(event)
    evt = sdp.load_event(digest)

    assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, digest[0:24], digest))
    assert evt.version == 1
    assert evt.date == datetime(year=2007, month=9, day=10)
    assert evt.user == "kodjo"
    assert evt.message == "hello world"


def test_i_can_add_an_string():
    sdp = SheerkaDataProvider(".sheerka")
    obj = "foo => bar"

    entry, key = sdp.add(evt_digest, "entry", obj)
    last_commit = sdp.get_snapshot()
    state = sdp.load_state(last_commit)
    loaded = sdp.get(entry, key)

    assert entry == "entry"
    assert key is None
    assert loaded == obj
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile))
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": "foo => bar"}
    assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit


def test_i_can_add_several_strings_if_allow_multiple_is_true():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", "foo")
    sdp.add(evt_digest, "entry", "foo")
    entry, key = sdp.add(evt_digest, "entry", "bar")
    loaded = sdp.get(entry, key)

    assert entry == "entry"
    assert key is None
    assert loaded == ["foo", "foo", "bar"]


def test_i_cannot_add_several_strings_if_allow_multiple_is_false():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", "foo", False)

    # Only the call expected to fail is wrapped, so a failing setup cannot pass the test.
    with pytest.raises(IndexError) as index_error:
        sdp.add(evt_digest, "entry", "foo", False)

    assert index_error.value.args[0] == "entry"


def test_i_can_add_an_object_with_no_key():
    sdp = SheerkaDataProvider(".sheerka")
    obj = ObjNoKey("a", "b")

    entry, key = sdp.add(evt_digest, "entry", obj)
    last_commit = sdp.get_snapshot()
    state = sdp.load_state(last_commit)
    loaded = sdp.get(entry, key)

    assert entry == "entry"
    assert key is None
    assert loaded == obj
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile))
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": ObjNoKey("a", "b")}
    assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit


def test_i_can_add_several_obj_no_key_if_allow_multiple_is_true():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjNoKey("a", "b"))
    sdp.add(evt_digest, "entry", ObjNoKey("a", "b"))
    entry, key = sdp.add(evt_digest, "entry", ObjNoKey("c", "d"))
    loaded = sdp.get(entry, key)

    assert entry == "entry"
    assert key is None
    assert loaded == [ObjNoKey("a", "b"), ObjNoKey("a", "b"), ObjNoKey("c", "d")]


def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjNoKey("a", "b"), False)

    with pytest.raises(IndexError) as index_error:
        sdp.add(evt_digest, "entry", ObjNoKey("c", "d"), False)

    assert index_error.value.args[0] == "entry"


def test_i_can_add_a_dict():
    sdp = SheerkaDataProvider(".sheerka")
    obj = {"my_key": "my_value"}

    entry, key = sdp.add(evt_digest, "entry", obj)
    last_commit = sdp.get_snapshot()
    state = sdp.load_state(last_commit)
    loaded = sdp.get(entry, key)
    loaded_value = sdp.get(entry, "my_key")  # we can retrieve by key

    assert entry == "entry"
    assert key is None  # we return None as dict may contains several entries
    assert loaded == obj
    assert loaded_value == "my_value"
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile))
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": obj}
    assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit


def test_i_can_add_multiple_entries_at_once_with_dict():
    sdp = SheerkaDataProvider(".sheerka")

    entry, key = sdp.add(evt_digest, "entry", {"my_key1": "value1", "my_key2": "value2"})
    loaded = sdp.get(entry, key)
    loaded_value1 = sdp.get(entry, "my_key1")
    loaded_value2 = sdp.get(entry, "my_key2")

    assert loaded == {"my_key1": "value1", "my_key2": "value2"}
    assert loaded_value1 == "value1"
    assert loaded_value2 == "value2"


def test_i_can_add_same_key_with_dict_if_allow_multiple_is_true():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", {"my_key": "my_value"})
    entry, key = sdp.add(evt_digest, "entry", {"my_key": "my_value"})
    loaded1 = sdp.get(entry, key)

    entry, key = sdp.add(evt_digest, "entry", {"my_key": "my_value2"})
    loaded2 = sdp.get(entry, key)

    assert entry == "entry"
    assert key is None
    assert loaded1 == {"my_key": ["my_value", "my_value"]}
    assert loaded2 == {"my_key": ["my_value", "my_value", "my_value2"]}


def test_i_cannot_add_same_key_with_dict_if_allow_multiple_is_false():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", {"my_key": "my_value"}, False)

    with pytest.raises(IndexError) as index_error:
        sdp.add(evt_digest, "entry", {"my_key": "my_value2"}, False)

    assert index_error.value.args[0] == "entry.my_key"


def test_i_can_add_object_with_different_key_if_allow_multiple_is_false():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", {"my_key": "a"}, False)
    sdp.add(evt_digest, "entry", {"my_key2": "b"}, False)

    assert sdp.get("entry", "my_key") == "a"
    assert sdp.get("entry", "my_key2") == "b"


def test_i_can_add_obj_with_key():
    sdp = SheerkaDataProvider(".sheerka")
    obj1 = ObjWithKey("key1", "b")
    obj2 = ObjSetKey("c", key="key2")

    entry1, key1 = sdp.add(evt_digest, "entry", obj1)  # test when key is taken from obj.get_key()
    entry2, key2 = sdp.add(evt_digest, "entry2", obj2)  # test when key is taken from obj.key
    last_commit = sdp.get_snapshot()
    state = sdp.load_state(last_commit)
    loaded1 = sdp.get(entry1, key1)
    loaded2 = sdp.get(entry2, key2)

    assert entry1 == "entry"
    assert key1 == "key1"
    assert loaded1 == ObjWithKey("key1", "b")

    assert entry2 == "entry2"
    assert key2 == "key2"
    assert loaded2 == ObjSetKey("c", key="key2")

    assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile))
    assert state.date is not None
    assert len(state.parents) == 1
    assert state.events == [evt_digest]
    assert state.data == {"entry": {"key1": obj1}, "entry2": {"key2": obj2}}
    assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit


def test_i_can_add_objects_with_same_key_if_allow_multiple_is_true():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b"))
    entry, key = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"))
    loaded1 = sdp.get(entry, key)

    entry, key = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"))
    sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key2"))  # to prove that it does not melt everything
    loaded2 = sdp.get(entry, key)

    assert entry == "entry"
    assert key == "my_key"
    assert loaded1 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key")]
    assert loaded2 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key"), ObjSetKey("c", key="my_key")]


def test_i_cannot_add_object_with_same_key_if_allow_multiple_is_false():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b"), False)

    with pytest.raises(IndexError) as index_error:
        sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"), False)

    assert index_error.value.args[0] == "entry.my_key"


def test_i_can_add_obj_with_key_to_a_list():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", "foo")
    sdp.add(evt_digest, "entry", "bar")  # entry is now a list
    sdp.add(evt_digest, "entry", ObjWithKey("a", "b"))  # this entry must not be taken as an object with a key
    loaded = sdp.get("entry")

    assert loaded == ["foo", "bar", ObjWithKey("a", "b")]


def test_i_cannot_add_obj_with_no_key_when_then_entry_has_keys():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithKey("a", "b"))

    with pytest.raises(SheerkaDataProviderError) as error:
        sdp.add(evt_digest, "entry", "foo")

    assert error.value.obj == "foo"


def test_i_can_add_string_using_auto_generated_key():
    sdp = SheerkaDataProvider(".sheerka")
    key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)

    entry1, key1 = sdp.add_with_auto_key(evt_digest, "entry1", "foo")
    entry2, key2 = sdp.add_with_auto_key(evt_digest, "entry1", "bar")
    entry3, key3 = sdp.add_with_auto_key(evt_digest, "entry2", "baz")
    state = sdp.load_state(sdp.get_snapshot())

    assert path.exists(key_file)
    assert read_json_file(key_file) == {"entry1": 2, "entry2": 1}
    assert state.data == {"entry1": {"1": "foo", "2": "bar"}, "entry2": {"1": "baz"}}
    assert entry1 == "entry1"
    assert entry2 == "entry1"
    assert entry3 == "entry2"
    assert key1 == "1"
    assert key2 == "2"
    assert key3 == "1"


def test_i_cannot_add_the_same_digest_twice_in_the_same_entry():
    """
    If get_digest() is implemented, checks for duplicates
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest()
    assert error.value.key == "entry"
    assert error.value.args[0] == "duplicate key"


def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2():
    """
    If get_digest() is implemented, checks for duplicates in list when no key
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "c"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest()
    assert error.value.key == "entry"
    assert error.value.args[0] == "duplicate key"


def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3():
    """
    If get_digest() is implemented, checks for duplicates when the key is provided
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest()
    assert error.value.key == "entry.a"
    assert error.value.args[0] == "duplicate key"


def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4():
    """
    If get_digest() is implemented, checks for duplicates in list when the key is provided
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "c"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest()
    assert error.value.key == "entry.a"
    assert error.value.args[0] == "duplicate key"


def test_i_can_get_and_set_key():
    sdp = SheerkaDataProvider(".sheerka")
    key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)

    sdp.set_key("entry1", 1000)
    sdp.get_next_key("entry1")
    sdp.get_next_key("entry1")
    sdp.get_next_key("entry1")
    sdp.get_next_key("entry2")
    sdp.get_next_key("entry2")

    assert path.exists(key_file)
    assert read_json_file(key_file) == {"entry1": 1003, "entry2": 2}


def test_i_can_add_object_using_auto_generated_key():
    sdp = SheerkaDataProvider(".sheerka")
    key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)

    entry1, key1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b"))
    entry2, key2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b"))
    state = sdp.load_state(sdp.get_snapshot())

    assert path.exists(key_file)
    assert read_json_file(key_file) == {"entry1": 2}
    assert state.data == {"entry1": {"1": ObjNoKey("a", "b"), "2": ObjNoKey("a", "b")}}
    assert entry1 == "entry1"
    assert entry2 == "entry1"
    assert key1 == "1"
    assert key2 == "2"


def test_object_key_is_updated_when_possible_using_auto_generated_key():
    sdp = SheerkaDataProvider(".sheerka")
    key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)

    entry1, key1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo"))
    entry2, key2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo"))
    state = sdp.load_state(sdp.get_snapshot())

    assert path.exists(key_file)
    assert read_json_file(key_file) == {"entry1": 2}
    assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("foo", "2")}}
    assert entry1 == "entry1"
    assert entry2 == "entry1"
    assert key1 == "1"
    assert key2 == "2"


def test_i_can_set_objects_with_key():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjWithKey(1, "foo"))
    entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"))
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}}
    assert entry == "entry"
    assert key == "2"


def test_i_can_set_objects_with_no_key():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjNoKey(1, "foo"))
    entry, key = sdp.set(evt_digest, "entry", ObjNoKey(2, "foo"))
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": ObjNoKey(2, "foo")}
    assert entry == "entry"
    assert key is None


def test_i_can_set_from_list_to_dict():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.set(evt_digest, "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")])
    entry, key = sdp.set(evt_digest, "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")})
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}}
    assert entry == "entry"
    assert key is None


def test_i_can_set_using_reference():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey)))

    sdp.add(evt_digest, "entry", ObjWithKey(1, "foo"))
    entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True)
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"2": '##REF##:95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268'}}
    assert entry == "entry"
    assert key == "2"
    assert path.exists(sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder,
                                        "95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268"))

    # sanity check, make sure that I can load back
    loaded = sdp.get(entry, key)
    assert loaded == ObjWithKey(2, "foo")


def test_i_can_add_unique():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo"))
    sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo"))
    sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar"))
    entry, key = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar"))
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}}
    assert entry == "entry"
    assert key is None


def test_i_can_add_reference_of_an_object_with_a_key():
    sdp = SheerkaDataProvider(".sheerka")
    obj = ObjDumpJson("my_key", "value1")
    obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
    sdp.serializer.register(obj_serializer)

    entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
    state = sdp.load_state(sdp.get_snapshot())
    # The stored value is the reference prefix followed by the object digest.
    digest = state.data["entry"]["my_key"][len(SheerkaDataProvider.REF_PREFIX):]

    assert key == obj.key
    assert entry == "entry"
    assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}}
    assert sdp.load_obj(digest) == obj


def test_i_can_keep_state_history():
    sdp = SheerkaDataProvider(".sheerka")

    event1 = Event("cmd add 'foo => bar'")
    event_digest1 = sdp.save_event(event1)
    obj1 = "foo => bar"
    sdp.add(event_digest1, "entry1", obj1)
    state_digest1 = sdp.get_snapshot()

    event2 = Event("cmd add 'foo => baz'")
    event_digest2 = sdp.save_event(event2)
    obj2 = "foo => baz"
    sdp.add(event_digest2, "entry2", obj2)
    state_digest2 = sdp.get_snapshot()

    state2 = sdp.load_state(state_digest2)

    assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest1[0:24], event_digest1))
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, state_digest1[0:24], state_digest1))
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest2[0:24], event_digest2))
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, state_digest2[0:24], state_digest2))
    assert state2.date is not None
    assert state2.parents == [state_digest1]
    assert state2.events == [event_digest2]
    assert state2.data == {"entry1": "foo => bar", "entry2": "foo => baz"}


def test_i_can_list_elements_when_there_is_nothing_to_list():
    sdp = SheerkaDataProvider(".sheerka")

    result = sdp.list("entry")

    assert list(result) == []


def test_i_can_list_when_no_key():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, str)))

    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")
    sdp.add(evt_digest, "entry1", "baz", use_ref=True)
    sdp.add(evt_digest, "entry2", "xyz")
    result = sdp.list("entry1")

    assert list(result) == ["foo", "bar", "baz"]


def test_i_can_list_when_key():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey)))

    sdp.add(evt_digest, "entry1", {"1": "foo"})
    sdp.add(evt_digest, "entry1", {"2": "bar"})
    sdp.add(evt_digest, "entry1", ObjWithKey("3", "value"), use_ref=True)
    sdp.add(evt_digest, "entry2", {"4": "xxx"})
    result = sdp.list("entry1")

    assert list(result) == ["foo", "bar", ObjWithKey("3", "value")]


def test_i_can_list_when_one_element():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry2", "baz")
    result = sdp.list("entry1")

    assert list(result) == ["foo"]


def test_i_can_filter_on_key_for_dict():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", {"1": "foo"})
    sdp.add(evt_digest, "entry1", {"2": "bar"})
    result = sdp.list("entry1", lambda k, o: k == "1")

    assert list(result) == ["foo"]


def test_i_can_filter_on_key_for_objects():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1"))
    sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2"))
    result = sdp.list("entry1", lambda k, o: k == "a1")

    assert list(result) == [ObjWithKey("a1", "b1")]


def test_i_can_filter_on_attribute_for_dict():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", {"1": {"a": "a1", "b": "b1"}})
    sdp.add(evt_digest, "entry1", {"2": {"a": "a2", "b": "b2"}})
    result = sdp.list("entry1", lambda k, o: o["a"] == "a2")

    assert list(result) == [{"a": "a2", "b": "b2"}]


def test_i_can_filter_on_attribute_for_object():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1"))
    sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2"))
    result = sdp.list("entry1", lambda k, o: o.b == "b2")

    assert list(result) == [ObjWithKey("a2", "b2")]


def test_i_can_filter_a_list():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")
    result = sdp.list("entry1", lambda o: o == "bar")

    assert list(result) == ["bar"]


def test_i_can_filter_a_list_of_object():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", ObjNoKey("a1", "b1"))
    sdp.add(evt_digest, "entry1", ObjNoKey("a2", "b2"))
    result = sdp.list("entry1", lambda o: o.b == "b1")

    assert list(result) == [ObjNoKey("a1", "b1")]


def test_i_can_remove_all_elements():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")
    state_digest = sdp.remove(evt_digest, "entry1")
    result = sdp.list("entry1")

    assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == state_digest
    assert list(result) == []


def test_i_can_remove_a_element():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")
    sdp.remove(evt_digest, "entry1", lambda o: o == "foo")
    result = sdp.list("entry1")

    assert list(result) == ["bar"]


def test_i_can_remove_dict_by_key():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", {"1": ObjNoKey("a1", "b1")})
    sdp.add(evt_digest, "entry1", {"2": ObjNoKey("a2", "b2")})
    sdp.remove(evt_digest, "entry1", lambda k, o: k == "2")
    result = sdp.list("entry1")

    assert list(result) == [ObjNoKey("a1", "b1")]


def test_i_can_remove_when_only_one_element():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", "foo")
    sdp.remove(evt_digest, "entry1", lambda o: o == "foo")
    result = sdp.list("entry1")

    assert list(result) == []


def test_i_cannot_remove_if_entry_does_not_exist():
    sdp = SheerkaDataProvider(".sheerka")

    with pytest.raises(IndexError) as e:
        sdp.remove(evt_digest, "entry")

    # The message lives on e.value; str(e) is the ExceptionInfo wrapper, not the message.
    # NOTE(review): this expectation was never effectively checked before - confirm the text.
    assert str(e.value) == "entry"


def test_i_cannot_modify_an_entry_without_a_key():
    sdp = SheerkaDataProvider(".sheerka")

    with pytest.raises(SheerkaDataProviderError) as error:
        sdp.modify(evt_digest, "entry", None, "baz")

    assert error.value.args[0] == "Key is mandatory."


def test_i_can_modify_dict_with_a_key():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", {"key1": "foo"})
    sdp.add(evt_digest, "entry", {"key2": "bar"})
    entry, key = sdp.modify(evt_digest, "entry", "key1", "baz")
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"key1": "baz", "key2": "bar"}}
    assert entry == "entry"
    assert key == "key1"


def test_i_can_modify_an_object_with_a_key():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
    sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))
    entry, key = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key1", "baz"))
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"key1": ObjWithKey("key1", "baz"), "key2": ObjWithKey("key2", "bar")}}
    assert entry == "entry"
    assert key == "key1"


def test_i_can_modify_an_object_while_changing_the_key():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
    sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))
    entry, key = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key3", "baz"))
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"key2": ObjWithKey("key2", "bar"), "key3": ObjWithKey("key3", "baz")}}
    assert entry == "entry"
    assert key == "key3"


def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_key():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
    sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))
    entry, key = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key1", "bar"))
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"key1": [ObjWithKey("key1", "foo"), ObjWithKey("key1", "bar")]}}
    assert entry == "entry"
    assert key == "key1"


def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list():
    """
    In this example, the item to modify is within a list, and its key has changed
    and in the new key, there is already a list
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))

    sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11"))
    sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value12"))
    sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21"))
    sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22"))

    new_value = ObjDumpJson("key1", "value13")
    setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
    entry, key = sdp.modify(evt_digest, "entry", "key2", new_value)
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {
        "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value12"), ObjDumpJson("key1", "value13")],
        "key2": [ObjDumpJson("key2", "value22")]
    }}
    assert entry == "entry"
    assert key == "key1"


def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_nothing():
    """
    In this example, the item to modify is within a list, and its key has changed
    and in the new key, there is nothing (the new key does not exist)
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))

    sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21"))
    sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22"))

    new_value = ObjDumpJson("key1", "value13")
    setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
    entry, key = sdp.modify(evt_digest, "entry", "key2", new_value)
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {
        "key1": ObjDumpJson("key1", "value13"),
        "key2": [ObjDumpJson("key2", "value22")]
    }}
    assert entry == "entry"
    assert key == "key1"


def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_one_item():
    """
    In this example, the item to modify is within a list, and its key has changed
    and in the new key, there is only one element
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))

    sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11"))
    sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21"))
    sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22"))

    new_value = ObjDumpJson("key1", "value13")
    setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
    entry, key = sdp.modify(evt_digest, "entry", "key2", new_value)
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {
        "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value13")],
        "key2": [ObjDumpJson("key2", "value22")]
    }}
    assert entry == "entry"
    assert key == "key1"


def test_i_can_modify_a_ref():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey)))

    sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
    entry, key = sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"), use_ref=True)
    sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key2", "baz"))
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {
        "key1": ObjWithKey("key1", "foo"),
        "key2": "##REF##:d70b0247311645ed18d275337cbcf79ad186d995236cdc8ad4fcfc708085bd3d"}}
    assert entry == "entry"
    assert key == "key2"


def test_i_cannot_modify_an_entry_that_does_not_exist():
    sdp = SheerkaDataProvider(".sheerka")

    with pytest.raises(IndexError) as e:
        sdp.modify(evt_digest, "entry", "key", "foo")

    assert str(e.value) == "entry"


def test_i_cannot_modify_a_key_that_does_not_exist():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", {"1": "foo"})

    with pytest.raises(IndexError) as e:
        sdp.modify(evt_digest, "entry1", "2", "bar")

    # The message lives on e.value; str(e) is the ExceptionInfo wrapper, not the message.
    # NOTE(review): this expectation was never effectively checked before - confirm the text.
    assert str(e.value) == "entry1.2"


def test_i_cannot_modify_a_list_when_origin_is_unknown():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjWithKey("key", "value1"))
    sdp.add(evt_digest, "entry", ObjWithKey("key", "value2"))  # same key
    state = sdp.load_state(sdp.get_snapshot())

    with pytest.raises(SheerkaDataProviderError) as error:
        sdp.modify(evt_digest, "entry", "key", ObjWithKey("key", "value2"))

    assert error.value.obj == ObjWithKey("key", "value2")
    assert error.value.args[0] == "Multiple entries under 'entry.key'"


def test_i_can_modify_a_list_when_the_origin_is_known():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"))
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"))  # same key

    new_value = ObjDumpJson("key", "value3")
    setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key", "value1").get_digest())
    sdp.modify(evt_digest, "entry", "key", new_value)
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"key": [ObjDumpJson("key", "value3"), ObjDumpJson("key", "value2")]}}


def test_i_can_modify_a_list_when_the_origin_is_known_2():
    """
    This time, we check that the origin is automatically set when the object was saved as a reference
    We also check that all objects are still persisted as reference
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))

    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True)
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True)  # same key

    objs = sdp.get("entry", "key")  # origin is automatically set to the loaded objects
    objs[0].value = "value3"
    sdp.modify(evt_digest, "entry", "key", objs[0])
    state = sdp.load_state(sdp.get_snapshot())

    assert state.data == {"entry": {"key": [
        "##REF##:621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0",
        "##REF##:5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"]}}

    # checks that all objects are (still) persisted
    # (the results used to be discarded; they are now asserted)
    assert path.exists(sdp.get_obj_path(sdp.ObjectsFolder,
                                        "621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0"))
    assert path.exists(sdp.get_obj_path(sdp.ObjectsFolder,
                                        "5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"))
    assert path.exists(sdp.get_obj_path(sdp.ObjectsFolder,
                                        "1aac9e0d5c74c3bb989fd0f9def792bba36c5595d32f61be7cbb1a38dcf75327"))


def test_i_can_get_the_entire_entry():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")
    result = sdp.get("entry1")
    result_safe = sdp.get_safe("entry1")

    assert result == ["foo", "bar"]
    assert result_safe == ["foo", "bar"]


def test_i_can_get_an_entry_with_on_object():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", "foo")
    result = sdp.get("entry1")
    result_safe = sdp.get_safe("entry1")

    assert result == "foo"
    assert result_safe == "foo"


def test_i_can_get_an_entry_by_key():
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry1", {"1": "foo"})
    sdp.add(evt_digest, "entry1", {"2": "bar"})
    result = sdp.get("entry1", "2")
    result_safe = sdp.get_safe("entry1", "2")

    assert result == "bar"
    assert result_safe == "bar"


def test_i_can_get_object_save_by_reference():
    sdp = SheerkaDataProvider(".sheerka")
    obj = ObjDumpJson("my_key", "value1")
    sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(obj)))

    entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
    loaded = sdp.get(entry, key)

    assert loaded == obj


def test_i_can_get_objects_from_list_when_saved_by_reference():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))

    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True)
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True)  # same key
    objs = sdp.get("entry", "key")

    assert objs[0] == ObjDumpJson("key", "value1")
    assert objs[1] == ObjDumpJson("key", "value2")


def test_i_cannot_get_an_entry_that_does_not_exist():
    sdp = SheerkaDataProvider(".sheerka")

    assert sdp.get_safe("entry") is None

    with pytest.raises(IndexError) as e:
        sdp.get("entry")

    # The message lives on e.value; str(e) is the ExceptionInfo wrapper, not the message.
    # NOTE(review): this expectation was never effectively checked before - confirm the text.
    assert str(e.value) == "entry"


def test_i_cannot_get_a_key_that_does_not_exist():
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", {"1": "foo"})

    assert sdp.get_safe("entry1", "2") is None

    with pytest.raises(IndexError) as e:
        sdp.get("entry1", "2")

    # NOTE(review): the previous expectation was str(e) == "entry.1", which could never
    # hold (wrong object, and it names neither the entry nor the key requested here).
    # "entry1.2" follows the "<entry>.<key>" form asserted by the other tests of this
    # file - confirm against SheerkaDataProvider.get().
    assert str(e.value) == "entry1.2"


def test_i_can_save_and_retrieve_cache():
    sdp = SheerkaDataProvider(".sheerka")
    txt = "foo bar baz foo bar baz foo bar baz"
    key = "key_to_use"
    category = "cache_category"

    assert not sdp.in_cache(category, key)

    digest = sdp.add_to_cache(category, key, txt)

    assert path.exists(path.join(sdp.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest))
    assert sdp.in_cache(category, key)

    from_cache = sdp.load_from_cache(category, key)
    assert from_cache == txt


def test_cache_is_not_updated_by_default():
    sdp = SheerkaDataProvider(".sheerka")
    txt = "foo bar baz foo bar baz foo bar baz"
    txt2 = "foo foo foo foo foo foo foo foo foo"
    key = "key_to_use"
    category = "cache_category"

    sdp.add_to_cache(category, key, txt)
    sdp.add_to_cache(category, key, txt2)
    from_cache = sdp.load_from_cache(category, key)

    assert from_cache == txt


def test_i_can_update_cache():
    sdp = SheerkaDataProvider(".sheerka")
    txt = "foo bar baz foo bar baz foo bar baz"
    txt2 = "foo foo foo foo foo foo foo foo foo"
    key = "key_to_use"
    category = "cache_category"

    sdp.add_to_cache(category, key, txt)
    sdp.add_to_cache(category, key, txt2, update=True)
    from_cache = sdp.load_from_cache(category, key)

    assert from_cache == txt2


def test_i_can_remove_from_cache():
    sdp = SheerkaDataProvider(".sheerka")
    txt = "foo bar baz foo bar baz foo bar baz"
    key = "key_to_use"
    category = "cache_category"

    sdp.add_to_cache(category, key, txt)
    digest = sdp.remove_from_cache(category, key)

    assert not path.exists(path.join(sdp.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest))
    assert not sdp.in_cache(category, key)


def test_i_can_test_than_an_entry_exits():
    sdp = SheerkaDataProvider(".sheerka")

    assert not sdp.exists("entry")

    sdp.add(evt_digest, "entry", "value")

    assert sdp.exists("entry")


def test_i_can_save_and_load_object_ref_with_history():
    sdp = SheerkaDataProvider(".sheerka")
    obj = ObjDumpJson("my_key", "value1")
    sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(obj)))

    entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
    loaded = sdp.get(entry, key)
    history = getattr(loaded, Serializer.HISTORY)

    assert key == obj.key
    assert entry == "entry"
    assert loaded.key == obj.key
    assert loaded.value == obj.value
    assert history[Serializer.USERNAME] == "kodjo"
    assert history[Serializer.MODIFICATION_DATE] != ""
    assert history[Serializer.PARENTS] == []
    assert os.path.exists(sdp.get_obj_path(sdp.ObjectsFolder, obj.get_digest()))

    # save a second time with no modification
    previous_modification_time = history[Serializer.MODIFICATION_DATE]
    previous_parents = history[Serializer.PARENTS]
    sdp.modify(evt_digest, "entry", key, loaded)
    loaded = sdp.get(entry, key)
    history = getattr(loaded, Serializer.HISTORY)

    assert history[Serializer.MODIFICATION_DATE] == previous_modification_time
    assert history[Serializer.PARENTS] == previous_parents

    # save again, but with a
modification previous_digest = loaded.get_digest() loaded.value = "value2" sdp.modify(evt_digest, "entry", key, loaded) loaded2 = sdp.get(entry, key) history2 = getattr(loaded2, Serializer.HISTORY) assert loaded2.key == loaded.key assert loaded2.value == loaded.value assert history2[Serializer.USERNAME] == "kodjo" assert history2[Serializer.MODIFICATION_DATE] != "" assert history2[Serializer.PARENTS] == [previous_digest] state = sdp.load_state(sdp.get_snapshot()) assert state.data == {"entry": { "my_key": '##REF##:e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256'}}