# Source file: Sheerka-Old/tests/test_sheerkaDataProvider.py
# Snapshot: 2019-11-05 19:56:00 +01:00 (1147 lines, 38 KiB, Python)

import hashlib
import pytest
import os
from os import path
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError
from datetime import date, datetime
import shutil
import json
from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer, SerializerContext, PickleSerializer
# Sandbox directory used by the tests; resolved against the working directory at import time.
tests_root = path.abspath(path.join("..", "build", "tests"))

# Fixed event digest handed to the provider calls that require one.
evt_digest = "3a571cb6034ef6fc8d7fe91948d0d29728eed74de02bac7968b0e9facca2c2d7"
def read_text_file(file_name):
    """Return the whole content of the text file located at *file_name*."""
    with open(file_name) as handle:
        content = handle.read()
    return content
def read_json_file(file_name):
    """Parse the JSON document stored in *file_name* and return the decoded value."""
    with open(file_name) as handle:
        parsed = json.load(handle)
    return parsed
class ObjWithKey:
    """Test object exposing its key through ``get_key()``; the key is attribute ``a``."""

    def __init__(self, a, b):
        # Assignment order is kept (a, then b) because it defines the order of __dict__.
        self.a = a
        self.b = b

    def __eq__(self, obj):
        """Equal to another ObjWithKey carrying the same ``a`` and ``b``."""
        if not isinstance(obj, ObjWithKey):
            return False
        same_a = self.a == obj.a
        return same_a and self.b == obj.b

    def __repr__(self):
        return f"ObjWithKey({self.a}, {self.b})"

    def get_key(self):
        """Return the key of the object, i.e. attribute ``a``."""
        return self.a
class ObjSetKey:
    """Test object whose key is a plain attribute that can be assigned through ``set_key()``."""

    def __init__(self, value, key=None):
        # Assignment order is kept (value, then key) because it defines the order of __dict__.
        self.value = value
        self.key = key

    def __eq__(self, obj):
        """Equal to another ObjSetKey carrying the same ``key`` and ``value``."""
        if not isinstance(obj, ObjSetKey):
            return False
        same_key = self.key == obj.key
        return same_key and self.value == obj.value

    def __repr__(self):
        return f"ObjSetKey({self.key}, {self.value})"

    def set_key(self, key):
        """Replace the key of the object."""
        self.key = key
class ObjNoKey:
    """Hashable test object that exposes no key at all."""

    def __init__(self, a, b):
        # Assignment order is kept (a, then b) because it defines the order of __dict__.
        self.a = a
        self.b = b

    def __hash__(self):
        # Hash of the (a, b) pair, so that equal objects collapse inside a set.
        pair = (self.a, self.b)
        return hash(pair)

    def __eq__(self, obj):
        """Equal to another ObjNoKey carrying the same ``a`` and ``b``."""
        if not isinstance(obj, ObjNoKey):
            return False
        same_a = self.a == obj.a
        return same_a and self.b == obj.b

    def __repr__(self):
        return f"ObjNoKey({self.a}, {self.b})"
class ObjDumpJson:
    """Test object offering a key, a digest and a dict round-trip (``to_dict`` / ``from_dict``)."""

    def __init__(self, key=None, value=None):
        # Assignment order is kept (key, then value) because it defines the order of __dict__.
        self.key = key
        self.value = value

    def __eq__(self, obj):
        """Equal to another ObjDumpJson carrying the same ``key`` and ``value``."""
        if not isinstance(obj, ObjDumpJson):
            return False
        same_key = self.key == obj.key
        return same_key and self.value == obj.value

    def __repr__(self):
        return f"ObjDumpJson({self.key}, {self.value})"

    def get_key(self):
        """Return the key of the object."""
        return self.key

    def get_digest(self):
        """
        Returns the digest of the object, computed from its key and its value
        :return: hexa form of the sha256
        """
        payload = f"Concept:{self.key}{self.value}".encode("utf-8")
        return hashlib.sha256(payload).hexdigest()

    def to_dict(self):
        """Return the attribute dictionary of the instance (the live one, not a copy)."""
        return vars(self)

    def from_dict(self, as_dict):
        """Load ``value`` then ``key`` from *as_dict*."""
        self.value = as_dict["value"]
        self.key = as_dict["key"]
@pytest.fixture(autouse=True)
def init_test():
    """Recreate an empty sandbox folder, run the test from inside it, then restore the working directory."""
    sandbox_already_there = path.exists(tests_root)
    if sandbox_already_there:
        shutil.rmtree(tests_root)
    if not path.exists(tests_root):
        os.makedirs(tests_root)

    previous_dir = os.getcwd()
    os.chdir(tests_root)
    yield
    # Back to where we were before the test started.
    os.chdir(previous_dir)
def test_i_can_init_the_data_provider():
    """The provider resolves its root under the sandbox folder and the folder exists on disk."""
    expected_root = path.join(tests_root, ".sheerka")

    sdp = SheerkaDataProvider(".sheerka")

    assert sdp.root == path.abspath(expected_root)
    assert path.exists(expected_root)
def test_i_can_save_and_load_an_event():
    """An event saved with save_event can be loaded back from the returned digest."""
    sdp = SheerkaDataProvider(".sheerka")
    saved = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo")

    digest = sdp.save_event(saved)
    restored = sdp.load_event(digest)

    # the event file is stored under a sub folder named after the first 24 chars of the digest
    event_file = path.join(sdp.root, SheerkaDataProvider.EventFolder, digest[:24], digest)
    assert path.exists(event_file)
    assert restored.version == 1
    assert restored.date == datetime(year=2007, month=9, day=10)
    assert restored.user == "kodjo"
    assert restored.message == "hello world"
def test_i_can_add_an_string():
    """A single string is stored as the value of its entry, in a state that has no parent."""
    sdp = SheerkaDataProvider(".sheerka")
    text = "foo => bar"

    entry, key = sdp.add(evt_digest, "entry", text)
    head_digest = sdp.get_snapshot()
    state = sdp.load_state(head_digest)
    loaded = sdp.get(entry, key)

    head_file = path.join(sdp.root, SheerkaDataProvider.HeadFile)
    state_file = path.join(sdp.root, SheerkaDataProvider.StateFolder, head_digest[:24], head_digest)

    assert entry == "entry"
    assert key is None
    assert loaded == text
    assert path.exists(state_file)
    assert path.exists(head_file)
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": "foo => bar"}
    # the head file points to the state that was just written
    assert read_text_file(head_file) == head_digest
def test_i_can_add_several_strings_if_allow_multiple_is_true():
    """Strings added to the same entry pile up in a list, duplicates included."""
    sdp = SheerkaDataProvider(".sheerka")

    for text in ("foo", "foo"):
        sdp.add(evt_digest, "entry", text)
    entry, key = sdp.add(evt_digest, "entry", "bar")

    loaded = sdp.get(entry, key)

    assert entry == "entry"
    assert key is None
    assert loaded == ["foo", "foo", "bar"]
def test_i_cannot_add_several_strings_if_allow_multiple_is_false():
    """A second string on the same entry is rejected with IndexError when multiples are not allowed."""
    sdp = SheerkaDataProvider(".sheerka")
    # Setup stays outside the raises block: only the second add is allowed to fail.
    sdp.add(evt_digest, "entry", "foo", False)

    with pytest.raises(IndexError) as index_error:
        sdp.add(evt_digest, "entry", "foo", False)

    # Checked after the block, otherwise the assertion would never be reached.
    assert index_error.value.args[0] == "entry"
def test_i_can_add_an_object_with_no_key():
    """An object without key is stored directly as the value of its entry."""
    sdp = SheerkaDataProvider(".sheerka")
    item = ObjNoKey("a", "b")

    entry, key = sdp.add(evt_digest, "entry", item)
    head_digest = sdp.get_snapshot()
    state = sdp.load_state(head_digest)
    loaded = sdp.get(entry, key)

    head_file = path.join(sdp.root, SheerkaDataProvider.HeadFile)
    state_file = path.join(sdp.root, SheerkaDataProvider.StateFolder, head_digest[:24], head_digest)

    assert entry == "entry"
    assert key is None
    assert loaded == item
    assert path.exists(state_file)
    assert path.exists(head_file)
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": ObjNoKey("a", "b")}
    assert read_text_file(head_file) == head_digest
def test_i_can_add_several_obj_no_key_if_allow_multiple_is_true():
    """Objects without key added to the same entry pile up in a list, duplicates included."""
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", ObjNoKey("a", "b"))
    sdp.add(evt_digest, "entry", ObjNoKey("a", "b"))
    entry, key = sdp.add(evt_digest, "entry", ObjNoKey("c", "d"))

    loaded = sdp.get(entry, key)
    expected = [ObjNoKey("a", "b"), ObjNoKey("a", "b"), ObjNoKey("c", "d")]

    assert entry == "entry"
    assert key is None
    assert loaded == expected
def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false():
    """A second keyless object on the same entry is rejected with IndexError when multiples are not allowed."""
    sdp = SheerkaDataProvider(".sheerka")
    # Setup stays outside the raises block: only the second add is allowed to fail.
    sdp.add(evt_digest, "entry", ObjNoKey("a", "b"), False)

    with pytest.raises(IndexError) as index_error:
        sdp.add(evt_digest, "entry", ObjNoKey("c", "d"), False)

    # Checked after the block, otherwise the assertion would never be reached.
    assert index_error.value.args[0] == "entry"
def test_i_can_add_a_dict():
    """A dict is stored as the entry itself and each of its items can be fetched by key."""
    sdp = SheerkaDataProvider(".sheerka")
    content = {"my_key": "my_value"}

    entry, key = sdp.add(evt_digest, "entry", content)
    head_digest = sdp.get_snapshot()
    state = sdp.load_state(head_digest)
    loaded = sdp.get(entry, key)
    loaded_value = sdp.get(entry, "my_key")  # we can retrieve by key

    head_file = path.join(sdp.root, SheerkaDataProvider.HeadFile)
    state_file = path.join(sdp.root, SheerkaDataProvider.StateFolder, head_digest[:24], head_digest)

    assert entry == "entry"
    assert key is None  # None is returned, as a dict may contain several entries
    assert loaded == content
    assert loaded_value == "my_value"
    assert path.exists(state_file)
    assert path.exists(head_file)
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": content}
    assert read_text_file(head_file) == head_digest
def test_i_can_add_multiple_entries_at_once_with_dict():
    """Every item of a dict added in one call is retrievable, globally and by key."""
    sdp = SheerkaDataProvider(".sheerka")

    entry, key = sdp.add(evt_digest, "entry", {"my_key1": "value1", "my_key2": "value2"})

    whole = sdp.get(entry, key)
    first = sdp.get(entry, "my_key1")
    second = sdp.get(entry, "my_key2")

    assert whole == {"my_key1": "value1", "my_key2": "value2"}
    assert first == "value1"
    assert second == "value2"
def test_i_can_add_same_key_with_dict_if_allow_multiple_is_true():
    """Values added under an already used dict key are gathered in a list under that key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", {"my_key": "my_value"})

    entry, key = sdp.add(evt_digest, "entry", {"my_key": "my_value"})
    after_second_add = sdp.get(entry, key)

    entry, key = sdp.add(evt_digest, "entry", {"my_key": "my_value2"})
    after_third_add = sdp.get(entry, key)

    assert entry == "entry"
    assert key is None
    assert after_second_add == {"my_key": ["my_value", "my_value"]}
    assert after_third_add == {"my_key": ["my_value", "my_value", "my_value2"]}
def test_i_cannot_add_same_key_with_dict_if_allow_multiple_is_false():
    """Re-using a dict key is rejected with IndexError when multiples are not allowed."""
    sdp = SheerkaDataProvider(".sheerka")
    # Setup stays outside the raises block: only the second add is allowed to fail.
    sdp.add(evt_digest, "entry", {"my_key": "my_value"}, False)

    with pytest.raises(IndexError) as index_error:
        sdp.add(evt_digest, "entry", {"my_key": "my_value2"}, False)

    # Checked after the block, otherwise the assertion would never be reached.
    assert index_error.value.args[0] == "entry.my_key"
def test_i_can_add_object_with_different_key_if_allow_multiple_is_false():
    """Distinct dict keys can be added to the same entry even when multiples are not allowed."""
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", {"my_key": "a"}, False)
    sdp.add(evt_digest, "entry", {"my_key2": "b"}, False)

    first = sdp.get("entry", "my_key")
    assert first == "a"
    second = sdp.get("entry", "my_key2")
    assert second == "b"
def test_i_can_add_obj_with_key():
    """Objects that carry a key are stored under that key inside their entry."""
    sdp = SheerkaDataProvider(".sheerka")
    with_getter = ObjWithKey("key1", "b")
    with_attribute = ObjSetKey("c", key="key2")

    entry1, key1 = sdp.add(evt_digest, "entry", with_getter)  # key comes from obj.get_key()
    entry2, key2 = sdp.add(evt_digest, "entry2", with_attribute)  # key comes from obj.key
    head_digest = sdp.get_snapshot()
    state = sdp.load_state(head_digest)
    loaded1 = sdp.get(entry1, key1)
    loaded2 = sdp.get(entry2, key2)

    head_file = path.join(sdp.root, SheerkaDataProvider.HeadFile)
    state_file = path.join(sdp.root, SheerkaDataProvider.StateFolder, head_digest[:24], head_digest)

    assert entry1 == "entry"
    assert key1 == "key1"
    assert loaded1 == ObjWithKey("key1", "b")
    assert entry2 == "entry2"
    assert key2 == "key2"
    assert loaded2 == ObjSetKey("c", key="key2")
    assert path.exists(state_file)
    assert path.exists(head_file)
    assert state.date is not None
    # two adds were made, so the last state has exactly one parent
    assert len(state.parents) == 1
    assert state.events == [evt_digest]
    assert state.data == {"entry": {"key1": with_getter}, "entry2": {"key2": with_attribute}}
    assert read_text_file(head_file) == head_digest
def test_i_can_add_objects_with_same_key_if_allow_multiple_is_true():
    """Objects sharing a key are gathered in a list under that key; other keys are left alone."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b"))

    entry, key = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"))
    after_second_add = sdp.get(entry, key)

    entry, key = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"))
    # another key, to prove that everything is not merged together
    sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key2"))
    after_third_add = sdp.get(entry, key)

    assert entry == "entry"
    assert key == "my_key"
    assert after_second_add == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key")]
    assert after_third_add == [
        ObjWithKey("my_key", "b"),
        ObjSetKey("c", key="my_key"),
        ObjSetKey("c", key="my_key"),
    ]
def test_i_cannot_add_object_with_same_key_if_allow_multiple_is_false():
    """Re-using an object key is rejected with IndexError when multiples are not allowed."""
    sdp = SheerkaDataProvider(".sheerka")
    # Setup stays outside the raises block: only the second add is allowed to fail.
    sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b"), False)

    with pytest.raises(IndexError) as index_error:
        sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"), False)

    # Checked after the block, otherwise the assertion would never be reached.
    assert index_error.value.args[0] == "entry.my_key"
def test_i_can_add_obj_with_key_to_a_list():
    """Once an entry is a list, an object with a key is simply appended to that list."""
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add(evt_digest, "entry", "foo")
    sdp.add(evt_digest, "entry", "bar")  # the entry is now a list
    sdp.add(evt_digest, "entry", ObjWithKey("a", "b"))  # must not be handled as an object with a key

    expected = ["foo", "bar", ObjWithKey("a", "b")]
    assert sdp.get("entry") == expected
def test_i_cannot_add_obj_with_no_key_when_then_entry_has_keys():
    """A keyless value cannot be added to an entry that already stores keyed objects."""
    sdp = SheerkaDataProvider(".sheerka")
    # Setup stays outside the raises block: only the keyless add is allowed to fail.
    sdp.add(evt_digest, "entry", ObjWithKey("a", "b"))

    with pytest.raises(SheerkaDataProviderError) as error:
        sdp.add(evt_digest, "entry", "foo")

    # Checked after the block, otherwise the assertion would never be reached.
    assert error.value.obj == "foo"
def test_i_can_add_string_using_auto_generated_key():
    """add_with_auto_key numbers the items of each entry independently, starting at "1"."""
    sdp = SheerkaDataProvider(".sheerka")
    key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)

    first = sdp.add_with_auto_key(evt_digest, "entry1", "foo")
    second = sdp.add_with_auto_key(evt_digest, "entry1", "bar")
    third = sdp.add_with_auto_key(evt_digest, "entry2", "baz")
    state = sdp.load_state(sdp.get_snapshot())

    entry1, key1 = first
    entry2, key2 = second
    entry3, key3 = third

    assert path.exists(key_file)
    # the keys file keeps the last number handed out for each entry
    assert read_json_file(key_file) == {"entry1": 2, "entry2": 1}
    assert state.data == {"entry1": {"1": "foo", "2": "bar"}, "entry2": {"1": "baz"}}
    assert entry1 == "entry1"
    assert entry2 == "entry1"
    assert entry3 == "entry2"
    assert key1 == "1"
    assert key2 == "2"
    assert key3 == "1"
def test_i_can_get_and_set_key():
    """get_next_key counts up from the value given to set_key, or from scratch for an unknown entry."""
    sdp = SheerkaDataProvider(".sheerka")
    key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)

    sdp.set_key("entry1", 1000)
    for _ in range(3):
        sdp.get_next_key("entry1")
    for _ in range(2):
        sdp.get_next_key("entry2")

    assert path.exists(key_file)
    assert read_json_file(key_file) == {"entry1": 1003, "entry2": 2}
def test_i_can_add_object_using_auto_generated_key():
    """Keyless objects added with add_with_auto_key are stored under generated keys "1", "2"..."""
    sdp = SheerkaDataProvider(".sheerka")
    key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)

    first = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b"))
    second = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b"))
    state = sdp.load_state(sdp.get_snapshot())

    entry1, key1 = first
    entry2, key2 = second
    expected_data = {"entry1": {"1": ObjNoKey("a", "b"), "2": ObjNoKey("a", "b")}}

    assert path.exists(key_file)
    assert read_json_file(key_file) == {"entry1": 2}
    assert state.data == expected_data
    assert entry1 == "entry1"
    assert entry2 == "entry1"
    assert key1 == "1"
    assert key2 == "2"
def test_object_key_is_updated_when_possible_using_auto_generated_key():
    """The stored objects end up carrying the generated key ("1", "2") as their own key."""
    sdp = SheerkaDataProvider(".sheerka")
    key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)

    first = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo"))
    second = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo"))
    state = sdp.load_state(sdp.get_snapshot())

    entry1, key1 = first
    entry2, key2 = second
    expected_data = {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("foo", "2")}}

    assert path.exists(key_file)
    assert read_json_file(key_file) == {"entry1": 2}
    assert state.data == expected_data
    assert entry1 == "entry1"
    assert entry2 == "entry1"
    assert key1 == "1"
    assert key2 == "2"
def test_i_can_set_objects_with_key():
    """set() replaces the content of the entry with the given keyed object."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithKey(1, "foo"))

    result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"))
    state = sdp.load_state(sdp.get_snapshot())

    entry, key = result
    # only the object given to set() remains, under the string form of its key
    assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}}
    assert entry == "entry"
    assert key == "2"
def test_i_can_set_objects_with_no_key():
    """set() replaces the content of the entry with the given keyless object."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjNoKey(1, "foo"))

    result = sdp.set(evt_digest, "entry", ObjNoKey(2, "foo"))
    state = sdp.load_state(sdp.get_snapshot())

    entry, key = result
    assert state.data == {"entry": ObjNoKey(2, "foo")}
    assert entry == "entry"
    assert key is None
def test_i_can_set_from_list_to_dict():
    """An entry first set to a list can then be set to a dict."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.set(evt_digest, "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")])

    result = sdp.set(evt_digest, "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")})
    state = sdp.load_state(sdp.get_snapshot())

    entry, key = result
    expected_data = {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}}
    assert state.data == expected_data
    assert entry == "entry"
    assert key is None
def test_i_can_set_using_reference():
    """With use_ref=True, set() stores a reference in the state and the object in the objects folder."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey)))
    sdp.add(evt_digest, "entry", ObjWithKey(1, "foo"))

    entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True)
    state = sdp.load_state(sdp.get_snapshot())

    expected_data = {"entry": {"2": '##REF##:9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9'}}
    assert state.data == expected_data
    assert entry == "entry"
    assert key == "2"

    object_file = sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder,
                                   "9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9")
    assert path.exists(object_file)

    # sanity check: the object can be loaded back through its reference
    reloaded = sdp.get(entry, key)
    assert reloaded == ObjWithKey(2, "foo")
def test_i_can_add_unique():
    """add_unique keeps a single occurrence of each object, the entry being a set."""
    sdp = SheerkaDataProvider(".sheerka")

    sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo"))
    sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo"))
    sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar"))
    result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar"))
    state = sdp.load_state(sdp.get_snapshot())

    entry, key = result
    expected_items = {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}
    assert state.data == {"entry": expected_items}
    assert entry == "entry"
    assert key is None
def test_i_can_add_reference_of_an_object_with_a_key():
    """With use_ref=True, the state holds a prefixed digest under the object key, and load_obj returns the object."""
    sdp = SheerkaDataProvider(".sheerka")
    item = ObjDumpJson("my_key", "value1")
    sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(item)))

    entry, key = sdp.add(evt_digest, "entry", item, use_ref=True)
    state = sdp.load_state(sdp.get_snapshot())

    # strip the reference prefix to get the bare digest
    prefix_length = len(SheerkaDataProvider.REF_PREFIX)
    digest = state.data["entry"]["my_key"][prefix_length:]

    assert key == item.key
    assert entry == "entry"
    assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}}
    assert sdp.load_obj(digest) == item
def test_i_can_keep_state_history():
    """Each add creates a new state whose parent is the previous one; earlier files stay on disk."""
    sdp = SheerkaDataProvider(".sheerka")

    # first change
    event_digest1 = sdp.save_event(Event("cmd add 'foo => bar'"))
    sdp.add(event_digest1, "entry1", "foo => bar")
    state_digest1 = sdp.get_snapshot()

    # second change
    event_digest2 = sdp.save_event(Event("cmd add 'foo => baz'"))
    sdp.add(event_digest2, "entry2", "foo => baz")
    state_digest2 = sdp.get_snapshot()

    state2 = sdp.load_state(state_digest2)

    events_root = path.join(sdp.root, SheerkaDataProvider.EventFolder)
    states_root = path.join(sdp.root, SheerkaDataProvider.StateFolder)
    assert path.exists(path.join(events_root, event_digest1[:24], event_digest1))
    assert path.exists(path.join(states_root, state_digest1[:24], state_digest1))
    assert path.exists(path.join(events_root, event_digest2[:24], event_digest2))
    assert path.exists(path.join(states_root, state_digest2[:24], state_digest2))

    assert state2.date is not None
    assert state2.parents == [state_digest1]
    assert state2.events == [event_digest2]
    assert state2.data == {"entry1": "foo => bar", "entry2": "foo => baz"}
def test_i_can_list_elements_when_there_is_nothing_to_list():
    """Listing an entry that was never written yields nothing."""
    sdp = SheerkaDataProvider(".sheerka")

    found = list(sdp.list("entry"))

    assert found == []
def test_i_can_list_when_no_key():
    """Listing a keyless entry yields its items in order, including the one stored by reference."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, str)))

    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")
    sdp.add(evt_digest, "entry1", "baz", use_ref=True)
    sdp.add(evt_digest, "entry2", "xyz")  # another entry, must not be listed

    found = list(sdp.list("entry1"))

    assert found == ["foo", "bar", "baz"]
def test_i_can_list_when_key():
    """Listing a keyed entry yields its values, including the one stored by reference."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey)))

    sdp.add(evt_digest, "entry1", {"1": "foo"})
    sdp.add(evt_digest, "entry1", {"2": "bar"})
    sdp.add(evt_digest, "entry1", ObjWithKey("3", "value"), use_ref=True)
    sdp.add(evt_digest, "entry2", {"4": "xxx"})  # another entry, must not be listed

    found = list(sdp.list("entry1"))

    assert found == ["foo", "bar", ObjWithKey("3", "value")]
def test_i_can_list_when_one_element():
    """Listing an entry holding a single value yields that value only."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry2", "baz")

    found = list(sdp.list("entry1"))

    assert found == ["foo"]
def test_i_can_filter_on_key_for_dict():
    """A two-argument predicate can select dict items by key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", {"1": "foo"})
    sdp.add(evt_digest, "entry1", {"2": "bar"})

    selected = list(sdp.list("entry1", lambda k, o: k == "1"))

    assert selected == ["foo"]
def test_i_can_filter_on_key_for_objects():
    """A two-argument predicate can select keyed objects by key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1"))
    sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2"))

    selected = list(sdp.list("entry1", lambda k, o: k == "a1"))

    assert selected == [ObjWithKey("a1", "b1")]
def test_i_can_filter_on_attribute_for_dict():
    """A two-argument predicate can select dict items by looking inside the value."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", {"1": {"a": "a1", "b": "b1"}})
    sdp.add(evt_digest, "entry1", {"2": {"a": "a2", "b": "b2"}})

    selected = list(sdp.list("entry1", lambda k, o: o["a"] == "a2"))

    assert selected == [{"a": "a2", "b": "b2"}]
def test_i_can_filter_on_attribute_for_object():
    """A two-argument predicate can select keyed objects by one of their attributes."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1"))
    sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2"))

    selected = list(sdp.list("entry1", lambda k, o: o.b == "b2"))

    assert selected == [ObjWithKey("a2", "b2")]
def test_i_can_filter_a_list():
    """A one-argument predicate can select items of a keyless entry."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")

    selected = list(sdp.list("entry1", lambda o: o == "bar"))

    assert selected == ["bar"]
def test_i_can_filter_a_list_of_object():
    """A one-argument predicate can select keyless objects by one of their attributes."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", ObjNoKey("a1", "b1"))
    sdp.add(evt_digest, "entry1", ObjNoKey("a2", "b2"))

    selected = list(sdp.list("entry1", lambda o: o.b == "b1"))

    assert selected == [ObjNoKey("a1", "b1")]
def test_i_can_remove_all_elements():
    """remove() without predicate empties the entry and returns the digest written to the head file."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")

    state_digest = sdp.remove(evt_digest, "entry1")
    remaining = sdp.list("entry1")

    head_file = path.join(sdp.root, SheerkaDataProvider.HeadFile)
    assert read_text_file(head_file) == state_digest
    assert list(remaining) == []
def test_i_can_remove_a_element():
    """remove() with a one-argument predicate drops only the matching item of a list."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", "foo")
    sdp.add(evt_digest, "entry1", "bar")

    sdp.remove(evt_digest, "entry1", lambda o: o == "foo")

    remaining = list(sdp.list("entry1"))
    assert remaining == ["bar"]
def test_i_can_remove_dict_by_key():
    """remove() with a two-argument predicate drops the dict item whose key matches."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", {"1": ObjNoKey("a1", "b1")})
    sdp.add(evt_digest, "entry1", {"2": ObjNoKey("a2", "b2")})

    sdp.remove(evt_digest, "entry1", lambda k, o: k == "2")

    remaining = list(sdp.list("entry1"))
    assert remaining == [ObjNoKey("a1", "b1")]
def test_i_can_remove_when_only_one_element():
    """Removing the only value of an entry leaves nothing to list."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", "foo")

    sdp.remove(evt_digest, "entry1", lambda o: o == "foo")

    remaining = list(sdp.list("entry1"))
    assert remaining == []
def test_i_cannot_remove_if_entry_does_not_exist():
    """Removing from an entry that was never written raises IndexError naming the entry."""
    sdp = SheerkaDataProvider(".sheerka")

    with pytest.raises(IndexError) as e:
        sdp.remove(evt_digest, "entry")

    # ``e`` is pytest's ExceptionInfo wrapper: the message lives on ``e.value``,
    # and the check must sit after the block to be executed at all.
    assert str(e.value) == "entry"
def test_i_cannot_modify_an_entry_without_a_key():
    """modify() refuses a None key with a SheerkaDataProviderError."""
    sdp = SheerkaDataProvider(".sheerka")

    with pytest.raises(SheerkaDataProviderError) as error:
        sdp.modify(evt_digest, "entry", None, "baz")

    message = error.value.args[0]
    assert message == "Key is mandatory."
def test_i_can_modify_dict_with_a_key():
    """modify() replaces the value stored under a dict key and leaves the other keys untouched."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", {"key1": "foo"})
    sdp.add(evt_digest, "entry", {"key2": "bar"})

    result = sdp.modify(evt_digest, "entry", "key1", "baz")
    state = sdp.load_state(sdp.get_snapshot())

    entry, key = result
    assert state.data == {"entry": {"key1": "baz", "key2": "bar"}}
    assert entry == "entry"
    assert key == "key1"
def test_i_can_modify_an_object_with_a_key():
    """modify() replaces the object stored under a key when the new object keeps that key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
    sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))

    result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key1", "baz"))
    state = sdp.load_state(sdp.get_snapshot())

    entry, key = result
    expected_data = {"entry": {"key1": ObjWithKey("key1", "baz"), "key2": ObjWithKey("key2", "bar")}}
    assert state.data == expected_data
    assert entry == "entry"
    assert key == "key1"
def test_i_can_modify_an_object_while_changing_the_key():
    """When the new object carries another key, it moves under that key and the old key disappears."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
    sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))

    result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key3", "baz"))
    state = sdp.load_state(sdp.get_snapshot())

    entry, key = result
    expected_data = {"entry": {"key2": ObjWithKey("key2", "bar"), "key3": ObjWithKey("key3", "baz")}}
    assert state.data == expected_data
    assert entry == "entry"
    assert key == "key3"
def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_key():
    """When the new key is already used, the modified object joins the existing one in a list."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
    sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))

    result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key1", "bar"))
    state = sdp.load_state(sdp.get_snapshot())

    entry, key = result
    expected_data = {"entry": {"key1": [ObjWithKey("key1", "foo"), ObjWithKey("key1", "bar")]}}
    assert state.data == expected_data
    assert entry == "entry"
    assert key == "key1"
def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list():
    """
    The item to modify sits in a list, its key changes,
    and the new key already holds a list
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
    for item_key, item_value in (("key1", "value11"), ("key1", "value12"),
                                 ("key2", "value21"), ("key2", "value22")):
        sdp.add(evt_digest, "entry", ObjDumpJson(item_key, item_value))

    # the origin attribute tells which item of the list is being replaced
    replacement = ObjDumpJson("key1", "value13")
    origin_digest = ObjDumpJson("key2", "value21").get_digest()
    setattr(replacement, Serializer.ORIGIN, origin_digest)

    entry, key = sdp.modify(evt_digest, "entry", "key2", replacement)
    state = sdp.load_state(sdp.get_snapshot())

    expected_data = {"entry": {
        "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value12"), ObjDumpJson("key1", "value13")],
        "key2": [ObjDumpJson("key2", "value22")]
    }}
    assert state.data == expected_data
    assert entry == "entry"
    assert key == "key1"
def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_nothing():
    """
    The item to modify sits in a list, its key changes,
    and nothing is stored yet under the new key
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
    for item_value in ("value21", "value22"):
        sdp.add(evt_digest, "entry", ObjDumpJson("key2", item_value))

    # the origin attribute tells which item of the list is being replaced
    replacement = ObjDumpJson("key1", "value13")
    origin_digest = ObjDumpJson("key2", "value21").get_digest()
    setattr(replacement, Serializer.ORIGIN, origin_digest)

    entry, key = sdp.modify(evt_digest, "entry", "key2", replacement)
    state = sdp.load_state(sdp.get_snapshot())

    expected_data = {"entry": {
        "key1": ObjDumpJson("key1", "value13"),
        "key2": [ObjDumpJson("key2", "value22")]
    }}
    assert state.data == expected_data
    assert entry == "entry"
    assert key == "key1"
def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_one_item():
    """
    The item to modify sits in a list, its key changes,
    and the new key holds a single element
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
    for item_key, item_value in (("key1", "value11"), ("key2", "value21"), ("key2", "value22")):
        sdp.add(evt_digest, "entry", ObjDumpJson(item_key, item_value))

    # the origin attribute tells which item of the list is being replaced
    replacement = ObjDumpJson("key1", "value13")
    origin_digest = ObjDumpJson("key2", "value21").get_digest()
    setattr(replacement, Serializer.ORIGIN, origin_digest)

    entry, key = sdp.modify(evt_digest, "entry", "key2", replacement)
    state = sdp.load_state(sdp.get_snapshot())

    expected_data = {"entry": {
        "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value13")],
        "key2": [ObjDumpJson("key2", "value22")]
    }}
    assert state.data == expected_data
    assert entry == "entry"
    assert key == "key1"
def test_i_can_modify_a_ref():
    """Modifying an object that was stored by reference leaves a reference in the state."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey)))
    sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
    entry, key = sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"), use_ref=True)

    sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key2", "baz"))
    state = sdp.load_state(sdp.get_snapshot())

    expected_data = {"entry": {
        "key1": ObjWithKey("key1", "foo"),
        "key2": "##REF##:eb297e98710dd17244bb0e38eb9f1bf72cba692a8f8d94e9eb2d898e130cac8b"}}
    assert state.data == expected_data
    assert entry == "entry"
    assert key == "key2"
def test_i_cannot_modify_an_entry_that_does_not_exist():
    """Modifying an entry that was never written raises IndexError naming the entry."""
    sdp = SheerkaDataProvider(".sheerka")

    with pytest.raises(IndexError) as e:
        sdp.modify(evt_digest, "entry", "key", "foo")

    message = str(e.value)
    assert message == "entry"
def test_i_cannot_modify_a_key_that_does_not_exist():
    """Modifying an unknown key of an existing entry raises IndexError naming ``entry.key``."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", {"1": "foo"})

    with pytest.raises(IndexError) as e:
        sdp.modify(evt_digest, "entry1", "2", "bar")

    # ``e`` is pytest's ExceptionInfo wrapper: the message lives on ``e.value``,
    # and the check must sit after the block to be executed at all.
    assert str(e.value) == "entry1.2"
def test_i_cannot_modify_a_list_when_origin_is_unknown():
    """With several objects under one key and no origin on the new value, modify() raises."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjWithKey("key", "value1"))
    sdp.add(evt_digest, "entry", ObjWithKey("key", "value2"))  # same key

    with pytest.raises(SheerkaDataProviderError) as error:
        sdp.modify(evt_digest, "entry", "key", ObjWithKey("key", "value2"))

    assert error.value.obj == ObjWithKey("key", "value2")
    assert error.value.args[0] == "Multiple entries under 'entry.key'"
def test_i_can_modify_a_list_when_the_origin_is_known():
    """The origin digest set on the new value selects which item of the list is replaced."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"))
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"))  # same key

    replacement = ObjDumpJson("key", "value3")
    origin_digest = ObjDumpJson("key", "value1").get_digest()
    setattr(replacement, Serializer.ORIGIN, origin_digest)
    sdp.modify(evt_digest, "entry", "key", replacement)

    state = sdp.load_state(sdp.get_snapshot())
    expected_items = [ObjDumpJson("key", "value3"), ObjDumpJson("key", "value2")]
    assert state.data == {"entry": {"key": expected_items}}
def test_i_can_modify_a_list_when_the_origin_is_known_2():
    """
    This time, we check that the origin is automatically set when the object was saved as a reference
    We also check that all objects are still persisted as reference
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True)
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True)  # same key

    objs = sdp.get("entry", "key")  # origin is automatically set to the loaded objects
    objs[0].value = "value3"
    sdp.modify(evt_digest, "entry", "key", objs[0])

    state = sdp.load_state(sdp.get_snapshot())
    assert state.data == {"entry": {"key": [
        "##REF##:621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0",
        "##REF##:5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"]}}

    # checks that all objects are (still) persisted; the results are asserted,
    # a bare path.exists() call would verify nothing
    assert path.exists(
        sdp.get_obj_path(sdp.ObjectsFolder, "621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0"))
    assert path.exists(
        sdp.get_obj_path(sdp.ObjectsFolder, "5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"))
    # NOTE(review): presumably the digest of the initial "value1" object - confirm
    assert path.exists(
        sdp.get_obj_path(sdp.ObjectsFolder, "1aac9e0d5c74c3bb989fd0f9def792bba36c5595d32f61be7cbb1a38dcf75327"))
def test_i_can_get_the_entire_entry():
    """get() and get_safe() both return the whole list when no key is given."""
    sdp = SheerkaDataProvider(".sheerka")
    for text in ("foo", "bar"):
        sdp.add(evt_digest, "entry1", text)

    strict = sdp.get("entry1")
    lenient = sdp.get_safe("entry1")

    assert strict == ["foo", "bar"]
    assert lenient == ["foo", "bar"]
def test_i_can_get_an_entry_with_on_object():
    """An entry holding a single value is returned as that value, not as a list."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", "foo")

    strict = sdp.get("entry1")
    lenient = sdp.get_safe("entry1")

    assert strict == "foo"
    assert lenient == "foo"
def test_i_can_get_an_entry_by_key():
    """get() and get_safe() both return the value stored under the requested key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", {"1": "foo"})
    sdp.add(evt_digest, "entry1", {"2": "bar"})

    strict = sdp.get("entry1", "2")
    lenient = sdp.get_safe("entry1", "2")

    assert strict == "bar"
    assert lenient == "bar"
def test_i_can_get_object_save_by_reference():
    """An object stored with use_ref=True is returned by get() as an equal object."""
    sdp = SheerkaDataProvider(".sheerka")
    item = ObjDumpJson("my_key", "value1")
    serializer = ObjectSerializer(BaseSerializer.get_full_qualified_name(item))
    sdp.serializer.register(serializer)

    entry, key = sdp.add(evt_digest, "entry", item, use_ref=True)

    assert sdp.get(entry, key) == item
def test_i_can_get_objects_from_list_when_saved_by_reference():
    """Objects sharing a key and stored by reference are returned by get() in insertion order."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True)
    sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True)  # same key

    loaded = sdp.get("entry", "key")

    first, second = loaded[0], loaded[1]
    assert first == ObjDumpJson("key", "value1")
    assert second == ObjDumpJson("key", "value2")
def test_i_cannot_get_an_entry_that_does_not_exist():
    """For an unknown entry, get_safe() returns None while get() raises IndexError naming the entry."""
    sdp = SheerkaDataProvider(".sheerka")

    assert sdp.get_safe("entry") is None

    with pytest.raises(IndexError) as e:
        sdp.get("entry")

    # ``e`` is pytest's ExceptionInfo wrapper: the message lives on ``e.value``,
    # and the check must sit after the block to be executed at all.
    assert str(e.value) == "entry"
def test_i_cannot_get_a_key_that_does_not_exist():
    """For an unknown key, get_safe() returns None while get() raises IndexError."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(evt_digest, "entry1", {"1": "foo"})

    assert sdp.get_safe("entry1", "2") is None

    with pytest.raises(IndexError) as e:
        sdp.get("entry1", "2")

    # ``e`` is pytest's ExceptionInfo wrapper: the message lives on ``e.value``,
    # and the check must sit after the block to be executed at all.
    # NOTE(review): the message is assumed to be "<entry>.<key>", as asserted for
    # add() and modify() elsewhere in this module - confirm against the provider.
    assert str(e.value) == "entry1.2"
def test_i_can_save_and_retrieve_cache():
    """A text added to the cache is written on disk, reported by in_cache and loadable again."""
    sdp = SheerkaDataProvider(".sheerka")
    category, key = "cache_category", "key_to_use"
    txt = "foo bar baz foo bar baz foo bar baz"

    assert not sdp.in_cache(category, key)

    digest = sdp.add_to_cache(category, key, txt)

    cache_file = path.join(sdp.root, SheerkaDataProvider.CacheFolder, digest[:24], digest)
    assert path.exists(cache_file)
    assert sdp.in_cache(category, key)
    assert sdp.load_from_cache(category, key) == txt
def test_cache_is_not_updated_by_default():
    """Adding to the cache again under the same category and key keeps the first value."""
    sdp = SheerkaDataProvider(".sheerka")
    category, key = "cache_category", "key_to_use"
    first_text = "foo bar baz foo bar baz foo bar baz"
    second_text = "foo foo foo foo foo foo foo foo foo"

    sdp.add_to_cache(category, key, first_text)
    sdp.add_to_cache(category, key, second_text)

    assert sdp.load_from_cache(category, key) == first_text
def test_i_can_update_cache():
    """With update=True, adding to the cache again replaces the stored value."""
    sdp = SheerkaDataProvider(".sheerka")
    category, key = "cache_category", "key_to_use"
    first_text = "foo bar baz foo bar baz foo bar baz"
    second_text = "foo foo foo foo foo foo foo foo foo"

    sdp.add_to_cache(category, key, first_text)
    sdp.add_to_cache(category, key, second_text, update=True)

    assert sdp.load_from_cache(category, key) == second_text
def test_i_can_remove_from_cache():
    """Removing a cached item deletes its file and in_cache no longer reports it."""
    sdp = SheerkaDataProvider(".sheerka")
    category, key = "cache_category", "key_to_use"
    sdp.add_to_cache(category, key, "foo bar baz foo bar baz foo bar baz")

    digest = sdp.remove_from_cache(category, key)

    cache_file = path.join(sdp.root, SheerkaDataProvider.CacheFolder, digest[:24], digest)
    assert not path.exists(cache_file)
    assert not sdp.in_cache(category, key)
def test_i_can_test_than_an_entry_exits():
    """exists() is falsy before the entry is written and truthy afterwards."""
    sdp = SheerkaDataProvider(".sheerka")

    known_before = sdp.exists("entry")
    assert not known_before

    sdp.add(evt_digest, "entry", "value")

    known_after = sdp.exists("entry")
    assert known_after
def test_i_can_save_and_load_object_ref_with_history():
    """
    Follows the history attached to an object stored by reference:
    first save, save again unchanged, then save again after a change
    """
    sdp = SheerkaDataProvider(".sheerka")
    original = ObjDumpJson("my_key", "value1")
    sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(original)))

    # first save
    entry, key = sdp.add(evt_digest, "entry", original, use_ref=True)
    loaded = sdp.get(entry, key)
    history = getattr(loaded, Serializer.HISTORY)

    assert key == original.key
    assert entry == "entry"
    assert loaded.key == original.key
    assert loaded.value == original.value
    assert history[Serializer.USERNAME] == "kodjo"
    assert history[Serializer.MODIFICATION_DATE] != ""
    assert history[Serializer.PARENTS] == []
    assert os.path.exists(sdp.get_obj_path(sdp.ObjectsFolder, original.get_digest()))

    # save a second time with no modification: the history must not move
    date_before = history[Serializer.MODIFICATION_DATE]
    parents_before = history[Serializer.PARENTS]
    sdp.modify(evt_digest, "entry", key, loaded)
    loaded = sdp.get(entry, key)
    history = getattr(loaded, Serializer.HISTORY)

    assert history[Serializer.MODIFICATION_DATE] == date_before
    assert history[Serializer.PARENTS] == parents_before

    # save again, but with a modification: the previous digest becomes the parent
    digest_before_change = loaded.get_digest()
    loaded.value = "value2"
    sdp.modify(evt_digest, "entry", key, loaded)
    reloaded = sdp.get(entry, key)
    new_history = getattr(reloaded, Serializer.HISTORY)

    assert reloaded.key == loaded.key
    assert reloaded.value == loaded.value
    assert new_history[Serializer.USERNAME] == "kodjo"
    assert new_history[Serializer.MODIFICATION_DATE] != ""
    assert new_history[Serializer.PARENTS] == [digest_before_change]

    state = sdp.load_state(sdp.get_snapshot())
    expected_data = {"entry": {
        "my_key": '##REF##:e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256'}}
    assert state.data == expected_data