Added ModifyConcept function, and fixed 'isa' not working

This commit is contained in:
2020-02-20 11:30:53 +01:00
parent 87f232b527
commit 7cd94e888f
17 changed files with 750 additions and 228 deletions
+416 -169
View File
@@ -4,7 +4,7 @@ import pytest
import os
from os import path
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError, \
SheerkaDataProviderDuplicateKeyError
SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderResult, SheerkaDataProviderRef
from datetime import date, datetime
import shutil
import json
@@ -123,6 +123,36 @@ class ObjDumpJson:
self.key = as_dict["key"]
class ObjDumpJsonNoDigest:
    """
    Object where the key can be resolved using get_key()
    that can be used to dump as Json,
    But with no builtin digest computation
    (no digest method is defined on this class).
    """

    def __init__(self, key=None, value=None):
        self.key = key
        self.value = value

    def __eq__(self, obj):
        # only another ObjDumpJsonNoDigest with the same key and value is equal
        if not isinstance(obj, ObjDumpJsonNoDigest):
            return False
        return self.key == obj.key and self.value == obj.value

    def __repr__(self):
        return f"ObjDumpJsonNoDigest({self.key}, {self.value})"

    def get_key(self):
        """Return the key under which this object is stored."""
        return self.key

    def to_dict(self):
        """
        Return the attributes of the object as a dictionary.

        A shallow copy is returned so that a caller that alters the mapping
        (adds or removes entries) does not silently modify this instance.
        """
        return dict(self.__dict__)

    def from_dict(self, as_dict):
        """
        Restore the object from a dictionary holding "value" and "key".

        :raises KeyError: when one of the two entries is missing
        """
        self.value = as_dict["value"]
        self.key = as_dict["key"]
class ObjWithDigestNoKey:
"""
Object that can compute its digest.
@@ -301,13 +331,15 @@ def test_i_can_add_an_string(root):
sdp = SheerkaDataProvider(root)
obj = "foo => bar"
entry, key = sdp.add(evt_digest, "entry", obj)
result = sdp.add(evt_digest, "entry", obj)
last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
state = sdp.load_state(last_commit)
loaded = sdp.get(entry, key)
loaded = sdp.get(result.entry, result.key)
assert entry == "entry"
assert key is None
assert result.obj == obj
assert result.entry == "entry"
assert result.key is None
assert result.digest is None
assert loaded == obj
assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
@@ -330,11 +362,13 @@ def test_i_can_add_several_strings_if_allow_multiple_is_true(root):
sdp.add(evt_digest, "entry", "foo")
sdp.add(evt_digest, "entry", "foo")
entry, key = sdp.add(evt_digest, "entry", "bar")
loaded = sdp.get(entry, key)
result = sdp.add(evt_digest, "entry", "bar")
loaded = sdp.get(result.entry, result.key)
assert entry == "entry"
assert key is None
assert result.obj == "bar"
assert result.entry == "entry"
assert result.key is None
assert result.digest is None
assert loaded == ["foo", "foo", "bar"]
@@ -359,14 +393,15 @@ def test_i_can_add_an_object_with_no_key(root):
sdp = SheerkaDataProvider(root)
obj = ObjNoKey("a", "b")
entry, key = sdp.add(evt_digest, "entry", obj)
result = sdp.add(evt_digest, "entry", obj)
last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
state = sdp.load_state(last_commit)
loaded = sdp.get(entry, key)
loaded = sdp.get(result.entry, result.key)
assert entry == "entry"
assert key is None
assert loaded == obj
assert result.obj == obj
assert result.entry == "entry"
assert result.key is None
assert result.digest is None
assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile))
@@ -388,11 +423,13 @@ def test_i_can_add_several_obj_no_key_if_allow_multiple_is_true(root):
sdp.add(evt_digest, "entry", ObjNoKey("a", "b"))
sdp.add(evt_digest, "entry", ObjNoKey("a", "b"))
entry, key = sdp.add(evt_digest, "entry", ObjNoKey("c", "d"))
loaded = sdp.get(entry, key)
result = sdp.add(evt_digest, "entry", ObjNoKey("c", "d"))
loaded = sdp.get(result.entry, result.key)
assert entry == "entry"
assert key is None
assert result.obj == ObjNoKey("c", "d")
assert result.entry == "entry"
assert result.key is None
assert result.digest is None
assert loaded == [ObjNoKey("a", "b"), ObjNoKey("a", "b"), ObjNoKey("c", "d")]
@@ -429,15 +466,18 @@ def test_i_can_add_a_dict(root):
sdp = SheerkaDataProvider(root)
obj = {"my_key": "my_value"}
entry, key = sdp.add(evt_digest, "entry", obj)
result = sdp.add(evt_digest, "entry", obj)
last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
state = sdp.load_state(last_commit)
loaded = sdp.get(entry, key)
loaded = sdp.get(result.entry, result.key)
loaded_value = sdp.get(entry, "my_key") # we can retrieve by key
loaded_value = sdp.get(result.entry, "my_key") # we can retrieve by key
assert result.obj == obj
assert result.entry == "entry"
assert result.key is None # we return None as dict may contains several entries
assert result.digest is None
assert entry == "entry"
assert key is None # we return None as dict may contains several entries
assert loaded == obj
assert loaded_value == "my_value"
@@ -458,11 +498,17 @@ def test_i_can_add_a_dict(root):
])
def test_i_can_add_multiple_entries_at_once_with_dict(root):
sdp = SheerkaDataProvider(root)
obj = {"my_key1": "value1", "my_key2": "value2"}
entry, key = sdp.add(evt_digest, "entry", {"my_key1": "value1", "my_key2": "value2"})
loaded = sdp.get(entry, key)
loaded_value1 = sdp.get(entry, "my_key1")
loaded_value2 = sdp.get(entry, "my_key2")
result = sdp.add(evt_digest, "entry", obj)
loaded = sdp.get(result.entry, result.key)
loaded_value1 = sdp.get(result.entry, "my_key1")
loaded_value2 = sdp.get(result.entry, "my_key2")
assert result.obj == obj
assert result.entry == "entry"
assert result.key is None # we return None as dict may contains several entries
assert result.digest is None
assert loaded == {"my_key1": "value1", "my_key2": "value2"}
assert loaded_value1 == "value1"
@@ -477,14 +523,14 @@ def test_i_can_add_same_key_with_dict_if_allow_multiple_is_true(root):
sdp = SheerkaDataProvider(root)
sdp.add(evt_digest, "entry", {"my_key": "my_value"})
entry, key = sdp.add(evt_digest, "entry", {"my_key": "my_value"})
loaded1 = sdp.get(entry, key)
result = sdp.add(evt_digest, "entry", {"my_key": "my_value"})
loaded1 = sdp.get(result.entry, result.key)
entry, key = sdp.add(evt_digest, "entry", {"my_key": "my_value2"})
loaded2 = sdp.get(entry, key)
result = sdp.add(evt_digest, "entry", {"my_key": "my_value2"})
loaded2 = sdp.get(result.entry, result.key)
assert entry == "entry"
assert key is None
assert result.entry == "entry"
assert result.key is None
assert loaded1 == {"my_key": ["my_value", "my_value"]}
assert loaded2 == {"my_key": ["my_value", "my_value", "my_value2"]}
@@ -525,19 +571,25 @@ def test_i_can_add_obj_with_key(root):
obj1 = ObjWithKey("key1", "b")
obj2 = ObjSetKey("c", key="key2")
entry1, key1 = sdp.add(evt_digest, "entry", obj1) # test when key is taken from obj.get_key()
entry2, key2 = sdp.add(evt_digest, "entry2", obj2) # test when key is taken from obj.key
result1 = sdp.add(evt_digest, "entry", obj1) # test when key is taken from obj.get_key()
result2 = sdp.add(evt_digest, "entry2", obj2) # test when key is taken from obj.key
last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
state = sdp.load_state(last_commit)
loaded1 = sdp.get(entry1, key1)
loaded2 = sdp.get(entry2, key2)
loaded1 = sdp.get(result1.entry, result1.key)
loaded2 = sdp.get(result2.entry, result2.key)
assert result1.obj == obj1
assert result1.entry == "entry"
assert result1.key == "key1"
assert result1.digest is None
assert result2.obj == obj2
assert result2.entry == "entry2"
assert result2.key == "key2"
assert result2.digest is None
assert entry1 == "entry"
assert key1 == "key1"
assert loaded1 == ObjWithKey("key1", "b")
assert entry2 == "entry2"
assert key2 == "key2"
assert loaded2 == ObjSetKey("c", key="key2")
assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
@@ -559,15 +611,13 @@ def test_i_can_add_objects_with_same_key_if_allow_multiple_is_true(root):
sdp = SheerkaDataProvider(root)
sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b"))
entry, key = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"))
loaded1 = sdp.get(entry, key)
result = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"))
loaded1 = sdp.get(result.entry, result.key)
entry, key = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"))
result = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"))
sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key2")) # to prove that it does not melt everything
loaded2 = sdp.get(entry, key)
loaded2 = sdp.get(result.entry, result.key)
assert entry == "entry"
assert key == "my_key"
assert loaded1 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key")]
assert loaded2 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key"), ObjSetKey("c", key="my_key")]
@@ -608,13 +658,23 @@ def test_i_can_add_a_reference(root):
sdp = SheerkaDataProvider(root)
sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey)))
obj1 = ObjWithDigestWithKey(1, "foo")
sdp.add(evt_digest, "entry", obj1, use_ref=True)
sdp.add(evt_digest, "entry_by_value", {obj1.b: obj1.get_digest()}, is_ref=True)
result1 = sdp.add(evt_digest, "entry", obj1, use_ref=True)
result3 = sdp.add(evt_digest, "entry_by_ref", SheerkaDataProviderRef(obj1.b, obj1.get_digest()))
# another object
obj2 = ObjWithDigestWithKey(2, "bar")
sdp.add(evt_digest, "entry", obj2, use_ref=True)
sdp.add(evt_digest, "entry_by_value", {obj2.b: obj2.get_digest()}, is_ref=True)
sdp.add(evt_digest, "entry_by_ref", SheerkaDataProviderRef(obj2.b, obj2.get_digest()))
assert result1.obj == obj1
assert result1.entry == "entry"
assert result1.key == str(obj1.get_key())
assert result1.digest == obj1.get_digest()
assert result3.obj == SheerkaDataProviderRef(obj1.b, obj1.get_digest())
assert result3.entry == "entry_by_ref"
assert result3.key == "foo"
assert result3.digest is None
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {
@@ -622,22 +682,47 @@ def test_i_can_add_a_reference(root):
"1": '##REF##:' + obj1.get_digest(),
"2": '##REF##:' + obj2.get_digest(),
},
"entry_by_value": {
"foo": '##REF##:' + obj1.get_digest(),
"bar": '##REF##:' + obj2.get_digest()
"entry_by_ref": {
"foo": SheerkaDataProviderRef(obj1.b, obj1.get_digest()),
"bar": SheerkaDataProviderRef(obj2.b, obj2.get_digest())
},
}
# sanity check, make sure that I can load back
loaded1 = sdp.get("entry_by_value", "foo")
# make sure that I can load back
loaded1 = sdp.get("entry_by_ref", "foo")
assert loaded1 == ObjWithDigestWithKey(1, "foo")
assert getattr(loaded1, Serializer.ORIGIN) == obj1.get_digest()
loaded2 = sdp.get("entry_by_value", "bar")
loaded2 = sdp.get("entry_by_ref", "bar")
assert loaded2 == ObjWithDigestWithKey(2, "bar")
assert getattr(loaded2, Serializer.ORIGIN) == obj2.get_digest()
@pytest.mark.parametrize("root", [".sheerka", "mem://"])
def test_i_can_have_multiple_is_ref_to_the_same_key(root):
    """
    Two SheerkaDataProviderRef added under the same key are kept as a list,
    and get() resolves each of them to the referenced object.
    """
    sdp = SheerkaDataProvider(root)
    sdp.serializer.register(
        PickleSerializer(lambda candidate: isinstance(candidate, ObjWithDigestWithKey)))

    # save two distinct objects by reference, to get their digests
    foo_added = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(1, "foo"), use_ref=True)
    bar_added = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(2, "bar"), use_ref=True)

    # both refs target the same key "1" of "entry_ref"
    for added in (foo_added, bar_added):
        sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", added.digest))

    head = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    state = sdp.load_state(head)
    expected_refs = [
        SheerkaDataProviderRef("1", foo_added.digest),
        SheerkaDataProviderRef("1", bar_added.digest),
    ]
    assert state.data == {
        'entry': {'1': '##REF##:1foo', '2': '##REF##:2bar'},
        'entry_ref': {'1': expected_refs},
    }

    # loading the key gives back the referenced objects, in insertion order
    loaded = sdp.get("entry_ref", "1")
    assert len(loaded) == 2
    assert loaded[0] == ObjWithDigestWithKey(1, "foo")
    assert loaded[1] == ObjWithDigestWithKey(2, "bar")
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
@@ -660,21 +745,24 @@ def test_i_can_add_string_using_auto_generated_key(root):
sdp = SheerkaDataProvider(root)
key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile)
entry1, key1 = sdp.add_with_auto_key(evt_digest, "entry1", "foo")
entry2, key2 = sdp.add_with_auto_key(evt_digest, "entry1", "bar")
entry3, key3 = sdp.add_with_auto_key(evt_digest, "entry2", "baz")
result1 = sdp.add_with_auto_key(evt_digest, "entry1", "foo")
result2 = sdp.add_with_auto_key(evt_digest, "entry1", "bar")
result3 = sdp.add_with_auto_key(evt_digest, "entry2", "baz")
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert sdp.io.exists(key_file)
assert read_json_file(sdp, key_file) == {"entry1": 2, "entry2": 1}
assert state.data == {"entry1": {"1": "foo", "2": "bar"}, "entry2": {"1": "baz"}}
assert entry1 == "entry1"
assert entry2 == "entry1"
assert entry3 == "entry2"
assert key1 == "1"
assert key2 == "2"
assert key3 == "1"
assert result1.obj == "foo"
assert result2.obj == "bar"
assert result3.obj == "baz"
assert result1.entry == "entry1"
assert result2.entry == "entry1"
assert result3.entry == "entry2"
assert result1.digest is None
assert result2.digest is None
assert result3.digest is None
@pytest.mark.parametrize("root", [
@@ -759,20 +847,6 @@ def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4(root):
assert error.value.args[0] == "Duplicate object."
def test_i_cannot_add_using_use_ref_and_is_ref():
sdp = SheerkaDataProvider("mem://")
with pytest.raises(SheerkaDataProviderError) as error:
sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"), use_ref=True, is_ref=True)
def test_i_cannot_add_using_is_ref_if_obj_is_not_a_dictionary():
sdp = SheerkaDataProvider("mem://")
with pytest.raises(SheerkaDataProviderError) as error:
sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"), is_ref=True)
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
@@ -800,18 +874,23 @@ def test_i_can_add_object_using_auto_generated_key(root):
sdp = SheerkaDataProvider(root)
key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile)
entry1, key1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b"))
entry2, key2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b"))
result1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b"))
result2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b"))
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert sdp.io.exists(key_file)
assert read_json_file(sdp, key_file) == {"entry1": 2}
assert state.data == {"entry1": {"1": ObjNoKey("a", "b"), "2": ObjNoKey("a", "b")}}
assert entry1 == "entry1"
assert entry2 == "entry1"
assert key1 == "1"
assert key2 == "2"
assert result1.obj == ObjNoKey("a", "b")
assert result2.obj == ObjNoKey("a", "b")
assert result1.entry == "entry1"
assert result2.entry == "entry1"
assert result1.key == "1"
assert result2.key == "2"
assert result1.digest is None
assert result2.digest is None
@pytest.mark.parametrize("root", [
@@ -822,18 +901,23 @@ def test_object_key_is_updated_when_possible_using_auto_generated_key(root):
sdp = SheerkaDataProvider(root)
key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile)
entry1, key1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo"))
entry2, key2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo"))
result1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo"))
result2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo"))
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert sdp.io.exists(key_file)
assert read_json_file(sdp, key_file) == {"entry1": 2}
assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("foo", "2")}}
assert entry1 == "entry1"
assert entry2 == "entry1"
assert key1 == "1"
assert key2 == "2"
assert result1.obj == ObjSetKey("foo", "1")
assert result2.obj == ObjSetKey("foo", "2")
assert result1.entry == "entry1"
assert result2.entry == "entry1"
assert result1.key == "1"
assert result2.key == "2"
assert result1.digest is None
assert result2.digest is None
@pytest.mark.parametrize("root", [
@@ -843,12 +927,13 @@ def test_object_key_is_updated_when_possible_using_auto_generated_key(root):
def test_i_can_set_objects_with_key(root):
sdp = SheerkaDataProvider(root)
sdp.add(evt_digest, "entry", ObjWithKey(1, "foo"))
entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"))
result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"))
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}}
assert entry == "entry"
assert key == "2"
assert result.entry == "entry"
assert result.key == "2"
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -858,12 +943,13 @@ def test_i_can_set_objects_with_key(root):
def test_i_can_set_objects_with_no_key(root):
sdp = SheerkaDataProvider(root)
sdp.add(evt_digest, "entry", ObjNoKey(1, "foo"))
entry, key = sdp.set(evt_digest, "entry", ObjNoKey(2, "foo"))
result = sdp.set(evt_digest, "entry", ObjNoKey(2, "foo"))
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": ObjNoKey(2, "foo")}
assert entry == "entry"
assert key is None
assert result.entry == "entry"
assert result.key is None
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -873,12 +959,13 @@ def test_i_can_set_objects_with_no_key(root):
def test_i_can_set_from_list_to_dict(root):
sdp = SheerkaDataProvider(root)
sdp.set(evt_digest, "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")])
entry, key = sdp.set(evt_digest, "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")})
result = sdp.set(evt_digest, "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")})
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}}
assert entry == "entry"
assert key is None
assert result.entry == "entry"
assert result.key is None
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -889,18 +976,21 @@ def test_i_can_set_using_reference(root):
sdp = SheerkaDataProvider(root)
sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey)))
sdp.add(evt_digest, "entry", ObjWithKey(1, "foo"))
entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True)
result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True)
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {"2": '##REF##:43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9'}}
assert entry == "entry"
assert key == "2"
assert result.obj == ObjWithKey(2, "foo")
assert result.entry == "entry"
assert result.key == "2"
assert result.digest == "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9"
assert sdp.io.exists(sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder,
"43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9"))
# sanity check, make sure that I can load back
loaded = sdp.get(entry, key)
loaded = sdp.get(result.entry, result.key)
assert loaded == ObjWithKey(2, "foo")
assert getattr(loaded, Serializer.ORIGIN) == "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9"
@@ -952,12 +1042,15 @@ def test_i_can_add_an_object_with_a_key_as_a_reference(root):
obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
result = sdp.add(evt_digest, "entry", obj, use_ref=True)
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
digest = state.data["entry"]["my_key"][len(SheerkaDataProvider.REF_PREFIX):]
assert key == obj.key
assert entry == "entry"
assert result.obj == obj
assert result.entry == "entry"
assert result.key == obj.key
assert result.digest == obj.get_digest()
assert digest == result.digest
assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}}
loaded = sdp.load_obj(digest)
@@ -973,15 +1066,18 @@ def test_i_can_add_a_dictionary_as_a_reference(root):
sdp = SheerkaDataProvider(root)
obj = {"my_key": "value1"}
obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
# No need to register a serializer for dictionaries
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
result = sdp.add(evt_digest, "entry", obj, use_ref=True)
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):]
assert key is None
assert entry == "entry"
assert result.obj == obj
assert result.entry == "entry"
assert result.key is None # we return None as dict may contains several entries
assert result.digest == "1790cae3f354ecb6b419faaa2ee2c374ff33efb8cddafda9960924036ac04c1f" # a digest is created
assert digest == result.digest
assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}
loaded = sdp.load_obj(digest)
@@ -990,23 +1086,50 @@ def test_i_can_add_a_dictionary_as_a_reference(root):
assert len(loaded) == 2
@pytest.mark.parametrize("root", [".sheerka", "mem://"])
def test_i_can_add_an_object_with_no_builtin_digest_as_a_reference(root):
    """
    An object without its own digest computation can still be added with
    use_ref=True: the result carries a digest, and the state stores the
    matching reference under the object's key.
    """
    sdp = SheerkaDataProvider(root)
    obj = ObjDumpJsonNoDigest("a", "b")
    sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj)))

    result = sdp.add(evt_digest, "entry", obj, use_ref=True)

    head = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    state = sdp.load_state(head)
    # what is stored is the ref prefix followed by the digest
    stored_ref = state.data["entry"][obj.get_key()]
    digest = stored_ref[len(SheerkaDataProvider.REF_PREFIX):]

    assert result.obj == obj
    assert result.entry == "entry"
    assert result.key == obj.get_key()
    assert result.digest is not None
    assert digest == result.digest
    expected_ref = f"{SheerkaDataProvider.REF_PREFIX}{result.digest}"
    assert state.data == {'entry': {obj.key: expected_ref}}

    # the object loaded from the digest remembers where it comes from
    loaded = sdp.load_obj(digest)
    assert getattr(loaded, Serializer.ORIGIN) == digest
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
])
def test_i_can_add_unique(root):
sdp = SheerkaDataProvider(root)
entry, key = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo"))
assert (entry, key) == ("entry", None)
result = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo"))
assert result == SheerkaDataProviderResult(ObjNoKey(1, "foo"), "entry", None, None, False)
entry, key = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo"))
assert (entry, key) == (None, None)
result = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo"))
assert result == SheerkaDataProviderResult(ObjNoKey(1, "foo"), "entry", None, None, True)
entry, key = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar"))
assert (entry, key) == ("entry", None)
result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar"))
assert result == SheerkaDataProviderResult(ObjNoKey(2, "bar"), "entry", None, None, False)
entry, key = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar"))
assert (entry, key) == (None, None)
result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar"))
assert result == SheerkaDataProviderResult(ObjNoKey(2, "bar"), "entry", None, None, True)
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}}
@@ -1338,12 +1461,14 @@ def test_i_can_modify_dict_with_a_key(root):
sdp.add(evt_digest, "entry", {"key1": "foo"})
sdp.add(evt_digest, "entry", {"key2": "bar"})
entry, key = sdp.modify(evt_digest, "entry", "key1", "baz")
result = sdp.modify(evt_digest, "entry", "key1", "baz")
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {"key1": "baz", "key2": "bar"}}
assert entry == "entry"
assert key == "key1"
assert result.obj == "baz"
assert result.entry == "entry"
assert result.key == "key1"
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -1355,12 +1480,15 @@ def test_i_can_modify_an_object_with_a_key(root):
sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))
entry, key = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key1", "baz"))
result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key1", "baz"))
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {"key1": ObjWithKey("key1", "baz"), "key2": ObjWithKey("key2", "bar")}}
assert entry == "entry"
assert key == "key1"
assert result.obj == ObjWithKey("key1", "baz")
assert result.entry == "entry"
assert result.key == "key1"
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -1372,12 +1500,14 @@ def test_i_can_modify_an_object_while_changing_the_key(root):
sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))
entry, key = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key3", "baz"))
result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key3", "baz"))
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {"key2": ObjWithKey("key2", "bar"), "key3": ObjWithKey("key3", "baz")}}
assert entry == "entry"
assert key == "key3"
assert result.obj == ObjWithKey("key3", "baz")
assert result.entry == "entry"
assert result.key == "key3"
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -1389,12 +1519,14 @@ def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_key(root):
sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"))
entry, key = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key1", "bar"))
result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key1", "bar"))
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {"key1": [ObjWithKey("key1", "foo"), ObjWithKey("key1", "bar")]}}
assert entry == "entry"
assert key == "key1"
assert result.obj == ObjWithKey("key1", "bar")
assert result.entry == "entry"
assert result.key == "key1"
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -1408,7 +1540,6 @@ def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list(
:return:
"""
sdp = SheerkaDataProvider(root)
sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11"))
sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value12"))
@@ -1417,15 +1548,17 @@ def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list(
new_value = ObjDumpJson("key1", "value13")
setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
entry, key = sdp.modify(evt_digest, "entry", "key2", new_value)
result = sdp.modify(evt_digest, "entry", "key2", new_value)
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {
"key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value12"), ObjDumpJson("key1", "value13")],
"key2": [ObjDumpJson("key2", "value22")]
}}
assert entry == "entry"
assert key == "key1"
assert result.obj == new_value
assert result.entry == "entry"
assert result.key == "key1"
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -1446,15 +1579,17 @@ def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_nothi
new_value = ObjDumpJson("key1", "value13")
setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
entry, key = sdp.modify(evt_digest, "entry", "key2", new_value)
result = sdp.modify(evt_digest, "entry", "key2", new_value)
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {
"key1": ObjDumpJson("key1", "value13"),
"key2": [ObjDumpJson("key2", "value22")]
}}
assert entry == "entry"
assert key == "key1"
assert result.obj == new_value
assert result.entry == "entry"
assert result.key == "key1"
assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -1476,34 +1611,143 @@ def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_one_i
new_value = ObjDumpJson("key1", "value13")
setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
entry, key = sdp.modify(evt_digest, "entry", "key2", new_value)
result = sdp.modify(evt_digest, "entry", "key2", new_value)
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {
"key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value13")],
"key2": [ObjDumpJson("key2", "value22")]
}}
assert entry == "entry"
assert key == "key1"
assert result.obj == new_value
assert result.entry == "entry"
assert result.key == "key1"
assert result.digest is None
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
])
def test_i_can_modify_a_ref(root):
def test_i_can_modify_a_object_saved_by_ref(root):
sdp = SheerkaDataProvider(root)
sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey)))
sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo"))
entry, key = sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"), use_ref=True)
sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"), use_ref=True)
sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key2", "baz"))
result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key2", "baz"))
state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
assert state.data == {"entry": {
"key1": ObjWithKey("key1", "foo"),
"key2": "##REF##:041d3cca905b51bc2c66251e73e56b836aae7b9435ee3d7eb05d44bb67ff575e"}}
assert entry == "entry"
assert key == "key2"
assert result.obj == ObjWithKey("key2", "baz")
assert result.entry == "entry"
assert result.key == "key2"
assert result.digest == "041d3cca905b51bc2c66251e73e56b836aae7b9435ee3d7eb05d44bb67ff575e"
@pytest.mark.parametrize("root", [".sheerka", "mem://"])
def test_i_can_modify_an_object_saved_by_ref_in_a_list(root):
    """
    Modifying one object (identified by its origin digest) out of a list of
    objects saved by ref moves it under its new key, still saved by ref.
    """
    sdp = SheerkaDataProvider(root)
    sdp.serializer.register(
        PickleSerializer(lambda candidate: isinstance(candidate, ObjDumpJsonNoDigest)))

    # two objects under "key1" and two under "key2", all saved by ref
    sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key1", "value11"), use_ref=True)
    sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key1", "value12"), use_ref=True)
    origin_added = sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key2", "value21"), use_ref=True)
    sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key2", "value22"), use_ref=True)

    # the new value claims the first "key2" object as its origin, but uses "key1" as key
    new_value = ObjDumpJsonNoDigest("key1", "value13")
    setattr(new_value, Serializer.ORIGIN, origin_added.digest)

    result = sdp.modify(evt_digest, "entry", "key2", new_value)

    head = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    state = sdp.load_state(head)
    expected_key1 = [
        '##REF##:f80a0c0aceb1a7a3d238c0cff2d86d6bd3a62e0c1a65c5b505f43b10c4604bd8',
        '##REF##:239a8238d188c37afa10b1bcc312ca8a0e78f6e75d688ca65d08e16717ff68b0',
        '##REF##:9d0a2bf9d4081de0b14837ea46bc7a1cfb6b7562f7ae86255ea9bd0ac53a6437',
    ]
    expected_key2 = ['##REF##:df8a38b07f469f2ff8001ea6a70f77f4f9ce85d69c530091fcaf4b380f1500d3']
    assert state.data == {"entry": {'key1': expected_key1, 'key2': expected_key2}}

    assert result.obj == new_value
    assert result.entry == "entry"
    assert result.key == "key1"
    assert result.digest is not None
@pytest.mark.parametrize("root", [".sheerka", "mem://"])
def test_i_can_modify_a_data_provider_ref(root):
    """
    A SheerkaDataProviderRef entry can be modified to point to a new digest,
    and get() then resolves it to the newly referenced object.
    """
    sdp = SheerkaDataProvider(root)
    sdp.serializer.register(
        PickleSerializer(lambda candidate: isinstance(candidate, ObjWithDigestWithKey)))

    # first, create a valid entry and a ref pointing to it
    obj = ObjWithDigestWithKey("1", "foo")
    sdp.add(evt_digest, "entry", obj, use_ref=True)
    sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef(obj.b, obj.get_digest()))

    head = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    state = sdp.load_state(head)
    assert state.data == {
        "entry": {"1": "##REF##:1foo"},
        "entry_ref": {"foo": SheerkaDataProviderRef(obj.b, obj.get_digest())},
    }

    # modify the stored object, then repoint the ref to the new digest
    obj_new = ObjWithDigestWithKey("1", "bar")
    sdp.modify(evt_digest, "entry", obj_new.a, obj_new)
    result = sdp.modify(
        evt_digest,
        "entry_ref",
        "foo",
        SheerkaDataProviderRef(obj.b, obj_new.get_digest()))

    head = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    state = sdp.load_state(head)
    assert state.data == {
        "entry": {"1": "##REF##:1bar"},
        "entry_ref": {"foo": SheerkaDataProviderRef(obj.b, obj_new.get_digest())},
    }

    assert result.obj == SheerkaDataProviderRef(obj.b, obj_new.get_digest())
    assert result.entry == "entry_ref"
    assert result.key == "foo"
    # no digest here: the ref itself is stored as a plain value, not by ref
    assert result.digest is None

    # sanity check, the modified entry can be loaded back
    loaded = sdp.get("entry_ref", "foo")
    assert loaded == ObjWithDigestWithKey("1", "bar")
@pytest.mark.parametrize("root", [".sheerka", "mem://"])
def test_i_can_modify_is_ref_when_in_list(root):
    """
    When a key of a reference entry holds several SheerkaDataProviderRef items,
    modify() swaps only one of them and leaves the other untouched.

    NOTE(review): the third argument given to SheerkaDataProviderRef appears to
    be the digest of the item to replace -- confirm against its definition.
    """
    provider = SheerkaDataProvider(root)
    provider.serializer.register(
        PickleSerializer(lambda candidate: isinstance(candidate, ObjWithDigestWithKey)))

    # Two objects saved by reference, both listed under the same key "1".
    first_saved = provider.add(
        evt_digest, "entry", ObjWithDigestWithKey(1, "foo"), use_ref=True)
    second_saved = provider.add(
        evt_digest, "entry", ObjWithDigestWithKey(2, "bar"), use_ref=True)
    provider.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", first_saved.digest))
    provider.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", second_saved.digest))

    # A third object whose digest takes the place of the second one in the list.
    third_saved = provider.add(
        evt_digest, "entry", ObjWithDigestWithKey(3, "baz"), use_ref=True)
    result = provider.modify(
        evt_digest,
        "entry_ref",
        "1",
        SheerkaDataProviderRef("1", third_saved.digest, second_saved.digest))

    head = provider.get_snapshot(SheerkaDataProvider.HeadFile)
    head_state = provider.load_state(head)
    assert head_state.data == {
        'entry': {'1': '##REF##:1foo', '2': '##REF##:2bar', '3': '##REF##:3baz'},
        'entry_ref': {
            '1': [
                SheerkaDataProviderRef("1", first_saved.digest),
                SheerkaDataProviderRef("1", third_saved.digest, second_saved.digest),
            ],
        },
    }

    # Reading through the references gives the first and the third objects.
    reloaded = provider.get("entry_ref", "1")
    assert len(reloaded) == 2
    assert reloaded[0] == ObjWithDigestWithKey(1, "foo")
    assert reloaded[1] == ObjWithDigestWithKey(3, "baz")

    assert result.obj == SheerkaDataProviderRef("1", third_saved.digest, second_saved.digest)
    assert result.entry == "entry_ref"
    assert result.key == "1"
    # No digest is reported: what was stored is the reference itself,
    # and that value is not saved by ref.
    assert result.digest is None
@pytest.mark.parametrize("root", [
@@ -1661,8 +1905,8 @@ def test_i_can_get_object_saved_by_reference(root):
obj = ObjDumpJson("my_key", "value1")
sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj)))
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
loaded = sdp.get(entry, key)
result = sdp.add(evt_digest, "entry", obj, use_ref=True)
loaded = sdp.get(result.entry, result.key)
assert loaded == obj
@@ -1876,12 +2120,15 @@ def test_i_can_save_and_load_object_ref_with_history(root):
obj = ObjDumpJson("my_key", "value1")
sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj)))
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
loaded = sdp.get(entry, key)
result = sdp.add(evt_digest, "entry", obj, use_ref=True)
loaded = sdp.get(result.entry, result.key)
history = getattr(loaded, Serializer.HISTORY)
assert key == obj.key
assert entry == "entry"
assert result.obj == obj
assert result.entry == "entry"
assert result.key == obj.key
assert result.digest == obj.get_digest()
assert loaded.key == obj.key
assert loaded.value == obj.value
@@ -1895,8 +2142,8 @@ def test_i_can_save_and_load_object_ref_with_history(root):
previous_modification_time = history[Serializer.MODIFICATION_DATE]
previous_parents = history[Serializer.PARENTS]
sdp.modify(evt_digest, "entry", key, loaded)
loaded = sdp.get(entry, key)
sdp.modify(evt_digest, "entry", result.key, loaded)
loaded = sdp.get(result.entry, result.key)
history = getattr(loaded, Serializer.HISTORY)
assert history[Serializer.MODIFICATION_DATE] == previous_modification_time
@@ -1906,8 +2153,8 @@ def test_i_can_save_and_load_object_ref_with_history(root):
previous_digest = loaded.get_digest()
loaded.value = "value2"
sdp.modify(evt_digest, "entry", key, loaded)
loaded2 = sdp.get(entry, key)
sdp.modify(evt_digest, "entry", result.key, loaded)
loaded2 = sdp.get(result.entry, result.key)
history2 = getattr(loaded2, Serializer.HISTORY)
assert loaded2.key == loaded.key
@@ -1932,10 +2179,10 @@ def test_i_can_add_obj_with_same_key_and_get_them_back(root):
obj2 = ObjDumpJson("key", "value2")
sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj1)))
entry1, key1 = sdp.add(evt_digest, "entry", obj1, use_ref=True)
entry2, key2 = sdp.add(evt_digest, "entry", obj2, use_ref=True)
result = sdp.add(evt_digest, "entry", obj1, use_ref=True)
sdp.add(evt_digest, "entry", obj2, use_ref=True)
loaded = sdp.get_safe(entry1, key1)
loaded = sdp.get(result.entry, result.key)
assert len(loaded) == 2
assert loaded[0] == obj1
@@ -1953,14 +2200,14 @@ def test_i_get_safe_dictionary_without_origin(root):
obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get_safe(entry, key)
result = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get(result.entry, result.key)
assert len(from_db) == 2
assert from_db["my_key"] == obj["my_key"]
assert Serializer.ORIGIN in from_db
from_db_no_origin = sdp.get_safe(entry, key, load_origin=False)
from_db_no_origin = sdp.get_safe(result.entry, result.key, load_origin=False)
assert len(from_db_no_origin) == 1
assert from_db_no_origin["my_key"] == obj["my_key"]
assert Serializer.ORIGIN not in from_db_no_origin
@@ -1977,14 +2224,14 @@ def test_i_get_dictionary_without_origin(root):
obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get(entry, key)
result = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get(result.entry, result.key)
assert len(from_db) == 2
assert from_db["my_key"] == obj["my_key"]
assert Serializer.ORIGIN in from_db
from_db_no_origin = sdp.get(entry, key, load_origin=False)
from_db_no_origin = sdp.get(result.entry, result.key, load_origin=False)
assert len(from_db_no_origin) == 1
assert from_db_no_origin["my_key"] == obj["my_key"]
assert Serializer.ORIGIN not in from_db_no_origin
@@ -2001,12 +2248,12 @@ def test_i_get_safe_object_without_origin(root):
obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get_safe(entry, key)
result = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get(result.entry, result.key)
assert from_db == obj
assert hasattr(from_db, Serializer.ORIGIN)
from_db_no_origin = sdp.get_safe(entry, key, load_origin=False)
from_db_no_origin = sdp.get_safe(result.entry, result.key, load_origin=False)
assert from_db_no_origin == obj
assert not hasattr(from_db_no_origin, Serializer.ORIGIN)