diff --git a/core/sheerka.py b/core/sheerka.py
index e9fc95e..09a5a2b 100644
--- a/core/sheerka.py
+++ b/core/sheerka.py
@@ -85,7 +85,7 @@ class Sheerka(Concept, metaclass=Singleton):
return ReturnValue(True, self.get_concept(Sheerka.SUCCESS_CONCEPT_NAME, True))
def eval(self, text):
- #evt_digest = self.sdp.save_event(Event(text))
+ # evt_digest = self.sdp.save_event(Event(text))
result = self.try_parse(text)
return_values = []
diff --git a/docs/blog.rst b/docs/blog.rst
new file mode 100644
index 0000000..641bfa8
--- /dev/null
+++ b/docs/blog.rst
@@ -0,0 +1,228 @@
+2019-10-30
+**********
+
+What is Sheerka ?
+"""""""""""""""""
+
+Sheerka is a *communication* language,
+as opposed to the traditional *programming* languages. Its
+purpose is to ease the communication between the (wo)man and the machine,
+ultimately using the voice. I will first use it to program faster, and maybe
+more easily.
+
+.. _ulysse31: https://fr.wikipedia.org/wiki/Ulysse_31
+
+Where does the name Sheerka come from ?
+"""""""""""""""""""""""""""""""""""""""
+Sheerka is my misspelling of Shyrka, from my childhood anime ulysse31_.
+For those who don't know this old cartoon, it's the story of Homer's Odyssey,
+transposed to the 31st century. Ulysses has a spacecraft with an AI named Shyrka.
+
+I was a great fan of this cartoon when I was young. I thought that the idea of
+bringing the ancient story of Ulysses into the future was brilliant.
+
+Ever since then, Sheerka has been my reference for any sophisticated computer. Unfortunately
+for me, at that time there was no Wikipedia to tell me the correct spelling.
+
+Model v0
+""""""""
+In my view, everything begins with the **Events**. Basically, they are the commands (i.e. requests)
+entered by the users.
+
+The events are parsed to understand what is required, and they produce a new **State**.
+The state is like a big dictionary that holds everything that is known by the system.
+
+Most of the elements saved in the **State** are the **Concepts**. In this first version,
+it's a little bit complicated to define what a **Concept** is, as it can have several
+usages. To make it simple, I will say that a **Concept** is an idea that can be
+manipulated by the rest of the system.
+I am pretty sure that its form and usage will evolve as I work with
+concepts.
+
+- Each **State** has a reference to the event(s) that triggered this state
+- Each **State** has a **history**
+- Each **Concept** has a **history**
+
+
+A **history** is a triplet of
+
+- user name
+- modification date
+- digest of the parent
+
+.. _git: https://git-scm.com/
+
+Personally, I borrowed this way of tracking modifications from how it's done in git_;
+I guess Linus Torvalds took it from somewhere.
+
+
+2019-10-31
+**********
+
+More on Concepts
+""""""""""""""""
+To define a new concept
+
+::
+
+ def concept hello a as "hello" + a
+
+
+Note that the traditional quotes that would surround ``hello a`` are not necessary.
+In this example 'a' is a variable, as it appears as a variable in the 'as' section.
+
+So, you could call the concept by
+
+::
+
+ hello kodjo
+ hello my friend
+
+They will produce the strings "hello kodjo" or "hello my friend"
+
+About versioning
+""""""""""""""""
+As I said previously, I mimic how git_ versions its objects.
+
+::
+
+ Obj v0 : parents = []
+ user name =
+ modification date =
+ digest = xxxxx
+
+ Obj v1: parents = [xxxxx]
+ user name =
+ modification date =
+ digest = yyyyy
+
+ Obj v2: parents = [yyyyy]
+ user name =
+ modification date =
+ digest = zzzzz
+
+I always keep a reference to the last version of the object, so I can navigate through
+the versions using the :code:`parents` attribute of the object
+
+In git_, there are basically two types of objects :
+
+- **content** (file content, or directory structure)
+- **reference** to content (commit or tags)
+
+The hash of a **content** depends only on the content itself, while the hash of a **reference** also depends
+on the user name, the modification date and the parents. In both cases, the hash is
+computed on the whole object. So the hash can also be used to check the integrity
+of an object.
+
+For my objects, I need to decide how I compute the hash.
+
+**Concepts** have a history. If I decide to include the history in the hash,
+as the modification date is :code:`datetime.now()`, a new version will be created
+even if the **Concept** has not changed. If I don't include it, the integrity of
+what is saved is no longer guaranteed.
+
+I choose to value identity over integrity. The hash code of a **Concept** does not depend
+on its history. We will see what the future will say about this.
+
+2019-11-01
+**********
+
+Inspired by CodinGame
+""""""""""""""""""""""
+
+
+.. _codingame: https://www.codingame.com/home
+
+I am trying to teach my little kid how to code. He is 12 years old and it was his very
+first time.
+
+Rather than trying a standard formal approach, we went on the codingame_ web site. There
+are some pros and cons to using this platform, especially for absolute beginners, but
+I like the visual output of the programs. It's really like coding a game!
+
+What I hadn't noticed previously is that (at least for the first programs) the solution
+is given in human language.
+
+For example, for the exercise called "The descent" you will find
+
+::
+
+ For each round of play :
+ Reset the variables containing the index of the highest mountain and its height to 0
+ For each mountain index (from 0 to 7 included) :
+ Read the height of the mountain (variable :code:`mountainH`) from stdin
+ If it's higher than the highest known mountain, save its index and height
+ Returns the index of the highest mountain on stdout
+
+It would be great if Sheerka were able to produce some code from these instructions.
+
+Some words on data persistence
+"""""""""""""""""""""""""""""""""""""""""
+As I previously said (or not), the main difference between Sheerka and other languages
+is that Sheerka has a memory of its (her? :-) previous interactions with the users.
+
+The **Concepts**, as well as the **Events** or the **Rules** are persisted. Because of
+that, I think that the more Sheerka is used, the easier it will be to use.
+
+So my first focus was to decide which database to use.
+
+There are tons of different databases already on the market. Unfortunately for me, I'm not
+a database expert. But I already knew that I was not looking for a traditional
+relational database (RDBMS) as the structure will evolve and I didn't want to spend
+my time redesigning the schemas and the constraints.
+
+As I was learning Python, it could have been a good idea to also start looking at an
+already existing NoSQL database. I started to look at MongoDB, but I got lazy. I knew that
+the top feature was the management of the history (the way git does it), and it was not
+provided by Mongo, or I didn't notice it in my first readings on the subject.
+
+So I decided to design and implement my own database.
+
+
+SheerkaDataProvider (sdp)
+"""""""""""""""""""""""""
+Not a great name, I confess. But who cares ?
+
+What are the main design constraints?
+
+::
+
+ 1. No coupling to the filesystem.
+ We must not care about where the data are stored.
+ The first implementation will be file based, but it has to be extensible.
+ The final target will be to have a decentralized persistence
+ 2. CRUD operations are designed according to my needs
+ I don't want standard CRUD operations that will be tweaked.
+ The direct consequence is that this library won't fit any other purpose
+ 3. History management for State and other objects for free.
+
+
+sdp, like many modern database systems, is a dictionary: a big list of key-value pairs.
+The key is a string, the value can be almost anything. Actually, for my needs, I guess
+that I only need strings, numbers and lists (of strings and numbers :-)
+
+I need one level of categorization. That means that my objects can be grouped. The basic
+signature to add a new element is :code:`add(entry, obj)`.
+
+with
+
+::
+
+ entry : is the group / category where I want to put the object
+ object : object to persist
+
+With :code:`add("Concepts", "foo")` the database, let's call it **State** once and for all, will be updated like this:
+
+.. code:: python
+
+    {"Concepts" : "foo"}
+
+If I want to have another entry, I don't want to care about what was previously done. I
+need the second call :code:`add("Concepts", "bar")` to produce
+
+.. code-block:: json
+
+    {"Concepts" : ["foo", "bar"]}
+
+So we are no longer in the usual way of implementing a CRUD.
+
diff --git a/sdp/sheerkaDataProvider.py b/sdp/sheerkaDataProvider.py
index be47436..259baf4 100644
--- a/sdp/sheerkaDataProvider.py
+++ b/sdp/sheerkaDataProvider.py
@@ -49,6 +49,18 @@ class Event(object):
self.message = as_dict["message"]
+class ObjWithKey:
+ """
+ Internal key value class to hold the key (and the value)
+ when it is detected
+ It's created to distinguish from {key, value}
+ """
+
+ def __init__(self, key, obj):
+ self.key = key
+ self.obj = obj
+
+
class State:
"""
Class that represents the state of the system (dictionary of all known entries)
@@ -62,25 +74,73 @@ class State:
self.data = {}
def update(self, entry, obj, append=True):
- obj_to_use = {str(obj.get_key()): obj} if hasattr(obj, "get_key") else obj
+ """
+ adds obj to entry
+ :param entry:
+ :param obj:
+ :param append: if True, ducplicate keys will create lists
+ :return:
+ """
+ obj_to_use = {obj.key: obj.obj} if isinstance(obj, ObjWithKey) else obj
if entry not in self.data:
self.data[entry] = obj_to_use
- elif isinstance(obj_to_use, dict):
- if append:
+
+ elif not append:
+ if isinstance(obj_to_use, dict):
self.data[entry].update(obj_to_use)
else:
self.data[entry] = obj_to_use
+
elif isinstance(self.data[entry], list):
- if append:
- self.data[entry].append(obj_to_use)
- else:
- self.data[entry] = obj_to_use
+ self.data[entry].append(obj.obj if isinstance(obj, ObjWithKey) else obj) # do not use obj_to_use !
+
+ elif isinstance(obj_to_use, dict):
+ for k in obj_to_use:
+ if k not in self.data[entry]:
+ self.data[entry][k] = obj_to_use[k]
+ elif isinstance(self.data[entry][k], list):
+ self.data[entry][k].append(obj_to_use[k])
+ else:
+ self.data[entry][k] = [self.data[entry][k], obj_to_use[k]]
+
+ elif isinstance(self.data[entry], dict):
+ raise SheerkaDataProviderError(f"Cannot found key on '{obj}' while all other elements have.", obj)
+
else:
- if append:
- self.data[entry] = [self.data[entry], obj_to_use]
- else:
- self.data[entry] = obj_to_use
+ self.data[entry] = [self.data[entry], obj_to_use]
+
+ def modify(self, entry, key, obj, obj_key):
+ # if the key changes, make sure to remove the previous entry
+ append = False
+ if obj_key != key:
+ self.remove(entry, lambda k, o: k == key) # modify from on object to another
+ append = True
+
+ self.update(entry, ObjWithKey(obj_key, obj), append=append)
+
+ def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed):
+ found = False
+ to_remove = None
+ for i in range(len(self.data[entry][key])):
+ item, is_ref = load_ref_if_needed(self.data[entry][key][i])
+ if not hasattr(item, "get_digest"):
+ continue
+ if item.get_digest() == obj_origin:
+ obj = save_ref_if_needed(is_ref, obj)
+ if obj_key == key:
+ self.data[entry][key][i] = obj
+ else:
+ to_remove = i
+ self.update(entry, ObjWithKey(obj_key, obj), append=True)
+ found = True
+ break
+
+ if not found:
+ raise (SheerkaDataProviderError(f"Cannot modify '{entry}.{key}'. Item '{obj_origin}' not found.", obj))
+
+ if to_remove is not None:
+ del self.data[entry][key][to_remove]
def remove(self, entry, filter):
if filter is None:
@@ -108,13 +168,29 @@ class State:
return hashlib.sha256(as_json.encode("utf-8")).hexdigest()
def contains(self, entry, key):
+ """
+ if key is None, returns True if entry exists
+ if key has a value
+ returns True if entry is an dict and contains key
+ :param entry:
+ :param key:
+ :return:
+ """
if entry not in self.data:
return False
+ if key is None:
+ return entry in self.data
if not isinstance(self.data[entry], dict):
return False
return key in self.data[entry]
+class SheerkaDataProviderError(Exception):
+ def __init__(self, message, obj):
+ Exception.__init__(self, message)
+ self.obj = obj
+
+
class SheerkaDataProvider:
"""Manages the state of the system"""
@@ -124,6 +200,7 @@ class SheerkaDataProvider:
CacheFolder = "cache"
HeadFile = "HEAD"
KeysFile = "keys"
+ REF_PREFIX = "##REF##:"
def __init__(self, root=None):
@@ -137,37 +214,72 @@ class SheerkaDataProvider:
self.serializer = Serializer()
def get_obj_path(self, object_type, digest):
- path.join(self.root, object_type, digest[:24], digest)
+ return path.join(self.root, object_type, digest[:24], digest)
- def add(self, event: Event, entry, obj):
+ @staticmethod
+ def get_obj_key(obj):
+ """
+ Tries to find the key of an object
+ Look for .key, .get_key()
+ :param obj:
+ :return: String version of that is found, None otherwise
+ """
+ return str(obj.key) if hasattr(obj, "key") else str(obj.get_key()) if hasattr(obj, "get_key") else None
+
+ @staticmethod
+ def get_stream_digest(stream):
+ sha256_hash = hashlib.sha256()
+ for byte_block in iter(lambda: stream.read(4096), b""):
+ sha256_hash.update(byte_block)
+
+ stream.seek(0)
+ return sha256_hash.hexdigest()
+
+ @staticmethod
+ def is_reference(obj):
+ return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)
+
+ def add(self, event: Event, entry, obj, allow_multiple=True, use_ref=False):
"""
Adds obj to the entry 'entry'
:param event: events that triggers the update of the state
:param entry: entry of the state to update
:param obj: obj to insert or add
- :return: new sha256 of the state
+ :param allow_multiple: if set to true, the same key can be added several times.
+ All entries will be put in a list
+ :param use_ref: if True the actual object is saved under 'objects' folder,
+ only a reference is saved in the state
+ :return: (entry, key) to retrieve the object
"""
+
event_digest = self.save_event(event)
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
- # check uniqueness, cannot add the same key twice
- obj_key = str(obj.get_key()) if hasattr(obj, "get_key") else None
- if state.contains(entry, obj_key):
- raise IndexError(f"{entry}.{obj_key}")
- elif isinstance(obj, dict):
- for k in obj:
- if state.contains(entry, k):
- raise IndexError(f"{entry}.{k}")
+ # check uniqueness, cannot add the same key twice if allow_multiple == False
+ key = self.get_obj_key(obj)
+ if not allow_multiple:
+ if isinstance(obj, dict):
+ for k in obj:
+ if state.contains(entry, k):
+ raise IndexError(f"{entry}.{k}")
+ else:
+ if state.contains(entry, key):
+ raise IndexError(f"{entry}.{key}" if key else entry)
state.parents = [] if snapshot is None else [snapshot]
state.events = [event_digest]
state.date = datetime.now()
- state.update(entry, obj if obj_key is None else {obj_key: obj})
+
+ if use_ref:
+ digest = self.save_obj(obj)
+ obj = ObjWithKey(key, self.REF_PREFIX + digest) if key else self.REF_PREFIX + digest
+
+ state.update(entry, obj if (isinstance(obj, ObjWithKey) or key is None) else ObjWithKey(key, obj))
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
- return new_snapshot
+ return entry, key
def add_with_auto_key(self, event: Event, entry, obj):
"""
@@ -180,7 +292,8 @@ class SheerkaDataProvider:
next_key = self.get_next_key(entry)
if hasattr(obj, "set_key"):
obj.set_key(next_key)
- return self.add(event, entry, {next_key: obj})
+ self.add(event, entry, ObjWithKey(next_key, obj))
+ return entry, next_key
def add_unique(self, event: Event, entry, obj):
"""Add an entry and make sure it's unique"""
@@ -198,14 +311,16 @@ class SheerkaDataProvider:
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
- return new_snapshot
+ return entry, None
- def set(self, event: Event, entry, obj):
+ def set(self, event: Event, entry, obj, use_ref=False):
"""
- Add or replace an element
+ Add or replace an entry. The entry is reinitialized.
+ If the previous value was dict, all keys are lost
:param event:
:param entry:
:param obj:
+ :param use_ref:
:return:
"""
event_digest = self.save_event(event)
@@ -215,22 +330,30 @@ class SheerkaDataProvider:
state.parents = [] if snapshot is None else [snapshot]
state.events = [event_digest]
state.date = datetime.now()
- obj_key = str(obj.get_key()) if hasattr(obj, "get_key") else None
- state.update(entry, obj if obj_key is None else {obj_key: obj}, append=False)
+
+ key = self.get_obj_key(obj)
+ obj = self.save_ref_if_needed(use_ref, obj)
+
+ state.data[entry] = obj if key is None else {key: obj}
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
- return new_snapshot
+ return entry, key
def modify(self, event: Event, entry, key, obj):
"""
- Updates an existing element when element are saved by key
+ Replace an element
+ If the key is not provided, has the same effect than set eg, the entry is reset
:param event:
:param entry:
:param key: key of the object to update
:param obj: new data
:return:
"""
+
+ if key is None:
+ raise SheerkaDataProviderError("Key is mandatory.", None)
+
event_digest = self.save_event(event)
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
@@ -244,14 +367,24 @@ class SheerkaDataProvider:
state.parents = [] if snapshot is None else [snapshot]
state.events = [event_digest]
state.date = datetime.now()
- if key is None:
- state.data[entry] = obj
+
+ # Gets obj original key, it will help to know if the key has changed
+ obj_key = self.get_obj_key(obj) or key
+
+ if isinstance(state.data[entry][key], list):
+ if not hasattr(obj, Serializer.ORIGIN):
+ raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj))
+
+ obj_origin = getattr(obj, Serializer.ORIGIN)
+ state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed)
+
else:
- state.update(entry, {key: obj})
+ obj = self.save_ref_if_needed(self.is_reference(state.data[entry][key]), obj)
+ state.modify(entry, key, obj, obj_key)
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
- return new_snapshot
+ return entry, obj_key
def list(self, entry, filter=None):
"""
@@ -272,7 +405,7 @@ class SheerkaDataProvider:
filter_to_use = (lambda k, o: True) if filter is None else filter
for key, element in elements.items():
if filter_to_use(key, element):
- yield element
+ yield self.load_ref_if_needed(element)[0]
else:
# manage when no key is defined for the elements
if not isinstance(elements, list) and not isinstance(elements, set):
@@ -281,7 +414,7 @@ class SheerkaDataProvider:
filter_to_use = (lambda o: True) if filter is None else filter
for element in elements:
if filter_to_use(element):
- yield element
+ yield self.load_ref_if_needed(element)[0]
def remove(self, event: Event, entry, filter=None):
"""
@@ -325,7 +458,11 @@ class SheerkaDataProvider:
if key is not None and key not in state.data[entry]:
raise IndexError(f"{entry}.{key}")
- return state.data[entry] if key is None else state.data[entry][key]
+ item = state.data[entry] if key is None else state.data[entry][key]
+ if isinstance(item, list):
+ return [self.load_ref_if_needed(i)[0] for i in item]
+
+ return self.load_ref_if_needed(item)[0]
def get_safe(self, entry, key=None):
"""
@@ -343,7 +480,7 @@ class SheerkaDataProvider:
if key is not None and key not in state.data[entry]:
return None
- return state.data[entry] if key is None else state.data[entry][key]
+ return self.load_ref_if_needed(state.data[entry] if key is None else state.data[entry][key])[0]
def exists(self, entry):
"""
@@ -407,10 +544,9 @@ class SheerkaDataProvider:
return self.serializer.deserialize(f, None)
def save_obj(self, obj):
- if hasattr(obj, "key") and hasattr(obj, "key_name") and obj.key is None:
- obj.key = self.get_next_key(obj.key_name)
+ stream = self.serializer.serialize(obj, SerializerContext(user_name="kodjo"))
+ digest = obj.get_digest() if hasattr(obj, "get_digest") else self.get_stream_digest(stream)
- digest = obj.get_digest()
target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
if path.exists(target_path):
return digest
@@ -419,17 +555,44 @@ class SheerkaDataProvider:
os.makedirs(path.dirname(target_path))
with open(target_path, "wb") as f:
- f.write(self.serializer.serialize(obj, SerializerContext("kodjo", digest)).read())
+ f.write(stream.read())
return digest
def load_obj(self, digest):
if digest is None:
- return State()
+ return None
target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
+ if not path.exists(target_path):
+ return None
+
with open(target_path, "rb") as f:
- return self.serializer.deserialize(f, SerializerContext("kodjo", digest))
+ obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
+
+ # set the origin of the object
+ if not isinstance(obj, str):
+ setattr(obj, Serializer.ORIGIN, digest)
+ return obj
+
+ def load_ref_if_needed(self, obj):
+ if not isinstance(obj, str):
+ return obj, False
+ if not obj.startswith(SheerkaDataProvider.REF_PREFIX):
+ return obj, False
+
+ resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):])
+ if resolved is None:
+ return obj, False
+
+ return resolved, True
+
+ def save_ref_if_needed(self, save_ref, obj):
+ if not save_ref:
+ return obj
+
+ digest = self.save_obj(obj)
+ return self.REF_PREFIX + digest
def get_cache_params(self, category, key):
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
diff --git a/sdp/sheerkaSerializer.py b/sdp/sheerkaSerializer.py
index 8688201..c1ae818 100644
--- a/sdp/sheerkaSerializer.py
+++ b/sdp/sheerkaSerializer.py
@@ -20,8 +20,8 @@ def json_default_converter(o):
@dataclass()
class SerializerContext:
- user_name: str
- origin: str
+ user_name: str = None
+ origin: str = None
class Serializer:
@@ -37,7 +37,7 @@ class Serializer:
# add builtin serializers
self.register(EventSerializer())
- self.register(PickleSerializer())
+ self.register(StateSerializer())
self.register(ConceptSerializer())
def register(self, serializer):
@@ -205,11 +205,12 @@ class ObjectSerializer(BaseSerializer):
class PickleSerializer(BaseSerializer):
- def __init__(self):
- BaseSerializer.__init__(self, "P", 1)
+ def __init__(self, predicate, name="P", version=1):
+ BaseSerializer.__init__(self, name, version)
+ self.predicate = predicate
def match(self, obj):
- return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
+ return self.predicate(obj)
def dump(self, stream, obj, context):
stream.write(pickle.dumps(obj))
@@ -220,6 +221,12 @@ class PickleSerializer(BaseSerializer):
return pickle.loads(stream.read())
+class StateSerializer(PickleSerializer):
+ def __init__(self, ):
+ PickleSerializer.__init__(self, lambda obj: BaseSerializer.get_full_qualified_name(
+ obj) == "sdp.sheerkaDataProvider.State", "S", 1)
+
+
class ConceptSerializer(ObjectSerializer):
def __init__(self):
ObjectSerializer.__init__(self, "core.concept.Concept", "C", 1)
diff --git a/tests/test_sheerkaDataProvider.py b/tests/test_sheerkaDataProvider.py
index 39c0898..02398e5 100644
--- a/tests/test_sheerkaDataProvider.py
+++ b/tests/test_sheerkaDataProvider.py
@@ -3,12 +3,12 @@ import hashlib
import pytest
import os
from os import path
-from sdp.sheerkaDataProvider import SheerkaDataProvider, Event
+from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError
from datetime import date, datetime
import shutil
import json
-from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer
+from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer, SerializerContext, PickleSerializer
tests_root = path.abspath("../build/tests")
@@ -75,7 +75,7 @@ class ObjNoKey:
class ObjDumpJson:
- def __init__(self, key, value):
+ def __init__(self, key=None, value=None):
self.key = key
self.value = value
@@ -91,6 +91,10 @@ class ObjDumpJson:
return self.key
def get_digest(self):
+ """
+ Returns the digest of the event
+ :return: hexa form of the sha256
+ """
return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest()
def to_dict(self):
@@ -123,7 +127,7 @@ def test_i_can_init_the_data_provider():
assert path.exists(path.join(tests_root, ".sheerka"))
-def test_i_can_add_and_retrieve_an_event():
+def test_i_can_save_and_load_an_event():
sdp = SheerkaDataProvider(".sheerka")
event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo")
@@ -137,17 +141,23 @@ def test_i_can_add_and_retrieve_an_event():
assert evt.message == "hello world"
-def test_i_can_add_an_object():
+def test_i_can_add_an_string():
sdp = SheerkaDataProvider(".sheerka")
event = Event("cmd add 'foo => bar'")
event_digest = event.get_digest()
obj = "foo => bar"
- state_digest = sdp.add(event, "entry", obj)
- state = sdp.load_state(state_digest)
+ entry, key = sdp.add(event, "entry", obj)
+ last_commit = sdp.get_snapshot()
+ state = sdp.load_state(last_commit)
+ loaded = sdp.get(entry, key)
+
+ assert entry == "entry"
+ assert key is None
+ assert loaded == obj
assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest[0:24], event_digest))
- assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, state_digest[0:24], state_digest))
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile))
assert state.date is not None
@@ -155,121 +165,349 @@ def test_i_can_add_an_object():
assert state.events == [event_digest]
assert state.data == {"entry": "foo => bar"}
- assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == state_digest
+ assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit
-def test_i_can_add_multiple_elements_in_an_entry():
+def test_i_can_add_several_strings_if_allow_multiple_is_true():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ sdp.add(Event("event"), "entry", "foo")
+ sdp.add(Event("event"), "entry", "foo")
+ entry, key = sdp.add(Event("event"), "entry", "bar")
+ loaded = sdp.get(entry, key)
+
+ assert entry == "entry"
+ assert key is None
+ assert loaded == ["foo", "foo", "bar"]
+
+
+def test_i_cannot_add_several_strings_if_allow_multiple_is_false():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ with pytest.raises(IndexError) as index_error:
+ sdp.add(Event("event"), "entry", "foo", False)
+ sdp.add(Event("event"), "entry", "foo", False)
+ assert index_error.value.args[0] == "entry"
+
+
+def test_i_can_add_an_object_with_no_key():
sdp = SheerkaDataProvider(".sheerka")
event = Event("cmd add 'foo => bar'")
+ event_digest = event.get_digest()
+ obj = ObjNoKey("a", "b")
- state_digest1 = sdp.add(event, "entry", 1)
- state1 = sdp.load_state(state_digest1)
+ entry, key = sdp.add(event, "entry", obj)
+ last_commit = sdp.get_snapshot()
+ state = sdp.load_state(last_commit)
+ loaded = sdp.get(entry, key)
- state_digest2 = sdp.add(event, "entry", 2)
- state2 = sdp.load_state(state_digest2)
+ assert entry == "entry"
+ assert key is None
+ assert loaded == obj
- state_digest3 = sdp.add(event, "entry", 3)
- state3 = sdp.load_state(state_digest3)
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest[0:24], event_digest))
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile))
- assert state1.data == {"entry": 1}
- assert state2.data == {"entry": [1, 2]}
- assert state3.data == {"entry": [1, 2, 3]}
+ assert state.date is not None
+ assert state.parents == []
+ assert state.events == [event_digest]
+ assert state.data == {"entry": ObjNoKey("a", "b")}
+
+ assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit
-def test_i_can_add_element_using_auto_generated_key():
+def test_i_can_add_several_obj_no_key_if_allow_multiple_is_true():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ sdp.add(Event("event"), "entry", ObjNoKey("a", "b"))
+ sdp.add(Event("event"), "entry", ObjNoKey("a", "b"))
+ entry, key = sdp.add(Event("event"), "entry", ObjNoKey("c", "d"))
+ loaded = sdp.get(entry, key)
+
+ assert entry == "entry"
+ assert key is None
+ assert loaded == [ObjNoKey("a", "b"), ObjNoKey("a", "b"), ObjNoKey("c", "d")]
+
+
+def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ with pytest.raises(IndexError) as index_error:
+ sdp.add(Event("event"), "entry", ObjNoKey("a", "b"), False)
+ sdp.add(Event("event"), "entry", ObjNoKey("c", "d"), False)
+ assert index_error.value.args[0] == "entry"
+
+
+def test_i_can_add_a_dict():
+ sdp = SheerkaDataProvider(".sheerka")
+ event = Event("cmd add 'foo => bar'")
+ event_digest = event.get_digest()
+ obj = {"my_key": "my_value"}
+
+ entry, key = sdp.add(event, "entry", obj)
+ last_commit = sdp.get_snapshot()
+ state = sdp.load_state(last_commit)
+ loaded = sdp.get(entry, key)
+
+ loaded_value = sdp.get(entry, "my_key") # we can retrieve by key
+
+ assert entry == "entry"
+ assert key is None # we return None as dict may contains several entries
+ assert loaded == obj
+ assert loaded_value == "my_value"
+
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest[0:24], event_digest))
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile))
+
+ assert state.date is not None
+ assert state.parents == []
+ assert state.events == [event_digest]
+ assert state.data == {"entry": obj}
+
+ assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit
+
+
+def test_i_can_add_multiple_entries_at_once_with_dict():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ entry, key = sdp.add(Event("event"), "entry", {"my_key1": "value1", "my_key2": "value2"})
+ loaded = sdp.get(entry, key)
+ loaded_value1 = sdp.get(entry, "my_key1")
+ loaded_value2 = sdp.get(entry, "my_key2")
+
+ assert loaded == {"my_key1": "value1", "my_key2": "value2"}
+ assert loaded_value1 == "value1"
+ assert loaded_value2 == "value2"
+
+
+def test_i_can_add_same_key_with_dict_if_allow_multiple_is_true():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ sdp.add(Event("event"), "entry", {"my_key": "my_value"})
+ entry, key = sdp.add(Event("event"), "entry", {"my_key": "my_value"})
+ loaded1 = sdp.get(entry, key)
+
+ entry, key = sdp.add(Event("event"), "entry", {"my_key": "my_value2"})
+ loaded2 = sdp.get(entry, key)
+
+ assert entry == "entry"
+ assert key is None
+ assert loaded1 == {"my_key": ["my_value", "my_value"]}
+ assert loaded2 == {"my_key": ["my_value", "my_value", "my_value2"]}
+
+
+def test_i_cannot_add_same_key_with_dict_if_allow_multiple_is_false():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ with pytest.raises(IndexError) as index_error:
+ sdp.add(Event("event"), "entry", {"my_key": "my_value"}, False)
+ sdp.add(Event("event"), "entry", {"my_key": "my_value2"}, False)
+ assert index_error.value.args[0] == "entry.my_key"
+
+
+def test_i_can_add_object_with_different_key_if_allow_multiple_is_false():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ sdp.add(Event("event"), "entry", {"my_key": "a"}, False)
+ sdp.add(Event("event"), "entry", {"my_key2": "b"}, False)
+
+ assert sdp.get("entry", "my_key") == "a"
+ assert sdp.get("entry", "my_key2") == "b"
+
+
+def test_i_can_add_obj_with_key():
+ sdp = SheerkaDataProvider(".sheerka")
+ event = Event("cmd add 'foo => bar'")
+ event_digest = event.get_digest()
+ obj1 = ObjWithKey("key1", "b")
+ obj2 = ObjSetKey("c", key="key2")
+
+ entry1, key1 = sdp.add(event, "entry", obj1) # test when key is taken from obj.get_key()
+ entry2, key2 = sdp.add(event, "entry2", obj2) # test when key is taken from obj.key
+ last_commit = sdp.get_snapshot()
+ state = sdp.load_state(last_commit)
+
+ loaded1 = sdp.get(entry1, key1)
+ loaded2 = sdp.get(entry2, key2)
+
+ assert entry1 == "entry"
+ assert key1 == "key1"
+ assert loaded1 == ObjWithKey("key1", "b")
+ assert entry2 == "entry2"
+ assert key2 == "key2"
+ assert loaded2 == ObjSetKey("c", key="key2")
+
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest[0:24], event_digest))
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
+ assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile))
+
+ assert state.date is not None
+ assert len(state.parents) == 1
+ assert state.events == [event_digest]
+ assert state.data == {"entry": {"key1": obj1}, "entry2": {"key2": obj2}}
+
+ assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit
+
+
+def test_i_can_add_objects_with_same_key_if_allow_multiple_is_true():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ sdp.add(Event("event"), "entry", ObjWithKey("my_key", "b"))
+ entry, key = sdp.add(Event("event"), "entry", ObjSetKey("c", key="my_key"))
+ loaded1 = sdp.get(entry, key)
+
+ entry, key = sdp.add(Event("event"), "entry", ObjSetKey("c", key="my_key"))
+ sdp.add(Event("event"), "entry", ObjSetKey("c", key="my_key2")) # to prove that it does not mix everything together
+ loaded2 = sdp.get(entry, key)
+
+ assert entry == "entry"
+ assert key == "my_key"
+ assert loaded1 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key")]
+ assert loaded2 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key"), ObjSetKey("c", key="my_key")]
+
+
+def test_i_cannot_add_object_with_same_key_if_allow_multiple_is_false():
+    """
+    Check that adding a second object under an existing key raises IndexError carrying "entry.my_key"
+    when add() receives False as its fourth argument (allow_multiple, going by the test name).
+    """
+    sdp = SheerkaDataProvider(".sheerka")
+    # Setup stays outside pytest.raises so that only the duplicate add is allowed to raise.
+    sdp.add(Event("event"), "entry", ObjWithKey("my_key", "b"), False)
+
+    with pytest.raises(IndexError) as index_error:
+        sdp.add(Event("event"), "entry", ObjSetKey("c", key="my_key"), False)
+
+    # Asserted after the block: statements following the raising call inside it never run.
+    assert index_error.value.args[0] == "entry.my_key"
+
+
+def test_i_can_add_obj_with_key_to_a_list():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ sdp.add(Event("event"), "entry", "foo")
+ sdp.add(Event("event"), "entry", "bar") # entry is now a list
+ sdp.add(Event("event"), "entry", ObjWithKey("a", "b")) # this entry must not be taken as an object with a key
+
+ loaded = sdp.get("entry")
+ assert loaded == ["foo", "bar", ObjWithKey("a", "b")]
+
+
+def test_i_cannot_add_obj_with_no_key_when_then_entry_has_keys():
+    """
+    Check that adding a plain string to an entry that already holds a keyed object raises
+    SheerkaDataProviderError, with the rejected value exposed as the error's `obj` attribute.
+    """
+    sdp = SheerkaDataProvider(".sheerka")
+    # Setup stays outside pytest.raises so that only the key-less add is allowed to raise.
+    sdp.add(Event("event"), "entry", ObjWithKey("a", "b"))
+
+    with pytest.raises(SheerkaDataProviderError) as error:
+        sdp.add(Event("event"), "entry", "foo")
+
+    assert error.value.obj == "foo"
+
+
+def test_i_can_add_string_using_auto_generated_key():
sdp = SheerkaDataProvider(".sheerka")
event = Event("cmd add 'foo => bar'")
key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)
- sdp.add_with_auto_key(event, "entry1", "foo")
- sdp.add_with_auto_key(event, "entry1", "bar")
- sdp.add_with_auto_key(event, "entry2", "baz")
+ entry1, key1 = sdp.add_with_auto_key(event, "entry1", "foo")
+ entry2, key2 = sdp.add_with_auto_key(event, "entry1", "bar")
+ entry3, key3 = sdp.add_with_auto_key(event, "entry2", "baz")
state = sdp.load_state(sdp.get_snapshot())
assert path.exists(key_file)
assert read_json_file(key_file) == {"entry1": 2, "entry2": 1}
assert state.data == {"entry1": {"1": "foo", "2": "bar"}, "entry2": {"1": "baz"}}
+ assert entry1 == "entry1"
+ assert entry2 == "entry1"
+ assert entry3 == "entry2"
+ assert key1 == "1"
+ assert key2 == "2"
+ assert key3 == "1"
-def test_i_can_add_and_auto_set_the_key():
+def test_i_can_add_object_using_auto_generated_key():
sdp = SheerkaDataProvider(".sheerka")
event = Event("cmd add 'foo => bar'")
key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)
- sdp.add_with_auto_key(event, "entry1", ObjSetKey("foo"))
- sdp.add_with_auto_key(event, "entry1", ObjSetKey("bar"))
+ entry1, key1 = sdp.add_with_auto_key(event, "entry1", ObjNoKey("a", "b"))
+ entry2, key2 = sdp.add_with_auto_key(event, "entry1", ObjNoKey("a", "b"))
state = sdp.load_state(sdp.get_snapshot())
assert path.exists(key_file)
assert read_json_file(key_file) == {"entry1": 2}
- assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("bar", "2")}}
+ assert state.data == {"entry1": {"1": ObjNoKey("a", "b"), "2": ObjNoKey("a", "b")}}
+ assert entry1 == "entry1"
+ assert entry2 == "entry1"
+ assert key1 == "1"
+ assert key2 == "2"
-def test_i_can_add_an_object_with_its_own_key():
+def test_object_key_is_updated_when_possible_using_auto_generated_key():
sdp = SheerkaDataProvider(".sheerka")
event = Event("cmd add 'foo => bar'")
+ key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)
- sdp.add(event, "entry", ObjWithKey(1, "foo"))
- sdp.add(event, "entry", ObjWithKey(2, "bar"))
+ entry1, key1 = sdp.add_with_auto_key(event, "entry1", ObjSetKey("foo"))
+ entry2, key2 = sdp.add_with_auto_key(event, "entry1", ObjSetKey("foo"))
state = sdp.load_state(sdp.get_snapshot())
- assert state.data == {"entry": {"1": ObjWithKey(1, "foo"), "2": ObjWithKey(2, "bar")}}
-
-def test_i_can_add_dictionary():
- sdp = SheerkaDataProvider(".sheerka")
- event = Event("cmd add 'foo => bar'")
-
- sdp.add(event, "entry", {"1": "foo"})
- sdp.add(event, "entry", {"2": "bar"})
-
- state = sdp.load_state(sdp.get_snapshot())
- assert state.data == {"entry": {"1": "foo", "2": "bar"}}
-
-
-def test_i_cannot_add_the_same_key_twice():
- sdp = SheerkaDataProvider(".sheerka")
- sdp.add(Event("event"), "entry", {"1": "foo"})
-
- with pytest.raises(IndexError):
- sdp.add(Event("event"), "entry", {"1": "foo"})
-
-
-def test_i_cannot_add_the_same_element_twice():
- sdp = SheerkaDataProvider(".sheerka")
- sdp.add(Event("event"), "entry", ObjWithKey(1, "foo"))
-
- with pytest.raises(IndexError):
- sdp.add(Event("event"), "entry", ObjWithKey(1, "foo"))
+ assert path.exists(key_file)
+ assert read_json_file(key_file) == {"entry1": 2}
+ assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("foo", "2")}}
+ assert entry1 == "entry1"
+ assert entry2 == "entry1"
+ assert key1 == "1"
+ assert key2 == "2"
def test_i_can_set_objects_with_key():
sdp = SheerkaDataProvider(".sheerka")
- sdp.set(Event("event"), "entry", ObjWithKey(1, "foo"))
- sdp.set(Event("event"), "entry", ObjWithKey(2, "foo"))
+ sdp.add(Event("event"), "entry", ObjWithKey(1, "foo"))
+ entry, key = sdp.set(Event("event"), "entry", ObjWithKey(2, "foo"))
state = sdp.load_state(sdp.get_snapshot())
assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}}
+ assert entry == "entry"
+ assert key == "2"
def test_i_can_set_objects_with_no_key():
sdp = SheerkaDataProvider(".sheerka")
- sdp.set(Event("event"), "entry", ObjNoKey(1, "foo"))
- sdp.set(Event("event"), "entry", ObjNoKey(2, "foo"))
+ sdp.add(Event("event"), "entry", ObjNoKey(1, "foo"))
+ entry, key = sdp.set(Event("event"), "entry", ObjNoKey(2, "foo"))
state = sdp.load_state(sdp.get_snapshot())
assert state.data == {"entry": ObjNoKey(2, "foo")}
+ assert entry == "entry"
+ assert key is None
def test_i_can_set_from_list_to_dict():
sdp = SheerkaDataProvider(".sheerka")
sdp.set(Event("event"), "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")])
- sdp.set(Event("event"), "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")})
+ entry, key = sdp.set(Event("event"), "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")})
state = sdp.load_state(sdp.get_snapshot())
assert state.data == {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}}
+ assert entry == "entry"
+ assert key is None
+
+
+def test_i_can_set_using_reference():
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey)))
+ sdp.add(Event("event"), "entry", ObjWithKey(1, "foo"))
+ entry, key = sdp.set(Event("event"), "entry", ObjWithKey(2, "foo"), use_ref=True)
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {"2": '##REF##:9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9'}}
+ assert entry == "entry"
+ assert key == "2"
+
+ assert path.exists(sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder,
+ "9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9"))
+
+ # sanity check, make sure that I can load back
+ loaded = sdp.get(entry, key)
+ assert loaded == ObjWithKey(2, "foo")
def test_i_can_add_unique():
@@ -277,10 +515,28 @@ def test_i_can_add_unique():
sdp.add_unique(Event("event"), "entry", ObjNoKey(1, "foo"))
sdp.add_unique(Event("event"), "entry", ObjNoKey(1, "foo"))
sdp.add_unique(Event("event"), "entry", ObjNoKey(2, "bar"))
- sdp.add_unique(Event("event"), "entry", ObjNoKey(2, "bar"))
+ entry, key = sdp.add_unique(Event("event"), "entry", ObjNoKey(2, "bar"))
state = sdp.load_state(sdp.get_snapshot())
assert state.data == {"entry": {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}}
+ assert entry == "entry"
+ assert key is None
+
+
+def test_i_can_add_reference_of_an_object_with_a_key():
+ sdp = SheerkaDataProvider(".sheerka")
+ obj = ObjDumpJson("my_key", "value1")
+ obj_serializer = ObjectSerializer(BaseSerializer.get_full_qualified_name(obj))
+ sdp.serializer.register(obj_serializer)
+
+ entry, key = sdp.add(Event("event"), "entry", obj, use_ref=True)
+ state = sdp.load_state(sdp.get_snapshot())
+ digest = state.data["entry"]["my_key"][len(SheerkaDataProvider.REF_PREFIX):]
+
+ assert key == obj.key
+ assert entry == "entry"
+ assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}}
+ assert sdp.load_obj(digest) == obj
def test_i_can_keep_state_history():
@@ -289,12 +545,14 @@ def test_i_can_keep_state_history():
event1 = Event("cmd add 'foo => bar'")
event_digest1 = event1.get_digest()
obj1 = "foo => bar"
- state_digest1 = sdp.add(event1, "entry1", obj1)
+ sdp.add(event1, "entry1", obj1)
+ state_digest1 = sdp.get_snapshot()
event2 = Event("cmd add 'foo => baz'")
event_digest2 = event2.get_digest()
obj2 = "foo => baz"
- state_digest2 = sdp.add(event2, "entry2", obj2)
+ sdp.add(event2, "entry2", obj2)
+ state_digest2 = sdp.get_snapshot()
state2 = sdp.load_state(state_digest2)
@@ -318,24 +576,30 @@ def test_i_can_list_elements_when_there_is_nothing_to_list():
def test_i_can_list_when_no_key():
sdp = SheerkaDataProvider(".sheerka")
+ sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, str)))
+
sdp.add(Event("event"), "entry1", "foo")
sdp.add(Event("event"), "entry1", "bar")
- sdp.add(Event("event"), "entry2", "baz")
+ sdp.add(Event("event"), "entry1", "baz", use_ref=True)
+ sdp.add(Event("event"), "entry2", "xyz")
result = sdp.list("entry1")
- assert list(result) == ["foo", "bar"]
+ assert list(result) == ["foo", "bar", "baz"]
def test_i_can_list_when_key():
sdp = SheerkaDataProvider(".sheerka")
+ sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey)))
+
sdp.add(Event("event"), "entry1", {"1": "foo"})
sdp.add(Event("event"), "entry1", {"2": "bar"})
- sdp.add(Event("event"), "entry2", {"3": "baz"})
+ sdp.add(Event("event"), "entry1", ObjWithKey("3", "value"), use_ref=True)
+ sdp.add(Event("event"), "entry2", {"4": "xxx"})
result = sdp.list("entry1")
- assert list(result) == ["foo", "bar"]
+ assert list(result) == ["foo", "bar", ObjWithKey("3", "value")]
def test_i_can_list_when_one_element():
@@ -459,32 +723,234 @@ def test_i_cannot_remove_if_entry_does_not_exist():
assert str(e) == "entry"
-def test_i_can_replace_an_entry():
+def test_i_cannot_modify_an_entry_without_a_key():
sdp = SheerkaDataProvider(".sheerka")
- sdp.add(Event("event"), "entry1", "foo")
- sdp.add(Event("event"), "entry1", "bar")
- sdp.modify(Event("event"), "entry1", None, "baz")
- result = sdp.list("entry1")
+ with pytest.raises(SheerkaDataProviderError) as error:
+ sdp.modify(Event("event"), "entry", None, "baz")
- assert list(result) == ["baz"]
+ assert error.value.args[0] == "Key is mandatory."
-def test_i_cannot_update_an_entry_that_does_not_exist():
+def test_i_can_modify_dict_with_a_key():
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.add(Event("event"), "entry", {"key1": "foo"})
+ sdp.add(Event("event"), "entry", {"key2": "bar"})
+
+ entry, key = sdp.modify(Event("event"), "entry", "key1", "baz")
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {"key1": "baz", "key2": "bar"}}
+ assert entry == "entry"
+ assert key == "key1"
+
+
+def test_i_can_modify_an_object_with_a_key():
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.add(Event("event"), "entry", ObjWithKey("key1", "foo"))
+ sdp.add(Event("event"), "entry", ObjWithKey("key2", "bar"))
+
+ entry, key = sdp.modify(Event("event"), "entry", "key1", ObjWithKey("key1", "baz"))
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {"key1": ObjWithKey("key1", "baz"), "key2": ObjWithKey("key2", "bar")}}
+ assert entry == "entry"
+ assert key == "key1"
+
+
+def test_i_can_modify_an_object_while_changing_the_key():
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.add(Event("event"), "entry", ObjWithKey("key1", "foo"))
+ sdp.add(Event("event"), "entry", ObjWithKey("key2", "bar"))
+
+ entry, key = sdp.modify(Event("event"), "entry", "key1", ObjWithKey("key3", "baz"))
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {"key2": ObjWithKey("key2", "bar"), "key3": ObjWithKey("key3", "baz")}}
+ assert entry == "entry"
+ assert key == "key3"
+
+
+def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_key():
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.add(Event("event"), "entry", ObjWithKey("key1", "foo"))
+ sdp.add(Event("event"), "entry", ObjWithKey("key2", "bar"))
+
+ entry, key = sdp.modify(Event("event"), "entry", "key2", ObjWithKey("key1", "bar"))
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {"key1": [ObjWithKey("key1", "foo"), ObjWithKey("key1", "bar")]}}
+ assert entry == "entry"
+ assert key == "key1"
+
+
+def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list():
+ """
+ In this example, the item to modify is within a list and its key has changed;
+ under the new key, there is already a list.
+ :return:
+ """
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
+
+ sdp.add(Event("event"), "entry", ObjDumpJson("key1", "value11"))
+ sdp.add(Event("event"), "entry", ObjDumpJson("key1", "value12"))
+ sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value21"))
+ sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value22"))
+
+ new_value = ObjDumpJson("key1", "value13")
+ setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
+ entry, key = sdp.modify(Event("event"), "entry", "key2", new_value)
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {
+ "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value12"), ObjDumpJson("key1", "value13")],
+ "key2": [ObjDumpJson("key2", "value22")]
+ }}
+ assert entry == "entry"
+ assert key == "key1"
+
+
+def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_nothing():
+ """
+ In this example, the item to modify is within a list and its key has changed;
+ under the new key, there is nothing yet (the new key does not exist).
+ :return:
+ """
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
+
+ sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value21"))
+ sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value22"))
+
+ new_value = ObjDumpJson("key1", "value13")
+ setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
+ entry, key = sdp.modify(Event("event"), "entry", "key2", new_value)
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {
+ "key1": ObjDumpJson("key1", "value13"),
+ "key2": [ObjDumpJson("key2", "value22")]
+ }}
+ assert entry == "entry"
+ assert key == "key1"
+
+
+def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_one_item():
+ """
+ In this example, the item to modify is within a list and its key has changed;
+ under the new key, there is only one element.
+ :return:
+ """
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
+
+ sdp.add(Event("event"), "entry", ObjDumpJson("key1", "value11"))
+ sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value21"))
+ sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value22"))
+
+ new_value = ObjDumpJson("key1", "value13")
+ setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest())
+ entry, key = sdp.modify(Event("event"), "entry", "key2", new_value)
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {
+ "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value13")],
+ "key2": [ObjDumpJson("key2", "value22")]
+ }}
+ assert entry == "entry"
+ assert key == "key1"
+
+
+def test_i_can_modify_a_ref():
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey)))
+ sdp.add(Event("event"), "entry", ObjWithKey("key1", "foo"))
+ entry, key = sdp.add(Event("event"), "entry", ObjWithKey("key2", "bar"), use_ref=True)
+
+ sdp.modify(Event("event"), "entry", "key2", ObjWithKey("key2", "baz"))
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {
+ "key1": ObjWithKey("key1", "foo"),
+ "key2": "##REF##:eb297e98710dd17244bb0e38eb9f1bf72cba692a8f8d94e9eb2d898e130cac8b"}}
+ assert entry == "entry"
+ assert key == "key2"
+
+
+def test_i_cannot_modify_an_entry_that_does_not_exist():
sdp = SheerkaDataProvider(".sheerka")
with pytest.raises(IndexError) as e:
sdp.modify(Event("event"), "entry", "key", "foo")
- assert str(e) == "entry"
+
+ assert str(e.value) == "entry"
-def test_i_cannot_update_a_key_that_does_not_exist():
+def test_i_cannot_modify_a_key_that_does_not_exist():
sdp = SheerkaDataProvider(".sheerka")
sdp.add(Event("event"), "entry1", {"1": "foo"})
with pytest.raises(IndexError) as e:
sdp.modify(Event("event"), "entry1", "2", "bar")
- assert str(e) == "entry.1"
+
+ # `e` is pytest's ExceptionInfo wrapper; the raised IndexError itself is `e.value`.
+ assert str(e.value) == "entry1.2"
+
+
+def test_i_cannot_modify_a_list_when_origin_is_unknown():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ sdp.add(Event("event"), "entry", ObjWithKey("key", "value1"))
+ sdp.add(Event("event"), "entry", ObjWithKey("key", "value2")) # same key
+
+ state = sdp.load_state(sdp.get_snapshot())
+
+ with pytest.raises(SheerkaDataProviderError) as error:
+ sdp.modify(Event("event"), "entry", "key", ObjWithKey("key", "value2"))
+
+ assert error.value.obj == ObjWithKey("key", "value2")
+ assert error.value.args[0] == "Multiple entries under 'entry.key'"
+
+
+def test_i_can_modify_a_list_when_the_origin_is_known():
+ sdp = SheerkaDataProvider(".sheerka")
+
+ sdp.add(Event("event"), "entry", ObjDumpJson("key", "value1"))
+ sdp.add(Event("event"), "entry", ObjDumpJson("key", "value2")) # same key
+
+ new_value = ObjDumpJson("key", "value3")
+ setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key", "value1").get_digest())
+
+ sdp.modify(Event("event"), "entry", "key", new_value)
+
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {"key": [ObjDumpJson("key", "value3"), ObjDumpJson("key", "value2")]}}
+
+
+def test_i_can_modify_a_list_when_the_origin_is_known_2():
+    """
+    This time, we check that the origin is automatically set when the object was saved as a reference.
+    We also check that all objects are still persisted as references.
+    :return:
+    """
+    sdp = SheerkaDataProvider(".sheerka")
+    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
+
+    sdp.add(Event("event"), "entry", ObjDumpJson("key", "value1"), use_ref=True)
+    sdp.add(Event("event"), "entry", ObjDumpJson("key", "value2"), use_ref=True)  # same key
+
+    objs = sdp.get("entry", "key")  # origin is automatically set to the loaded objects
+    objs[0].value = "value3"
+
+    sdp.modify(Event("event"), "entry", "key", objs[0])
+
+    state = sdp.load_state(sdp.get_snapshot())
+    assert state.data == {"entry": {"key": [
+        "##REF##:621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0",
+        "##REF##:5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"]}}
+
+    # checks that all objects are (still) persisted
+    # The results must be asserted: a bare path.exists(...) call verifies nothing.
+    assert path.exists(sdp.get_obj_path(sdp.ObjectsFolder, "621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0"))
+    assert path.exists(sdp.get_obj_path(sdp.ObjectsFolder, "5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"))
+    assert path.exists(sdp.get_obj_path(sdp.ObjectsFolder, "1aac9e0d5c74c3bb989fd0f9def792bba36c5595d32f61be7cbb1a38dcf75327"))
def test_i_can_get_the_entire_entry():
@@ -522,6 +988,30 @@ def test_i_can_get_an_entry_by_key():
assert result_safe == "bar"
+def test_i_can_get_object_save_by_reference():
+ sdp = SheerkaDataProvider(".sheerka")
+ obj = ObjDumpJson("my_key", "value1")
+ sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(obj)))
+
+ entry, key = sdp.add(Event("event"), "entry", obj, use_ref=True)
+ loaded = sdp.get(entry, key)
+
+ assert loaded == obj
+
+
+def test_i_can_get_objects_from_list_when_saved_by_reference():
+ sdp = SheerkaDataProvider(".sheerka")
+ sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson)))
+
+ sdp.add(Event("event"), "entry", ObjDumpJson("key", "value1"), use_ref=True)
+ sdp.add(Event("event"), "entry", ObjDumpJson("key", "value2"), use_ref=True) # same key
+
+ objs = sdp.get("entry", "key")
+
+ assert objs[0] == ObjDumpJson("key", "value1")
+ assert objs[1] == ObjDumpJson("key", "value2")
+
+
def test_i_cannot_get_an_entry_that_does_not_exist():
sdp = SheerkaDataProvider(".sheerka")
@@ -604,53 +1094,52 @@ def test_i_can_test_than_an_entry_exits():
assert sdp.exists("entry")
-def test_i_can_save_and_load_object_with_history():
+def test_i_can_save_and_load_object_ref_with_history():
sdp = SheerkaDataProvider(".sheerka")
obj = ObjDumpJson("my_key", "value1")
sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(obj)))
- entry, key = sdp.add_ref("Obj", obj)
+ entry, key = sdp.add(Event("event"), "entry", obj, use_ref=True)
loaded = sdp.get(entry, key)
history = getattr(loaded, Serializer.HISTORY)
assert key == obj.key
- assert entry == "Obj"
+ assert entry == "entry"
assert loaded.key == obj.key
assert loaded.value == obj.value
- assert getattr(history, Serializer.USERNAME) == "kodjo"
- assert getattr(history, Serializer.MODIFICATION_DATE) != ""
- assert getattr(history, Serializer.PARENTS) == []
+ assert history[Serializer.USERNAME] == "kodjo"
+ assert history[Serializer.MODIFICATION_DATE] != ""
+ assert history[Serializer.PARENTS] == []
assert os.path.exists(sdp.get_obj_path(sdp.ObjectsFolder, obj.get_digest()))
# save a second type with no modification
- previous_modification_time = getattr(history, Serializer.MODIFICATION_DATE)
- previous_parents = getattr(history, Serializer.PARENTS)
+ previous_modification_time = history[Serializer.MODIFICATION_DATE]
+ previous_parents = history[Serializer.PARENTS]
- sdp.add_ref("Obj", loaded)
+ sdp.modify(Event("event"), "entry", key, loaded)
loaded = sdp.get(entry, key)
history = getattr(loaded, Serializer.HISTORY)
- assert getattr(history, Serializer.MODIFICATION_DATE) == previous_modification_time
- assert getattr(history, Serializer.PARENTS) == previous_parents
+ assert history[Serializer.MODIFICATION_DATE] == previous_modification_time
+ assert history[Serializer.PARENTS] == previous_parents
# save again, but with a modification
previous_digest = loaded.get_digest()
loaded.value = "value2"
- sdp.add_ref("Obj", loaded)
+ sdp.modify(Event("event"), "entry", key, loaded)
loaded2 = sdp.get(entry, key)
- history2 = getattr(loaded, Serializer.HISTORY)
+ history2 = getattr(loaded2, Serializer.HISTORY)
assert loaded2.key == loaded.key
assert loaded2.value == loaded.value
- assert getattr(history2, Serializer.USERNAME) == "kodjo"
- assert getattr(history2, Serializer.MODIFICATION_DATE) != ""
- assert getattr(history2, Serializer.PARENTS) == [previous_digest]
-
-
-
-
+ assert history2[Serializer.USERNAME] == "kodjo"
+ assert history2[Serializer.MODIFICATION_DATE] != ""
+ assert history2[Serializer.PARENTS] == [previous_digest]
+ state = sdp.load_state(sdp.get_snapshot())
+ assert state.data == {"entry": {
+ "my_key": '##REF##:e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256'}}