diff --git a/core/sheerka.py b/core/sheerka.py index e9fc95e..09a5a2b 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -85,7 +85,7 @@ class Sheerka(Concept, metaclass=Singleton): return ReturnValue(True, self.get_concept(Sheerka.SUCCESS_CONCEPT_NAME, True)) def eval(self, text): - #evt_digest = self.sdp.save_event(Event(text)) + # evt_digest = self.sdp.save_event(Event(text)) result = self.try_parse(text) return_values = [] diff --git a/docs/blog.rst b/docs/blog.rst new file mode 100644 index 0000000..641bfa8 --- /dev/null +++ b/docs/blog.rst @@ -0,0 +1,228 @@ +2019-10-30 +********** + +What is Sheerka ? +""""""""""""""""" + +Sheerka is a *communication* language, +as opposed to the traditional *programming* languages. Its +purpose is to ease the communication between the (wo)man and the machine, +ultimately using the voice. I will first use it to program faster, and maybe +more easily. + +.. _ulysse31: https://fr.wikipedia.org/wiki/Ulysse_31 + +Where does the name Sheerka come from ? +""""""""""""""""""""""""""""""""""""""" +Sheerka is my misspelling of Shyrka, from my childhood anime ulysse31_. +For those who don't know this old cartoon, it's the Odyssey story from Homer, +transposed to the 31st century. Ulysses has a spacecraft with an AI named Shyrka. + +I was a great fan of this cartoon when I was young. I thought that the idea of +bringing the ancient story of Ulysses into the future was brilliant. + +Ever since then, Sheerka has been my reference for any sophisticated computer. Unfortunately +for me, at that time there was no Wikipedia to tell me the correct spelling. + +Model v0 +"""""""" +In my view, everything begins with the **Events**. Basically, they are the commands (i.e. requests) +entered by the users. + +The events are parsed, to understand what is required, so they produce a new **State**. +The state is like a big dictionary that holds everything that is known by the system. + +Most of the elements saved in the **State** are the **Concepts**.
In this first version, +it's a little bit complicated to define what a **Concept** is, as it can have several +usages. To make it simple, I will say that a **Concept** is an idea that can be +manipulated by the rest of the system. +I am pretty sure that its form and usage will evolve as I manipulate +them. + +- Each **State** has a reference to the event(s) that triggered this state +- Each **State** has a **history** +- Each **Concept** has a **history** + + +A **history** is a triplet of + +- user name +- modification date +- digest of the parent + +.. _git: https://git-scm.com/ + +Personally, I have taken this way of tracking modifications from how it's done in git_; +I guess Linus Torvalds took it from somewhere. + + +2019-10-31 +********** + +More on Concepts +"""""""""""""""" +To define a new concept + +:: + + def concept hello a as "hello" + a + + +Note that the traditional quotes that would surround hello a are not necessary. +In this example 'a' is a variable, as it appears as a variable in the 'as' section + +So, you could call the concept by + +:: + + hello kodjo + hello my friend + +They will produce the strings "hello kodjo" or "hello my friend" + +About versioning +"""""""""""""""" +As I said previously, I mimic how git_ versions its objects. + +:: + + Obj v0 : parents = [] + user name = + modification date = + digest = xxxxx + + Obj v1: parents = [xxxxx] + user name = + modification date = + digest = yyyyy + + Obj v2: parents = [yyyyy] + user name = + modification date = + digest = zzzzz + +I always keep a reference to the last version of the object, so I can navigate through +the versions using the :code:`parents` attribute of the object + +In git_, there are basically two types of objects : + +- **content** (file content, or directory structure) +- **reference** to content (commit or tags) + +The hash of a **content** depends only on it, while the hash of a **reference** also depends +on the user name, the modification date and the parents.
In both cases, the hash is +computed on the whole object. So the hash can also be used to check the integrity +of an object. + +For my objects, I need to decide how I compute the hash. + +**Concepts** have history. If I decide to include the history in the hash, +as the modification date is :code:`datetime.now()`, a new version will be created +even if the **Concept** has not changed. If I don't include it, the integrity of +what is saved is no longer guaranteed. + +I choose to value identity over integrity. The hash code of a **Concept** does not depend +on its history. We will see what the future will say about this. + +2019-11-01 +********** + +Inspired by CodinGame +"""""""""""""""""""""" + + +.. _codingame: https://www.codingame.com/home + +I am trying to teach my little kid how to code. He is 12 years old and it was his very +first time. + +Rather than trying a standard formal approach, we went on the codingame_ web site. There +are some pros and cons to using this platform, especially for absolute beginners, but +I like the visual output of the programs. It's really like coding a game ! + +What I hadn't noticed previously is that (at least for the first programs), the solution +is given in human language. + +For example, for the exercise called "The descent" you will find + +:: + + For each round of play : + Reset the variables containing the index of the highest mountain and its height to 0 + For each mountain index (from 0 to 7 included) : + Read the height of the mountain (variable :code:`mountainH`) from stdin + If it's higher than the highest known mountain, save its index and height + Returns the index of the highest mountain on stdout + +It would be great if Sheerka were able to produce some code from these instructions. + +Some words about data persistence +""""""""""""""""""""""""""""""""""""""""" +As I previously said (or not), the main difference between Sheerka and other languages +is that Sheerka has a memory of its (her ?
:-) previous interactions with the users. + +The **Concepts**, as well as the **Events** or the **Rules** are persisted. Because of +that, I think that the more Sheerka is used, the easier it will be to use it. + +So my first focus was to decide which database to use. + +There are tons of different databases already on the market. Unfortunately for me, I'm not +a database expert. But, I already knew that I was not looking for a traditional +relational database (RDBMS) as the structure will evolve and I didn't want to spend +my time on redesigning the schemas and the constraints. + +As I was learning Python, it could have been a good idea to also start looking at an +already existing NoSQL database. I started to look at MongoDB, but I got lazy. I knew that +the top feature was the management of the history (the way git does it), and it was not +provided by Mongo, or I didn't notice it in my first readings on the subject. + +So I decided to design and implement my own database. + + +SheerkaDataProvider (sdp) +""""""""""""""""""""""""" +Not a great name, I confess. But who cares ? + +What are the main design constraints? + +:: + + 1. No dependency on the filesystem. + We must not care about where the data are stored. + The first implementation will be file based, but it has to be extensible. + The final target will be to have a decentralized persistence + 2. CRUD operations are designed according to my needs + I don't want standard CRUD operations that will be tweaked. + The direct consequence is that this library won't fit any other purpose + 3. History management for State and other objects for free. + + +sdp, like many modern database systems, is a dictionary. A big list of key-value pairs. +The key is a string, the value can be almost anything. Actually, for my needs, I guess +that I only need strings, numbers and lists (of strings and numbers :-) + +I need one level of categorization. That means that my objects can be grouped.
The basic +signature to add a new element :code:`add(entry, obj)`. + +with + +:: + + entry : is the group / category where I want to put the object + object : object to persist + +With :code:`add("Concepts", "foo")` the database, let's call it **State** once for all, will be updated like this: + +.. code::python + +{"Concepts" : foo} + +If I want to have another entry, I don't want to care about what was previously done. I +need the second call :code:`add("Concepts", "bar")` to produce + +.. code-block::json + +{"Concepts" : ["foo", "bar"]} + +So we are no longer in the usual way of implementing a CRUD. + diff --git a/sdp/sheerkaDataProvider.py b/sdp/sheerkaDataProvider.py index be47436..259baf4 100644 --- a/sdp/sheerkaDataProvider.py +++ b/sdp/sheerkaDataProvider.py @@ -49,6 +49,18 @@ class Event(object): self.message = as_dict["message"] +class ObjWithKey: + """ + Internal key value class to hold the key (and the value) + when it is detected + It's created to distinguish from {key, value} + """ + + def __init__(self, key, obj): + self.key = key + self.obj = obj + + class State: """ Class that represents the state of the system (dictionary of all known entries) @@ -62,25 +74,73 @@ class State: self.data = {} def update(self, entry, obj, append=True): - obj_to_use = {str(obj.get_key()): obj} if hasattr(obj, "get_key") else obj + """ + adds obj to entry + :param entry: + :param obj: + :param append: if True, ducplicate keys will create lists + :return: + """ + obj_to_use = {obj.key: obj.obj} if isinstance(obj, ObjWithKey) else obj if entry not in self.data: self.data[entry] = obj_to_use - elif isinstance(obj_to_use, dict): - if append: + + elif not append: + if isinstance(obj_to_use, dict): self.data[entry].update(obj_to_use) else: self.data[entry] = obj_to_use + elif isinstance(self.data[entry], list): - if append: - self.data[entry].append(obj_to_use) - else: - self.data[entry] = obj_to_use + self.data[entry].append(obj.obj if isinstance(obj, ObjWithKey) else obj) 
# do not use obj_to_use ! + + elif isinstance(obj_to_use, dict): + for k in obj_to_use: + if k not in self.data[entry]: + self.data[entry][k] = obj_to_use[k] + elif isinstance(self.data[entry][k], list): + self.data[entry][k].append(obj_to_use[k]) + else: + self.data[entry][k] = [self.data[entry][k], obj_to_use[k]] + + elif isinstance(self.data[entry], dict): + raise SheerkaDataProviderError(f"Cannot found key on '{obj}' while all other elements have.", obj) + else: - if append: - self.data[entry] = [self.data[entry], obj_to_use] - else: - self.data[entry] = obj_to_use + self.data[entry] = [self.data[entry], obj_to_use] + + def modify(self, entry, key, obj, obj_key): + # if the key changes, make sure to remove the previous entry + append = False + if obj_key != key: + self.remove(entry, lambda k, o: k == key) # modify from on object to another + append = True + + self.update(entry, ObjWithKey(obj_key, obj), append=append) + + def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed): + found = False + to_remove = None + for i in range(len(self.data[entry][key])): + item, is_ref = load_ref_if_needed(self.data[entry][key][i]) + if not hasattr(item, "get_digest"): + continue + if item.get_digest() == obj_origin: + obj = save_ref_if_needed(is_ref, obj) + if obj_key == key: + self.data[entry][key][i] = obj + else: + to_remove = i + self.update(entry, ObjWithKey(obj_key, obj), append=True) + found = True + break + + if not found: + raise (SheerkaDataProviderError(f"Cannot modify '{entry}.{key}'. 
Item '{obj_origin}' not found.", obj)) + + if to_remove is not None: + del self.data[entry][key][to_remove] def remove(self, entry, filter): if filter is None: @@ -108,13 +168,29 @@ class State: return hashlib.sha256(as_json.encode("utf-8")).hexdigest() def contains(self, entry, key): + """ + if key is None, returns True if entry exists + if key has a value + returns True if entry is an dict and contains key + :param entry: + :param key: + :return: + """ if entry not in self.data: return False + if key is None: + return entry in self.data if not isinstance(self.data[entry], dict): return False return key in self.data[entry] +class SheerkaDataProviderError(Exception): + def __init__(self, message, obj): + Exception.__init__(self, message) + self.obj = obj + + class SheerkaDataProvider: """Manages the state of the system""" @@ -124,6 +200,7 @@ class SheerkaDataProvider: CacheFolder = "cache" HeadFile = "HEAD" KeysFile = "keys" + REF_PREFIX = "##REF##:" def __init__(self, root=None): @@ -137,37 +214,72 @@ class SheerkaDataProvider: self.serializer = Serializer() def get_obj_path(self, object_type, digest): - path.join(self.root, object_type, digest[:24], digest) + return path.join(self.root, object_type, digest[:24], digest) - def add(self, event: Event, entry, obj): + @staticmethod + def get_obj_key(obj): + """ + Tries to find the key of an object + Look for .key, .get_key() + :param obj: + :return: String version of that is found, None otherwise + """ + return str(obj.key) if hasattr(obj, "key") else str(obj.get_key()) if hasattr(obj, "get_key") else None + + @staticmethod + def get_stream_digest(stream): + sha256_hash = hashlib.sha256() + for byte_block in iter(lambda: stream.read(4096), b""): + sha256_hash.update(byte_block) + + stream.seek(0) + return sha256_hash.hexdigest() + + @staticmethod + def is_reference(obj): + return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX) + + def add(self, event: Event, entry, obj, allow_multiple=True, 
use_ref=False): """ Adds obj to the entry 'entry' :param event: events that triggers the update of the state :param entry: entry of the state to update :param obj: obj to insert or add - :return: new sha256 of the state + :param allow_multiple: if set to true, the same key can be added several times. + All entries will be put in a list + :param use_ref: if True the actual object is saved under 'objects' folder, + only a reference is saved in the state + :return: (entry, key) to retrieve the object """ + event_digest = self.save_event(event) snapshot = self.get_snapshot() state = self.load_state(snapshot) - # check uniqueness, cannot add the same key twice - obj_key = str(obj.get_key()) if hasattr(obj, "get_key") else None - if state.contains(entry, obj_key): - raise IndexError(f"{entry}.{obj_key}") - elif isinstance(obj, dict): - for k in obj: - if state.contains(entry, k): - raise IndexError(f"{entry}.{k}") + # check uniqueness, cannot add the same key twice if allow_multiple == False + key = self.get_obj_key(obj) + if not allow_multiple: + if isinstance(obj, dict): + for k in obj: + if state.contains(entry, k): + raise IndexError(f"{entry}.{k}") + else: + if state.contains(entry, key): + raise IndexError(f"{entry}.{key}" if key else entry) state.parents = [] if snapshot is None else [snapshot] state.events = [event_digest] state.date = datetime.now() - state.update(entry, obj if obj_key is None else {obj_key: obj}) + + if use_ref: + digest = self.save_obj(obj) + obj = ObjWithKey(key, self.REF_PREFIX + digest) if key else self.REF_PREFIX + digest + + state.update(entry, obj if (isinstance(obj, ObjWithKey) or key is None) else ObjWithKey(key, obj)) new_snapshot = self.save_state(state) self.set_snapshot(new_snapshot) - return new_snapshot + return entry, key def add_with_auto_key(self, event: Event, entry, obj): """ @@ -180,7 +292,8 @@ class SheerkaDataProvider: next_key = self.get_next_key(entry) if hasattr(obj, "set_key"): obj.set_key(next_key) - return 
self.add(event, entry, {next_key: obj}) + self.add(event, entry, ObjWithKey(next_key, obj)) + return entry, next_key def add_unique(self, event: Event, entry, obj): """Add an entry and make sure it's unique""" @@ -198,14 +311,16 @@ class SheerkaDataProvider: new_snapshot = self.save_state(state) self.set_snapshot(new_snapshot) - return new_snapshot + return entry, None - def set(self, event: Event, entry, obj): + def set(self, event: Event, entry, obj, use_ref=False): """ - Add or replace an element + Add or replace an entry. The entry is reinitialized. + If the previous value was dict, all keys are lost :param event: :param entry: :param obj: + :param use_ref: :return: """ event_digest = self.save_event(event) @@ -215,22 +330,30 @@ class SheerkaDataProvider: state.parents = [] if snapshot is None else [snapshot] state.events = [event_digest] state.date = datetime.now() - obj_key = str(obj.get_key()) if hasattr(obj, "get_key") else None - state.update(entry, obj if obj_key is None else {obj_key: obj}, append=False) + + key = self.get_obj_key(obj) + obj = self.save_ref_if_needed(use_ref, obj) + + state.data[entry] = obj if key is None else {key: obj} new_snapshot = self.save_state(state) self.set_snapshot(new_snapshot) - return new_snapshot + return entry, key def modify(self, event: Event, entry, key, obj): """ - Updates an existing element when element are saved by key + Replace an element + If the key is not provided, has the same effect than set eg, the entry is reset :param event: :param entry: :param key: key of the object to update :param obj: new data :return: """ + + if key is None: + raise SheerkaDataProviderError("Key is mandatory.", None) + event_digest = self.save_event(event) snapshot = self.get_snapshot() state = self.load_state(snapshot) @@ -244,14 +367,24 @@ class SheerkaDataProvider: state.parents = [] if snapshot is None else [snapshot] state.events = [event_digest] state.date = datetime.now() - if key is None: - state.data[entry] = obj + + # Gets 
obj original key, it will help to know if the key has changed + obj_key = self.get_obj_key(obj) or key + + if isinstance(state.data[entry][key], list): + if not hasattr(obj, Serializer.ORIGIN): + raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj)) + + obj_origin = getattr(obj, Serializer.ORIGIN) + state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed) + else: - state.update(entry, {key: obj}) + obj = self.save_ref_if_needed(self.is_reference(state.data[entry][key]), obj) + state.modify(entry, key, obj, obj_key) new_snapshot = self.save_state(state) self.set_snapshot(new_snapshot) - return new_snapshot + return entry, obj_key def list(self, entry, filter=None): """ @@ -272,7 +405,7 @@ class SheerkaDataProvider: filter_to_use = (lambda k, o: True) if filter is None else filter for key, element in elements.items(): if filter_to_use(key, element): - yield element + yield self.load_ref_if_needed(element)[0] else: # manage when no key is defined for the elements if not isinstance(elements, list) and not isinstance(elements, set): @@ -281,7 +414,7 @@ class SheerkaDataProvider: filter_to_use = (lambda o: True) if filter is None else filter for element in elements: if filter_to_use(element): - yield element + yield self.load_ref_if_needed(element)[0] def remove(self, event: Event, entry, filter=None): """ @@ -325,7 +458,11 @@ class SheerkaDataProvider: if key is not None and key not in state.data[entry]: raise IndexError(f"{entry}.{key}") - return state.data[entry] if key is None else state.data[entry][key] + item = state.data[entry] if key is None else state.data[entry][key] + if isinstance(item, list): + return [self.load_ref_if_needed(i)[0] for i in item] + + return self.load_ref_if_needed(item)[0] def get_safe(self, entry, key=None): """ @@ -343,7 +480,7 @@ class SheerkaDataProvider: if key is not None and key not in state.data[entry]: return None - return state.data[entry] if key is None 
else state.data[entry][key] + return self.load_ref_if_needed(state.data[entry] if key is None else state.data[entry][key])[0] def exists(self, entry): """ @@ -407,10 +544,9 @@ class SheerkaDataProvider: return self.serializer.deserialize(f, None) def save_obj(self, obj): - if hasattr(obj, "key") and hasattr(obj, "key_name") and obj.key is None: - obj.key = self.get_next_key(obj.key_name) + stream = self.serializer.serialize(obj, SerializerContext(user_name="kodjo")) + digest = obj.get_digest() if hasattr(obj, "get_digest") else self.get_stream_digest(stream) - digest = obj.get_digest() target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest) if path.exists(target_path): return digest @@ -419,17 +555,44 @@ class SheerkaDataProvider: os.makedirs(path.dirname(target_path)) with open(target_path, "wb") as f: - f.write(self.serializer.serialize(obj, SerializerContext("kodjo", digest)).read()) + f.write(stream.read()) return digest def load_obj(self, digest): if digest is None: - return State() + return None target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest) + if not path.exists(target_path): + return None + with open(target_path, "rb") as f: - return self.serializer.deserialize(f, SerializerContext("kodjo", digest)) + obj = self.serializer.deserialize(f, SerializerContext(origin=digest)) + + # set the origin of the object + if not isinstance(obj, str): + setattr(obj, Serializer.ORIGIN, digest) + return obj + + def load_ref_if_needed(self, obj): + if not isinstance(obj, str): + return obj, False + if not obj.startswith(SheerkaDataProvider.REF_PREFIX): + return obj, False + + resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):]) + if resolved is None: + return obj, False + + return resolved, True + + def save_ref_if_needed(self, save_ref, obj): + if not save_ref: + return obj + + digest = self.save_obj(obj) + return self.REF_PREFIX + digest def get_cache_params(self, category, key): 
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest() diff --git a/sdp/sheerkaSerializer.py b/sdp/sheerkaSerializer.py index 8688201..c1ae818 100644 --- a/sdp/sheerkaSerializer.py +++ b/sdp/sheerkaSerializer.py @@ -20,8 +20,8 @@ def json_default_converter(o): @dataclass() class SerializerContext: - user_name: str - origin: str + user_name: str = None + origin: str = None class Serializer: @@ -37,7 +37,7 @@ class Serializer: # add builtin serializers self.register(EventSerializer()) - self.register(PickleSerializer()) + self.register(StateSerializer()) self.register(ConceptSerializer()) def register(self, serializer): @@ -205,11 +205,12 @@ class ObjectSerializer(BaseSerializer): class PickleSerializer(BaseSerializer): - def __init__(self): - BaseSerializer.__init__(self, "P", 1) + def __init__(self, predicate, name="P", version=1): + BaseSerializer.__init__(self, name, version) + self.predicate = predicate def match(self, obj): - return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State" + return self.predicate(obj) def dump(self, stream, obj, context): stream.write(pickle.dumps(obj)) @@ -220,6 +221,12 @@ class PickleSerializer(BaseSerializer): return pickle.loads(stream.read()) +class StateSerializer(PickleSerializer): + def __init__(self, ): + PickleSerializer.__init__(self, lambda obj: BaseSerializer.get_full_qualified_name( + obj) == "sdp.sheerkaDataProvider.State", "S", 1) + + class ConceptSerializer(ObjectSerializer): def __init__(self): ObjectSerializer.__init__(self, "core.concept.Concept", "C", 1) diff --git a/tests/test_sheerkaDataProvider.py b/tests/test_sheerkaDataProvider.py index 39c0898..02398e5 100644 --- a/tests/test_sheerkaDataProvider.py +++ b/tests/test_sheerkaDataProvider.py @@ -3,12 +3,12 @@ import hashlib import pytest import os from os import path -from sdp.sheerkaDataProvider import SheerkaDataProvider, Event +from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, 
SheerkaDataProviderError from datetime import date, datetime import shutil import json -from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer +from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer, SerializerContext, PickleSerializer tests_root = path.abspath("../build/tests") @@ -75,7 +75,7 @@ class ObjNoKey: class ObjDumpJson: - def __init__(self, key, value): + def __init__(self, key=None, value=None): self.key = key self.value = value @@ -91,6 +91,10 @@ class ObjDumpJson: return self.key def get_digest(self): + """ + Returns the digest of the event + :return: hexa form of the sha256 + """ return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest() def to_dict(self): @@ -123,7 +127,7 @@ def test_i_can_init_the_data_provider(): assert path.exists(path.join(tests_root, ".sheerka")) -def test_i_can_add_and_retrieve_an_event(): +def test_i_can_save_and_load_an_event(): sdp = SheerkaDataProvider(".sheerka") event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo") @@ -137,17 +141,23 @@ def test_i_can_add_and_retrieve_an_event(): assert evt.message == "hello world" -def test_i_can_add_an_object(): +def test_i_can_add_an_string(): sdp = SheerkaDataProvider(".sheerka") event = Event("cmd add 'foo => bar'") event_digest = event.get_digest() obj = "foo => bar" - state_digest = sdp.add(event, "entry", obj) - state = sdp.load_state(state_digest) + entry, key = sdp.add(event, "entry", obj) + last_commit = sdp.get_snapshot() + state = sdp.load_state(last_commit) + loaded = sdp.get(entry, key) + + assert entry == "entry" + assert key is None + assert loaded == obj assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest[0:24], event_digest)) - assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, state_digest[0:24], state_digest)) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], 
last_commit)) assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile)) assert state.date is not None @@ -155,121 +165,349 @@ def test_i_can_add_an_object(): assert state.events == [event_digest] assert state.data == {"entry": "foo => bar"} - assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == state_digest + assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit -def test_i_can_add_multiple_elements_in_an_entry(): +def test_i_can_add_several_strings_if_allow_multiple_is_true(): + sdp = SheerkaDataProvider(".sheerka") + + sdp.add(Event("event"), "entry", "foo") + sdp.add(Event("event"), "entry", "foo") + entry, key = sdp.add(Event("event"), "entry", "bar") + loaded = sdp.get(entry, key) + + assert entry == "entry" + assert key is None + assert loaded == ["foo", "foo", "bar"] + + +def test_i_cannot_add_several_strings_if_allow_multiple_is_false(): + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(IndexError) as index_error: + sdp.add(Event("event"), "entry", "foo", False) + sdp.add(Event("event"), "entry", "foo", False) + assert index_error.value.args[0] == "entry" + + +def test_i_can_add_an_object_with_no_key(): sdp = SheerkaDataProvider(".sheerka") event = Event("cmd add 'foo => bar'") + event_digest = event.get_digest() + obj = ObjNoKey("a", "b") - state_digest1 = sdp.add(event, "entry", 1) - state1 = sdp.load_state(state_digest1) + entry, key = sdp.add(event, "entry", obj) + last_commit = sdp.get_snapshot() + state = sdp.load_state(last_commit) + loaded = sdp.get(entry, key) - state_digest2 = sdp.add(event, "entry", 2) - state2 = sdp.load_state(state_digest2) + assert entry == "entry" + assert key is None + assert loaded == obj - state_digest3 = sdp.add(event, "entry", 3) - state3 = sdp.load_state(state_digest3) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest[0:24], event_digest)) + assert path.exists(path.join(sdp.root, 
SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile)) - assert state1.data == {"entry": 1} - assert state2.data == {"entry": [1, 2]} - assert state3.data == {"entry": [1, 2, 3]} + assert state.date is not None + assert state.parents == [] + assert state.events == [event_digest] + assert state.data == {"entry": ObjNoKey("a", "b")} + + assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit -def test_i_can_add_element_using_auto_generated_key(): +def test_i_can_add_several_obj_no_key_if_allow_multiple_is_true(): + sdp = SheerkaDataProvider(".sheerka") + + sdp.add(Event("event"), "entry", ObjNoKey("a", "b")) + sdp.add(Event("event"), "entry", ObjNoKey("a", "b")) + entry, key = sdp.add(Event("event"), "entry", ObjNoKey("c", "d")) + loaded = sdp.get(entry, key) + + assert entry == "entry" + assert key is None + assert loaded == [ObjNoKey("a", "b"), ObjNoKey("a", "b"), ObjNoKey("c", "d")] + + +def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(): + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(IndexError) as index_error: + sdp.add(Event("event"), "entry", ObjNoKey("a", "b"), False) + sdp.add(Event("event"), "entry", ObjNoKey("c", "d"), False) + assert index_error.value.args[0] == "entry" + + +def test_i_can_add_a_dict(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("cmd add 'foo => bar'") + event_digest = event.get_digest() + obj = {"my_key": "my_value"} + + entry, key = sdp.add(event, "entry", obj) + last_commit = sdp.get_snapshot() + state = sdp.load_state(last_commit) + loaded = sdp.get(entry, key) + + loaded_value = sdp.get(entry, "my_key") # we can retrieve by key + + assert entry == "entry" + assert key is None # we return None as dict may contains several entries + assert loaded == obj + assert loaded_value == "my_value" + + assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, 
event_digest[0:24], event_digest)) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile)) + + assert state.date is not None + assert state.parents == [] + assert state.events == [event_digest] + assert state.data == {"entry": obj} + + assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit + + +def test_i_can_add_multiple_entries_at_once_with_dict(): + sdp = SheerkaDataProvider(".sheerka") + + entry, key = sdp.add(Event("event"), "entry", {"my_key1": "value1", "my_key2": "value2"}) + loaded = sdp.get(entry, key) + loaded_value1 = sdp.get(entry, "my_key1") + loaded_value2 = sdp.get(entry, "my_key2") + + assert loaded == {"my_key1": "value1", "my_key2": "value2"} + assert loaded_value1 == "value1" + assert loaded_value2 == "value2" + + +def test_i_can_add_same_key_with_dict_if_allow_multiple_is_true(): + sdp = SheerkaDataProvider(".sheerka") + + sdp.add(Event("event"), "entry", {"my_key": "my_value"}) + entry, key = sdp.add(Event("event"), "entry", {"my_key": "my_value"}) + loaded1 = sdp.get(entry, key) + + entry, key = sdp.add(Event("event"), "entry", {"my_key": "my_value2"}) + loaded2 = sdp.get(entry, key) + + assert entry == "entry" + assert key is None + assert loaded1 == {"my_key": ["my_value", "my_value"]} + assert loaded2 == {"my_key": ["my_value", "my_value", "my_value2"]} + + +def test_i_cannot_add_same_key_with_dict_if_allow_multiple_is_false(): + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(IndexError) as index_error: + sdp.add(Event("event"), "entry", {"my_key": "my_value"}, False) + sdp.add(Event("event"), "entry", {"my_key": "my_value2"}, False) + assert index_error.value.args[0] == "entry.my_key" + + +def test_i_can_add_object_with_different_key_if_allow_multiple_is_false(): + sdp = SheerkaDataProvider(".sheerka") + + sdp.add(Event("event"), "entry", {"my_key": "a"}, False) + 
sdp.add(Event("event"), "entry", {"my_key2": "b"}, False) + + assert sdp.get("entry", "my_key") == "a" + assert sdp.get("entry", "my_key2") == "b" + + +def test_i_can_add_obj_with_key(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("cmd add 'foo => bar'") + event_digest = event.get_digest() + obj1 = ObjWithKey("key1", "b") + obj2 = ObjSetKey("c", key="key2") + + entry1, key1 = sdp.add(event, "entry", obj1) # test when key is taken from obj.get_key() + entry2, key2 = sdp.add(event, "entry2", obj2) # test when key is taken from obj.key + last_commit = sdp.get_snapshot() + state = sdp.load_state(last_commit) + + loaded1 = sdp.get(entry1, key1) + loaded2 = sdp.get(entry2, key2) + + assert entry1 == "entry" + assert key1 == "key1" + assert loaded1 == ObjWithKey("key1", "b") + assert entry2 == "entry2" + assert key2 == "key2" + assert loaded2 == ObjSetKey("c", key="key2") + + assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest[0:24], event_digest)) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile)) + + assert state.date is not None + assert len(state.parents) == 1 + assert state.events == [event_digest] + assert state.data == {"entry": {"key1": obj1}, "entry2": {"key2": obj2}} + + assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == last_commit + + +def test_i_can_add_objects_with_same_key_if_allow_multiple_is_true(): + sdp = SheerkaDataProvider(".sheerka") + + sdp.add(Event("event"), "entry", ObjWithKey("my_key", "b")) + entry, key = sdp.add(Event("event"), "entry", ObjSetKey("c", key="my_key")) + loaded1 = sdp.get(entry, key) + + entry, key = sdp.add(Event("event"), "entry", ObjSetKey("c", key="my_key")) + sdp.add(Event("event"), "entry", ObjSetKey("c", key="my_key2")) # to prove that it does not melt everything + loaded2 = sdp.get(entry, key) + + assert entry == "entry" + 
assert key == "my_key" + assert loaded1 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key")] + assert loaded2 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key"), ObjSetKey("c", key="my_key")] + + +def test_i_cannot_add_object_with_same_key_if_allow_multiple_is_false(): + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(IndexError) as index_error: + sdp.add(Event("event"), "entry", ObjWithKey("my_key", "b"), False) + sdp.add(Event("event"), "entry", ObjSetKey("c", key="my_key"), False) + assert index_error.value.args[0] == "entry.my_key" + + +def test_i_can_add_obj_with_key_to_a_list(): + sdp = SheerkaDataProvider(".sheerka") + + sdp.add(Event("event"), "entry", "foo") + sdp.add(Event("event"), "entry", "bar") # entry is now a list + sdp.add(Event("event"), "entry", ObjWithKey("a", "b")) # this entry must not be taken as an object with a key + + loaded = sdp.get("entry") + assert loaded == ["foo", "bar", ObjWithKey("a", "b")] + + +def test_i_cannot_add_obj_with_no_key_when_then_entry_has_keys(): + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(SheerkaDataProviderError) as error: + sdp.add(Event("event"), "entry", ObjWithKey("a", "b")) + sdp.add(Event("event"), "entry", "foo") + + assert error.value.obj == "foo" + + +def test_i_can_add_string_using_auto_generated_key(): sdp = SheerkaDataProvider(".sheerka") event = Event("cmd add 'foo => bar'") key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile) - sdp.add_with_auto_key(event, "entry1", "foo") - sdp.add_with_auto_key(event, "entry1", "bar") - sdp.add_with_auto_key(event, "entry2", "baz") + entry1, key1 = sdp.add_with_auto_key(event, "entry1", "foo") + entry2, key2 = sdp.add_with_auto_key(event, "entry1", "bar") + entry3, key3 = sdp.add_with_auto_key(event, "entry2", "baz") state = sdp.load_state(sdp.get_snapshot()) assert path.exists(key_file) assert read_json_file(key_file) == {"entry1": 2, "entry2": 1} assert state.data == {"entry1": {"1": "foo", "2": "bar"}, 
"entry2": {"1": "baz"}} + assert entry1 == "entry1" + assert entry2 == "entry1" + assert entry3 == "entry2" + assert key1 == "1" + assert key2 == "2" + assert key3 == "1" -def test_i_can_add_and_auto_set_the_key(): +def test_i_can_add_object_using_auto_generated_key(): sdp = SheerkaDataProvider(".sheerka") event = Event("cmd add 'foo => bar'") key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile) - sdp.add_with_auto_key(event, "entry1", ObjSetKey("foo")) - sdp.add_with_auto_key(event, "entry1", ObjSetKey("bar")) + entry1, key1 = sdp.add_with_auto_key(event, "entry1", ObjNoKey("a", "b")) + entry2, key2 = sdp.add_with_auto_key(event, "entry1", ObjNoKey("a", "b")) state = sdp.load_state(sdp.get_snapshot()) assert path.exists(key_file) assert read_json_file(key_file) == {"entry1": 2} - assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("bar", "2")}} + assert state.data == {"entry1": {"1": ObjNoKey("a", "b"), "2": ObjNoKey("a", "b")}} + assert entry1 == "entry1" + assert entry2 == "entry1" + assert key1 == "1" + assert key2 == "2" -def test_i_can_add_an_object_with_its_own_key(): +def test_object_key_is_updated_when_possible_using_auto_generated_key(): sdp = SheerkaDataProvider(".sheerka") event = Event("cmd add 'foo => bar'") + key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile) - sdp.add(event, "entry", ObjWithKey(1, "foo")) - sdp.add(event, "entry", ObjWithKey(2, "bar")) + entry1, key1 = sdp.add_with_auto_key(event, "entry1", ObjSetKey("foo")) + entry2, key2 = sdp.add_with_auto_key(event, "entry1", ObjSetKey("foo")) state = sdp.load_state(sdp.get_snapshot()) - assert state.data == {"entry": {"1": ObjWithKey(1, "foo"), "2": ObjWithKey(2, "bar")}} - -def test_i_can_add_dictionary(): - sdp = SheerkaDataProvider(".sheerka") - event = Event("cmd add 'foo => bar'") - - sdp.add(event, "entry", {"1": "foo"}) - sdp.add(event, "entry", {"2": "bar"}) - - state = sdp.load_state(sdp.get_snapshot()) - assert state.data == {"entry": {"1": 
"foo", "2": "bar"}} - - -def test_i_cannot_add_the_same_key_twice(): - sdp = SheerkaDataProvider(".sheerka") - sdp.add(Event("event"), "entry", {"1": "foo"}) - - with pytest.raises(IndexError): - sdp.add(Event("event"), "entry", {"1": "foo"}) - - -def test_i_cannot_add_the_same_element_twice(): - sdp = SheerkaDataProvider(".sheerka") - sdp.add(Event("event"), "entry", ObjWithKey(1, "foo")) - - with pytest.raises(IndexError): - sdp.add(Event("event"), "entry", ObjWithKey(1, "foo")) + assert path.exists(key_file) + assert read_json_file(key_file) == {"entry1": 2} + assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("foo", "2")}} + assert entry1 == "entry1" + assert entry2 == "entry1" + assert key1 == "1" + assert key2 == "2" def test_i_can_set_objects_with_key(): sdp = SheerkaDataProvider(".sheerka") - sdp.set(Event("event"), "entry", ObjWithKey(1, "foo")) - sdp.set(Event("event"), "entry", ObjWithKey(2, "foo")) + sdp.add(Event("event"), "entry", ObjWithKey(1, "foo")) + entry, key = sdp.set(Event("event"), "entry", ObjWithKey(2, "foo")) state = sdp.load_state(sdp.get_snapshot()) assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}} + assert entry == "entry" + assert key == "2" def test_i_can_set_objects_with_no_key(): sdp = SheerkaDataProvider(".sheerka") - sdp.set(Event("event"), "entry", ObjNoKey(1, "foo")) - sdp.set(Event("event"), "entry", ObjNoKey(2, "foo")) + sdp.add(Event("event"), "entry", ObjNoKey(1, "foo")) + entry, key = sdp.set(Event("event"), "entry", ObjNoKey(2, "foo")) state = sdp.load_state(sdp.get_snapshot()) assert state.data == {"entry": ObjNoKey(2, "foo")} + assert entry == "entry" + assert key is None def test_i_can_set_from_list_to_dict(): sdp = SheerkaDataProvider(".sheerka") sdp.set(Event("event"), "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")]) - sdp.set(Event("event"), "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}) + entry, key = sdp.set(Event("event"), "entry", {"1": ObjNoKey(1, "foo"), "2": 
ObjNoKey(2, "foo")}) state = sdp.load_state(sdp.get_snapshot()) assert state.data == {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}} + assert entry == "entry" + assert key is None + + +def test_i_can_set_using_reference(): + sdp = SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey))) + sdp.add(Event("event"), "entry", ObjWithKey(1, "foo")) + entry, key = sdp.set(Event("event"), "entry", ObjWithKey(2, "foo"), use_ref=True) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"2": '##REF##:9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9'}} + assert entry == "entry" + assert key == "2" + + assert path.exists(sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder, + "9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9")) + + # sanity check, make sure that I can load back + loaded = sdp.get(entry, key) + assert loaded == ObjWithKey(2, "foo") def test_i_can_add_unique(): @@ -277,10 +515,28 @@ def test_i_can_add_unique(): sdp.add_unique(Event("event"), "entry", ObjNoKey(1, "foo")) sdp.add_unique(Event("event"), "entry", ObjNoKey(1, "foo")) sdp.add_unique(Event("event"), "entry", ObjNoKey(2, "bar")) - sdp.add_unique(Event("event"), "entry", ObjNoKey(2, "bar")) + entry, key = sdp.add_unique(Event("event"), "entry", ObjNoKey(2, "bar")) state = sdp.load_state(sdp.get_snapshot()) assert state.data == {"entry": {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}} + assert entry == "entry" + assert key is None + + +def test_i_can_add_reference_of_an_object_with_a_key(): + sdp = SheerkaDataProvider(".sheerka") + obj = ObjDumpJson("my_key", "value1") + obj_serializer = ObjectSerializer(BaseSerializer.get_full_qualified_name(obj)) + sdp.serializer.register(obj_serializer) + + entry, key = sdp.add(Event("event"), "entry", obj, use_ref=True) + state = sdp.load_state(sdp.get_snapshot()) + digest = state.data["entry"]["my_key"][len(SheerkaDataProvider.REF_PREFIX):] + + 
assert key == obj.key + assert entry == "entry" + assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}} + assert sdp.load_obj(digest) == obj def test_i_can_keep_state_history(): @@ -289,12 +545,14 @@ def test_i_can_keep_state_history(): event1 = Event("cmd add 'foo => bar'") event_digest1 = event1.get_digest() obj1 = "foo => bar" - state_digest1 = sdp.add(event1, "entry1", obj1) + sdp.add(event1, "entry1", obj1) + state_digest1 = sdp.get_snapshot() event2 = Event("cmd add 'foo => baz'") event_digest2 = event2.get_digest() obj2 = "foo => baz" - state_digest2 = sdp.add(event2, "entry2", obj2) + sdp.add(event2, "entry2", obj2) + state_digest2 = sdp.get_snapshot() state2 = sdp.load_state(state_digest2) @@ -318,24 +576,30 @@ def test_i_can_list_elements_when_there_is_nothing_to_list(): def test_i_can_list_when_no_key(): sdp = SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, str))) + sdp.add(Event("event"), "entry1", "foo") sdp.add(Event("event"), "entry1", "bar") - sdp.add(Event("event"), "entry2", "baz") + sdp.add(Event("event"), "entry1", "baz", use_ref=True) + sdp.add(Event("event"), "entry2", "xyz") result = sdp.list("entry1") - assert list(result) == ["foo", "bar"] + assert list(result) == ["foo", "bar", "baz"] def test_i_can_list_when_key(): sdp = SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey))) + sdp.add(Event("event"), "entry1", {"1": "foo"}) sdp.add(Event("event"), "entry1", {"2": "bar"}) - sdp.add(Event("event"), "entry2", {"3": "baz"}) + sdp.add(Event("event"), "entry1", ObjWithKey("3", "value"), use_ref=True) + sdp.add(Event("event"), "entry2", {"4": "xxx"}) result = sdp.list("entry1") - assert list(result) == ["foo", "bar"] + assert list(result) == ["foo", "bar", ObjWithKey("3", "value")] def test_i_can_list_when_one_element(): @@ -459,32 +723,234 @@ def test_i_cannot_remove_if_entry_does_not_exist(): 
assert str(e) == "entry" -def test_i_can_replace_an_entry(): +def test_i_cannot_modify_an_entry_without_a_key(): sdp = SheerkaDataProvider(".sheerka") - sdp.add(Event("event"), "entry1", "foo") - sdp.add(Event("event"), "entry1", "bar") - sdp.modify(Event("event"), "entry1", None, "baz") - result = sdp.list("entry1") + with pytest.raises(SheerkaDataProviderError) as error: + sdp.modify(Event("event"), "entry", None, "baz") - assert list(result) == ["baz"] + assert error.value.args[0] == "Key is mandatory." -def test_i_cannot_update_an_entry_that_does_not_exist(): +def test_i_can_modify_dict_with_a_key(): + sdp = SheerkaDataProvider(".sheerka") + sdp.add(Event("event"), "entry", {"key1": "foo"}) + sdp.add(Event("event"), "entry", {"key2": "bar"}) + + entry, key = sdp.modify(Event("event"), "entry", "key1", "baz") + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"key1": "baz", "key2": "bar"}} + assert entry == "entry" + assert key == "key1" + + +def test_i_can_modify_an_object_with_a_key(): + sdp = SheerkaDataProvider(".sheerka") + sdp.add(Event("event"), "entry", ObjWithKey("key1", "foo")) + sdp.add(Event("event"), "entry", ObjWithKey("key2", "bar")) + + entry, key = sdp.modify(Event("event"), "entry", "key1", ObjWithKey("key1", "baz")) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"key1": ObjWithKey("key1", "baz"), "key2": ObjWithKey("key2", "bar")}} + assert entry == "entry" + assert key == "key1" + + +def test_i_can_modify_an_object_while_changing_the_key(): + sdp = SheerkaDataProvider(".sheerka") + sdp.add(Event("event"), "entry", ObjWithKey("key1", "foo")) + sdp.add(Event("event"), "entry", ObjWithKey("key2", "bar")) + + entry, key = sdp.modify(Event("event"), "entry", "key1", ObjWithKey("key3", "baz")) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"key2": ObjWithKey("key2", "bar"), "key3": ObjWithKey("key3", "baz")}} + assert entry == "entry" + assert key == 
"key3" + + +def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_key(): + sdp = SheerkaDataProvider(".sheerka") + sdp.add(Event("event"), "entry", ObjWithKey("key1", "foo")) + sdp.add(Event("event"), "entry", ObjWithKey("key2", "bar")) + + entry, key = sdp.modify(Event("event"), "entry", "key2", ObjWithKey("key1", "bar")) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"key1": [ObjWithKey("key1", "foo"), ObjWithKey("key1", "bar")]}} + assert entry == "entry" + assert key == "key1" + + +def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list(): + """ + In this example, the item to modify is within a list, and its key has changed + and in the new key, there is already a list + :return: + """ + sdp = SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) + + sdp.add(Event("event"), "entry", ObjDumpJson("key1", "value11")) + sdp.add(Event("event"), "entry", ObjDumpJson("key1", "value12")) + sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value21")) + sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value22")) + + new_value = ObjDumpJson("key1", "value13") + setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) + entry, key = sdp.modify(Event("event"), "entry", "key2", new_value) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": { + "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value12"), ObjDumpJson("key1", "value13")], + "key2": [ObjDumpJson("key2", "value22")] + }} + assert entry == "entry" + assert key == "key1" + + +def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_nothing(): + """ + In this example, the item to modify is within a list, and its key has changed + and in the new key, there is nothing (the new key does not exist) + :return: + """ + sdp = SheerkaDataProvider(".sheerka") + 
sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) + + sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value21")) + sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value22")) + + new_value = ObjDumpJson("key1", "value13") + setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) + entry, key = sdp.modify(Event("event"), "entry", "key2", new_value) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": { + "key1": ObjDumpJson("key1", "value13"), + "key2": [ObjDumpJson("key2", "value22")] + }} + assert entry == "entry" + assert key == "key1" + + +def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_one_item(): + """ + In this example, the item to modify is within a list, and its key has changed + and in the new key, there is only one element + :return: + """ + sdp = SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) + + sdp.add(Event("event"), "entry", ObjDumpJson("key1", "value11")) + sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value21")) + sdp.add(Event("event"), "entry", ObjDumpJson("key2", "value22")) + + new_value = ObjDumpJson("key1", "value13") + setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) + entry, key = sdp.modify(Event("event"), "entry", "key2", new_value) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": { + "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value13")], + "key2": [ObjDumpJson("key2", "value22")] + }} + assert entry == "entry" + assert key == "key1" + + +def test_i_can_modify_a_ref(): + sdp = SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey))) + sdp.add(Event("event"), "entry", ObjWithKey("key1", "foo")) + entry, key = sdp.add(Event("event"), "entry", ObjWithKey("key2", "bar"), use_ref=True) + + 
sdp.modify(Event("event"), "entry", "key2", ObjWithKey("key2", "baz")) + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": { + "key1": ObjWithKey("key1", "foo"), + "key2": "##REF##:eb297e98710dd17244bb0e38eb9f1bf72cba692a8f8d94e9eb2d898e130cac8b"}} + assert entry == "entry" + assert key == "key2" + + +def test_i_cannot_modify_an_entry_that_does_not_exist(): sdp = SheerkaDataProvider(".sheerka") with pytest.raises(IndexError) as e: sdp.modify(Event("event"), "entry", "key", "foo") - assert str(e) == "entry" + + assert str(e.value) == "entry" -def test_i_cannot_update_a_key_that_does_not_exist(): +def test_i_cannot_modify_a_key_that_does_not_exist(): sdp = SheerkaDataProvider(".sheerka") sdp.add(Event("event"), "entry1", {"1": "foo"}) with pytest.raises(IndexError) as e: sdp.modify(Event("event"), "entry1", "2", "bar") - assert str(e) == "entry.1" + assert str(e) == "entry1.2" + + +def test_i_cannot_modify_a_list_when_origin_is_unknown(): + sdp = SheerkaDataProvider(".sheerka") + + sdp.add(Event("event"), "entry", ObjWithKey("key", "value1")) + sdp.add(Event("event"), "entry", ObjWithKey("key", "value2")) # same key + + state = sdp.load_state(sdp.get_snapshot()) + + with pytest.raises(SheerkaDataProviderError) as error: + sdp.modify(Event("event"), "entry", "key", ObjWithKey("key", "value2")) + + assert error.value.obj == ObjWithKey("key", "value2") + assert error.value.args[0] == "Multiple entries under 'entry.key'" + + +def test_i_can_modify_a_list_when_the_origin_is_known(): + sdp = SheerkaDataProvider(".sheerka") + + sdp.add(Event("event"), "entry", ObjDumpJson("key", "value1")) + sdp.add(Event("event"), "entry", ObjDumpJson("key", "value2")) # same key + + new_value = ObjDumpJson("key", "value3") + setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key", "value1").get_digest()) + + sdp.modify(Event("event"), "entry", "key", new_value) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"key": 
[ObjDumpJson("key", "value3"), ObjDumpJson("key", "value2")]}} + + +def test_i_can_modify_a_list_when_the_origin_is_known_2(): + """ + This time, we check that the origin is automatically set when the object was saved as a reference + We also check that all objects are still persisted as reference + :return: + """ + sdp = SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) + + sdp.add(Event("event"), "entry", ObjDumpJson("key", "value1"), use_ref=True) + sdp.add(Event("event"), "entry", ObjDumpJson("key", "value2"), use_ref=True) # same key + + objs = sdp.get("entry", "key") # origin is automatically set on the loaded objects + objs[0].value = "value3" + + sdp.modify(Event("event"), "entry", "key", objs[0]) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"key": [ + "##REF##:621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0", + "##REF##:5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"]}} + + # checks that all objects are (still) persisted + path.exists(sdp.get_obj_path(sdp.ObjectsFolder, "621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0")) + path.exists(sdp.get_obj_path(sdp.ObjectsFolder, "5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517")) + path.exists(sdp.get_obj_path(sdp.ObjectsFolder, "1aac9e0d5c74c3bb989fd0f9def792bba36c5595d32f61be7cbb1a38dcf75327")) def test_i_can_get_the_entire_entry(): @@ -522,6 +988,30 @@ def test_i_can_get_an_entry_by_key(): assert result_safe == "bar" +def test_i_can_get_object_save_by_reference(): + sdp = SheerkaDataProvider(".sheerka") + obj = ObjDumpJson("my_key", "value1") + sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(obj))) + + entry, key = sdp.add(Event("event"), "entry", obj, use_ref=True) + loaded = sdp.get(entry, key) + + assert loaded == obj + + +def test_i_can_get_objects_from_list_when_saved_by_reference(): + sdp = 
SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) + + sdp.add(Event("event"), "entry", ObjDumpJson("key", "value1"), use_ref=True) + sdp.add(Event("event"), "entry", ObjDumpJson("key", "value2"), use_ref=True) # same key + + objs = sdp.get("entry", "key") + + assert objs[0] == ObjDumpJson("key", "value1") + assert objs[1] == ObjDumpJson("key", "value2") + + def test_i_cannot_get_an_entry_that_does_not_exist(): sdp = SheerkaDataProvider(".sheerka") @@ -604,53 +1094,52 @@ def test_i_can_test_than_an_entry_exits(): assert sdp.exists("entry") -def test_i_can_save_and_load_object_with_history(): +def test_i_can_save_and_load_object_ref_with_history(): sdp = SheerkaDataProvider(".sheerka") obj = ObjDumpJson("my_key", "value1") sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(obj))) - entry, key = sdp.add_ref("Obj", obj) + entry, key = sdp.add(Event("event"), "entry", obj, use_ref=True) loaded = sdp.get(entry, key) history = getattr(loaded, Serializer.HISTORY) assert key == obj.key - assert entry == "Obj" + assert entry == "entry" assert loaded.key == obj.key assert loaded.value == obj.value - assert getattr(history, Serializer.USERNAME) == "kodjo" - assert getattr(history, Serializer.MODIFICATION_DATE) != "" - assert getattr(history, Serializer.PARENTS) == [] + assert history[Serializer.USERNAME] == "kodjo" + assert history[Serializer.MODIFICATION_DATE] != "" + assert history[Serializer.PARENTS] == [] assert os.path.exists(sdp.get_obj_path(sdp.ObjectsFolder, obj.get_digest())) # save a second type with no modification - previous_modification_time = getattr(history, Serializer.MODIFICATION_DATE) - previous_parents = getattr(history, Serializer.PARENTS) + previous_modification_time = history[Serializer.MODIFICATION_DATE] + previous_parents = history[Serializer.PARENTS] - sdp.add_ref("Obj", loaded) + sdp.modify(Event("event"), "entry", key, loaded) loaded = sdp.get(entry, key) 
history = getattr(loaded, Serializer.HISTORY) - assert getattr(history, Serializer.MODIFICATION_DATE) == previous_modification_time - assert getattr(history, Serializer.PARENTS) == previous_parents + assert history[Serializer.MODIFICATION_DATE] == previous_modification_time + assert history[Serializer.PARENTS] == previous_parents # save again, but with a modification previous_digest = loaded.get_digest() loaded.value = "value2" - sdp.add_ref("Obj", loaded) + sdp.modify(Event("event"), "entry", key, loaded) loaded2 = sdp.get(entry, key) - history2 = getattr(loaded, Serializer.HISTORY) + history2 = getattr(loaded2, Serializer.HISTORY) assert loaded2.key == loaded.key assert loaded2.value == loaded.value - assert getattr(history2, Serializer.USERNAME) == "kodjo" - assert getattr(history2, Serializer.MODIFICATION_DATE) != "" - assert getattr(history2, Serializer.PARENTS) == [previous_digest] - - - - + assert history2[Serializer.USERNAME] == "kodjo" + assert history2[Serializer.MODIFICATION_DATE] != "" + assert history2[Serializer.PARENTS] == [previous_digest] + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": { + "my_key": '##REF##:e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256'}}