From 8f1c2ed8181e2d32fe55f915d96146288697a309 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Mon, 21 Oct 2019 16:13:56 +0200 Subject: [PATCH] Refactored sdp serializers --- .gitignore | 3 +- core/concept.py | 1 + core/sheerka.py | 15 +- core/utils.py | 20 ++ main.py | 24 ++ sdp/__init__.py | 0 sdp/readme.md | 5 + sdp/sheerkaDataProvider.py | 510 ++++++++++++++++++++++++++ sdp/sheerkaSerializer.py | 167 +++++++++ tests/test_sheerka.py | 20 +- tests/test_sheerkaDataProvider.py | 573 ++++++++++++++++++++++++++++++ tests/test_sheerkaSerializer.py | 16 + tests/test_utils.py | 12 + 13 files changed, 1353 insertions(+), 13 deletions(-) create mode 100644 core/utils.py create mode 100644 main.py create mode 100644 sdp/__init__.py create mode 100644 sdp/readme.md create mode 100644 sdp/sheerkaDataProvider.py create mode 100644 sdp/sheerkaSerializer.py create mode 100644 tests/test_sheerkaDataProvider.py create mode 100644 tests/test_sheerkaSerializer.py create mode 100644 tests/test_utils.py diff --git a/.gitignore b/.gitignore index 131ecd2..dff3700 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ venv .pytest_cache .idea -__pycache__ \ No newline at end of file +__pycache__ +build \ No newline at end of file diff --git a/core/concept.py b/core/concept.py index bc4315e..3af285e 100644 --- a/core/concept.py +++ b/core/concept.py @@ -13,6 +13,7 @@ class Concept: self.pre = None # list of pre conditions before calling the main function self.post = None # list of post conditions after calling the main function self.main = None # main method + self.value = None # value of the concept self.id = Concept.concepts_id Concept.concepts_id = Concept.concepts_id + 1 diff --git a/core/sheerka.py b/core/sheerka.py index 2862f67..fbd274f 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -2,6 +2,7 @@ import os from dataclasses import dataclass from core.concept import Concept +from sdp.sheerkaDataProvider import SheerkaDataProvider class Singleton(type): @@ -52,6 +53,8 @@ 
def sysarg_to_string(argv):
    """
    Transform a list of strings into a single command-line like string

    Arguments are separated by a single space. An argument that itself
    contains a space is wrapped in double quotes so it can still be
    recognised as one argument.

    :param argv: list of strings (typically sys.argv[1:]), may be None or empty
    :return: the joined string, "" when there is nothing to join
    """
    # None and the empty list are both falsy: one test covers the two cases
    if not argv:
        return ""

    # str.join inserts the separators, no need to track the first element by hand
    return " ".join(f'"{arg}"' if " " in arg else arg for arg in argv)
class Event(object):
    """
    Class that represents something that modifies the state of the system
    """

    def __init__(self, message="", user="kodjo", date=None):
        """
        :param message: what happened (only str messages can be digested)
        :param user: name of the user that triggered the event
        :param date: date of the event, defaults to the moment the event is created
        """
        self.version = 1
        self.user = user
        # The default must be computed here and not in the signature:
        # a default of datetime.now() is evaluated only once, when the module is
        # imported, so every event would silently share the same date
        # (and two events with the same message would share the same digest)
        self.date = datetime.now() if date is None else date
        self.message = message

    def get_digest(self):
        """
        Returns the digest of the event
        The digest is computed from the user, the date and the message
        :return: hexa form of the sha256
        :raises NotImplementedError: when the message is not a string
        """
        if not isinstance(self.message, str):
            raise NotImplementedError

        return hashlib.sha256(f"{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()

    def to_json(self):
        """
        Returns the json representation of all the attributes of the event
        :return: json string
        """
        return json.dumps(self.__dict__, default=json_default_converter)

    def from_json(self, json_message):
        """
        Updates the event from a dictionary (already decoded json)
        :param json_message: dictionary with the keys "user", "date" (iso format) and "message"
        :return:
        """
        self.user = json_message["user"]
        self.date = datetime.fromisoformat(json_message["date"])
        self.message = json_message["message"]
def remove(self, entry, filter):
    """
    Removes elements from the entry 'entry'

    - no filter: the whole entry is deleted
    - entry is a dict: removes the items for which filter(key, element) is true
    - entry is a list: removes the elements for which filter(element) is true
    - otherwise (single value): deletes the entry if filter(value) is true

    :param entry: name of the entry, must exist in self.data (KeyError otherwise)
    :param filter: predicate that selects what to remove, or None to remove everything
    :return:
    """
    if filter is None:
        del self.data[entry]
        return

    current = self.data[entry]

    if isinstance(current, dict):
        # collect the keys first, a dict cannot be resized while it is iterated
        keys_to_remove = [key for key, element in current.items() if filter(key, element)]
        for key in keys_to_remove:
            del current[key]

    elif isinstance(current, list):
        # Rebuild the content instead of calling remove() inside the loop:
        # removing while iterating shifts the remaining elements and skips
        # the one that follows each removal.
        # The slice assignment keeps the same list object
        current[:] = [element for element in current if not filter(element)]

    elif filter(current):
        del self.data[entry]
Serializer() + + def add(self, event: Event, entry, obj): + """ + Adds obj to the entry 'entry' + :param event: events that triggers the update of the state + :param entry: entry of the state to update + :param obj: obj to insert or add + :return: new sha256 of the state + """ + event_digest = self.save_event(event) + snapshot = self.get_snapshot() + state = self.load_state(snapshot) + + # check uniqueness, cannot add the same key twice + obj_key = str(obj.get_key()) if hasattr(obj, "get_key") else None + if state.contains(entry, obj_key): + raise IndexError(f"{entry}.{obj_key}") + elif isinstance(obj, dict): + for k in obj: + if state.contains(entry, k): + raise IndexError(f"{entry}.{k}") + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + state.update(entry, obj if obj_key is None else {obj_key: obj}) + + new_snapshot = self.save_state(state) + self.set_snapshot(new_snapshot) + return new_snapshot + + def add_with_auto_key(self, event: Event, entry, obj): + """ + Add obj to entry. 
An autogenerated key created for obj + :param event: + :param entry: + :param obj: + :return: + """ + next_key = self.get_next_key(entry) + if hasattr(obj, "set_key"): + obj.set_key(next_key) + return self.add(event, entry, {next_key: obj}) + + def add_unique(self, event: Event, entry, obj): + """Add an entry and make sure it's unique""" + event_digest = self.save_event(event) + snapshot = self.get_snapshot() + state = self.load_state(snapshot) + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + if entry not in state.data: + state.data[entry] = {obj} + else: + state.data[entry].add(obj) + + new_snapshot = self.save_state(state) + self.set_snapshot(new_snapshot) + return new_snapshot + + def set(self, event: Event, entry, obj): + """ + Add or replace an element + :param event: + :param entry: + :param obj: + :return: + """ + event_digest = self.save_event(event) + snapshot = self.get_snapshot() + state = self.load_state(snapshot) + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + obj_key = str(obj.get_key()) if hasattr(obj, "get_key") else None + state.update(entry, obj if obj_key is None else {obj_key: obj}, append=False) + + new_snapshot = self.save_state(state) + self.set_snapshot(new_snapshot) + return new_snapshot + + def modify(self, event: Event, entry, key, obj): + """ + Updates an existing element when element are saved by key + :param event: + :param entry: + :param key: key of the object to update + :param obj: new data + :return: + """ + event_digest = self.save_event(event) + snapshot = self.get_snapshot() + state = self.load_state(snapshot) + + if entry not in state.data: + raise IndexError(entry) + + if key is not None and key not in state.data[entry]: + raise IndexError(f"{entry}.{key}") + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + 
if key is None: + state.data[entry] = obj + else: + state.update(entry, {key: obj}) + + new_snapshot = self.save_state(state) + self.set_snapshot(new_snapshot) + return new_snapshot + + def list(self, entry, filter=None): + """ + Lists elements of entry 'entry' + :param entry: name of the entry to list + :param filter: filter to use + :return: list of elements + """ + snapshot = self.get_snapshot() + state = self.load_state(snapshot) + if entry not in state.data: + return [] + + elements = state.data[entry] + + if isinstance(elements, dict): + # manage when elements have a key + filter_to_use = (lambda k, o: True) if filter is None else filter + for key, element in elements.items(): + if filter_to_use(key, element): + yield element + else: + # manage when no key is defined for the elements + if not isinstance(elements, list) and not isinstance(elements, set): + elements = [elements] + + filter_to_use = (lambda o: True) if filter is None else filter + for element in elements: + if filter_to_use(element): + yield element + + def remove(self, event: Event, entry, filter=None): + """ + Removes elements under the entry 'entry' + :param event: event that triggers the deletion + :param entry: + :param filter: filter to use + :return: new sha256 of the state + TODO: Remove by key + """ + snapshot = self.get_snapshot() + state = self.load_state(snapshot) + + if entry not in state.data: + raise IndexError(entry) + + event_digest = self.save_event(event) + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + state.remove(entry, filter) + + new_snapshot = self.save_state(state) + self.set_snapshot(new_snapshot) + return new_snapshot + + def get(self, entry, key=None): + """ + Retrieve an element by its key + :param entry: + :param key: + :return: + """ + snapshot = self.get_snapshot() + state = self.load_state(snapshot) + + if entry not in state.data: + raise IndexError(entry) + + if key is not None and key 
def save_event(self, event: "Event"):
    """
    Saves an event on disk
    The event is written under <root>/<EventFolder>/<first 24 chars of the digest>/<digest>
    Nothing is written if a file already exists for this digest
    :param event: event to save
    :return: digest of the event
    """
    digest = event.get_digest()
    target_path = path.join(self.root, self.EventFolder, digest[:24], digest)
    if path.exists(target_path):
        return digest

    # exist_ok avoids the "check then create" race when the folder
    # is created by someone else between the two calls
    os.makedirs(path.dirname(target_path), exist_ok=True)

    with open(target_path, "wb") as f:
        f.write(self.serializer.serialize(event).read())

    return digest
digest is None: + return State() + + target_path = path.join(self.root, SheerkaDataProvider.StateFolder, digest[:24], digest) + with open(target_path, "rb") as f: + return self.serializer.deserialize(f) + + def get_cache_params(self, category, key): + digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest() + cache_path = path.join(self.root, SheerkaDataProvider.CacheFolder, digest[:24], digest) + return digest, cache_path + + def add_to_cache(self, category, key, obj, update=False): + """ + Save obj in the internal cache system + :param category: + :param key: + :param obj: + :param update: + :return: + """ + digest, cache_path = self.get_cache_params(category, key) + + if path.exists(cache_path) and not update: + return digest + + if not path.exists(path.dirname(cache_path)): + os.makedirs(path.dirname(cache_path)) + + with open(cache_path, "wb") as f: + f.write(zlib.compress(obj.encode("utf-8"), 9)) + + return digest + + def load_from_cache(self, category, key): + """ + Reload a compress object from the cache + :param category: + :param key: + :return: + """ + digest, cache_path = self.get_cache_params(category, key) + + if not path.exists(cache_path): + raise IndexError(f"{category}.{key}") + + with open(cache_path, "rb") as f: + return zlib.decompress(f.read()).decode("utf-8") + + def remove_from_cache(self, category, key): + """ + + :param category: + :param key: + :return: + """ + digest, cache_path = self.get_cache_params(category, key) + if path.exists(cache_path): + os.remove(cache_path) + + return digest + + def in_cache(self, category, key): + """ + Returns true if the key is in cache + :param category: + :param key: + :return: + """ + digest, cache_path = self.get_cache_params(category, key) + return path.exists(cache_path) + + def get_snapshot(self): + head_file = path.join(self.root, SheerkaDataProvider.HeadFile) + if not path.exists(head_file): + return None + with open(head_file, "r") as f: + return f.read() + + def 
class Serializer:
    """
    Dispatches objects to the registered serializers

    Every stream starts with a header packed with HEADER_FORMAT :
    the name of the serializer (one character) followed by its version.
    The header is what allows deserialize() to find the serializer to use
    """
    HEADER_FORMAT = "cH"

    def __init__(self):
        self._cache = []

        # add builtin serializers
        self._cache.append(EventSerializer())
        self._cache.append(PickleSerializer())

    def register(self, serializer):
        """
        Adds a serializer to the list of known serializers
        :param serializer: object with name, version, match(), dump() and load()
        :return:
        """
        self._cache.append(serializer)

    def serialize(self, obj):
        """
        Get the stream representation of an object
        The first registered serializer that matches the object is used
        :param obj:
        :return: whatever the dump() of the serializer returns (the stream for the builtin ones)
        :raises TypeError: when no serializer matches the object
        """
        serializer = next((s for s in self._cache if s.match(obj)), None)
        if serializer is None:
            raise TypeError(f"Don't know how to serialize {type(obj)}")

        stream = io.BytesIO()
        header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
        stream.write(header)

        return serializer.dump(stream, obj)

    def deserialize(self, stream):
        """
        Loads an object from its stream representation
        :param stream: binary stream positioned on the header
        :return: the object created by the load() of the serializer
        :raises TypeError: when no serializer has the name and the version found in the header
        """
        # the size of the header is derived from the format, so the two cannot diverge
        header_size = struct.calcsize(Serializer.HEADER_FORMAT)
        raw_name, version = struct.unpack(Serializer.HEADER_FORMAT, stream.read(header_size))
        name = raw_name.decode("utf-8")

        serializer = next((s for s in self._cache if s.name == name and s.version == version), None)
        if serializer is None:
            raise TypeError(f"Don't know how to deserialize: no serializer with name={name}, version={version}")

        return serializer.load(stream)
+ obj.__class__.__name__ + + +class EventSerializer(BaseSerializer): + @staticmethod + def match(obj): + return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event" + + def __init__(self): + BaseSerializer.__init__(self, "E", 1) + + def dump(self, stream, obj): + stream.write(obj.to_json().encode("utf-8")) + stream.seek(0) + return stream + + def load(self, stream): + json_stream = stream.read().decode("utf-8") + json_message = json.loads(json_stream) + event = BaseSerializer.get_class("sdp.sheerkaDataProvider.Event")() + event.from_json(json_message) + return event + + +class PickleSerializer(BaseSerializer): + @staticmethod + def match(obj): + return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State" + + def __init__(self): + BaseSerializer.__init__(self, "P", 1) + + def dump(self, stream, obj): + stream.write(pickle.dumps(obj)) + stream.seek(0) + return stream + + def load(self, stream): + return pickle.loads(stream.read()) + + diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index d0701c1..226e8fe 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -1,27 +1,30 @@ import pytest import os +from os import path +import shutil from core.concept import Concept from core.sheerka import Sheerka +tests_root = path.abspath("../build/tests") + @pytest.fixture(autouse=True) def init_test(): - print("Before yield") - print("Current folder " + os.getcwd()) - if not os.path.exists("build/tests"): - os.makedirs("build/tests") + if path.exists(tests_root): + shutil.rmtree(tests_root) + + if not path.exists(tests_root): + os.makedirs(tests_root) current_pwd = os.getcwd() - os.chdir("build/tests") + os.chdir(tests_root) + yield None os.chdir(current_pwd) - print("After yield") - print("Current folder " + os.getcwd()) def test_root_folder_is_created_after_initialization(): - print("Before yield") root_folder = "init_folder" return_value = Sheerka().initialize(root_folder) @@ -49,3 +52,4 @@ def 
test_null_concept_are_equals(): assert Sheerka.concept_equals(None, None) assert Sheerka.concept_equals(concept1, concept1) + diff --git a/tests/test_sheerkaDataProvider.py b/tests/test_sheerkaDataProvider.py new file mode 100644 index 0000000..0072ddd --- /dev/null +++ b/tests/test_sheerkaDataProvider.py @@ -0,0 +1,573 @@ +import pytest +import os +from os import path +from sdp.sheerkaDataProvider import SheerkaDataProvider, Event +from datetime import date, datetime +import shutil +import json + +tests_root = path.abspath("../build/tests") + + +def read_text_file(file_name): + with open(file_name, "r") as f: + return f.read() + + +def read_json_file(file_name): + with open(file_name, "r") as f: + return json.load(f) + + +class ObjWithKey: + def __init__(self, a, b): + self.a = a + self.b = b + + def __eq__(self, obj): + return isinstance(obj, ObjWithKey) and \ + self.a == obj.a and \ + self.b == obj.b + + def __repr__(self): + return f"ObjWithKey({self.a}, {self.b})" + + def get_key(self): + return self.a + + +class ObjSetKey: + def __init__(self, value, key=None): + self.value = value + self.key = key + + def __eq__(self, obj): + return isinstance(obj, ObjSetKey) and \ + self.key == obj.key and \ + self.value == obj.value + + def __repr__(self): + return f"ObjSetKey({self.key}, {self.value})" + + def set_key(self, key): + self.key = key + + +class ObjNoKey: + def __init__(self, a, b): + self.a = a + self.b = b + + def __hash__(self): + return hash((self.a, self.b)) + + def __eq__(self, obj): + return isinstance(obj, ObjNoKey) and \ + self.a == obj.a and \ + self.b == obj.b + + def __repr__(self): + return f"ObjNoKey({self.a}, {self.b})" + + +@pytest.fixture(autouse=True) +def init_test(): + if path.exists(tests_root): + shutil.rmtree(tests_root) + + if not path.exists(tests_root): + os.makedirs(tests_root) + current_pwd = os.getcwd() + os.chdir(tests_root) + + yield None + + os.chdir(current_pwd) + + +def test_i_can_init_the_data_provider(): + sdp = 
SheerkaDataProvider(".sheerka") + + assert sdp.root == path.abspath(path.join(tests_root, ".sheerka")) + assert path.exists(path.join(tests_root, ".sheerka")) + + +def test_i_can_add_and_retrieve_an_event(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo") + + evt_digest = sdp.save_event(event) + evt = sdp.load_event(evt_digest) + + assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest)) + assert evt.version == 1 + assert evt.date == datetime(year=2007, month=9, day=10) + assert evt.user == "kodjo" + assert evt.message == "hello world" + + +def test_i_can_add_an_object(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("cmd add 'foo => bar'") + event_digest = event.get_digest() + obj = "foo => bar" + + state_digest = sdp.add(event, "entry", obj) + state = sdp.load_state(state_digest) + + assert path.exists(path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest[0:24], event_digest)) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.StateFolder, state_digest[0:24], state_digest)) + assert path.exists(path.join(sdp.root, SheerkaDataProvider.HeadFile)) + + assert state.date is not None + assert state.parents == [] + assert state.events == [event_digest] + assert state.data == {"entry": "foo => bar"} + + assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == state_digest + + +def test_i_can_add_multiple_elements_in_an_entry(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("cmd add 'foo => bar'") + + state_digest1 = sdp.add(event, "entry", 1) + state1 = sdp.load_state(state_digest1) + + state_digest2 = sdp.add(event, "entry", 2) + state2 = sdp.load_state(state_digest2) + + state_digest3 = sdp.add(event, "entry", 3) + state3 = sdp.load_state(state_digest3) + + assert state1.data == {"entry": 1} + assert state2.data == {"entry": [1, 2]} + assert state3.data == {"entry": [1, 2, 3]} + + +def 
test_i_can_add_element_using_auto_generated_key(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("cmd add 'foo => bar'") + key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile) + + sdp.add_with_auto_key(event, "entry1", "foo") + sdp.add_with_auto_key(event, "entry1", "bar") + sdp.add_with_auto_key(event, "entry2", "baz") + + state = sdp.load_state(sdp.get_snapshot()) + + assert path.exists(key_file) + assert read_json_file(key_file) == {"entry1": 2, "entry2": 1} + assert state.data == {"entry1": {"1": "foo", "2": "bar"}, "entry2": {"1": "baz"}} + + +def test_i_can_add_and_auto_set_the_key(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("cmd add 'foo => bar'") + key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile) + + sdp.add_with_auto_key(event, "entry1", ObjSetKey("foo")) + sdp.add_with_auto_key(event, "entry1", ObjSetKey("bar")) + + state = sdp.load_state(sdp.get_snapshot()) + + assert path.exists(key_file) + assert read_json_file(key_file) == {"entry1": 2} + assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("bar", "2")}} + + +def test_i_can_add_an_object_with_its_own_key(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("cmd add 'foo => bar'") + + sdp.add(event, "entry", ObjWithKey(1, "foo")) + sdp.add(event, "entry", ObjWithKey(2, "bar")) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"1": ObjWithKey(1, "foo"), "2": ObjWithKey(2, "bar")}} + + +def test_i_can_add_dictionary(): + sdp = SheerkaDataProvider(".sheerka") + event = Event("cmd add 'foo => bar'") + + sdp.add(event, "entry", {"1": "foo"}) + sdp.add(event, "entry", {"2": "bar"}) + + state = sdp.load_state(sdp.get_snapshot()) + assert state.data == {"entry": {"1": "foo", "2": "bar"}} + + +def test_i_cannot_add_the_same_key_twice(): + sdp = SheerkaDataProvider(".sheerka") + sdp.add(Event("event"), "entry", {"1": "foo"}) + + with pytest.raises(IndexError): + sdp.add(Event("event"), "entry", {"1": "foo"}) 
def test_i_cannot_add_the_same_element_twice():
    """Adding an equal keyed object to the same entry a second time raises IndexError."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry", ObjWithKey(1, "foo"))

    with pytest.raises(IndexError):
        sdp.add(Event("event"), "entry", ObjWithKey(1, "foo"))


def test_i_can_set_objects_with_key():
    """After two set() calls only the second keyed object remains, stored under "2"."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.set(Event("event"), "entry", ObjWithKey(1, "foo"))
    sdp.set(Event("event"), "entry", ObjWithKey(2, "foo"))

    state = sdp.load_state(sdp.get_snapshot())
    assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}}


def test_i_can_set_objects_with_no_key():
    """After two set() calls with key-less objects the entry holds the second object itself."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.set(Event("event"), "entry", ObjNoKey(1, "foo"))
    sdp.set(Event("event"), "entry", ObjNoKey(2, "foo"))

    state = sdp.load_state(sdp.get_snapshot())
    assert state.data == {"entry": ObjNoKey(2, "foo")}


def test_i_can_set_from_list_to_dict():
    """Setting a dict on an entry previously set to a list leaves exactly that dict."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.set(Event("event"), "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")])
    sdp.set(Event("event"), "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")})

    state = sdp.load_state(sdp.get_snapshot())
    assert state.data == {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}}


def test_i_can_add_unique():
    """add_unique() collapses duplicates: four calls with two distinct objects yield a 2-element set."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add_unique(Event("event"), "entry", ObjNoKey(1, "foo"))
    sdp.add_unique(Event("event"), "entry", ObjNoKey(1, "foo"))
    sdp.add_unique(Event("event"), "entry", ObjNoKey(2, "bar"))
    sdp.add_unique(Event("event"), "entry", ObjNoKey(2, "bar"))

    state = sdp.load_state(sdp.get_snapshot())
    assert state.data == {"entry": {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}}


def test_i_can_keep_state_history():
    """Each add() persists its event and state, and the new state links back to the previous one.

    Files are expected at ``<root>/<folder>/<first 24 chars of digest>/<digest>``.
    """
    sdp = SheerkaDataProvider(".sheerka")

    event1 = Event("cmd add 'foo => bar'")
    event_digest1 = event1.get_digest()
    obj1 = "foo => bar"
    state_digest1 = sdp.add(event1, "entry1", obj1)

    event2 = Event("cmd add 'foo => baz'")
    event_digest2 = event2.get_digest()
    obj2 = "foo => baz"
    state_digest2 = sdp.add(event2, "entry2", obj2)

    state2 = sdp.load_state(state_digest2)

    assert path.exists(
        path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest1[0:24], event_digest1))
    assert path.exists(
        path.join(sdp.root, SheerkaDataProvider.StateFolder, state_digest1[0:24], state_digest1))
    assert path.exists(
        path.join(sdp.root, SheerkaDataProvider.EventFolder, event_digest2[0:24], event_digest2))
    assert path.exists(
        path.join(sdp.root, SheerkaDataProvider.StateFolder, state_digest2[0:24], state_digest2))
    assert state2.date is not None
    assert state2.parents == [state_digest1]
    assert state2.events == [event_digest2]
    assert state2.data == {"entry1": "foo => bar", "entry2": "foo => baz"}


def test_i_can_list_elements_when_there_is_nothing_to_list():
    """list() on an entry that was never written yields nothing."""
    sdp = SheerkaDataProvider(".sheerka")

    result = sdp.list("entry")

    assert list(result) == []


def test_i_can_list_when_no_key():
    """list() returns the plain values of one entry in insertion order, ignoring other entries."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")
    sdp.add(Event("event"), "entry1", "bar")
    sdp.add(Event("event"), "entry2", "baz")

    result = sdp.list("entry1")

    assert list(result) == ["foo", "bar"]


def test_i_can_list_when_key():
    """list() returns the values (not the keys) of items added as single-item dicts."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", {"1": "foo"})
    sdp.add(Event("event"), "entry1", {"2": "bar"})
    sdp.add(Event("event"), "entry2", {"3": "baz"})

    result = sdp.list("entry1")

    assert list(result) == ["foo", "bar"]


def test_i_can_list_when_one_element():
    """list() on an entry holding a single value yields a one-element sequence."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")
    sdp.add(Event("event"), "entry2", "baz")

    result = sdp.list("entry1")

    assert list(result) == ["foo"]


def test_i_can_filter_on_key_for_dict():
    """A two-argument (key, object) filter can select dict items by key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", {"1": "foo"})
    sdp.add(Event("event"), "entry1", {"2": "bar"})

    result = sdp.list("entry1", lambda k, o: k == "1")

    assert list(result) == ["foo"]


def test_i_can_filter_on_key_for_objects():
    """A two-argument (key, object) filter can select keyed objects by key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", ObjWithKey("a1", "b1"))
    sdp.add(Event("event"), "entry1", ObjWithKey("a2", "b2"))

    result = sdp.list("entry1", lambda k, o: k == "a1")

    assert list(result) == [ObjWithKey("a1", "b1")]


def test_i_can_filter_on_attribute_for_dict():
    """A (key, object) filter can select dict-valued items by inspecting the value."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", {"1": {"a": "a1", "b": "b1"}})
    sdp.add(Event("event"), "entry1", {"2": {"a": "a2", "b": "b2"}})

    result = sdp.list("entry1", lambda k, o: o["a"] == "a2")

    assert list(result) == [{"a": "a2", "b": "b2"}]


def test_i_can_filter_on_attribute_for_object():
    """A (key, object) filter can select keyed objects by one of their attributes."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", ObjWithKey("a1", "b1"))
    sdp.add(Event("event"), "entry1", ObjWithKey("a2", "b2"))

    result = sdp.list("entry1", lambda k, o: o.b == "b2")

    assert list(result) == [ObjWithKey("a2", "b2")]


def test_i_can_filter_a_list():
    """For key-less values the filter takes a single argument: the object."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")
    sdp.add(Event("event"), "entry1", "bar")

    result = sdp.list("entry1", lambda o: o == "bar")

    assert list(result) == ["bar"]


def test_i_can_filter_a_list_of_object():
    """A single-argument filter can select key-less objects by attribute."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", ObjNoKey("a1", "b1"))
    sdp.add(Event("event"), "entry1", ObjNoKey("a2", "b2"))

    result = sdp.list("entry1", lambda o: o.b == "b1")

    assert list(result) == [ObjNoKey("a1", "b1")]


def test_i_can_remove_all_elements():
    """remove() without a filter empties the entry; its return value equals the HeadFile content."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")
    sdp.add(Event("event"), "entry1", "bar")

    state_digest = sdp.remove(Event("event"), "entry1")
    result = sdp.list("entry1")

    assert read_text_file(path.join(sdp.root, SheerkaDataProvider.HeadFile)) == state_digest
    assert list(result) == []


def test_i_can_remove_a_element():
    """remove() with a single-argument filter deletes only the matching value."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")
    sdp.add(Event("event"), "entry1", "bar")

    sdp.remove(Event("event"), "entry1", lambda o: o == "foo")
    result = sdp.list("entry1")

    assert list(result) == ["bar"]


def test_i_can_remove_dict_by_key():
    """remove() with a (key, object) filter deletes the item stored under the matching key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", {"1": ObjNoKey("a1", "b1")})
    sdp.add(Event("event"), "entry1", {"2": ObjNoKey("a2", "b2")})

    sdp.remove(Event("event"), "entry1", lambda k, o: k == "2")
    result = sdp.list("entry1")

    assert list(result) == [ObjNoKey("a1", "b1")]


def test_i_can_remove_when_only_one_element():
    """Removing the only value of an entry leaves nothing to list."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")

    sdp.remove(Event("event"), "entry1", lambda o: o == "foo")
    result = sdp.list("entry1")

    assert list(result) == []


def test_i_cannot_remove_if_entry_does_not_exist():
    """remove() on an unknown entry raises IndexError carrying the entry name."""
    sdp = SheerkaDataProvider(".sheerka")
    with pytest.raises(IndexError) as excinfo:
        sdp.remove(Event("event"), "entry")
    # Checked after the block and on ``.value``: ``excinfo`` itself is pytest's
    # ExceptionInfo wrapper, whose str() is not the exception message.
    assert str(excinfo.value) == "entry"


def test_i_can_replace_an_entry():
    """modify() with a key of None replaces the whole entry with the new value."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")
    sdp.add(Event("event"), "entry1", "bar")

    sdp.modify(Event("event"), "entry1", None, "baz")
    result = sdp.list("entry1")

    assert list(result) == ["baz"]


def test_i_cannot_update_an_entry_that_does_not_exist():
    """modify() on an unknown entry raises IndexError carrying the entry name."""
    sdp = SheerkaDataProvider(".sheerka")

    with pytest.raises(IndexError) as excinfo:
        sdp.modify(Event("event"), "entry", "key", "foo")
    # Inspect the raised exception, not the ExceptionInfo wrapper.
    assert str(excinfo.value) == "entry"


def test_i_cannot_update_a_key_that_does_not_exist():
    """modify() on a key missing from an existing entry raises IndexError."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", {"1": "foo"})

    with pytest.raises(IndexError):
        sdp.modify(Event("event"), "entry1", "2", "bar")
    # NOTE(review): the previous version compared str() of the ExceptionInfo
    # wrapper with "entry.1", which could not validate the message and matches
    # neither the entry ("entry1") nor the key ("2") used here. Confirm the
    # provider's message format before pinning the text via ``excinfo.value``.


def test_i_can_get_the_entire_entry():
    """get() and get_safe() without a key both return every value of the entry."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")
    sdp.add(Event("event"), "entry1", "bar")

    result = sdp.get("entry1")
    result_safe = sdp.get_safe("entry1")

    assert result == ["foo", "bar"]
    assert result_safe == ["foo", "bar"]


def test_i_can_get_an_entry_with_on_object():
    """An entry holding a single value is returned as that value, not as a list."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", "foo")

    result = sdp.get("entry1")
    result_safe = sdp.get_safe("entry1")

    assert result == "foo"
    assert result_safe == "foo"


def test_i_can_get_an_entry_by_key():
    """get() and get_safe() with a key return the value stored under that key."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", {"1": "foo"})
    sdp.add(Event("event"), "entry1", {"2": "bar"})

    result = sdp.get("entry1", "2")
    result_safe = sdp.get_safe("entry1", "2")

    assert result == "bar"
    assert result_safe == "bar"


def test_i_cannot_get_an_entry_that_does_not_exist():
    """For an unknown entry get_safe() returns None while get() raises IndexError."""
    sdp = SheerkaDataProvider(".sheerka")

    assert sdp.get_safe("entry") is None
    with pytest.raises(IndexError) as excinfo:
        sdp.get("entry")
    # Inspect the raised exception, not the ExceptionInfo wrapper.
    assert str(excinfo.value) == "entry"


def test_i_cannot_get_a_key_that_does_not_exist():
    """For an unknown key get_safe() returns None while get() raises IndexError."""
    sdp = SheerkaDataProvider(".sheerka")
    sdp.add(Event("event"), "entry1", {"1": "foo"})

    assert sdp.get_safe("entry1", "2") is None
    with pytest.raises(IndexError):
        sdp.get("entry1", "2")
    # NOTE(review): the previous version compared str() of the ExceptionInfo
    # wrapper with "entry.1", which could not validate the message and matches
    # neither the entry ("entry1") nor the key ("2") used here. Confirm the
    # provider's message format before pinning the text via ``excinfo.value``.


def test_i_can_save_and_retrieve_cache():
    """add_to_cache() writes a file named after the returned digest and the text can be read back."""
    sdp = SheerkaDataProvider(".sheerka")
    txt = "foo bar baz foo bar baz foo bar baz"
    key = "key_to_use"
    category = "cache_category"

    assert not sdp.in_cache(category, key)
    digest = sdp.add_to_cache(category, key, txt)
    assert path.exists(path.join(sdp.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest))
    assert sdp.in_cache(category, key)

    from_cache = sdp.load_from_cache(category, key)
    assert from_cache == txt


def test_cache_is_not_updated_by_default():
    """A second add_to_cache() for the same category/key keeps the first text."""
    sdp = SheerkaDataProvider(".sheerka")
    txt = "foo bar baz foo bar baz foo bar baz"
    txt2 = "foo foo foo foo foo foo foo foo foo"
    key = "key_to_use"
    category = "cache_category"

    sdp.add_to_cache(category, key, txt)
    sdp.add_to_cache(category, key, txt2)

    from_cache = sdp.load_from_cache(category, key)
    assert from_cache == txt


def test_i_can_update_cache():
    """add_to_cache(..., update=True) overwrites the text cached under the same category/key."""
    sdp = SheerkaDataProvider(".sheerka")
    txt = "foo bar baz foo bar baz foo bar baz"
    txt2 = "foo foo foo foo foo foo foo foo foo"
    key = "key_to_use"
    category = "cache_category"

    sdp.add_to_cache(category, key, txt)
    sdp.add_to_cache(category, key, txt2, update=True)

    from_cache = sdp.load_from_cache(category, key)
    assert from_cache == txt2


def test_i_can_remove_from_cache():
    """remove_from_cache() deletes the cached file and the key is no longer reported as cached."""
    sdp = SheerkaDataProvider(".sheerka")
    txt = "foo bar baz foo bar baz foo bar baz"
    key = "key_to_use"
    category = "cache_category"

    sdp.add_to_cache(category, key, txt)
    digest = sdp.remove_from_cache(category, key)
    assert not path.exists(path.join(sdp.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest))
    assert not sdp.in_cache(category, key)


def test_i_can_test_than_an_entry_exits():
    """exists() is False before anything is added to an entry and True afterwards."""
    sdp = SheerkaDataProvider(".sheerka")

    assert not sdp.exists("entry")
    sdp.add(Event("event"), "entry", "value")
    assert sdp.exists("entry")


# ---------------------------------------------------------------------------
# diff --git a/tests/test_sheerkaSerializer.py b/tests/test_sheerkaSerializer.py
# new file mode 100644, index 0000000..4d267ee
# ---------------------------------------------------------------------------
from sdp.sheerkaDataProvider import Event
from sdp.sheerkaSerializer import Serializer
from datetime import datetime


def test_i_can_serialize_an_event():
    """An Event survives a serialize/deserialize round trip with all four fields intact."""
    event = Event("test", user="user", date=datetime.fromisoformat("2019-10-21T10:20:30.999"))
    serializer = Serializer()

    stream = serializer.serialize(event)
    loaded = serializer.deserialize(stream)

    assert event.version == loaded.version
    assert event.user == loaded.user
    assert event.date == loaded.date
    assert event.message == loaded.message


# ---------------------------------------------------------------------------
# diff --git a/tests/test_utils.py b/tests/test_utils.py
# new file mode 100644, index 0000000..8415b98
# ---------------------------------------------------------------------------
import core.utils
import pytest


@pytest.mark.parametrize("lst, as_string", [
    (None, ""),
    ([], ""),
    (["hello", "world"], "hello world"),
    (["hello world", "my friend"], '"hello world" "my friend"'),
])
def test_i_can_create_string_from_a_list(lst, as_string):
    """sysarg_to_string() joins items with spaces and quotes items that contain a space.

    ``None`` and an empty list both produce the empty string.
    """
    assert core.utils.sysarg_to_string(lst) == as_string