From b818c992ec743fd588e3e787ef8908cd459fc3d3 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Wed, 6 Nov 2019 16:01:58 +0100 Subject: [PATCH] Upgrade to Python 3.8 + duplicate check when adding in sdp --- core/concept.py | 3 +- core/sheerka.py | 8 +- sdp/sheerkaDataProvider.py | 126 +++++++++++++++++++++++++----- tests/test_defautparser.py | 22 +++--- tests/test_sheerka.py | 6 +- tests/test_sheerkaDataProvider.py | 118 +++++++++++++++++++++++++++- 6 files changed, 244 insertions(+), 39 deletions(-) diff --git a/core/concept.py b/core/concept.py index 407b658..c45de57 100644 --- a/core/concept.py +++ b/core/concept.py @@ -99,8 +99,7 @@ class ErrorConcept(Concept): Concept.__init__(self, "error", is_builtin=True, where=where, pre=pre, post=post, body=body, desc=desc) def __repr__(self): - return f"{self.name} : {self.body}" - + return f"({self.id}){self.name} : {self.body}" class Property: """ diff --git a/core/sheerka.py b/core/sheerka.py index a9f46ed..a1c235f 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from core.concept import Concept, ErrorConcept, Property from parsers.PythonParser import PythonParser, PythonGetNamesVisitor, PythonNode -from sdp.sheerkaDataProvider import SheerkaDataProvider, Event +from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError from parsers.DefaultParser import DefaultParser, DefConceptNode import logging @@ -200,8 +200,10 @@ class Sheerka(Concept, metaclass=Singleton): concept.add_codes(def_concept_node.get_codes()) self.set_id_if_needed(concept, False) - self.sdp.add(exec_context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True) - + try: + self.sdp.add(exec_context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True) + except SheerkaDataProviderDuplicateKeyError as error: + return ReturnValue(False, ErrorConcept(body=error), error.args[0]) return ReturnValue(True, concept) @staticmethod diff --git a/sdp/sheerkaDataProvider.py b/sdp/sheerkaDataProvider.py index 4b4cea6..bb6d00b 100644 --- a/sdp/sheerkaDataProvider.py +++ b/sdp/sheerkaDataProvider.py @@ -52,16 +52,61 @@ class Event(object): self.message = as_dict["message"] -class ObjWithKey: +class ObjToUpdate: """ Internal key value class to hold the key (and the value) when it is detected It's created to distinguish from {key, value} """ - def __init__(self, key, obj): - self.key = key + def __init__(self, obj, key=None, digest=None): self.obj = obj + self.has_key = None + self.has_digest = None + self._key = None + self._digest = None + if key is not None: + self.set_key(key) + if digest is not None: + self.set_digest(digest) + + def get_key(self): + if self.has_key is None: + key = SheerkaDataProvider.get_obj_key(self.obj) + if key is None: + self.has_key = False + return None + else: + self.has_key = True + self._key = key + return key + elif not self.has_key: + return None + else: + return self._key + + def get_digest(self): + if self.has_digest is None: + digest = SheerkaDataProvider.get_obj_digest(self.obj) + if digest is None: + self.has_digest = False + return None + else: + self.has_digest = True + self._digest = digest + return digest + elif not self.has_digest: + return None + else: + return self._digest + + def set_digest(self, digest): + self.has_digest = True + self._digest = digest + + def set_key(self, key): + self.has_key = True + self._key = key class State: @@ -76,15 +121,29 @@ class State: self.events = [] self.data = {} - def update(self, entry, obj, append=True): + @staticmethod + def check_duplicate(items, obj: ObjToUpdate, key): + digest = obj.get_digest() + if digest is None: + return + + if not isinstance(items, list): + items = [items] + + for item in items: + item_digest = SheerkaDataProvider.get_obj_digest(item) + if item_digest == digest: + raise SheerkaDataProviderDuplicateKeyError("duplicate key", key, obj.obj) + + def update(self, entry, obj: ObjToUpdate, append=True): """ adds obj to entry :param entry: :param obj: - :param append: if True, ducplicate keys will create lists + :param append: if True, duplicate keys will create lists :return: """ - obj_to_use = {obj.key: obj.obj} if isinstance(obj, ObjWithKey) else obj + obj_to_use = {obj.get_key(): obj.obj} if obj.has_key else obj.obj if entry not in self.data: self.data[entry] = obj_to_use @@ -96,21 +155,25 @@ class State: self.data[entry] = obj_to_use elif isinstance(self.data[entry], list): - self.data[entry].append(obj.obj if isinstance(obj, ObjWithKey) else obj) # do not use obj_to_use ! + self.check_duplicate(self.data[entry], obj, entry) + self.data[entry].append(obj.obj) elif isinstance(obj_to_use, dict): for k in obj_to_use: if k not in self.data[entry]: self.data[entry][k] = obj_to_use[k] elif isinstance(self.data[entry][k], list): + self.check_duplicate(self.data[entry][k], obj, entry + "." + k) self.data[entry][k].append(obj_to_use[k]) else: + self.check_duplicate(self.data[entry][k], obj, entry + "." + k) self.data[entry][k] = [self.data[entry][k], obj_to_use[k]] elif isinstance(self.data[entry], dict): - raise SheerkaDataProviderError(f"Cannot found key on '{obj}' while all other elements have.", obj) + raise SheerkaDataProviderError(f"Cannot found key on '{obj.obj}' while all other elements have.", obj.obj) else: + self.check_duplicate(self.data[entry], obj, entry) self.data[entry] = [self.data[entry], obj_to_use] def modify(self, entry, key, obj, obj_key): @@ -120,7 +183,7 @@ class State: self.remove(entry, lambda k, o: k == key) # modify from on object to another append = True - self.update(entry, ObjWithKey(obj_key, obj), append=append) + self.update(entry, ObjToUpdate(obj, obj_key), append=append) def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed): found = False @@ -135,7 +198,7 @@ class State: self.data[entry][key][i] = obj else: to_remove = i - self.update(entry, ObjWithKey(obj_key, obj), append=True) + self.update(entry, ObjToUpdate(obj, obj_key), append=True) found = True break @@ -194,6 +257,13 @@ class SheerkaDataProviderError(Exception): self.obj = obj +class SheerkaDataProviderDuplicateKeyError(Exception): + def __init__(self, message, key, obj): + Exception.__init__(self, message) + self.key = key + self.obj = obj + + class SheerkaDataProvider: """Manages the state of the system""" @@ -233,7 +303,24 @@ class SheerkaDataProvider: :param obj: :return: String version of that is found, None otherwise """ - return str(obj.key) if hasattr(obj, "key") else str(obj.get_key()) if hasattr(obj, "get_key") else None + return str(obj.key) if hasattr(obj, "key") \ + else str(obj.get_key()) if hasattr(obj, "get_key") \ + else None + + @staticmethod + def get_obj_digest(obj): + """ + Tries to find the key of an object + Look for .digest, .get_digest() + :param obj: + :return: digest, None otherwise + """ + if isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX): + return obj[len(SheerkaDataProvider.REF_PREFIX):] + + return obj.digest if hasattr(obj, "digest") \ + else obj.get_digest() if hasattr(obj, "get_digest") \ + else None @staticmethod def get_stream_digest(stream): @@ -266,12 +353,15 @@ class SheerkaDataProvider: log.debug(f"Adding obj '{obj}' in entry '{entry}' (allow_multiple={allow_multiple}, use_ref={use_ref})") + if not isinstance(obj, ObjToUpdate): + obj = ObjToUpdate(obj) + # check uniqueness, cannot add the same key twice if allow_multiple == False - key = self.get_obj_key(obj) + key = obj.get_key() log.debug(f"key found : '{key}'") if key else log.debug("No key found") if not allow_multiple: - if isinstance(obj, dict): - for k in obj: + if isinstance(obj.obj, dict): + for k in obj.obj: if state.contains(entry, k): raise IndexError(f"{entry}.{k}") else: @@ -284,10 +374,10 @@ class SheerkaDataProvider: log.debug(state.data) if use_ref: - digest = self.save_obj(obj) - obj = ObjWithKey(key, self.REF_PREFIX + digest) if key else self.REF_PREFIX + digest + obj.set_digest(self.save_obj(obj.obj)) + obj.obj = self.REF_PREFIX + obj.get_digest() - state.update(entry, obj if (isinstance(obj, ObjWithKey) or key is None) else ObjWithKey(key, obj)) + state.update(entry, obj) new_snapshot = self.save_state(state) self.set_snapshot(new_snapshot) @@ -304,7 +394,7 @@ class SheerkaDataProvider: next_key = self.get_next_key(entry) if hasattr(obj, "set_key"): obj.set_key(next_key) - self.add(event_digest, entry, ObjWithKey(next_key, obj)) + self.add(event_digest, entry, ObjToUpdate(obj, next_key)) return entry, next_key def add_unique(self, event_digest: str, entry, obj): diff --git a/tests/test_defautparser.py b/tests/test_defautparser.py index 5407963..8c23fd6 100644 --- a/tests/test_defautparser.py +++ b/tests/test_defautparser.py @@ -39,6 +39,18 @@ def null(): def b(operator, left, right): return BinaryNode([], operator, left, right) +def compare_ast(left, right): + left_as_string = ast.dump(left) + left_as_string = left_as_string.replace(", ctx=Load()", "") + left_as_string = left_as_string.replace(", kind=None", "") + + right_as_string = right if isinstance(right, str) else ast.dump(right) + right_as_string = right_as_string.replace(", ctx=Load()", "") + right_as_string = right_as_string.replace(", kind=None", "") + + return left_as_string == right_as_string + + def test_i_can_tokenize(): source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=" @@ -213,19 +225,11 @@ def test_i_can_parse_def_concept(text, expected_name, expected_expr): assert isinstance(tree, DefConceptNode) assert tree.name == expected_name if isinstance(tree.body, PythonNode): - assert ast.dump(tree.body.ast) == ast.dump(expected_expr) + assert compare_ast(tree.body.ast, expected_expr) else: assert tree.body == expected_expr -def compare_ast(left, right): - left_as_string = ast.dump(left) - left_as_string = left_as_string.replace(", ctx=Load()", "") - - right_as_string = right if isinstance(right, str) else ast.dump(right) - right_as_string = right_as_string.replace(", ctx=Load()", "") - - return left_as_string == right_as_string def test_i_can_parse_complex_def_concept_statement(): diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index f82c32d..438be86 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -6,7 +6,7 @@ from os import path import shutil from core.concept import Concept, ConceptParts -from core.sheerka import Sheerka +from core.sheerka import Sheerka, ExecutionContext from parsers.DefaultParser import DefConceptNode, DefaultParser from parsers.PythonParser import PythonParser @@ -72,7 +72,7 @@ def test_i_can_add_a_concept(): concept = get_concept() sheerka = Sheerka() sheerka.initialize(root_folder) - res = sheerka.add_concept(concept) + res = sheerka.add_concept(ExecutionContext("xxx"), concept) concept_found = res.value assert res.status @@ -91,7 +91,7 @@ def test_i_can_add_a_concept(): assert all_props == ["a", "b"] assert concept_found.key == "__var__0 + __var__1" - assert concept_found.id == "100" + assert concept_found.id == "1001" # def test_i_cannot_add_the_same_concept_twice(): # concept1 = DefConceptNode(name="concept") diff --git a/tests/test_sheerkaDataProvider.py b/tests/test_sheerkaDataProvider.py index b1bedba..5a43412 100644 --- a/tests/test_sheerkaDataProvider.py +++ b/tests/test_sheerkaDataProvider.py @@ -3,7 +3,8 @@ import hashlib import pytest import os from os import path -from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError +from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError, \ + SheerkaDataProviderDuplicateKeyError from datetime import date, datetime import shutil import json @@ -106,6 +107,49 @@ class ObjDumpJson: self.key = as_dict["key"] +class ObjWithDigestNoKey: + def __init__(self, a, b): + self.a = a + self.b = b + + def __hash__(self): + return hash((self.a, self.b)) + + def __eq__(self, obj): + return isinstance(obj, ObjNoKey) and \ + self.a == obj.a and \ + self.b == obj.b + + def __repr__(self): + return f"ObjNoKey({self.a}, {self.b})" + + def get_digest(self): + return str(self.a) + str(self.b) + + +class ObjWithDigestWithKey: + def __init__(self, a, b): + self.a = a + self.b = b + + def __hash__(self): + return hash((self.a, self.b)) + + def __eq__(self, obj): + return isinstance(obj, ObjNoKey) and \ + self.a == obj.a and \ + self.b == obj.b + + def __repr__(self): + return f"ObjNoKey({self.a}, {self.b})" + + def get_key(self): + return self.a + + def get_digest(self): + return str(self.a) + str(self.b) + + @pytest.fixture(autouse=True) def init_test(): if path.exists(tests_root): @@ -408,6 +452,72 @@ def test_i_can_add_string_using_auto_generated_key(): assert key3 == "1" +def test_i_cannot_add_the_same_digest_twice_in_the_same_entry(): + """ + If get_digest() is implemented, checks for duplicates + :return: + """ + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: + sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) + sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) + + assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() + assert error.value.key == "entry" + assert error.value.args[0] == "duplicate key" + + +def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2(): + """ + If get_digest() is implemented, checks for duplicates in list when no key + :return: + """ + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: + sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) + sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "c")) + sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) + + assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() + assert error.value.key == "entry" + assert error.value.args[0] == "duplicate key" + + +def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3(): + """ + If get_digest() is implemented, checks for duplicates when the key is provided + :return: + """ + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: + sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) + sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) + + assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() + assert error.value.key == "entry.a" + assert error.value.args[0] == "duplicate key" + + +def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4(): + """ + If get_digest() is implemented, checks for duplicates in list when the key is provided + :return: + """ + sdp = SheerkaDataProvider(".sheerka") + + with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: + sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) + sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "c")) + sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) + + assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() + assert error.value.key == "entry.a" + assert error.value.args[0] == "duplicate key" + + def test_i_can_get_and_set_key(): sdp = SheerkaDataProvider(".sheerka") key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile) @@ -499,12 +609,12 @@ def test_i_can_set_using_reference(): entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True) state = sdp.load_state(sdp.get_snapshot()) - assert state.data == {"entry": {"2": '##REF##:9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9'}} + assert state.data == {"entry": {"2": '##REF##:95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268'}} assert entry == "entry" assert key == "2" assert path.exists(sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder, - "9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9")) + "95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268")) # sanity check, make sure that I can load back loaded = sdp.get(entry, key) @@ -873,7 +983,7 @@ def test_i_can_modify_a_ref(): state = sdp.load_state(sdp.get_snapshot()) assert state.data == {"entry": { "key1": ObjWithKey("key1", "foo"), - "key2": "##REF##:eb297e98710dd17244bb0e38eb9f1bf72cba692a8f8d94e9eb2d898e130cac8b"}} + "key2": "##REF##:d70b0247311645ed18d275337cbcf79ad186d995236cdc8ad4fcfc708085bd3d"}} assert entry == "entry" assert key == "key2"