Upgrade to Python 3.8 + duplicate check when adding in sdp

This commit is contained in:
2019-11-06 16:01:58 +01:00
parent 0d2adf1b6c
commit b818c992ec
6 changed files with 244 additions and 39 deletions
+1 -2
View File
@@ -99,8 +99,7 @@ class ErrorConcept(Concept):
Concept.__init__(self, "error", is_builtin=True, where=where, pre=pre, post=post, body=body, desc=desc) Concept.__init__(self, "error", is_builtin=True, where=where, pre=pre, post=post, body=body, desc=desc)
def __repr__(self): def __repr__(self):
return f"{self.name} : {self.body}" return f"({self.id}){self.name} : {self.body}"
class Property: class Property:
""" """
+5 -3
View File
@@ -2,7 +2,7 @@ from dataclasses import dataclass
from core.concept import Concept, ErrorConcept, Property from core.concept import Concept, ErrorConcept, Property
from parsers.PythonParser import PythonParser, PythonGetNamesVisitor, PythonNode from parsers.PythonParser import PythonParser, PythonGetNamesVisitor, PythonNode
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError
from parsers.DefaultParser import DefaultParser, DefConceptNode from parsers.DefaultParser import DefaultParser, DefConceptNode
import logging import logging
@@ -200,8 +200,10 @@ class Sheerka(Concept, metaclass=Singleton):
concept.add_codes(def_concept_node.get_codes()) concept.add_codes(def_concept_node.get_codes())
self.set_id_if_needed(concept, False) self.set_id_if_needed(concept, False)
self.sdp.add(exec_context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True) try:
self.sdp.add(exec_context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
except SheerkaDataProviderDuplicateKeyError as error:
return ReturnValue(False, ErrorConcept(body=error), error.args[0])
return ReturnValue(True, concept) return ReturnValue(True, concept)
@staticmethod @staticmethod
+108 -18
View File
@@ -52,16 +52,61 @@ class Event(object):
self.message = as_dict["message"] self.message = as_dict["message"]
class ObjWithKey: class ObjToUpdate:
""" """
Internal key value class to hold the key (and the value) Internal key value class to hold the key (and the value)
when it is detected when it is detected
It's created to distinguish from {key, value} It's created to distinguish from {key, value}
""" """
def __init__(self, key, obj): def __init__(self, obj, key=None, digest=None):
self.key = key
self.obj = obj self.obj = obj
self.has_key = None
self.has_digest = None
self._key = None
self._digest = None
if key is not None:
self.set_key(key)
if digest is not None:
self.set_digest(digest)
def get_key(self):
    """
    Returns the key of the wrapped object, or None when it has none
    The key is looked up with SheerkaDataProvider.get_obj_key() the first time only,
    has_key remembers the outcome (None : not looked up yet, True / False : known)
    :return: the key, None otherwise
    """
    if self.has_key is None:
        # first call : resolve the key and remember whether one was found
        found = SheerkaDataProvider.get_obj_key(self.obj)
        self.has_key = found is not None
        if self.has_key:
            self._key = found

    return self._key if self.has_key else None
def get_digest(self):
    """
    Returns the digest of the wrapped object, or None when it has none
    The digest is looked up with SheerkaDataProvider.get_obj_digest() the first time only,
    has_digest remembers the outcome (None : not looked up yet, True / False : known)
    :return: the digest, None otherwise
    """
    if self.has_digest is None:
        # first call : resolve the digest and remember whether one was found
        found = SheerkaDataProvider.get_obj_digest(self.obj)
        self.has_digest = found is not None
        if self.has_digest:
            self._digest = found

    return self._digest if self.has_digest else None
def set_digest(self, digest):
    """
    Forces the digest, so get_digest() returns it without looking at the wrapped object
    :param digest:
    :return:
    """
    self._digest = digest
    self.has_digest = True
def set_key(self, key):
    """
    Forces the key, so get_key() returns it without looking at the wrapped object
    :param key:
    :return:
    """
    self._key = key
    self.has_key = True
class State: class State:
@@ -76,15 +121,29 @@ class State:
self.events = [] self.events = []
self.data = {} self.data = {}
def update(self, entry, obj, append=True): @staticmethod
def check_duplicate(items, obj: ObjToUpdate, key):
    """
    Refuses obj when one of the items already stored has the same digest
    Nothing is checked when obj has no digest
    :param items: what is already stored, a single item or a list of items
    :param obj: the object about to be added
    :param key: where the object is added, only used to fill the error
    :return:
    :raises SheerkaDataProviderDuplicateKeyError: when the digest is already there
    """
    wanted = obj.get_digest()
    if wanted is None:
        return

    # a single stored item is handled like a one element list
    candidates = items if isinstance(items, list) else [items]
    if any(SheerkaDataProvider.get_obj_digest(candidate) == wanted for candidate in candidates):
        raise SheerkaDataProviderDuplicateKeyError("duplicate key", key, obj.obj)
def update(self, entry, obj: ObjToUpdate, append=True):
""" """
adds obj to entry adds obj to entry
:param entry: :param entry:
:param obj: :param obj:
:param append: if True, ducplicate keys will create lists :param append: if True, duplicate keys will create lists
:return: :return:
""" """
obj_to_use = {obj.key: obj.obj} if isinstance(obj, ObjWithKey) else obj obj_to_use = {obj.get_key(): obj.obj} if obj.has_key else obj.obj
if entry not in self.data: if entry not in self.data:
self.data[entry] = obj_to_use self.data[entry] = obj_to_use
@@ -96,21 +155,25 @@ class State:
self.data[entry] = obj_to_use self.data[entry] = obj_to_use
elif isinstance(self.data[entry], list): elif isinstance(self.data[entry], list):
self.data[entry].append(obj.obj if isinstance(obj, ObjWithKey) else obj) # do not use obj_to_use ! self.check_duplicate(self.data[entry], obj, entry)
self.data[entry].append(obj.obj)
elif isinstance(obj_to_use, dict): elif isinstance(obj_to_use, dict):
for k in obj_to_use: for k in obj_to_use:
if k not in self.data[entry]: if k not in self.data[entry]:
self.data[entry][k] = obj_to_use[k] self.data[entry][k] = obj_to_use[k]
elif isinstance(self.data[entry][k], list): elif isinstance(self.data[entry][k], list):
self.check_duplicate(self.data[entry][k], obj, entry + "." + k)
self.data[entry][k].append(obj_to_use[k]) self.data[entry][k].append(obj_to_use[k])
else: else:
self.check_duplicate(self.data[entry][k], obj, entry + "." + k)
self.data[entry][k] = [self.data[entry][k], obj_to_use[k]] self.data[entry][k] = [self.data[entry][k], obj_to_use[k]]
elif isinstance(self.data[entry], dict): elif isinstance(self.data[entry], dict):
raise SheerkaDataProviderError(f"Cannot found key on '{obj}' while all other elements have.", obj) raise SheerkaDataProviderError(f"Cannot found key on '{obj.obj}' while all other elements have.", obj.obj)
else: else:
self.check_duplicate(self.data[entry], obj, entry)
self.data[entry] = [self.data[entry], obj_to_use] self.data[entry] = [self.data[entry], obj_to_use]
def modify(self, entry, key, obj, obj_key): def modify(self, entry, key, obj, obj_key):
@@ -120,7 +183,7 @@ class State:
self.remove(entry, lambda k, o: k == key) # modify from on object to another self.remove(entry, lambda k, o: k == key) # modify from on object to another
append = True append = True
self.update(entry, ObjWithKey(obj_key, obj), append=append) self.update(entry, ObjToUpdate(obj, obj_key), append=append)
def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed): def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed):
found = False found = False
@@ -135,7 +198,7 @@ class State:
self.data[entry][key][i] = obj self.data[entry][key][i] = obj
else: else:
to_remove = i to_remove = i
self.update(entry, ObjWithKey(obj_key, obj), append=True) self.update(entry, ObjToUpdate(obj, obj_key), append=True)
found = True found = True
break break
@@ -194,6 +257,13 @@ class SheerkaDataProviderError(Exception):
self.obj = obj self.obj = obj
class SheerkaDataProviderDuplicateKeyError(Exception):
    """
    Error that carries the key and the object that triggered it
    Only the message is forwarded to Exception (so args == (message,))
    """

    def __init__(self, message, key, obj):
        super().__init__(message)
        self.obj = obj
        self.key = key
class SheerkaDataProvider: class SheerkaDataProvider:
"""Manages the state of the system""" """Manages the state of the system"""
@@ -233,7 +303,24 @@ class SheerkaDataProvider:
:param obj: :param obj:
:return: String version of that is found, None otherwise :return: String version of that is found, None otherwise
""" """
return str(obj.key) if hasattr(obj, "key") else str(obj.get_key()) if hasattr(obj, "get_key") else None return str(obj.key) if hasattr(obj, "key") \
else str(obj.get_key()) if hasattr(obj, "get_key") \
else None
@staticmethod
def get_obj_digest(obj):
    """
    Tries to find the digest of an object
    A string that starts with REF_PREFIX gives what follows the prefix
    Otherwise, look for .digest, then .get_digest()
    :param obj:
    :return: digest, None otherwise
    """
    if isinstance(obj, str):
        prefix = SheerkaDataProvider.REF_PREFIX
        if obj.startswith(prefix):
            return obj[len(prefix):]

    if hasattr(obj, "digest"):
        return obj.digest

    if hasattr(obj, "get_digest"):
        return obj.get_digest()

    return None
@staticmethod @staticmethod
def get_stream_digest(stream): def get_stream_digest(stream):
@@ -266,12 +353,15 @@ class SheerkaDataProvider:
log.debug(f"Adding obj '{obj}' in entry '{entry}' (allow_multiple={allow_multiple}, use_ref={use_ref})") log.debug(f"Adding obj '{obj}' in entry '{entry}' (allow_multiple={allow_multiple}, use_ref={use_ref})")
if not isinstance(obj, ObjToUpdate):
obj = ObjToUpdate(obj)
# check uniqueness, cannot add the same key twice if allow_multiple == False # check uniqueness, cannot add the same key twice if allow_multiple == False
key = self.get_obj_key(obj) key = obj.get_key()
log.debug(f"key found : '{key}'") if key else log.debug("No key found") log.debug(f"key found : '{key}'") if key else log.debug("No key found")
if not allow_multiple: if not allow_multiple:
if isinstance(obj, dict): if isinstance(obj.obj, dict):
for k in obj: for k in obj.obj:
if state.contains(entry, k): if state.contains(entry, k):
raise IndexError(f"{entry}.{k}") raise IndexError(f"{entry}.{k}")
else: else:
@@ -284,10 +374,10 @@ class SheerkaDataProvider:
log.debug(state.data) log.debug(state.data)
if use_ref: if use_ref:
digest = self.save_obj(obj) obj.set_digest(self.save_obj(obj.obj))
obj = ObjWithKey(key, self.REF_PREFIX + digest) if key else self.REF_PREFIX + digest obj.obj = self.REF_PREFIX + obj.get_digest()
state.update(entry, obj if (isinstance(obj, ObjWithKey) or key is None) else ObjWithKey(key, obj)) state.update(entry, obj)
new_snapshot = self.save_state(state) new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot) self.set_snapshot(new_snapshot)
@@ -304,7 +394,7 @@ class SheerkaDataProvider:
next_key = self.get_next_key(entry) next_key = self.get_next_key(entry)
if hasattr(obj, "set_key"): if hasattr(obj, "set_key"):
obj.set_key(next_key) obj.set_key(next_key)
self.add(event_digest, entry, ObjWithKey(next_key, obj)) self.add(event_digest, entry, ObjToUpdate(obj, next_key))
return entry, next_key return entry, next_key
def add_unique(self, event_digest: str, entry, obj): def add_unique(self, event_digest: str, entry, obj):
+13 -9
View File
@@ -39,6 +39,18 @@ def null():
def b(operator, left, right): def b(operator, left, right):
return BinaryNode([], operator, left, right) return BinaryNode([], operator, left, right)
def compare_ast(left, right):
    """
    Compares two asts using their ast.dump() representation
    ", ctx=Load()" and ", kind=None" are removed from both sides before comparing
    (presumably to ignore details that differ between Python versions - to be confirmed)
    :param left: ast node
    :param right: ast node, or a string that is already a dump
    :return: True when both representations are the same
    """
    noise = (", ctx=Load()", ", kind=None")

    def strip_noise(dumped):
        for fragment in noise:
            dumped = dumped.replace(fragment, "")
        return dumped

    actual = ast.dump(left)
    expected = right if isinstance(right, str) else ast.dump(right)
    return strip_noise(actual) == strip_noise(expected)
def test_i_can_tokenize(): def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=" source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"="
@@ -213,19 +225,11 @@ def test_i_can_parse_def_concept(text, expected_name, expected_expr):
assert isinstance(tree, DefConceptNode) assert isinstance(tree, DefConceptNode)
assert tree.name == expected_name assert tree.name == expected_name
if isinstance(tree.body, PythonNode): if isinstance(tree.body, PythonNode):
assert ast.dump(tree.body.ast) == ast.dump(expected_expr) assert compare_ast(tree.body.ast, expected_expr)
else: else:
assert tree.body == expected_expr assert tree.body == expected_expr
def compare_ast(left, right):
    """
    Compares two asts using their ast.dump() representation
    ", ctx=Load()" is removed from both sides before comparing
    :param left: ast node
    :param right: ast node, or a string that is already a dump
    :return: True when both representations are the same
    """

    def without_load_ctx(dumped):
        return dumped.replace(", ctx=Load()", "")

    actual = ast.dump(left)
    expected = right if isinstance(right, str) else ast.dump(right)
    return without_load_ctx(actual) == without_load_ctx(expected)
def test_i_can_parse_complex_def_concept_statement(): def test_i_can_parse_complex_def_concept_statement():
+3 -3
View File
@@ -6,7 +6,7 @@ from os import path
import shutil import shutil
from core.concept import Concept, ConceptParts from core.concept import Concept, ConceptParts
from core.sheerka import Sheerka from core.sheerka import Sheerka, ExecutionContext
from parsers.DefaultParser import DefConceptNode, DefaultParser from parsers.DefaultParser import DefConceptNode, DefaultParser
from parsers.PythonParser import PythonParser from parsers.PythonParser import PythonParser
@@ -72,7 +72,7 @@ def test_i_can_add_a_concept():
concept = get_concept() concept = get_concept()
sheerka = Sheerka() sheerka = Sheerka()
sheerka.initialize(root_folder) sheerka.initialize(root_folder)
res = sheerka.add_concept(concept) res = sheerka.add_concept(ExecutionContext("xxx"), concept)
concept_found = res.value concept_found = res.value
assert res.status assert res.status
@@ -91,7 +91,7 @@ def test_i_can_add_a_concept():
assert all_props == ["a", "b"] assert all_props == ["a", "b"]
assert concept_found.key == "__var__0 + __var__1" assert concept_found.key == "__var__0 + __var__1"
assert concept_found.id == "100" assert concept_found.id == "1001"
# def test_i_cannot_add_the_same_concept_twice(): # def test_i_cannot_add_the_same_concept_twice():
# concept1 = DefConceptNode(name="concept") # concept1 = DefConceptNode(name="concept")
+114 -4
View File
@@ -3,7 +3,8 @@ import hashlib
import pytest import pytest
import os import os
from os import path from os import path
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError, \
SheerkaDataProviderDuplicateKeyError
from datetime import date, datetime from datetime import date, datetime
import shutil import shutil
import json import json
@@ -106,6 +107,49 @@ class ObjDumpJson:
self.key = as_dict["key"] self.key = as_dict["key"]
class ObjWithDigestNoKey:
    """
    Test object that exposes get_digest() but no key
    The digest is the concatenation of str(a) and str(b)
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        return hash((self.a, self.b))

    def __eq__(self, obj):
        # must test against this class : testing against another class makes
        # two identical instances unequal while they share the same hash
        return isinstance(obj, ObjWithDigestNoKey) and \
               self.a == obj.a and \
               self.b == obj.b

    def __repr__(self):
        return f"ObjWithDigestNoKey({self.a}, {self.b})"

    def get_digest(self):
        return str(self.a) + str(self.b)
class ObjWithDigestWithKey:
    """
    Test object that exposes both get_key() and get_digest()
    The key is a, the digest is the concatenation of str(a) and str(b)
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        return hash((self.a, self.b))

    def __eq__(self, obj):
        # must test against this class : testing against another class makes
        # two identical instances unequal while they share the same hash
        return isinstance(obj, ObjWithDigestWithKey) and \
               self.a == obj.a and \
               self.b == obj.b

    def __repr__(self):
        return f"ObjWithDigestWithKey({self.a}, {self.b})"

    def get_key(self):
        return self.a

    def get_digest(self):
        return str(self.a) + str(self.b)
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def init_test(): def init_test():
if path.exists(tests_root): if path.exists(tests_root):
@@ -408,6 +452,72 @@ def test_i_can_add_string_using_auto_generated_key():
assert key3 == "1" assert key3 == "1"
def test_i_cannot_add_the_same_digest_twice_in_the_same_entry():
    """
    If get_digest() is implemented, checks for duplicates
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")

    # setup is kept out of pytest.raises : only the second add is expected to fail
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    # checked after the with block : statements that follow the raising call inside it never run
    assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest()
    assert error.value.key == "entry"
    assert error.value.args[0] == "duplicate key"
def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2():
    """
    If get_digest() is implemented, checks for duplicates in list when no key
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")

    # setup is kept out of pytest.raises : two different digests, so the entry holds a list
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "c"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    # checked after the with block : statements that follow the raising call inside it never run
    assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest()
    assert error.value.key == "entry"
    assert error.value.args[0] == "duplicate key"
def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3():
    """
    If get_digest() is implemented, checks for duplicates when the key is provided
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")

    # setup is kept out of pytest.raises : only the second add is expected to fail
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    # checked after the with block : statements that follow the raising call inside it never run
    assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest()
    assert error.value.key == "entry.a"
    assert error.value.args[0] == "duplicate key"
def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4():
    """
    If get_digest() is implemented, checks for duplicates in list when the key is provided
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")

    # setup is kept out of pytest.raises : same key, two different digests, so the key holds a list
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "c"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    # checked after the with block : statements that follow the raising call inside it never run
    assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest()
    assert error.value.key == "entry.a"
    assert error.value.args[0] == "duplicate key"
def test_i_can_get_and_set_key(): def test_i_can_get_and_set_key():
sdp = SheerkaDataProvider(".sheerka") sdp = SheerkaDataProvider(".sheerka")
key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile) key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)
@@ -499,12 +609,12 @@ def test_i_can_set_using_reference():
entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True) entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True)
state = sdp.load_state(sdp.get_snapshot()) state = sdp.load_state(sdp.get_snapshot())
assert state.data == {"entry": {"2": '##REF##:9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9'}} assert state.data == {"entry": {"2": '##REF##:95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268'}}
assert entry == "entry" assert entry == "entry"
assert key == "2" assert key == "2"
assert path.exists(sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder, assert path.exists(sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder,
"9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9")) "95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268"))
# sanity check, make sure that I can load back # sanity check, make sure that I can load back
loaded = sdp.get(entry, key) loaded = sdp.get(entry, key)
@@ -873,7 +983,7 @@ def test_i_can_modify_a_ref():
state = sdp.load_state(sdp.get_snapshot()) state = sdp.load_state(sdp.get_snapshot())
assert state.data == {"entry": { assert state.data == {"entry": {
"key1": ObjWithKey("key1", "foo"), "key1": ObjWithKey("key1", "foo"),
"key2": "##REF##:eb297e98710dd17244bb0e38eb9f1bf72cba692a8f8d94e9eb2d898e130cac8b"}} "key2": "##REF##:d70b0247311645ed18d275337cbcf79ad186d995236cdc8ad4fcfc708085bd3d"}}
assert entry == "entry" assert entry == "entry"
assert key == "key2" assert key == "key2"