Upgrade to Python 3.8 + duplicate check when adding in sdp

This commit is contained in:
2019-11-06 16:01:58 +01:00
parent 0d2adf1b6c
commit b818c992ec
6 changed files with 244 additions and 39 deletions
+1 -2
View File
@@ -99,8 +99,7 @@ class ErrorConcept(Concept):
Concept.__init__(self, "error", is_builtin=True, where=where, pre=pre, post=post, body=body, desc=desc)
def __repr__(self):
return f"{self.name} : {self.body}"
return f"({self.id}){self.name} : {self.body}"
class Property:
"""
+5 -3
View File
@@ -2,7 +2,7 @@ from dataclasses import dataclass
from core.concept import Concept, ErrorConcept, Property
from parsers.PythonParser import PythonParser, PythonGetNamesVisitor, PythonNode
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError
from parsers.DefaultParser import DefaultParser, DefConceptNode
import logging
@@ -200,8 +200,10 @@ class Sheerka(Concept, metaclass=Singleton):
concept.add_codes(def_concept_node.get_codes())
self.set_id_if_needed(concept, False)
self.sdp.add(exec_context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
try:
self.sdp.add(exec_context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
except SheerkaDataProviderDuplicateKeyError as error:
return ReturnValue(False, ErrorConcept(body=error), error.args[0])
return ReturnValue(True, concept)
@staticmethod
+108 -18
View File
@@ -52,16 +52,61 @@ class Event(object):
self.message = as_dict["message"]
class ObjWithKey:
class ObjToUpdate:
"""
Internal key value class to hold the key (and the value)
when it is detected
It's created to distinguish from {key, value}
"""
def __init__(self, key, obj):
self.key = key
def __init__(self, obj, key=None, digest=None):
self.obj = obj
self.has_key = None
self.has_digest = None
self._key = None
self._digest = None
if key is not None:
self.set_key(key)
if digest is not None:
self.set_digest(digest)
def get_key(self):
    """
    Returns the key of the wrapped object
    The lookup through SheerkaDataProvider.get_obj_key() is done only while
    has_key is still None; its outcome is then remembered in has_key / _key
    :return: the key, None when there is no key
    """
    if self.has_key is None:
        # not resolved yet: ask the data provider and remember the outcome
        found = SheerkaDataProvider.get_obj_key(self.obj)
        self.has_key = found is not None
        if self.has_key:
            self._key = found

    return self._key if self.has_key else None
def get_digest(self):
    """
    Returns the digest of the wrapped object
    The lookup through SheerkaDataProvider.get_obj_digest() is done only while
    has_digest is still None; its outcome is then remembered in has_digest / _digest
    :return: the digest, None when there is no digest
    """
    if self.has_digest is None:
        # not resolved yet: ask the data provider and remember the outcome
        found = SheerkaDataProvider.get_obj_digest(self.obj)
        self.has_digest = found is not None
        if self.has_digest:
            self._digest = found

    return self._digest if self.has_digest else None
def set_digest(self, digest):
    """
    Forces the digest: get_digest() will return this value without any lookup
    :param digest:
    :return:
    """
    self.has_digest, self._digest = True, digest
def set_key(self, key):
    """
    Forces the key: get_key() will return this value without any lookup
    :param key:
    :return:
    """
    self.has_key, self._key = True, key
class State:
@@ -76,15 +121,29 @@ class State:
self.events = []
self.data = {}
def update(self, entry, obj, append=True):
@staticmethod
def check_duplicate(items, obj: ObjToUpdate, key):
    """
    Makes sure that the digest of obj is not already used by one of the items
    Nothing is checked when obj has no digest
    :param items: a single item, or the list of the items already stored
    :param obj: object about to be added
    :param key: location reported in the error
    :return:
    :raises SheerkaDataProviderDuplicateKeyError: when an item has the same digest
    """
    wanted = obj.get_digest()
    if wanted is None:
        return

    # a single stored item is handled like a list of one element
    candidates = items if isinstance(items, list) else [items]
    for candidate in candidates:
        if SheerkaDataProvider.get_obj_digest(candidate) == wanted:
            raise SheerkaDataProviderDuplicateKeyError("duplicate key", key, obj.obj)
def update(self, entry, obj: ObjToUpdate, append=True):
"""
adds obj to entry
:param entry:
:param obj:
:param append: if True, ducplicate keys will create lists
:param append: if True, duplicate keys will create lists
:return:
"""
obj_to_use = {obj.key: obj.obj} if isinstance(obj, ObjWithKey) else obj
obj_to_use = {obj.get_key(): obj.obj} if obj.has_key else obj.obj
if entry not in self.data:
self.data[entry] = obj_to_use
@@ -96,21 +155,25 @@ class State:
self.data[entry] = obj_to_use
elif isinstance(self.data[entry], list):
self.data[entry].append(obj.obj if isinstance(obj, ObjWithKey) else obj) # do not use obj_to_use !
self.check_duplicate(self.data[entry], obj, entry)
self.data[entry].append(obj.obj)
elif isinstance(obj_to_use, dict):
for k in obj_to_use:
if k not in self.data[entry]:
self.data[entry][k] = obj_to_use[k]
elif isinstance(self.data[entry][k], list):
self.check_duplicate(self.data[entry][k], obj, entry + "." + k)
self.data[entry][k].append(obj_to_use[k])
else:
self.check_duplicate(self.data[entry][k], obj, entry + "." + k)
self.data[entry][k] = [self.data[entry][k], obj_to_use[k]]
elif isinstance(self.data[entry], dict):
raise SheerkaDataProviderError(f"Cannot found key on '{obj}' while all other elements have.", obj)
raise SheerkaDataProviderError(f"Cannot found key on '{obj.obj}' while all other elements have.", obj.obj)
else:
self.check_duplicate(self.data[entry], obj, entry)
self.data[entry] = [self.data[entry], obj_to_use]
def modify(self, entry, key, obj, obj_key):
@@ -120,7 +183,7 @@ class State:
self.remove(entry, lambda k, o: k == key) # modify from on object to another
append = True
self.update(entry, ObjWithKey(obj_key, obj), append=append)
self.update(entry, ObjToUpdate(obj, obj_key), append=append)
def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed):
found = False
@@ -135,7 +198,7 @@ class State:
self.data[entry][key][i] = obj
else:
to_remove = i
self.update(entry, ObjWithKey(obj_key, obj), append=True)
self.update(entry, ObjToUpdate(obj, obj_key), append=True)
found = True
break
@@ -194,6 +257,13 @@ class SheerkaDataProviderError(Exception):
self.obj = obj
class SheerkaDataProviderDuplicateKeyError(Exception):
    """
    Error that carries the key and the object of a rejected duplicate
    """

    def __init__(self, message, key, obj):
        super().__init__(message)
        self.obj = obj
        self.key = key
class SheerkaDataProvider:
"""Manages the state of the system"""
@@ -233,7 +303,24 @@ class SheerkaDataProvider:
:param obj:
:return: String version of that is found, None otherwise
"""
return str(obj.key) if hasattr(obj, "key") else str(obj.get_key()) if hasattr(obj, "get_key") else None
return str(obj.key) if hasattr(obj, "key") \
else str(obj.get_key()) if hasattr(obj, "get_key") \
else None
@staticmethod
def get_obj_digest(obj):
    """
    Tries to find the digest of an object
    A string that starts with REF_PREFIX gives what follows the prefix
    Otherwise look for .digest, then .get_digest()
    :param obj:
    :return: digest, None otherwise
    """
    if isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX):
        prefix_length = len(SheerkaDataProvider.REF_PREFIX)
        return obj[prefix_length:]

    if hasattr(obj, "digest"):
        return obj.digest

    if hasattr(obj, "get_digest"):
        return obj.get_digest()

    return None
@staticmethod
def get_stream_digest(stream):
@@ -266,12 +353,15 @@ class SheerkaDataProvider:
log.debug(f"Adding obj '{obj}' in entry '{entry}' (allow_multiple={allow_multiple}, use_ref={use_ref})")
if not isinstance(obj, ObjToUpdate):
obj = ObjToUpdate(obj)
# check uniqueness, cannot add the same key twice if allow_multiple == False
key = self.get_obj_key(obj)
key = obj.get_key()
log.debug(f"key found : '{key}'") if key else log.debug("No key found")
if not allow_multiple:
if isinstance(obj, dict):
for k in obj:
if isinstance(obj.obj, dict):
for k in obj.obj:
if state.contains(entry, k):
raise IndexError(f"{entry}.{k}")
else:
@@ -284,10 +374,10 @@ class SheerkaDataProvider:
log.debug(state.data)
if use_ref:
digest = self.save_obj(obj)
obj = ObjWithKey(key, self.REF_PREFIX + digest) if key else self.REF_PREFIX + digest
obj.set_digest(self.save_obj(obj.obj))
obj.obj = self.REF_PREFIX + obj.get_digest()
state.update(entry, obj if (isinstance(obj, ObjWithKey) or key is None) else ObjWithKey(key, obj))
state.update(entry, obj)
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
@@ -304,7 +394,7 @@ class SheerkaDataProvider:
next_key = self.get_next_key(entry)
if hasattr(obj, "set_key"):
obj.set_key(next_key)
self.add(event_digest, entry, ObjWithKey(next_key, obj))
self.add(event_digest, entry, ObjToUpdate(obj, next_key))
return entry, next_key
def add_unique(self, event_digest: str, entry, obj):
+13 -9
View File
@@ -39,6 +39,18 @@ def null():
def b(operator, left, right):
    """
    Shortcut to build a BinaryNode, with an empty list as first argument
    :param operator:
    :param left:
    :param right:
    :return: the BinaryNode
    """
    node = BinaryNode([], operator, left, right)
    return node
def compare_ast(left, right):
    """
    Compares two asts using their ast.dump() representation
    ', ctx=Load()' and ', kind=None' are removed from both sides before comparing
    :param left: ast
    :param right: ast, or a string used as it is in place of the dump
    :return: True if both representations are the same
    """

    def strip_noise(dumped):
        for noise in (", ctx=Load()", ", kind=None"):
            dumped = dumped.replace(noise, "")
        return dumped

    actual = ast.dump(left)
    expected = right if isinstance(right, str) else ast.dump(right)
    return strip_noise(actual) == strip_noise(expected)
def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"="
@@ -213,19 +225,11 @@ def test_i_can_parse_def_concept(text, expected_name, expected_expr):
assert isinstance(tree, DefConceptNode)
assert tree.name == expected_name
if isinstance(tree.body, PythonNode):
assert ast.dump(tree.body.ast) == ast.dump(expected_expr)
assert compare_ast(tree.body.ast, expected_expr)
else:
assert tree.body == expected_expr
def compare_ast(left, right):
left_as_string = ast.dump(left)
left_as_string = left_as_string.replace(", ctx=Load()", "")
right_as_string = right if isinstance(right, str) else ast.dump(right)
right_as_string = right_as_string.replace(", ctx=Load()", "")
return left_as_string == right_as_string
def test_i_can_parse_complex_def_concept_statement():
+3 -3
View File
@@ -6,7 +6,7 @@ from os import path
import shutil
from core.concept import Concept, ConceptParts
from core.sheerka import Sheerka
from core.sheerka import Sheerka, ExecutionContext
from parsers.DefaultParser import DefConceptNode, DefaultParser
from parsers.PythonParser import PythonParser
@@ -72,7 +72,7 @@ def test_i_can_add_a_concept():
concept = get_concept()
sheerka = Sheerka()
sheerka.initialize(root_folder)
res = sheerka.add_concept(concept)
res = sheerka.add_concept(ExecutionContext("xxx"), concept)
concept_found = res.value
assert res.status
@@ -91,7 +91,7 @@ def test_i_can_add_a_concept():
assert all_props == ["a", "b"]
assert concept_found.key == "__var__0 + __var__1"
assert concept_found.id == "100"
assert concept_found.id == "1001"
# def test_i_cannot_add_the_same_concept_twice():
# concept1 = DefConceptNode(name="concept")
+114 -4
View File
@@ -3,7 +3,8 @@ import hashlib
import pytest
import os
from os import path
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError, \
SheerkaDataProviderDuplicateKeyError
from datetime import date, datetime
import shutil
import json
@@ -106,6 +107,49 @@ class ObjDumpJson:
self.key = as_dict["key"]
class ObjWithDigestNoKey:
    """
    Test object that has a digest (get_digest) but no key
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        return hash((self.a, self.b))

    def __eq__(self, obj):
        # must test against this class: comparing with another class made
        # two identical instances always different
        return isinstance(obj, ObjWithDigestNoKey) and \
               self.a == obj.a and \
               self.b == obj.b

    def __repr__(self):
        return f"ObjWithDigestNoKey({self.a}, {self.b})"

    def get_digest(self):
        """
        :return: the string versions of a and b, concatenated
        """
        return str(self.a) + str(self.b)
class ObjWithDigestWithKey:
    """
    Test object that has both a key (get_key) and a digest (get_digest)
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        return hash((self.a, self.b))

    def __eq__(self, obj):
        # must test against this class: comparing with another class made
        # two identical instances always different
        return isinstance(obj, ObjWithDigestWithKey) and \
               self.a == obj.a and \
               self.b == obj.b

    def __repr__(self):
        return f"ObjWithDigestWithKey({self.a}, {self.b})"

    def get_key(self):
        """
        :return: a, used as the key
        """
        return self.a

    def get_digest(self):
        """
        :return: the string versions of a and b, concatenated
        """
        return str(self.a) + str(self.b)
@pytest.fixture(autouse=True)
def init_test():
if path.exists(tests_root):
@@ -408,6 +452,72 @@ def test_i_can_add_string_using_auto_generated_key():
assert key3 == "1"
def test_i_cannot_add_the_same_digest_twice_in_the_same_entry():
    """
    If get_digest() is implemented, checks for duplicates
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")

    # setup is kept out of pytest.raises: the first add must succeed,
    # otherwise an error raised here would make the test pass for the wrong reason
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest()
    assert error.value.key == "entry"
    assert error.value.args[0] == "duplicate key"
def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2():
    """
    If get_digest() is implemented, checks for duplicates in list when no key
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")

    # setup is kept out of pytest.raises: these two adds must succeed,
    # otherwise an error raised here would make the test pass for the wrong reason
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))
    sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "c"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b"))

    assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest()
    assert error.value.key == "entry"
    assert error.value.args[0] == "duplicate key"
def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3():
    """
    If get_digest() is implemented, checks for duplicates when the key is provided
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")

    # setup is kept out of pytest.raises: the first add must succeed,
    # otherwise an error raised here would make the test pass for the wrong reason
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest()
    assert error.value.key == "entry.a"
    assert error.value.args[0] == "duplicate key"
def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4():
    """
    If get_digest() is implemented, checks for duplicates in list when the key is provided
    :return:
    """
    sdp = SheerkaDataProvider(".sheerka")

    # setup is kept out of pytest.raises: these two adds must succeed,
    # otherwise an error raised here would make the test pass for the wrong reason
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))
    sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "c"))

    with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error:
        sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b"))

    assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest()
    assert error.value.key == "entry.a"
    assert error.value.args[0] == "duplicate key"
def test_i_can_get_and_set_key():
sdp = SheerkaDataProvider(".sheerka")
key_file = path.join(sdp.root, SheerkaDataProvider.KeysFile)
@@ -499,12 +609,12 @@ def test_i_can_set_using_reference():
entry, key = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True)
state = sdp.load_state(sdp.get_snapshot())
assert state.data == {"entry": {"2": '##REF##:9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9'}}
assert state.data == {"entry": {"2": '##REF##:95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268'}}
assert entry == "entry"
assert key == "2"
assert path.exists(sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder,
"9b14e03847d73c640f54ea9b46ba62b19e5451ecd300428a225be012ad9f25f9"))
"95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268"))
# sanity check, make sure that I can load back
loaded = sdp.get(entry, key)
@@ -873,7 +983,7 @@ def test_i_can_modify_a_ref():
state = sdp.load_state(sdp.get_snapshot())
assert state.data == {"entry": {
"key1": ObjWithKey("key1", "foo"),
"key2": "##REF##:eb297e98710dd17244bb0e38eb9f1bf72cba692a8f8d94e9eb2d898e130cac8b"}}
"key2": "##REF##:d70b0247311645ed18d275337cbcf79ad186d995236cdc8ad4fcfc708085bd3d"}}
assert entry == "entry"
assert key == "key2"