Implemented a dictionary-based in-memory file system to speed up the tests
This commit is contained in:
+45
-70
@@ -1,9 +1,11 @@
|
||||
from os import path
|
||||
import os
|
||||
# from os import path
|
||||
# import os
|
||||
from datetime import datetime, date
|
||||
import hashlib
|
||||
import json
|
||||
import zlib
|
||||
|
||||
from sdp.sheerkaDataProviderIO import SheerkaDataProviderIO
|
||||
from sdp.sheerkaSerializer import Serializer, SerializerContext
|
||||
import logging
|
||||
|
||||
@@ -278,23 +280,11 @@ class SheerkaDataProvider:
|
||||
def __init__(self, root=None):
|
||||
log.debug("Initializing sdp.")
|
||||
|
||||
self.root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \
|
||||
if root is None \
|
||||
else path.abspath(root)
|
||||
log.debug("root is set to '" + self.root + "'")
|
||||
|
||||
if not path.exists(self.root):
|
||||
log.debug("root folder not found. Creating it.")
|
||||
os.makedirs(self.root)
|
||||
self.first_time = True
|
||||
else:
|
||||
self.first_time = False
|
||||
self.io = SheerkaDataProviderIO.get(root)
|
||||
self.first_time = self.io.first_time
|
||||
|
||||
self.serializer = Serializer()
|
||||
|
||||
def get_obj_path(self, object_type, digest):
|
||||
return path.join(self.root, object_type, digest[:24], digest)
|
||||
|
||||
@staticmethod
|
||||
def get_obj_key(obj):
|
||||
"""
|
||||
@@ -620,15 +610,11 @@ class SheerkaDataProvider:
|
||||
:return: digest of the event
|
||||
"""
|
||||
digest = event.get_digest()
|
||||
target_path = path.join(self.root, SheerkaDataProvider.EventFolder, digest[:24], digest)
|
||||
if path.exists(target_path):
|
||||
target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest)
|
||||
if self.io.exists(target_path):
|
||||
return digest
|
||||
|
||||
if not path.exists(path.dirname(target_path)):
|
||||
os.makedirs(path.dirname(target_path))
|
||||
|
||||
with open(target_path, "wb") as f:
|
||||
f.write(self.serializer.serialize(event, None).read())
|
||||
self.io.write_binary(target_path, self.serializer.serialize(event, None).read())
|
||||
|
||||
return digest
|
||||
|
||||
@@ -638,31 +624,27 @@ class SheerkaDataProvider:
|
||||
:param digest:
|
||||
:return:
|
||||
"""
|
||||
target_path = path.join(self.root, SheerkaDataProvider.EventFolder, digest[:24], digest)
|
||||
with open(target_path, "rb") as f:
|
||||
target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest)
|
||||
|
||||
with self.io.open(target_path, "rb") as f:
|
||||
return self.serializer.deserialize(f, None)
|
||||
|
||||
def save_state(self, state: State):
|
||||
digest = state.get_digest()
|
||||
log.debug(f"Saving new state. digest={digest}")
|
||||
target_path = path.join(self.root, SheerkaDataProvider.StateFolder, digest[:24], digest)
|
||||
if path.exists(target_path):
|
||||
target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest)
|
||||
if self.io.exists(target_path):
|
||||
return digest
|
||||
|
||||
if not path.exists(path.dirname(target_path)):
|
||||
os.makedirs(path.dirname(target_path))
|
||||
|
||||
with open(target_path, "wb") as f:
|
||||
f.write(self.serializer.serialize(state, None).read())
|
||||
|
||||
self.io.write_binary(target_path, self.serializer.serialize(state, None).read())
|
||||
return digest
|
||||
|
||||
def load_state(self, digest):
|
||||
if digest is None:
|
||||
return State()
|
||||
|
||||
target_path = path.join(self.root, SheerkaDataProvider.StateFolder, digest[:24], digest)
|
||||
with open(target_path, "rb") as f:
|
||||
target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest)
|
||||
with self.io.open(target_path, "rb") as f:
|
||||
return self.serializer.deserialize(f, None)
|
||||
|
||||
def save_obj(self, obj):
|
||||
@@ -670,16 +652,12 @@ class SheerkaDataProvider:
|
||||
stream = self.serializer.serialize(obj, SerializerContext(user_name="kodjo"))
|
||||
digest = obj.get_digest() if hasattr(obj, "get_digest") else self.get_stream_digest(stream)
|
||||
|
||||
target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
|
||||
if path.exists(target_path):
|
||||
target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest)
|
||||
if self.io.exists(target_path):
|
||||
log.debug(f"...already saved. digest is {digest}")
|
||||
return digest
|
||||
|
||||
if not path.exists(path.dirname(target_path)):
|
||||
os.makedirs(path.dirname(target_path))
|
||||
|
||||
with open(target_path, "wb") as f:
|
||||
f.write(stream.read())
|
||||
self.io.write_binary(target_path, stream.read())
|
||||
|
||||
log.debug(f"...digest={digest}.")
|
||||
return digest
|
||||
@@ -688,11 +666,11 @@ class SheerkaDataProvider:
|
||||
if digest is None:
|
||||
return None
|
||||
|
||||
target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
|
||||
if not path.exists(target_path):
|
||||
target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest)
|
||||
if not self.io.exists(target_path):
|
||||
return None
|
||||
|
||||
with open(target_path, "rb") as f:
|
||||
with self.io.open(target_path, "rb") as f:
|
||||
obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
|
||||
|
||||
# set the origin of the object
|
||||
@@ -721,7 +699,7 @@ class SheerkaDataProvider:
|
||||
|
||||
def get_cache_params(self, category, key):
|
||||
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
|
||||
cache_path = path.join(self.root, SheerkaDataProvider.CacheFolder, digest[:24], digest)
|
||||
cache_path = self.io.get_obj_path(SheerkaDataProvider.CacheFolder, digest)
|
||||
return digest, cache_path
|
||||
|
||||
def add_to_cache(self, category, key, obj, update=False):
|
||||
@@ -735,15 +713,10 @@ class SheerkaDataProvider:
|
||||
"""
|
||||
digest, cache_path = self.get_cache_params(category, key)
|
||||
|
||||
if path.exists(cache_path) and not update:
|
||||
if self.io.exists(cache_path) and not update:
|
||||
return digest
|
||||
|
||||
if not path.exists(path.dirname(cache_path)):
|
||||
os.makedirs(path.dirname(cache_path))
|
||||
|
||||
with open(cache_path, "wb") as f:
|
||||
f.write(zlib.compress(obj.encode("utf-8"), 9))
|
||||
|
||||
self.io.write_binary(cache_path, zlib.compress(obj.encode("utf-8"), 9))
|
||||
return digest
|
||||
|
||||
def load_from_cache(self, category, key):
|
||||
@@ -755,10 +728,10 @@ class SheerkaDataProvider:
|
||||
"""
|
||||
digest, cache_path = self.get_cache_params(category, key)
|
||||
|
||||
if not path.exists(cache_path):
|
||||
if not self.io.exists(cache_path):
|
||||
raise IndexError(f"{category}.{key}")
|
||||
|
||||
with open(cache_path, "rb") as f:
|
||||
with self.io.open(cache_path, "rb") as f:
|
||||
return zlib.decompress(f.read()).decode("utf-8")
|
||||
|
||||
def remove_from_cache(self, category, key):
|
||||
@@ -769,8 +742,8 @@ class SheerkaDataProvider:
|
||||
:return:
|
||||
"""
|
||||
digest, cache_path = self.get_cache_params(category, key)
|
||||
if path.exists(cache_path):
|
||||
os.remove(cache_path)
|
||||
if self.io.exists(cache_path):
|
||||
self.io.remove(cache_path)
|
||||
|
||||
return digest
|
||||
|
||||
@@ -782,32 +755,34 @@ class SheerkaDataProvider:
|
||||
:return:
|
||||
"""
|
||||
digest, cache_path = self.get_cache_params(category, key)
|
||||
return path.exists(cache_path)
|
||||
return self.io.exists(cache_path)
|
||||
|
||||
def get_snapshot(self):
|
||||
head_file = path.join(self.root, SheerkaDataProvider.HeadFile)
|
||||
if not path.exists(head_file):
|
||||
head_file = self.io.path_join(SheerkaDataProvider.HeadFile)
|
||||
if not self.io.exists(head_file):
|
||||
return None
|
||||
with open(head_file, "r") as f:
|
||||
return f.read()
|
||||
return self.io.read_text(head_file)
|
||||
# with open(head_file, "r") as f:
|
||||
# return f.read()
|
||||
|
||||
def set_snapshot(self, digest):
|
||||
head_file = path.join(self.root, SheerkaDataProvider.HeadFile)
|
||||
with open(head_file, "w") as f:
|
||||
return f.write(digest)
|
||||
head_file = self.io.path_join(SheerkaDataProvider.HeadFile)
|
||||
return self.io.write_text(head_file, digest)
|
||||
# with open(head_file, "w") as f:
|
||||
# return f.write(digest)
|
||||
|
||||
def load_keys(self):
|
||||
keys_file = path.join(self.root, SheerkaDataProvider.KeysFile)
|
||||
if not path.exists(keys_file):
|
||||
keys_file = self.io.path_join(SheerkaDataProvider.KeysFile)
|
||||
if not self.io.exists(keys_file):
|
||||
keys = {}
|
||||
else:
|
||||
with open(keys_file, "r") as f:
|
||||
with self.io.open(keys_file, "r") as f:
|
||||
keys = json.load(f)
|
||||
return keys
|
||||
|
||||
def save_keys(self, keys):
|
||||
keys_file = path.join(self.root, SheerkaDataProvider.KeysFile)
|
||||
with open(keys_file, "w") as f:
|
||||
keys_file = self.io.path_join(SheerkaDataProvider.KeysFile)
|
||||
with self.io.open(keys_file, "w") as f:
|
||||
json.dump(keys, f)
|
||||
|
||||
def get_next_key(self, entry):
|
||||
|
||||
Reference in New Issue
Block a user