SettingUpClientServer (#7)

Reviewed-on: #7
This commit is contained in:
2023-01-19 16:08:23 +00:00
parent 95459c5b02
commit 21a397861a
19 changed files with 2480 additions and 2 deletions
+1 -1
View File
@@ -158,5 +158,5 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/ .idea/
+9 -1
View File
@@ -1,3 +1,11 @@
# Sheerka
My personal AI
## To start the server
```shell
cd src
uvicorn server:app --reload
```
+42
View File
@@ -0,0 +1,42 @@
anyio==3.6.2
attrs==22.2.0
bcrypt==4.0.1
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==3.0.1
click==8.1.3
cryptography==39.0.0
ecdsa==0.18.0
exceptiongroup==1.1.0
fastapi==0.89.1
h11==0.14.0
httptools==0.5.0
idna==3.4
iniconfig==2.0.0
oauthlib==3.2.2
packaging==23.0
passlib==1.7.4
pluggy==1.0.0
prompt-toolkit==3.0.36
pyasn1==0.4.8
pycparser==2.21
pydantic==1.10.4
pytest==7.2.0
python-dotenv==0.21.0
python-jose==3.3.0
python-multipart==0.0.5
PyYAML==6.0
requests==2.28.2
requests-oauthlib==1.3.1
rsa==4.9
six==1.16.0
sniffio==1.3.0
starlette==0.22.0
tomli==2.0.1
typing_extensions==4.4.0
urllib3==1.26.14
uvicorn==0.20.0
uvloop==0.17.0
watchfiles==0.18.1
wcwidth==0.2.5
websockets==10.4
+191
View File
@@ -0,0 +1,191 @@
import argparse
import os
import re
import sys
from dataclasses import dataclass
from os import path
import prompt_toolkit
import requests
from prompt_toolkit import HTML, print_formatted_text, prompt
from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
from prompt_toolkit.history import FileHistory
from requests import ConnectionError, HTTPError
from constants import CLIENT_OPERATION_QUIT, EXIT_COMMANDS
# Matches e.g. connect("user", "password") or connect('user', 'password').
# Raw string: \( and \s in a plain string are invalid escape sequences
# (DeprecationWarning today, SyntaxError in future Python versions).
connect_regex = re.compile(r"connect\(['\"](.*?)['\"]\s*,\s*['\"](.*?)['\"]\)")
@dataclass
class TestResponse:
    """Outcome of a client-side operation: a success flag plus a human-readable message."""

    # True when the operation succeeded, False otherwise.
    status: bool
    # Text to show the user (server response body or error description).
    message: str
class SheerkaClient:
    """
    Interactive command-line client for a Sheerka server.

    Reads commands from a prompt (with persistent history), forwards them to
    the server's /echo endpoint over HTTP and prints the responses. Supports
    an in-prompt ``connect('user', 'password')`` command to (re)authenticate.
    """

    def __init__(self, hostname: str, port: int):
        """
        :param hostname: server base URL, e.g. ``http://localhost``
        :param port: server port; a falsy value means "no port in the URL"
        """
        self.hostname = hostname
        self.port = port
        self.url = f"{self.hostname}:{self.port}" if self.port else f"{self.hostname}"
        # Prompt history is persisted under ~/.sheerka/history.txt
        self.history_file = path.abspath(path.join(path.expanduser("~"), ".sheerka", "history.txt"))
        # Bearer token obtained after a successful connect(); None while anonymous.
        self.token = None

    def init_folder(self):
        """Create the ~/.sheerka folder (holding the history file) if it does not exist yet."""
        root_path = path.dirname(self.history_file)
        if not path.exists(root_path):
            os.makedirs(root_path)

    def check_url(self) -> TestResponse:
        """
        Make the first attempt to connect to sheerka
        :return: a TestResponse whose status tells whether the server answered
        """
        try:
            response = requests.get(self.url)
            # bool(response) is True for 2xx/3xx HTTP status codes
            return TestResponse(bool(response), response.text)
        except HTTPError as ex:
            return TestResponse(False, f"Error : {ex}")
        except ConnectionError:
            return TestResponse(False, "Connection refused.")

    def connect(self, username: str, password: str) -> TestResponse:
        """
        Authenticate against the server's /token endpoint and keep the access token.

        :param username: account name
        :param password: account password
        :return: a TestResponse describing whether the authentication succeeded
        """
        token_url = f"{self.url}/token"
        try:
            # Credentials are sent as form data (OAuth2 password-flow style).
            form_data = {"username": username, "password": password}
            res = requests.post(token_url, data=form_data)
            if res:
                self.token = res.json()["access_token"]
                return TestResponse(True, f"Connected as {username}")
            else:
                self.token = None
                return TestResponse(False, res.json()["detail"])
        except Exception as ex:
            # Any failure (network, bad JSON, missing key) leaves us unauthenticated.
            self.token = None
            return TestResponse(False, str(ex))

    def run(self):
        """
        Main read-eval-print loop: prompt the user, send the input to the
        server's /echo endpoint and print the answer. Loops until the server
        sends the quit command, or the user forces termination.
        """
        while True:
            try:
                # get the input form prompt_toolkit
                _in = prompt('sheerka> ',
                             history=FileHistory(self.history_file),
                             auto_suggest=AutoSuggestFromHistory()
                             # lexer=PygmentsLexer(PythonLexer)
                             )
                _in = _in.strip()
                if _in == "clear":
                    prompt_toolkit.shortcuts.clear()
                    continue
                # allow reconnection
                m = connect_regex.match(_in)
                if m:
                    username, password = m.groups()
                    print(f"Trying to connect to {self.url}")
                    res = self.connect(username, password)
                    self.print_res(res)
                    continue
                # Call Sheerka
                if self.token:
                    headers = {"Authorization": f"Bearer {self.token}"}
                    response = requests.post(f"{self.url}/echo/{_in}", headers=headers)
                else:
                    response = requests.post(f"{self.url}/echo/{_in}")
                # read the response from the server
                if response:
                    as_json = response.json()
                    # Print the response and loop
                    self.print_info(as_json['response'])
                    if as_json['command'] == CLIENT_OPERATION_QUIT:
                        break
                else:
                    self.print_error(response.text)
                    # allow termination even if the sever is not responding
                    if _in in EXIT_COMMANDS:
                        self.print_info("Forced termination. Bye.")
                        break
            except KeyboardInterrupt:
                # Ctrl-C cancels the current input line but keeps the client alive.
                continue
            except EOFError:
                self.print_error("EOFError...")
                return 3

    @staticmethod
    def print_error(message: str):
        """
        Print the message in red
        :param message: text to display
        """
        # NOTE(review): message is interpolated into HTML markup unescaped —
        # a message containing '<' would break the formatting; confirm inputs
        # are plain text.
        print_formatted_text(HTML(f'<ansired>{message}</ansired>'))

    @staticmethod
    def print_info(message: str):
        """
        Print an informational message on stdout.
        :param message: text to display
        """
        print(message)

    def print_res(self, response: TestResponse) -> None:
        """
        Print the colored message, depending on the status of the response
        :param response: response whose message should be displayed
        """
        if response.status:
            self.print_info(response.message)
        else:
            self.print_error(response.message)
def parse_arguments(args: list):
    """
    Parses the command-line arguments of the client.

    :param args: argument list, typically ``sys.argv[1:]``
    :return: the parsed ``argparse.Namespace``
    """
    parser = argparse.ArgumentParser(prog="SheerkaClient", description="CLI to connect to Sheerka")
    parser.add_argument("hostname", nargs='?', default="http://localhost")
    parser.add_argument("-p", "--port", action="store", type=int, default=56356)
    # BUGFIX: action="version" requires a version string; without it argparse
    # crashes (AttributeError) as soon as -v/--version is actually used.
    parser.add_argument("-v", "--version", action="version", version="%(prog)s 0.1")
    parser.add_argument("-u", "--username", action="store")
    parser.add_argument("-P", "--password", action="store")
    return parser.parse_args(args)
if __name__ == "__main__":
    parsed_args = parse_arguments(sys.argv[1:])
    client = SheerkaClient(parsed_args.hostname, parsed_args.port)
    client.init_folder()
    result = client.check_url()
    if not result.status:
        print(result.message)
        # BUGFIX: a failed connection is an error — exit with a non-zero
        # status so shells and scripts can detect it (previously exited 0).
        exit(1)
    print(result.message)
    # Optional eager authentication from the command line.
    if parsed_args.username:
        result = client.connect(parsed_args.username, parsed_args.password)
        client.print_res(result)
    client.run()
+3
View File
@@ -0,0 +1,3 @@
# Default TCP port the Sheerka server listens on.
SHEERKA_PORT = 56356
# User inputs that terminate the client loop.
EXIT_COMMANDS = ("quit", "exit", "bye")
# Command sent back by the server to tell the client to quit.
CLIENT_OPERATION_QUIT = "quit"
+2
View File
@@ -0,0 +1,2 @@
class ExecutionContext:
    """Placeholder for the execution context of a user command (empty for now)."""

    pass
+2
View File
@@ -0,0 +1,2 @@
class Sheerka:
    """Placeholder for the main Sheerka application object (empty for now)."""

    pass
+31
View File
@@ -0,0 +1,31 @@
class CustomType:
    """
    Base class for the custom types used in Sheerka.

    A custom type is a type with a single instance across the application and
    a semantic meaning, in the same spirit as the builtin singleton ``None``.
    """

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return self.value

    def __eq__(self, other):
        # Two custom types are equal when their wrapped values are equal.
        if isinstance(other, CustomType):
            return self.value == other.value
        return False

    def __hash__(self):
        # Consistent with __eq__: equal values hash identically.
        return hash(self.value)
class NotFoundType(CustomType):
    """
    Marker type used when an entry is not found in the cache or in the sdp.
    """

    def __init__(self):
        super().__init__("**NotFound**")


# The unique instance, used as a sentinel value across the application.
NotFound = NotFoundType()
+32
View File
@@ -0,0 +1,32 @@
def get_class(qname):
    """
    Loads a class (or any module attribute) from its fully qualified name.

    :param qname: dotted name, e.g. ``"json.JSONDecoder"``
    :return: the object designated by ``qname``
    :raises ImportError: when the module part cannot be imported
    :raises AttributeError: when the attribute does not exist in the module
    """
    # importlib.import_module returns the leaf module directly, which is both
    # the documented replacement for the low-level __import__ (that returns
    # the top-level package and forced the getattr walk) and simpler.
    import importlib

    parts = qname.split('.')
    module_name = ".".join(parts[:-1])
    module = importlib.import_module(module_name)
    return getattr(module, parts[-1])
def get_full_qualified_name(obj):
    """
    Returns the full qualified name of a class, or of an instance's class
    (including its module name).

    :param obj: a class or an instance
    :return: ``"module.ClassName"``, or just ``"ClassName"`` for builtins
    """
    # BUGFIX: the original tested obj.__class__ == type, which misclassifies
    # classes built with a custom metaclass (ABCs, Enums, ...) as instances
    # and reported the metaclass name instead. isinstance covers them all.
    if isinstance(obj, type):
        cls = obj
    else:
        cls = obj.__class__
    module = cls.__module__
    if module is None or module == str.__class__.__module__:
        return cls.__name__  # Avoid reporting __builtin__
    return module + '.' + cls.__name__
+29
View File
@@ -0,0 +1,29 @@
anyio==3.6.2
attrs==22.2.0
certifi==2022.12.7
charset-normalizer==3.0.1
click==8.1.3
exceptiongroup==1.1.0
fastapi==0.89.1
h11==0.14.0
httptools==0.5.0
idna==3.4
iniconfig==2.0.0
packaging==23.0
pluggy==1.0.0
prompt-toolkit==3.0.36
pydantic==1.10.4
pytest==7.2.0
python-dotenv==0.21.0
PyYAML==6.0
requests==2.28.2
sniffio==1.3.0
starlette==0.22.0
tomli==2.0.1
typing_extensions==4.4.0
urllib3==1.26.14
uvicorn==0.20.0
uvloop==0.17.0
watchfiles==0.18.1
wcwidth==0.2.5
websockets==10.4
+650
View File
@@ -0,0 +1,650 @@
import hashlib
import json
import logging
import shutil
import time
from datetime import date, datetime
from os import path
from threading import RLock
from typing import Callable
from core.ExecutionContext import ExecutionContext
from core.Sheerka import Sheerka
from core.global_symbols import NotFound
from sdp.sheerkaDataProviderIO import SheerkaDataProviderIO
from sdp.sheerkaSerializer import Serializer, SerializerContext
def json_default_converter(o):
    """
    Default formatter for json.
    It's used when the json serializer does not know how to serialise a type.

    :param o: object the json encoder could not handle
    :return: a json-serializable representation of ``o``
    :raises TypeError: for unsupported types, as the ``json`` module expects
        from a ``default`` hook
    """
    if isinstance(o, (date, datetime)):
        return o.isoformat()
    # BUGFIX: falling through used to return None, which silently encoded any
    # unknown object as null (corrupting state digests). Fail loudly instead,
    # consistent with the converter in the serializer module.
    raise TypeError(f"Cannot serialize object '{o}', class='{o.__class__.__name__}'")
class Event(object):
    """
    Class that represents something that modifies the state of the system.

    Events are chained through their parents' digests (git-style), so the
    history of modifications can be walked backwards from the last event.
    """

    def __init__(self, message="", user_id="", date=None, parents=None):
        self.user_id: str = user_id  # id of the user that triggers the modification
        self.date: datetime | None = date or datetime.now()  # when
        self.message: str = message  # user input or whatever that modifies the system
        self.parents: list[str] = parents  # digest(s) of the parent(s) of this event
        self._digest: str | None = None  # digest of the event

    def __str__(self):
        return f"{self.date.strftime('%d/%m/%Y %H:%M:%S')} {self.message}"

    def __repr__(self):
        return f"{self.get_digest()[:12]} {self.message}"

    def get_digest(self):
        """
        Returns the digest of the event
        :return: sha256 of the event
        """
        # The digest is computed lazily and cached.
        if self._digest:
            return self._digest
        if self.user_id == "":
            # only possible during the unit test
            # We use this little trick to speed up the unit test
            self._digest = self.message[6:] if self.message.startswith("TEST::") else "xxx"
            return self._digest
        if not isinstance(self.message, str):
            raise NotImplementedError(f"message={self.message}")
        # The digest covers author, timestamp, content and ancestry, so a
        # change to any of them yields a different event identity.
        to_hash = f"Event:{self.user_id}{self.date}{self.message}{self.parents}".encode("utf-8")
        self._digest = hashlib.sha256(to_hash).hexdigest()
        return self._digest

    def to_dict(self):
        """Returns the event as a plain dictionary (the instance __dict__)."""
        return self.__dict__

    def from_dict(self, as_dict):
        """
        Populates the event from a dictionary produced by :meth:`to_dict`.
        :param as_dict: dictionary with the event fields
        """
        self.user_id = as_dict["user_id"]
        self.date = datetime.fromisoformat(as_dict["date"])
        self.message = as_dict["message"]
        self.parents = as_dict["parents"]
        self._digest = as_dict["_digest"]  # freeze the digest

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, Event):
            # _digest is deliberately ignored: two events with identical
            # content are equal even if only one has been hashed yet.
            return (self.user_id == other.user_id and
                    self.date == other.date and
                    self.message == other.message and
                    self.parents == other.parents)
        return False

    def __hash__(self):
        return hash(self.get_digest())
class State:
    """
    Class that represents the state of the system (dictionary of all known entries).
    """

    def __init__(self):
        # Attribute creation order matters: get_digest() serializes __dict__,
        # so reordering these assignments would change every digest.
        self.date = None    # date of the snapshot
        self.events = []    # digests of the events that produced this state
        self.data = {}      # entry -> value (or entry -> {key -> value})

    def get_digest(self):
        """Returns the sha256 of the json representation of the state."""
        serialized = json.dumps(self.__dict__, default=json_default_converter)
        return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
class SheerkaDataProviderTransaction:
    """
    Class that manages sdp transactions.

    All sdp actions (add / update / remove entries) done through the
    transaction are persisted in one go, in the order of declaration.
    Note that a Transaction within a Transaction is not supported.
    """

    def __init__(self, sdp, event):
        self.sdp: SheerkaDataProvider = sdp
        self.event: Event = event
        # Working copy of the state; mutated by add/remove/clear and only
        # persisted when the transaction exits.
        self.state: State | None = None
        self.snapshot: str | None = None  # digest of the parent snapshot
        self.event_digest: str | None = None  # digest of the event of this transaction

    def __enter__(self):
        """Acquire the sdp lock, persist the event and load a fresh state copy."""
        self.sdp.lock.acquire()
        # save the event if needed
        self.event_digest = self.sdp.save_event(self.event) if isinstance(self.event, Event) else self.event
        # load state. I need a fresh copy, not the one from sdp
        self.snapshot = self.sdp.get_snapshot(SheerkaDataProvider.HeadFile)
        self.state = self.sdp.load_state(self.snapshot)
        return self

    def add(self, entry, key, items, use_ref=False):
        """
        Adds items to the state.
        :param entry: name of the entry (grouping of related values)
        :param key: key within the entry; None replaces the whole entry
        :param items: value(s) to store
        :param use_ref: when True, items are persisted as separate objects and
            only their digests (prefixed with REF_PREFIX) are kept in the state
        """
        with self.sdp.lock:
            if entry not in self.state.data:
                self.state.data[entry] = {}
            if use_ref:
                if isinstance(items, list):
                    items = [self.sdp.REF_PREFIX + self.sdp.save_obj(item) for item in items]
                elif isinstance(items, set):
                    items = {self.sdp.REF_PREFIX + self.sdp.save_obj(item) for item in items}
                else:
                    items = self.sdp.REF_PREFIX + self.sdp.save_obj(items)
            if key is None:
                self.state.data[entry] = items
            else:
                self.state.data[entry][key] = items

    def remove(self, entry, key):
        """
        Remove an entry
        :param entry:
        :param key:
        :return:
        """
        with self.sdp.lock:
            try:
                del (self.state.data[entry][key])
            except KeyError:
                # removing a missing entry is a no-op by design
                pass

    def clear(self, entry):
        """
        Clear an entire entry
        :param entry:
        :return:
        """
        with self.sdp.lock:
            try:
                self.state.data[entry].clear()
            except KeyError:
                # clearing a missing entry is a no-op by design
                pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Persist the new state, move HEAD to it and release the sdp lock."""
        try:
            # NOTE(review): the state is persisted even when the with-body
            # raised — confirm this "commit anyway" behavior is intended.
            self.state.parents = [] if self.snapshot is None else [self.snapshot]
            self.state.events = [self.event_digest]
            self.state.date = datetime.now()
            self.snapshot = self.sdp.save_state(self.state)
            self.sdp.set_snapshot(SheerkaDataProvider.HeadFile, self.snapshot)
            self.sdp.update_state(self.state)  # make sure to keep sync
        finally:
            # BUGFIX: the lock acquired in __enter__ was never released, which
            # would block any other thread using the sdp forever.
            self.sdp.lock.release()
        return False  # let's escalate the exceptions
class SheerkaDataProvider:
    """
    Manages the persistence state of the system.

    The layout under the IO root is git-like: events/, state/ and objects/
    hold immutable, content-addressed blobs, while refs/<name>/HEAD points to
    the current state snapshot and LAST_EVENT to the most recent event.
    """

    # Folder / file names of the storage layout.
    EventFolder = "events"
    StateFolder = "state"
    ObjectsFolder = "objects"
    CacheFolder = "cache"
    RefFolder = "refs"
    HeadFile = "HEAD"
    LastEventFile = "LAST_EVENT"
    KeysFile = "keys"
    OntologiesFiles = "ontologies"
    # Marker prefixing a digest stored in place of the real object.
    REF_PREFIX = "##REF##:"

    def __init__(self, root=None, sheerka=None, name="__default__"):
        """
        :param root: storage root ("mem://" selects the in-memory backend);
            None lets the IO layer pick its default
        :param sheerka: back-reference to the Sheerka instance, forwarded to serializers
        :param name: namespace for the refs (allows several sdp side by side)
        """
        self.log = logging.getLogger(__name__)
        self.init_log = logging.getLogger("init." + __name__)
        self.init_log.debug("Initializing sdp.")
        self.sheerka: Sheerka = sheerka
        self.io = SheerkaDataProviderIO.get(root)
        self.first_time = self.io.first_time
        self.name = name
        self.serializer = Serializer()
        # Reentrant lock protecting self.state and the transactions.
        self.lock = RLock()
        snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile)
        self.state = self.load_state(snapshot)

    def __repr__(self):
        return f"SheerkaDataProvider(name={self.name})"

    @staticmethod
    def get_stream_digest(stream):
        """
        Compute a SHA256 from a stream.
        The stream is rewound to its start afterwards.
        :param stream: binary stream, read by 4096-byte blocks
        :return: hex digest
        :rtype: str
        """
        sha256_hash = hashlib.sha256()
        for byte_block in iter(lambda: stream.read(4096), b""):
            sha256_hash.update(byte_block)
        stream.seek(0)
        return sha256_hash.hexdigest()

    def get_transaction(self, event: Event) -> SheerkaDataProviderTransaction:
        """
        Creates a new SheerkaTransaction and returns it
        :param event: event (or event digest) that motivates the transaction
        :return: a transaction bound to this sdp
        :rtype: SheerkaDataProviderTransaction
        """
        return SheerkaDataProviderTransaction(self, event)

    def get(self, entry: str, key: str | None = None, default=NotFound, load_origin=True):
        """
        Get the value stored in an entry
        :param entry: used to group entries that are related, like a mini database table
        :param key:
        :param default: value to return is not found
        :param load_origin: adds the parent object's digest to the object
        :return:
        """
        with self.lock:
            self.log.debug(f"getting {entry=}, {key=}, {default=}, {load_origin=}")
            if entry not in self.state.data:
                return default
            if key is not None and key not in self.state.data[entry]:
                return default
            item = self.state.data[entry] if key is None else self.state.data[entry][key]
            if isinstance(item, dict):
                # NOTE(review): with a key, a dict value is shallow-copied
                # without resolving refs; without a key every value is
                # resolved — confirm this asymmetry is intended.
                return item.copy() if key else {k: self.load_ref_if_needed(v, load_origin) for k, v in item.items()}
            else:
                return self.load_ref_if_needed(item, load_origin)

    def list(self, entry: str, filter: Callable[[str, object], bool] = None):
        """
        Lists elements of entry 'entry'
        :param entry: name of the entry to list
        :param filter: filter to use
        :return: list of elements
        """
        # NOTE(review): unlike get(), this generator iterates self.state.data
        # without holding the lock — confirm callers guarantee no concurrent
        # mutation while it is consumed.
        if entry not in self.state.data:
            return []
        elements = self.state.data[entry]
        if isinstance(elements, dict):
            # manage when elements have a key
            filter_to_use = (lambda k, o: True) if filter is None else filter
            for key, element in elements.items():
                if filter_to_use(key, element):
                    if isinstance(element, list):
                        yield [self._inner_load_ref_if_needed(e) for e in element]
                    else:
                        yield self._inner_load_ref_if_needed(element)
        else:
            # manage when no key is defined for the elements
            if not isinstance(elements, list) and not isinstance(elements, set):
                elements = [elements]
            filter_to_use = (lambda o: True) if filter is None else filter
            for element in elements:
                if filter_to_use(element):
                    yield self._inner_load_ref_if_needed(element)

    def exists(self, entry: str, key: str | None = None):
        """
        Returns true if the entry is defined
        :param key: optional key within the entry
        :param entry:
        :return:
        """
        with self.lock:
            exist = entry in self.state.data
            if not exist or key is None:
                return exist
            return key in self.state.data[entry]

    def reset(self):
        """
        Test only, delete all entries
        :return:
        """
        # NOTE(review): first_time is captured before io.reset() runs, so it
        # keeps the pre-reset value — verify this is intended.
        self.first_time = self.io.first_time
        if hasattr(self.io, "reset"):
            self.io.reset()
        snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile)
        self.state = self.load_state(snapshot)

    def save_event(self, event: Event):
        """
        Persists an event, chaining it to the last known event.
        :param event:
        :return: digest of the event
        """
        parent = self.get_last_event()
        event.parents = [parent] if parent else None
        digest = event.get_digest()  # must be call after setting the parents
        target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest)
        if self.io.exists(target_path):
            # content-addressed: an identical event is already stored
            return digest
        self.io.write_binary(target_path, self.serializer.serialize(event, None).read())
        self.set_last_event(digest)
        return digest

    def load_event(self, digest: str | None = None):
        """
        return an event, given its digest
        :param digest: digest of the event; None means the last event
        :return: the Event, or None when there is no event yet
        """
        digest = digest or self.get_last_event()
        if digest is None:
            return None
        target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest)
        with self.io.open(target_path, "rb") as f:
            return self.serializer.deserialize(f, None)

    def load_events(self, page_size: int, start=0):
        """
        Load multiple events in the same command
        :param start: number of events to skip, starting from the last one
        :param page_size: = -1 to load everything
        :return: generator of events, newest first
        """
        digest = None
        if start:
            # walk the parent chain to skip the first 'start' events
            for i in range(start):
                event = self.load_event(digest)
                if event is None or event.parents is None:
                    return
                digest = event.parents[0]
        count = 0
        while count < page_size or page_size <= 0:
            event = self.load_event(digest)
            if event is None:
                return
            yield event
            if event.parents is None:
                return
            digest = event.parents[0]
            count += 1

    def get_last_event(self):
        """Returns the digest of the most recent event, or None when there is none."""
        last_event_file = self.io.path_join(self.LastEventFile)
        if not self.io.exists(last_event_file):
            return None
        return self.io.read_text(last_event_file)

    def set_last_event(self, digest: str):
        """
        Updates the last event reference file
        :param digest: digest of the new last event
        :return: number of characters written
        """
        last_event_file = self.io.path_join(self.LastEventFile)
        return self.io.write_text(last_event_file, digest)

    def set_snapshot(self, file: str, digest: str):
        """
        Updates the HEAD file.
        :param file: It's a parameter as we may manage branches
        :param digest: digest of the state snapshot to point to
        :return: number of characters written
        """
        head_file = self.io.path_join(self.RefFolder, self.name, file)
        return self.io.write_text(head_file, digest)

    def get_snapshot(self, file: str):
        """
        Returns the digest stored in a ref file (e.g. HEAD), or None when the
        ref does not exist yet.
        :param file: ref file name
        """
        head_file = self.io.path_join(self.RefFolder, self.name, file)
        if not self.io.exists(head_file):
            return None
        return self.io.read_text(head_file)

    def load_state(self, digest: str):
        """
        Deserializes the state identified by ``digest``.
        :param digest: state digest; None yields a brand new empty State
        """
        if digest is None:
            return State()
        target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest)
        with self.io.open(target_path, "rb") as f:
            context = SerializerContext(sheerka=self.sheerka)
            return self.serializer.deserialize(f, context)

    def save_state(self, state: State):
        """
        Persists a state snapshot (no-op when an identical state is stored).
        :param state:
        :return: digest of the state
        """
        digest = state.get_digest()
        self.log.debug(f"Saving new state. digest={digest}")
        target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest)
        if self.io.exists(target_path):
            return digest
        context = SerializerContext(sheerka=self.sheerka)
        self.io.write_binary(target_path, self.serializer.serialize(state, context).read())
        return digest

    def update_state(self, state: State):
        """Replaces the in-memory state (under the lock)."""
        with self.lock:
            self.state = state

    def get_execution_context_file_path(self, digest: str, is_admin: bool) -> str:
        """
        Compute the path to a given ExecutionContext file
        When retrieving the list of command, we need to distinguish user triggered command
        from internal command (generally at the startup of Sheerka), hence the is_admin parameter
        :param digest: digest of the related event
        :param is_admin: True for internal (admin) contexts
        :return: path of the context file
        """
        ext = "_admin_context" if is_admin else "_context"
        return self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + ext

    def get_execution_result_file_path(self, digest) -> str:
        """
        Compute the path of the execution-result file of an event.
        :param digest: digest of the related event
        :return: path of the result file
        """
        ext = "_result"
        return self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + ext

    def has_execution_context(self, digest, is_admin=False) -> bool:
        """
        Check if a result file was created for a specific event
        :param digest:
        :param is_admin: True is the result is an internal admin result file
        :return:
        """
        target_path = self.get_execution_context_file_path(digest, is_admin)
        return self.io.exists(target_path)

    def save_execution_context(self, execution_context: ExecutionContext, is_admin=False) -> str:
        """
        Save the execution context associated with an event
        To make a long story short,
        for every single user input, there is an event (which is the first thing that is created)
        and a result (the ExecutionContext created by sheerka.evaluate_user_input()
        :param execution_context:
        :param is_admin: True is the result is an internal admin result file
        :return: digest of the related event
        """
        start = time.time()
        message = execution_context.event.message
        digest = execution_context.event.get_digest()
        self.log.debug(f"Saving execution context. digest={digest}, message={message}")
        target_path = self.get_execution_context_file_path(digest, is_admin)
        if self.io.exists(target_path):
            return digest
        context = SerializerContext(sheerka=self.sheerka)
        length = self.io.write_binary(target_path, self.serializer.serialize(execution_context, context).read())
        elapsed = time.time() - start
        self.log.debug(f"Saved execution context. message={message}, length={length}, elapsed={elapsed}")
        return digest

    def save_execution_result(self, digest: str, execution_result) -> str:
        """
        On the top of execution context, also save some extra information like, status of the execution
        :param digest: digest of the related event
        :param execution_result:
        :return: the same digest
        """
        self.log.debug(f"Saving execution context extra information. digest={digest}")
        target_path = self.get_execution_result_file_path(digest)
        if self.io.exists(target_path):
            return digest
        context = SerializerContext(sheerka=self.sheerka)
        length = self.io.write_binary(target_path, self.serializer.serialize(execution_result, context).read())
        self.log.debug(f"Saved execution context. message={execution_result}, length={length}")
        return digest

    def load_execution_context(self, digest: str, is_admin=False) -> ExecutionContext:
        """
        Load and deserialize a result file
        :param digest: digest of the related event
        :param is_admin: True is the result is an internal admin result file
        :return:
        """
        target_path = self.get_execution_context_file_path(digest, is_admin)
        with self.io.open(target_path, "rb") as f:
            context = SerializerContext(sheerka=self.sheerka)
            return self.serializer.deserialize(f, context)

    def load_execution_result(self, digest: str):
        """
        Load and deserialize a result extra file
        :param digest: digest of the related event
        :return: the deserialized result
        """
        target_path = self.get_execution_result_file_path(digest)
        with self.io.open(target_path, "rb") as f:
            context = SerializerContext(sheerka=self.sheerka)
            return self.serializer.deserialize(f, context)

    def load_ref_if_needed(self, item, load_origin):
        """
        New version of the function.
        The old one must be replaced at some point
        Make sure we return the real object, even inside a collection
        :param item:
        :param load_origin:
        :return:
        """
        if isinstance(item, list):
            return [self._inner_load_ref_if_needed(i, load_origin) for i in item]
        elif isinstance(item, set):
            return {self._inner_load_ref_if_needed(i, load_origin) for i in item}
        elif isinstance(item, dict):
            # dicts are returned as shallow copies, refs inside are not resolved
            return item.copy()
        else:
            return self._inner_load_ref_if_needed(item, load_origin)

    def save_obj(self, obj) -> str:
        """
        Serializes an object into the objects folder (content-addressed).
        :param obj:
        :return: digest of the object
        """
        self.log.debug(f"Saving '{obj}' as reference...")
        # NOTE(review): user_name is hard-coded — TODO confirm and wire the
        # real user through the context.
        context = SerializerContext(user_name="kodjo", sheerka=self.sheerka)
        stream = self.serializer.serialize(obj, context)
        digest = obj.get_digest() if hasattr(obj, "get_digest") else self.get_stream_digest(stream)
        target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest)
        if self.io.exists(target_path):
            self.log.debug(f"...already saved. digest is {digest}")
            return digest
        self.io.write_binary(target_path, stream.read())
        self.log.debug(f"...digest={digest}.")
        return digest

    def load_obj(self, digest, add_origin=True):
        """
        Loads an object from the objects folder.
        :param digest: digest of the object; None or unknown digests yield None
        :param add_origin: when True, the digest is attached to the loaded object
        :return: the object, or None when not found
        """
        if digest is None:
            return None
        target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest)
        if not self.io.exists(target_path):
            return None
        with self.io.open(target_path, "rb") as f:
            context = SerializerContext(origin=digest, sheerka=self.sheerka)
            obj = self.serializer.deserialize(f, context)
            # set the origin of the object
            if add_origin:
                if isinstance(obj, dict):
                    obj[Serializer.ORIGIN] = digest
                elif not isinstance(obj, str):
                    setattr(obj, Serializer.ORIGIN, digest)
            return obj

    def save_ontologies(self, ontologies_names):
        """
        Keep track of the sequence of ontologies
        This is a quick and dirty ontology management
        I would like the ontologies to have a digest and to know what is their parent
        """
        ontology_file = self.io.path_join(SheerkaDataProvider.OntologiesFiles)
        text = "\n".join(ontologies_names)
        self.io.write_text(ontology_file, text)

    def load_ontologies(self):
        """Returns the saved ontology names, or an empty list when none were saved."""
        ontology_file = self.io.path_join(SheerkaDataProvider.OntologiesFiles)
        if not self.io.exists(ontology_file):
            return []
        text = self.io.read_text(ontology_file)
        return text.split("\n")

    def _inner_load_ref_if_needed(self, obj, load_origin=True):
        """
        Make sure the real obj is returned
        :param obj: either a plain value or a REF_PREFIX-ed digest string
        :param load_origin:
        :return: the resolved object, or obj unchanged when it is not a ref
        """
        if not isinstance(obj, str) or not obj.startswith(SheerkaDataProvider.REF_PREFIX):
            return obj
        resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin)
        # fall back to the raw ref string when the object cannot be loaded
        return resolved or obj

    def test_only_destroy_refs(self, name=None):
        """
        Test only: delete the refs folder of the given (or current) sdp name.
        NOTE(review): operates on the local filesystem directly, so this only
        works with the file-based IO backend — confirm.
        :param name: sdp namespace; defaults to this sdp's name
        """
        name = name or self.name
        current_sdp_refs_folder = self.io.path_join(self.RefFolder, name)
        if path.exists(current_sdp_refs_folder):
            shutil.rmtree(current_sdp_refs_folder)
+182
View File
@@ -0,0 +1,182 @@
import io
import logging
import os
from os import path
class SheerkaDataProviderIO:
    """
    Abstract persistence backend for the sdp.

    Concrete subclasses store content either on disk
    (SheerkaDataProviderFileIO) or in memory (SheerkaDataProviderDictionaryIO).
    """

    def __init__(self, root):
        """
        :param root: base location prepended to every path
        """
        self.root = root
        self.log = logging.getLogger(__name__)
        self.init_log = logging.getLogger("init." + __name__)

    # The operations below used to silently return None when a subclass forgot
    # to override them; raising makes the missing implementation explicit.

    def exists(self, file_path):
        """Returns True when file_path holds some content."""
        raise NotImplementedError

    def open(self, file_path, mode):
        """Opens file_path and returns a file-like object."""
        raise NotImplementedError

    def read_text(self, file_path):
        """Returns the text content of file_path."""
        raise NotImplementedError

    def read_binary(self, file_path):
        """Returns the binary content of file_path."""
        raise NotImplementedError

    def write_text(self, file_path, content):
        """Writes text content to file_path; returns the content length."""
        raise NotImplementedError

    def write_binary(self, file_path, content):
        """Writes binary content to file_path; returns the content length."""
        raise NotImplementedError

    def remove(self, file_path):
        """Deletes file_path."""
        raise NotImplementedError

    @staticmethod
    def get(root):
        """
        Factory: returns the IO implementation matching ``root``.
        :param root: "mem://" selects the in-memory backend, anything else the
            file-based backend
        """
        if root == "mem://":
            return SheerkaDataProviderDictionaryIO()
        else:
            return SheerkaDataProviderFileIO(root)

    def get_obj_path(self, object_type, digest):
        """Computes the storage path of an object: root/type/digest[:24]/digest."""
        return path.join(self.root, object_type, digest[:24], digest)

    def path_join(self, *paths):
        """Joins ``paths`` under the root location."""
        return path.join(self.root, *paths)
class SheerkaDataProviderFileIO(SheerkaDataProviderIO):
    """
    Persist information in files, under a given folder
    """

    def __init__(self, root: str):
        """
        :param root: when given, force the root folder. Default is ~/.sheerka
        """
        root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \
            if root is None \
            else path.abspath(root)
        super().__init__(root)
        self.init_log.debug("root is set to '" + self.root + "'")
        if not path.exists(self.root):
            self.init_log.debug("root folder not found. Creating it.")
            # exist_ok guards against a concurrent process creating the folder
            # between the exists() check and makedirs() (TOCTOU race).
            os.makedirs(self.root, exist_ok=True)
            self.first_time = True  # used by Sheerka to detect first initialization
        else:
            self.first_time = False

    def open(self, file_path, mode):
        """Opens a real file on disk."""
        return open(file_path, mode)

    def read_text(self, file_path):
        """Returns the text content of a file."""
        with open(file_path) as f:
            return f.read()

    def read_binary(self, file_path):
        """Returns the binary content of a file."""
        with open(file_path, "rb") as f:
            return f.read()

    def write_text(self, file_path, content):
        """Writes text content, creating parent folders; returns the content length."""
        self._write(file_path, content, "w")
        return len(content)

    def write_binary(self, file_path, content):
        """Writes binary content, creating parent folders; returns the content length."""
        self._write(file_path, content, "wb")
        return len(content)

    def exists(self, file_path):
        """Returns True when the file exists on disk."""
        return path.exists(file_path)

    def remove(self, file_path):
        """Deletes the file from disk."""
        os.remove(file_path)

    @staticmethod
    def _write(file_path, content, mode):
        """Writes content to file_path, creating the parent folder as needed."""
        # BUGFIX: the previous check-then-create pattern raced with concurrent
        # writers targeting the same new folder; exist_ok makes it atomic.
        os.makedirs(path.dirname(file_path), exist_ok=True)
        with open(file_path, mode) as f:
            f.write(content)
class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO):
    """
    Persist information in memory, as a single dict mapping each path to its
    content:
    {
        path_to_information1 : info1,
        path_to_information2 : info2,
    }
    """

    def __init__(self):
        super().__init__("")
        self.cache = {}
        self.init_log.debug("Initializing dictionary file.")
        self.first_time = True

    def exists(self, file_path):
        # The empty path denotes the root, which always exists.
        return True if file_path == "" else file_path in self.cache

    def read_text(self, file_path):
        return self.cache[file_path]

    def read_binary(self, file_path):
        return self.cache[file_path]

    def write_binary(self, file_path, content):
        self.cache[file_path] = content
        return len(content)

    def write_text(self, file_path, content):
        self.cache[file_path] = content
        return len(content)

    def remove(self, file_path):
        del self.cache[file_path]

    def open(self, file_path, mode):
        binary = "b" in mode
        if "w" in mode:
            stream = io.BytesIO() if binary else io.StringIO()
            # Intercept close() so the stream content is flushed into the cache.
            stream.close = on_close(self, file_path, stream)(stream.close)
            return stream
        if file_path not in self.cache:
            raise FileNotFoundError(file_path)
        payload = self.cache[file_path]
        return io.BytesIO(payload) if binary else io.StringIO(payload)

    def reset(self):
        """Test only: forget everything."""
        self.cache.clear()
        self.first_time = True
def on_close(dictionary_io, file_path, stream):
    """
    Decorator factory intercepting a stream's close().

    Before delegating to the real close function, the whole stream content is
    copied into the dictionary IO's cache, so in-memory "files" are persisted
    when they are closed. (There may be more elegant solutions.)

    :param dictionary_io: owner of the cache dict
    :param file_path: cache key under which the content is stored
    :param stream: the stream being closed
    :return: a decorator wrapping the original close function
    """
    def decorator(close_func):
        def wrapper(*args, **kwargs):
            stream.seek(0)
            dictionary_io.cache[file_path] = stream.read()
            close_func(*args, **kwargs)
        return wrapper
    return decorator
+382
View File
@@ -0,0 +1,382 @@
import datetime
import io
import json
import logging
import pickle
import struct
from dataclasses import dataclass
from enum import Enum
from core.utils import get_class, get_full_qualified_name
def json_default_converter(o):
    """
    Default formatter for json.
    It's used when the json serializer does not know how to serialise a type.

    :param o: object the json encoder could not handle
    :return: a json-serializable representation of ``o``
    :raises TypeError: for unsupported types — the exception type the ``json``
        module documents for ``default`` hooks (previously a bare Exception)
    """
    if isinstance(o, (datetime.date, datetime.datetime)):
        return o.isoformat()
    if isinstance(o, Enum):
        return o.name
    # Debugging tip: append o.__class__.__name__ to a file here to collect
    # the offending types.
    raise TypeError(f"Cannot serialize object '{o}', class='{o.__class__.__name__}'")
@dataclass()
class SerializerContext:
    """Additional information handed to serializers when dumping / loading."""

    # Name of the user performing the operation, if any.
    user_name: str = None
    # Digest of the stored object this content was loaded from, if any.
    origin: str = None
    # Back-reference to the Sheerka instance, if any.
    sheerka: object = None
class Serializer:
    """
    Registry and front-end of the available serializers.

    Every serialized stream starts with a binary header (serializer name +
    format version) so the matching serializer can be selected again at
    deserialization time.
    """

    # Header layout: one name byte + unsigned short version; with native
    # alignment this struct packs into 4 bytes (hence the read(4) below).
    HEADER_FORMAT = "cH"
    USERNAME = "user_name"  # key to store user that as committed the snapshot
    MODIFICATION_DATE = "modification_date"  #
    PARENTS = "parents"
    ORIGIN = "##origin##"
    HISTORY = "##history##"

    def __init__(self):
        self.log = logging.getLogger(__name__)
        self.init_log = logging.getLogger("init." + __name__)
        self.init_log.debug("Initializing serializers")
        # Registered serializers, scanned in registration order.
        self._cache = []
        # add builtin serializers
        self.register(EventSerializer())
        self.register(StateSerializer())
        # self.register(ConceptSerializer())
        # self.register(DictionarySerializer())
        # self.register(ExecutionContextSerializer())
        # self.register(MemoryObjectSerializer())  # before ServiceObjSerializer
        # self.register(ServiceObjSerializer())
        # self.register(RuleSerializer())
        # self.register(CustomTypeSerializer())

    def register(self, serializer):
        """
        Register the list of all know serializers
        :param serializer: a BaseSerializer implementation
        :return:
        """
        self.init_log.debug(f"Adding serializer {serializer}")
        self._cache.append(serializer)

    def serialize(self, obj, context):
        """
        Get the stream representation of an object
        :param context: extra info forwarded to the selected serializer
        :param obj: object to serialize
        :return: the stream produced by the selected serializer (header included)
        :raises TypeError: when no registered serializer matches obj
        """
        # The first registered serializer that matches wins.
        serializers = [s for s in self._cache if s.matches(obj)]
        if not serializers:
            raise TypeError(f"Don't know how to serialize {type(obj)}")
        serializer = serializers[0]
        self.log.debug(f"Serializing '{obj}' using '{serializer.name}'")
        stream = io.BytesIO()
        header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
        stream.write(header)
        return serializer.dump(stream, obj, context)

    def deserialize(self, stream, context):
        """
        Loads an object from its stream representation
        :param context: extra info forwarded to the selected serializer
        :param stream: binary stream positioned at the 4-byte header
        :return: the deserialized object
        :raises TypeError: when no serializer matches the header's name/version
        """
        # 4 == struct.calcsize(HEADER_FORMAT) with native alignment.
        header = struct.unpack(Serializer.HEADER_FORMAT, stream.read(4))
        serializers = [s for s in self._cache if s.name == header[0].decode("utf-8") and s.version == header[1]]
        if not serializers:
            raise TypeError(f"Don't know how serializer name={header[0]}, version={header[1]}")
        serializer = serializers[0]
        self.log.debug(f"deserializing using '{serializer}'")
        return serializer.load(stream, context)
class BaseSerializer:
    """
    Common contract for all serializers: a short name, a version and the
    matches/dump/load trio that subclasses are expected to override.
    """

    def __init__(self, name, version):
        """
        Create a serializer, given a name and a version.

        :param name: one-character identifier written in the stream header
        :param version: format version written in the stream header
        """
        self.name = name
        self.version = version

    def matches(self, obj):
        """
        Returns true if self can serialize obj.

        :param obj: candidate object
        :return: truthy when this serializer handles ``obj``
        """
        pass

    def dump(self, stream, obj, context):
        """
        Returns the byte representation of how the object should be serialized.

        :param stream: to write to
        :param obj: obj to serialize
        :param context: additional info needed to dump
        :return: stream of bytes
        """
        pass

    def load(self, stream, context):
        """
        From a stream of bytes, create the object.

        :param stream: byte stream to read from
        :param context: additional info needed to load
        :return: object
        """
        pass

    def __repr__(self):
        # e.g. "EventSerializer (E, version=1)"
        return f"{self.__class__.__name__} ({self.name}, version={self.version})"
class PickleSerializer(BaseSerializer):
    """
    Serializer relying on the pickle module; applicability is delegated
    to a caller-supplied predicate.
    """

    def __init__(self, predicate, name="P", version=1):
        super().__init__(name, version)
        self.predicate = predicate

    def matches(self, obj):
        return self.predicate(obj)

    def dump(self, stream, obj, context):
        payload = pickle.dumps(obj)
        stream.write(payload)
        # rewind so callers can read the whole stream from the start
        stream.seek(0)
        return stream

    def load(self, stream, context):
        payload = stream.read()
        return pickle.loads(payload)
class JsonSerializer(BaseSerializer):
    """
    Use Json to serialize the data: the object's ``__dict__`` is written as
    json together with a ``##type##`` marker used to rebuild the right
    class on load.
    """

    def __init__(self, predicate, name="J", version=1):
        BaseSerializer.__init__(self, name, version)
        self.predicate = predicate

    def matches(self, obj):
        return self.predicate(obj)

    def dump(self, stream, obj, context):
        # Copy the instance dict: writing the type marker straight into
        # obj.__dict__ would permanently add a '##type##' attribute to the
        # live object (visible to equality, pickling, later dumps...).
        as_dict: dict = dict(obj.__dict__)
        as_dict["##type##"] = get_full_qualified_name(obj)
        stream.write(json.dumps(as_dict, default=json_default_converter).encode("utf-8").strip())
        stream.seek(0)
        return stream

    def load(self, stream, context):
        as_dict: dict = json.loads(stream.read().decode("utf-8"))
        obj_type = as_dict.pop("##type##")
        obj_class = get_class(obj_type)
        # Bypass __init__: attributes are restored directly from the json.
        obj = obj_class.__new__(obj_class)
        for attr, value in as_dict.items():
            setattr(obj, attr, value)
        return obj
class EventSerializer(BaseSerializer):
    """
    Json-based serializer dedicated to sdp Event objects.
    Relies on Event.to_dict() / Event.from_dict() for the field mapping.
    """

    def __init__(self):
        # header name 'E', format version 1
        BaseSerializer.__init__(self, "E", 1)

    def matches(self, obj):
        # match on the fully qualified class name so this module does not
        # need to import sdp
        return get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"

    def dump(self, stream, obj, context):
        # Event -> dict -> utf-8 json; rewind so callers read from offset 0
        stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8"))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        json_stream = stream.read().decode("utf-8")
        as_dict = json.loads(json_stream)
        # instantiate the Event class by name, then rehydrate it
        event = get_class("sdp.sheerkaDataProvider.Event")()
        event.from_dict(as_dict)
        return event
class StateSerializer(PickleSerializer):
    """
    Pickle-based serializer for sdp State objects (header 'S', version 1).
    Matching is done on the fully qualified class name.
    """

    def __init__(self):
        super().__init__(
            lambda obj: get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State",
            "S",
            1)
#
#
# class JsonSerializer(BaseSerializer):
#
# def __init__(self, fully_qualified_name, name="J", version=1):
# BaseSerializer.__init__(self, name, version)
# self.fully_qualified_name = fully_qualified_name
#
# def matches(self, obj):
# return get_full_qualified_name(obj) == self.fully_qualified_name
#
# def dump(self, stream, obj, context):
# as_json = obj.to_dict()
# as_json.update({
# Serializer.HISTORY: {
# Serializer.USERNAME: context.user_name,
# Serializer.MODIFICATION_DATE: datetime.datetime.now().isoformat(),
# Serializer.PARENTS: [getattr(obj, Serializer.ORIGIN)] if hasattr(obj, Serializer.ORIGIN) else []
# }})
# stream.write(json.dumps(as_json, default=json_default_converter).encode("utf-8"))
# stream.seek(0)
# return stream
#
# def load(self, stream, context):
# json_stream = stream.read().decode("utf-8")
# json_message = json.loads(json_stream)
# obj = get_class(self.fully_qualified_name)()
# obj.from_dict(json_message)
# setattr(obj, Serializer.HISTORY, json_message[Serializer.HISTORY])
#
# return obj
#
#
# class PickleSerializer(BaseSerializer):
#
# def __init__(self, predicate, name="P", version=1):
# BaseSerializer.__init__(self, name, version)
# self.predicate = predicate
#
# def matches(self, obj):
# return self.predicate(obj)
#
# def dump(self, stream, obj, context):
# stream.write(pickle.dumps(obj))
# stream.seek(0)
# return stream
#
# def load(self, stream, context):
# return pickle.loads(stream.read())
#
#
#
#
# class SheerkaPickleSerializer(BaseSerializer):
#
# def __init__(self, predicate, name, version):
# BaseSerializer.__init__(self, name, version)
# self.predicate = predicate
#
# def matches(self, obj):
# return self.predicate(obj)
#
# def dump(self, stream, obj, context):
# stream.write(sheerkapickle.encode(context.sheerka, obj).encode("utf-8"))
# stream.seek(0)
# return stream
#
# def load(self, stream, context):
# json_stream = stream.read().decode("utf-8")
# obj = sheerkapickle.decode(context.sheerka, json_stream)
# return obj
#
#
# class ConceptSerializer(SheerkaPickleSerializer):
#
# def __init__(self):
# super().__init__(lambda obj: isinstance(obj, Concept), "C", 1)
#
#
# class DictionarySerializer(BaseSerializer):
# def __init__(self):
# super().__init__("D", 1)
#
# def matches(self, obj):
# return isinstance(obj, dict)
#
# def dump(self, stream, obj, context):
# stream.write(json.dumps(obj, default=json_default_converter).encode("utf-8"))
# stream.seek(0)
# return stream
#
# def load(self, stream, context):
# json_stream = stream.read().decode("utf-8")
# obj = json.loads(json_stream)
#
# return obj
#
#
# class ExecutionContextSerializer(SheerkaPickleSerializer):
# CLASS_NAME = "core.sheerka.ExecutionContext.ExecutionContext"
#
# def __init__(self):
# super().__init__(lambda obj: get_full_qualified_name(obj) == self.CLASS_NAME, "R", 1)
#
#
# class ServiceObjSerializer(PickleSerializer):
# base_class = get_class("core.sheerka.services.sheerka_service.ServiceObj")
#
# def __init__(self):
# super().__init__(
# lambda obj: isinstance(obj, self.base_class),
# "O",
# 1)
#
#
# class MemoryObjectSerializer(SheerkaPickleSerializer):
# CLASS_NAME = "core.sheerka.services.SheerkaMemory.MemoryObject"
#
# def __init__(self):
# super().__init__(lambda obj: get_full_qualified_name(obj) == self.CLASS_NAME, "M", 1)
#
#
# class RuleSerializer(SheerkaPickleSerializer):
# def __init__(self):
# super().__init__(lambda obj: isinstance(obj, Rule), "X", 1)
#
#
# class CustomTypeSerializer(BaseSerializer):
# def __init__(self):
# BaseSerializer.__init__(self, "T", 1)
#
# def matches(self, obj):
# return isinstance(obj, CustomType)
#
# def dump(self, stream, obj, context):
# stream.write(obj.value.encode("utf-8"))
# stream.seek(0)
# return stream
#
# def load(self, stream, context):
# value = stream.read().decode("utf-8")
# if value == NotInit.value:
# return NotInit
# elif value == NotFound.value:
# return NotFound
# elif value == Removed.value:
# return Removed
# raise NotImplemented(f"CustomTypeSerializer.load({value})")
# ---- diff-view artifact (new file, +124 lines): server authentication module follows ----
from datetime import datetime, timedelta, timezone

from fastapi import Depends, HTTPException
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
from passlib.context import CryptContext
from pydantic import BaseModel
from starlette import status
# to get a string like this run:
# openssl rand -hex 32
SECRET_KEY = "af95f0590411260f1f127bd7ef9a03409aecadf7729b3e6822b11752433b97b5"  # should be in env !
ALGORITHM = "HS256"  # JWT signing algorithm
ACCESS_TOKEN_EXPIRE_MINUTES = 1  # deliberately short token lifetime

# passlib context used to hash and verify passwords (bcrypt backend)
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
# tokenUrl must match the /token login route exposed by the server app
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

# In-memory stand-in for a real user store, keyed by username.
fake_users_db = {
    "kodjo": {
        "username": "kodjo",
        "full_name": "Kodjo Sossouvi",
        "email": "kodjo.sossouvi@gmail.com",
        "hashed_password": "$2b$12$fb9jW7QUZ9KIEAAtVmWMEOGtehKy9FafUr7Zfrsb3ZMhsBbzZs7SC",  # password is kodjo
        "disabled": False,
    },
}
class Token(BaseModel):
    """
    Response body of the /token login endpoint.
    """
    access_token: str  # the encoded JWT
    token_type: str  # always "bearer" in this application


class TokenData(BaseModel):
    """
    Claims extracted from a decoded JWT.
    """
    username: str | None = None


class User(BaseModel):
    """
    Public representation of a user (no credential material).
    """
    username: str
    email: str | None = None
    full_name: str | None = None
    disabled: bool | None = None  # truthy => account deactivated


class UserInDB(User):
    """
    User record as stored in the user database: adds the password hash.
    """
    hashed_password: str  # bcrypt hash, never the clear-text password
def get_password_hash(password: str):
    """
    Hash a clear-text password with the module's passlib context.

    :param password: clear-text password
    :type password: str
    :return: the bcrypt hash of ``password``
    :rtype: str
    """
    hashed = pwd_context.hash(password)
    return hashed
def get_user(db, username: str):
    """
    Look a user up in the user store.

    :param db: mapping of username -> user record dict
    :param username: username to look up
    :return: a UserInDB for the record, or None when the user is unknown
    """
    record = db.get(username)
    if record is None:
        return None
    return UserInDB(**record)
def authenticate_user(fake_db, username: str, password: str):
    """
    Validate a username/password pair against the user store.

    :param fake_db: the user store mapping
    :param username: login to check
    :param password: clear-text password to verify
    :return: the matching user when credentials are valid, False otherwise
    """
    user = get_user(fake_db, username)
    if user and pwd_context.verify(password, user.hashed_password):
        return user
    return False
def create_access_token(data: dict, expires_delta: timedelta | None = None):
    """
    Build a signed JWT embedding ``data`` plus an expiration claim.

    :param data: claims to embed (e.g. {"sub": username}); not mutated
    :param expires_delta: token lifetime; defaults to 15 minutes
    :return: the encoded JWT as a string
    """
    to_encode = data.copy()
    # timezone-aware UTC: naive datetime.utcnow() is deprecated and the
    # aware value encodes to the same numeric "exp" claim
    if expires_delta:
        expire = datetime.now(timezone.utc) + expires_delta
    else:
        expire = datetime.now(timezone.utc) + timedelta(minutes=15)
    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
    return encoded_jwt
async def get_current_user(token: str = Depends(oauth2_scheme)):
    """
    FastAPI dependency resolving the current user from a bearer token.

    :param token: JWT extracted from the request by the oauth2 scheme
    :return: the authenticated user
    :raises HTTPException: 401 when the token is invalid/expired or the
        user it names cannot be found
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    # keep the try block minimal: only jwt.decode can raise JWTError
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError as ex:
        # chain the cause so the original decoding error stays visible in logs
        raise credentials_exception from ex
    username: str = payload.get("sub")
    if username is None:
        raise credentials_exception
    user = get_user(fake_users_db, username=username)
    if user is None:
        raise credentials_exception
    return user
async def get_current_active_user(current_user: User = Depends(get_current_user)):
    """
    FastAPI dependency that checks the resolved user is still active.

    :param current_user: user resolved by get_current_user
    :type current_user: User
    :return: the user, when the account is not disabled
    :rtype: User
    :raises HTTPException: 400 when the account is disabled
    """
    if not current_user.disabled:
        return current_user
    raise HTTPException(status_code=400, detail="Inactive user")
# ---- diff-view artifact (new file, +66 lines): server application module follows ----
from datetime import timedelta
import uvicorn
from fastapi import Depends, FastAPI, HTTPException
from fastapi.security import OAuth2PasswordRequestForm
from starlette import status
from constants import CLIENT_OPERATION_QUIT, EXIT_COMMANDS, SHEERKA_PORT
from server.authentication import ACCESS_TOKEN_EXPIRE_MINUTES, User, authenticate_user, create_access_token, \
fake_users_db, get_current_active_user
# The single FastAPI application served by uvicorn (see __main__ below).
app = FastAPI()


@app.get("/")
async def root() -> str:
    """
    Root path. Simply display a welcome message.

    :return: a static greeting string
    :rtype: str
    """
    return "Welcome, my name is Sheerka."
@app.post("/token")
async def login(form_data: OAuth2PasswordRequestForm = Depends()):
    """
    OAuth2 password-flow login endpoint.

    :param form_data: username/password form posted by the client
    :return: {"access_token": <jwt>, "token_type": "bearer"}
    :raises HTTPException: 401 when the credentials are invalid
    """
    user = authenticate_user(fake_users_db, form_data.username, form_data.password)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # token lifetime is centralised in the authentication module
    access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    access_token = create_access_token(data={"sub": user.username}, expires_delta=access_token_expires)
    return {"access_token": access_token, "token_type": "bearer"}
@app.post("/echo/{message}", status_code=status.HTTP_200_OK, response_model=dict)
async def echo(message: str, current_user: User = Depends(get_current_active_user)) -> dict:
    """
    Echo ``message`` back to the authenticated caller.

    :param current_user: user resolved from the bearer token
    :type current_user: User
    :param message: text to echo; members of EXIT_COMMANDS end the session
    :type message: str
    :return: {"status": bool, "response": str, "command": client operation hint or None}
    :rtype: dict
    """
    if message in EXIT_COMMANDS:
        # tell the client to quit its session
        return {
            "status": True,
            "response": "Take care.",
            "command": CLIENT_OPERATION_QUIT
        }
    return {
        "status": True,
        "response": f"from {current_user.username}: {message}",
        "command": None,
    }
if __name__ == "__main__":
    # Allow running the API directly (without the uvicorn CLI).
    uvicorn.run("server.main:app", port=SHEERKA_PORT, log_level="info")
# ---- diff-view artifact (new file, +62 lines): test mock server module follows ----
import logging
from multiprocessing import Process
from time import sleep
import uvicorn
from fastapi import FastAPI
class MockServer:
    """ Core application to test: a throwaway FastAPI app served by uvicorn
    in a daemon subprocess, usable as a context manager. """

    def __init__(self, endpoints: list[dict]):
        """
        :param endpoints: endpoint descriptions
        :type endpoints: list of {path: '', response: '', [method: 'get'|'post'], [exception: Exception]}
        """
        self.api = FastAPI()

        def make_handler(ep: dict):
            # Bind 'ep' per endpoint through this factory: the previous
            # 'lambda: endpoint[...]' late-bound the loop variable, so with
            # several endpoints EVERY route served the LAST endpoint's payload.
            if "exception" in ep:
                def handler():
                    raise ep["exception"]
            else:
                def handler():
                    return ep["response"]
            return handler

        # register endpoints
        for endpoint in endpoints:
            method = endpoint.get("method", "get")
            register = self.api.post if method == "post" else self.api.get
            register(endpoint["path"])(make_handler(endpoint))

        # register shutdown
        self.api.on_event("shutdown")(self.close)

        # create the process (daemon, so a crashed test cannot leak it)
        self.proc = Process(target=uvicorn.run,
                            args=(self.api,),
                            kwargs={
                                "host": "127.0.0.1",
                                "port": 5000,
                                "log_level": "info"},
                            daemon=True)

    async def close(self):
        """ Gracefull shutdown. """
        logging.warning("Shutting down the app.")

    def start_server(self):
        """ Launch uvicorn in its subprocess and give it time to bind. """
        self.proc.start()
        sleep(0.1)

    def stop_server(self):
        """ Kill the uvicorn subprocess. """
        self.proc.terminate()

    def __enter__(self):
        self.start_server()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop_server()
# ---- diff-view artifact (new file, +539 lines): data-provider test module follows ----
import json
import os
import shutil
from datetime import date, datetime
from os import path
import pytest
from core.global_symbols import NotFound
from sdp.sheerkaDataProvider import Event, SheerkaDataProvider
from sdp.sheerkaSerializer import JsonSerializer, PickleSerializer
# All test artifacts are written under the build tree, outside the sources.
tests_root = path.abspath("../../build/tests")
# Digest of a well-known event, reused as an opaque marker in many tests.
evt_digest = "3a571cb6034ef6fc8d7fe91948d0d29728eed74de02bac7968b0e9facca2c2d7"


def read_json_file(sdp, file_name):
    # Helper: open a file through the sdp's io layer and parse it as json.
    with sdp.io.open(file_name, "r") as f:
        return json.load(f)
class ObjNoKey:
    """
    Object with no key, they won't be ordered
    Not suitable for Json dump as there is no to_dict() method
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        # hash and equality both rely on the (a, b) pair
        return hash((self.a, self.b))

    def __eq__(self, obj):
        if not isinstance(obj, ObjNoKey):
            return False
        return (self.a, self.b) == (obj.a, obj.b)

    def __repr__(self):
        return f"ObjNoKey({self.a}, {self.b})"
class ObjWithDigestWithKey:
    """
    Object with a key that can compute its digest.
    It can be used to test objects sharing the same key (but that are different)
    Not suitable for Json dump as there is no to_dict() method
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        # hash and equality both rely on the (a, b) pair
        return hash((self.a, self.b))

    def __eq__(self, obj):
        if not isinstance(obj, ObjWithDigestWithKey):
            return False
        return (self.a, self.b) == (obj.a, obj.b)

    def __repr__(self):
        return f"ObjWithDigestWithKey({self.a}, {self.b})"

    def get_key(self):
        # only 'a' identifies the object
        return self.a

    def get_digest(self):
        # digest is the concatenation of both fields
        return f"{self.a}{self.b}"
@pytest.fixture(autouse=True)
def init_test():
    """
    I test both SheerkaDataProviderFileIO and SheerkaDataProviderDictionaryIO
    So it's important to reset the folders between two tests

    :return:
    :rtype:
    """
    # start every test from a pristine build/tests folder
    if path.exists(tests_root):
        shutil.rmtree(tests_root)
    if not path.exists(tests_root):
        os.makedirs(tests_root)
    # run the test from inside that folder so relative roots land there
    current_pwd = os.getcwd()
    os.chdir(tests_root)
    yield None
    # restore the working directory whatever the test outcome
    os.chdir(current_pwd)
@pytest.mark.parametrize("root, expected", [
    (".sheerka", path.abspath(path.join(tests_root, ".sheerka"))),
    ("mem://", "")
])
def test_i_can_init_the_data_provider(root, expected):
    # Creating a provider must materialize its root (file io) or expose an
    # empty root (in-memory io).
    sdp = SheerkaDataProvider(root)
    assert sdp.io.root == expected
    assert sdp.io.exists(sdp.io.root)


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_save_and_load_an_event(root):
    # An event can be saved, reloaded by digest, and becomes the last event.
    sdp = SheerkaDataProvider(root)
    event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo")
    evt_digest = sdp.save_event(event)
    evt = sdp.load_event(evt_digest)
    assert evt.date == datetime(year=2007, month=9, day=10)
    assert evt.user_id == "kodjo"
    assert evt.message == "hello world"
    assert evt.parents is None
    assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest))
    # I can get the last event
    evt = sdp.load_event()
    assert evt.message == "hello world"
    # check that the last event is updated
    last_event_file = path.join(sdp.io.root, SheerkaDataProvider.LastEventFile)
    assert sdp.io.exists(last_event_file)
    assert sdp.io.read_text(last_event_file) == evt_digest


def test_i_can_save_and_load_events_with_multiple_sdp():
    # Two providers sharing the same root must see each other's events.
    root = ".sheerka"
    sdp1 = SheerkaDataProvider(root)
    sdp1.save_event(Event("event 1", date=date(year=2007, month=9, day=10), user_id="kodjo"))
    sdp1.save_event(Event("event 2", date=date(year=2007, month=9, day=10), user_id="kodjo"))
    sdp2 = SheerkaDataProvider(root, "Another sdp")
    sdp2.save_event(Event("event 3", date=date(year=2007, month=9, day=10), user_id="kodjo"))
    sdp2.save_event(Event("event 4", date=date(year=2007, month=9, day=10), user_id="kodjo"))
    events_from_1 = list(sdp1.load_events(-1))
    events_from_2 = list(sdp2.load_events(-1))
    assert [e.message for e in events_from_1] == ['event 4', 'event 3', 'event 2', 'event 1']
    assert [e.message for e in events_from_2] == ['event 4', 'event 3', 'event 2', 'event 1']
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_get_event_history(root):
    # The second saved event must reference the first one as its parent.
    sdp = SheerkaDataProvider(root)
    event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo")
    event2 = Event("hello world 2", date=date(year=2007, month=9, day=10), user_id="kodjo")
    evt_digest1 = sdp.save_event(event)
    evt_digest2 = sdp.save_event(event2)
    evt = sdp.load_event(evt_digest2)
    assert evt.date == datetime(year=2007, month=9, day=10)
    assert evt.user_id == "kodjo"
    assert evt.message == "hello world 2"
    assert evt.parents == [evt_digest1]


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_load_events(root):
    # load_events(count, skip) pages from most recent to oldest;
    # count == 0 means "everything".
    sdp = SheerkaDataProvider(root)
    for i in range(15):
        sdp.save_event(Event(f"TEST::Hello {i}"))
    events = list(sdp.load_events(10))  # first ten
    assert len(events) == 10
    assert events[0].message == "TEST::Hello 14"
    assert events[9].message == "TEST::Hello 5"
    events = list(sdp.load_events(10, 5))  # skip first 5, then take 10
    assert len(events) == 10
    assert events[0].message == "TEST::Hello 9"
    assert events[9].message == "TEST::Hello 0"
    events = list(sdp.load_events(20, 10))  # skip first 10, take 20,(but only 5 remaining)
    assert len(events) == 5
    assert events[0].message == "TEST::Hello 4"
    assert events[4].message == "TEST::Hello 0"
    events = list(sdp.load_events(1, 20))  # skip first 20, take one
    assert len(events) == 0
    events = list(sdp.load_events(0))  # all
    assert len(events) == 15


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_load_events_when_no_event(root):
    # Paging over an empty store yields empty results, never an error.
    sdp = SheerkaDataProvider(root)
    events = list(sdp.load_events(1))
    assert len(events) == 0
    events = list(sdp.load_events(1, 5))
    assert len(events) == 0
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_add_and_reload_one_item(root):
    # A transaction commits items, records the triggering event, snapshots
    # the state and moves the HEAD reference.
    sdp = SheerkaDataProvider(root)
    event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo")
    with sdp.get_transaction(event) as transaction:
        transaction.add("entry", "key", "foo => bar")
        transaction.add("entry", "key2", ObjNoKey("a", "b"))
        transaction.add("entry2", "key", "value2")
    last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    state = sdp.load_state(last_commit)
    loaded1 = sdp.get("entry", "key")
    loaded2 = sdp.get("entry", "key2")
    loaded3 = sdp.get("entry2", "key")
    load_entry = sdp.get("entry")
    # check that the event is saved
    evt_digest = event.get_digest()
    assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest))
    # check the values
    assert loaded1 == "foo => bar"
    assert loaded2 == ObjNoKey("a", "b")
    assert loaded3 == "value2"
    assert load_entry == {
        "key": "foo => bar",
        "key2": ObjNoKey("a", "b")
    }
    assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit))
    assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.RefFolder, sdp.name, SheerkaDataProvider.HeadFile))
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": {'key': 'foo => bar', 'key2': ObjNoKey("a", "b")},
                          'entry2': {'key': 'value2'}}
    assert sdp.io.read_text(
        path.join(sdp.io.root, SheerkaDataProvider.RefFolder, sdp.name, SheerkaDataProvider.HeadFile)) == last_commit


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_load_an_entry(root):
    # Loading a whole entry returns a detached copy of its mapping.
    sdp = SheerkaDataProvider(root)
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key1", "foo")
        transaction.add("entry", "key2", "bar")
        transaction.add("entry", "key3", "baz")
    item = sdp.get("entry", "key1")
    assert item == "foo"
    load_entry = sdp.get("entry")
    assert load_entry == {
        "key1": "foo",
        "key2": "bar",
        "key3": "baz",
    }
    # load entry was a copy
    load_entry["key1"] = "another foo"
    assert sdp.get("entry", "key1") == "foo"
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_add_and_reload_a_list_of_items(root):
    # Lists round-trip through the state as-is.
    sdp = SheerkaDataProvider(root)
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key", ["foo => bar", ObjNoKey("a", "b")])
    last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    state = sdp.load_state(last_commit)
    loaded = sdp.get("entry", "key")
    # check the values
    assert loaded == ["foo => bar", ObjNoKey("a", "b")]
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": {'key': ['foo => bar', ObjNoKey('a', 'b')]}}


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_add_and_reload_a_set_of_items(root):
    # Sets round-trip as well (ObjNoKey is hashable).
    sdp = SheerkaDataProvider(root)
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key", {"foo => bar", ObjNoKey("a", "b")})
    last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    state = sdp.load_state(last_commit)
    loaded = sdp.get("entry", "key")
    # check the values
    assert loaded == {"foo => bar", ObjNoKey("a", "b")}
    assert state.date is not None
    assert state.parents == []
    assert state.events == [evt_digest]
    assert state.data == {"entry": {'key': {'foo => bar', ObjNoKey('a', 'b')}}}


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_add_and_reload_an_entry(root):
    # A None key stores the value directly under the entry; loaded values
    # are detached copies whatever their type.
    sdp = SheerkaDataProvider(root)
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry1", None, "foo")
        transaction.add("entry2", None, {"key": "foo", "key1": "bar"})
        transaction.add("entry3", None, {"foo", "bar"})
        transaction.add("entry4", None, ["foo", "bar"])
    loaded_entry1 = sdp.get("entry1")
    loaded_entry2 = sdp.get("entry2")
    loaded_entry3 = sdp.get("entry3")
    loaded_entry4 = sdp.get("entry4")
    assert loaded_entry1 == "foo"
    assert loaded_entry2 == {"key": "foo", "key1": "bar"}
    assert loaded_entry3 == {"foo", "bar"}
    assert loaded_entry4 == ["foo", "bar"]
    # loaded values are copies
    loaded_entry2["key"] = "foo2"
    assert sdp.get("entry2", "key") == "foo"
    loaded_entry3.remove("foo")
    assert sdp.get("entry3") == {"foo", "bar"}
    loaded_entry4[0] = "foo2"
    assert sdp.get("entry4")[0] == "foo"
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_override_values(root):
    # A later transaction replaces a previously stored value.
    sdp = SheerkaDataProvider(root)
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key", {"foo => bar", ObjNoKey("a", "b")})
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key", "new_value")
    loaded = sdp.get("entry", "key")
    assert loaded == "new_value"


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_add_an_object_and_save_it_as_a_reference(root):
    # use_ref=True stores '##REF##:<digest>' markers in the state while
    # sdp.get transparently resolves them back to objects.
    sdp = SheerkaDataProvider(root)
    sdp.serializer.register(JsonSerializer(lambda o: isinstance(o, ObjNoKey)))
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key1", ObjNoKey("a", "b"), use_ref=True)
        transaction.add("entry", "key2", [ObjNoKey("a", "b"), ObjNoKey("c", "d")], use_ref=True)
        transaction.add("entry", "key3", {ObjNoKey("a", "b"), ObjNoKey("c", "d")}, use_ref=True)
    assert sdp.get("entry", "key1") == ObjNoKey("a", "b")
    assert sdp.get("entry", "key2") == [ObjNoKey("a", "b"), ObjNoKey("c", "d")]
    assert sdp.get("entry", "key3") == {ObjNoKey("a", "b"), ObjNoKey("c", "d")}
    # I can ask for the whole entry
    assert sdp.get("entry") == {"key1": ObjNoKey("a", "b"),
                                "key2": [ObjNoKey("a", "b"), ObjNoKey("c", "d")],
                                "key3": {ObjNoKey("a", "b"), ObjNoKey("c", "d")}}
    state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
    assert state.data == {
        "entry": {'key1': '##REF##:fbc2b1c60ed753b49217cae851e342371ee39ebabc9778105f450812e615a513',
                  'key2': ['##REF##:fbc2b1c60ed753b49217cae851e342371ee39ebabc9778105f450812e615a513',
                           '##REF##:448420dbc57d61401d10a98759fccdabbe50e2e825b6da3bd018c190926bcda4'],
                  'key3': {'##REF##:448420dbc57d61401d10a98759fccdabbe50e2e825b6da3bd018c190926bcda4',
                           '##REF##:fbc2b1c60ed753b49217cae851e342371ee39ebabc9778105f450812e615a513'}}
    }


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_add_an_object_as_a_reference_using_its_own_digest(root):
    # Objects exposing get_digest() provide their own reference digest.
    sdp = SheerkaDataProvider(root)
    sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey)))
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key1", ObjWithDigestWithKey("a", "b"), use_ref=True)
    assert sdp.get("entry", "key1") == ObjWithDigestWithKey("a", "b")
    state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
    assert state.data == {
        "entry": {'key1': '##REF##:ab'}
    }


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_remove_elements(root):
    # Removing a key drops it from subsequent reads and snapshots.
    sdp = SheerkaDataProvider(root)
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key", "value")
        transaction.add("entry", "key2", "value2")
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.remove("entry", "key")
    assert sdp.get("entry", "key") is NotFound
    state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile))
    assert state.data == {
        "entry": {'key2': 'value2'}
    }
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_keep_state_history(root):
    # Each snapshot records its predecessor, building a linked history.
    sdp = SheerkaDataProvider(root)
    with sdp.get_transaction(Event("first event")) as transaction:
        transaction.add("entry", "key", "value")
    state_digest1 = transaction.snapshot
    with sdp.get_transaction(Event("second event")) as transaction:
        transaction.add("entry", "key2", "value2")
    state_digest2 = transaction.snapshot
    with sdp.get_transaction(Event("third event")) as transaction:
        transaction.add("entry", "key2", "value2")
    state_digest3 = transaction.snapshot
    state = sdp.load_state(state_digest3)
    assert state.parents == [state_digest2]
    state = sdp.load_state(state_digest2)
    assert state.parents == [state_digest1]
    state = sdp.load_state(state_digest1)
    assert state.parents == []


@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_save_and_load_ontologies_names(root):
    # Ontology names round-trip and are persisted one per line.
    sdp = SheerkaDataProvider(root)
    ontologies = ['new ontology', '#unit_test#', '__default__']
    sdp.save_ontologies(ontologies)
    assert sdp.load_ontologies() == ontologies
    # extra
    ontologies_files = path.join(sdp.io.root, SheerkaDataProvider.OntologiesFiles)
    assert sdp.io.exists(ontologies_files)
    assert sdp.io.read_text(ontologies_files) == """new ontology
#unit_test#
__default__"""


def test_i_can_remove_even_if_not_exist():
    # Removing unknown entries/keys (or None) must be a silent no-op.
    sdp = SheerkaDataProvider("mem://")
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.remove("entry", None)
        transaction.remove(None, "key")
        transaction.remove("entry", "key")


def test_i_get_default_value_if_entry_is_missing():
    # get() honors its default argument for missing entries.
    sdp = SheerkaDataProvider("mem://")
    assert sdp.get("fake_entry", "fake_key", "default_value") == "default_value"


def test_exists():
    # exists() checks presence of an entry, or of an entry+key pair.
    sdp = SheerkaDataProvider("mem://")
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key", "value")
    assert not sdp.exists("entry2")
    assert not sdp.exists("entry", "key2")
    assert sdp.exists("entry", "key")


def test_not_found_is_returned_when_an_entry_is_not_found():
    # Without an explicit default, missing lookups yield the NotFound sentinel.
    sdp = SheerkaDataProvider("mem://")
    with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction:
        transaction.add("entry", "key", "value")
    assert sdp.get("entry", "key") == "value"
    assert sdp.get("entry", "key2") == NotFound
    assert sdp.get("entry2") == NotFound
# ---- diff-view artifact (new file, +35 lines): serializer test module follows ----
import io
from sdp.sheerkaSerializer import JsonSerializer
class ObjNoKey:
    """
    Object with no key, they won't be ordered
    Not suitable for Json dump as there is no to_dict() method
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __hash__(self):
        # hash consistent with __eq__: both rely on the (a, b) pair
        return hash((self.a, self.b))

    def __eq__(self, obj):
        return isinstance(obj, ObjNoKey) and \
            self.a == obj.a and \
            self.b == obj.b

    def __repr__(self):
        return f"ObjNoKey({self.a}, {self.b})"
def test_i_can_json_serialize():
    # Round-trip an arbitrary object through the JsonSerializer:
    # dump into a byte stream, then load it back and compare.
    json_serializer = JsonSerializer(lambda obj: True)
    obj = ObjNoKey("a", "b")
    stream = io.BytesIO()
    stream = json_serializer.dump(stream, obj, None)
    res = json_serializer.load(stream, None)
    assert res == obj
# ---- diff-view artifact (new file, +98 lines): client test module follows ----
import json
from fastapi import HTTPException
from starlette import status
from client import SheerkaClient, parse_arguments
from mockserver import MockServer
def test_i_can_start_with_a_default_hostname():
    # Without arguments, the client targets localhost on the default port.
    parsed = parse_arguments([])
    assert parsed.hostname == "http://localhost"
    assert parsed.port == 56356


def test_i_can_override_hostname_and_port():
    # Hostname is positional; the port has a long (--port) and short (-p) flag.
    parsed = parse_arguments(["new_host", "--port", "1515"])
    assert parsed.hostname == "new_host"
    assert parsed.port == 1515
    parsed = parse_arguments(["new_host", "-p", "1515"])
    assert parsed.hostname == "new_host"
    assert parsed.port == 1515


def test_i_can_provide_user_and_password():
    # Credentials also have long and short forms (-u / -P).
    parsed = parse_arguments(["--username", "my_user", "--password", "my_password"])
    assert parsed.username == "my_user"
    assert parsed.password == "my_password"
    parsed = parse_arguments(["-u", "my_user", "-P", "my_password"])
    assert parsed.username == "my_user"
    assert parsed.password == "my_password"
def test_i_can_manage_when_no_server():
    # A refused connection yields a failed result, not an exception.
    client = SheerkaClient("http://localhost", 80)
    res = client.check_url()
    assert res.status is False
    assert res.message == "Connection refused."


def test_i_can_manage_when_resource_is_not_found():
    # A server without routes answers 404; the client reports the body.
    with MockServer([]):
        client = SheerkaClient("http://localhost", 5000)
        res = client.check_url()
        assert not res.status
        assert res.message == '{"detail":"Not Found"}'


def test_i_can_connect_to_a_server():
    # check_url succeeds when the root endpoint answers.
    with MockServer([{
        "path": "/",
        "response": "Hello world"
    }]):
        client = SheerkaClient("http://localhost", 5000)
        res = client.check_url()
        assert res.status
        assert res.message == '"Hello world"'


def test_i_can_authenticate_with_valid_credentials():
    # connect() posts to /token and reports the connected username.
    with MockServer([{
        "path": "/",
        "response": "Hello world"
    }, {
        "method": "post",
        "path": "/token",
        "response": {"access_token": "xxxx", "token_type": "bearer"}
    }]):
        client = SheerkaClient("http://localhost", 5000)
        res = client.connect("valid_username", "valid_password")
        assert res.status
        assert res.message == "Connected as valid_username"


def test_i_can_manage_when_authentication_fails():
    # A 401 from /token surfaces as a failed connect carrying the detail text.
    with MockServer([{
        "path": "/",
        "response": "Hello world"
    }, {
        "method": "post",
        "path": "/token",
        "exception": HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )
    }]):
        client = SheerkaClient("http://localhost", 5000)
        res = client.connect("username", "wrong_password")
        assert not res.status
        assert res.message == 'Incorrect username or password'