From 21a397861a1155ecc59816d33e0d82a7a7367d45 Mon Sep 17 00:00:00 2001 From: kodjo Date: Thu, 19 Jan 2023 16:08:23 +0000 Subject: [PATCH] SettingUpClientServer (#7) Reviewed-on: https://gitea.sheerka.synology.me/kodjo/Sheerka/pulls/7 --- .gitignore | 2 +- README.md | 10 +- requirements.txt | 42 ++ src/client.py | 191 ++++++++ src/constants.py | 3 + src/core/ExecutionContext.py | 2 + src/core/Sheerka.py | 2 + src/core/global_symbols.py | 31 ++ src/core/utils.py | 32 ++ src/requirements.txt | 29 ++ src/sdp/sheerkaDataProvider.py | 650 ++++++++++++++++++++++++++ src/sdp/sheerkaDataProviderIO.py | 182 ++++++++ src/sdp/sheerkaSerializer.py | 382 +++++++++++++++ src/server/authentication.py | 124 +++++ src/server/main.py | 66 +++ tests/mockserver.py | 62 +++ tests/sdp/test_sheerkaDataProvider.py | 539 +++++++++++++++++++++ tests/sdp/test_sheerkaSerializer.py | 35 ++ tests/test_client.py | 98 ++++ 19 files changed, 2480 insertions(+), 2 deletions(-) create mode 100644 requirements.txt create mode 100644 src/client.py create mode 100644 src/constants.py create mode 100644 src/core/ExecutionContext.py create mode 100644 src/core/Sheerka.py create mode 100644 src/core/global_symbols.py create mode 100644 src/core/utils.py create mode 100644 src/requirements.txt create mode 100644 src/sdp/sheerkaDataProvider.py create mode 100644 src/sdp/sheerkaDataProviderIO.py create mode 100644 src/sdp/sheerkaSerializer.py create mode 100644 src/server/authentication.py create mode 100644 src/server/main.py create mode 100644 tests/mockserver.py create mode 100644 tests/sdp/test_sheerkaDataProvider.py create mode 100644 tests/sdp/test_sheerkaSerializer.py create mode 100644 tests/test_client.py diff --git a/.gitignore b/.gitignore index 5d381cc..f295d3d 100644 --- a/.gitignore +++ b/.gitignore @@ -158,5 +158,5 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. 
For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ diff --git a/README.md b/README.md index 1a15983..2a78234 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ # Sheerka -My personnal AI \ No newline at end of file +My personnal AI + + +## To start the server + +```shell +cd src +uvicorn server:app --reload +``` \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0f7e9b1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,42 @@ +anyio==3.6.2 +attrs==22.2.0 +bcrypt==4.0.1 +certifi==2022.12.7 +cffi==1.15.1 +charset-normalizer==3.0.1 +click==8.1.3 +cryptography==39.0.0 +ecdsa==0.18.0 +exceptiongroup==1.1.0 +fastapi==0.89.1 +h11==0.14.0 +httptools==0.5.0 +idna==3.4 +iniconfig==2.0.0 +oauthlib==3.2.2 +packaging==23.0 +passlib==1.7.4 +pluggy==1.0.0 +prompt-toolkit==3.0.36 +pyasn1==0.4.8 +pycparser==2.21 +pydantic==1.10.4 +pytest==7.2.0 +python-dotenv==0.21.0 +python-jose==3.3.0 +python-multipart==0.0.5 +PyYAML==6.0 +requests==2.28.2 +requests-oauthlib==1.3.1 +rsa==4.9 +six==1.16.0 +sniffio==1.3.0 +starlette==0.22.0 +tomli==2.0.1 +typing_extensions==4.4.0 +urllib3==1.26.14 +uvicorn==0.20.0 +uvloop==0.17.0 +watchfiles==0.18.1 +wcwidth==0.2.5 +websockets==10.4 diff --git a/src/client.py b/src/client.py new file mode 100644 index 0000000..9355c7e --- /dev/null +++ b/src/client.py @@ -0,0 +1,191 @@ +import argparse +import os +import re +import sys +from dataclasses import dataclass +from os import path + +import prompt_toolkit +import requests +from prompt_toolkit import HTML, print_formatted_text, prompt +from prompt_toolkit.auto_suggest import AutoSuggestFromHistory +from prompt_toolkit.history import FileHistory +from requests import ConnectionError, HTTPError + +from constants import CLIENT_OPERATION_QUIT, EXIT_COMMANDS + +connect_regex = re.compile("connect\(['\"](.*?)['\"]\s*,\s*['\"](.*?)['\"]\)") + + +@dataclass +class 
TestResponse: + status: bool + message: str + + +class SheerkaClient: + def __init__(self, hostname: str, port: int): + self.hostname = hostname + self.port = port + self.url = f"{self.hostname}:{self.port}" if self.port else f"{self.hostname}" + self.history_file = path.abspath(path.join(path.expanduser("~"), ".sheerka", "history.txt")) + self.token = None + + def init_folder(self): + root_path = path.dirname(self.history_file) + if not path.exists(root_path): + os.makedirs(root_path) + + def check_url(self) -> TestResponse: + """ + Make the firs attempt to connect to sheerka + :return: + """ + try: + response = requests.get(self.url) + return TestResponse(bool(response), response.text) + + except HTTPError as ex: + return TestResponse(False, f"Error : {ex}") + except ConnectionError: + return TestResponse(False, "Connection refused.") + + def connect(self, username: str, password: str) -> TestResponse: + token_url = f"{self.url}/token" + try: + form_data = {"username": username, "password": password} + res = requests.post(token_url, data=form_data) + if res: + self.token = res.json()["access_token"] + return TestResponse(True, f"Connected as {username}") + else: + self.token = None + return TestResponse(False, res.json()["detail"]) + except Exception as ex: + self.token = None + return TestResponse(False, str(ex)) + + def run(self): + while True: + try: + # get the input form prompt_toolkit + _in = prompt('sheerka> ', + history=FileHistory(self.history_file), + auto_suggest=AutoSuggestFromHistory() + # lexer=PygmentsLexer(PythonLexer) + ) + _in = _in.strip() + + if _in == "clear": + prompt_toolkit.shortcuts.clear() + continue + + # allow reconnection + m = connect_regex.match(_in) + if m: + username, password = m.groups() + print(f"Trying to connect to {self.url}") + res = self.connect(username, password) + self.print_res(res) + continue + + # Call Sheerka + if self.token: + headers = {"Authorization": f"Bearer {self.token}"} + response = 
requests.post(f"{self.url}/echo/{_in}", headers=headers) + else: + response = requests.post(f"{self.url}/echo/{_in}") + + # read the response from the server + if response: + as_json = response.json() + + # Print the response and loop + self.print_info(as_json['response']) + if as_json['command'] == CLIENT_OPERATION_QUIT: + break + else: + self.print_error(response.text) + + # allow termination even if the sever is not responding + if _in in EXIT_COMMANDS: + self.print_info("Forced termination. Bye.") + break + + except KeyboardInterrupt: + continue + except EOFError: + self.print_error("EOFError...") + return 3 + + @staticmethod + def print_error(message: str): + """ + Print the message in red + :param message: + :type message: + :return: + :rtype: + """ + print_formatted_text(HTML(f'{message}')) + + @staticmethod + def print_info(message: str): + """ + + :param message: + :type message: + :return: + :rtype: + """ + print(message) + + def print_res(self, response: TestResponse) -> None: + """ + Print the colored message, depending on the status of the response + :param response: + :type response: + :return: + :rtype: + """ + if response.status: + self.print_info(response.message) + else: + self.print_error(response.message) + + +def parse_arguments(args: list): + """ + Parses the arguments + :param args: + :type args: + :return: + :rtype: + """ + parser = argparse.ArgumentParser(prog="SheerkaClient", description="CLI to connect to Sheerka") + parser.add_argument("hostname", nargs='?', default="http://localhost") + parser.add_argument("-p", "--port", action="store", type=int, default=56356) + parser.add_argument("-v", "--version", action="version") + parser.add_argument("-u", "--username", action="store") + parser.add_argument("-P", "--password", action="store") + + return parser.parse_args(args) + + +if __name__ == "__main__": + parsed_args = parse_arguments(sys.argv[1:]) + client = SheerkaClient(parsed_args.hostname, parsed_args.port) + client.init_folder() + + 
result = client.check_url() + if not result.status: + print(result.message) + exit(0) + + print(result.message) + + if parsed_args.username: + result = client.connect(parsed_args.username, parsed_args.password) + client.print_res(result) + + client.run() diff --git a/src/constants.py b/src/constants.py new file mode 100644 index 0000000..dc0b95f --- /dev/null +++ b/src/constants.py @@ -0,0 +1,3 @@ +SHEERKA_PORT = 56356 +EXIT_COMMANDS = ("quit", "exit", "bye") +CLIENT_OPERATION_QUIT = "quit" diff --git a/src/core/ExecutionContext.py b/src/core/ExecutionContext.py new file mode 100644 index 0000000..809a16d --- /dev/null +++ b/src/core/ExecutionContext.py @@ -0,0 +1,2 @@ +class ExecutionContext: + pass diff --git a/src/core/Sheerka.py b/src/core/Sheerka.py new file mode 100644 index 0000000..d100674 --- /dev/null +++ b/src/core/Sheerka.py @@ -0,0 +1,2 @@ +class Sheerka: + pass diff --git a/src/core/global_symbols.py b/src/core/global_symbols.py new file mode 100644 index 0000000..89631df --- /dev/null +++ b/src/core/global_symbols.py @@ -0,0 +1,31 @@ +class CustomType: + """ + Base class for custom types used in Sheerka + A custom type is a type that has only one instance across the application and have a semantic meaning + For example the type 'None' is a singleton which have a semantic meaning. 
+ We need to define others in Sheerka + """ + + def __init__(self, value): + self.value = value + + def __repr__(self): + return self.value + + def __eq__(self, other): + return isinstance(other, CustomType) and self.value == other.value + + def __hash__(self): + return hash(self.value) + + +class NotFoundType(CustomType): + """ + Using when an entry in not found in Cache or in sdp + """ + + def __init__(self): + super(NotFoundType, self).__init__("**NotFound**") + + +NotFound = NotFoundType() diff --git a/src/core/utils.py b/src/core/utils.py new file mode 100644 index 0000000..6e32806 --- /dev/null +++ b/src/core/utils.py @@ -0,0 +1,32 @@ +def get_class(qname): + """ + Loads a class from its full qualified name + :param qname: + :return: + """ + parts = qname.split('.') + module = ".".join(parts[:-1]) + m = __import__(module) + for comp in parts[1:]: + m = getattr(m, comp) + return m + + +def get_full_qualified_name(obj): + """ + Returns the full qualified name of a class (including its module name ) + :param obj: + :return: + """ + if obj.__class__ == type: + module = obj.__module__ + if module is None or module == str.__class__.__module__: + return obj.__name__ # Avoid reporting __builtin__ + else: + return module + '.' + obj.__name__ + else: + module = obj.__class__.__module__ + if module is None or module == str.__class__.__module__: + return obj.__class__.__name__ # Avoid reporting __builtin__ + else: + return module + '.' 
+ obj.__class__.__name__ diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..10abc25 --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,29 @@ +anyio==3.6.2 +attrs==22.2.0 +certifi==2022.12.7 +charset-normalizer==3.0.1 +click==8.1.3 +exceptiongroup==1.1.0 +fastapi==0.89.1 +h11==0.14.0 +httptools==0.5.0 +idna==3.4 +iniconfig==2.0.0 +packaging==23.0 +pluggy==1.0.0 +prompt-toolkit==3.0.36 +pydantic==1.10.4 +pytest==7.2.0 +python-dotenv==0.21.0 +PyYAML==6.0 +requests==2.28.2 +sniffio==1.3.0 +starlette==0.22.0 +tomli==2.0.1 +typing_extensions==4.4.0 +urllib3==1.26.14 +uvicorn==0.20.0 +uvloop==0.17.0 +watchfiles==0.18.1 +wcwidth==0.2.5 +websockets==10.4 diff --git a/src/sdp/sheerkaDataProvider.py b/src/sdp/sheerkaDataProvider.py new file mode 100644 index 0000000..206f798 --- /dev/null +++ b/src/sdp/sheerkaDataProvider.py @@ -0,0 +1,650 @@ +import hashlib +import json +import logging +import shutil +import time +from datetime import date, datetime +from os import path +from threading import RLock +from typing import Callable + +from core.ExecutionContext import ExecutionContext +from core.Sheerka import Sheerka +from core.global_symbols import NotFound +from sdp.sheerkaDataProviderIO import SheerkaDataProviderIO +from sdp.sheerkaSerializer import Serializer, SerializerContext + + +def json_default_converter(o): + """ + Default formatter for json + It's used when the json serializer does not know + how to serialise a type + :param o: + :return: + """ + if isinstance(o, (date, datetime)): + return o.isoformat() + + +class Event(object): + """ + Class that represents something that modifies the state of the system + """ + + def __init__(self, message="", user_id="", date=None, parents=None): + self.user_id: str = user_id # id of the user that triggers the modification + self.date: datetime | None = date or datetime.now() # when + self.message: str = message # user input or whatever that modifies the system + self.parents: list[str] = 
parents # digest(s) of the parent(s) of this event + self._digest: str | None = None # digest of the event + + def __str__(self): + return f"{self.date.strftime('%d/%m/%Y %H:%M:%S')} {self.message}" + + def __repr__(self): + return f"{self.get_digest()[:12]} {self.message}" + + def get_digest(self): + """ + Returns the digest of the event + :return: sha256 of the event + """ + + if self._digest: + return self._digest + + if self.user_id == "": + # only possible during the unit test + # We use this little trick to speed up the unit test + self._digest = self.message[6:] if self.message.startswith("TEST::") else "xxx" + return self._digest + + if not isinstance(self.message, str): + raise NotImplementedError(f"message={self.message}") + + to_hash = f"Event:{self.user_id}{self.date}{self.message}{self.parents}".encode("utf-8") + self._digest = hashlib.sha256(to_hash).hexdigest() + return self._digest + + def to_dict(self): + return self.__dict__ + + def from_dict(self, as_dict): + self.user_id = as_dict["user_id"] + self.date = datetime.fromisoformat(as_dict["date"]) + self.message = as_dict["message"] + self.parents = as_dict["parents"] + self._digest = as_dict["_digest"] # freeze the digest + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, Event): + return (self.user_id == other.user_id and + self.date == other.date and + self.message == other.message and + self.parents == other.parents) + + return False + + def __hash__(self): + return hash(self.get_digest()) + + +class State: + """ + Class that represents the state of the system (dictionary of all known entries) + """ + + def __init__(self): + self.date = None + self.events = [] + self.data = {} + + def get_digest(self): + as_json = json.dumps(self.__dict__, default=json_default_converter) + return hashlib.sha256(as_json.encode("utf-8")).hexdigest() + + +class SheerkaDataProviderTransaction: + """ + Class that manage sdp transactions + All sdp actions (add / update / 
remove entries) with the transaction + will be persisted in one time, in the order of the declaration + + + Note that Transaction within Transaction is not supported + """ + + def __init__(self, sdp, event): + self.sdp: SheerkaDataProvider = sdp + self.event: Event = event + self.state: State | None = None + self.snapshot: str | None = None + self.event_digest: str | None = None + + def __enter__(self): + self.sdp.lock.acquire() + + # save the event if needed + self.event_digest = self.sdp.save_event(self.event) if isinstance(self.event, Event) else self.event + + # load state. I need a fresh copy, not the one from sdp + self.snapshot = self.sdp.get_snapshot(SheerkaDataProvider.HeadFile) + self.state = self.sdp.load_state(self.snapshot) + return self + + def add(self, entry, key, items, use_ref=False): + """Adds items to the state""" + with self.sdp.lock: + if entry not in self.state.data: + self.state.data[entry] = {} + + if use_ref: + if isinstance(items, list): + items = [self.sdp.REF_PREFIX + self.sdp.save_obj(item) for item in items] + elif isinstance(items, set): + items = {self.sdp.REF_PREFIX + self.sdp.save_obj(item) for item in items} + else: + items = self.sdp.REF_PREFIX + self.sdp.save_obj(items) + + if key is None: + self.state.data[entry] = items + else: + self.state.data[entry][key] = items + + def remove(self, entry, key): + """ + Remove an entry + :param entry: + :param key: + :return: + """ + with self.sdp.lock: + try: + del (self.state.data[entry][key]) + except KeyError: + pass + + def clear(self, entry): + """ + Clear an entire entry + :param entry: + :return: + """ + with self.sdp.lock: + try: + self.state.data[entry].clear() + except KeyError: + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + self.state.parents = [] if self.snapshot is None else [self.snapshot] + self.state.events = [self.event_digest] + self.state.date = datetime.now() + + self.snapshot = self.sdp.save_state(self.state) + 
self.sdp.set_snapshot(SheerkaDataProvider.HeadFile, self.snapshot) + self.sdp.update_state(self.state) # make sure to keep sync + + return False # let's escalate the exceptions + + +class SheerkaDataProvider: + """Manages the persistence state of the system""" + + EventFolder = "events" + StateFolder = "state" + ObjectsFolder = "objects" + CacheFolder = "cache" + RefFolder = "refs" + HeadFile = "HEAD" + LastEventFile = "LAST_EVENT" + KeysFile = "keys" + OntologiesFiles = "ontologies" + REF_PREFIX = "##REF##:" + + def __init__(self, root=None, sheerka=None, name="__default__"): + self.log = logging.getLogger(__name__) + self.init_log = logging.getLogger("init." + __name__) + self.init_log.debug("Initializing sdp.") + + self.sheerka: Sheerka = sheerka + self.io = SheerkaDataProviderIO.get(root) + self.first_time = self.io.first_time + self.name = name + + self.serializer = Serializer() + self.lock = RLock() + + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + self.state = self.load_state(snapshot) + + def __repr__(self): + return f"SheerkaDataProvider(name={self.name})" + + @staticmethod + def get_stream_digest(stream): + """ + Compute a SHA256 from a stream + :param stream: + :type stream: + :return: + :rtype: + """ + sha256_hash = hashlib.sha256() + for byte_block in iter(lambda: stream.read(4096), b""): + sha256_hash.update(byte_block) + + stream.seek(0) + return sha256_hash.hexdigest() + + def get_transaction(self, event: Event) -> SheerkaDataProviderTransaction: + """ + Creates a new SheerkaTransaction and returns it + :param event: + :type event: + :return: + :rtype: + """ + return SheerkaDataProviderTransaction(self, event) + + def get(self, entry: str, key: str | None = None, default=NotFound, load_origin=True): + """ + Get the value stored in an entry + :param entry: used to group entries that are related, like a mini database table + :param key: + :param default: value to return is not found + :param load_origin: adds the parent object's digest 
to the object + :return: + """ + + with self.lock: + self.log.debug(f"getting {entry=}, {key=}, {default=}, {load_origin=}") + if entry not in self.state.data: + return default + + if key is not None and key not in self.state.data[entry]: + return default + + item = self.state.data[entry] if key is None else self.state.data[entry][key] + if isinstance(item, dict): + return item.copy() if key else {k: self.load_ref_if_needed(v, load_origin) for k, v in item.items()} + else: + return self.load_ref_if_needed(item, load_origin) + + def list(self, entry: str, filter: Callable[[str, object], bool] = None): + """ + Lists elements of entry 'entry' + :param entry: name of the entry to list + :param filter: filter to use + :return: list of elements + """ + if entry not in self.state.data: + return [] + + elements = self.state.data[entry] + + if isinstance(elements, dict): + # manage when elements have a key + filter_to_use = (lambda k, o: True) if filter is None else filter + for key, element in elements.items(): + if filter_to_use(key, element): + if isinstance(element, list): + yield [self._inner_load_ref_if_needed(e) for e in element] + else: + yield self._inner_load_ref_if_needed(element) + else: + # manage when no key is defined for the elements + if not isinstance(elements, list) and not isinstance(elements, set): + elements = [elements] + + filter_to_use = (lambda o: True) if filter is None else filter + for element in elements: + if filter_to_use(element): + yield self._inner_load_ref_if_needed(element) + + def exists(self, entry: str, key: str | None = None): + """ + Returns true if the entry is defined + :param key: + :param entry: + :return: + """ + with self.lock: + exist = entry in self.state.data + if not exist or key is None: + return exist + + return key in self.state.data[entry] + + def reset(self): + """ + Test only, delete all entries + :return: + """ + self.first_time = self.io.first_time + if hasattr(self.io, "reset"): + self.io.reset() + snapshot = 
self.get_snapshot(SheerkaDataProvider.HeadFile) + self.state = self.load_state(snapshot) + + def save_event(self, event: Event): + """ + return an event, given its digest + :param event: + :return: digest of the event + """ + parent = self.get_last_event() + event.parents = [parent] if parent else None + digest = event.get_digest() # must be call after setting the parents + + target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + if self.io.exists(target_path): + return digest + + self.io.write_binary(target_path, self.serializer.serialize(event, None).read()) + self.set_last_event(digest) + + return digest + + def load_event(self, digest: str | None = None): + """ + return an event, given its digest + :param digest: + :return: + """ + digest = digest or self.get_last_event() + if digest is None: + return None + + target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + + with self.io.open(target_path, "rb") as f: + return self.serializer.deserialize(f, None) + + def load_events(self, page_size: int, start=0): + """ + Load multiple events in the same command + :param start: + :param page_size: = -1 to load everything + :return: + """ + + digest = None + if start: + for i in range(start): + event = self.load_event(digest) + if event is None or event.parents is None: + return + digest = event.parents[0] + + count = 0 + while count < page_size or page_size <= 0: + event = self.load_event(digest) + if event is None: + return + + yield event + + if event.parents is None: + return + + digest = event.parents[0] + count += 1 + + def get_last_event(self): + last_event_file = self.io.path_join(self.LastEventFile) + if not self.io.exists(last_event_file): + return None + return self.io.read_text(last_event_file) + + def set_last_event(self, digest: str): + """ + Updates the last event reference file + :param digest: + :type digest: + :return: + :rtype: + """ + last_event_file = self.io.path_join(self.LastEventFile) + return 
self.io.write_text(last_event_file, digest) + + def set_snapshot(self, file: str, digest: str): + """ + Updates the HEAD file. + :param file: It's a parameter as we may manage branches + :type file: + :param digest: + :type digest: + :return: + :rtype: + """ + head_file = self.io.path_join(self.RefFolder, self.name, file) + return self.io.write_text(head_file, digest) + + def get_snapshot(self, file: str): + head_file = self.io.path_join(self.RefFolder, self.name, file) + if not self.io.exists(head_file): + return None + return self.io.read_text(head_file) + + def load_state(self, digest: str): + if digest is None: + return State() + + target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest) + with self.io.open(target_path, "rb") as f: + context = SerializerContext(sheerka=self.sheerka) + return self.serializer.deserialize(f, context) + + def save_state(self, state: State): + digest = state.get_digest() + self.log.debug(f"Saving new state. digest={digest}") + target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest) + if self.io.exists(target_path): + return digest + + context = SerializerContext(sheerka=self.sheerka) + self.io.write_binary(target_path, self.serializer.serialize(state, context).read()) + return digest + + def update_state(self, state: State): + with self.lock: + self.state = state + + def get_execution_context_file_path(self, digest: str, is_admin: bool) -> str: + """ + Compute the path to a given ExecutionContext file + When retrieving the list of command, we need to distinguish user triggered command + from internal command (generally at the startup of Sheerka), hence the is_admin parameter + :param digest: + :type digest: + :param is_admin: + :type is_admin: + :return: + :rtype: + """ + ext = "_admin_context" if is_admin else "_context" + return self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + ext + + def get_execution_result_file_path(self, digest) -> str: + """ + :param digest: + :type 
digest: + :return: + :rtype: + """ + ext = "_result" + return self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + ext + + def has_execution_context(self, digest, is_admin=False) -> bool: + """ + Check is a result file was created for a specific event + :param digest: + :param is_admin: True is the result is an internal admin result file + :return: + """ + target_path = self.get_execution_context_file_path(digest, is_admin) + return self.io.exists(target_path) + + def save_execution_context(self, execution_context: ExecutionContext, is_admin=False) -> str: + """ + Save the execution context associated with an event + To make a long story short, + for every single user input, there is an event (which is the first thing that is created) + and a result (the ExecutionContext created by sheerka.evaluate_user_input() + :param execution_context: + :param is_admin: True is the result is an internal admin result file + :return: + """ + start = time.time() + message = execution_context.event.message + digest = execution_context.event.get_digest() + self.log.debug(f"Saving execution context. digest={digest}, message={message}") + target_path = self.get_execution_context_file_path(digest, is_admin) + if self.io.exists(target_path): + return digest + + context = SerializerContext(sheerka=self.sheerka) + length = self.io.write_binary(target_path, self.serializer.serialize(execution_context, context).read()) + elapsed = time.time() - start + self.log.debug(f"Saved execution context. message={message}, length={length}, elapsed={elapsed}") + return digest + + def save_execution_result(self, digest: str, execution_result) -> str: + """ + On the top of execution context, also save some extra information like, status of the execution + :param digest: + :param execution_result: + :return: + """ + self.log.debug(f"Saving execution context extra information. 
digest={digest}") + target_path = self.get_execution_result_file_path(digest) + if self.io.exists(target_path): + return digest + + context = SerializerContext(sheerka=self.sheerka) + length = self.io.write_binary(target_path, self.serializer.serialize(execution_result, context).read()) + self.log.debug(f"Saved execution context. message={execution_result}, length={length}") + return digest + + def load_execution_context(self, digest: str, is_admin=False) -> ExecutionContext: + """ + Load and deserialize a result file + :param digest: + :param is_admin: True is the result is an internal admin result file + :return: + """ + target_path = self.get_execution_context_file_path(digest, is_admin) + + with self.io.open(target_path, "rb") as f: + context = SerializerContext(sheerka=self.sheerka) + return self.serializer.deserialize(f, context) + + def load_execution_result(self, digest: str): + """ + Load and deserialize a result extra file + :param digest: + :return: + :param digest: + :return: + """ + target_path = self.get_execution_result_file_path(digest) + + with self.io.open(target_path, "rb") as f: + context = SerializerContext(sheerka=self.sheerka) + return self.serializer.deserialize(f, context) + + def load_ref_if_needed(self, item, load_origin): + """ + New version of the function. 
+ The old one must be replaced at some point + Make sure we return the real object, even inside a collection + :param item: + :param load_origin: + :return: + """ + if isinstance(item, list): + return [self._inner_load_ref_if_needed(i, load_origin) for i in item] + elif isinstance(item, set): + return {self._inner_load_ref_if_needed(i, load_origin) for i in item} + elif isinstance(item, dict): + return item.copy() + else: + return self._inner_load_ref_if_needed(item, load_origin) + + def save_obj(self, obj) -> str: + self.log.debug(f"Saving '{obj}' as reference...") + context = SerializerContext(user_name="kodjo", sheerka=self.sheerka) + stream = self.serializer.serialize(obj, context) + digest = obj.get_digest() if hasattr(obj, "get_digest") else self.get_stream_digest(stream) + + target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest) + if self.io.exists(target_path): + self.log.debug(f"...already saved. digest is {digest}") + return digest + + self.io.write_binary(target_path, stream.read()) + + self.log.debug(f"...digest={digest}.") + return digest + + def load_obj(self, digest, add_origin=True): + if digest is None: + return None + + target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest) + if not self.io.exists(target_path): + return None + + with self.io.open(target_path, "rb") as f: + context = SerializerContext(origin=digest, sheerka=self.sheerka) + obj = self.serializer.deserialize(f, context) + + # set the origin of the object + if add_origin: + if isinstance(obj, dict): + obj[Serializer.ORIGIN] = digest + elif not isinstance(obj, str): + setattr(obj, Serializer.ORIGIN, digest) + return obj + + def save_ontologies(self, ontologies_names): + """ + Keep track of the sequence of ontologies + This is a quick and dirty ontology management + I would like the ontologies to have a digest and to know what is their parent + """ + ontology_file = self.io.path_join(SheerkaDataProvider.OntologiesFiles) + text = 
"\n".join(ontologies_names) + self.io.write_text(ontology_file, text) + + def load_ontologies(self): + ontology_file = self.io.path_join(SheerkaDataProvider.OntologiesFiles) + if not self.io.exists(ontology_file): + return [] + + text = self.io.read_text(ontology_file) + return text.split("\n") + + def _inner_load_ref_if_needed(self, obj, load_origin=True): + """ + Make sure the real obj is returned + :param obj: + :param load_origin: + :return: + """ + if not isinstance(obj, str) or not obj.startswith(SheerkaDataProvider.REF_PREFIX): + return obj + + resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin) + return resolved or obj + + def test_only_destroy_refs(self, name=None): + name = name or self.name + current_sdp_refs_folder = self.io.path_join(self.RefFolder, name) + if path.exists(current_sdp_refs_folder): + shutil.rmtree(current_sdp_refs_folder) diff --git a/src/sdp/sheerkaDataProviderIO.py b/src/sdp/sheerkaDataProviderIO.py new file mode 100644 index 0000000..9a482c5 --- /dev/null +++ b/src/sdp/sheerkaDataProviderIO.py @@ -0,0 +1,182 @@ +import io +import logging +import os +from os import path + + +class SheerkaDataProviderIO: + + def __init__(self, root): + self.root = root + self.log = logging.getLogger(__name__) + self.init_log = logging.getLogger("init." 
+ __name__) + + def exists(self, file_path): + pass + + def open(self, file_path, mode): + pass + + def read_text(self, file_path): + pass + + def read_binary(self, file_path): + pass + + def write_text(self, file_path, content): + pass + + def write_binary(self, file_path, content): + pass + + def remove(self, file_path): + pass + + @staticmethod + def get(root): + if root == "mem://": + return SheerkaDataProviderDictionaryIO() + else: + return SheerkaDataProviderFileIO(root) + + def get_obj_path(self, object_type, digest): + return path.join(self.root, object_type, digest[:24], digest) + + def path_join(self, *paths): + return path.join(self.root, *paths) + + +class SheerkaDataProviderFileIO(SheerkaDataProviderIO): + """ + Persist information in files, under a given folder + """ + + def __init__(self, root: str): + """ + + :param root: when given, force the root folder. Default is ~/.sheerka + :type root: + """ + root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \ + if root is None \ + else path.abspath(root) + super().__init__(root) + + self.init_log.debug("root is set to '" + self.root + "'") + + if not path.exists(self.root): + self.init_log.debug("root folder not found. 
Creating it.") + os.makedirs(self.root) + self.first_time = True # used by Sheerka to detect first initialization + else: + self.first_time = False + + def open(self, file_path, mode): + return open(file_path, mode) + + def read_text(self, file_path): + with open(file_path) as f: + return f.read() + + def read_binary(self, file_path): + with open(file_path, "rb") as f: + return f.read() + + def write_text(self, file_path, content): + self._write(file_path, content, "w") + return len(content) + + def write_binary(self, file_path, content): + self._write(file_path, content, "wb") + return len(content) + + def exists(self, file_path): + return path.exists(file_path) + + def remove(self, file_path): + os.remove(file_path) + + @staticmethod + def _write(file_path, content, mode): + if not path.exists(path.dirname(file_path)): + os.makedirs(path.dirname(file_path)) + + with open(file_path, mode) as f: + f.write(content) + + +class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO): + """ + Persist information in a single file, from a dict + The structure of the dict is + { + path_to_information1 : info1, + path_to_information2 : info2, + } + """ + + def __init__(self): + super().__init__("") + self.cache = {} + self.init_log.debug("Initializing dictionary file.") + self.first_time = True + + def exists(self, file_path): + if file_path == "": + return True + + return file_path in self.cache + + def read_text(self, file_path): + return self.cache[file_path] + + def read_binary(self, file_path): + return self.cache[file_path] + + def write_binary(self, file_path, content): + self.cache[file_path] = content + return len(content) + + def write_text(self, file_path, content): + self.cache[file_path] = content + return len(content) + + def remove(self, file_path): + del (self.cache[file_path]) + + def open(self, file_path, mode): + if "w" in mode: + stream = io.BytesIO() if "b" in mode else io.StringIO() + stream.close = on_close(self, file_path, stream)(stream.close) + 
return stream + + if file_path not in self.cache: + raise FileNotFoundError(file_path) + + return io.BytesIO(self.cache[file_path]) if "b" in mode else io.StringIO(self.cache[file_path]) + + def reset(self): + self.cache.clear() + self.first_time = True + + +def on_close(dictionary_io, file_path, stream): + """ + Decorator to intercept the close. + I guess that there are solution that are more elegant + It's used to persist the dictionary on exit + :param dictionary_io: + :param file_path: + :param stream: + :return: + """ + + def decorator(func): + def wrapper(*args, **kwargs): + stream.seek(0) + dictionary_io.cache[file_path] = stream.read() + func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/src/sdp/sheerkaSerializer.py b/src/sdp/sheerkaSerializer.py new file mode 100644 index 0000000..decd5be --- /dev/null +++ b/src/sdp/sheerkaSerializer.py @@ -0,0 +1,382 @@ +import datetime +import io +import json +import logging +import pickle +import struct +from dataclasses import dataclass +from enum import Enum + +from core.utils import get_class, get_full_qualified_name + + +def json_default_converter(o): + """ + Default formatter for json + It's used when the json serializer does not know + how to serialise a type + :param o: + :return: + """ + if isinstance(o, (datetime.date, datetime.datetime)): + return o.isoformat() + + if isinstance(o, Enum): + return o.name + + raise Exception(f"Cannot serialize object '{o}', class='{o.__class__.__name__}'") + # In debug mode, just + # + # with open("json_encoding_error.txt", "a") as f: + # f.write(o.__class__.__name__ + "\n") + + +@dataclass() +class SerializerContext: + user_name: str = None + origin: str = None + sheerka: object = None + + +class Serializer: + HEADER_FORMAT = "cH" + USERNAME = "user_name" # key to store user that as committed the snapshot + MODIFICATION_DATE = "modification_date" # + PARENTS = "parents" + ORIGIN = "##origin##" + HISTORY = "##history##" + + def __init__(self): + self.log 
= logging.getLogger(__name__) + self.init_log = logging.getLogger("init." + __name__) + self.init_log.debug("Initializing serializers") + self._cache = [] + + # add builtin serializers + self.register(EventSerializer()) + self.register(StateSerializer()) + # self.register(ConceptSerializer()) + # self.register(DictionarySerializer()) + # self.register(ExecutionContextSerializer()) + # self.register(MemoryObjectSerializer()) # before ServiceObjSerializer + # self.register(ServiceObjSerializer()) + # self.register(RuleSerializer()) + # self.register(CustomTypeSerializer()) + + def register(self, serializer): + """ + Register the list of all know serializers + :param serializer: + :return: + """ + self.init_log.debug(f"Adding serializer {serializer}") + self._cache.append(serializer) + + def serialize(self, obj, context): + """ + Get the stream representation of an object + :param context: + :param obj: + :return: + """ + serializers = [s for s in self._cache if s.matches(obj)] + + if not serializers: + raise TypeError(f"Don't know how to serialize {type(obj)}") + + serializer = serializers[0] + self.log.debug(f"Serializing '{obj}' using '{serializer.name}'") + + stream = io.BytesIO() + header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version) + stream.write(header) + + return serializer.dump(stream, obj, context) + + def deserialize(self, stream, context): + """ + Loads an object from its stream representation + :param context: + :param stream: + :return: + """ + header = struct.unpack(Serializer.HEADER_FORMAT, stream.read(4)) + serializers = [s for s in self._cache if s.name == header[0].decode("utf-8") and s.version == header[1]] + + if not serializers: + raise TypeError(f"Don't know how serializer name={header[0]}, version={header[1]}") + + serializer = serializers[0] + self.log.debug(f"deserializing using '{serializer}'") + return serializer.load(stream, context) + + +class BaseSerializer: + + def __init__(self, name, 
version): + """ + Create a serializer, given a name and a version + :param name: + :param version: + :return: + """ + self.name = name + self.version = version + + def matches(self, obj): + """ + Returns true if self can serialize obj + :param obj: + :return: + """ + pass + + def dump(self, stream, obj, context): + """ + Returns the byte representation of how the object should be serialized + :param stream: to write to + :param obj: obj to serialize + :param context: additional info needed to dump + :return: stream of bytes + """ + pass + + def load(self, stream, context): + """ + From a stream of bytes, create the object + :param stream: + :param context: additional info needed to load + :return: object + """ + pass + + def __repr__(self): + return self.__class__.__name__ + ' (' + self.name + ", version=" + str(self.version) + ")" + + +class PickleSerializer(BaseSerializer): + """ + Use pickle module to serialize data + """ + + def __init__(self, predicate, name="P", version=1): + BaseSerializer.__init__(self, name, version) + self.predicate = predicate + + def matches(self, obj): + return self.predicate(obj) + + def dump(self, stream, obj, context): + stream.write(pickle.dumps(obj)) + stream.seek(0) + return stream + + def load(self, stream, context): + return pickle.loads(stream.read()) + + +class JsonSerializer(BaseSerializer): + """ + Use Json to serialize the date + """ + + def __init__(self, predicate, name="J", version=1): + BaseSerializer.__init__(self, name, version) + self.predicate = predicate + + def matches(self, obj): + return self.predicate(obj) + + def dump(self, stream, obj, context): + as_dict: dict = obj.__dict__ + as_dict["##type##"] = get_full_qualified_name(obj) + stream.write(json.dumps(as_dict, default=json_default_converter).encode("utf-8").strip()) + stream.seek(0) + return stream + + def load(self, stream, context): + as_dict: dict = json.loads(stream.read().decode("utf-8")) + obj_type = as_dict.pop("##type##") + obj_class = 
get_class(obj_type) + obj = obj_class.__new__(obj_class) + for attr, value in as_dict.items(): + setattr(obj, attr, value) + return obj + + +class EventSerializer(BaseSerializer): + def __init__(self): + BaseSerializer.__init__(self, "E", 1) + + def matches(self, obj): + return get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event" + + def dump(self, stream, obj, context): + stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8")) + stream.seek(0) + return stream + + def load(self, stream, context): + json_stream = stream.read().decode("utf-8") + as_dict = json.loads(json_stream) + event = get_class("sdp.sheerkaDataProvider.Event")() + event.from_dict(as_dict) + return event + + +class StateSerializer(PickleSerializer): + def __init__(self): + super().__init__( + lambda obj: get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State", + "S", + 1) + +# +# +# class JsonSerializer(BaseSerializer): +# +# def __init__(self, fully_qualified_name, name="J", version=1): +# BaseSerializer.__init__(self, name, version) +# self.fully_qualified_name = fully_qualified_name +# +# def matches(self, obj): +# return get_full_qualified_name(obj) == self.fully_qualified_name +# +# def dump(self, stream, obj, context): +# as_json = obj.to_dict() +# as_json.update({ +# Serializer.HISTORY: { +# Serializer.USERNAME: context.user_name, +# Serializer.MODIFICATION_DATE: datetime.datetime.now().isoformat(), +# Serializer.PARENTS: [getattr(obj, Serializer.ORIGIN)] if hasattr(obj, Serializer.ORIGIN) else [] +# }}) +# stream.write(json.dumps(as_json, default=json_default_converter).encode("utf-8")) +# stream.seek(0) +# return stream +# +# def load(self, stream, context): +# json_stream = stream.read().decode("utf-8") +# json_message = json.loads(json_stream) +# obj = get_class(self.fully_qualified_name)() +# obj.from_dict(json_message) +# setattr(obj, Serializer.HISTORY, json_message[Serializer.HISTORY]) +# +# return obj +# +# +# class 
PickleSerializer(BaseSerializer): +# +# def __init__(self, predicate, name="P", version=1): +# BaseSerializer.__init__(self, name, version) +# self.predicate = predicate +# +# def matches(self, obj): +# return self.predicate(obj) +# +# def dump(self, stream, obj, context): +# stream.write(pickle.dumps(obj)) +# stream.seek(0) +# return stream +# +# def load(self, stream, context): +# return pickle.loads(stream.read()) +# +# + +# +# +# class SheerkaPickleSerializer(BaseSerializer): +# +# def __init__(self, predicate, name, version): +# BaseSerializer.__init__(self, name, version) +# self.predicate = predicate +# +# def matches(self, obj): +# return self.predicate(obj) +# +# def dump(self, stream, obj, context): +# stream.write(sheerkapickle.encode(context.sheerka, obj).encode("utf-8")) +# stream.seek(0) +# return stream +# +# def load(self, stream, context): +# json_stream = stream.read().decode("utf-8") +# obj = sheerkapickle.decode(context.sheerka, json_stream) +# return obj +# +# +# class ConceptSerializer(SheerkaPickleSerializer): +# +# def __init__(self): +# super().__init__(lambda obj: isinstance(obj, Concept), "C", 1) +# +# +# class DictionarySerializer(BaseSerializer): +# def __init__(self): +# super().__init__("D", 1) +# +# def matches(self, obj): +# return isinstance(obj, dict) +# +# def dump(self, stream, obj, context): +# stream.write(json.dumps(obj, default=json_default_converter).encode("utf-8")) +# stream.seek(0) +# return stream +# +# def load(self, stream, context): +# json_stream = stream.read().decode("utf-8") +# obj = json.loads(json_stream) +# +# return obj +# +# +# class ExecutionContextSerializer(SheerkaPickleSerializer): +# CLASS_NAME = "core.sheerka.ExecutionContext.ExecutionContext" +# +# def __init__(self): +# super().__init__(lambda obj: get_full_qualified_name(obj) == self.CLASS_NAME, "R", 1) +# +# +# class ServiceObjSerializer(PickleSerializer): +# base_class = get_class("core.sheerka.services.sheerka_service.ServiceObj") +# +# def 
__init__(self): +# super().__init__( +# lambda obj: isinstance(obj, self.base_class), +# "O", +# 1) +# +# +# class MemoryObjectSerializer(SheerkaPickleSerializer): +# CLASS_NAME = "core.sheerka.services.SheerkaMemory.MemoryObject" +# +# def __init__(self): +# super().__init__(lambda obj: get_full_qualified_name(obj) == self.CLASS_NAME, "M", 1) +# +# +# class RuleSerializer(SheerkaPickleSerializer): +# def __init__(self): +# super().__init__(lambda obj: isinstance(obj, Rule), "X", 1) +# +# +# class CustomTypeSerializer(BaseSerializer): +# def __init__(self): +# BaseSerializer.__init__(self, "T", 1) +# +# def matches(self, obj): +# return isinstance(obj, CustomType) +# +# def dump(self, stream, obj, context): +# stream.write(obj.value.encode("utf-8")) +# stream.seek(0) +# return stream +# +# def load(self, stream, context): +# value = stream.read().decode("utf-8") +# if value == NotInit.value: +# return NotInit +# elif value == NotFound.value: +# return NotFound +# elif value == Removed.value: +# return Removed +# raise NotImplemented(f"CustomTypeSerializer.load({value})") diff --git a/src/server/authentication.py b/src/server/authentication.py new file mode 100644 index 0000000..b9ac675 --- /dev/null +++ b/src/server/authentication.py @@ -0,0 +1,124 @@ +from datetime import datetime, timedelta + +from fastapi import Depends, HTTPException +from fastapi.security import OAuth2PasswordBearer +from jose import JWTError, jwt +from passlib.context import CryptContext +from pydantic import BaseModel +from starlette import status + +# to get a string like this run: +# openssl rand -hex 32 +SECRET_KEY = "af95f0590411260f1f127bd7ef9a03409aecadf7729b3e6822b11752433b97b5" # should be in env ! 
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 1  # NOTE(review): 1 minute is very short — deliberate?

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

# Development-only user store; replace with a real database.
fake_users_db = {
    "kodjo": {
        "username": "kodjo",
        "full_name": "Kodjo Sossouvi",
        "email": "kodjo.sossouvi@gmail.com",
        "hashed_password": "$2b$12$fb9jW7QUZ9KIEAAtVmWMEOGtehKy9FafUr7Zfrsb3ZMhsBbzZs7SC",  # password is kodjo
        "disabled": False,
    },
}


class Token(BaseModel):
    """Response body of the /token endpoint."""
    access_token: str
    token_type: str


class TokenData(BaseModel):
    """Payload extracted from a decoded JWT."""
    username: str | None = None


class User(BaseModel):
    """
    Public view of an account (no credentials).
    """
    username: str
    email: str | None = None
    full_name: str | None = None
    disabled: bool | None = None


class UserInDB(User):
    """User as stored, including the bcrypt hash."""
    hashed_password: str


def get_password_hash(password: str) -> str:
    """
    Hash the password
    :param password:
    :type password:
    :return:
    :rtype:
    """
    return pwd_context.hash(password)


def get_user(db, username: str) -> UserInDB | None:
    """Return the stored user, or None when unknown."""
    if username in db:
        user_dict = db[username]
        return UserInDB(**user_dict)
    return None


def authenticate_user(fake_db, username: str, password: str):
    """Return the user on success; False otherwise (kept for caller truthiness checks)."""
    user = get_user(fake_db, username)
    if not user:
        return False

    if not pwd_context.verify(password, user.hashed_password):
        return False

    return user


def create_access_token(data: dict, expires_delta: timedelta | None = None):
    """Build a signed JWT embedding `data` plus an `exp` claim (default: 15 min)."""
    to_encode = data.copy()
    # NOTE(review): datetime.utcnow() is deprecated since 3.12; prefer
    # datetime.now(timezone.utc) once `timezone` is imported in this module.
    if expires_delta:
        expire = datetime.utcnow() + expires_delta
    else:
        expire = datetime.utcnow() + timedelta(minutes=15)
    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
    return encoded_jwt


async def get_current_user(token: str = Depends(oauth2_scheme)):
    """Resolve the bearer token to a known user or raise 401."""
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    # keep the try block narrow: only jwt.decode raises JWTError
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError as ex:
        raise credentials_exception from ex

    # "sub" may be absent, so the value is Optional
    username: str | None = payload.get("sub")
    if username is None:
        raise credentials_exception

    user = get_user(fake_users_db, username=username)
    if user is None:
        raise credentials_exception

    return user


async def get_current_active_user(current_user: User = Depends(get_current_user)):
    """
    Checks if the user is still active
    :param current_user:
    :type current_user:
    :return:
    :rtype:
    """
    if current_user.disabled:
        raise HTTPException(status_code=400, detail="Inactive user")
    return current_user

# diff --git a/src/server/main.py b/src/server/main.py
# new file mode 100644 index 0000000..bbc0fb3 --- /dev/null +++ b/src/server/main.py
from datetime import timedelta

import uvicorn
from fastapi import Depends, FastAPI, HTTPException
from fastapi.security import OAuth2PasswordRequestForm
from starlette import status

from constants import CLIENT_OPERATION_QUIT, EXIT_COMMANDS, SHEERKA_PORT
from server.authentication import ACCESS_TOKEN_EXPIRE_MINUTES, User, authenticate_user, create_access_token, \
    fake_users_db, get_current_active_user

app = FastAPI()


@app.get("/")
async def root() -> str:
    """
    Root path. Simply display a welcome message
    :return:
    :rtype:
    """
    return "Welcome, my name is Sheerka."
+ + +@app.post("/token") +async def login(form_data: OAuth2PasswordRequestForm = Depends()): + user = authenticate_user(fake_users_db, form_data.username, form_data.password) + if not user: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": "Bearer"}, + ) + + access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES) + access_token = create_access_token(data={"sub": user.username}, expires_delta=access_token_expires) + return {"access_token": access_token, "token_type": "bearer"} + + +@app.post("/echo/{message}", status_code=status.HTTP_200_OK, response_model=dict) +async def echo(message: str, current_user: User = Depends(get_current_active_user)) -> dict: + """ + + :param current_user: + :type current_user: + :param message: + :type message: + :return: + :rtype: + """ + if message in EXIT_COMMANDS: + return { + "status": True, + "response": "Take care.", + "command": CLIENT_OPERATION_QUIT + } + + return { + "status": True, + "response": f"from {current_user.username}: {message}", + "command": None, + } + + +if __name__ == "__main__": + uvicorn.run("server.main:app", port=SHEERKA_PORT, log_level="info") diff --git a/tests/mockserver.py b/tests/mockserver.py new file mode 100644 index 0000000..93a6066 --- /dev/null +++ b/tests/mockserver.py @@ -0,0 +1,62 @@ +import logging +from multiprocessing import Process +from time import sleep + +import uvicorn +from fastapi import FastAPI + + +class MockServer: + """ Core application to test. 
""" + + def __init__(self, endpoints: list[dict]): + """ + + :param endpoints: + :type endpoints: list of {path: '', response:''} + """ + self.api = FastAPI() + + def raise_exception(ex): + raise ex + + # register endpoints + for endpoint in endpoints: + method = endpoint["method"] if "method" in endpoint else "get" + if method == "post": + if "exception" in endpoint: + self.api.post(endpoint["path"])(lambda: raise_exception(endpoint["exception"])) + else: + self.api.post(endpoint["path"])(lambda: endpoint["response"]) + else: + self.api.get(endpoint["path"])(lambda: endpoint["response"]) + + # register shutdown + self.api.on_event("shutdown")(self.close) + + # create the process + self.proc = Process(target=uvicorn.run, + args=(self.api,), + kwargs={ + "host": "127.0.0.1", + "port": 5000, + "log_level": "info"}, + daemon=True) + + async def close(self): + """ Gracefull shutdown. """ + logging.warning("Shutting down the app.") + + def start_server(self): + self.proc.start() + sleep(0.1) + + def stop_server(self): + self.proc.terminate() + + def __enter__(self): + self.start_server() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop_server() diff --git a/tests/sdp/test_sheerkaDataProvider.py b/tests/sdp/test_sheerkaDataProvider.py new file mode 100644 index 0000000..c4154cf --- /dev/null +++ b/tests/sdp/test_sheerkaDataProvider.py @@ -0,0 +1,539 @@ +import json +import os +import shutil +from datetime import date, datetime +from os import path + +import pytest + +from core.global_symbols import NotFound +from sdp.sheerkaDataProvider import Event, SheerkaDataProvider +from sdp.sheerkaSerializer import JsonSerializer, PickleSerializer + +tests_root = path.abspath("../../build/tests") +evt_digest = "3a571cb6034ef6fc8d7fe91948d0d29728eed74de02bac7968b0e9facca2c2d7" + + +def read_json_file(sdp, file_name): + with sdp.io.open(file_name, "r") as f: + return json.load(f) + + +class ObjNoKey: + """ + Object with no key, they won't be ordered + Not 
suitable for Json dump as there is no to_dict() method + """ + + def __init__(self, a, b): + self.a = a + self.b = b + + def __hash__(self): + return hash((self.a, self.b)) + + def __eq__(self, obj): + return isinstance(obj, ObjNoKey) and \ + self.a == obj.a and \ + self.b == obj.b + + def __repr__(self): + return f"ObjNoKey({self.a}, {self.b})" + + +class ObjWithDigestWithKey: + """ + Object with a key that can compute its digest. + It can be used to test objects sharing the same key (but that are different) + Not suitable for Json dump as there is no to_dict() method + """ + + def __init__(self, a, b): + self.a = a + self.b = b + + def __hash__(self): + return hash((self.a, self.b)) + + def __eq__(self, obj): + return isinstance(obj, ObjWithDigestWithKey) and \ + self.a == obj.a and \ + self.b == obj.b + + def __repr__(self): + return f"ObjWithDigestWithKey({self.a}, {self.b})" + + def get_key(self): + return self.a + + def get_digest(self): + return str(self.a) + str(self.b) + + +@pytest.fixture(autouse=True) +def init_test(): + """ + I test both SheerkaDataProviderFileIO and SheerkaDataProviderDictionaryIO + So it's important to reset the folders between two tests + :return: + :rtype: + """ + if path.exists(tests_root): + shutil.rmtree(tests_root) + + if not path.exists(tests_root): + os.makedirs(tests_root) + current_pwd = os.getcwd() + os.chdir(tests_root) + + yield None + + os.chdir(current_pwd) + + +@pytest.mark.parametrize("root, expected", [ + (".sheerka", path.abspath(path.join(tests_root, ".sheerka"))), + ("mem://", "") +]) +def test_i_can_init_the_data_provider(root, expected): + sdp = SheerkaDataProvider(root) + + assert sdp.io.root == expected + assert sdp.io.exists(sdp.io.root) + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_save_and_load_an_event(root): + sdp = SheerkaDataProvider(root) + event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo") + + evt_digest = sdp.save_event(event) + 
evt = sdp.load_event(evt_digest) + + assert evt.date == datetime(year=2007, month=9, day=10) + assert evt.user_id == "kodjo" + assert evt.message == "hello world" + assert evt.parents is None + assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest)) + + # I can get the last event + evt = sdp.load_event() + assert evt.message == "hello world" + + # check that the last event is updated + last_event_file = path.join(sdp.io.root, SheerkaDataProvider.LastEventFile) + assert sdp.io.exists(last_event_file) + assert sdp.io.read_text(last_event_file) == evt_digest + + +def test_i_can_save_and_load_events_with_multiple_sdp(): + root = ".sheerka" + sdp1 = SheerkaDataProvider(root) + sdp1.save_event(Event("event 1", date=date(year=2007, month=9, day=10), user_id="kodjo")) + sdp1.save_event(Event("event 2", date=date(year=2007, month=9, day=10), user_id="kodjo")) + + sdp2 = SheerkaDataProvider(root, "Another sdp") + sdp2.save_event(Event("event 3", date=date(year=2007, month=9, day=10), user_id="kodjo")) + sdp2.save_event(Event("event 4", date=date(year=2007, month=9, day=10), user_id="kodjo")) + + events_from_1 = list(sdp1.load_events(-1)) + events_from_2 = list(sdp2.load_events(-1)) + + assert [e.message for e in events_from_1] == ['event 4', 'event 3', 'event 2', 'event 1'] + assert [e.message for e in events_from_2] == ['event 4', 'event 3', 'event 2', 'event 1'] + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_get_event_history(root): + sdp = SheerkaDataProvider(root) + event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo") + event2 = Event("hello world 2", date=date(year=2007, month=9, day=10), user_id="kodjo") + + evt_digest1 = sdp.save_event(event) + evt_digest2 = sdp.save_event(event2) + + evt = sdp.load_event(evt_digest2) + assert evt.date == datetime(year=2007, month=9, day=10) + assert evt.user_id == "kodjo" + assert evt.message == "hello world 2" + 
assert evt.parents == [evt_digest1] + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_load_events(root): + sdp = SheerkaDataProvider(root) + + for i in range(15): + sdp.save_event(Event(f"TEST::Hello {i}")) + + events = list(sdp.load_events(10)) # first ten + assert len(events) == 10 + assert events[0].message == "TEST::Hello 14" + assert events[9].message == "TEST::Hello 5" + + events = list(sdp.load_events(10, 5)) # skip first 5, then take 10 + assert len(events) == 10 + assert events[0].message == "TEST::Hello 9" + assert events[9].message == "TEST::Hello 0" + + events = list(sdp.load_events(20, 10)) # skip first 10, take 20,(but only 5 remaining) + assert len(events) == 5 + assert events[0].message == "TEST::Hello 4" + assert events[4].message == "TEST::Hello 0" + + events = list(sdp.load_events(1, 20)) # skip first 20, take one + assert len(events) == 0 + + events = list(sdp.load_events(0)) # all + assert len(events) == 15 + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_load_events_when_no_event(root): + sdp = SheerkaDataProvider(root) + + events = list(sdp.load_events(1)) + assert len(events) == 0 + + events = list(sdp.load_events(1, 5)) + assert len(events) == 0 + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_add_and_reload_one_item(root): + sdp = SheerkaDataProvider(root) + + event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo") + with sdp.get_transaction(event) as transaction: + transaction.add("entry", "key", "foo => bar") + transaction.add("entry", "key2", ObjNoKey("a", "b")) + transaction.add("entry2", "key", "value2") + + last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) + state = sdp.load_state(last_commit) + loaded1 = sdp.get("entry", "key") + loaded2 = sdp.get("entry", "key2") + loaded3 = sdp.get("entry2", "key") + + load_entry = sdp.get("entry") + + # check that the event is saved + evt_digest = 
event.get_digest() + assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest)) + + # check the values + assert loaded1 == "foo => bar" + assert loaded2 == ObjNoKey("a", "b") + assert loaded3 == "value2" + + assert load_entry == { + "key": "foo => bar", + "key2": ObjNoKey("a", "b") + } + + assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) + assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.RefFolder, sdp.name, SheerkaDataProvider.HeadFile)) + + assert state.date is not None + assert state.parents == [] + assert state.events == [evt_digest] + assert state.data == {"entry": {'key': 'foo => bar', 'key2': ObjNoKey("a", "b")}, + 'entry2': {'key': 'value2'}} + + assert sdp.io.read_text( + path.join(sdp.io.root, SheerkaDataProvider.RefFolder, sdp.name, SheerkaDataProvider.HeadFile)) == last_commit + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_load_an_entry(root): + sdp = SheerkaDataProvider(root) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key1", "foo") + transaction.add("entry", "key2", "bar") + transaction.add("entry", "key3", "baz") + + item = sdp.get("entry", "key1") + assert item == "foo" + + load_entry = sdp.get("entry") + assert load_entry == { + "key1": "foo", + "key2": "bar", + "key3": "baz", + } + + # load entry was a copy + load_entry["key1"] = "another foo" + assert sdp.get("entry", "key1") == "foo" + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_add_and_reload_a_list_of_items(root): + sdp = SheerkaDataProvider(root) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key", ["foo => bar", ObjNoKey("a", "b")]) + + last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) + state = sdp.load_state(last_commit) + loaded = sdp.get("entry", "key") + + # check 
the values + assert loaded == ["foo => bar", ObjNoKey("a", "b")] + + assert state.date is not None + assert state.parents == [] + assert state.events == [evt_digest] + assert state.data == {"entry": {'key': ['foo => bar', ObjNoKey('a', 'b')]}} + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_add_and_reload_a_set_of_items(root): + sdp = SheerkaDataProvider(root) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key", {"foo => bar", ObjNoKey("a", "b")}) + + last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) + state = sdp.load_state(last_commit) + loaded = sdp.get("entry", "key") + + # check the values + assert loaded == {"foo => bar", ObjNoKey("a", "b")} + + assert state.date is not None + assert state.parents == [] + assert state.events == [evt_digest] + assert state.data == {"entry": {'key': {'foo => bar', ObjNoKey('a', 'b')}}} + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_add_and_reload_an_entry(root): + sdp = SheerkaDataProvider(root) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry1", None, "foo") + transaction.add("entry2", None, {"key": "foo", "key1": "bar"}) + transaction.add("entry3", None, {"foo", "bar"}) + transaction.add("entry4", None, ["foo", "bar"]) + + loaded_entry1 = sdp.get("entry1") + loaded_entry2 = sdp.get("entry2") + loaded_entry3 = sdp.get("entry3") + loaded_entry4 = sdp.get("entry4") + + assert loaded_entry1 == "foo" + assert loaded_entry2 == {"key": "foo", "key1": "bar"} + assert loaded_entry3 == {"foo", "bar"} + assert loaded_entry4 == ["foo", "bar"] + + # loaded values are copies + loaded_entry2["key"] = "foo2" + assert sdp.get("entry2", "key") == "foo" + + loaded_entry3.remove("foo") + assert sdp.get("entry3") == {"foo", "bar"} + + loaded_entry4[0] = "foo2" + assert sdp.get("entry4")[0] == "foo" + + +@pytest.mark.parametrize("root", [ + ".sheerka", + 
"mem://" +]) +def test_i_can_override_values(root): + sdp = SheerkaDataProvider(root) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key", {"foo => bar", ObjNoKey("a", "b")}) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key", "new_value") + + loaded = sdp.get("entry", "key") + assert loaded == "new_value" + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_add_an_object_and_save_it_as_a_reference(root): + sdp = SheerkaDataProvider(root) + sdp.serializer.register(JsonSerializer(lambda o: isinstance(o, ObjNoKey))) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key1", ObjNoKey("a", "b"), use_ref=True) + transaction.add("entry", "key2", [ObjNoKey("a", "b"), ObjNoKey("c", "d")], use_ref=True) + transaction.add("entry", "key3", {ObjNoKey("a", "b"), ObjNoKey("c", "d")}, use_ref=True) + + assert sdp.get("entry", "key1") == ObjNoKey("a", "b") + assert sdp.get("entry", "key2") == [ObjNoKey("a", "b"), ObjNoKey("c", "d")] + assert sdp.get("entry", "key3") == {ObjNoKey("a", "b"), ObjNoKey("c", "d")} + + # I can ask for the whole entry + assert sdp.get("entry") == {"key1": ObjNoKey("a", "b"), + "key2": [ObjNoKey("a", "b"), ObjNoKey("c", "d")], + "key3": {ObjNoKey("a", "b"), ObjNoKey("c", "d")}} + + state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) + assert state.data == { + "entry": {'key1': '##REF##:fbc2b1c60ed753b49217cae851e342371ee39ebabc9778105f450812e615a513', + 'key2': ['##REF##:fbc2b1c60ed753b49217cae851e342371ee39ebabc9778105f450812e615a513', + '##REF##:448420dbc57d61401d10a98759fccdabbe50e2e825b6da3bd018c190926bcda4'], + 'key3': {'##REF##:448420dbc57d61401d10a98759fccdabbe50e2e825b6da3bd018c190926bcda4', + '##REF##:fbc2b1c60ed753b49217cae851e342371ee39ebabc9778105f450812e615a513'}} + } + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" 
+]) +def test_i_can_add_an_object_as_a_reference_using_its_own_digest(root): + sdp = SheerkaDataProvider(root) + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key1", ObjWithDigestWithKey("a", "b"), use_ref=True) + + assert sdp.get("entry", "key1") == ObjWithDigestWithKey("a", "b") + + state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) + assert state.data == { + "entry": {'key1': '##REF##:ab'} + } + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_remove_elements(root): + sdp = SheerkaDataProvider(root) + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key", "value") + transaction.add("entry", "key2", "value2") + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.remove("entry", "key") + + assert sdp.get("entry", "key") is NotFound + + state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) + assert state.data == { + "entry": {'key2': 'value2'} + } + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_keep_state_history(root): + sdp = SheerkaDataProvider(root) + + with sdp.get_transaction(Event("first event")) as transaction: + transaction.add("entry", "key", "value") + state_digest1 = transaction.snapshot + + with sdp.get_transaction(Event("second event")) as transaction: + transaction.add("entry", "key2", "value2") + state_digest2 = transaction.snapshot + + with sdp.get_transaction(Event("third event")) as transaction: + transaction.add("entry", "key2", "value2") + state_digest3 = transaction.snapshot + + state = sdp.load_state(state_digest3) + assert state.parents == [state_digest2] + + state = sdp.load_state(state_digest2) + assert state.parents == [state_digest1] + + state = sdp.load_state(state_digest1) + assert state.parents == [] 
+ + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_save_and_load_ontologies_names(root): + sdp = SheerkaDataProvider(root) + + ontologies = ['new ontology', '#unit_test#', '__default__'] + sdp.save_ontologies(ontologies) + assert sdp.load_ontologies() == ontologies + + # extra + ontologies_files = path.join(sdp.io.root, SheerkaDataProvider.OntologiesFiles) + assert sdp.io.exists(ontologies_files) + assert sdp.io.read_text(ontologies_files) == """new ontology +#unit_test# +__default__""" + + +def test_i_can_remove_even_if_not_exist(): + sdp = SheerkaDataProvider("mem://") + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.remove("entry", None) + transaction.remove(None, "key") + transaction.remove("entry", "key") + + +def test_i_get_default_value_if_entry_is_missing(): + sdp = SheerkaDataProvider("mem://") + assert sdp.get("fake_entry", "fake_key", "default_value") == "default_value" + + +def test_exists(): + sdp = SheerkaDataProvider("mem://") + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key", "value") + + assert not sdp.exists("entry2") + assert not sdp.exists("entry", "key2") + assert sdp.exists("entry", "key") + + +def test_not_found_is_returned_when_an_entry_is_not_found(): + sdp = SheerkaDataProvider("mem://") + + with sdp.get_transaction(Event(f"TEST::{evt_digest}")) as transaction: + transaction.add("entry", "key", "value") + + assert sdp.get("entry", "key") == "value" + assert sdp.get("entry", "key2") == NotFound + assert sdp.get("entry2") == NotFound diff --git a/tests/sdp/test_sheerkaSerializer.py b/tests/sdp/test_sheerkaSerializer.py new file mode 100644 index 0000000..4e994e9 --- /dev/null +++ b/tests/sdp/test_sheerkaSerializer.py @@ -0,0 +1,35 @@ +import io + +from sdp.sheerkaSerializer import JsonSerializer + + +class ObjNoKey: + """ + Object with no key, they won't be ordered + Not suitable for Json dump as there is no 
to_dict() method + """ + + def __init__(self, a, b): + self.a = a + self.b = b + + def __hash__(self): + return hash((self.a, self.b)) + + def __eq__(self, obj): + return isinstance(obj, ObjNoKey) and \ + self.a == obj.a and \ + self.b == obj.b + + def __repr__(self): + return f"ObjNoKey({self.a}, {self.b})" + + +def test_i_can_json_serialize(): + json_serializer = JsonSerializer(lambda obj: True) + obj = ObjNoKey("a", "b") + stream = io.BytesIO() + + stream = json_serializer.dump(stream, obj, None) + res = json_serializer.load(stream, None) + assert res == obj diff --git a/tests/test_client.py b/tests/test_client.py new file mode 100644 index 0000000..b5c1a89 --- /dev/null +++ b/tests/test_client.py @@ -0,0 +1,98 @@ +import json + +from fastapi import HTTPException +from starlette import status + +from client import SheerkaClient, parse_arguments +from mockserver import MockServer + + +def test_i_can_start_with_a_default_hostname(): + parsed = parse_arguments([]) + + assert parsed.hostname == "http://localhost" + assert parsed.port == 56356 + + +def test_i_can_override_hostname_and_port(): + parsed = parse_arguments(["new_host", "--port", "1515"]) + + assert parsed.hostname == "new_host" + assert parsed.port == 1515 + + parsed = parse_arguments(["new_host", "-p", "1515"]) + + assert parsed.hostname == "new_host" + assert parsed.port == 1515 + + +def test_i_can_provide_user_and_password(): + parsed = parse_arguments(["--username", "my_user", "--password", "my_password"]) + assert parsed.username == "my_user" + assert parsed.password == "my_password" + + parsed = parse_arguments(["-u", "my_user", "-P", "my_password"]) + assert parsed.username == "my_user" + assert parsed.password == "my_password" + + +def test_i_can_manage_when_no_server(): + client = SheerkaClient("http://localhost", 80) + res = client.check_url() + + assert res.status is False + assert res.message == "Connection refused." 
+ + +def test_i_can_manage_when_resource_is_not_found(): + with MockServer([]): + client = SheerkaClient("http://localhost", 5000) + res = client.check_url() + + assert not res.status + assert res.message == '{"detail":"Not Found"}' + + +def test_i_can_connect_to_a_server(): + with MockServer([{ + "path": "/", + "response": "Hello world" + }]): + client = SheerkaClient("http://localhost", 5000) + res = client.check_url() + assert res.status + assert res.message == '"Hello world"' + + +def test_i_can_authenticate_with_valid_credentials(): + with MockServer([{ + "path": "/", + "response": "Hello world" + }, { + "method": "post", + "path": "/token", + "response": {"access_token": "xxxx", "token_type": "bearer"} + }]): + client = SheerkaClient("http://localhost", 5000) + res = client.connect("valid_username", "valid_password") + assert res.status + assert res.message == "Connected as valid_username" + + +def test_i_can_manage_when_authentication_fails(): + with MockServer([{ + "path": "/", + "response": "Hello world" + }, { + "method": "post", + "path": "/token", + "exception": HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": "Bearer"}, + ) + }]): + client = SheerkaClient("http://localhost", 5000) + res = client.connect("username", "wrong_password") + assert not res.status + assert res.message == 'Incorrect username or password'