Refactored to allow ConceptEvaluator

2019-11-14 22:04:38 +01:00
parent 576ce77740
commit 9e10e77737
30 changed files with 2406 additions and 1007 deletions
@@ -1,9 +1,9 @@
 from dataclasses import dataclass
-
-from core.concept import Concept, ErrorConcept, Property, TooManySuccessConcept, ReturnValueConcept
-from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
+from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
+from core.concept import Concept, ConceptParts
+from evaluators.BaseEvaluator import OneReturnValueEvaluator
+from parsers.BaseParser import BaseParser
 from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError
-from parsers.DefaultParser import DefConceptNode, DefaultParser
 import core.utils

 import logging
@@ -11,60 +11,28 @@ import logging
 log = logging.getLogger(__name__)


-class Singleton(type):
-    _instances = {}
-
-    def __call__(cls, *args, **kwargs):
-        if cls not in cls._instances:
-            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
-        return cls._instances[cls]
-
-
-@dataclass
-class ReturnValue:
-    """
-    Class that handle the return of a concept
-    To avoid using the try/except pattern for each and every call
-    To give context (ie return message) even when the call is successful
-    """
-    who: object
-    status: bool
-    value: Concept
-    message: str = None
-
-
-@dataclass
-class ExecutionContext:
-    """
-    To keep track of the execution of a request
-    """
-    sheerka: object
-    event_digest: str
-
-
 class Sheerka(Concept):
    """
    Main controller for the project
    """

-    NAME = "Sheerka"
-    UNKNOWN_CONCEPT_NAME = "Unknown Concept"
-    SUCCESS_CONCEPT_NAME = "Success"
-    CONCEPT_TOO_LONG_CONCEPT_NAME = "Concept too long"
-
    CONCEPTS_ENTRY = "All_Concepts"
    BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
    USER_CONCEPTS_KEYS = "User_Concepts"

    def __init__(self, debug=False):
        log.debug("Starting Sheerka.")
-        super().__init__(Sheerka.NAME)
+        super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)

        # cache of the most used concepts
        # Note that these are only templates
        # They are used as a footprint for instantiation
        self.concepts_cache = {}

+        # cache for builtin types.
+        # It allows instantiation of a builtin class
+        self.builtin_cache = {}
+
        # a concept can be instantiated
        # ex: File is a concept, but File('foo.txt') is an instance
        # TODO: manage contexts
@@ -78,7 +46,6 @@ class Sheerka(Concept):
        self.parsers = []
        self.evaluators = []

-        self.key = self.NAME
        self.debug = debug

    def initialize(self, root_folder=None):
@@ -86,7 +53,6 @@ class Sheerka(Concept):
        Starting Sheerka
        Loads the current configuration
        Notes that when it's the first time, it also create the needed working folders
-        :param debug:
        :param root_folder: root configuration folder
        :return: ReturnValue(Success or Error)
        """
@@ -94,22 +60,27 @@ class Sheerka(Concept):
        try:
            self.init_logging()
            self.sdp = SheerkaDataProvider(root_folder)
-            self.parsers.append(core.utils.get_class("parsers.DefaultParser.DefaultParser"))
-            self.parsers.append(core.utils.get_class("parsers.PythonParser.PythonParser"))
-            #self.parsers.append(core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser"))
-
-            self.evaluators.append(core.utils.get_object("evaluators.DefaultEvaluator.DefaultEvaluator"))
-            self.evaluators.append(core.utils.get_object("evaluators.AddConceptEvaluator.AddConceptEvaluator"))
-            self.evaluators.append(core.utils.get_object("evaluators.PythonEvaluator.PythonEvaluator"))

            if self.sdp.first_time:
                self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)

-            self.create_builtin_concepts()
-        except IOError as e:
-            return ReturnValue(self, False, self.get(ErrorConcept.NAME), e)
+            self.initialize_builtin_concepts()

-        return ReturnValue(self, True, self.get(Sheerka.SUCCESS_CONCEPT_NAME))
+            self.parsers.append(core.utils.get_class("parsers.DefaultParser.DefaultParser"))
+            self.parsers.append(core.utils.get_class("parsers.PythonParser.PythonParser"))
+            self.parsers.append(core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser"))
+
+            self.evaluators.append(core.utils.new_object("evaluators.ParsersEvaluator.ParsersEvaluator"))
+            self.evaluators.append(core.utils.new_object("evaluators.AddConceptEvaluator.AddConceptEvaluator"))
+            self.evaluators.append(core.utils.new_object("evaluators.PythonEvaluator.PythonEvaluator"))
+            self.evaluators.append(core.utils.new_object("evaluators.ConceptEvaluator.ConceptEvaluator"))
+            self.evaluators.append(
+                core.utils.new_object("evaluators.DuplicateConceptEvaluator.DuplicateConceptEvaluator"))
+
+        except IOError as e:
+            return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e)
+
+        return ReturnValueConcept(self, True, self)

    def set_id_if_needed(self, obj, is_builtin):
        """
@@ -123,34 +94,35 @@ class Sheerka(Concept):
        obj.id = self.sdp.get_next_key(self.BUILTIN_CONCEPTS_KEYS if is_builtin else self.USER_CONCEPTS_KEYS)
        log.debug(f"Setting id '{obj.id}' to concept '{obj.name}'.")

-    def create_builtin_concepts(self):
+    def initialize_builtin_concepts(self):
        """
        Initializes the builtin concepts
        :return: None
        """
        log.debug("Initializing builtin concepts")
-        builtins = [
-            self,
-            Concept(Sheerka.UNKNOWN_CONCEPT_NAME, key=Sheerka.UNKNOWN_CONCEPT_NAME),
-            Concept(Sheerka.SUCCESS_CONCEPT_NAME, key=Sheerka.SUCCESS_CONCEPT_NAME),
-            Concept(Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME, key=Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME),
-            ErrorConcept(),
-            TooManySuccessConcept(),
-            ReturnValueConcept(),
-        ]
+        builtins_classes = self.get_builtins_classes_as_dict()

-        for concept in builtins:
-            self.add_in_cache(concept)
+        # this whole initialization of the builtins seems to be a little bit complicated
+        # why do we need to update it from the DB?
+        for key in BuiltinConcepts:
+            concept = self if key == BuiltinConcepts.SHEERKA \
+                else builtins_classes[str(key)]() if str(key) in builtins_classes \
+                else Concept(key, True, False, key)
+
+            if not concept.is_unique and str(key) in builtins_classes:
+                self.builtin_cache[key] = builtins_classes[str(key)]

            from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
            if from_db is None:
-                log.debug(f"'{concept.name}' concept is not found. Adding.")
+                log.debug(f"'{concept.name}' concept is not found in db. Adding.")
                self.set_id_if_needed(concept, True)
                self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
            else:
-                log.debug(f"Found concept '{from_db}'. Updating.")
+                log.debug(f"Found concept '{from_db}' in db. Updating.")
                concept.update_from(from_db)

+            self.add_in_cache(concept)
+
    def init_logging(self):
        if self.debug:
            log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
@@ -163,37 +135,75 @@ class Sheerka(Concept):

    def eval(self, text):
        evt_digest = self.sdp.save_event(Event(text))
-        exec_context = ExecutionContext(self, evt_digest)
-        return_values = self.try_parse(exec_context, text)
-        return_values = self.try_eval(exec_context, return_values)
+        exec_context = ExecutionContext(self.key, evt_digest, self)

-        # return_values = []
-        # for parser_name, status, node in result:
-        #     if not status:
-        #         return_values.append(ReturnValue(False, ErrorConcept(body=node)))
-        #     elif status and isinstance(node, DefConceptNode):
-        #         return_values.append(self.add_concept(exec_context, node))
-        #     else:
-        #         return_values.append(ReturnValue(True, node))
+        before_parsing = self.ret(self.eval.__name__, True, self.new(BuiltinConcepts.BEFORE_PARSING))
+        return_values = self.process(exec_context, [], [before_parsing])
+        return_values = core.utils.remove_from_list(return_values, [before_parsing])
+
+        parsing_results = self.parse(exec_context, text)
+        return_values.extend(parsing_results)
+        processing_parsing = self.ret(self.eval.__name__, True, self.new(BuiltinConcepts.PARSING))
+        return_values = self.process(exec_context, return_values, [processing_parsing])
+        return_values = core.utils.remove_from_list(return_values, [processing_parsing])
+
+        after_parsing = self.ret(self.eval.__name__, True, self.new(BuiltinConcepts.AFTER_PARSING))
+        return_values = self.process(exec_context, return_values, [after_parsing])
+        return_values = core.utils.remove_from_list(return_values, [after_parsing])

        return return_values

-    def try_parse(self, context, text):
+    def expect_one(self, context, items):
+
+        if not isinstance(items, list):
+            items = [items]
+
+        if len(items) == 0:
+            return self.ret(context.who, False, self.new(BuiltinConcepts.IS_EMPTY, obj=items))
+
+        successful_results = [item for item in items if item.status]
+        number_of_successful = len(successful_results)
+        total_items = len(items)
+
+        # remove errors when a winner is found
+        if number_of_successful == 1:
+            # log.debug(f"1 / {total_items} good item found.")
+            return successful_results[0]
+
+        # too many winners, which one to choose ?
+        if number_of_successful > 1:
+            log.debug(f"{number_of_successful} / {total_items} good items. Too many success")
+            return self.ret(context.who, False, self.new(BuiltinConcepts.TOO_MANY_SUCCESS, obj=successful_results))
+
+        # only errors, i cannot help you
+        log.debug(f"{total_items} items. Only errors")
+        return self.ret(context.who, False, self.new(BuiltinConcepts.TOO_MANY_ERRORS, obj=items))
+
+    def parse(self, context, text):
        result = []
-        log.debug(f"Parsing '{text}'")
+        if log.isEnabledFor(logging.DEBUG):
+            debug_text = "'" + text + "'" if isinstance(text, str) \
+                else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
+            log.debug(f"Parsing {debug_text}")
        for parser in self.parsers:
            p = parser()
-            # try:
-            #     tree = p.parse()
-            #     result.append((p.name, tree))
-            # except Exception as e:
-            #     result.append((p.name, e))
-            tree = p.parse(context, text)
-            result.append(ReturnValue(p.name, not p.has_error, p.error_sink if p.has_error else tree))
+            res = p.parse(context, text)
+            if isinstance(res, list):
+                result.extend(res)
+            else:
+                result.append(res)
        return result

-    def try_eval(self, context, items):
+    def process(self, context, return_values, contextual_concepts=None):
        log.debug("Evaluating parsing result.")
+
+        # init
+        if not isinstance(return_values, list):
+            return_values = [return_values]
+
+        if contextual_concepts:
+            return_values.extend(contextual_concepts)
+
        # group the evaluators by priority and sort them
        # The first one to be applied will be the one with the highest priority
        grouped_evaluators = {}
@@ -201,60 +211,102 @@ class Sheerka(Concept):
            grouped_evaluators.setdefault(item.priority, []).append(item)
        sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)

-        for priority in sorted_priorities:
-            log.debug("Processing priority " + str(priority))
-            for item in items:
-                log.debug(item)
-            original_items = items[:]
-            evaluated_items = []
-            for evaluator in grouped_evaluators[priority]:
-                if evaluator.matches(context, original_items):
-                    result = evaluator.eval(context, original_items)
-                    if isinstance(result, list):
-                        evaluated_items.extend(result)
+        # process
+        while True:
+            simple_digest = return_values[:]  # set(id(r) for r in return_values)
+
+            for priority in sorted_priorities:
+                # log.debug("Processing priority " + str(priority))
+                # for item in return_values:
+                #     log.debug(item)
+                original_items = return_values[:]
+                evaluated_items = []
+                to_delete = []
+                for evaluator in grouped_evaluators[priority]:
+
+                    # process evaluators that work on return value
+                    if isinstance(evaluator, OneReturnValueEvaluator):
+                        for item in original_items:
+                            if evaluator.matches(context, item):
+                                result = evaluator.eval(context, item)
+                                if result is None:
+                                    continue
+                                elif isinstance(result, list):
+                                    evaluated_items.extend(result)
+                                    to_delete.append(item)
+                                elif isinstance(result, ReturnValueConcept):
+                                    evaluated_items.append(result)
+                                    to_delete.append(item)
+                                else:
+                                    error = self.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result,
+                                                     evaluator=evaluator)
+                                    evaluated_items.append(self.ret("sheerka.process", False, error, parents=[item]))
+                                    to_delete.append(item)
+                    # process evaluators that work on all return values
                    else:
-                        evaluated_items.append(result)
+                        if evaluator.matches(context, original_items):
+                            results = evaluator.eval(context, original_items)
+                            if not isinstance(results, list):
+                                results = [results]
+                            for result in results:
+                                evaluated_items.append(result)
+                                to_delete.extend(result.parents)

-            # what was computed by this group will be the input of the following group
-            items = evaluated_items if len(evaluated_items) > 0 else original_items
+                return_values = evaluated_items
+                return_values.extend([item for item in original_items if item not in to_delete])

-        return items
+            # have we done something ?
+            to_compare = return_values[:]  # set(id(r) for r in return_values)
+            if simple_digest == to_compare:
+                break

-    def add_concept(self, exec_context, def_concept_node: DefConceptNode):
+        return return_values
+
+    def create_new_concept(self, context, concept):
        """
        Adds a new concept to the system
-        :param exec_context:
-        :param def_concept_node:  DefConceptNode
+        :param context:
+        :param concept: the concept to add (its key is initialized here before saving)
        :return: digest of the new concept
        """

-        # validate the node
-        get_names_visitor = PythonGetNamesVisitor()
+        concept.init_key()

-        concept = Concept(def_concept_node.name)
-        for prop in ("where", "pre", "post", "body"):
-            # put back the sources
-            concept_part_node = getattr(def_concept_node, prop)
-            if isinstance(concept_part_node, PythonNode):
-                get_names_visitor.visit(concept_part_node.ast)
-            source = concept_part_node.source if hasattr(concept_part_node, "source") else ""
-            setattr(concept, prop, source)
+        # checks for duplicate concepts
+        if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
+            error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_ENTRY + "." + concept.key, concept)
+            return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])

-        # try to find variables (eg props)
-        # Note that with this method, the variables will be created in the order of appearance
-        for token in def_concept_node.tokens["name"]:
-            if token.value in get_names_visitor.names:
-                concept.set_prop(token.value, None)
-
-        concept.init_key(def_concept_node.tokens["name"])
-        concept.add_codes(def_concept_node.get_codes())
+        # set id before saving in db
        self.set_id_if_needed(concept, False)

+        # save the new concept in sdp
        try:
-            self.sdp.add(exec_context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
+            self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
        except SheerkaDataProviderDuplicateKeyError as error:
-            return ReturnValue(self.add_concept.__name__, False, ErrorConcept(body=error), error.args[0])
-        return ReturnValue(self.add_concept.__name__, True, concept)
+            return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
+
+        # add in cache for quick further reference
+        self.concepts_cache[concept.key] = concept
+
+        # process the return if needed
+        ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
+        return ret
+
+    def add_codes_to_concept(self, context, concept):
+        """
+        Updates the codes of the newly created concept
+        Basically, it runs the parsers on all parts
+        :param concept:
+        :param context:
+        :return:
+        """
+        for part_key in ConceptParts:
+            source = getattr(concept, part_key.value)
+            if source is None or source == "":
+                continue
+            ret_val = self.expect_one(context, self.parse(context, source))
+            concept.codes[part_key] = ret_val

    def add_in_cache(self, concept):
        """
@@ -268,36 +320,85 @@ class Sheerka(Concept):
    def get(self, concept_key):
        """
        Tries to find a concept
-        TODO: how to manage single vs multiple instances
+        What is returned must be used as a template for another concept.
+        You must not modify the returned concept
        :param concept_key:
        :return:
        """

+        if isinstance(concept_key, BuiltinConcepts):
+            concept_key = str(concept_key)
+
        # first search in cache
        if concept_key in self.concepts_cache:
            return self.concepts_cache[concept_key]

-        return self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key) or \
-               self.new(self.UNKNOWN_CONCEPT_NAME, body=concept_key)
+        # else look in sdp
+        from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key)
+        if from_db is not None:
+            return from_db

-    def new(self, concept, **kwargs):
+        # else return new Unknown concept
+        # Note that I don't call the new() method, as it uses get() -> cyclic call
+        unknown_concept = Concept()
+        template = self.concepts_cache[str(BuiltinConcepts.UNKNOWN_CONCEPT)]
+        unknown_concept.update_from(template)
+        unknown_concept.body = concept_key
+        return unknown_concept
+
+    def new(self, concept_key, **kwargs):
        """
        Returns an instance of a new concept
-        TODO: Checks if the concept is supposed to be unique (ex Sheerka, or the number 'one' for example)
-        :param concept:
+        When the concept is supposed to be unique, returns the same instance
+        :param concept_key:
        :param kwargs:
        :return:
        """
+        template = self.get(concept_key)

-        if isinstance(concept, str):
-            concept = self.get(concept)
+        # manage concept not found
+        if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \
+            concept_key != BuiltinConcepts.UNKNOWN_CONCEPT:
+            return template

+        # manage singleton
+        if template.is_unique:
+            return template
+
+        # otherwise, create another instance
+        concept = self.builtin_cache[concept_key]() if concept_key in self.builtin_cache else Concept()
+        concept.update_from(template)
+
+        # update the properties
        for k, v in kwargs.items():
-            if hasattr(concept, k):
+            if k in concept.props:
+                concept.set_prop(k, v)
+            elif hasattr(concept, k):
                setattr(concept, k, v)
+            else:
+                return self.new(BuiltinConcepts.UNKNOWN_PROPERTY, body=k, concept=concept)

+        # TODO : add the concept to the list of known concepts (self.instances)
        return concept

+    def ret(self, who, status, value, message=None, parents=None):
+        """
+        Creates and returns a ReturnValue concept
+        :param who:
+        :param status:
+        :param value:
+        :param message:
+        :param parents:
+        :return:
+        """
+        return self.new(
+            BuiltinConcepts.RETURN_VALUE,
+            who=who,
+            status=status,
+            value=value,
+            message=message,
+            parents=parents)
+
    def isinstance(self, a, b):
        """
        return true if the concept a is an instance of the concept b
@@ -306,15 +407,40 @@ class Sheerka(Concept):
        :return:
        """

-        if not isinstance(a, Concept):
-            raise SyntaxError("The first parameter of isinstance MUST be a concept")
+        if isinstance(a, BuiltinConcepts):  # common KSI error ;-)
+            raise SyntaxError("Remember that the first parameter of isinstance MUST be a concept")

-        b_key = b if isinstance(b, str) else b.key
+        if not isinstance(a, Concept):
+            return False
+
+        b_key = b.key if isinstance(b, Concept) else str(b)

        # TODO : manage when a is the list of all possible b
        # for example, if a is a color, it will be found the entry 'All_Colors'
        return a.key == b_key

+    @staticmethod
+    def get_builtins_classes_as_dict():
+        res = {}
+        for c in core.utils.get_classes("core.builtin_concepts"):
+            if issubclass(c, Concept) and c != Concept:
+                res[c().key] = c
+
+        return res
+
    @staticmethod
    def test():
        return "I have access to Sheerka !"
+
+
+@dataclass
+class ExecutionContext:
+    """
+    To keep track of the execution of a request
+    """
+    who: object
+    event_digest: str
+    sheerka: Sheerka
+
+    def push(self, who):
+        return ExecutionContext(who, self.event_digest, self.sheerka)