Added SheerkaComparisonManager

This commit is contained in:
2020-05-17 20:19:26 +02:00
parent 56e0a9d338
commit 08e3086820
29 changed files with 586 additions and 148 deletions
+247
View File
@@ -0,0 +1,247 @@
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
# Sentinel recorded in debug traces when a parser/evaluator does not match an input.
NO_MATCH = "** No Match **"
class SheerkaExecute:
    """
    Manage the execution of a process flow.

    Dispatches return values through the registered parsers and evaluators
    of the owning Sheerka instance, one execution step at a time.
    """

    def __init__(self, sheerka):
        # The owning Sheerka instance; provides parsers, evaluators,
        # concept helpers (isinstance/new/ret) and configuration.
        self.sheerka = sheerka

    def call_parsers(self, execution_context, return_values):
        """
        Run the registered parsers over the USER_INPUT return values.

        Parsers are grouped by priority and applied highest-priority group
        first. Results whose body is a PARSER_RESULT are fed back as inputs
        to the following (lower-priority) groups. As soon as any group
        produces a successful match, the remaining groups are skipped.

        :param execution_context: context used for logging/tracing sub-steps
        :param return_values: a single return value or a list of them
        :return: list of return values after parsing; the original
                 USER_INPUT items are removed from the result
        """
        # return_values must be a list
        if not isinstance(return_values, list):
            return_values = [return_values]
        # first distinguish between what is for the parsers and what is not
        result = []
        to_process = []
        for r in return_values:
            if not r.status or not self.sheerka.isinstance(r.body, BuiltinConcepts.USER_INPUT):
                result.append(r)
            else:
                to_process.append(r)
        if not to_process:
            return result
        # keep track of the original user inputs, as they need to be removed at the end
        user_inputs = to_process[:]
        # group the parsers by priority
        instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
        instantiated_parsers = self.preprocess(execution_context, instantiated_parsers)
        grouped_parsers = {}
        for parser in [p for p in instantiated_parsers if p.enabled]:
            grouped_parsers.setdefault(parser.priority, []).append(parser)
        # higher priority groups run first
        sorted_priorities = sorted(grouped_parsers.keys(), reverse=True)
        stop_processing = False
        for priority in sorted_priorities:
            # snapshot: PARSER_RESULTs appended to to_process below are only
            # visible to the *next* priority groups, not the current one
            inputs_for_this_group = to_process[:]
            for parser in grouped_parsers[priority]:
                for return_value in inputs_for_this_group:
                    to_parse = return_value.body.body \
                        if self.sheerka.isinstance(return_value.body, BuiltinConcepts.USER_INPUT) \
                        else return_value.body
                    with execution_context.push(desc=f"Parsing using {parser.name}",
                                                logger=parser.verbose_log) as sub_context:
                        sub_context.add_inputs(to_parse=to_parse)
                        res = parser.parse(sub_context, to_parse)
                        if res is not None:
                            if hasattr(res, "__iter__"):
                                for r in res:
                                    if r is None:
                                        continue
                                    r.parents = [return_value]
                                    result.append(r)
                                    if self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT):
                                        # if a ParserResultConcept is returned, it will be used by the
                                        # parsers of the following groups
                                        to_process.append(r)
                                    if r.status:
                                        stop_processing = True
                            else:
                                res.parents = [return_value]
                                result.append(res)
                                if self.sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT):
                                    # if a ParserResultConcept is returned, it will be used by the
                                    # parsers of the following groups
                                    to_process.append(res)
                                if res.status:
                                    stop_processing = True
                            sub_context.add_values(return_values=res)
            if stop_processing:
                break  # Do not try the other priorities if a match is found
        # the raw user inputs were consumed by the parsers; drop them
        result = core.utils.remove_list_from_list(result, user_inputs)
        return result

    def call_evaluators(self, execution_context, return_values, process_step):
        """
        Iteratively apply the registered evaluators to the return values.

        Evaluators are grouped by priority (highest first). The whole pass
        is repeated until one full iteration leaves the return values
        unchanged (a fixed point is reached).

        :param execution_context: context used for logging/tracing sub-steps
        :param return_values: a single return value or a list of them
        :param process_step: the current execution step; only evaluators
               registered for this step are applied
        :return: the list of return values after evaluation
        """
        # Imported at call time (not module load) to avoid a circular import;
        # hoisted here so it is not re-executed for every evaluator/iteration.
        from evaluators.BaseEvaluator import OneReturnValueEvaluator
        # return_values must be a list
        if not isinstance(return_values, list):
            return_values = [return_values]
        # group the evaluators by priority and sort them
        # The first one to be applied will be the one with the highest priority
        grouped_evaluators = {}
        instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators]
        # pre-process evaluators if needed
        instantiated_evaluators = self.preprocess(execution_context, instantiated_evaluators)
        for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]:
            grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
        # order the groups by priority, the higher first
        sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)
        # process until a fixed point is reached
        iteration = 0
        while True:
            with execution_context.push(desc=f"iteration #{iteration}", iteration=iteration) as iteration_context:
                simple_digest = return_values[:]
                iteration_context.add_inputs(return_values=simple_digest)
                for priority in sorted_priorities:
                    original_items = return_values[:]
                    evaluated_items = []
                    to_delete = []
                    for evaluator in grouped_evaluators[priority]:
                        # fresh copy so per-run evaluator state does not leak between uses
                        evaluator = self.preprocess(execution_context, evaluator.__class__())
                        sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
                        with iteration_context.push(desc=sub_context_desc, logger=evaluator.verbose_log) as sub_context:
                            sub_context.add_inputs(return_values=original_items)
                            # process evaluators that work on one simple return value at the time
                            if isinstance(evaluator, OneReturnValueEvaluator):
                                debug_result = []
                                for item in original_items:
                                    if evaluator.matches(sub_context, item):
                                        # init the evaluator if possible
                                        if hasattr(evaluator, "init_evaluator") and not evaluator.is_initialized:
                                            evaluator.init_evaluator(sub_context, original_items)
                                        result = evaluator.eval(sub_context, item)
                                        if result is None:
                                            debug_result.append({"input": item, "return_value": None})
                                            continue
                                        to_delete.append(item)
                                        if isinstance(result, list):
                                            evaluated_items.extend(result)
                                        elif isinstance(result, ReturnValueConcept):
                                            evaluated_items.append(result)
                                        else:
                                            # evaluator returned something unexpected: wrap it in an
                                            # INVALID_RETURN_VALUE error so the flow keeps going
                                            error = self.sheerka.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result,
                                                                     evaluator=evaluator)
                                            result = self.sheerka.ret("sheerka.process", False, error, parents=[item])
                                            evaluated_items.append(result)
                                        debug_result.append({"input": item, "return_value": result})
                                    else:
                                        debug_result.append({"input": item, "return_value": NO_MATCH})
                                sub_context.add_values(return_values=debug_result)
                            # process evaluators that work on all return values
                            else:
                                if evaluator.matches(sub_context, original_items):
                                    results = evaluator.eval(sub_context, original_items)
                                    if results is None:
                                        continue
                                    if not isinstance(results, list):
                                        results = [results]
                                    for result in results:
                                        evaluated_items.append(result)
                                        # the inputs consumed by this result are superseded by it
                                        to_delete.extend(result.parents)
                                    sub_context.add_values(return_values=results)
                                else:
                                    sub_context.add_values(return_values=NO_MATCH)
                    # replace consumed items with their evaluation results,
                    # keep untouched items as-is
                    return_values = evaluated_items
                    return_values.extend([item for item in original_items if item not in to_delete])
                iteration_context.add_values(return_values=return_values[:])
                # have we done something ?
                to_compare = return_values[:]
                if simple_digest == to_compare:
                    break
                # inc the iteration and continue
                iteration += 1
        return return_values

    def execute(self, execution_context, return_values, execution_steps):
        """
        Executes process for all initial contexts.

        :param execution_context: context used for logging/tracing sub-steps
        :param return_values: a single return value or a list of them
        :param execution_steps: ordered steps to run; PARSING goes through
               the parsers, every other step through the evaluators
        :return: the return values after all steps have run
        """
        for step in execution_steps:
            # snapshot to detect whether the step changed anything
            copy = return_values[:] if hasattr(return_values, "__iter__") else [return_values]
            with execution_context.push(step=step, iteration=0, desc=f"{step=}") as sub_context:
                if step == BuiltinConcepts.PARSING:
                    return_values = self.call_parsers(sub_context, return_values)
                else:
                    return_values = self.call_evaluators(sub_context, return_values, step)
                if copy != return_values:
                    sub_context.log_result(return_values)
                sub_context.add_values(return_values=return_values)
        return return_values

    def preprocess(self, context, parsers_or_evaluators):
        """
        Apply the context's preprocess directives to parsers/evaluators.

        Each directive selects instances by name (exact or trailing-*
        wildcard, see ``matches``) and overwrites their matching attributes.

        :param context: execution context; its ``preprocess`` entries drive
               the attribute overrides (no-op when empty)
        :param parsers_or_evaluators: one instance or an iterable of them
        :return: the (mutated) input, in the same single/iterable form it
                 was given in
        """
        if not context.preprocess:
            return parsers_or_evaluators
        # normalize to a list, remembering the original shape
        if not hasattr(parsers_or_evaluators, "__iter__"):
            single_one = True
            parsers_or_evaluators = [parsers_or_evaluators]
        else:
            single_one = False
        for preprocess in context.preprocess:
            for e in parsers_or_evaluators:
                if self.matches(e.name, preprocess.get_value("name")):
                    for var_name in preprocess.values:
                        if var_name == "name":
                            # "name" is the selector, not an attribute to set
                            continue
                        if hasattr(e, var_name):
                            setattr(e, var_name, preprocess.get_value(var_name))
        return parsers_or_evaluators[0] if single_one else parsers_or_evaluators

    @staticmethod
    def matches(parser_or_evaluator_name, preprocessor_name):
        """
        Return True when a parser/evaluator name matches a preprocessor name.

        A trailing "*" in ``preprocessor_name`` acts as a prefix wildcard;
        otherwise the comparison is an exact match.
        """
        if preprocessor_name.endswith("*"):
            return parser_or_evaluator_name.startswith(preprocessor_name[:-1])
        else:
            return parser_or_evaluator_name == preprocessor_name