I can manage infinite recursion when building concept

2020-12-10 21:08:10 +01:00
parent 4b6e1dd55b
commit 657c7536f7
28 changed files with 816 additions and 446 deletions
@@ -8,7 +8,8 @@ from cache.Cache import Cache
 from cache.CacheManager import CacheManager
 from cache.DictionaryCache import DictionaryCache
 from cache.IncCache import IncCache
-from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept, BuiltinErrors, UnknownConcept
+from core.builtin_concepts import ErrorConcept, ReturnValueConcept, UnknownConcept
+from core.builtin_concepts_ids import BuiltinErrors, BuiltinConcepts
 from core.concept import Concept, ConceptParts, NotInit, get_concept_attrs
 from core.error import ErrorObj
 from core.global_symbols import EVENT_USER_INPUT_EVALUATED
@@ -2,7 +2,7 @@ import sys
 import time
 from os import path

-from core.builtin_concepts import BuiltinConcepts, BuiltinContainers
+from core.builtin_concepts_ids import BuiltinConcepts, BuiltinContainers
 from core.builtin_helpers import ensure_concept
 from core.concept import Concept
 from core.sheerka.services.sheerka_service import BaseService
@@ -5,7 +5,8 @@ from cache.Cache import Cache
 from cache.CacheManager import ConceptNotFound
 from cache.ListIfNeededCache import ListIfNeededCache
 from cache.SetCache import SetCache
-from core.builtin_concepts import BuiltinConcepts, ErrorConcept, AllBuiltinConcepts, BuiltinUnique
+from core.builtin_concepts import ErrorConcept
+from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, BuiltinUnique
 from core.builtin_helpers import ensure_concept
 from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, NotInit, \
    ConceptMetadata
@@ -9,6 +9,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput
 from core.sheerka.services.sheerka_service import BaseService
 from core.tokenizer import Tokenizer
 from core.utils import unstr_concept
+from parsers.BaseNodeParser import ConceptNode
 from parsers.ExpressionParser import ExpressionParser, TrueifyVisitor

 CONCEPT_EVALUATION_STEPS = [
@@ -17,6 +18,11 @@ CONCEPT_EVALUATION_STEPS = [
    BuiltinConcepts.AFTER_EVALUATION]


+@dataclass
+class ChickenAndEggException(Exception):
+    error: Concept
+
+
@dataclass
 class WhereClauseDef:
    concept: Concept  # concept on which the where clause is applied
@@ -148,6 +154,27 @@ class SheerkaEvaluateConcept(BaseService):
        else:
            return None

+    def get_recursive_definitions(self, concept, return_values):
+        """
+        Yields the parsers whose parsing result would lead to a recursive evaluation of the concept
+        :param concept:
+        :param return_values:
+        :return:
+        """
+        if concept.name in concept.variables():
+            # There is a variable with the same name as the concept
+            # During evaluation, inner variables take precedence over other concepts
+            # So there won't be any cyclic reference, the variable will be picked
+            return
+        for parser in [r.body for r in return_values if
+                       r.status and self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT)]:
+            parsed = parser.body if isinstance(parser.body, list) else [parser.body]
+            for parsed_item in parsed:
+                if isinstance(parsed_item, Concept) and parsed_item.id == concept.id:
+                    yield parser.parser
+                elif isinstance(parsed_item, ConceptNode) and parsed_item.concept.id == concept.id:
+                    yield parser.parser
+
    def apply_where_clause(self, context, where_clause_def, return_values):
        """
        Apply intermediate where clause when evaluating concept variables
@@ -225,14 +252,57 @@ class SheerkaEvaluateConcept(BaseService):
        """

        def is_only_successful(r):
+            """
+
+            :param r: return_value
+            :return:
+            """
            return context.sheerka.isinstance(r, BuiltinConcepts.RETURN_VALUE) and \
                   context.sheerka.isinstance(r.body, BuiltinConcepts.ONLY_SUCCESSFUL)

        def parse_token_concept(s):
+            """
+
+            :param s: source
+            :return:
+            """
            if s.startswith("c:") and (identifier := unstr_concept(s)) != (None, None):
                return self.sheerka.fast_resolve(identifier)
            return None

+        def get_return_value(current_context, c, s, p):
+            """
+
+            :param current_context:
+            :param c: concept
+            :param s: source
+            :param p: part of the concept being parsed
+            :return:
+            """
+            while True:
+                return_value = parse_unrecognized(current_context,
+                                                  s,
+                                                  parsers="all",
+                                                  prop=p,
+                                                  filter_func=only_successful)
+
+                if not return_value.status:
+                    if current_context.preprocess:
+                        raise ChickenAndEggException(self.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body={c}))
+                    else:
+                        raise Exception(f"Failed to build '{s}'. But it doesn't seems to be recursion")
+
+                return_value = return_value.body.body if is_only_successful(return_value) else [return_value]
+                recursive_parsers = list(self.get_recursive_definitions(c, return_value))
+
+                if len(recursive_parsers) == 0:
+                    return return_value
+
+                desc = f"Removing parsers {recursive_parsers}"
+                current_context = current_context.push(context.action, context.action_context, desc=desc)
+                for recursive_parser in recursive_parsers:
+                    current_context.add_preprocess(recursive_parser.name, enabled=False)
+
        for part_key in AllConceptParts:
            if part_key in concept.get_compiled():
                continue
@@ -253,12 +323,7 @@ class SheerkaEvaluateConcept(BaseService):
                    concept.get_compiled()[part_key] = concept_found
                else:
                    # ...or a list of ReturnValueConcept to resolve
-                    res = parse_unrecognized(context,
-                                             source,
-                                             parsers="all",
-                                             prop=part_key,
-                                             filter_func=only_successful)
-                    concept.get_compiled()[part_key] = res.body.body if is_only_successful(res) else res
+                    concept.get_compiled()[part_key] = get_return_value(context, concept, source, part_key)

        for var_name, default_value in concept.get_metadata().variables:
            if var_name in concept.get_compiled():
@@ -279,12 +344,7 @@ class SheerkaEvaluateConcept(BaseService):
                    concept.get_compiled()[var_name] = concept_found
                else:
                    # ...or a list of ReturnValueConcept to resolve
-                    res = parse_unrecognized(context,
-                                             default_value,
-                                             parsers="all",
-                                             prop=var_name,
-                                             filter_func=only_successful)
-                    concept.get_compiled()[var_name] = res.body.body if is_only_successful(res) else res
+                    concept.get_compiled()[var_name] = get_return_value(context, concept, default_value, var_name)

        # Updates the cache of concepts when possible
        # This piece of code is not used, a the compile part is removed by sheerka.new_from_template()
@@ -469,7 +529,10 @@ class SheerkaEvaluateConcept(BaseService):
            if context.sheerka.isa(concept, context.sheerka.new(BuiltinConcepts.AUTO_EVAL)):
                sub_context.protected_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED)

-            self.initialize_concept_asts(sub_context, concept)
+            try:
+                self.initialize_concept_asts(sub_context, concept)
+            except ChickenAndEggException as ex:
+                return ex.error

            # to make sure of the order, it don't use ConceptParts.get_parts()
            # variables must be evaluated first, body must be evaluated before where
@@ -525,7 +588,8 @@ class SheerkaEvaluateConcept(BaseService):
                    # validate PRE and WHERE condition
                    if part_key in (ConceptParts.PRE, ConceptParts.WHERE) and not self.sheerka.objvalue(resolved):
                        return self.sheerka.new(BuiltinConcepts.CONDITION_FAILED,
-                                                body=getattr(concept.get_metadata(), concept_part_value(metadata_to_eval)),
+                                                body=getattr(concept.get_metadata(),
+                                                             concept_part_value(metadata_to_eval)),
                                                concept=concept,
                                                prop=part_key)

@@ -169,16 +169,34 @@ class SheerkaExecute(BaseService):
        self.pi_cache = Cache(default=lambda key: ParserInput(key), max_size=20)
        self.instantiated_evaluators = None
        self.evaluators_by_name = None
-        self.grouped_evaluators_cache = {}  # key=step, value=tuple(evaluators for this step, sorted priorities)
+
+        self.instantiated_parsers = None
+        self.parsers_by_name = None
        self.old_values = []

+        # cache for all preregistered evaluator combinations
+        # the key is the concatenation of the step and the names of the evaluators in the group
+        # ex : BEFORE_EVALUATION|Python|Sya|Bnf
+        # The value is a tuple,
+        # The first entry is the grouped evaluators
+        # ex : {60 : [PythonEvaluator(), SyaEvaluator()], 50: [BnfEvaluator()]}
+        # The second entry is the list of sorted priorities, ex : [60, 50]
+        self.grouped_evaluators_cache = {}
+
+        # cache for preregistered parsers
+        # Same construction as for the evaluators
+        # Except 1 : the key does not have a step component. It is simply the list of parsers' names
+        # Except 2 : we store the type of the parser, not its instance
+        self.grouped_parsers_cache = {}
+
    def initialize(self):
        self.sheerka.bind_service_method(self.execute, True)

        self.sheerka.cache_manager.register_cache(self.PARSERS_INPUTS_ENTRY, self.pi_cache, False)
-        self.reset_evaluators()
+        self.reset_registered_evaluators()
+        self.reset_registered_parsers()

-    def reset_evaluators(self):
+    def reset_registered_evaluators(self):
        # instantiate evaluators, once for all, only keep when it's enabled
        self.instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators]
        self.instantiated_evaluators = [e for e in self.instantiated_evaluators if e.enabled]
@@ -186,44 +204,49 @@ class SheerkaExecute(BaseService):

        # get default evaluators by process step
        for process_step in EVALUATOR_STEPS:
-            self.grouped_evaluators_cache[f"{process_step}|__default"] = self.get_grouped_evaluators(
+            self.grouped_evaluators_cache[f"{process_step}|__default"] = self.get_grouped(
                [e for e in self.instantiated_evaluators if process_step in e.steps])

-    # @staticmethod
-    # def get_grouped_evaluators(instantiated_evaluators, process_step):
-    #     """
-    #     For a given list of evaluators and a given process step
-    #     Computes
-    #         * the evaluators eligible for this step
-    #         * the list of sorted priorities for theses evaluators
-    #     :param instantiated_evaluators:
-    #     :param process_step:
-    #     :return:
-    #     """
-    #     grouped = {}
-    #     for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]:
-    #         grouped.setdefault(evaluator.priority, []).append(evaluator)
-    #
-    #     sorted_groups = sorted(grouped.keys(), reverse=True)
-    #     return grouped, sorted_groups
+    def reset_registered_parsers(self):
+        """
+        Browse all parsers and only keep those which are enabled
+        :return:
+        """
+        self.instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
+        self.instantiated_parsers = [p for p in self.instantiated_parsers if p.enabled]
+        self.parsers_by_name = {p.short_name: p for p in self.instantiated_parsers}
+
+        self.grouped_parsers_cache["__default"] = self.get_grouped(self.instantiated_parsers, use_classes=True)

    @staticmethod
-    def get_grouped_evaluators(evaluators):
+    def get_grouped(evaluators, use_classes=False):
        """
        For a given list of evaluators,
        group them by priorities
        sort the priorities
        :param evaluators:
+        :param use_classes: if True, store the class (the type) of the evaluator, not its instance
        :return: tuple({priority: List of evaluators with this priority}, list of sorted priorities)
        """
        grouped = {}
        for evaluator in evaluators:
-            grouped.setdefault(evaluator.priority, []).append(evaluator)
+            grouped.setdefault(evaluator.priority, []).append(type(evaluator) if use_classes else evaluator)

        sorted_groups = sorted(grouped.keys(), reverse=True)
        return grouped, sorted_groups

    def preprocess(self, items, preprocess_definitions):
+        """
+        Modifies the attributes of the given items
+        :param items: the parsers or the evaluators to alter
+        :param preprocess_definitions: list of BuiltinConcepts.EVALUATOR_PRE_PROCESS describing the modifications
+            preprocess.get_value("preprocess_name") gives the name that is matched against the name of each item
+            preprocess.values() gives the alterations
+        ex:
+            Sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS, preprocess_name="parser_name", enabled=False)
+            Will disable parser 'parser_name'
+        :return:
+        """
        for preprocess in preprocess_definitions:
            for item in items:
                if self.matches(item.name, preprocess.get_value("preprocess_name")):
@@ -234,51 +257,9 @@ class SheerkaExecute(BaseService):
                            self.old_values.append((item, var_name, getattr(item, var_name)))
                            setattr(item, var_name, value)

-    def preprocess_old(self, context, parsers_or_evaluators, mode):
-        if mode == "parsers":
-            if not context.preprocess and not context.preprocess_parsers:
-                return parsers_or_evaluators
-            items = context.preprocess_parsers
-        elif mode == "evaluators":
-            if not context.preprocess and not context.preprocess_evaluators:
-                return parsers_or_evaluators
-            items = context.preprocess_evaluators
-        else:
-            raise ValueError(mode)
-
-        if not hasattr(parsers_or_evaluators, "__iter__"):
-            single_one = True
-            parsers_or_evaluators = [parsers_or_evaluators]
-        else:
-            single_one = False
-
-        if items:
-            res = []
-            for item in items:
-                for e in parsers_or_evaluators:
-                    if item == e.name:
-                        res.append(e)
-                        break
-                else:
-                    raise ValueError(f"{item} not found.")
-            parsers_or_evaluators = res
-
-        if context.preprocess:
-            for preprocess in context.preprocess:
-                for e in parsers_or_evaluators:
-                    if self.matches(e.name, preprocess.get_value("name")):
-                        for var_name in preprocess.values:
-                            if var_name == "name":
-                                continue
-                            if hasattr(e, var_name):
-                                self.old_values.append((e, var_name, getattr(e, var_name)))
-                                setattr(e, var_name, preprocess.get_value(var_name))
-
-        return parsers_or_evaluators[0] if single_one else parsers_or_evaluators
-
    def get_evaluators(self, context, process_step):
        """
-        Returns the list of evaluators to use for a specific test
+        Returns the list of evaluators to use for a specific context need
        :param context:
        :param process_step:
        :return:
@@ -287,30 +268,64 @@ class SheerkaExecute(BaseService):
        if not context.preprocess_evaluators and not context.preprocess:
            return self.grouped_evaluators_cache[f"{process_step}|__default"]

-        # First case, only use a subset of evaluators
-        if context.preprocess_evaluators and not context.preprocess:
-            key = str(process_step) + "|" + "|".join(context.preprocess_evaluators)
+        # Other case, only use a subset of evaluators
+        selected = context.preprocess_evaluators
+        if selected and not context.preprocess:
+            key = str(process_step) + "|" + "|".join(selected)
            try:
                return self.grouped_evaluators_cache[key]
            except KeyError:
-                evaluators = [self.evaluators_by_name[e] for e in context.preprocess_evaluators]
-                grouped = self.get_grouped_evaluators(evaluators)
+                evaluators = [self.evaluators_by_name[e] for e in selected if e in self.evaluators_by_name]
+                evaluators = [e for e in evaluators if process_step in e.steps]  # check the process step
+                grouped = self.get_grouped(evaluators)
                self.grouped_evaluators_cache[key] = grouped
                return grouped

-        # final case, evaluators attributes are modified by the context
-        # So first, get the modified evaluators
-        evaluators = [self.evaluators_by_name[e] for e in
-                      context.preprocess_evaluators] if context.preprocess_evaluators else self.instantiated_evaluators
+        # Final case, evaluators attributes are modified by the context
+        evaluators = [self.evaluators_by_name[e] for e in selected if
+                      e in self.evaluators_by_name] if selected else self.instantiated_evaluators
+        evaluators = [e for e in evaluators if process_step in e.steps]  # check the process step
        self.preprocess(evaluators, context.preprocess)
        evaluators = [e for e in evaluators if e.enabled]  # make sure they are still enabled
-        key = str(process_step) + "|" + "|".join([e.name for e in evaluators if e.enabled])
-        try:
-            return self.grouped_evaluators_cache[key]
-        except KeyError:
-            grouped = self.get_grouped_evaluators(evaluators)
-            self.grouped_evaluators_cache[key] = grouped
-            return grouped
+        return self.get_grouped(evaluators)
+
+    def get_parsers(self, context):
+        """
+        We cannot use a single instance of a parser shared among executions as it's common to have a parser
+        calling another parser or even calling itself
+        So the cache holds the parser classes and the sorted priorities
+        :param context:
+        :return:
+        """
+
+        def get_instances(from_cache):
+            grouped_instances = {priority: [p(sheerka=self.sheerka) for p in parsers_classes]
+                                 for priority, parsers_classes in from_cache[0].items()}
+            return grouped_instances, from_cache[1]
+
+        # Normal case, use all registered parsers
+        if not context.preprocess_parsers and not context.preprocess:
+            return get_instances(self.grouped_parsers_cache["__default"])
+
+        # Other case, only use a subset of parsers
+        # This case is heavily used by lexer node parsers, through parse_unrecognized
+        if context.preprocess_parsers and not context.preprocess:
+            key = "|".join(context.preprocess_parsers)
+            try:
+                return get_instances(self.grouped_parsers_cache[key])
+            except KeyError:
+                parsers = [self.parsers_by_name[p] for p in context.preprocess_parsers if p in self.parsers_by_name]
+                self.grouped_parsers_cache[key] = self.get_grouped(parsers, use_classes=True)
+                return get_instances(self.grouped_parsers_cache[key])
+
+        # final case, parsers attributes are modified by the context
+        # This is the case when we want to disable a specific parser, or change the order of priority
+        parsers = [self.parsers_by_name[p] for p in context.preprocess_parsers if p in self.parsers_by_name] \
+            if context.preprocess_parsers else self.instantiated_parsers
+        self.preprocess(parsers, context.preprocess)
+        parsers = [p for p in parsers if p.enabled]  # only keep those that are still enabled
+        groups, sorted_priorities = self.get_grouped(parsers, use_classes=True)
+        return get_instances((groups, sorted_priorities))

    def get_parser_input(self, text, tokens=None):
        """
@@ -367,14 +382,7 @@ class SheerkaExecute(BaseService):
        # keep track of the originals user inputs, as they need to be removed at the end
        user_inputs = to_process[:]

-        # group the parsers by priorities
-        instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
-        instantiated_parsers = self.preprocess_old(context, instantiated_parsers, "parsers")
-
-        grouped_parsers = {}
-        for parser in [p for p in instantiated_parsers if p.enabled]:
-            grouped_parsers.setdefault(parser.priority, []).append(parser)
-        sorted_priorities = sorted(grouped_parsers.keys(), reverse=True)
+        grouped_parsers, sorted_priorities = self.get_parsers(context)

        stop_processing = False
        for priority in sorted_priorities:
@@ -427,6 +435,7 @@ class SheerkaExecute(BaseService):
                break  # Do not try the other priorities if a match is found

        result = core.utils.remove_list_from_list(result, user_inputs)
+
        return result

    def call_evaluators(self, context, return_values, process_step):
@@ -470,7 +479,7 @@ class SheerkaExecute(BaseService):

                                        # init the evaluator is possible
                                        # KSI. 20201102 : Evaluators are now instantiated at startup,
-                                        # Can we move this section into reset_evaluators()
+                                        # Can we move this section into reset_registered_evaluators()
                                        if hasattr(evaluator, "init_evaluator") and not evaluator.is_initialized:
                                            evaluator.init_evaluator(sub_context, original_items)