Fixed initialisation issue for concepts with BNF definition

2019-12-20 12:25:15 +01:00
parent 5c95d918ad
commit 69f8c2835f
7 changed files with 112 additions and 41 deletions
@@ -15,6 +15,7 @@ concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EV
 CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
 DEBUG_TAB_SIZE = 4

+
 class Sheerka(Concept):
    """
    Main controller for the project
@@ -38,10 +39,9 @@ class Sheerka(Concept):
        # key is the key of the concept (not the name or the id)
        self.concepts_cache = {}

-        #
-        # Cache for all concepts BNF
-        #
-        self.concepts_definitions = {}
+        # Cache for concept definitions.
+        # Primarily used by unit tests that do not have access to sdp
+        self.concepts_definition_cache = {}

        #
        # cache for concepts grammars
@@ -199,6 +199,7 @@ class Sheerka(Concept):
            return_values = [return_values]

        for return_value in return_values:
+            # make sure we only parse user input
            if not return_value.status or not self.isinstance(return_value.body, BuiltinConcepts.USER_INPUT):
                continue

@@ -207,7 +208,8 @@ class Sheerka(Concept):
            if self.log.isEnabledFor(logging.DEBUG):
                debug_text = "'" + to_parse + "'" if isinstance(to_parse, str) \
                    else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
-                # self.log.debug(f"Parsing {debug_text}")
+                execution_context.log(logger or self.log, f"Parsing {debug_text}")
+
            for parser in self.parsers.values():
                p = parser(sheerka=self)
                if logger:
@@ -224,17 +226,16 @@ class Sheerka(Concept):

        return result

-    def _call_evaluators(self, execution_context, return_values, process_step, evaluation_context=None):
-        """
-
-        """
+    def _call_evaluators(self, execution_context, return_values, process_step, evaluation_context=None, logger=None):

        # return_values must be a list
        if not isinstance(return_values, list):
            return_values = [return_values]

-        # evaluation context are contexts that may modify the behaviour of the execution
-        # They first need to be transformed into return values
+        # Evaluation contexts are contexts that may modify the behaviour of the execution
+        # For example, a concept to indicate that the value is not wanted
+        # Or a concept to indicate that we want the letter form of the response
+        # But first, they need to be transformed into return values
        if evaluation_context is None:
            evaluation_return_values = []
        else:
@@ -250,7 +251,11 @@ class Sheerka(Concept):
        # The first one to be applied will be the one with the highest priority
        grouped_evaluators = {}
        for evaluator in [e() for e in self.evaluators if e.enabled]:
+            if logger:
+                evaluator.log = logger
            grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
+
+        # order the groups by priority, highest first
        sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)

        # process
@@ -283,6 +288,7 @@ class Sheerka(Concept):
                                                     evaluator=evaluator)
                                    evaluated_items.append(self.ret("sheerka.process", False, error, parents=[item]))
                                    to_delete.append(item)
+
                    # process evaluators that work on all return values
                    else:
                        if evaluator.matches(execution_context, original_items):
@@ -326,7 +332,7 @@ class Sheerka(Concept):
            if step == BuiltinConcepts.PARSING:
                return_values = self._call_parsers(sub_context, return_values, logger)
            else:
-                return_values = self._call_evaluators(sub_context, return_values, step)
+                return_values = self._call_evaluators(sub_context, return_values, step, None, logger)

            sub_context.log_result(logger or self.log, return_values)

@@ -357,6 +363,7 @@ class Sheerka(Concept):
        init_ret_value = None

        # checks for duplicate concepts
+        # TODO: check if it exists in cache first
        if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
            error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_ENTRY + "." + concept.key, concept)
            return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
@@ -366,12 +373,12 @@ class Sheerka(Concept):

        # add the BNF if known
        if concept.bnf:
-            concepts_definitions = self.concepts_definitions.copy()
+            concepts_definitions = self.get_concept_definition()
            concepts_definitions[concept] = concept.bnf

            # check if it's a valid BNF or whether it breaks the known rules
-            concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](grammars=self.concepts_grammars.copy())
-            sub_context = context.push(self.name, desc="Initializing concept definition")
+            concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS]()
+            sub_context = context.push(self.name, desc=f"Initializing concept definition for {concept}")
            sub_context.concepts[concept.key] = concept  # the concept is not in the real cache yet
            init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
            if not init_ret_value.status:
@@ -387,8 +394,6 @@ class Sheerka(Concept):

        # Updates the caches
        self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
-        if concepts_definitions is not None:
-            self.concepts_definitions = concepts_definitions
        if init_ret_value is not None and init_ret_value.status:
            self.concepts_grammars = init_ret_value.body

@@ -396,12 +401,13 @@ class Sheerka(Concept):
        ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
        return ret

-    def initialize_concept_asts(self, context, concept: Concept):
+    def initialize_concept_asts(self, context, concept: Concept, logger=None):
        """
        Updates the codes of the newly created concept
        Basically, it runs the parsers on all parts
        :param concept:
        :param context:
+        :param logger:
        :return:
        """
        # steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
@@ -414,7 +420,7 @@ class Sheerka(Concept):
                continue
            else:
                to_parse = self.ret(context.who, True, self.new(BuiltinConcepts.USER_INPUT, body=source))
-                concept.cached_asts[part_key] = self.execute(context, to_parse, steps)
+                concept.cached_asts[part_key] = self.execute(context, to_parse, steps, logger)

        for prop in concept.props:
            to_parse = self.ret(context.who, True, self.new(BuiltinConcepts.USER_INPUT, body=concept.props[prop].value))
@@ -429,18 +435,18 @@ class Sheerka(Concept):
            else:
                self.concepts_cache[concept.key].cached_asts = concept.cached_asts

-    def eval_concept(self, context, concept: Concept, properties_to_eval=None):
+    def eval_concept(self, context, concept: Concept, properties_to_eval=None, logger=None):
        """
        Evaluates a concept
        It means that if the where clause is True, the body will be evaluated
-        Also chc
        :param context:
        :param concept:
        :param properties_to_eval:
+        :param logger:
        :return:
        """
        if len(concept.cached_asts) == 0:
-            self.initialize_concept_asts(context, concept)
+            self.initialize_concept_asts(context, concept, logger)

        if properties_to_eval is None:
            properties_to_eval = ["where", "pre", "post", "body", "props"]
@@ -452,7 +458,7 @@ class Sheerka(Concept):
                part_key = ConceptParts(prop)
                if concept.cached_asts[part_key] is None:
                    continue
-                res = self.execute(context, concept.cached_asts[part_key], concept_evaluation_steps)
+                res = self.execute(context, concept.cached_asts[part_key], concept_evaluation_steps, logger)
                res = core.builtin_helpers.expect_one(context, res)
                setattr(concept.metadata, prop, res.value)

@@ -661,6 +667,13 @@ class Sheerka(Concept):

        return self.parsers_prefix + name

+    def get_concept_definition(self):
+        if self.concepts_definition_cache:
+            return self.concepts_definition_cache
+
+        self.concepts_definition_cache = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False) or {}
+        return self.concepts_definition_cache
+
    def concepts(self):
        res = []
        lst = self.sdp.list(self.CONCEPTS_ENTRY)
@@ -687,6 +700,9 @@ class Sheerka(Concept):
            else:
                self.log.info(item)

+    def dump_definitions(self):
+        defs = self.sdp.get(self.CONCEPTS_DEFINITIONS_ENTRY)
+        self.log.info(defs)

    @staticmethod
    def get_builtins_classes_as_dict():
@@ -29,12 +29,13 @@ class ConceptEvaluator(OneReturnValueEvaluator):
    def eval(self, context, return_value):
        sheerka = context.sheerka
        concept = return_value.value.value
+        context.log(self.verbose_log, f"Evaluating concept {concept}.", self.name)

        # pre condition should already be validated by the parser.
        # It's a mandatory condition for the concept before it can be recognized

        if len(concept.cached_asts) == 0:
-            sheerka.initialize_concept_asts(context, concept)
+            sheerka.initialize_concept_asts(context, concept, self.verbose_log)

        # TODO: check pre
        # if pre is not true, return Concept with a false value
@@ -27,18 +27,19 @@ class PythonEvaluator(OneReturnValueEvaluator):
        sheerka = context.sheerka
        node = return_value.value.value
        try:
-            context.log(self.verbose_log, f"Evaluating python node {node}", self.name)
+            context.log(self.verbose_log, f"Evaluating python node {node}.", self.name)
            my_locals = self.get_locals(context, node.ast_)
            context.log(self.verbose_log, f"locals={my_locals}", self.name)

            if isinstance(node.ast_, ast.Expression):
-                context.log(self.verbose_log, "Evaluating using 'eval'", self.name)
+                context.log(self.verbose_log, "Evaluating using 'eval'.", self.name)
                compiled = compile(node.ast_, "<string>", "eval")
                evaluated = eval(compiled, {}, my_locals)
            else:
-                context.log(self.verbose_log, "Evaluating using 'exec'", self.name)
+                context.log(self.verbose_log, "Evaluating using 'exec'.", self.name)
                evaluated = self.exec_with_return(node.ast_, my_locals)

+            context.log(self.verbose_log, f"{evaluated=}", self.name)
            return sheerka.ret(self.name, True, evaluated, parents=[return_value])
        except Exception as error:
            context.log_error(self.verbose_log, error, self.name)
@@ -48,6 +49,8 @@ class PythonEvaluator(OneReturnValueEvaluator):
    def get_locals(self, context, ast_):
        my_locals = {"sheerka": context.sheerka}
        if context.obj:
+            context.log(self.verbose_log,
+                        f"Concept '{context.obj}' is in context. Adding its properties to locals if any.", self.name)
            for prop_name, prop_value in context.obj.props.items():
                my_locals[prop_name] = prop_value.value

@@ -56,12 +59,16 @@ class PythonEvaluator(OneReturnValueEvaluator):
        unreferenced_names_visitor.visit(node_concept)

        for name in unreferenced_names_visitor.names:
+            context.log(self.verbose_log, f"Resolving '{name}'.", self.name)
            concept = context.sheerka.new(name)
            if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
+                context.log(self.verbose_log, f"'{name}' is not a concept. Skipping.", self.name)
                continue

-            sub_context = context.push(self.name, desc="Evaluating body", obj=concept)
-            context.sheerka.eval_concept(sub_context, concept, ["body"])
+            context.log(self.verbose_log, f"'{name}' is a concept. Evaluating body.", self.name)
+            sub_context = context.push(self.name, desc=f"Evaluating {concept}'s body", obj=concept)
+            sub_context.log_new(self.verbose_log)
+            context.sheerka.eval_concept(sub_context, concept, ["body"], self.verbose_log)

            if not context.sheerka.isa(concept.body, BuiltinConcepts.ERROR):
                my_locals[name] = concept.body
@@ -53,7 +53,11 @@ class TooManySuccessEvaluator(AllReturnValuesEvaluator):
                context.log(self.verbose_log, f"value={sheerka.value(s.value)}", self.name)

        if not core.builtin_helpers.is_same_success(sheerka, self.success):
+            context.log(self.verbose_log,
+                        f"Values are different. Raising {BuiltinConcepts.TOO_MANY_SUCCESS}.", self.name)
            too_many_success = sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS, body=self.success)
            return sheerka.ret(self.name, False, too_many_success, parents=return_values)

+        context.log(self.verbose_log,
+                    f"Values are the same. Nothing to do.", self.name)
        return None
@@ -463,16 +463,31 @@ class ConceptMatch(Match):

        return self.concept == other.concept

+    @staticmethod
+    def get_parsing_expression_from_name(name):
+        tokens = Tokenizer(name)
+        nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
+        if len(nodes) == 1:
+            return nodes[0]
+        else:
+            sequence = Sequence(nodes)
+            sequence.nodes = nodes
+            return sequence
+
    def _parse(self, parser):
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

-        if to_match not in parser.concepts_grammars:
-            return None
-
        self.concept = to_match  # Memoize
+
+        if to_match not in parser.concepts_grammars:
+            # Try to match the concept using its name
+            expr = self.get_parsing_expression_from_name(to_match.name)
+            node = expr.parse(parser)
+        else:
            node = parser.concepts_grammars[to_match].parse(parser)
+
        if node is None:
            return None

@@ -616,7 +631,7 @@ class ConceptLexerParser(BaseParser):
                isinstance(expression, OneOrMore) or \
                isinstance(expression, Optional):
                ret = expression
-                ret.nodes.extend([inner_get_model(e) for e in ret.elements])
+                ret.nodes = [inner_get_model(e) for e in ret.elements]
            else:
                ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)

@@ -485,6 +485,27 @@ def test_i_can_detect_duplicates_when_reference():
    assert res[1].value.body == [(foo, 0, 0, "twenty")]


+def test_i_can_parse_concept_reference_that_is_not_in_grammar():
+    context = get_context()
+    one = Concept(name="one")
+    two = Concept(name="two")
+    foo = Concept(name="foo")
+    context.sheerka.add_in_cache(one)
+    context.sheerka.add_in_cache(two)
+
+    concepts = {foo: Sequence("twenty", OrderedChoice(one, two))}
+    parser = ConceptLexerParser()
+    parser.initialize(context, concepts)
+
+    res = parser.parse(context, "twenty two")
+    assert res.status
+    assert res.value.body == [(foo, 0, 2, "twenty two")]
+
+    res = parser.parse(context, "twenty one")
+    assert res.status
+    assert res.value.body == [(foo, 0, 2, "twenty one")]
+
+
 def test_i_can_parse_zero_or_more():
    context = get_context()
    foo = Concept(name="foo")
@@ -741,6 +762,7 @@ def test_infinite_recursion_does_not_fail_if_a_concept_is_missing():

    assert foo in parser.concepts_grammars

+
 def test_i_can_detect_indirect_infinite_recursion_with_optional():
    # TODO: not implemented yet. Placeholder for the detection of an indirect
    # infinite recursion that goes through an Optional element
    pass
@@ -319,6 +319,7 @@ def test_list_of_concept_is_sorted_by_id():

    assert concepts[0].id < concepts[-1].id

+
 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 #
 #               E V A L U A T I O N S
@@ -597,9 +598,7 @@ def test_i_can_create_concept_with_bnf_definition():
    sheerka = get_sheerka(False, False)
    a = Concept("a")
    sheerka.add_in_cache(a)
-    sheerka.concepts_grammars = ConceptLexerParser().initialize(
-        get_context(sheerka),
-        {a: OrderedChoice("one", "two")}).body
+    sheerka.concepts_definition_cache = {a: OrderedChoice("one", "two")}

    res = sheerka.evaluate_user_input("def concept plus from bnf a ('plus' plus)?")
    assert len(res) == 1
@@ -637,7 +636,6 @@ def test_i_can_eval_bnf_definitions():
    assert sheerka.isinstance(res[0].value, concept_a)


-
 def test_i_can_eval_bnf_definitions_with_variables():
    sheerka = get_sheerka()
    concept_a = sheerka.evaluate_user_input("def concept a from bnf 'one' | 'two'")[0].body.body
@@ -659,18 +657,26 @@ def test_i_can_eval_bnf_definitions_from_separate_instances():
    but make sure that the BNF are correctly persisted and loaded
    """
    sheerka = get_sheerka(False)
-    concept_a = sheerka.evaluate_user_input("def concept a from bnf 'one' | 'two'")[0].body.body
+    concept_a = sheerka.evaluate_user_input("def concept a from bnf 'one' 'two'")[0].body.body

-    res = get_sheerka(False).evaluate_user_input("one")
+    res = get_sheerka(False).evaluate_user_input("one two")
    assert len(res) == 1
    assert res[0].status
    assert sheerka.isinstance(res[0].value, concept_a)

-    res = get_sheerka(False).evaluate_user_input("two")
+    # add another bnf definition
+    concept_b = sheerka.evaluate_user_input("def concept b from bnf a 'three'")[0].body.body
+
+    res = get_sheerka(False).evaluate_user_input("one two")  # previous one still works
    assert len(res) == 1
    assert res[0].status
    assert sheerka.isinstance(res[0].value, concept_a)

+    res = get_sheerka(False).evaluate_user_input("one two three")  # new one works
+    assert len(res) == 1
+    assert res[0].status
+    assert sheerka.isinstance(res[0].value, concept_b)
+

 def get_sheerka(use_dict=True, skip_builtins_in_db=True):
    root = "mem://" if use_dict else root_folder