From 69f8c2835fb1e0369a539b31daadb1b3371df643 Mon Sep 17 00:00:00 2001
From: Kodjo Sossouvi <kodjo.sossouvi@gmail.com>
Date: Fri, 20 Dec 2019 12:25:15 +0100
Subject: [PATCH] Fixed initialisation issue for concepts with BNF definition

---
 core/sheerka.py                       | 62 +++++++++++++++++----------
 evaluators/ConceptEvaluator.py        |  3 +-
 evaluators/PythonEvaluator.py         | 17 +++++---
 evaluators/TooManySuccessEvaluator.py |  4 ++
 parsers/ConceptLexerParser.py         | 25 ++++++++---
 tests/test_ConceptLexerParser.py      | 22 ++++++++++
 tests/test_sheerka.py                 | 20 ++++++---
 7 files changed, 112 insertions(+), 41 deletions(-)

diff --git a/core/sheerka.py b/core/sheerka.py
index e07a5ef..54bb29e 100644
--- a/core/sheerka.py
+++ b/core/sheerka.py
@@ -15,6 +15,7 @@ concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EV
 CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
 DEBUG_TAB_SIZE = 4
 
+
 class Sheerka(Concept):
     """
     Main controller for the project
@@ -38,10 +39,9 @@ class Sheerka(Concept):
         # key is the key of the concept (not the name or the id)
         self.concepts_cache = {}
 
-        #
-        # Cache for all concepts BNF
-        #
-        self.concepts_definitions = {}
+        # Cache for concept definitions.
+        # Primarily used by unit tests that do not have access to sdp.
+        self.concepts_definition_cache = {}
 
         #
         # cache for concepts grammars
@@ -199,6 +199,7 @@ class Sheerka(Concept):
             return_values = [return_values]
 
         for return_value in return_values:
+            # make sure we only parse user input
             if not return_value.status or not self.isinstance(return_value.body, BuiltinConcepts.USER_INPUT):
                 continue
 
@@ -207,7 +208,8 @@ class Sheerka(Concept):
             if self.log.isEnabledFor(logging.DEBUG):
                 debug_text = "'" + to_parse + "'" if isinstance(to_parse, str) \
                     else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
-                # self.log.debug(f"Parsing {debug_text}")
+                execution_context.log(logger or self.log, f"Parsing {debug_text}")
+
             for parser in self.parsers.values():
                 p = parser(sheerka=self)
                 if logger:
@@ -224,17 +226,16 @@ class Sheerka(Concept):
 
         return result
 
-    def _call_evaluators(self, execution_context, return_values, process_step, evaluation_context=None):
-        """
-
-        """
+    def _call_evaluators(self, execution_context, return_values, process_step, evaluation_context=None, logger=None):
 
         # return_values must be a list
         if not isinstance(return_values, list):
             return_values = [return_values]
 
-        # evaluation context are contexts that may modify the behaviour of the execution
-        # They first need to be transformed into return values
+        # Evaluation contexts are contexts that may modify the behaviour of the execution,
+        # for example a concept indicating that the value is not wanted,
+        # or a concept indicating that we want the letter form of the response.
+        # They first need to be transformed into return values.
         if evaluation_context is None:
             evaluation_return_values = []
         else:
@@ -250,7 +251,11 @@ class Sheerka(Concept):
         # The first one to be applied will be the one with the highest priority
         grouped_evaluators = {}
         for evaluator in [e() for e in self.evaluators if e.enabled]:
+            if logger:
+                evaluator.log = logger
             grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
+
+        # order the groups by priority, highest first
         sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)
 
         # process
@@ -283,6 +288,7 @@ class Sheerka(Concept):
                                                      evaluator=evaluator)
                                     evaluated_items.append(self.ret("sheerka.process", False, error, parents=[item]))
                                     to_delete.append(item)
+
                     # process evaluators that work on all return values
                     else:
                         if evaluator.matches(execution_context, original_items):
@@ -326,7 +332,7 @@ class Sheerka(Concept):
             if step == BuiltinConcepts.PARSING:
                 return_values = self._call_parsers(sub_context, return_values, logger)
             else:
-                return_values = self._call_evaluators(sub_context, return_values, step)
+                return_values = self._call_evaluators(sub_context, return_values, step, None, logger)
 
             sub_context.log_result(logger or self.log, return_values)
 
@@ -357,6 +363,7 @@ class Sheerka(Concept):
         init_ret_value = None
 
         # checks for duplicate concepts
+        # TODO: check whether it exists in the cache first
         if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
             error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_ENTRY + "." + concept.key, concept)
             return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
@@ -366,12 +373,12 @@ class Sheerka(Concept):
 
         # add the BNF if known
         if concept.bnf:
-            concepts_definitions = self.concepts_definitions.copy()
+            concepts_definitions = self.get_concept_definition()
             concepts_definitions[concept] = concept.bnf
 
             # check if it's a valid BNF or whether it breaks the known rules
-            concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](grammars=self.concepts_grammars.copy())
-            sub_context = context.push(self.name, desc="Initializing concept definition")
+            concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS]()
+            sub_context = context.push(self.name, desc=f"Initializing concept definition for {concept}")
             sub_context.concepts[concept.key] = concept  # the concept is not in the real cache yet
             init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
             if not init_ret_value.status:
@@ -387,8 +394,6 @@ class Sheerka(Concept):
 
         # Updates the caches
         self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
-        if concepts_definitions is not None:
-            self.concepts_definitions = concepts_definitions
         if init_ret_value is not None and init_ret_value.status:
             self.concepts_grammars = init_ret_value.body
 
@@ -396,12 +401,13 @@ class Sheerka(Concept):
         ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
         return ret
 
-    def initialize_concept_asts(self, context, concept: Concept):
+    def initialize_concept_asts(self, context, concept: Concept, logger=None):
         """
         Updates the codes of the newly created concept
         Basically, it runs the parsers on all parts
         :param concept:
         :param context:
+        :param logger:
         :return:
         """
         # steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
@@ -414,7 +420,7 @@ class Sheerka(Concept):
                 continue
             else:
                 to_parse = self.ret(context.who, True, self.new(BuiltinConcepts.USER_INPUT, body=source))
-                concept.cached_asts[part_key] = self.execute(context, to_parse, steps)
+                concept.cached_asts[part_key] = self.execute(context, to_parse, steps, logger)
 
         for prop in concept.props:
             to_parse = self.ret(context.who, True, self.new(BuiltinConcepts.USER_INPUT, body=concept.props[prop].value))
@@ -429,18 +435,18 @@ class Sheerka(Concept):
             else:
                 self.concepts_cache[concept.key].cached_asts = concept.cached_asts
 
-    def eval_concept(self, context, concept: Concept, properties_to_eval=None):
+    def eval_concept(self, context, concept: Concept, properties_to_eval=None, logger=None):
         """
         Evaluation a concept
         It means that if the where clause is True, will evaluate the body
-        Also chc
         :param context:
         :param concept:
         :param properties_to_eval:
+        :param logger:
         :return:
         """
         if len(concept.cached_asts) == 0:
-            self.initialize_concept_asts(context, concept)
+            self.initialize_concept_asts(context, concept, logger)
 
         if properties_to_eval is None:
             properties_to_eval = ["where", "pre", "post", "body", "props"]
@@ -452,7 +458,7 @@ class Sheerka(Concept):
                 part_key = ConceptParts(prop)
                 if concept.cached_asts[part_key] is None:
                     continue
-                res = self.execute(context, concept.cached_asts[part_key], concept_evaluation_steps)
+                res = self.execute(context, concept.cached_asts[part_key], concept_evaluation_steps, logger)
                 res = core.builtin_helpers.expect_one(context, res)
                 setattr(concept.metadata, prop, res.value)
 
@@ -661,6 +667,13 @@ class Sheerka(Concept):
 
         return self.parsers_prefix + name
 
+    def get_concept_definition(self):
+        if self.concepts_definition_cache:
+            return self.concepts_definition_cache
+
+        self.concepts_definition_cache = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False) or {}
+        return self.concepts_definition_cache
+
     def concepts(self):
         res = []
         lst = self.sdp.list(self.CONCEPTS_ENTRY)
@@ -687,6 +700,9 @@ class Sheerka(Concept):
             else:
                 self.log.info(item)
 
+    def dump_definitions(self):
+        defs = self.sdp.get(self.CONCEPTS_DEFINITIONS_ENTRY)
+        self.log.info(defs)
 
     @staticmethod
     def get_builtins_classes_as_dict():
diff --git a/evaluators/ConceptEvaluator.py b/evaluators/ConceptEvaluator.py
index 752edba..b3f8e2d 100644
--- a/evaluators/ConceptEvaluator.py
+++ b/evaluators/ConceptEvaluator.py
@@ -29,12 +29,13 @@ class ConceptEvaluator(OneReturnValueEvaluator):
     def eval(self, context, return_value):
         sheerka = context.sheerka
         concept = return_value.value.value
+        context.log(self.verbose_log, f"Evaluating concept {concept}.", self.name)
 
         # pre condition should already be validated by the parser.
         # It's a mandatory condition for the concept before it can be recognized
 
         if len(concept.cached_asts) == 0:
-            sheerka.initialize_concept_asts(context, concept)
+            sheerka.initialize_concept_asts(context, concept, self.verbose_log)
 
         # TODO; check pre
         # if pre is not true, return Concept with a false value
diff --git a/evaluators/PythonEvaluator.py b/evaluators/PythonEvaluator.py
index 190d57b..1507a45 100644
--- a/evaluators/PythonEvaluator.py
+++ b/evaluators/PythonEvaluator.py
@@ -27,18 +27,19 @@ class PythonEvaluator(OneReturnValueEvaluator):
         sheerka = context.sheerka
         node = return_value.value.value
         try:
-            context.log(self.verbose_log, f"Evaluating python node {node}", self.name)
+            context.log(self.verbose_log, f"Evaluating python node {node}.", self.name)
             my_locals = self.get_locals(context, node.ast_)
             context.log(self.verbose_log, f"locals={my_locals}", self.name)
 
             if isinstance(node.ast_, ast.Expression):
-                context.log(self.verbose_log, "Evaluating using 'eval'", self.name)
+                context.log(self.verbose_log, "Evaluating using 'eval'.", self.name)
                 compiled = compile(node.ast_, "<string>", "eval")
                 evaluated = eval(compiled, {}, my_locals)
             else:
-                context.log(self.verbose_log, "Evaluating using 'exec'", self.name)
+                context.log(self.verbose_log, "Evaluating using 'exec'.", self.name)
                 evaluated = self.exec_with_return(node.ast_, my_locals)
 
+            context.log(self.verbose_log, f"{evaluated=}", self.name)
             return sheerka.ret(self.name, True, evaluated, parents=[return_value])
         except Exception as error:
             context.log_error(self.verbose_log, error, self.name)
@@ -48,6 +49,8 @@ class PythonEvaluator(OneReturnValueEvaluator):
     def get_locals(self, context, ast_):
         my_locals = {"sheerka": context.sheerka}
         if context.obj:
+            context.log(self.verbose_log,
+                        f"Concept '{context.obj}' is in context. Adding its properties to locals if any.", self.name)
             for prop_name, prop_value in context.obj.props.items():
                 my_locals[prop_name] = prop_value.value
 
@@ -56,12 +59,16 @@ class PythonEvaluator(OneReturnValueEvaluator):
         unreferenced_names_visitor.visit(node_concept)
 
         for name in unreferenced_names_visitor.names:
+            context.log(self.verbose_log, f"Resolving '{name}'.", self.name)
             concept = context.sheerka.new(name)
             if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
+                context.log(self.verbose_log, f"'{name}' is not a concept. Skipping.", self.name)
                 continue
 
-            sub_context = context.push(self.name, desc="Evaluating body", obj=concept)
-            context.sheerka.eval_concept(sub_context, concept, ["body"])
+            context.log(self.verbose_log, f"'{name}' is a concept. Evaluating body.", self.name)
+            sub_context = context.push(self.name, desc=f"Evaluating {concept}'s body", obj=concept)
+            sub_context.log_new(self.verbose_log)
+            context.sheerka.eval_concept(sub_context, concept, ["body"], self.verbose_log)
 
             if not context.sheerka.isa(concept.body, BuiltinConcepts.ERROR):
                 my_locals[name] = concept.body
diff --git a/evaluators/TooManySuccessEvaluator.py b/evaluators/TooManySuccessEvaluator.py
index 4833cec..f3aa740 100644
--- a/evaluators/TooManySuccessEvaluator.py
+++ b/evaluators/TooManySuccessEvaluator.py
@@ -53,7 +53,11 @@ class TooManySuccessEvaluator(AllReturnValuesEvaluator):
                 context.log(self.verbose_log, f"value={sheerka.value(s.value)}", self.name)
 
         if not core.builtin_helpers.is_same_success(sheerka, self.success):
+            context.log(self.verbose_log,
+                        f"Values are different. Raising {BuiltinConcepts.TOO_MANY_SUCCESS}.", self.name)
             too_many_success = sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS, body=self.success)
             return sheerka.ret(self.name, False, too_many_success, parents=return_values)
 
+        context.log(self.verbose_log,
+                    f"Values are the same. Nothing to do.", self.name)
         return None
diff --git a/parsers/ConceptLexerParser.py b/parsers/ConceptLexerParser.py
index d55bb10..6e86c03 100644
--- a/parsers/ConceptLexerParser.py
+++ b/parsers/ConceptLexerParser.py
@@ -463,16 +463,31 @@ class ConceptMatch(Match):
 
         return self.concept == other.concept
 
+    @staticmethod
+    def get_parsing_expression_from_name(name):
+        tokens = Tokenizer(name)
+        nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
+        if len(nodes) == 1:
+            return nodes[0]
+        else:
+            sequence = Sequence(nodes)
+            sequence.nodes = nodes
+            return sequence
+
     def _parse(self, parser):
         to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
         if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
             return None
 
-        if to_match not in parser.concepts_grammars:
-            return None
-
         self.concept = to_match  # Memoize
-        node = parser.concepts_grammars[to_match].parse(parser)
+
+        if to_match not in parser.concepts_grammars:
+            # Try to match the concept using its name
+            expr = self.get_parsing_expression_from_name(to_match.name)
+            node = expr.parse(parser)
+        else:
+            node = parser.concepts_grammars[to_match].parse(parser)
+
         if node is None:
             return None
 
@@ -616,7 +631,7 @@ class ConceptLexerParser(BaseParser):
                 isinstance(expression, OneOrMore) or \
                 isinstance(expression, Optional):
                 ret = expression
-                ret.nodes.extend([inner_get_model(e) for e in ret.elements])
+                ret.nodes = [inner_get_model(e) for e in ret.elements]
             else:
                 ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
 
diff --git a/tests/test_ConceptLexerParser.py b/tests/test_ConceptLexerParser.py
index 9e30417..993daab 100644
--- a/tests/test_ConceptLexerParser.py
+++ b/tests/test_ConceptLexerParser.py
@@ -485,6 +485,27 @@ def test_i_can_detect_duplicates_when_reference():
     assert res[1].value.body == [(foo, 0, 0, "twenty")]
 
 
+def test_i_can_parse_concept_reference_that_is_not_in_grammar():
+    context = get_context()
+    one = Concept(name="one")
+    two = Concept(name="two")
+    foo = Concept(name="foo")
+    context.sheerka.add_in_cache(one)
+    context.sheerka.add_in_cache(two)
+
+    concepts = {foo: Sequence("twenty", OrderedChoice(one, two))}
+    parser = ConceptLexerParser()
+    parser.initialize(context, concepts)
+
+    res = parser.parse(context, "twenty two")
+    assert res.status
+    assert res.value.body == [(foo, 0, 2, "twenty two")]
+
+    res = parser.parse(context, "twenty one")
+    assert res.status
+    assert res.value.body == [(foo, 0, 2, "twenty one")]
+
+
 def test_i_can_parse_zero_or_more():
     context = get_context()
     foo = Concept(name="foo")
@@ -741,6 +762,7 @@ def test_infinite_recursion_does_not_fail_if_a_concept_is_missing():
 
     assert foo in parser.concepts_grammars
 
+
 def test_i_can_detect_indirect_infinite_recursion_with_optional():
     # TODO infinite recursion with optional
     pass
diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py
index ba4015a..13d527e 100644
--- a/tests/test_sheerka.py
+++ b/tests/test_sheerka.py
@@ -319,6 +319,7 @@ def test_list_of_concept_is_sorted_by_id():
 
     assert concepts[0].id < concepts[-1].id
 
+
 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 #
 #               E V A L U A T I O N S
@@ -597,9 +598,7 @@ def test_i_can_create_concept_with_bnf_definition():
     sheerka = get_sheerka(False, False)
     a = Concept("a")
     sheerka.add_in_cache(a)
-    sheerka.concepts_grammars = ConceptLexerParser().initialize(
-        get_context(sheerka),
-        {a: OrderedChoice("one", "two")}).body
+    sheerka.concepts_definition_cache = {a: OrderedChoice("one", "two")}
 
     res = sheerka.evaluate_user_input("def concept plus from bnf a ('plus' plus)?")
     assert len(res) == 1
@@ -637,7 +636,6 @@ def test_i_can_eval_bnf_definitions():
     assert sheerka.isinstance(res[0].value, concept_a)
 
 
-
 def test_i_can_eval_bnf_definitions_with_variables():
     sheerka = get_sheerka()
     concept_a = sheerka.evaluate_user_input("def concept a from bnf 'one' | 'two'")[0].body.body
@@ -659,18 +657,26 @@ def test_i_can_eval_bnf_definitions_from_separate_instances():
     but make sure that the BNF are correctly persisted and loaded
     """
     sheerka = get_sheerka(False)
-    concept_a = sheerka.evaluate_user_input("def concept a from bnf 'one' | 'two'")[0].body.body
+    concept_a = sheerka.evaluate_user_input("def concept a from bnf 'one' 'two'")[0].body.body
 
-    res = get_sheerka(False).evaluate_user_input("one")
+    res = get_sheerka(False).evaluate_user_input("one two")
     assert len(res) == 1
     assert res[0].status
     assert sheerka.isinstance(res[0].value, concept_a)
 
-    res = get_sheerka(False).evaluate_user_input("two")
+    # add another bnf definition
+    concept_b = sheerka.evaluate_user_input("def concept b from bnf a 'three'")[0].body.body
+
+    res = get_sheerka(False).evaluate_user_input("one two")  # previous one still works
     assert len(res) == 1
     assert res[0].status
     assert sheerka.isinstance(res[0].value, concept_a)
 
+    res = get_sheerka(False).evaluate_user_input("one two three")  # new one works
+    assert len(res) == 1
+    assert res[0].status
+    assert sheerka.isinstance(res[0].value, concept_b)
+
 
 def get_sheerka(use_dict=True, skip_builtins_in_db=True):
     root = "mem://" if use_dict else root_folder