From 197b0700facad40b8e15e35df4704aaf8e37d87a Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Sun, 29 Dec 2019 18:56:41 +0100 Subject: [PATCH] Added keyword c:xxx: to express that we want the concept, not its body --- core/tokenizer.py | 32 ++++++++++++++++ docs/blog.rst | 62 +++++++++++++++++++++++++++++- evaluators/PythonEvaluator.py | 24 +++++++++--- parsers/BaseParser.py | 7 +++- parsers/PythonParser.py | 16 ++++++-- tests/test_BaseParser.py | 10 +++++ tests/test_PythonEvaluator.py | 72 ++++++++++++----------------------- tests/test_PythonParser.py | 12 ++++++ tests/test_tokenizer.py | 21 ++++++---- 9 files changed, 191 insertions(+), 65 deletions(-) diff --git a/core/tokenizer.py b/core/tokenizer.py index 8f292e0..bf06e4a 100644 --- a/core/tokenizer.py +++ b/core/tokenizer.py @@ -8,6 +8,7 @@ class TokenKind(Enum): NEWLINE = "newline" KEYWORD = "keyword" IDENTIFIER = "identifier" + CONCEPT = "concept" STRING = "string" NUMBER = "number" TRUE = "true" @@ -210,6 +211,11 @@ class Tokenizer: self.i += len(newline) self.column = 1 self.line += 1 + elif c == "c" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":": + concept_name = self.eat_concept_name(self.i + 2, self.line, self.column) + yield Token(TokenKind.CONCEPT, concept_name, self.i, self.line, self.column) + self.i += len(concept_name) + 3 + self.column += len(concept_name) + 3 elif c.isalpha() or c == "_": identifier = self.eat_identifier(self.i) token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER @@ -233,6 +239,32 @@ class Tokenizer: yield Token(TokenKind.EOF, "", self.i, self.line, self.column) + def eat_concept_name(self, start, line, column): + result = "" + i = start + end_colon_found = False + + while i < self.text_len: + c = self.text[i] + + if c == "\n": + raise LexerError(f"New line is forbidden in concept name", result, i, line, column + 2 + len(result)) + + if c == ":": + end_colon_found = True + break + + result += c + i += 1 + + if not 
end_colon_found: + raise LexerError(f"Missing ending colon", result, i, line, column + 2 + len(result)) + + if result == "": + raise LexerError(f"Context name not found", result, start, line, column + 2 + len(result)) + + return result + def eat_whitespace(self, start): result = self.text[start] i = start + 1 diff --git a/docs/blog.rst b/docs/blog.rst index dbbf485..2ea1a54 100644 --- a/docs/blog.rst +++ b/docs/blog.rst @@ -767,4 +767,64 @@ Let's see an example def concept one as 1 def concept two as 2 - eval one + two \ No newline at end of file + eval one + two + +In this situation, I expect PythonEvaluator to resolve the concepts 'one' and 'two' and to return 1 + 2, hence 3 + +In this other situation + +:: + + def concept one as 1 + def concept desc a as sheerka.desc(a) + desc one + +I expect PythonEvaluator NOT to resolve the concept one and to pass it straight to the function. + +Unfortunately for me, in the current implementation, 'a' is resolved to the concept 'one', which is resolved to its +body "1". So the call fails, as there is no concept 1 (moreover, 1 is an integer, it's not even the string "1"). + +There will also be some cases where 'sheerka.desc()' expects the name of a concept (and the resolution of the concept +will be done inside the function). In this case, it's not the body nor the concept itself that is required, but the name +of the concept. + +So here are three cases where the behaviour of PythonEvaluator is required to be different. I cannot hard code these +behaviours as they depend on the context. + +The global idea to resolve this situation is to give Sheerka a memory. What I am currently working on is the possibility +**to create** and **to recognize** concepts. As a reminder: + +You can create simple concepts + +:: + + def concept one as 1 + +or concept using bnf + +:: + + def concept twenties from bnf twenty (one | two | three...)=unit as 20 + unit + + +Both can be recognised. 
+But if I define + +:: + + def a plus b as a + 1 + + +:code:`one + two` will be recognized but twenty two plus one is not correctly implemented yet. + +To go back to my issue with variable resolution in PythonEvaluator, the idea is to implement rules that will +recognize the concept, so you will tell Sheerka if the value, the concept or the name is expected. + +I am far from implementing the rules. To be honest, I don't even know yet what they will look like. + +So I am going to introduce the keyword :code:`concept:name:` or :code:`c:name:` + +It will mean that the concept is required. + +If the name is required, you can use :code:`"'name'"` or :code:`'"name"'`. +It's already working. There is nothing to do for this one. \ No newline at end of file diff --git a/evaluators/PythonEvaluator.py b/evaluators/PythonEvaluator.py index 894ab14..db539cb 100644 --- a/evaluators/PythonEvaluator.py +++ b/evaluators/PythonEvaluator.py @@ -1,4 +1,5 @@ import copy +from enum import Enum from core.ast.visitors import UnreferencedNamesVisitor from core.builtin_concepts import BuiltinConcepts, ParserResultConcept @@ -18,6 +19,7 @@ class PythonEvaluator(OneReturnValueEvaluator): def __init__(self): super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50) + self.locals = {} def matches(self, context, return_value): return return_value.status and \ @@ -71,23 +73,35 @@ class PythonEvaluator(OneReturnValueEvaluator): for name in unreferenced_names_visitor.names: context.log(self.verbose_log, f"Resolving '{name}'.", self.name) + return_concept = False - if name in my_locals: + if name.startswith("__C__") and name.endswith("__C__"): + name_resolved = name[5:-5] + return_concept = True + else: + name_resolved = name + + if name_resolved in my_locals: context.log(self.verbose_log, f"Using value from property.", self.name) continue - concept = context.sheerka.new(name) + concept = context.sheerka.new(name_resolved) if context.sheerka.isinstance(concept, 
BuiltinConcepts.UNKNOWN_CONCEPT): - context.log(self.verbose_log, f"'{name}' is not a concept. Skipping.", self.name) + context.log(self.verbose_log, f"'{name_resolved}' is not a concept. Skipping.", self.name) continue - context.log(self.verbose_log, f"'{name}' is a concept. Evaluating.", self.name) + context.log(self.verbose_log, f"'{name_resolved}' is a concept. Evaluating.", self.name) sub_context = context.push(self.name, desc=f"Evaluating '{concept}'", obj=concept) sub_context.log_new(self.verbose_log) evaluated = context.sheerka.evaluate_concept(sub_context, concept, self.verbose_log) if evaluated.key == concept.key: - my_locals[name] = evaluated.body if ConceptParts.BODY in evaluated.cached_asts else evaluated + my_locals[name] = evaluated if return_concept else \ + evaluated.body if ConceptParts.BODY in evaluated.cached_asts else \ + evaluated + + if self.locals: + my_locals.update(self.locals) return my_locals diff --git a/parsers/BaseParser.py b/parsers/BaseParser.py index 64ef148..cb68391 100644 --- a/parsers/BaseParser.py +++ b/parsers/BaseParser.py @@ -86,7 +86,12 @@ class BaseParser: if not hasattr(tokens, "__iter__"): tokens = [tokens] + switcher = { + TokenKind.KEYWORD: lambda t: Keywords(t.value).value, + TokenKind.CONCEPT: lambda t: f"__C__{t.value}__C__" + } + for token in tokens: - value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value + value = switcher.get(token.type, lambda t: t.value)(token) res += value return res diff --git a/parsers/PythonParser.py b/parsers/PythonParser.py index 06fcbfd..773d2bd 100644 --- a/parsers/PythonParser.py +++ b/parsers/PythonParser.py @@ -1,4 +1,5 @@ from core.builtin_concepts import BuiltinConcepts +from core.tokenizer import Tokenizer from parsers.BaseParser import BaseParser, Node, ErrorNode from dataclasses import dataclass import ast @@ -62,16 +63,23 @@ class PythonParser(BaseParser): self.source = kwargs.get("source", "") def parse(self, context, text): - text = text if 
isinstance(text, str) else self.get_text_from_tokens(text) - text = text.strip() + if isinstance(text, str) and "c:" in text: + source = self.get_text_from_tokens(list(Tokenizer(text))) + elif isinstance(text, str): + source = text + else: + source = self.get_text_from_tokens(text) + source = source.strip() + + text = text if isinstance(text, str) else source sheerka = context.sheerka # first, try to parse an expression - res, tree, error = self.try_parse_expression(text) + res, tree, error = self.try_parse_expression(source) if not res: # then try to parse a statement - res, tree, error = self.try_parse_statement(text) + res, tree, error = self.try_parse_statement(source) if not res: self.has_error = True error_node = PythonErrorNode(text, error) diff --git a/tests/test_BaseParser.py b/tests/test_BaseParser.py index 95d3902..f6c8b7d 100644 --- a/tests/test_BaseParser.py +++ b/tests/test_BaseParser.py @@ -4,4 +4,14 @@ from core.tokenizer import Tokenizer, Token, TokenKind from parsers.BaseParser import BaseParser +@pytest.mark.parametrize("text, expected_text", [ + ("hello world", "hello world"), + ("'hello' 'world'", "'hello' 'world'"), + ("def concept a from", "def concept a from"), + ("()[]{}1=1.5+-/*><&é", "()[]{}1=1.5+-/*><&é"), + ("execute(c:concept_name:)", "execute(__C__concept_name__C__)") +]) +def test_i_can_get_text_from_tokens(text, expected_text): + tokens = list(Tokenizer(text)) + assert BaseParser.get_text_from_tokens(tokens) == expected_text diff --git a/tests/test_PythonEvaluator.py b/tests/test_PythonEvaluator.py index dac804e..13d920c 100644 --- a/tests/test_PythonEvaluator.py +++ b/tests/test_PythonEvaluator.py @@ -14,6 +14,10 @@ def get_context(): return ExecutionContext("test", Event(), sheerka) +def get_context_name(context): + return context.name + + @pytest.mark.parametrize("ret_val, expected", [ (ReturnValueConcept("some_name", True, ParserResultConcept(value=PythonNode("", None))), True), (ReturnValueConcept("some_name", True, 
ParserResultConcept(value="other thing")), False), @@ -57,32 +61,6 @@ def test_i_cannot_eval_simple_concept(concept): assert context.sheerka.isinstance(evaluated.value, BuiltinConcepts.NOT_FOR_ME) -# -# def test_i_can_eval_expression_that_references_concepts(): -# context = get_context() -# context.sheerka.add_in_cache(Concept("foo")) -# -# parsed = PythonParser().parse(context, "foo") -# evaluated = PythonEvaluator().eval(context, parsed) -# -# assert evaluated.status -# assert evaluated.value == Concept("foo").init_key() -# -# -# def test_i_can_eval_expression_that_references_concepts_with_body(): -# """ -# I can test expression with variables -# :return: -# """ -# context = get_context() -# context.sheerka.add_in_cache(Concept("foo", body="2")) -# -# parsed = PythonParser().parse(context, "foo") -# evaluated = PythonEvaluator().eval(context, parsed) -# -# assert evaluated.status -# assert evaluated.value == 2 - def test_i_can_eval_expression_with_that_references_concepts(): """ I can test modules with variables @@ -127,24 +105,24 @@ def test_i_can_eval_module_with_that_references_concepts_with_body(): assert evaluated.status assert evaluated.value == 2 -# -# def test_i_can_eval_concept_with_props(): -# context = get_context() -# context.sheerka.add_in_cache(Concept("foo").set_prop("prop", "'a'")) -# -# parsed = PythonParser().parse(context, "foo") -# evaluated = PythonEvaluator().eval(context, parsed) -# -# assert evaluated.status -# assert evaluated.value == Concept("foo").set_prop("prop", "a").init_key() # evaluated version of foo -# -# -# def test_i_cannot_eval_when_body_references_unknown_concept(): -# context = get_context() -# context.sheerka.add_in_cache(Concept("foo", body="bar")) -# -# parsed = PythonParser().parse(context, "foo") -# evaluated = PythonEvaluator().eval(context, parsed) -# -# assert not evaluated.status -# assert context.sheerka.isinstance(evaluated.value, BuiltinConcepts.ERROR) + +def test_i_can_eval_concept_token(): + context = 
get_context() + context.sheerka.add_in_cache(Concept("foo", body="2")) + + parsed = PythonParser().parse(context, "get_context_name(c:foo:)") + python_evaluator = PythonEvaluator() + python_evaluator.locals["get_context_name"] = get_context_name + evaluated = python_evaluator.eval(context, parsed) + + assert evaluated.status + assert evaluated.value == "foo" + + # sanity, to make sure that otherwise foo is resolved to '2' + parsed = PythonParser().parse(context, "get_context_name(foo)") + python_evaluator = PythonEvaluator() + python_evaluator.locals["get_context_name"] = get_context_name + evaluated = python_evaluator.eval(context, parsed) + + assert not evaluated.status + assert evaluated.body.body.args[0] == "'int' object has no attribute 'name'" diff --git a/tests/test_PythonParser.py b/tests/test_PythonParser.py index 3a15644..24db25b 100644 --- a/tests/test_PythonParser.py +++ b/tests/test_PythonParser.py @@ -55,3 +55,15 @@ def test_i_can_detect_error(): assert isinstance(res.value, ParserResultConcept) assert isinstance(res.value.value[0], PythonErrorNode) assert isinstance(res.value.value[0].exception, SyntaxError) + + +def test_i_can_parse_a_concept(): + text = "c:concept_name: + 1" + + parser = PythonParser() + res = parser.parse(get_context(), text) + + assert res + assert res.value.value == PythonNode( + "c:concept_name: + 1", + ast.parse("__C__concept_name__C__+1", mode="eval")) diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index b09c82c..3584d2c 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -3,7 +3,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords def test_i_can_tokenize(): - source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>" + source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:" tokens = list(Tokenizer(source)) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert 
tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) @@ -39,6 +39,9 @@ def test_i_can_tokenize(): assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20) assert tokens[32] == Token(TokenKind.LESS, '<', 79, 6, 21) assert tokens[33] == Token(TokenKind.GREATER, '>', 80, 6, 22) + assert tokens[34] == Token(TokenKind.CONCEPT, 'name', 81, 6, 23) + + assert tokens[35] == Token(TokenKind.EOF, '', 88, 6, 30) @pytest.mark.parametrize("text, expected", [ @@ -58,15 +61,19 @@ def test_i_can_tokenize_identifiers(text, expected): assert comparison == expected -@pytest.mark.parametrize("text, error_text, index, line, column", [ - ("'string", "'string", 7, 1, 8), - ('"string', '"string', 7, 1, 8), - ('"a" + "string', '"string', 13, 1, 14), - ('"a"\n\n"string', '"string', 12, 3, 8), +@pytest.mark.parametrize("text, message, error_text, index, line, column", [ + ("'string", "Missing Trailing quote", "'string", 7, 1, 8), + ('"string', "Missing Trailing quote", '"string', 7, 1, 8), + ('"a" + "string', "Missing Trailing quote", '"string', 13, 1, 14), + ('"a"\n\n"string', "Missing Trailing quote", '"string', 12, 3, 8), + ("c::", "Context name not found", "", 2, 1, 3), + ("c:foo\nbar:", "New line is forbidden in concept name", "foo", 5, 1, 6), + ("c:foo", "Missing ending colon", "foo", 5, 1, 6) ]) -def test_i_can_detect_unfinished_strings(text, error_text, index, line, column): +def test_i_can_detect_unfinished_strings(text, message, error_text, index, line, column): with pytest.raises(LexerError) as e: list(Tokenizer(text)) + assert e.value.message == message assert e.value.text == error_text assert e.value.index == index assert e.value.line == line