Added keyword c:xxx: to express that we want the concept, not its body

This commit is contained in:
2019-12-29 18:56:41 +01:00
parent 81b2355633
commit 197b0700fa
9 changed files with 191 additions and 65 deletions
+32
View File
@@ -8,6 +8,7 @@ class TokenKind(Enum):
NEWLINE = "newline" NEWLINE = "newline"
KEYWORD = "keyword" KEYWORD = "keyword"
IDENTIFIER = "identifier" IDENTIFIER = "identifier"
CONCEPT = "concept"
STRING = "string" STRING = "string"
NUMBER = "number" NUMBER = "number"
TRUE = "true" TRUE = "true"
@@ -210,6 +211,11 @@ class Tokenizer:
self.i += len(newline) self.i += len(newline)
self.column = 1 self.column = 1
self.line += 1 self.line += 1
elif c == "c" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":":
concept_name = self.eat_concept_name(self.i + 2, self.line, self.column)
yield Token(TokenKind.CONCEPT, concept_name, self.i, self.line, self.column)
self.i += len(concept_name) + 3
self.column += len(concept_name) + 3
elif c.isalpha() or c == "_": elif c.isalpha() or c == "_":
identifier = self.eat_identifier(self.i) identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
@@ -233,6 +239,32 @@ class Tokenizer:
yield Token(TokenKind.EOF, "", self.i, self.line, self.column) yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_concept_name(self, start, line, column):
    """Consume the name of a concept token ``c:<name>:``.

    ``start`` points just past the opening ``c:``; ``line``/``column`` locate
    the leading ``c`` so error positions can be reported on the name itself
    (hence the ``column + 2 + len(result)`` offsets below).

    Returns the text between the two colons.
    Raises LexerError when the name spans a newline, the closing colon is
    missing, or the name is empty.
    """
    result = ""
    i = start
    end_colon_found = False
    while i < self.text_len:
        c = self.text[i]
        if c == "\n":
            # A concept name must stay on a single line.
            raise LexerError("New line is forbidden in concept name", result, i, line, column + 2 + len(result))
        if c == ":":
            end_colon_found = True
            break
        result += c
        i += 1
    if not end_colon_found:
        raise LexerError("Missing ending colon", result, i, line, column + 2 + len(result))
    if result == "":
        # NOTE(review): message says "Context" but this is a concept name —
        # kept byte-identical because the lexer tests assert this exact text.
        raise LexerError("Context name not found", result, start, line, column + 2 + len(result))
    return result
def eat_whitespace(self, start): def eat_whitespace(self, start):
result = self.text[start] result = self.text[start]
i = start + 1 i = start + 1
+61 -1
View File
@@ -767,4 +767,64 @@ Let's see an example
def concept one as 1 def concept one as 1
def concept two as 2 def concept two as 2
eval one + two eval one + two
In this situation, I expect PythonEvaluator to resolve the concepts 'one' and 'two' and to return 1 + 2, hence 3
In this other situation
::
def concept one as 1
def concept desc a as sheerka.desc(a)
desc one
I expect PythonEvaluator NOT to resolve the concept 'one' and to pass it straight to the function.
Unfortunately for me, in the current implementation, 'a' is resolved to the concept 'one', which is resolved to its
body "1". So the call fails, as there is no concept 1 (moreover, 1 is an integer, it's not even the string "1").
There may also be some cases where 'sheerka.desc()' expects the name of a concept (and the resolution of the concept
will be done inside the function). In this case, it's not the body nor the concept itself that is required, but the name
of the concept.
So here are three cases where the behaviour of PythonEvaluator is required to be different. I cannot hard-code these
behaviours as they depend on the context.
The general idea to resolve this situation is to give Sheerka a memory. What I am currently working on is the possibility
**to create** and **to recognize** concepts. As a reminder:
You can create simple concepts
::
def concept one as 1
or concept using bnf
::
def concept twenties from bnf twenty (one | two | three...)=unit as 20 + unit
Both can be recognised.
But if I define
::
def a plus b as a + 1
:code:`one + two` will be recognized but twenty two plus one is not correctly implemented yet.
To go back on my issue with the variables resolutions with PythonEvaluator, the idea is to implement rules that will
recognize the concept, so you will tell Sheerka if the value, the concept or the name is expected.
I am far from implementing the rules. To be honest, I don't even know yet what they will look like.
So I am going to introduce the keyword :code:`concept:name:` or :code:`c:name:`
It will mean that the concept is required.
If the name is required, you can use :code:`"'name'"` or :code:`'"name"'`.
It's already working. There is nothing to do for this one.
+19 -5
View File
@@ -1,4 +1,5 @@
import copy import copy
from enum import Enum
from core.ast.visitors import UnreferencedNamesVisitor from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
@@ -18,6 +19,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
def __init__(self): def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50) super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)
self.locals = {}
def matches(self, context, return_value): def matches(self, context, return_value):
return return_value.status and \ return return_value.status and \
@@ -71,23 +73,35 @@ class PythonEvaluator(OneReturnValueEvaluator):
for name in unreferenced_names_visitor.names: for name in unreferenced_names_visitor.names:
context.log(self.verbose_log, f"Resolving '{name}'.", self.name) context.log(self.verbose_log, f"Resolving '{name}'.", self.name)
return_concept = False
if name in my_locals: if name.startswith("__C__") and name.endswith("__C__"):
name_resolved = name[5:-5]
return_concept = True
else:
name_resolved = name
if name_resolved in my_locals:
context.log(self.verbose_log, f"Using value from property.", self.name) context.log(self.verbose_log, f"Using value from property.", self.name)
continue continue
concept = context.sheerka.new(name) concept = context.sheerka.new(name_resolved)
if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
context.log(self.verbose_log, f"'{name}' is not a concept. Skipping.", self.name) context.log(self.verbose_log, f"'{name_resolved}' is not a concept. Skipping.", self.name)
continue continue
context.log(self.verbose_log, f"'{name}' is a concept. Evaluating.", self.name) context.log(self.verbose_log, f"'{name_resolved}' is a concept. Evaluating.", self.name)
sub_context = context.push(self.name, desc=f"Evaluating '{concept}'", obj=concept) sub_context = context.push(self.name, desc=f"Evaluating '{concept}'", obj=concept)
sub_context.log_new(self.verbose_log) sub_context.log_new(self.verbose_log)
evaluated = context.sheerka.evaluate_concept(sub_context, concept, self.verbose_log) evaluated = context.sheerka.evaluate_concept(sub_context, concept, self.verbose_log)
if evaluated.key == concept.key: if evaluated.key == concept.key:
my_locals[name] = evaluated.body if ConceptParts.BODY in evaluated.cached_asts else evaluated my_locals[name] = evaluated if return_concept else \
evaluated.body if ConceptParts.BODY in evaluated.cached_asts else \
evaluated
if self.locals:
my_locals.update(self.locals)
return my_locals return my_locals
+6 -1
View File
@@ -86,7 +86,12 @@ class BaseParser:
if not hasattr(tokens, "__iter__"): if not hasattr(tokens, "__iter__"):
tokens = [tokens] tokens = [tokens]
switcher = {
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
TokenKind.CONCEPT: lambda t: f"__C__{t.value}__C__"
}
for token in tokens: for token in tokens:
value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value value = switcher.get(token.type, lambda t: t.value)(token)
res += value res += value
return res return res
+12 -4
View File
@@ -1,4 +1,5 @@
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import Tokenizer
from parsers.BaseParser import BaseParser, Node, ErrorNode from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass from dataclasses import dataclass
import ast import ast
@@ -62,16 +63,23 @@ class PythonParser(BaseParser):
self.source = kwargs.get("source", "<undef>") self.source = kwargs.get("source", "<undef>")
def parse(self, context, text): def parse(self, context, text):
text = text if isinstance(text, str) else self.get_text_from_tokens(text) if isinstance(text, str) and "c:" in text:
text = text.strip() source = self.get_text_from_tokens(list(Tokenizer(text)))
elif isinstance(text, str):
source = text
else:
source = self.get_text_from_tokens(text)
source = source.strip()
text = text if isinstance(text, str) else source
sheerka = context.sheerka sheerka = context.sheerka
# first, try to parse an expression # first, try to parse an expression
res, tree, error = self.try_parse_expression(text) res, tree, error = self.try_parse_expression(source)
if not res: if not res:
# then try to parse a statement # then try to parse a statement
res, tree, error = self.try_parse_statement(text) res, tree, error = self.try_parse_statement(source)
if not res: if not res:
self.has_error = True self.has_error = True
error_node = PythonErrorNode(text, error) error_node = PythonErrorNode(text, error)
+10
View File
@@ -4,4 +4,14 @@ from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser
# Round-trips source text through the tokenizer and BaseParser.get_text_from_tokens.
# Identifiers, strings, keywords, numbers and punctuation must come back unchanged;
# a concept token `c:name:` is rendered in its internal `__C__name__C__` form.
@pytest.mark.parametrize("text, expected_text", [
    ("hello world", "hello world"),
    ("'hello' 'world'", "'hello' 'world'"),
    ("def concept a from", "def concept a from"),
    ("()[]{}1=1.5+-/*><&é", "()[]{}1=1.5+-/*><&é"),
    ("execute(c:concept_name:)", "execute(__C__concept_name__C__)")
])
def test_i_can_get_text_from_tokens(text, expected_text):
    """Rendering a token stream yields text equivalent to the tokenized source."""
    tokens = list(Tokenizer(text))
    assert BaseParser.get_text_from_tokens(tokens) == expected_text
+25 -47
View File
@@ -14,6 +14,10 @@ def get_context():
return ExecutionContext("test", Event(), sheerka) return ExecutionContext("test", Event(), sheerka)
def get_context_name(context):
    # Handed to PythonEvaluator as an extra local so that evaluated Python
    # code can ask a context object for its name.
    name = context.name
    return name
@pytest.mark.parametrize("ret_val, expected", [ @pytest.mark.parametrize("ret_val, expected", [
(ReturnValueConcept("some_name", True, ParserResultConcept(value=PythonNode("", None))), True), (ReturnValueConcept("some_name", True, ParserResultConcept(value=PythonNode("", None))), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value="other thing")), False), (ReturnValueConcept("some_name", True, ParserResultConcept(value="other thing")), False),
@@ -57,32 +61,6 @@ def test_i_cannot_eval_simple_concept(concept):
assert context.sheerka.isinstance(evaluated.value, BuiltinConcepts.NOT_FOR_ME) assert context.sheerka.isinstance(evaluated.value, BuiltinConcepts.NOT_FOR_ME)
#
# def test_i_can_eval_expression_that_references_concepts():
# context = get_context()
# context.sheerka.add_in_cache(Concept("foo"))
#
# parsed = PythonParser().parse(context, "foo")
# evaluated = PythonEvaluator().eval(context, parsed)
#
# assert evaluated.status
# assert evaluated.value == Concept("foo").init_key()
#
#
# def test_i_can_eval_expression_that_references_concepts_with_body():
# """
# I can test expression with variables
# :return:
# """
# context = get_context()
# context.sheerka.add_in_cache(Concept("foo", body="2"))
#
# parsed = PythonParser().parse(context, "foo")
# evaluated = PythonEvaluator().eval(context, parsed)
#
# assert evaluated.status
# assert evaluated.value == 2
def test_i_can_eval_expression_with_that_references_concepts(): def test_i_can_eval_expression_with_that_references_concepts():
""" """
I can test modules with variables I can test modules with variables
@@ -127,24 +105,24 @@ def test_i_can_eval_module_with_that_references_concepts_with_body():
assert evaluated.status assert evaluated.status
assert evaluated.value == 2 assert evaluated.value == 2
#
# def test_i_can_eval_concept_with_props(): def test_i_can_eval_concept_token():
# context = get_context() context = get_context()
# context.sheerka.add_in_cache(Concept("foo").set_prop("prop", "'a'")) context.sheerka.add_in_cache(Concept("foo", body="2"))
#
# parsed = PythonParser().parse(context, "foo") parsed = PythonParser().parse(context, "get_context_name(c:foo:)")
# evaluated = PythonEvaluator().eval(context, parsed) python_evaluator = PythonEvaluator()
# python_evaluator.locals["get_context_name"] = get_context_name
# assert evaluated.status evaluated = python_evaluator.eval(context, parsed)
# assert evaluated.value == Concept("foo").set_prop("prop", "a").init_key() # evaluated version of foo
# assert evaluated.status
# assert evaluated.value == "foo"
# def test_i_cannot_eval_when_body_references_unknown_concept():
# context = get_context() # sanity, to make sure that otherwise foo is resolved to '2'
# context.sheerka.add_in_cache(Concept("foo", body="bar")) parsed = PythonParser().parse(context, "get_context_name(foo)")
# python_evaluator = PythonEvaluator()
# parsed = PythonParser().parse(context, "foo") python_evaluator.locals["get_context_name"] = get_context_name
# evaluated = PythonEvaluator().eval(context, parsed) evaluated = python_evaluator.eval(context, parsed)
#
# assert not evaluated.status assert not evaluated.status
# assert context.sheerka.isinstance(evaluated.value, BuiltinConcepts.ERROR) assert evaluated.body.body.args[0] == "'int' object has no attribute 'name'"
+12
View File
@@ -55,3 +55,15 @@ def test_i_can_detect_error():
assert isinstance(res.value, ParserResultConcept) assert isinstance(res.value, ParserResultConcept)
assert isinstance(res.value.value[0], PythonErrorNode) assert isinstance(res.value.value[0], PythonErrorNode)
assert isinstance(res.value.value[0].exception, SyntaxError) assert isinstance(res.value.value[0].exception, SyntaxError)
def test_i_can_parse_a_concept():
    """Parsing `c:name:` source yields an AST built from its `__C__name__C__` form."""
    source = "c:concept_name: + 1"
    result = PythonParser().parse(get_context(), source)
    assert result
    expected = PythonNode(
        "c:concept_name: + 1",
        ast.parse("__C__concept_name__C__+1", mode="eval"))
    assert result.value.value == expected
+14 -7
View File
@@ -3,7 +3,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords
def test_i_can_tokenize(): def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>" source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
tokens = list(Tokenizer(source)) tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -39,6 +39,9 @@ def test_i_can_tokenize():
assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20) assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20)
assert tokens[32] == Token(TokenKind.LESS, '<', 79, 6, 21) assert tokens[32] == Token(TokenKind.LESS, '<', 79, 6, 21)
assert tokens[33] == Token(TokenKind.GREATER, '>', 80, 6, 22) assert tokens[33] == Token(TokenKind.GREATER, '>', 80, 6, 22)
assert tokens[34] == Token(TokenKind.CONCEPT, 'name', 81, 6, 23)
assert tokens[35] == Token(TokenKind.EOF, '', 88, 6, 30)
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
@@ -58,15 +61,19 @@ def test_i_can_tokenize_identifiers(text, expected):
assert comparison == expected assert comparison == expected
@pytest.mark.parametrize("text, error_text, index, line, column", [ @pytest.mark.parametrize("text, message, error_text, index, line, column", [
("'string", "'string", 7, 1, 8), ("'string", "Missing Trailing quote", "'string", 7, 1, 8),
('"string', '"string', 7, 1, 8), ('"string', "Missing Trailing quote", '"string', 7, 1, 8),
('"a" + "string', '"string', 13, 1, 14), ('"a" + "string', "Missing Trailing quote", '"string', 13, 1, 14),
('"a"\n\n"string', '"string', 12, 3, 8), ('"a"\n\n"string', "Missing Trailing quote", '"string', 12, 3, 8),
("c::", "Context name not found", "", 2, 1, 3),
("c:foo\nbar:", "New line is forbidden in concept name", "foo", 5, 1, 6),
("c:foo", "Missing ending colon", "foo", 5, 1, 6)
]) ])
def test_i_can_detect_unfinished_strings(text, error_text, index, line, column): def test_i_can_detect_unfinished_strings(text, message, error_text, index, line, column):
with pytest.raises(LexerError) as e: with pytest.raises(LexerError) as e:
list(Tokenizer(text)) list(Tokenizer(text))
assert e.value.message == message
assert e.value.text == error_text assert e.value.text == error_text
assert e.value.index == index assert e.value.index == index
assert e.value.line == line assert e.value.line == line