From 197b0700facad40b8e15e35df4704aaf8e37d87a Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Sun, 29 Dec 2019 18:56:41 +0100 Subject: [PATCH] Added keyword c:xxx: to express that we want the concept, not its body --- core/tokenizer.py | 32 ++++++++++++++++ docs/blog.rst | 62 +++++++++++++++++++++++++++++- evaluators/PythonEvaluator.py | 24 +++++++++--- parsers/BaseParser.py | 7 +++- parsers/PythonParser.py | 16 ++++++-- tests/test_BaseParser.py | 10 +++++ tests/test_PythonEvaluator.py | 72 ++++++++++++----------------------- tests/test_PythonParser.py | 12 ++++++ tests/test_tokenizer.py | 21 ++++++---- 9 files changed, 191 insertions(+), 65 deletions(-) diff --git a/core/tokenizer.py b/core/tokenizer.py index 8f292e0..bf06e4a 100644 --- a/core/tokenizer.py +++ b/core/tokenizer.py @@ -8,6 +8,7 @@ class TokenKind(Enum): NEWLINE = "newline" KEYWORD = "keyword" IDENTIFIER = "identifier" + CONCEPT = "concept" STRING = "string" NUMBER = "number" TRUE = "true" @@ -210,6 +211,11 @@ class Tokenizer: self.i += len(newline) self.column = 1 self.line += 1 + elif c == "c" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":": + concept_name = self.eat_concept_name(self.i + 2, self.line, self.column) + yield Token(TokenKind.CONCEPT, concept_name, self.i, self.line, self.column) + self.i += len(concept_name) + 3 + self.column += len(concept_name) + 3 elif c.isalpha() or c == "_": identifier = self.eat_identifier(self.i) token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER @@ -233,6 +239,32 @@ class Tokenizer: yield Token(TokenKind.EOF, "", self.i, self.line, self.column) + def eat_concept_name(self, start, line, column): + result = "" + i = start + end_colon_found = False + + while i < self.text_len: + c = self.text[i] + + if c == "\n": + raise LexerError(f"New line is forbidden in concept name", result, i, line, column + 2 + len(result)) + + if c == ":": + end_colon_found = True + break + + result += c + i += 1 + + if not 
end_colon_found: + raise LexerError(f"Missing ending colon", result, i, line, column + 2 + len(result)) + + if result == "": + raise LexerError(f"Context name not found", result, start, line, column + 2 + len(result)) + + return result + def eat_whitespace(self, start): result = self.text[start] i = start + 1 diff --git a/docs/blog.rst b/docs/blog.rst index dbbf485..2ea1a54 100644 --- a/docs/blog.rst +++ b/docs/blog.rst @@ -767,4 +767,64 @@ Let's see an example def concept one as 1 def concept two as 2 - eval one + two \ No newline at end of file + eval one + two + +In this situation, I expect PythonEvaluator to resolve the concepts 'one' and 'two' and to return 1 + 2, hence 3 + +In this other situation + +:: + + def concept one as 1 + def concept desc a as sheerka.desc(a) + desc one + +I expect PythonEvaluator NOT to resolve the concept one and to pass it straight to the function. + +Unfortunately for me, in the current implementation, 'a' is resolved to the concept 'one', which is resolved to its +body "1". So the call fails, as there is no concept 1 (moreover, 1 is an integer, it's not even the string "1"). + +There will also be some cases where 'sheerka.desc()' expects the name of a concept (and the resolution of the concept +will be done inside the function). In this case, it's not the body nor the concept itself that is required, but the name +of the concept. + +So here are three cases where the behaviour of PythonEvaluator is required to be different. I cannot hard code these +behaviours as they depend on the context. + +The global idea to resolve this situation is to give Sheerka a memory. What I am currently working on is the possibility +**to create** and **to recognize** concepts. As a reminder: + +You can create simple concepts + +:: + + def concept one as 1 + +or concept using bnf + +:: + + def concept twenties from bnf twenty (one | two | three...)=unit as 20 + unit + + +Both can be recognised. 
+But if I define + +:: + + def a plus b as a + 1 + + +:code:`one + two` will be recognized but twenty two plus one is not correctly implemented yet. + +To go back to my issue with variable resolution in PythonEvaluator, the idea is to implement rules that will +recognize the concept, so you will tell Sheerka if the value, the concept or the name is expected. + +I am far from implementing the rules. To be honest, I don't even know yet what they will look like. + +So I am going to introduce the keyword :code:`concept:name:` or :code:`c:name:` + +It will mean that the concept is required. + +If the name is required, you can use :code:`"'name'"` or :code:`'"name"'`. +It's already working. There is nothing to do for this one. \ No newline at end of file diff --git a/evaluators/PythonEvaluator.py b/evaluators/PythonEvaluator.py index 894ab14..db539cb 100644 --- a/evaluators/PythonEvaluator.py +++ b/evaluators/PythonEvaluator.py @@ -1,4 +1,5 @@ import copy +from enum import Enum from core.ast.visitors import UnreferencedNamesVisitor from core.builtin_concepts import BuiltinConcepts, ParserResultConcept @@ -18,6 +19,7 @@ class PythonEvaluator(OneReturnValueEvaluator): def __init__(self): super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50) + self.locals = {} def matches(self, context, return_value): return return_value.status and \ @@ -71,23 +73,35 @@ class PythonEvaluator(OneReturnValueEvaluator): for name in unreferenced_names_visitor.names: context.log(self.verbose_log, f"Resolving '{name}'.", self.name) + return_concept = False - if name in my_locals: + if name.startswith("__C__") and name.endswith("__C__"): + name_resolved = name[5:-5] + return_concept = True + else: + name_resolved = name + + if name_resolved in my_locals: context.log(self.verbose_log, f"Using value from property.", self.name) continue - concept = context.sheerka.new(name) + concept = context.sheerka.new(name_resolved) if context.sheerka.isinstance(concept, 
BuiltinConcepts.UNKNOWN_CONCEPT): - context.log(self.verbose_log, f"'{name}' is not a concept. Skipping.", self.name) + context.log(self.verbose_log, f"'{name_resolved}' is not a concept. Skipping.", self.name) continue - context.log(self.verbose_log, f"'{name}' is a concept. Evaluating.", self.name) + context.log(self.verbose_log, f"'{name_resolved}' is a concept. Evaluating.", self.name) sub_context = context.push(self.name, desc=f"Evaluating '{concept}'", obj=concept) sub_context.log_new(self.verbose_log) evaluated = context.sheerka.evaluate_concept(sub_context, concept, self.verbose_log) if evaluated.key == concept.key: - my_locals[name] = evaluated.body if ConceptParts.BODY in evaluated.cached_asts else evaluated + my_locals[name] = evaluated if return_concept else \ + evaluated.body if ConceptParts.BODY in evaluated.cached_asts else \ + evaluated + + if self.locals: + my_locals.update(self.locals) return my_locals diff --git a/parsers/BaseParser.py b/parsers/BaseParser.py index 64ef148..cb68391 100644 --- a/parsers/BaseParser.py +++ b/parsers/BaseParser.py @@ -86,7 +86,12 @@ class BaseParser: if not hasattr(tokens, "__iter__"): tokens = [tokens] + switcher = { + TokenKind.KEYWORD: lambda t: Keywords(t.value).value, + TokenKind.CONCEPT: lambda t: f"__C__{t.value}__C__" + } + for token in tokens: - value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value + value = switcher.get(token.type, lambda t: t.value)(token) res += value return res diff --git a/parsers/PythonParser.py b/parsers/PythonParser.py index 06fcbfd..773d2bd 100644 --- a/parsers/PythonParser.py +++ b/parsers/PythonParser.py @@ -1,4 +1,5 @@ from core.builtin_concepts import BuiltinConcepts +from core.tokenizer import Tokenizer from parsers.BaseParser import BaseParser, Node, ErrorNode from dataclasses import dataclass import ast @@ -62,16 +63,23 @@ class PythonParser(BaseParser): self.source = kwargs.get("source", "") def parse(self, context, text): - text = text if 
isinstance(text, str) else self.get_text_from_tokens(text) - text = text.strip() + if isinstance(text, str) and "c:" in text: + source = self.get_text_from_tokens(list(Tokenizer(text))) + elif isinstance(text, str): + source = text + else: + source = self.get_text_from_tokens(text) + source = source.strip() + + text = text if isinstance(text, str) else source sheerka = context.sheerka # first, try to parse an expression - res, tree, error = self.try_parse_expression(text) + res, tree, error = self.try_parse_expression(source) if not res: # then try to parse a statement - res, tree, error = self.try_parse_statement(text) + res, tree, error = self.try_parse_statement(source) if not res: self.has_error = True error_node = PythonErrorNode(text, error) diff --git a/tests/test_BaseParser.py b/tests/test_BaseParser.py index 95d3902..f6c8b7d 100644 --- a/tests/test_BaseParser.py +++ b/tests/test_BaseParser.py @@ -4,4 +4,14 @@ from core.tokenizer import Tokenizer, Token, TokenKind from parsers.BaseParser import BaseParser +@pytest.mark.parametrize("text, expected_text", [ + ("hello world", "hello world"), + ("'hello' 'world'", "'hello' 'world'"), + ("def concept a from", "def concept a from"), + ("()[]{}1=1.5+-/*><&é", "()[]{}1=1.5+-/*><&é"), + ("execute(c:concept_name:)", "execute(__C__concept_name__C__)") +]) +def test_i_can_get_text_from_tokens(text, expected_text): + tokens = list(Tokenizer(text)) + assert BaseParser.get_text_from_tokens(tokens) == expected_text diff --git a/tests/test_PythonEvaluator.py b/tests/test_PythonEvaluator.py index dac804e..13d920c 100644 --- a/tests/test_PythonEvaluator.py +++ b/tests/test_PythonEvaluator.py @@ -14,6 +14,10 @@ def get_context(): return ExecutionContext("test", Event(), sheerka) +def get_context_name(context): + return context.name + + @pytest.mark.parametrize("ret_val, expected", [ (ReturnValueConcept("some_name", True, ParserResultConcept(value=PythonNode("", None))), True), (ReturnValueConcept("some_name", True, 
ParserResultConcept(value="other thing")), False), @@ -57,32 +61,6 @@ def test_i_cannot_eval_simple_concept(concept): assert context.sheerka.isinstance(evaluated.value, BuiltinConcepts.NOT_FOR_ME) -# -# def test_i_can_eval_expression_that_references_concepts(): -# context = get_context() -# context.sheerka.add_in_cache(Concept("foo")) -# -# parsed = PythonParser().parse(context, "foo") -# evaluated = PythonEvaluator().eval(context, parsed) -# -# assert evaluated.status -# assert evaluated.value == Concept("foo").init_key() -# -# -# def test_i_can_eval_expression_that_references_concepts_with_body(): -# """ -# I can test expression with variables -# :return: -# """ -# context = get_context() -# context.sheerka.add_in_cache(Concept("foo", body="2")) -# -# parsed = PythonParser().parse(context, "foo") -# evaluated = PythonEvaluator().eval(context, parsed) -# -# assert evaluated.status -# assert evaluated.value == 2 - def test_i_can_eval_expression_with_that_references_concepts(): """ I can test modules with variables @@ -127,24 +105,24 @@ def test_i_can_eval_module_with_that_references_concepts_with_body(): assert evaluated.status assert evaluated.value == 2 -# -# def test_i_can_eval_concept_with_props(): -# context = get_context() -# context.sheerka.add_in_cache(Concept("foo").set_prop("prop", "'a'")) -# -# parsed = PythonParser().parse(context, "foo") -# evaluated = PythonEvaluator().eval(context, parsed) -# -# assert evaluated.status -# assert evaluated.value == Concept("foo").set_prop("prop", "a").init_key() # evaluated version of foo -# -# -# def test_i_cannot_eval_when_body_references_unknown_concept(): -# context = get_context() -# context.sheerka.add_in_cache(Concept("foo", body="bar")) -# -# parsed = PythonParser().parse(context, "foo") -# evaluated = PythonEvaluator().eval(context, parsed) -# -# assert not evaluated.status -# assert context.sheerka.isinstance(evaluated.value, BuiltinConcepts.ERROR) + +def test_i_can_eval_concept_token(): + context = 
get_context() + context.sheerka.add_in_cache(Concept("foo", body="2")) + + parsed = PythonParser().parse(context, "get_context_name(c:foo:)") + python_evaluator = PythonEvaluator() + python_evaluator.locals["get_context_name"] = get_context_name + evaluated = python_evaluator.eval(context, parsed) + + assert evaluated.status + assert evaluated.value == "foo" + + # sanity, to make sure that otherwise foo is resolved to '2' + parsed = PythonParser().parse(context, "get_context_name(foo)") + python_evaluator = PythonEvaluator() + python_evaluator.locals["get_context_name"] = get_context_name + evaluated = python_evaluator.eval(context, parsed) + + assert not evaluated.status + assert evaluated.body.body.args[0] == "'int' object has no attribute 'name'" diff --git a/tests/test_PythonParser.py b/tests/test_PythonParser.py index 3a15644..24db25b 100644 --- a/tests/test_PythonParser.py +++ b/tests/test_PythonParser.py @@ -55,3 +55,15 @@ def test_i_can_detect_error(): assert isinstance(res.value, ParserResultConcept) assert isinstance(res.value.value[0], PythonErrorNode) assert isinstance(res.value.value[0].exception, SyntaxError) + + +def test_i_can_parse_a_concept(): + text = "c:concept_name: + 1" + + parser = PythonParser() + res = parser.parse(get_context(), text) + + assert res + assert res.value.value == PythonNode( + "c:concept_name: + 1", + ast.parse("__C__concept_name__C__+1", mode="eval")) diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index b09c82c..3584d2c 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -3,7 +3,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords def test_i_can_tokenize(): - source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>" + source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:" tokens = list(Tokenizer(source)) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert 
tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) @@ -39,6 +39,9 @@ def test_i_can_tokenize(): assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20) assert tokens[32] == Token(TokenKind.LESS, '<', 79, 6, 21) assert tokens[33] == Token(TokenKind.GREATER, '>', 80, 6, 22) + assert tokens[34] == Token(TokenKind.CONCEPT, 'name', 81, 6, 23) + + assert tokens[35] == Token(TokenKind.EOF, '', 88, 6, 30) @pytest.mark.parametrize("text, expected", [ @@ -58,15 +61,19 @@ def test_i_can_tokenize_identifiers(text, expected): assert comparison == expected -@pytest.mark.parametrize("text, error_text, index, line, column", [ - ("'string", "'string", 7, 1, 8), - ('"string', '"string', 7, 1, 8), - ('"a" + "string', '"string', 13, 1, 14), - ('"a"\n\n"string', '"string', 12, 3, 8), +@pytest.mark.parametrize("text, message, error_text, index, line, column", [ + ("'string", "Missing Trailing quote", "'string", 7, 1, 8), + ('"string', "Missing Trailing quote", '"string', 7, 1, 8), + ('"a" + "string', "Missing Trailing quote", '"string', 13, 1, 14), + ('"a"\n\n"string', "Missing Trailing quote", '"string', 12, 3, 8), + ("c::", "Context name not found", "", 2, 1, 3), + ("c:foo\nbar:", "New line is forbidden in concept name", "foo", 5, 1, 6), + ("c:foo", "Missing ending colon", "foo", 5, 1, 6) ]) -def test_i_can_detect_unfinished_strings(text, error_text, index, line, column): +def test_i_can_detect_unfinished_strings(text, message, error_text, index, line, column): with pytest.raises(LexerError) as e: list(Tokenizer(text)) + assert e.value.message == message assert e.value.text == error_text assert e.value.index == index assert e.value.line == line