Refactored Parsers. Introduced BaseCustomGrammarParser. Renamed DefaultParser to DefConceptParser

This commit is contained in:
2020-10-02 04:45:47 +02:00
parent d100b7e8b3
commit e8f2705dcf
28 changed files with 1411 additions and 872 deletions
@@ -0,0 +1,227 @@
import pytest
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, TokenKind
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound
from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
# Tests for BaseCustomGrammarParser: splitting input text into keyword-delimited
# parts (get_parts) and extracting colon/indentation-based bodies (get_body).
# NOTE(review): indentation and triple-quoted string layout in this view look
# mangled by extraction; code is kept byte-identical.
class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka):
@staticmethod
# Tokenize each expected string and compare keyword-by-keyword against `actual`.
# Token index 1 is dropped from each expected token list — presumably the
# whitespace token right after the leading keyword; TODO confirm.
def compare_results(actual, expected, compare_str=False):
resolved_expected = {}
for k, v in expected.items():
tokens = list(Tokenizer(v, yield_eof=False))
resolved_expected[k] = [tokens[0]] + tokens[2:]
# Reduce {keyword: tokens} to [keyword repr, joined remainder] so dicts
# can be compared with a single equality check.
def get_better_representation(value):
better_repr = {}
for k, tokens in value.items():
value = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
better_repr[k] = [tokens[0].repr_value, value]
return better_repr
actual_to_compare = get_better_representation(actual)
expected_to_compare = get_better_representation(resolved_expected)
assert actual_to_compare == expected_to_compare
# Build a fresh parser over `text` and prime its first token.
def init_parser(self, text):
sheerka, context = self.init_concepts()
parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0)
parser.reset_parser(context, ParserInput(text))
parser.parser_input.next_token(False) # do not skip starting whitespaces
return sheerka, context, parser
@pytest.mark.parametrize("text, expected", [
("when xxx yyy", {Keywords.WHEN: "when xxx yyy"}),
("when uuu vvv print xxx yyy", {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
("print xxx yyy when uuu vvv", {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
(" when xxx", {Keywords.WHEN: "when xxx"}),
])
# get_parts returns a {keyword: tokens} mapping, independent of keyword order
# in the input; leading whitespace before the first keyword is tolerated.
def test_i_can_get_parts(self, text, expected):
sheerka, context, parser = self.init_parser(text)
res = parser.get_parts(["when", "print"])
self.compare_results(res, expected)
def test_i_can_get_parts_when_multilines(self):
text = """when
def func(x):
return x+1
func(a)
"""
expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"}
sheerka, context, parser = self.init_parser(text)
res = parser.get_parts(["when"])
self.compare_results(res, expected)
@pytest.mark.parametrize("text", [
"",
"no keyword",
"anything before when xxx print yyy",
])
# Input must start with one of the requested keywords; otherwise get_parts
# returns None and records a KeywordNotFound error.
def test_i_cannot_get_parts_when_no_keyword_found(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["when", "print"]) is None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], KeywordNotFound)
assert parser.error_sink[0].keywords == ['when', 'print']
# The optional second argument of get_parts constrains which keyword must
# appear first; a mismatch yields an UnexpectedTokenErrorNode.
def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self):
sheerka, context, parser = self.init_parser("when xxx print yyy")
assert parser.get_parts(["when", "print"], Keywords.PRINT) is None
assert parser.error_sink == [UnexpectedTokenErrorNode(f"'print' keyword not found.",
"when",
[Keywords.PRINT])]
def test_i_can_detect_when_a_keyword_appears_several_times(self):
sheerka, context, parser = self.init_parser("print hello when True print True")
parser.get_parts(["print"])
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], SyntaxErrorNode)
assert parser.error_sink[0].message == "Too many 'print' declarations."
@pytest.mark.parametrize("text", [
"print",
"print ",
"when xxx print",
"when xxx print ",
])
# A keyword with nothing after it (end of file) is reported as UnexpectedEofNode,
# but get_parts still returns a (partial) result.
def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["print", "when"]) is not None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], UnexpectedEofNode)
assert parser.error_sink[0].message == "While parsing keyword 'print'."
def test_i_can_double_quoted_strings_are_expanded(self):
"""
When inside double quotes, the quotes are removed and the content is used as is.
This allows usage of keywords within parts.
"""
sheerka, context, parser = self.init_parser('print "when can be used" when True')
expected = {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected)
# Single quotes are kept verbatim (not expanded), but still shield the
# enclosed keyword from being treated as a part delimiter.
def test_single_quoted_strings_are_not_expanded(self):
sheerka, context, parser = self.init_parser("print 'when can be used' when True")
expected = {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected)
# `keyword:` followed by an indented block: the whole indented region belongs
# to that keyword, so other keywords inside it are not treated as delimiters.
def test_i_can_manage_colon(self):
text = """when:
xxx
when
print
print:
xxx:
when
print
yyy
"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy", Keywords.WHEN: "when xxx\nwhen\nprint"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
def test_indentation_is_normalized_when_using_colon(self):
text = """print:
xxx:
when
print
yyy
"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
def test_i_can_mix_parts_with_colon_and_parts_without_colon(self):
text = """when:
xxx
when
print
print xxx"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
@pytest.mark.parametrize("text", [
"when:\nx x",
"when: \nx x",
])
# After `keyword:` a newline must be followed by indentation (tab); the bad
# token reported is the first unindented word.
def test_i_cannot_manage_colon_when_tab_is_missing(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["when"])
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]
@pytest.mark.parametrize("text", [
"",
"\n",
" \n",
"x", # less than two characters
"\n\t"
])
# get_body needs at least a newline plus an indented token; shorter inputs
# are rejected with "Body is empty or too short."
def test_i_cannot_get_body_when_body_is_too_short(self, text):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")]
def test_a_new_line_is_expected_when_get_body(self):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer("not a newline", yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("New line not found.", "not", [TokenKind.NEWLINE])]
@pytest.mark.parametrize("text", [
"\nx x",
" \nx x",
])
def test_tab_is_mandatory_after_new_line_when_get_body(self, text):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]
# Every line of the body must be indented; the first unindented line breaks it.
def test_i_can_detect_missing_tab_when_get_body(self):
text = "\n\txxx\n\tyyy\nzzz"
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "zzz", [TokenKind.WHITESPACE])]
# A deeper indent on the first line than on later lines is invalid.
def test_i_can_detect_invalid_indentation_when_get_body(self):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))) is None
assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")]
def test_i_can_get_body(self):
sheerka, context, parser = self.init_parser("")
res = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False)))
expected = list(Tokenizer("xxx\n yyyy", yield_eof=False))
# The whitespace token after the newline is blanked in the returned body —
# presumably the leading tab is stripped; TODO confirm against get_body.
expected[2].value = ""
assert [t.repr_value for t in res] == [t.repr_value for t in expected]
assert parser.error_sink == []
+1 -1
View File
@@ -33,7 +33,7 @@ def update_concepts_ids(sheerka, parsing_expression):
update_concepts_ids(sheerka, pe)
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)
eof_token = "<EOF>"
class TestBnfParser(TestUsingMemoryBasedSheerka):
@@ -7,10 +7,11 @@ from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF, Concept, CV
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, LexerError
from parsers.BaseNodeParser import SCWC
from parsers.BaseParser import NotInitializedNode, UnexpectedEofNode
from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence
from parsers.BnfParser import BnfParser
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode
from parsers.DefConceptParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.FunctionParser import FunctionParser
from parsers.PythonParser import PythonParser, PythonNode
@@ -48,7 +49,7 @@ def get_concept_part(part):
if isinstance(part, str):
node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval"))
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part,
@@ -59,7 +60,7 @@ def get_concept_part(part):
# node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval"))
nodes = compute_expected_array({}, part.source, [SCWC(part.first, part.last, *part.content)])
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -70,7 +71,7 @@ def get_concept_part(part):
if isinstance(part, PN):
node = PythonNode(part.source.strip(), ast.parse(part.source.strip(), mode=part.mode))
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -79,7 +80,7 @@ def get_concept_part(part):
if isinstance(part, PythonNode):
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -110,13 +111,26 @@ class FN:
content: list
class TestDefaultParser(TestUsingMemoryBasedSheerka):
class TestDefConceptParser(TestUsingMemoryBasedSheerka):
def init_parser(self, *concepts):
sheerka, context, *updated = self.init_concepts(*concepts, singleton=True)
parser = DefaultParser()
parser = DefConceptParser()
return sheerka, context, parser, *updated
@pytest.mark.parametrize("text, error", [
("concept", UnexpectedTokenErrorNode("'def' keyword not found.", "concept", [Keywords.DEF])),
("hello word", UnexpectedTokenErrorNode("'def' keyword not found.", "hello", [Keywords.DEF])),
("def hello", UnexpectedTokenErrorNode("'concept' keyword not found.", "hello", [Keywords.CONCEPT])),
])
def test_i_can_detect_not_for_me(self, text, error):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME)
assert res.value.reason == [error]
@pytest.mark.parametrize("text, expected", [
("def concept hello", get_def_concept(name="hello")),
("def concept hello ", get_def_concept(name="hello")),
@@ -124,13 +138,11 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka):
("def concept a+b", get_def_concept(name="a + b")),
("def concept 'a+b'+c", get_def_concept(name="'a+b' + c")),
("def concept 'as if'", get_def_concept(name="'as if'")),
("def concept 'as' if", get_def_concept(name="'as if'")),
("def concept hello as 'hello'", get_def_concept(name="hello", body="'hello'")),
("def concept hello as 1", get_def_concept(name="hello", body="1")),
("def concept hello as 1 + 1", get_def_concept(name="hello", body="1 + 1")),
("def concept 'as' if", get_def_concept(name="'as' if")),
('def concept "as if"', get_def_concept(name="as if")),
])
def test_i_can_parse_def_concept(self, text, expected):
sheerka, context, parser = self.init_parser()
def test_i_can_parse_def_concept_name(self, text, expected):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
node = res.value.value
@@ -140,6 +152,113 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka):
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_name_is_mandatory(self):
text = "def concept as 'hello'"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Name is mandatory"
@pytest.mark.parametrize("text", [
"def concept hello\nmy friend",
"def concept hello \nmy friend",
"def concept hello\n my friend",
"def concept hello \n my friend",
"def concept hello from hello\nmy friend",
"def concept hello from def hello\nmy friend",
"def concept hello from bnf hello\nmy friend",
"def concept hello from:\n\thello\nmy friend",
"def concept hello from def:\n\thello\nmy friend",
"def concept hello from bnf:\n\thello\nmy friend",
])
def test_new_line_is_not_allowed_in_the_name(self, text):
text = "def concept hello \n my friend as 'hello'"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert return_value.body == [SyntaxErrorNode(None, "Newline are not allowed in name.")]
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(return_value.reason[0], UnexpectedTokenErrorNode)
assert return_value.reason[0].message == "'concept' keyword not found."
assert return_value.reason[0].expected_tokens == [Keywords.CONCEPT]
assert return_value.reason[0].token.value == "hello"
def test_i_can_detect_empty_declaration(self):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo as where True"
res = parser.parse(context, ParserInput(text))
error = res.body.body[0]
assert not res.status
assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
assert isinstance(error, SyntaxErrorNode)
assert error.message == "Empty 'as' declaration."
def test_empty_parts_are_not_initialized(self):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo"
res = parser.parse(context, ParserInput(text))
parser_result = res.body
node = res.body.body
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert isinstance(node, DefConceptNode)
assert node.body == NotInitializedNode()
assert node.where == NotInitializedNode()
assert node.pre == NotInitializedNode()
assert node.post == NotInitializedNode()
assert node.ret == NotInitializedNode()
@pytest.mark.parametrize("part", [
"as",
"pre",
"post",
"ret",
"where"
])
def test_i_can_parse_def_concept_parts(self, part):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo " + part + " True"
res = parser.parse(context, ParserInput(text))
node = res.value.value
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
part_mapping = "body" if part == "as" else part
args = {part_mapping: get_concept_part("True")}
expected = get_def_concept("foo", **args)
assert node == expected
def test_i_can_detect_error_in_declaration(self):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput("def concept hello where 1+"))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS)
def test_i_can_parse_complex_def_concept_statement(self):
text = """def concept a mult b
where a,b
@@ -148,7 +267,7 @@ post isinstance(res, a)
as res = a * b
ret a if isinstance(a, Concept) else self
"""
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
expected_concept = get_def_concept(
@@ -177,7 +296,7 @@ func(a)
body=PN("def func(x):\n return x+1\nfunc(a)\n", "exec")
)
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
@@ -199,7 +318,7 @@ def concept add one to a as:
ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec"))
)
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
@@ -208,156 +327,17 @@ def concept add one to a as:
assert return_value.value == expected_concept
@pytest.mark.parametrize("text", [
"def concept foo as:\npass",
"def concept foo where:\npass",
"def concept foo pre:\npass",
"def concept foo post:\npass",
"def concept foo from:\nanother definition",
"def concept foo from def:\nanother definition",
"def concept foo from bnf:\n'another' 'definition'",
"def concept name from bnf",
"def concept name from bnf ",
"def concept name from bnf as True",
])
def test_indentation_is_mandatory_after_a_colon(self, text):
sheerka, context, parser = self.init_parser()
def test_i_cannot_parse_empty_bnf_definition(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
error = res.body
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Indentation not found."
@pytest.mark.parametrize("text", [
"def concept plus from:\n\ta plus b",
"def concept plus from def:\n\ta plus b",
# space before the colon
"def concept plus from :\n\ta plus b",
"def concept plus from def :\n\ta plus b",
# space after the colon
"def concept plus from: \n\ta plus b",
"def concept plus from def: \n\ta plus b",
])
def test_i_can_use_colon_and_definition_together(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("a plus b", yield_eof=False)]
@pytest.mark.parametrize("text", [
"def concept plus from bnf:\n\t'a' 'plus' 'b'",
"def concept plus from bnf :\n\t'a' 'plus' 'b'",
"def concept plus from bnf: \n\t'a' 'plus' 'b'",
])
def test_i_can_use_colon_and_bnf_definition_together(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
assert res.status
assert defined_concept.definition.status
assert defined_concept.definition.body.body == Sequence(StrMatch("a"), StrMatch("plus"), StrMatch("b"))
def test_i_can_use_colon_to_protect_keyword(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from:
give me the date !
"""
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_i_can_use_colon_to_protect_keyword_2(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from give me the date !
"""
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_name_is_mandatory(self):
text = "def concept as 'hello'"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Name is mandatory"
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], UnexpectedTokenErrorNode)
assert return_value.body[0].message == "Syntax error."
assert return_value.body[0].expected_tokens == [Keywords.CONCEPT]
@pytest.mark.parametrize("text", [
"def concept hello where 1+",
"def concept hello pre 1+",
"def concept hello post 1+",
"def concept hello as 1+"
])
def test_i_can_detect_error_in_declaration(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS)
@pytest.mark.parametrize("text", [
"def concept hello\nmy friend",
"def concept hello \nmy friend",
"def concept hello\n my friend",
"def concept hello \n my friend",
"def concept hello from hello\nmy friend",
"def concept hello from def hello\nmy friend",
"def concept hello from bnf hello\nmy friend",
"def concept hello from:\n\thello\nmy friend",
"def concept hello from def:\n\thello\nmy friend",
"def concept hello from bnf:\n\thello\nmy friend",
])
def test_new_line_is_not_allowed_in_the_name(self, text):
text = "def concept hello \n my friend as 'hello'"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert return_value.body == [SyntaxErrorNode([], "Newline are not allowed in name.")]
assert sheerka.isinstance(error, BuiltinConcepts.ERROR)
assert error.body == [SyntaxErrorNode([], "Empty 'bnf' declaration")]
def test_i_can_parse_def_concept_from_bnf(self):
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
@@ -383,35 +363,40 @@ from give me the date !
assert not parser.has_error
@pytest.mark.parametrize("text", [
'def concept "def concept x"',
'def concept "def concept x" as x',
"def concept plus from bnf:\n\t'a' 'plus' 'b'",
"def concept plus from bnf :\n\t'a' 'plus' 'b'",
"def concept plus from bnf: \n\t'a' 'plus' 'b'",
])
def test_i_can_use_double_quotes_to_protect_keywords(self, text):
sheerka, context, parser = self.init_parser()
def test_i_can_use_colon_and_bnf_definition_together(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
concept_defined = res.value.value
defined_concept = res.body.body
assert res.status
assert concept_defined.name.tokens == list(Tokenizer("def concept x", yield_eof=False))
assert defined_concept.definition.status
assert defined_concept.definition.body.body == Sequence(StrMatch("a"), StrMatch("plus"), StrMatch("b"))
@pytest.mark.parametrize("text", [
"def concept name from bnf as here is my body",
"def concept name from def as here is my body",
"def concept name from as here is my body"
@pytest.mark.parametrize("text, error", [
("def concept name from def as True", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from def", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from def ", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from as True", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from", UnexpectedEofNode("While parsing keyword 'from'.")),
("def concept name from ", UnexpectedEofNode("While parsing keyword 'from'.")),
])
def test_i_can_detect_empty_bnf_declaration(self, text):
sheerka, context, parser = self.init_parser()
def test_i_can_detect_empty_def_declaration(self, text, error):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
assert res.value.body[0] == SyntaxErrorNode([], "Empty declaration")
assert res.value.body[0] == error
@pytest.mark.parametrize("text", [
"def concept addition from a plus b as a + b",
"def concept addition from def a plus b as a + b"])
def test_i_can_def_concept_from_definition(self, text):
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
expected = get_def_concept("addition", definition="a plus b", body="a + b")
node = res.value.value
@@ -422,6 +407,114 @@ from give me the date !
assert isinstance(res.value, ParserResultConcept)
assert node == expected
@pytest.mark.parametrize("text", [
"def concept plus from:\n\ta plus b",
"def concept plus from def:\n\ta plus b",
# space before the colon
"def concept plus from :\n\ta plus b",
"def concept plus from def :\n\ta plus b",
# space after the colon
"def concept plus from: \n\ta plus b",
"def concept plus from def: \n\ta plus b",
])
def test_i_can_use_colon_and_definition_together(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("a plus b", yield_eof=False)]
def test_i_can_use_colon_to_protect_keyword(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from:
give me the date !
"""
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_i_can_use_colon_to_protect_keyword_2(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from give me the date !
"""
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
@pytest.mark.parametrize("text", [
"def",
"def concept_name"
])
def test_i_cannot_parse_invalid_entries(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [
("'name", "Missing Trailing quote", "'name"),
("foo isa 'name", "Missing Trailing quote", "'name"),
("def concept 'name", "Missing Trailing quote", "'name"),
("def concept name as 'body", "Missing Trailing quote", "'body"),
("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"),
("def concept c::", "Concept identifiers not found", ""),
])
def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
text = "def concept name from bnf unknown"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ("key", "unknown")
@pytest.mark.parametrize("text", [
'def concept "def concept x"',
'def concept "def concept x" as x',
])
def test_i_can_use_double_quotes_to_protect_keywords(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
concept_defined = res.value.value
assert res.status
assert concept_defined.name.tokens == list(Tokenizer("def concept x", yield_eof=False))
def test_i_can_parse_when_ambiguity_in_where_pre_clause(self):
sheerka, context, parser, *concepts = self.init_parser(
Concept("x is a y", pre="in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)"),
@@ -430,7 +523,7 @@ from give me the date !
text = "def concept foo x y where x is a y"
res = parser.parse(context, ParserInput(text))
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.Default",
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.DefConcept",
parser="parsers.ExactConcept")
expected = get_def_concept("foo x y", where=expected_body)
node = res.value.value
@@ -443,7 +536,7 @@ from give me the date !
text = "def concept foo x y pre x is a y"
res = parser.parse(context, ParserInput(text))
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.Default",
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.DefConcept",
parser="parsers.ExactConcept")
expected = get_def_concept("foo x y", pre=expected_body)
node = res.value.value
@@ -454,63 +547,5 @@ from give me the date !
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_detect_not_for_me(self):
text = "hello world"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.value.body[0], CannotHandleErrorNode)
@pytest.mark.parametrize("text", [
"def",
"def concept_name"
])
def test_i_cannot_parse_invalid_entries(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text", [
"concept",
"isa number",
"name isa",
])
def test_i_cannot_parse_not_for_me_entries(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.body[0], CannotHandleErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [
    ("'name", "Missing Trailing quote", "'name"),
    ("foo isa 'name", "Missing Trailing quote", "'name"),
    ("def concept 'name", "Missing Trailing quote", "'name"),
    ("def concept name as 'body", "Missing Trailing quote", "'body"),
    ("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"),
    ("def concept c::", "Concept identifiers not found", ""),
])
def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text):
    """Lexer failures surface as an ERROR whose first body node is the LexerError."""
    sheerka, context, parser = self.init_parser()
    result = parser.parse(context, ParserInput(text))
    assert not result.status
    error = result.body
    assert sheerka.isinstance(error, BuiltinConcepts.ERROR)
    lexer_error = error.body[0]
    assert isinstance(lexer_error, LexerError)
    assert lexer_error.message == error_msg
    assert lexer_error.text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
    """A 'from bnf' body naming an undefined concept yields UNKNOWN_CONCEPT."""
    sheerka, context, parser = self.init_parser()
    res = parser.parse(context, ParserInput("def concept name from bnf unknown"))
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body == ("key", "unknown")
+21 -21
View File
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseParser import UnexpectedEof, UnexpectedTokenErrorNode
from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode
from parsers.ExpressionParser import PropertyEqualsNode, PropertyEqualsSequenceNode, PropertyContainsNode, AndNode, \
OrNode, NotNode, LambdaNode, IsaNode, NameExprNode, ExpressionParser, LeftPartNotFoundError, TrueifyVisitor
@@ -33,14 +33,14 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [
("one complicated expression", n("one complicated expression")),
("function_call(a,b,c)", n("function_call(a,b,c)")),
("one expression or another expression", OrNode(n("one expression"), n("another expression"))),
("one expression and another expression", AndNode(n("one expression"), n("another expression"))),
("one or two or three", OrNode(n("one"), n("two"), n("three"))),
("one and two and three", AndNode(n("one"), n("two"), n("three"))),
("one or two and three", OrNode(n("one"), AndNode(n("two"), n("three")))),
("one and two or three", OrNode(AndNode(n("one"), n("two")), n("three"))),
("one and (two or three)", AndNode(n("one"), OrNode(n("two"), n("three")))),
# ("function_call(a,b,c)", n("function_call(a,b,c)")),
# ("one expression or another expression", OrNode(n("one expression"), n("another expression"))),
# ("one expression and another expression", AndNode(n("one expression"), n("another expression"))),
# ("one or two or three", OrNode(n("one"), n("two"), n("three"))),
# ("one and two and three", AndNode(n("one"), n("two"), n("three"))),
# ("one or two and three", OrNode(n("one"), AndNode(n("two"), n("three")))),
# ("one and two or three", OrNode(AndNode(n("one"), n("two")), n("three"))),
# ("one and (two or three)", AndNode(n("one"), OrNode(n("two"), n("three")))),
])
def test_i_can_parse_expression(self, expression, expected):
sheerka, context, parser = self.init_parser()
@@ -54,12 +54,12 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
assert expressions == expected
@pytest.mark.parametrize("expression, expected_errors", [
("one or", [UnexpectedEof("When parsing 'or'")]),
("one and", [UnexpectedEof("When parsing 'and'")]),
("one or", [UnexpectedEofNode("When parsing 'or'")]),
("one and", [UnexpectedEofNode("When parsing 'and'")]),
("and one", [LeftPartNotFoundError()]),
("or one", [LeftPartNotFoundError()]),
("or", [LeftPartNotFoundError(), UnexpectedEof("When parsing 'or'")]),
("and", [LeftPartNotFoundError(), UnexpectedEof("When parsing 'and'")]),
("or", [LeftPartNotFoundError(), UnexpectedEofNode("When parsing 'or'")]),
("and", [LeftPartNotFoundError(), UnexpectedEofNode("When parsing 'and'")]),
])
def test_i_can_detect_error(self, expression, expected_errors):
sheerka, context, parser = self.init_parser()
@@ -74,17 +74,17 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
res = parser.parse(context, ParserInput("("))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
assert res.body.body[0].token.type == TokenKind.EOF
assert res.body.body[0].expected_tokens == [TokenKind.RPAR]
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
assert res.body.reason[0].token.type == TokenKind.EOF
assert res.body.reason[0].expected_tokens == [TokenKind.RPAR]
res = parser.parse(context, ParserInput(")"))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
assert res.body.body[0].token.type == TokenKind.RPAR
assert res.body.body[0].expected_tokens == []
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
assert res.body.reason[0].token.type == TokenKind.RPAR
assert res.body.reason[0].expected_tokens == []
res = parser.parse(context, ParserInput("one and two)"))
assert not res.status
+71
View File
@@ -0,0 +1,71 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseCustomGrammarParser import KeywordNotFound
from parsers.FormatRuleParser import FormatRuleParser, FormatAstRawText, FormatRuleNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
# Shared concept map for the whole test class; its values are fed to
# init_concepts() in init_parser(). Empty here, so the class-level sheerka
# is built without any extra concepts. Presumably keyed by concept name —
# TODO confirm against other test modules using the same pattern.
cmap = {}
class TestFormatRuleParser(TestUsingMemoryBasedSheerka):
    """Tests for FormatRuleParser ("when <condition> print <format>" rules)."""

    # Class-level sheerka built once in setup_class so individual tests can
    # reuse it instead of re-initialising the concept store every time.
    sheerka = None

    @classmethod
    def setup_class(cls):
        """Build the shared sheerka once for the whole class."""
        t = cls()
        cls.sheerka, context, _ = t.init_parser(cmap)

    def init_parser(self, concepts_map=None):
        """Return (sheerka, context, parser).

        With a concepts_map, a fresh sheerka is created from its values;
        otherwise the class-level sheerka is reused with a new context.
        """
        if concepts_map is not None:
            sheerka, context, *concepts = self.init_concepts(*concepts_map.values(), create_new=True)
        else:
            sheerka = TestFormatRuleParser.sheerka
            context = self.get_context(sheerka)
        parser = FormatRuleParser()
        return sheerka, context, parser

    def test_i_can_detect_empty_expression(self):
        """Empty input fails with an IS_EMPTY result."""
        sheerka, context, parser = self.init_parser()
        res = parser.parse(context, ParserInput(""))
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    def test_input_must_be_a_parser_input(self):
        """parse() returns None when the input is not a ParserInput."""
        sheerka, context, parser = self.init_parser()
        # BUG FIX: the comparison result was previously discarded (bare
        # `... is None` expression with no assert), so this test could
        # never fail regardless of what parse() returned.
        assert parser.parse(context, "not a parser input") is None

    def test_i_can_parse_a_simple_rule(self):
        """A well-formed rule yields a PARSER_RESULT wrapping a FormatRuleNode."""
        sheerka, context, parser = self.init_parser()
        text = "when isinstance(last_value(), Concept) print hello world!"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.body
        format_rule = res.body.body
        rule = format_rule.rule
        format_ast = format_rule.format_ast
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert isinstance(format_rule, FormatRuleNode)
        assert sheerka.isinstance(rule, BuiltinConcepts.RETURN_VALUE)
        assert format_ast == FormatAstRawText("hello world!")

    @pytest.mark.parametrize("text, error", [
        ("hello world", [KeywordNotFound(None, keywords=['when', 'print'])]),
        ("when True", [KeywordNotFound([], keywords=['print'])]),
        ("print True", [KeywordNotFound([], keywords=['when'])]),
    ])
    def test_cannot_parse_when_not_for_me(self, text, error):
        """Missing 'when'/'print' keywords produce NOT_FOR_ME with the exact reason."""
        sheerka, context, parser = self.init_parser()
        res = parser.parse(context, ParserInput(text))
        not_for_me = res.body
        assert not res.status
        assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME)
        assert not_for_me.reason == error
+2
View File
@@ -70,6 +70,8 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka):
sheerka, context, parser = self.init_parser()
parser.reset_parser(context, ParserInput(expression))
parser.parser_input.next_token()
res = parser.parse_function()
assert res == expected