Added BNF support when defining a new concept; started filtering log output

This commit is contained in:
2019-12-13 20:26:11 +01:00
parent 75c8793d53
commit c668cc46d2
29 changed files with 1487 additions and 190 deletions
+181
View File
@@ -0,0 +1,181 @@
import ast
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX, ConceptParts, Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer
from evaluators.AddConceptEvaluator import AddConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.ExactConceptParser import ExactConceptParser
from parsers.PythonParser import PythonNode, PythonParser
def get_context():
    """Return a fresh ExecutionContext backed by an in-memory Sheerka engine."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", "xxx", engine)
def get_concept(name, where=None, pre=None, post=None, body=None, definition=None):
    """Build a DefConceptNode for *name* and wrap it the way a parser would.

    Each supplied part (body/where/pre/post) is normalized through
    get_concept_part; *definition* is attached as-is.  The node is returned
    inside a ReturnValueConcept/ParserResultConcept pair so it can be fed
    straight to AddConceptEvaluator.
    """
    node = DefConceptNode([], name=NameNode(list(Tokenizer(name))))
    # Attach only the parts the caller actually provided.
    for attr, raw_part in (("body", body), ("where", where),
                           ("pre", pre), ("post", post)):
        if raw_part:
            setattr(node, attr, get_concept_part(raw_part))
    if definition:
        node.definition = definition
    return ReturnValueConcept(BaseParser.PREFIX + "some_name", True,
                              ParserResultConcept(value=node))
def get_concept_part(part):
    """Normalize *part* into a ReturnValueConcept wrapping a parser result.

    Accepts three input shapes:
      * a python source string — parsed with ast.parse(mode="eval") into a
        PythonNode first;
      * an already-built PythonNode;
      * a ready-made ReturnValueConcept, passed through unchanged.

    Raises:
        TypeError: for any other type.  Previously this fell through and
            silently returned None, which surfaced later as a confusing
            AttributeError at the call site.
    """
    if isinstance(part, str):
        node = PythonNode(part, ast.parse(part, mode="eval"))
        return ReturnValueConcept(
            who="Parsers:DefaultParser",
            status=True,
            value=ParserResultConcept(
                source=part,
                parser=PythonParser(),
                value=node))
    if isinstance(part, PythonNode):
        return ReturnValueConcept(
            who="Parsers:DefaultParser",
            status=True,
            value=ParserResultConcept(
                source=part.source,
                parser=PythonParser(),
                value=part))
    if isinstance(part, ReturnValueConcept):
        return part
    raise TypeError(
        f"Unsupported concept part type: {type(part).__name__}")
def get_concept_definition(source, parsing_expression):
    """Wrap a parsed BNF *parsing_expression* as a successful RegexParser result."""
    parsed = ParserResultConcept(
        source=source,
        parser=RegexParser(),
        value=parsing_expression)
    return ReturnValueConcept(
        who="Parsers:RegexParser",
        status=True,
        value=parsed)
@pytest.mark.parametrize("ret_val, expected", [
    # successful parse wrapping a DefConceptNode -> evaluator matches
    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=DefConceptNode([]))), True),
    # parse status False -> no match
    (ReturnValueConcept(BaseParser.PREFIX + "some_name", False, ParserResultConcept(value=DefConceptNode([]))), False),
    # payload is not a ParserResultConcept -> no match
    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a ParserResultConcept"), False),
    # ParserResultConcept without a value -> no match
    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False),
])
def test_i_can_match(ret_val, expected):
    """AddConceptEvaluator.matches accepts only successful DefConceptNode results."""
    evaluator = AddConceptEvaluator()
    assert evaluator.matches(get_context(), ret_val) == expected
def test_that_the_source_is_correctly_set():
    """The source text of every concept part survives evaluation verbatim.

    Builds a def-concept with where/pre/body parts and a bnf definition,
    evaluates it, and checks that the created concept's metadata echoes the
    original source strings exactly (including the deliberate stray space in
    "isinstance(a, str )").
    """
    context = get_context()
    def_concept_return_value = get_concept(
        name="hello a",
        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
        where="isinstance(a, str )",
        pre="a is not None",
        body="print('hello' + a)")
    evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
    assert evaluated.status
    # a successful add wraps the created concept in a NEW_CONCEPT envelope
    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
    created_concept = evaluated.body.body
    assert created_concept.metadata.name == "hello a"
    assert created_concept.metadata.where == "isinstance(a, str )"
    assert created_concept.metadata.pre == "a is not None"
    # post was never supplied, so it must stay unset
    assert created_concept.metadata.post is None
    assert created_concept.metadata.body == "print('hello' + a)"
    assert created_concept.metadata.definition == "hello a"
def test_that_the_ast_is_correctly_initialized():
    """Cached ASTs exist for exactly the parts supplied: where, pre and body."""
    context = get_context()
    ret_val = get_concept(
        name="hello a",
        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
        where="isinstance(a, str )",
        pre="a is not None",
        body="print('hello' + a)")
    result = AddConceptEvaluator().eval(context, ret_val)
    assert result.status
    assert context.sheerka.isinstance(result.body, BuiltinConcepts.NEW_CONCEPT)
    cached = result.body.body.cached_asts
    # post was never supplied, so it is the only part without a cached AST
    all_parts = (ConceptParts.WHERE, ConceptParts.PRE,
                 ConceptParts.BODY, ConceptParts.POST)
    present = {part for part in all_parts if part in cached}
    assert present == {ConceptParts.WHERE, ConceptParts.PRE, ConceptParts.BODY}
def test_that_the_new_concept_is_correctly_saved():
    """Evaluating an add-concept persists the concept to the database.

    The in-memory concept cache is wiped after evaluation so the second
    lookup is forced to hit the database, proving the concept was really
    saved (and that cached ASTs are not part of what gets persisted).
    """
    context = get_context()
    def_concept_return_value = get_concept(
        name="hello a",
        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
        where="isinstance(a, str )",
        pre="a is not None",
        body="print('hello' + a)")
    # before the add, the key only resolves to an UNKNOWN_CONCEPT placeholder
    from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0")
    assert context.sheerka.isinstance(from_db, BuiltinConcepts.UNKNOWN_CONCEPT)
    AddConceptEvaluator().eval(context, def_concept_return_value)
    context.sheerka.concepts_cache = {}  # reset cache
    # second lookup must now come from the database, not the cache
    from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0")
    # the key replaces the variable name 'a' with a positional placeholder
    assert from_db.metadata.key == f"hello {VARIABLE_PREFIX}0"
    assert from_db.metadata.name == "hello a"
    assert from_db.metadata.where == "isinstance(a, str )"
    assert from_db.metadata.pre == "a is not None"
    assert from_db.metadata.post is None
    assert from_db.metadata.body == "print('hello' + a)"
    assert from_db.metadata.definition == "hello a"
    assert len(from_db.props) == 1
    assert "a" in from_db.props
    assert from_db.cached_asts == {}  # ast is not saved in db
def test_i_can_get_props_from_python_node():
    """get_props extracts the names referenced by a python-expression part."""
    ret_val = get_concept_part("isinstance(a, str)")
    context = get_context()
    # NOTE(review): this passes context.sheerka while the sibling get_props
    # tests below pass the full ExecutionContext — confirm which one the
    # get_props signature actually expects.
    assert AddConceptEvaluator.get_props(context.sheerka, ret_val, ["a"]) == ["a"]
def test_i_can_get_props_from_another_concept():
    """Props are read directly off a referenced Concept's own props."""
    referenced = Concept("hello").set_prop("a").set_prop("b")
    wrapped = ReturnValueConcept(
        who="some_parser",
        status=True,
        value=ParserResultConcept(value=referenced))
    props = AddConceptEvaluator.get_props(get_context(), wrapped, [])
    assert props == ["a", "b"]
def test_i_can_get_props_from_definition():
    """Props are collected from the ConceptMatch nodes of a bnf definition.

    The expression references 'mult' and 'add'; get_props returns them
    sorted alphabetically regardless of their order in the expression.
    """
    parsing_expression = Sequence(ConceptMatch('mult'), ZeroOrMore(Sequence(StrMatch("+"), ConceptMatch("add"))))
    ret_val = get_concept_definition("mult (('+'|'-') add)?", parsing_expression)
    assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["add", "mult"]
+7
View File
@@ -0,0 +1,7 @@
import pytest
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseParser import BaseParser
+25 -15
View File
@@ -5,6 +5,7 @@ from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptEvaluator import ConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.ExactConceptParser import ExactConceptParser
def get_context():
@@ -13,12 +14,21 @@ def get_context():
return ExecutionContext("test", "xxx", sheerka)
def get_return_value(concept, source=None):
return ReturnValueConcept(
"some_name",
True,
ParserResultConcept(parser=ExactConceptParser(),
source=source or concept.name,
value=concept,
try_parsed=concept))
@pytest.mark.parametrize("ret_val, expected", [
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, Concept()), True),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", False, Concept()), False),
(ReturnValueConcept("Not a parser", True, Concept()), False),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a concept"), False),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=Concept())), True),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=Concept())), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept")), False),
(ReturnValueConcept("some_name", True, Concept()), False),
])
def test_i_can_match(ret_val, expected):
context = get_context()
@@ -30,7 +40,7 @@ def test_concept_is_returned_when_no_body():
concept = Concept(name="one").init_key()
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept)
item = get_return_value(concept)
result = evaluator.eval(context, item)
assert result.who == evaluator.name
@@ -44,7 +54,7 @@ def test_body_is_evaluated_when_python_body():
concept = Concept(name="one", body="1").init_key()
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept)
item = get_return_value(concept)
result = evaluator.eval(context, item)
assert result.who == evaluator.name
@@ -60,7 +70,7 @@ def test_body_is_evaluated_when_concept_body():
concept_un = Concept(name="un", body="one").init_key()
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un)
item = get_return_value(concept_un)
result = evaluator.eval(context, item)
assert result.who == evaluator.name
@@ -80,7 +90,7 @@ def test_body_is_evaluated_when_concept_body_with_a_body():
concept_un = Concept(name="un", body="one").init_key()
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un)
item = get_return_value(concept_un)
result = evaluator.eval(context, item)
assert result.who == evaluator.name
@@ -97,7 +107,7 @@ def test_i_can_evaluate_longer_chains():
concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d)
item = get_return_value(concept_d)
result = evaluator.eval(context, item)
assert result.status
@@ -112,7 +122,7 @@ def test_i_can_evaluate_longer_chains_2():
concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d)
item = get_return_value(concept_d)
result = evaluator.eval(context, item)
assert result.status
@@ -133,7 +143,7 @@ def test_i_can_recognize_concept_properties():
.set_prop("b", "two").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
item = get_return_value(concept_plus)
result = evaluator.eval(context, item)
assert result.status
@@ -156,7 +166,7 @@ def test_i_can_recognize_concept_properties_with_body():
.set_prop("b", "two").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
item = get_return_value(concept_plus)
result = evaluator.eval(context, item)
assert result.status
@@ -174,7 +184,7 @@ def test_i_can_recognize_concept_properties_with_body_when_concept_has_a_body():
.set_prop("b", "two").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
item = get_return_value(concept_plus)
result = evaluator.eval(context, item)
assert result.status
@@ -189,7 +199,7 @@ def test_i_cannot_recognize_a_concept_if_one_of_the_prop_is_unknown():
.set_prop("b", "two").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
item = get_return_value(concept_plus)
result = evaluator.eval(context, item)
assert not result.status
+175 -34
View File
@@ -2,8 +2,18 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
CrossRef
CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \
ParsingExpressionVisitor
class ConceptVisitor(ParsingExpressionVisitor):
    """Test helper: collects the name of every ConceptMatch node visited."""
    def __init__(self):
        # set so duplicate references to the same concept collapse to one entry
        self.concepts = set()
    def visit_ConceptMatch(self, node):
        self.concepts.add(node.concept_name)
@pytest.mark.parametrize("match, text", [
@@ -23,7 +33,7 @@ def test_i_can_match_simple_tokens(match, text):
foo = Concept(name="foo")
concepts = {foo: text}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, text)
@@ -38,7 +48,7 @@ def test_i_can_match_multiple_concepts_in_one_input():
two = Concept(name="two")
concepts = {one: "one", two: "two"}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two one")
@@ -69,7 +79,7 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown():
two = Concept(name="two")
concepts = {one: "one", two: "two"}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
assert not res.status
@@ -86,7 +96,7 @@ def test_i_can_match_sequence():
foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
@@ -100,7 +110,7 @@ def test_wrong_sequence_is_not_matched():
foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three one")
@@ -116,7 +126,7 @@ def test_i_cannot_match_sequence_if_end_of_file():
foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two")
assert not res.status
@@ -133,7 +143,7 @@ def test_i_always_choose_the_longest_match():
concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
@@ -149,7 +159,7 @@ def test_i_can_match_several_sequences():
concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three one two")
@@ -166,7 +176,7 @@ def test_i_can_match_ordered_choice():
foo = Concept(name="foo")
concepts = {foo: OrderedChoice("one", "two")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res1 = parser.parse(context, "one")
assert res1.status
@@ -189,7 +199,7 @@ def test_i_cannot_match_ordered_choice_with_empty_alternative():
foo = Concept(name="foo")
concepts = {foo: Sequence(OrderedChoice("one", ""), "two")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "ok") # because token[0] is not "one" and not "" (it is 'two')
assert not res.status
@@ -201,7 +211,7 @@ def test_i_can_mix_sequences_and_ordered_choices():
concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res1 = parser.parse(context, "twenty one ok")
assert res1.status
@@ -225,7 +235,7 @@ def test_i_can_mix_ordered_choices_and_sequences():
concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "twenty thirty")
assert res.status
@@ -240,7 +250,7 @@ def test_i_cannot_parse_empty_optional():
concepts = {foo: Optional("one")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "")
assert not res.status
@@ -253,7 +263,7 @@ def test_i_can_parse_optional():
concepts = {foo: Optional("one")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one")
assert res.status
@@ -266,7 +276,7 @@ def test_i_can_parse_sequence_starting_with_optional():
concepts = {foo: Sequence(Optional("twenty"), "one")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "twenty one")
assert res.status
@@ -283,7 +293,7 @@ def test_i_can_parse_sequence_ending_with_optional():
concepts = {foo: Sequence("one", "two", Optional("three"))}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
assert res.status
@@ -300,7 +310,7 @@ def test_i_can_parse_sequence_with_optional_in_between():
concepts = {foo: Sequence("one", Optional("two"), "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
assert res.status
@@ -312,19 +322,16 @@ def test_i_can_parse_sequence_with_optional_in_between():
def test_i_can_use_reference():
# The problem here is when there are multiple match for the same input
# The parsing result is a list of all concepts found
# So it's already a list that represents a sequence, not a choice
# So I need to create a choice concept
# create the return value for every possible graph
# --> The latter seems to be the best as we don't defer the resolution of the problem to someone else
# when there are multiple matches for the same input
# Do I need to create a choice concept ?
# No, create a return value for every possible graph
context = get_context()
foo = Concept(name="foo")
bar = Concept(name="bar")
concepts = {foo: Sequence("one", "two"), bar: foo}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two")
assert len(res) == 2
@@ -350,7 +357,7 @@ def test_i_can_use_context_reference_with_multiple_levels():
concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two")
assert len(res) == 3
@@ -375,7 +382,7 @@ def test_order_is_not_important_when_using_references():
concepts = {bar: foo, foo: Sequence("one", "two")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two")
assert len(res) == 2
@@ -390,7 +397,7 @@ def test_i_can_parse_when_reference():
concepts = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "twenty two")
assert res.status
@@ -415,7 +422,7 @@ def test_i_can_detect_duplicates_when_reference():
foo: OrderedChoice("twenty", "thirty")
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "twenty")
assert len(res) == 2
@@ -437,7 +444,7 @@ def test_i_can_detect_infinite_recursion():
foo: bar
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(get_context(), concepts)
assert bar not in parser.concepts_dict
assert foo not in parser.concepts_dict
@@ -452,7 +459,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
foo: OrderedChoice(bar, "foo")
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -464,7 +471,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
foo: OrderedChoice("foo", bar)
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
assert foo in parser.concepts_dict
assert bar in parser.concepts_dict
@@ -485,7 +492,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence():
foo: Sequence("one", bar, "two")
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -500,7 +507,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic
foo: Sequence("one", OrderedChoice(bar, "other"), "two")
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -510,6 +517,140 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
# TODO infinite recursion with optional
pass
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
])
def test_i_can_parse_regex(expression, expected):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
])
def test_i_can_detect_errors(expression, error):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_parse_regex_with_reference():
expression = "foo"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ConceptMatch("foo")
assert res.value.source == expression
def test_i_can_parse_cross_ref_with_modifier():
expression = "foo*"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
assert res.value.source == expression
def test_i_can_parse_sequence_with_cross_ref():
expression = "foo 'and' bar+"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_parse_choice_with_cross_ref():
foo = Concept("foo")
bar = Concept("bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
expression = "foo | bar?"
parser = RegexParser()
res = parser.parse(context, Tokenizer(expression))
assert res.status
assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo")
bar = Concept(name="bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = RegexParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
def test_i_can_visit_parsing_expression():
mult = Concept(name="mult")
add = Concept(name="add")
visitor = ConceptVisitor()
visitor.visit(Sequence(mult, Optional(Sequence("+", add))))
assert sorted(list(visitor.concepts)) == ["add", "mult"]
#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
# context = get_context()
+36 -1
View File
@@ -2,12 +2,15 @@ import pytest
import ast
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser
from parsers.PythonParser import PythonParser, PythonNode
from core.tokenizer import Keywords, Tokenizer
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
# def nop():
# return NopNode()
#
@@ -52,7 +55,7 @@ from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
# return left_as_string == right_as_string
#
def get_concept(name, where=None, pre=None, post=None, body=None):
def get_concept(name, where=None, pre=None, post=None, body=None, definition=None):
concept = DefConceptNode([], name=NameNode(list(Tokenizer(name))))
if body:
@@ -63,6 +66,12 @@ def get_concept(name, where=None, pre=None, post=None, body=None):
concept.pre = get_concept_part(pre)
if post:
concept.post = get_concept_part(post)
if definition:
concept.definition = ReturnValueConcept(
"Parsers:RegexParser",
True,
definition)
return concept
@@ -324,3 +333,29 @@ def test_new_line_is_not_allowed_in_the_name():
assert not res.status
assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")]
def test_i_can_parse_def_concept_from_regex():
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
parser = DefaultParser()
res = parser.parse(get_context(), text)
node = res.value.value
definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition)
expected = get_concept(name="name", body="__definition[0]", definition=parser_result)
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_detect_empty_bnf_declaration():
text = "def concept name from bnf as __definition[0]"
parser = DefaultParser()
res = parser.parse(get_context(), text)
assert not res.status
assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration")
+14 -12
View File
@@ -41,7 +41,7 @@ def test_i_can_recognize_a_simple_concept():
assert len(results) == 1
assert results[0].status
assert results[0].value == concept
assert results[0].value.value == concept
def test_i_can_recognize_concepts_defined_several_times():
@@ -53,14 +53,14 @@ def test_i_can_recognize_concepts_defined_several_times():
results = ExactConceptParser().parse(context, source)
assert len(results) == 2
results = sorted(results, key=lambda x: x.value.name) # because of the usage of sets
results = sorted(results, key=lambda x: x.value.value.name) # because of the usage of sets
assert results[0].status
assert results[0].value.name == "hello a"
assert results[0].value.props["a"].value == "world"
assert results[0].value.value.name == "hello a"
assert results[0].value.value.props["a"].value == "world"
assert results[1].status
assert results[1].value.name == "hello world"
assert results[1].value.value.name == "hello world"
def test_i_can_recognize_a_concept_with_variables():
@@ -72,9 +72,10 @@ def test_i_can_recognize_a_concept_with_variables():
assert len(results) == 1
assert results[0].status
assert results[0].value.key == concept.key
assert results[0].value.props["a"].value == "10"
assert results[0].value.props["b"].value == "5"
concept_found = results[0].value.value
assert concept_found.key == concept.key
assert concept_found.props["a"].value == "10"
assert concept_found.props["b"].value == "5"
def test_i_can_recognize_a_concept_with_duplicate_variables():
@@ -86,9 +87,10 @@ def test_i_can_recognize_a_concept_with_duplicate_variables():
assert len(results) == 1
assert results[0].status
assert results[0].value.key == concept.key
assert results[0].value.props["a"].value == "10"
assert results[0].value.props["b"].value == "5"
concept_found = results[0].value.value
assert concept_found.key == concept.key
assert concept_found.props["a"].value == "10"
assert concept_found.props["b"].value == "5"
def test_i_can_manage_unknown_concept():
@@ -121,7 +123,7 @@ def test_i_can_detect_concept_from_tokens():
assert len(results) == 1
assert results[0].status
assert results[0].value == concept
assert results[0].value.value == concept
def get_context():
+20 -1
View File
@@ -1,5 +1,7 @@
import ast
import pytest
from core.ast.nodes import NodeParent, GenericNodeConcept
import core.ast.nodes
from core.ast.visitors import ConceptNodeVisitor, UnreferencedNamesVisitor
@@ -102,7 +104,7 @@ def my_function(a,b):
assert sheerka.value(visitor.names[6]) == "a"
def test_i_can_get_non_referenced_variables():
def test_i_can_get_unreferenced_variables():
source = """
def my_function(a,b):
for i in range(b):
@@ -126,6 +128,23 @@ my_function(x,y)
assert "y" in values
@pytest.mark.parametrize("source, expected", [
("a,b", ["a", "b"]),
("isinstance(a, int)", ["a", "int"])
])
def test_i_can_get_unreferenced_variables_from_simple_expressions(source, expected):
sheerka = get_sheerka()
node = ast.parse(source)
concept_node = core.ast.nodes.python_to_concept(node)
visitor = UnreferencedNamesVisitor(sheerka)
visitor.visit(concept_node)
assert sorted(list(visitor.names)) == expected
def test_i_can_compare_NodeParent_with_tuple():
node_parent = NodeParent(GenericNodeConcept("For", None), "target")
assert node_parent == ("For", "target")
+7
View File
@@ -20,6 +20,13 @@ def test_i_can_get_concept_key(name, variables, expected):
assert concept.metadata.key == expected
def test_key_does_not_use_variable_when_definition_is_set():
concept = Concept("plus").set_prop('plus')
concept.init_key()
assert concept.metadata.key == "plus"
def test_i_can_serialize():
"""
Test concept.to_dict()
+38 -2
View File
@@ -9,6 +9,8 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import Concept, PROPERTIES_TO_SERIALIZE
from core.sheerka import Sheerka, ExecutionContext
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \
ConceptLexerParser
from sdp.sheerkaDataProvider import SheerkaDataProvider
tests_root = path.abspath("../build/tests")
@@ -422,7 +424,7 @@ as:
def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept():
"""
In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b')
are correctly detected, because of the concept 'a plus b' in its body
are correctly detected, thanks to the source code 'a plus b' in its body
:return:
"""
sheerka = get_sheerka()
@@ -558,7 +560,7 @@ def test_i_can_manage_concepts_with_the_same_key_when_values_are_the_same():
assert res[0].who == sheerka.get_evaluator_name(MultipleSameSuccessEvaluator.NAME)
def test_i_can_create_concepts_on_python_codes():
def test_i_can_create_concepts_with_python_code_as_body():
sheerka = get_sheerka()
context = get_context(sheerka)
@@ -570,6 +572,40 @@ def test_i_can_create_concepts_on_python_codes():
assert isinstance(res[0].value, list)
def test_i_can_create_concept_with_bnf_definition():
    """Defining a concept 'from bnf' stores its grammar, definition and props."""
    sheerka = get_sheerka()

    # Pre-register a concept 'a' with a grammar so the new bnf can reference it.
    concept_a = Concept("a")
    sheerka.add_in_cache(concept_a)
    sheerka.concepts_grammars = ConceptLexerParser().initialize(
        get_context(sheerka),
        {concept_a: OrderedChoice("one", "two")}).body

    res = sheerka.eval("def concept plus from bnf a ('plus' plus)?")

    # Evaluation succeeds with a single NEW_CONCEPT result.
    assert len(res) == 1
    assert res[0].status
    assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT)

    # The concept is persisted with its bnf source and the props detected in it.
    saved_concept = sheerka.sdp.get_safe(sheerka.CONCEPTS_ENTRY, "plus")
    assert saved_concept.key == "plus"
    assert saved_concept.metadata.definition == "a ('plus' plus)?"
    assert "a" in saved_concept.props
    assert "plus" in saved_concept.props

    # The compiled grammar is persisted as well, keyed by the saved concept.
    expected_bnf = Sequence(
        ConceptMatch("a"),
        Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))),
        rule_name="plus")
    saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY)
    assert saved_definitions[saved_concept] == expected_bnf

    # The in-memory result mirrors what was persisted.
    new_concept = res[0].value.body
    assert new_concept.metadata.name == "plus"
    assert new_concept.metadata.definition == "a ('plus' plus)?"
    assert new_concept.bnf == expected_bnf
    assert "a" in new_concept.props
    assert "plus" in new_concept.props
def get_sheerka(root="mem://", skip_builtins_in_db=True):
sheerka = Sheerka(skip_builtins_in_db)
sheerka.initialize(root)
+42 -1
View File
@@ -311,6 +311,18 @@ def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(root):
"mem://"
])
def test_i_can_add_a_dict(root):
"""
Adding a dictionary.
Note that there is no key when adding a dictionary
If you add {'my_key': 'my_value'}
'my_key is not considered as the key of the entry'
Because if you add {'my_key': 'my_value', 'my_key2': 'my_value2'}
There are now multiple keys.
So for dictionary entries, the key is not managed
"""
sdp = SheerkaDataProvider(root)
obj = {"my_key": "my_value"}
@@ -735,6 +747,7 @@ def test_i_can_set_using_reference(root):
# sanity check, make sure that I can load back
loaded = sdp.get(entry, key)
assert loaded == ObjWithKey(2, "foo")
assert getattr(loaded, Serializer.ORIGIN) == "95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268"
@pytest.mark.parametrize("root", [
@@ -754,7 +767,35 @@ def test_i_can_add_reference_of_an_object_with_a_key(root):
assert key == obj.key
assert entry == "entry"
assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}}
assert sdp.load_obj(digest) == obj
loaded = sdp.load_obj(digest)
assert loaded == obj
assert getattr(loaded, Serializer.ORIGIN) == digest
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_can_add_reference_a_dictionary(root):
    """Adding a dict with use_ref=True stores a digest reference, not the dict."""
    sdp = SheerkaDataProvider(root)
    payload = {"my_key": "value1"}

    # Register a serializer for plain dicts so they can be stored by reference.
    sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(payload)))

    entry, key = sdp.add(evt_digest, "entry", payload, use_ref=True)

    state = sdp.load_state(sdp.get_snapshot())
    digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):]

    # Dictionary entries carry no key; the state holds only the prefixed digest.
    assert key is None
    assert entry == "entry"
    assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}

    # Loading the digest restores the value plus an ORIGIN marker (2 entries total).
    loaded = sdp.load_obj(digest)
    assert loaded["my_key"] == payload["my_key"]
    assert loaded[Serializer.ORIGIN] == digest
    assert len(loaded) == 2
@pytest.mark.parametrize("root", [
+62
View File
@@ -1,6 +1,8 @@
import core.utils
import pytest
from core.tokenizer import Token, TokenKind
@pytest.mark.parametrize("lst, as_string", [
(None, "",),
@@ -76,3 +78,63 @@ def test_i_can_get_sub_classes():
def test_i_can_product(a, b, expected):
res = core.utils.product(a, b)
assert res == expected
@pytest.mark.parametrize("input_as_list, expected_as_list", [
    ([" "], []),
    ([" ", "one"], ["one"]),
    (["one", " "], ["one"]),
    ([" ", "one", " "], ["one"]),
    (["\n", "one"], ["one"]),
    (["one", "\n"], ["one"]),
    (["\n", "one", "\n"], ["one"]),
    ([" ", "\n", "one"], ["one"]),
    (["one", " ", "\n"], ["one"]),
    ([" ", "\n", "one", " ", "\n"], ["one"]),
    (["\n", " ", "one"], ["one"]),
    (["one", "\n", " "], ["one"]),
    (["\n", " ", "one", "\n", " "], ["one"]),
    ([" ", "\n", " ", "one"], ["one"]),
    (["one", " ", "\n", " "], ["one"]),
    ([" ", "\n", " ", "one", " ", "\n", " "], ["one"]),
    (["\n", " ", "\n", "one"], ["one"]),
    (["one", "\n", " ", "\n"], ["one"]),
    (["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]),
])
def test_i_can_strip(input_as_list, expected_as_list):
    """strip_tokens removes leading and trailing whitespace/newline tokens."""
    stripped = core.utils.strip_tokens(get_tokens(input_as_list))
    assert stripped == get_tokens(expected_as_list)
def test_by_default_eof_is_not_stripped():
    """Without the opt-in flag, trailing whitespace before EOF and the EOF itself are kept."""
    source_tokens = get_tokens(["one", "two", " ", "\n", "<EOF>"])
    assert core.utils.strip_tokens(source_tokens) == get_tokens(["one", "two", " ", "\n", "<EOF>"])
def test_i_can_strip_eof():
    """With the second argument True, EOF and the whitespace before it are stripped too."""
    stripped = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", "<EOF>"]), True)
    assert stripped == get_tokens(["one", "two"])
def get_tokens(lst):
    """Build a Token list from plain strings.

    " " -> WHITESPACE, "\\n" -> NEWLINE, "<EOF>" -> EOF, anything else -> IDENTIFIER.
    Positions are irrelevant for these tests, so line/column/offset are all 0.
    """
    tokens = []
    for item in lst:
        if item == " ":
            kind, value = TokenKind.WHITESPACE, " "
        elif item == "\n":
            kind, value = TokenKind.NEWLINE, "\n"
        elif item == "<EOF>":
            # NOTE(review): the EOF token carries "\n" as its value — looks
            # copy-pasted from the NEWLINE branch; confirm this is intentional.
            kind, value = TokenKind.EOF, "\n"
        else:
            kind, value = TokenKind.IDENTIFIER, item
        tokens.append(Token(kind, value, 0, 0, 0))
    return tokens