Added simple form of concept composition

This commit is contained in:
2020-01-15 18:38:29 +01:00
parent 51fa9629d0
commit 8152f82c6b
22 changed files with 1105 additions and 544 deletions
+63 -63
View File
@@ -6,7 +6,7 @@ from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore, \
UnrecognizedTokensNode
UnrecognizedTokensNode, cnode, short_cnode
from sdp.sheerkaDataProvider import Event
@@ -163,7 +163,7 @@ def test_i_always_choose_the_longest_match():
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 4, "one two three")]
assert return_value == [cnode("foo", 0, 4, "one two three")]
def test_i_can_match_several_sequences():
@@ -176,8 +176,8 @@ def test_i_can_match_several_sequences():
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [
("foo", 0, 4, "one two three"),
("bar", 6, 8, "one two"),
cnode("foo", 0, 4, "one two three"),
cnode("bar", 6, 8, "one two"),
]
@@ -189,13 +189,13 @@ def test_i_can_match_ordered_choice():
res1 = parser.parse(context, "one")
assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [("foo", 0, 0, "one")]
assert res1.value.body == [cnode("foo", 0, 0, "one")]
assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)])
res2 = parser.parse(context, "two")
assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [("foo", 0, 0, "two")]
assert res2.value.body == [cnode("foo", 0, 0, "two")]
assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)])
res3 = parser.parse(context, "three")
@@ -259,11 +259,11 @@ def test_i_can_mix_ordered_choices_and_sequences():
res = parser.parse(context, "twenty thirty")
assert res.status
assert res.value.value == [("foo", 0, 2, "twenty thirty")]
assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")]
res = parser.parse(context, "one")
assert res.status
assert res.value.value == [("foo", 0, 0, "one")]
assert res.value.value == [cnode("foo", 0, 0, "one")]
def test_i_cannot_parse_empty_optional():
@@ -319,11 +319,11 @@ def test_i_can_parse_sequence_ending_with_optional():
res = parser.parse(context, "one two three")
assert res.status
assert res.value.body == [("foo", 0, 4, "one two three")]
assert res.value.body == [cnode("foo", 0, 4, "one two three")]
res = parser.parse(context, "one two")
assert res.status
assert res.value.body == [("foo", 0, 2, "one two")]
assert res.value.body == [cnode("foo", 0, 2, "one two")]
def test_i_can_parse_sequence_with_optional_in_between():
@@ -335,11 +335,11 @@ def test_i_can_parse_sequence_with_optional_in_between():
res = parser.parse(context, "one two three")
assert res.status
assert res.value.body == [("foo", 0, 4, "one two three")]
assert res.value.body == [cnode("foo", 0, 4, "one two three")]
res = parser.parse(context, "one three")
assert res.status
assert res.value.body == [("foo", 0, 2, "one three")]
assert res.value.body == [cnode("foo", 0, 2, "one three")]
def test_i_cannot_parse_wrong_input_with_optional():
@@ -370,13 +370,13 @@ def test_i_can_use_reference():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")]
assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two")
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")]
assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept
# the body and the prop['foo'] are the same concept 'foo'
assert cbody(concept_found_2) == get_expected(foo, "one two")
@@ -400,13 +400,13 @@ def test_i_can_use_a_reference_with_a_body():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")]
assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept
assert concept_found_1.body == "'foo'"
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")]
assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept
# the body and the prop['foo'] are the same concept 'foo'
assert cbody(concept_found_2) == foo
@@ -430,20 +430,20 @@ def test_i_can_use_context_reference_with_multiple_levels():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")]
assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two")
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")]
assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept
assert cbody(concept_found_2) == get_expected(foo, "one two")
assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))
assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [("baz", 0, 2, "one two")]
assert res[2].value.body == [cnode("baz", 0, 2, "one two")]
concept_found_3 = res[2].value.body[0].concept
expected_foo = get_expected(foo, "one two")
assert cbody(concept_found_3) == get_expected(bar, expected_foo)
@@ -465,8 +465,8 @@ def test_order_is_not_important_when_using_references():
res = parser.parse(context, "one two")
assert len(res) == 2
assert res[0].value.body == [("bar", 0, 2, "one two")]
assert res[1].value.body == [("foo", 0, 2, "one two")]
assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
def test_i_can_parse_when_reference():
@@ -477,21 +477,21 @@ def test_i_can_parse_when_reference():
res = parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [("bar", 0, 2, "twenty two")]
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "foo") == get_expected(foo, "twenty")
res = parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [("bar", 0, 2, "thirty one")]
assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("thirty one")
assert cprop(concept_found, "foo") == get_expected(foo, "thirty")
res = parser.parse(context, "twenty")
assert res.status
assert res.value.body == [("foo", 0, 0, "twenty")]
assert res.value.body == [cnode("foo", 0, 0, "twenty")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty")
@@ -504,14 +504,14 @@ def test_i_can_parse_when_reference_has_a_body():
res = parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [("bar", 0, 2, "twenty two")]
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "foo") == foo
res = parser.parse(context, "twenty")
assert res.status
assert res.value.body == [("foo", 0, 0, "twenty")]
assert res.value.body == [cnode("foo", 0, 0, "twenty")]
concept_found = res.value.body[0].concept
assert concept_found.body == "'one'"
@@ -529,14 +529,14 @@ def test_i_can_parse_multiple_results():
assert len(res) == 2
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", 0, 2, "one two")]
assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_0 = res[0].value.body[0].concept
assert cbody(concept_found_0) == DoNotResolve("one two")
assert len(concept_found_0.props) == 0
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", 0, 2, "one two")]
assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[1].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two")
assert len(concept_found_1.props) == 0
@@ -555,19 +555,19 @@ def test_i_can_parse_multiple_results_times_two():
assert len(res) == 4
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", "one two"), ("bar", "one two")]
assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", "one two"), ("bar", "one two")]
assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")]
assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [("bar", "one two"), ("foo", "one two")]
assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")]
assert res[3].status
assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT)
assert res[3].value.body == [("foo", "one two"), ("foo", "one two")]
assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")]
def test_i_can_parse_multiple_results_when_reference():
@@ -589,11 +589,11 @@ def test_i_can_parse_multiple_results_when_reference():
assert len(res) == 2
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", 0, 0, "twenty")]
assert res[0].value.body == [cnode("bar", 0, 0, "twenty")]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", 0, 0, "twenty")]
assert res[1].value.body == [cnode("foo", 0, 0, "twenty")]
def test_i_can_parse_concept_reference_that_is_not_in_grammar():
@@ -608,14 +608,14 @@ def test_i_can_parse_concept_reference_that_is_not_in_grammar():
res = parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [("foo", 0, 2, "twenty two")]
assert res.value.body == [cnode("foo", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "two") == get_expected(two, "two")
res = parser.parse(context, "twenty one")
assert res.status
assert res.value.body == [("foo", 0, 2, "twenty one")]
assert res.value.body == [cnode("foo", 0, 2, "twenty one")]
def test_i_can_parse_zero_or_more():
@@ -625,7 +625,7 @@ def test_i_can_parse_zero_or_more():
context, res, wrapper, return_value = execute([foo], grammar, "one one")
assert res.status
assert return_value == [("foo", 0, 2, "one one")]
assert return_value == [cnode("foo", 0, 2, "one one")]
assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)])
concept_found = return_value[0].concept
@@ -639,11 +639,11 @@ def test_i_can_parse_sequence_and_zero_or_more():
res = parser.parse(context, "one one two")
assert res.status
assert res.value.value == [("foo", 0, 4, "one one two")]
assert res.value.value == [cnode("foo", 0, 4, "one one two")]
res = parser.parse(context, "two")
assert res.status
assert res.value.value == [("foo", 0, 0, "two")]
assert res.value.value == [cnode("foo", 0, 0, "two")]
def test_i_cannot_parse_zero_and_more_when_wrong_entry():
@@ -657,7 +657,7 @@ def test_i_cannot_parse_zero_and_more_when_wrong_entry():
res = parser.parse(context, "one two")
assert not res.status
assert res.value.value == [
("foo", 0, 0, "one"),
cnode("foo", 0, 0, "one"),
UnrecognizedTokensNode(1, 2, [t(" "), t("two")])
]
@@ -675,7 +675,7 @@ def test_i_can_parse_zero_and_more_with_separator():
context, res, wrapper, return_value = execute([foo], grammar, "one, one , one")
assert res.status
assert return_value == [("foo", 0, 7, "one, one , one")]
assert return_value == [cnode("foo", 0, 7, "one, one , one")]
def test_that_zero_and_more_is_greedy():
@@ -686,7 +686,7 @@ def test_that_zero_and_more_is_greedy():
context, res, wrapper, return_value = execute([foo], grammar, "one one one")
assert res.status
assert return_value == [("foo", 0, 4, "one one one")]
assert return_value == [cnode("foo", 0, 4, "one one one")]
def test_i_can_parse_one_and_more():
@@ -696,7 +696,7 @@ def test_i_can_parse_one_and_more():
context, res, wrapper, return_value = execute([foo], grammar, "one one")
assert res.status
assert return_value == [("foo", 0, 2, "one one")]
assert return_value == [cnode("foo", 0, 2, "one one")]
assert return_value[0].underlying == u(grammar[foo], 0, 2, [
u("one", 0, 0),
u("one", 2, 2)])
@@ -709,7 +709,7 @@ def test_i_can_parse_sequence_and_one_or_more():
res = parser.parse(context, "one one two")
assert res.status
assert res.value.value == [("foo", 0, 4, "one one two")]
assert res.value.value == [cnode("foo", 0, 4, "one one two")]
res = parser.parse(context, "two")
assert not res.status
@@ -725,7 +725,7 @@ def test_i_can_parse_one_and_more_with_separator():
context, res, wrapper, return_value = execute([foo], grammar, "one, one , one")
assert res.status
assert return_value == [("foo", 0, 7, "one, one , one")]
assert return_value == [cnode("foo", 0, 7, "one, one , one")]
assert return_value[0].underlying == u(grammar[foo], 0, 7, [
u("one", 0, 0),
u("one", 3, 3),
@@ -740,7 +740,7 @@ def test_that_one_and_more_is_greedy():
context, res, wrapper, return_value = execute([foo], grammar, "one one one")
assert res.status
assert return_value == [("foo", 0, 4, "one one one")]
assert return_value == [cnode("foo", 0, 4, "one one one")]
def test_i_can_detect_infinite_recursion():
@@ -785,9 +785,9 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
res = parser.parse(context, "foo")
assert len(res) == 2
assert res[0].status
assert res[0].value.body == [("bar", 0, 0, "foo")]
assert res[0].value.body == [cnode("bar", 0, 0, "foo")]
assert res[1].status
assert res[1].value.body == [("foo", 0, 0, "foo")]
assert res[1].value.body == [cnode("foo", 0, 0, "foo")]
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
@@ -912,7 +912,7 @@ def test_i_cannot_parse_when_wrong_sequence():
assert not res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [
("foo", "one two three"),
short_cnode("foo", "one two three"),
UnrecognizedTokensNode(5, 6, [t(" "), t("one")])
]
@@ -945,14 +945,14 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end():
assert not res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [
("bar", 0, 2, "one two"),
cnode("bar", 0, 2, "one two"),
UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
]
assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [
("foo", 0, 2, "one two"),
cnode("foo", 0, 2, "one two"),
UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
]
@@ -973,14 +973,14 @@ def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens():
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("bar", 4, 6, "one two"),
cnode("bar", 4, 6, "one two"),
]
assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("foo", 4, 6, "one two"),
cnode("foo", 4, 6, "one two"),
]
@@ -999,7 +999,7 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens():
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("bar", 4, 6, "one two"),
cnode("bar", 4, 6, "one two"),
UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
]
@@ -1007,7 +1007,7 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens():
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("foo", 4, 6, "one two"),
cnode("foo", 4, 6, "one two"),
UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
]
@@ -1029,17 +1029,17 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle():
assert not res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [
("bar", 0, 2, "one two"),
cnode("bar", 0, 2, "one two"),
UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
("baz", 8, 8, "six"),
cnode("baz", 8, 8, "six"),
]
assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [
("foo", 0, 2, "one two"),
cnode("foo", 0, 2, "one two"),
UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
("baz", 8, 8, "six"),
cnode("baz", 8, 8, "six"),
]
@@ -1052,7 +1052,7 @@ def test_i_can_get_the_inner_concept_when_possible():
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 0, "one")]
assert return_value == [cnode("foo", 0, 0, "one")]
concept_found = return_value[0].concept
assert cbody(concept_found) == get_expected(one, "one")
assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
@@ -1069,7 +1069,7 @@ def test_i_can_get_the_inner_concept_when_possible_with_rule_name():
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 0, "one")]
assert return_value == [cnode("foo", 0, 0, "one")]
concept_found = return_value[0].concept
assert cbody(concept_found) == get_expected(one, "one")
assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
@@ -1086,7 +1086,7 @@ def test_i_get_multiple_props_when_zero_or_more():
context, res, wrapper, return_value = execute([foo, one], grammar, "one one one")
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 4, "one one one")]
assert return_value == [cnode("foo", 0, 4, "one one one")]
concept_found = return_value[0].concept
assert cbody(concept_found) == DoNotResolve("one one one")
assert len(concept_found.cached_asts["one"]) == 3
@@ -1106,7 +1106,7 @@ def test_i_get_multiple_props_when_zero_or_more_and_different_values():
context, res, wrapper, return_value = execute([foo, one], grammar, "one ok un ok uno ok")
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", "one ok un ok uno ok")]
assert return_value == [short_cnode("foo", "one ok un ok uno ok")]
concept_found = return_value[0].concept
assert cprop(concept_found, "one")[0] == get_expected(one, "one")
assert cprop(concept_found, "one")[1] == get_expected(one, "un")