Added ZeroOrMore and OneOrMore to BNF. BNF expressions can now be captured into a named variable (e.g. `'foo'*=var`)

This commit is contained in:
2019-12-18 12:01:51 +01:00
parent 88cd3162be
commit 8dbe2e1b20
9 changed files with 425 additions and 91 deletions
+26 -47
View File
@@ -26,6 +26,7 @@ def get_context():
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
@@ -39,6 +40,29 @@ def get_context():
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
("foo", ConceptMatch("foo")),
("foo*", ZeroOrMore(ConceptMatch("foo"))),
("foo 'and' bar+", Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))),
("foo | bar?", OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
])
def test_i_can_parse_regex(expression, expected):
parser = BnfParser()
@@ -53,7 +77,8 @@ def test_i_can_parse_regex(expression, expected):
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.RPAR])),
("1=", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.IDENTIFIER])),
])
def test_i_can_detect_errors(expression, error):
parser = BnfParser()
@@ -64,52 +89,6 @@ def test_i_can_detect_errors(expression, error):
assert ret_value[0] == error
def test_i_can_parse_regex_with_reference():
    """A bare identifier is parsed as a ConceptMatch and the source is kept."""
    source = "foo"

    outcome = BnfParser().parse(get_context(), Tokenizer(source))

    assert outcome.status
    parsed = outcome.value
    assert parsed.value == ConceptMatch("foo")
    assert parsed.source == source
def test_i_can_parse_cross_ref_with_modifier():
    """A `*` suffix on an identifier wraps the ConceptMatch in ZeroOrMore."""
    source = "foo*"

    outcome = BnfParser().parse(get_context(), Tokenizer(source))

    assert outcome.status
    parsed = outcome.value
    assert parsed.value == ZeroOrMore(ConceptMatch("foo"))
    assert parsed.source == source
def test_i_can_parse_sequence_with_cross_ref():
    """Identifiers and quoted strings can be mixed inside one Sequence."""
    source = "foo 'and' bar+"

    outcome = BnfParser().parse(get_context(), Tokenizer(source))

    assert outcome.status
    parsed = outcome.value
    expected = Sequence(
        ConceptMatch("foo"),
        StrMatch("and"),
        OneOrMore(ConceptMatch("bar")),
    )
    assert parsed.value == expected
    assert parsed.source == source
def test_i_can_parse_choice_with_cross_ref():
    """An ordered choice between two cached concepts, the second optional."""
    known_concepts = [Concept(name) for name in ("foo", "bar")]
    context = get_context()
    for concept in known_concepts:
        context.sheerka.add_in_cache(concept)

    source = "foo | bar?"
    outcome = BnfParser().parse(context, Tokenizer(source))

    assert outcome.status
    parsed = outcome.value
    expected = OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
    assert parsed.value == expected
    assert parsed.source == source
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo")
bar = Concept(name="bar")
+193 -1
View File
@@ -3,7 +3,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore
class ConceptVisitor(ParsingExpressionVisitor):
@@ -352,6 +352,22 @@ def test_i_can_parse_sequence_with_optional_in_between():
assert res.value.body == [(foo, 0, 2, "one three")]
def test_i_cannot_parse_wrong_input_with_optional():
    """An Optional("one") grammar rejects "two" and reports it as unknown."""
    context = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    outcome = lexer.parse(context, "two")

    assert not outcome.status
    is_a = context.sheerka.isinstance
    assert is_a(outcome.value, BuiltinConcepts.PARSER_RESULT)
    # Nothing could be recognised before the failure.
    assert outcome.value.try_parsed == []
    unknown = outcome.value.body[0]
    assert is_a(unknown, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert outcome.value.body[0].body == "two"
def test_i_can_use_reference():
# when there are multiple matches for the same input
# Do I need to create a choice concept ?
@@ -469,6 +485,172 @@ def test_i_can_detect_duplicates_when_reference():
assert res[1].value.body == [(foo, 0, 0, "twenty")]
def test_i_can_parse_zero_or_more():
    """ZeroOrMore("one") accepts "one one" with one child node per match."""
    context = get_context()
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    outcome = lexer.parse(context, "one one")

    assert outcome.status
    repetitions = [u("one", 0, 0), u("one", 2, 2)]
    tree = u(grammar[foo], 0, 2, repetitions)
    expected = [ConceptNode(foo, 0, 2, source="one one", underlying=tree)]
    assert outcome.value.value == expected
def test_i_can_parse_sequence_and_zero_or_more():
    """A Sequence starting with ZeroOrMore parses with and without repetitions."""
    context = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Sequence(ZeroOrMore("one"), "two")}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    # Two repetitions followed by the mandatory tail.
    outcome = lexer.parse(context, "one one two")
    assert outcome.status
    repeated = u(ZeroOrMore("one"), 0, 2, [u("one", 0, 0), u("one", 2, 2)])
    tree = u(grammar[foo], 0, 4, [repeated, u("two", 4, 4)])
    expected = [ConceptNode(foo, 0, 4, source="one one two", underlying=tree)]
    assert outcome.value.value == expected

    # No repetition at all: only the tail appears in the expected tree.
    outcome = lexer.parse(context, "two")
    assert outcome.status
    tree = u(grammar[foo], 0, 0, [u("two", 0, 0)])
    expected = [ConceptNode(foo, 0, 0, source="two", underlying=tree)]
    assert outcome.value.value == expected
def test_i_cannot_parse_zero_and_more_when_wrong_entry():
    """ZeroOrMore("one") fails on input containing "two", which is reported as unknown."""
    context = get_context()
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)
    is_a = context.sheerka.isinstance

    # A valid prefix is kept in try_parsed; the offending token ends up in body.
    outcome = lexer.parse(context, "one two")
    assert not outcome.status
    assert is_a(outcome.value, BuiltinConcepts.PARSER_RESULT)
    prefix_tree = u(ZeroOrMore("one"), 0, 0, [u("one", 0, 0)])
    assert outcome.value.try_parsed == [
        ConceptNode(foo, 0, 0, source="one", underlying=prefix_tree),
    ]
    assert is_a(outcome.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert outcome.value.body[0].body == "two"

    # With no valid prefix, try_parsed stays empty.
    outcome = lexer.parse(context, "two")
    assert not outcome.status
    assert is_a(outcome.value, BuiltinConcepts.PARSER_RESULT)
    assert outcome.value.try_parsed == []
    assert is_a(outcome.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert outcome.value.body[0].body == "two"
def test_i_can_parse_zero_and_more_with_separator():
    """ZeroOrMore with sep="," accepts comma-separated repetitions.

    The expected tree lists only the "one" matches, not the separators.
    """
    context = get_context()
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one", sep=",")}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    text = "one, one , one"
    outcome = lexer.parse(context, text)

    assert outcome.status
    repetitions = [u("one", 0, 0), u("one", 3, 3), u("one", 7, 7)]
    tree = u(grammar[foo], 0, 7, repetitions)
    expected = [ConceptNode(foo, 0, 7, source="one, one , one", underlying=tree)]
    assert outcome.value.value == expected
def test_that_zero_and_more_is_greedy():
    """With foo = ZeroOrMore("one") and bar = "one", the whole input goes to foo."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: ZeroOrMore("one"), bar: "one"}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    outcome = lexer.parse(context, "one one one")

    assert outcome.status
    # A single result spanning the full input, attributed to foo only.
    expected = [(foo, 0, 4, "one one one")]
    assert outcome.value.value == expected
def test_i_can_parse_one_and_more():
    """OneOrMore("one") accepts "one one" with one child node per match."""
    context = get_context()
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one")}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    outcome = lexer.parse(context, "one one")

    assert outcome.status
    repetitions = [u("one", 0, 0), u("one", 2, 2)]
    tree = u(grammar[foo], 0, 2, repetitions)
    expected = [ConceptNode(foo, 0, 2, source="one one", underlying=tree)]
    assert outcome.value.value == expected
def test_i_can_parse_sequence_and_one_or_more():
    """OneOrMore inside a Sequence requires at least one repetition.

    "one one two" must parse, with both repetitions grouped under the
    OneOrMore node. "two" alone must fail, with nothing recognised and
    "two" reported as an unknown concept.
    """
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence(OneOrMore("one"), "two")}
    parser = ConceptLexerParser()
    parser.initialize(context, concepts)

    res = parser.parse(context, "one one two")
    assert res.status
    # The repetition node is described with OneOrMore, the expression actually
    # used in the grammar above. The expected tree previously named ZeroOrMore,
    # apparently copied from the zero-or-more variant of this test.
    assert res.value.value == [ConceptNode(foo, 0, 4, source="one one two",
                                           underlying=u(concepts[foo], 0, 4, [
                                               u(OneOrMore("one"), 0, 2, [
                                                   u("one", 0, 0),
                                                   u("one", 2, 2)]),
                                               u("two", 4, 4)]))]

    # Unlike ZeroOrMore, the repetition cannot be skipped.
    res = parser.parse(context, "two")
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.try_parsed == []
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "two"
def test_i_can_parse_one_and_more_with_separator():
    """OneOrMore with sep="," accepts comma-separated repetitions.

    The expected tree lists only the "one" matches, not the separators.
    """
    context = get_context()
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one", sep=",")}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    text = "one, one , one"
    outcome = lexer.parse(context, text)

    assert outcome.status
    repetitions = [u("one", 0, 0), u("one", 3, 3), u("one", 7, 7)]
    tree = u(grammar[foo], 0, 7, repetitions)
    expected = [ConceptNode(foo, 0, 7, source="one, one , one", underlying=tree)]
    assert outcome.value.value == expected
def test_that_one_and_more_is_greedy():
    """With foo = OneOrMore("one") and bar = "one", the whole input goes to foo."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: OneOrMore("one"), bar: "one"}
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    outcome = lexer.parse(context, "one one one")

    assert outcome.status
    # A single result spanning the full input, attributed to foo only.
    expected = [(foo, 0, 4, "one one one")]
    assert outcome.value.value == expected
def test_i_can_detect_infinite_recursion():
foo = Concept(name="foo")
bar = Concept(name="bar")
@@ -552,6 +734,16 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
pass
def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more():
# TODO: placeholder, nothing is asserted yet. Per the test name this should cover
# indirect infinite recursion through a zero-or-more expression; the previous note
# said "optional", presumably copied from the optional variant of this test.
pass
def test_i_can_detect_indirect_infinite_recursion_with_one_and_more():
# TODO: placeholder, nothing is asserted yet. Per the test name this should cover
# indirect infinite recursion through a one-or-more expression; the previous note
# said "optional", presumably copied from the optional variant of this test.
pass
def test_i_can_visit_parsing_expression():
mult = Concept(name="mult")
add = Concept(name="add")
+52 -9
View File
@@ -5,7 +5,7 @@ from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, NonTerminalNode, Sequence, TerminalNode, \
StrMatch, Optional, OrderedChoice
StrMatch, Optional, OrderedChoice, ZeroOrMore
def get_context():
@@ -61,7 +61,7 @@ def test_concept_is_returned_when_list_of_one_concept_node():
assert result.who == evaluator.name
assert result.status
assert result.value == node.concept
assert result.value == Concept("foo", body="foo").init_key()
assert result.parents == [ret_val]
@@ -119,6 +119,22 @@ def test_concept_property_is_correctly_updated_for_optional():
assert updated.props["o"].value == "two"
def test_concept_property_is_correctly_updated_for_zero_or_more():
    """A ZeroOrMore rule named "variable" fills a prop with the matched text."""
    context = get_context()
    foo = Concept("foo")
    grammar = {foo: ZeroOrMore("one", rule_name="variable")}
    node = get_concept_node(context, grammar, "one one one")

    evaluator = ConceptNodeEvaluator()
    fresh_concept = context.sheerka.new(node.concept.key)
    updated = evaluator.update_concept(context.sheerka, fresh_concept, node.underlying)

    assert "variable" in updated.props
    captured = updated.props["variable"]
    assert captured.value == "one one one"
def test_concept_property_is_correctly_updated_when_list_of_properties():
context = get_context()
@@ -141,9 +157,10 @@ def test_concept_property_is_correctly_updated_when_another_concept():
foo = Concept("foo")
bar = Concept("bar")
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
grammar = {
foo: Sequence("one", "two", rule_name="variable"),
bar: Sequence(foo, "three", rule_name="variable")}
foo: Sequence("one", "two", rule_name="var"),
bar: Sequence(foo, "three", rule_name="var")}
concept_node = get_concept_node(context, grammar, "one two three")
updated = ConceptNodeEvaluator().update_concept(
@@ -151,11 +168,11 @@ def test_concept_property_is_correctly_updated_when_another_concept():
context.sheerka.new(concept_node.concept.key),
concept_node.underlying)
assert updated.props["variable"].value == "one two three"
assert updated.props["foo"].value == Concept("foo").set_prop("variable", "one two").init_key()
assert updated.props["var"].value == "one two three"
assert updated.props["foo"].value == Concept("foo", body="one two").set_prop("var", "one two").init_key()
def test_concept_property_is_correctly_updated_when_concept_recursion():
def test_concept_property_is_correctly_updated_when_concept_recursion_using_optional():
context = get_context()
number = Concept("number")
@@ -173,6 +190,32 @@ def test_concept_property_is_correctly_updated_when_concept_recursion():
context.sheerka.new(concept_node.concept.key),
concept_node.underlying)
assert updated.props["number"].value == Concept("number").init_key()
assert updated.props["number"].value == Concept("number", body="one").init_key()
assert updated.props["op"].value == "plus"
assert updated.props["add"].value == Concept("add").set_prop("number", Concept("number").init_key()).init_key()
expected_add = Concept("add", body="two").set_prop("number", Concept("number", body="two").init_key()).init_key()
assert updated.props["add"].value == expected_add
def test_concept_property_is_correctly_updated_when_concept_recursion_using_zero_or_more():
    """Props matched several times through ZeroOrMore are collected as lists.

    Parsing "one plus two minus three" is expected to yield three "number"
    concepts and two "op" strings on the updated concept.
    """
    context = get_context()
    number = Concept("number")
    add = Concept("add")
    for concept in (number, add):
        context.sheerka.add_in_cache(concept)

    operator = OrderedChoice("plus", "minus", rule_name="op")
    grammar = {
        number: OrderedChoice("one", "two", 'three'),
        add: Sequence(number, ZeroOrMore(Sequence(operator, number))),
    }
    node = get_concept_node(context, grammar, "one plus two minus three")

    evaluator = ConceptNodeEvaluator()
    fresh_concept = context.sheerka.new(node.concept.key)
    updated = evaluator.update_concept(
        context.sheerka,
        fresh_concept,
        node.underlying,
        init_empty_body=True,
    )

    expected_numbers = [
        Concept("number", body=word).init_key()
        for word in ("one", "two", "three")
    ]
    assert updated.props["number"].value == expected_numbers
    assert updated.props["op"].value == ["plus", "minus"]
+1 -1
View File
@@ -626,7 +626,7 @@ def test_i_can_eval_bnf_definitions_with_variables():
return_value = res[0].value
assert sheerka.isinstance(return_value, concept_b)
assert return_value.props["a"] == Property("a", concept_a)
assert return_value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one"))
def test_i_can_eval_bnf_definitions_from_separate_instances():