I can define and eval BNF definitions

This commit is contained in:
2019-12-17 21:19:44 +01:00
parent c668cc46d2
commit 88cd3162be
25 changed files with 1099 additions and 569 deletions
+97 -172
View File
@@ -2,10 +2,8 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \
ParsingExpressionVisitor
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch
class ConceptVisitor(ParsingExpressionVisitor):
@@ -13,7 +11,17 @@ class ConceptVisitor(ParsingExpressionVisitor):
self.concepts = set()
def visit_ConceptMatch(self, node):
self.concepts.add(node.concept_name)
self.concepts.add(node.concept)
def u(parsing_expression, start, end, children=None):
if isinstance(parsing_expression, str):
parsing_expression = StrMatch(parsing_expression)
if isinstance(parsing_expression, StrMatch):
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)
return NonTerminalNode(parsing_expression, start, end, [], children)
@pytest.mark.parametrize("match, text", [
@@ -39,7 +47,7 @@ def test_i_can_match_simple_tokens(match, text):
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 0, source=text)]
assert res.value.value == [ConceptNode(foo, 0, 0, source=text, underlying=u(match, 0, 0))]
def test_i_can_match_multiple_concepts_in_one_input():
@@ -55,9 +63,9 @@ def test_i_can_match_multiple_concepts_in_one_input():
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [
ConceptNode(one, 0, 0, source="one"),
ConceptNode(two, 2, 2, source="two"),
ConceptNode(one, 4, 4, source="one"),
ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)),
ConceptNode(one, 4, 4, source="one", underlying=u("one", 4, 4)),
]
@@ -85,8 +93,8 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown():
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.try_parsed == [
ConceptNode(one, 0, 0, source="one"),
ConceptNode(two, 2, 2, source="two")] # these two were recognized
ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2))] # these two were recognized
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body[0].body == "three"
@@ -102,7 +110,11 @@ def test_i_can_match_sequence():
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.value == [
ConceptNode(foo, 0, 4, source="one two three", underlying=u(concepts[foo], 0, 4, [
u("one", 0, 0),
u("two", 2, 2),
u("three", 4, 4)]))]
def test_wrong_sequence_is_not_matched():
@@ -116,7 +128,7 @@ def test_wrong_sequence_is_not_matched():
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.try_parsed == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.try_parsed == [(foo, "one two three")]
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body[0].body == "one"
@@ -149,7 +161,7 @@ def test_i_always_choose_the_longest_match():
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.value == [(foo, "one two three")]
def test_i_can_match_several_sequences():
@@ -166,8 +178,8 @@ def test_i_can_match_several_sequences():
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [
ConceptNode(foo, 0, 4, source="one two three"),
ConceptNode(bar, 6, 8, source="one two"),
(foo, 0, 4, "one two three"),
(bar, 6, 8, "one two"),
]
@@ -181,12 +193,14 @@ def test_i_can_match_ordered_choice():
res1 = parser.parse(context, "one")
assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [ConceptNode(foo, 0, 0, source="one")]
assert res1.value.body == [
ConceptNode(foo, 0, 0, source="one", underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
res2 = parser.parse(context, "two")
assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [ConceptNode(foo, 0, 0, source="two")]
assert res2.value.body == [
ConceptNode(foo, 0, 0, source="two", underlying=u(concepts[foo], 0, 0, [u("two", 0, 0)]))]
res3 = parser.parse(context, "three")
assert not res3.status
@@ -216,12 +230,20 @@ def test_i_can_mix_sequences_and_ordered_choices():
res1 = parser.parse(context, "twenty one ok")
assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok")]
assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok",
underlying=u(concepts[foo], 0, 4, [
u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]),
u("one", 2, 2),
u("ok", 4, 4)]))]
res2 = parser.parse(context, "thirty one ok")
assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok")]
assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok",
underlying=u(concepts[foo], 0, 4, [
u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]),
u("one", 2, 2),
u("ok", 4, 4)]))]
res3 = parser.parse(context, "twenty one")
assert not res3.status
@@ -267,7 +289,8 @@ def test_i_can_parse_optional():
res = parser.parse(context, "one")
assert res.status
assert res.value.value == [ConceptNode(foo, 0, 0, source="one")]
assert res.value.value == [ConceptNode(foo, 0, 0, source="one",
underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
def test_i_can_parse_sequence_starting_with_optional():
@@ -280,11 +303,19 @@ def test_i_can_parse_sequence_starting_with_optional():
res = parser.parse(context, "twenty one")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="twenty one")]
assert res.value.body == [ConceptNode(
foo, 0, 2,
source="twenty one",
underlying=u(concepts[foo], 0, 2,
[
u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
u("one", 2, 2)]
))]
res = parser.parse(context, "one")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="one")]
assert res.value.body == [ConceptNode(foo, 0, 0, source="one",
underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
def test_i_can_parse_sequence_ending_with_optional():
@@ -297,11 +328,11 @@ def test_i_can_parse_sequence_ending_with_optional():
res = parser.parse(context, "one two three")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.body == [(foo, 0, 4, "one two three")]
res = parser.parse(context, "one two")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="one two")]
assert res.value.body == [(foo, 0, 2, "one two")]
def test_i_can_parse_sequence_with_optional_in_between():
@@ -314,11 +345,11 @@ def test_i_can_parse_sequence_with_optional_in_between():
res = parser.parse(context, "one two three")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.body == [(foo, 0, 4, "one two three")]
res = parser.parse(context, "one three")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="one three")]
assert res.value.body == [(foo, 0, 2, "one three")]
def test_i_can_use_reference():
@@ -338,11 +369,14 @@ def test_i_can_use_reference():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two",
underlying=u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)]))]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two",
underlying=u(ConceptMatch(foo, rule_name="foo"), 0, 2,
[u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)])]))]
def test_i_can_use_context_reference_with_multiple_levels():
@@ -364,15 +398,15 @@ def test_i_can_use_context_reference_with_multiple_levels():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]
assert res[0].value.body == [(foo, 0, 2, "one two")]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]
assert res[1].value.body == [(bar, 0, 2, "one two")]
assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [ConceptNode(baz, 0, 2, source="one two")]
assert res[2].value.body == [(baz, 0, 2, "one two")]
def test_order_is_not_important_when_using_references():
@@ -386,8 +420,8 @@ def test_order_is_not_important_when_using_references():
res = parser.parse(context, "one two")
assert len(res) == 2
assert res[0].value.body == [ConceptNode(bar, 0, 2, source="one two")]
assert res[1].value.body == [ConceptNode(foo, 0, 2, source="one two")]
assert res[0].value.body == [(bar, 0, 2, "one two")]
assert res[1].value.body == [(foo, 0, 2, "one two")]
def test_i_can_parse_when_reference():
@@ -401,15 +435,15 @@ def test_i_can_parse_when_reference():
res = parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
assert res.value.body == [(bar, 0, 2, "twenty two")]
res = parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
assert res.value.body == [(bar, 0, 2, "thirty one")]
res = parser.parse(context, "twenty")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
assert res.value.body == [(foo, 0, 0, "twenty")]
def test_i_can_detect_duplicates_when_reference():
@@ -428,11 +462,11 @@ def test_i_can_detect_duplicates_when_reference():
assert len(res) == 2
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(bar, 0, 0, source="twenty")]
assert res[0].value.body == [(bar, 0, 0, "twenty")]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(foo, 0, 0, source="twenty")]
assert res[1].value.body == [(foo, 0, 0, "twenty")]
def test_i_can_detect_infinite_recursion():
@@ -446,8 +480,8 @@ def test_i_can_detect_infinite_recursion():
parser = ConceptLexerParser()
parser.initialize(get_context(), concepts)
assert bar not in parser.concepts_dict
assert foo not in parser.concepts_dict
assert bar not in parser.concepts_grammars
assert foo not in parser.concepts_grammars
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
@@ -461,8 +495,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
parser = ConceptLexerParser()
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
# the other way around is possible
context = get_context()
@@ -472,15 +506,15 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
}
parser = ConceptLexerParser()
parser.initialize(context, concepts)
assert foo in parser.concepts_dict
assert bar in parser.concepts_dict
assert foo in parser.concepts_grammars
assert bar in parser.concepts_grammars
res = parser.parse(context, "foo")
assert len(res) == 2
assert res[0].status
assert res[0].value.body == [ConceptNode(bar, 0, 0, source="foo")]
assert res[0].value.body == [(bar, 0, 0, "foo")]
assert res[1].status
assert res[1].value.body == [ConceptNode(foo, 0, 0, source="foo")]
assert res[1].value.body == [(foo, 0, 0, "foo")]
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
@@ -494,8 +528,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence():
parser = ConceptLexerParser()
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
@@ -509,8 +543,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic
parser = ConceptLexerParser()
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
def test_i_can_detect_indirect_infinite_recursion_with_optional():
@@ -518,128 +552,6 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
pass
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
])
def test_i_can_parse_regex(expression, expected):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
])
def test_i_can_detect_errors(expression, error):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_parse_regex_with_reference():
expression = "foo"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ConceptMatch("foo")
assert res.value.source == expression
def test_i_can_parse_cross_ref_with_modifier():
expression = "foo*"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
assert res.value.source == expression
def test_i_can_parse_sequence_with_cross_ref():
expression = "foo 'and' bar+"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_parse_choice_with_cross_ref():
foo = Concept("foo")
bar = Concept("bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
expression = "foo | bar?"
parser = RegexParser()
res = parser.parse(context, Tokenizer(expression))
assert res.status
assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo")
bar = Concept(name="bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = RegexParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
def test_i_can_visit_parsing_expression():
mult = Concept(name="mult")
add = Concept(name="add")
@@ -650,6 +562,19 @@ def test_i_can_visit_parsing_expression():
assert sorted(list(visitor.concepts)) == ["add", "mult"]
def test_i_can_initialize_rule_names():
context = get_context()
foo = Concept(name="foo")
bar = Concept(name="bar")
concepts = {foo: Sequence("one", "two"), bar: foo}
parser = ConceptLexerParser()
ret = parser.initialize(context, concepts)
return_value = ret.body
assert return_value[foo].rule_name == ""
assert return_value[bar].rule_name == "foo"
#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():