Files
Sheerka-Old/tests/parsers/test_BnfParser.py
T

196 lines
8.5 KiB
Python

import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, LexerError, Token
from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
BnfNodeParser, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class ClassWithName:
def __init__(self, name):
self.name = name
def c(name, rule_name=None):
concept = Concept(name).init_key()
return ConceptExpression(concept, rule_name=rule_name or name)
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)
class TestBnfParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(
StrMatch("1"),
OrderedChoice(StrMatch("2"), StrMatch("3")),
OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'*=x 'b'+=y)=z", Sequence(
ZeroOrMore(StrMatch("a"), rule_name="x"),
OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
])
def test_i_can_parse_regex(self, expression, expected):
parser = BnfParser()
res = parser.parse(self.get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", c("foo")),
("foo*", ZeroOrMore(c("foo"))),
("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
("foo=f", c("foo", "f")),
])
def test_i_can_parse_regex_with_concept(self, expression, expected):
foo = Concept("foo")
bar = Concept("bar")
var = Concept("var")
context = self.get_context()
for c in (foo, bar, var):
context.sheerka.add_in_cache(c)
parser = BnfParser()
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", ConceptExpression("foo")),
("foo=f", ConceptExpression("foo", rule_name="f")),
])
def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self, expression, expected):
context = self.get_context()
context.obj = ClassWithName("foo")
parser = BnfParser()
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.RPAR])),
("1=", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.IDENTIFIER])),
("'name", LexerError("Missing Trailing quote", "'name", 5, 1, 6))
])
def test_i_can_detect_errors(self, expression, error):
parser = BnfParser()
res = parser.parse(self.get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
foo = Concept(name="foo")
bar = Concept(name="bar")
context = self.get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = BnfParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = BnfNodeParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [cnode("foo", 0, 0, "twenty")]
def test_i_cannot_parse_when_too_many_concepts(self):
foo1 = Concept(name="foo", body="1")
foo2 = Concept(name="foo", body="2")
context = self.get_context()
context.sheerka.cache_by_key["foo"] = [foo1, foo2]
regex_parser = BnfParser()
res = regex_parser.parse(context, "foo")
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
assert res.value.body == ('key', 'foo')
def test_i_cannot_parse_when_unknown_concept(self):
context = self.get_context()
regex_parser = BnfParser()
res = regex_parser.parse(self.get_context(), "foo")
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ('key', 'foo')