# 177 lines, 7.8 KiB, Python
import pytest
|
|
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.concept import Concept
|
|
from core.tokenizer import Tokenizer, TokenKind, LexerError
|
|
from parsers.BaseParser import UnexpectedTokenErrorNode
|
|
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
|
|
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
|
|
ConceptLexerParser, ConceptExpression, cnode
|
|
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
|
|
|
|
class ClassWithName:
    """Bare object whose only state is a ``name`` attribute.

    The tests in this module assign an instance to ``context.obj`` before
    parsing an expression that mentions the same name.
    """

    def __init__(self, name):
        # Kept exactly as given: no validation, copying or conversion.
        self.name = name
|
|
|
|
|
|
class TestBnfParser(TestUsingMemoryBasedSheerka):
    """Tests for ``BnfParser``.

    Covers parsing of BNF-like expressions into parsing-expression nodes
    (``StrMatch``, ``Sequence``, ``OrderedChoice``, ...), resolution of concept
    names found in an expression, error reporting, and feeding the parsed
    definitions to ``ConceptLexerParser``.
    """

    @staticmethod
    def _assert_parsed(parser, res, expected, expression):
        """Assert that ``res`` is an error-free parse of ``expression``.

        :param parser: the ``BnfParser`` that produced ``res``
        :param res: value returned by ``parser.parse``
        :param expected: node tree that ``res.value.value`` must equal
        :param expression: source text that ``res.value.source`` must equal
        """
        assert not parser.has_error
        assert res.status
        assert res.value.value == expected
        assert res.value.source == expression

    @pytest.mark.parametrize("expression, expected", [
        ("'str'", StrMatch("str")),
        ("1", StrMatch("1")),
        (" 1", StrMatch("1")),
        (",", StrMatch(",")),
        ("'foo'?", Optional(StrMatch("foo"))),
        ("'foo'*", ZeroOrMore(StrMatch("foo"))),
        ("'foo'+", OneOrMore(StrMatch("foo"))),
        ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
        ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
        ("1 2 | 3 4+", OrderedChoice(
            Sequence(StrMatch("1"), StrMatch("2")),
            Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
        (
            "1 (2 | 3) 4+",
            Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
        ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
        ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
        ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
        ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
        ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
        ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
        ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
        ("(1 )", StrMatch("1")),
        ("'str'=var", StrMatch("str", rule_name="var")),
        ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
        ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
        ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
        ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
        ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
        ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
        ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
    ])
    def test_i_can_parse_regex(self, expression, expected):
        """Literal-only expressions parse into the expected node tree."""
        parser = BnfParser()
        res = parser.parse(self.get_context(), Tokenizer(expression))

        self._assert_parsed(parser, res, expected, expression)

    @pytest.mark.parametrize("expression, expected", [
        ("foo", Concept("foo").init_key()),
        ("foo*", ZeroOrMore(Concept("foo").init_key())),
        ("foo 'and' bar+", Sequence(Concept("foo").init_key(), StrMatch("and"), OneOrMore(Concept("bar").init_key()))),
        ("foo | bar?", OrderedChoice(Concept("foo").init_key(), Optional(Concept("bar").init_key()))),
        ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), Concept("var").init_key())),
        ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), Concept("var").init_key())),
    ])
    def test_i_can_parse_regex_with_concept(self, expression, expected):
        """Identifiers naming cached concepts are parsed as those concepts."""
        foo = Concept("foo")
        bar = Concept("bar")
        var = Concept("var")
        context = self.get_context()

        # The concepts are added to the cache before parsing.
        for c in (foo, bar, var):
            context.sheerka.add_in_cache(c)

        parser = BnfParser()
        res = parser.parse(context, Tokenizer(expression))

        self._assert_parsed(parser, res, expected, expression)

    def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self):
        """A name matching ``context.obj.name`` yields a ``ConceptExpression``."""
        expression = "foo"
        expected = ConceptExpression("foo")

        context = self.get_context()
        # No concept "foo" is added to the cache here; only the object attached
        # to the context carries that name.
        context.obj = ClassWithName("foo")

        parser = BnfParser()
        res = parser.parse(context, Tokenizer(expression))

        self._assert_parsed(parser, res, expected, expression)

    @pytest.mark.parametrize("expression, error", [
        ("1 ", UnexpectedEndOfFileError()),
        ("1|", UnexpectedEndOfFileError()),
        ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.RPAR])),
        ("1=", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.IDENTIFIER])),
        ("'name", LexerError("Missing Trailing quote", "'name", 5, 1, 6))
    ])
    def test_i_can_detect_errors(self, expression, error):
        """Malformed expressions fail and report the expected first error."""
        parser = BnfParser()
        res = parser.parse(self.get_context(), Tokenizer(expression))
        ret_value = res.value.value

        assert parser.has_error
        assert not res.status
        # Only the first reported error is compared.
        assert ret_value[0] == error

    def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
        """Parsed definitions can drive ``ConceptLexerParser`` on plain text."""
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        context = self.get_context()
        context.sheerka.add_in_cache(foo)
        context.sheerka.add_in_cache(bar)

        regex_parser = BnfParser()
        foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
        bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value

        concepts = {bar: bar_definition, foo: foo_definition}
        concept_parser = ConceptLexerParser()
        concept_parser.initialize(context, concepts)

        res = concept_parser.parse(context, "twenty two")
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "twenty two")]

        res = concept_parser.parse(context, "thirty one")
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "thirty one")]

        res = concept_parser.parse(context, "twenty")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 0, "twenty")]

    def test_i_cannot_parse_when_too_many_concepts(self):
        """Two concepts cached under one key make the name unresolvable."""
        foo1 = Concept(name="foo", body="1")
        foo2 = Concept(name="foo", body="2")
        context = self.get_context()
        context.sheerka.cache_by_key["foo"] = [foo1, foo2]

        regex_parser = BnfParser()
        res = regex_parser.parse(context, "foo")

        assert not res.status
        assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
        assert res.value.body == ('key', 'foo')

    def test_i_cannot_parse_when_unknown_concept(self):
        """A name with no cached concept is reported as ``UNKNOWN_CONCEPT``."""
        context = self.get_context()

        regex_parser = BnfParser()
        # Parse with the same context that performs the isinstance check below,
        # so the result is produced and inspected through one sheerka instance.
        res = regex_parser.parse(context, "foo")

        assert not res.status
        assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
        assert res.value.body == ('key', 'foo')
|