Files
Sheerka-Old/tests/parsers/test_BnfParser.py
T

239 lines
10 KiB
Python

import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import BnfNodeParser
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, \
OneOrMore, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class ClassWithName:
def __init__(self, name):
self.name = name
def c(name, rule_name=None):
concept = Concept(name).init_key()
return ConceptExpression(concept, rule_name=rule_name or name)
def update_concepts_ids(sheerka, parsing_expression):
if isinstance(parsing_expression, ConceptExpression):
if not parsing_expression.concept.id:
concept = sheerka.get_by_key(parsing_expression.concept.key)
parsing_expression.concept.get_metadata().id = concept.id
for pe in parsing_expression.elements:
update_concepts_ids(sheerka, pe)
eof_token = "<EOF>"
class TestBnfParser(TestUsingMemoryBasedSheerka):
def init_parser(self, *concepts):
sheerka, context, *updated = self.init_concepts(*concepts, singleton=True)
parser = BnfDefinitionParser()
return sheerka, context, parser, *updated
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(
StrMatch("1"),
OrderedChoice(StrMatch("2"), StrMatch("3")),
OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'*=x 'b'+=y)=z", Sequence(
ZeroOrMore(StrMatch("a"), rule_name="x"),
OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
("'--filter'",
Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
])
def test_i_can_parse_regex(self, expression, expected):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", c("foo")),
("foo*", ZeroOrMore(c("foo"))),
("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
("foo=f", c("foo", "f")),
("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
])
def test_i_can_parse_regex_with_concept(self, expression, expected):
sheerka, context, parser, *concepts = self.init_parser("foo", "bar", "var", "def")
update_concepts_ids(sheerka, expected)
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", ConceptExpression("foo")),
("foo=f", ConceptExpression("foo", rule_name="f")),
])
def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self, expression, expected):
sheerka, context, parser = self.init_parser()
context.obj = ClassWithName("foo")
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEofParsingError()),
("1|", UnexpectedEofParsingError()),
("(1|)", UnexpectedTokenParsingError("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.RPAR])),
("1=", UnexpectedTokenParsingError("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.IDENTIFIER])),
])
def test_i_can_detect_errors(self, expression, error):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, Tokenizer(expression))
ret_value = res.body.body
assert parser.has_error
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert not res.status
assert ret_value[0] == error
def test_i_can_detect_lexer_error(self):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, Tokenizer("'name"))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == "Missing Trailing quote"
assert res.body.body[0].text == "'name"
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar")
for concept in [foo, bar]:
concept.get_metadata().definition_type = DEFINITION_TYPE_BNF
foo.set_bnf(regex_parser.parse(context, "'twenty' | 'thirty'").value.value)
bar.set_bnf(regex_parser.parse(context, "foo ('one' | 'two')").value.value)
bnf_parser = BnfNodeParser()
bnf_parser.init_from_concepts(context, [foo, bar])
res = bnf_parser.parse(context, ParserInput("twenty two"))
assert res.status
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
res = bnf_parser.parse(context, ParserInput("thirty one"))
assert res.status
assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
res = bnf_parser.parse(context, ParserInput("twenty"))
assert res.status
assert res.value.body == [cnode("foo", 0, 0, "twenty")]
def test_i_cannot_parse_when_too_many_concepts(self):
sheerka, context, regex_parser, foo1, foo2 = self.init_parser(
Concept(name="foo", body="1"),
Concept(name="foo", body="2"))
res = regex_parser.parse(context, "foo")
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
assert res.value.body == ('key', 'foo')
def test_i_cannot_parse_when_unknown_concept(self):
sheerka, context, regex_parser = self.init_parser()
res = regex_parser.parse(self.get_context(), "foo")
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ('key', 'foo')
def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self):
sheerka, context, parser, one, two, number, twenties = self.init_parser(
"one",
"two",
"number",
"twenties")
sheerka.set_isa(context, sheerka.new("one"), number)
sheerka.set_isa(context, sheerka.new("two"), number)
sheerka.set_isa(context, sheerka.new("twenties"), number)
sub_context = context.push(BuiltinConcepts.INIT_BNF, twenties)
expression = "'twenty' number=n1"
res = parser.parse(sub_context, Tokenizer(expression))
assert not parser.has_error
assert res.status
pexpression = res.value.value
assert pexpression == Sequence(StrMatch('twenty'), ConceptExpression(number, "n1"))