Files
Sheerka-Old/tests/parsers/test_BnfParser.py
T

240 lines
10 KiB
Python

import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind, LexerError, Token
from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, \
OneOrMore, ConceptExpression
from parsers.BnfNodeParser import BnfNodeParser
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class ClassWithName:
    """Minimal stand-in object that only exposes a ``name`` attribute."""

    def __init__(self, name) -> None:
        """Store *name* unchanged on the instance."""
        self.name = name
def c(name, rule_name=None):
    """Build a ConceptExpression for a concept called *name*.

    The wrapped concept is whatever ``Concept(name).init_key()`` returns.
    The expression's rule name is *rule_name* when it is truthy, otherwise
    it falls back to *name*.
    """
    keyed_concept = Concept(name).init_key()
    if rule_name:
        return ConceptExpression(keyed_concept, rule_name=rule_name)
    return ConceptExpression(keyed_concept, rule_name=name)
def update_concepts_ids(sheerka, parsing_expression):
    """Fill in missing concept ids throughout a parsing-expression tree.

    For every ConceptExpression whose concept has a falsy ``id``, the concept
    is looked up in *sheerka* by its key and the id of the result is copied
    to ``concept.metadata.id``. The tree is mutated in place; nothing is
    returned.
    """
    needs_id = (
        isinstance(parsing_expression, ConceptExpression)
        and not parsing_expression.concept.id
    )
    if needs_id:
        registered = sheerka.get_by_key(parsing_expression.concept.key)
        parsing_expression.concept.metadata.id = registered.id

    # NOTE(review): the recursion is assumed to apply to every node (not only
    # ConceptExpression ones) so that nested expressions are reached - confirm.
    for child in parsing_expression.elements:
        update_concepts_ids(sheerka, child)
# Value passed as the second argument of the expected UnexpectedTokenErrorNode
# instances in the error-detection cases, whose message mentions 'Token(<EOF>)'.
eof_token = "<EOF>"
class TestBnfParser(TestUsingMemoryBasedSheerka):
    """Tests for BnfParser: turning BNF definition text into parsing expressions."""

    def init_parser(self, *concepts):
        """Initialise *concepts* and create a fresh BnfParser.

        Returns ``(sheerka, context, parser, *created)`` where ``created`` is
        everything ``init_concepts`` returned after the sheerka and context.
        """
        sheerka, context, *created = self.init_concepts(*concepts, singleton=True)
        return (sheerka, context, BnfParser(), *created)

    @staticmethod
    def _assert_parsed_as(parser, result, expected, expression):
        """Assert that *result* is a successful parse of *expression* into *expected*."""
        assert not parser.has_error
        assert result.status
        assert result.value.value == expected
        assert result.value.source == expression

    @pytest.mark.parametrize("expression, expected", [
        # Single literals.
        ("'str'", StrMatch("str")),
        ("1", StrMatch("1")),
        (" 1", StrMatch("1")),
        (",", StrMatch(",")),
        # Quantifiers directly attached to a literal.
        ("'foo'?", Optional(StrMatch("foo"))),
        ("'foo'*", ZeroOrMore(StrMatch("foo"))),
        ("'foo'+", OneOrMore(StrMatch("foo"))),
        # Choices, sequences and grouping.
        ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1'|' 2 '|' 3",
         Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
        ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
        ("1 2 | 3 4+",
         OrderedChoice(
             Sequence(StrMatch("1"), StrMatch("2")),
             Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
        ("1 (2 | 3) 4+",
         Sequence(
             StrMatch("1"),
             OrderedChoice(StrMatch("2"), StrMatch("3")),
             OneOrMore(StrMatch("4")))),
        ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
        ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
        # A quantifier character separated by whitespace is expected as a literal.
        ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
        ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
        ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
        ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
        ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
        ("(1 )", StrMatch("1")),
        # Rule names given with "=name".
        ("'str'=var", StrMatch("str", rule_name="var")),
        ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
        ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
        ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
        ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("(1 | 2 | 3)=var",
         OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
        ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
        ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
        ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
        ("(1=a 2=b)=c",
         Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
        ("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
        ("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'*=x 'b'+=y)=z",
         Sequence(
             ZeroOrMore(StrMatch("a"), rule_name="x"),
             OneOrMore(StrMatch("b"), rule_name="y"),
             rule_name="z")),
        # A quoted text made of several tokens is expected as a sequence.
        ("'--filter'",
         Sequence(
             StrMatch("-", skip_whitespace=False),
             StrMatch("-", skip_whitespace=False),
             StrMatch("filter"))),
    ])
    def test_i_can_parse_regex(self, expression, expected):
        """Expressions made only of literals parse into the expected tree."""
        _, context, parser = self.init_parser()
        outcome = parser.parse(context, Tokenizer(expression))
        self._assert_parsed_as(parser, outcome, expected, expression)

    @pytest.mark.parametrize("expression, expected", [
        ("foo", c("foo")),
        ("foo*", ZeroOrMore(c("foo"))),
        ("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
        ("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
        ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("foo=f", c("foo", "f")),
        ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
        ("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
    ])
    def test_i_can_parse_regex_with_concept(self, expression, expected):
        """Identifiers naming initialised concepts parse into ConceptExpression nodes."""
        sheerka, context, parser, *_ = self.init_parser("foo", "bar", "var", "def")
        # The expected tree was built at import time; align its concept ids
        # with the concepts just initialised before comparing.
        update_concepts_ids(sheerka, expected)
        outcome = parser.parse(context, Tokenizer(expression))
        self._assert_parsed_as(parser, outcome, expected, expression)

    @pytest.mark.parametrize("expression, expected", [
        ("foo", ConceptExpression("foo")),
        ("foo=f", ConceptExpression("foo", rule_name="f")),
    ])
    def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(
            self, expression, expected):
        """A concept that is not initialised but named by ``context.obj`` still parses."""
        _, context, parser = self.init_parser()
        context.obj = ClassWithName("foo")
        outcome = parser.parse(context, Tokenizer(expression))
        self._assert_parsed_as(parser, outcome, expected, expression)

    @pytest.mark.parametrize("expression, error", [
        ("1 ", UnexpectedEndOfFileError()),
        ("1|", UnexpectedEndOfFileError()),
        ("(1|)",
         UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.RPAR])),
        ("1=",
         UnexpectedTokenErrorNode(
             "Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.IDENTIFIER])),
    ])
    def test_i_can_detect_errors(self, expression, error):
        """Malformed expressions yield an ERROR concept whose first entry is *error*."""
        sheerka, context, parser = self.init_parser()
        outcome = parser.parse(context, Tokenizer(expression))
        reported = outcome.body.body
        assert parser.has_error
        assert sheerka.isinstance(outcome.body, BuiltinConcepts.ERROR)
        assert not outcome.status
        assert reported[0] == error

    def test_i_can_detect_lexer_error(self):
        """An unterminated quote is reported as a LexerError inside an ERROR concept."""
        sheerka, context, parser = self.init_parser()
        outcome = parser.parse(context, Tokenizer("'name"))
        assert not outcome.status
        assert sheerka.isinstance(outcome.body, BuiltinConcepts.ERROR)
        first_error = outcome.body.body[0]
        assert isinstance(first_error, LexerError)
        assert first_error.message == "Missing Trailing quote"
        assert first_error.text == "'name"

    def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
        """Expressions produced by BnfParser can drive a BnfNodeParser on real text."""
        _sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar")
        for concept in (foo, bar):
            concept.metadata.definition_type = DEFINITION_TYPE_BNF
        foo.bnf = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
        bar.bnf = regex_parser.parse(context, "foo ('one' | 'two')").value.value

        node_parser = BnfNodeParser()
        node_parser.init_from_concepts(context, [foo, bar])

        # (input text, expected concept name, then the two integers given to cnode)
        cases = (
            ("twenty two", "bar", 0, 2),
            ("thirty one", "bar", 0, 2),
            ("twenty", "foo", 0, 0),
        )
        for text, concept_name, start, end in cases:
            outcome = node_parser.parse(context, ParserInput(text))
            assert outcome.status
            assert outcome.value.body == [cnode(concept_name, start, end, text)]

    def test_i_cannot_parse_when_too_many_concepts(self):
        """Two concepts sharing the name 'foo' make the reference unresolvable."""
        _sheerka, context, regex_parser, _foo1, _foo2 = self.init_parser(
            Concept(name="foo", body="1"),
            Concept(name="foo", body="2"),
        )
        outcome = regex_parser.parse(context, "foo")
        assert not outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
        assert outcome.value.body == ('key', 'foo')

    def test_i_cannot_parse_when_unknown_concept(self):
        """Referencing a concept that was never initialised yields UNKNOWN_CONCEPT."""
        _sheerka, context, regex_parser = self.init_parser()
        # NOTE(review): parsing uses a context from self.get_context() rather than
        # the one returned by init_parser - confirm this is intentional.
        outcome = regex_parser.parse(self.get_context(), "foo")
        assert not outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.UNKNOWN_CONCEPT)
        assert outcome.value.body == ('key', 'foo')

    def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self):
        """Check the ConceptExpression built for ``number`` while initialising ``twenties``."""
        sheerka, context, parser, _one, _two, number, twenties = self.init_parser(
            "one",
            "two",
            "number",
            "twenties",
        )
        for member_name in ("one", "two", "twenties"):
            sheerka.set_isa(context, sheerka.new(member_name), number)

        sub_context = context.push(BuiltinConcepts.INIT_BNF, twenties)
        expression = "'twenty' number=n1"
        outcome = parser.parse(sub_context, Tokenizer(expression))
        assert not parser.has_error
        assert outcome.status

        parsed = outcome.value.value
        assert parsed == Sequence(StrMatch('twenty'), ConceptExpression(number, "n1"))
        assert parsed.elements[1].recurse_id == "1004#1003(n1)"