Files
Sheerka-Old/tests/parsers/test_BnfParser.py
T
kodjo 05577012f3 Fixed #32 : concept groups are not correctly updated
Fixed #35 : Refactor test helper class (CNC, CC, CIO)
Fixed #36 : Concept values are not used when declared with variable expression
Fixed #37 : Objects in memory lose their values are restart
Fixed #38 : func(a=b, c) (which is not allowed) raise an exception
2021-03-05 11:16:19 +01:00

283 lines
13 KiB
Python

import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import BnfNodeParser, RegExMatch, VariableExpression
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, \
OneOrMore, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import CN, compare_with_test_object
class ClassWithName:
def __init__(self, name):
self.name = name
def c(name, rule_name=None):
concept = Concept(name).init_key()
return ConceptExpression(concept, rule_name=rule_name or name)
def update_concepts_ids(sheerka, parsing_expression):
if isinstance(parsing_expression, ConceptExpression):
if not parsing_expression.concept.id:
concept = sheerka.get_by_key(parsing_expression.concept.key)
parsing_expression.concept.get_metadata().id = concept.id
for pe in parsing_expression.elements:
update_concepts_ids(sheerka, pe)
eof_token = "<EOF>"
class TestBnfParser(TestUsingMemoryBasedSheerka):
def init_parser(self, *concepts):
sheerka, context, *updated = self.init_concepts(*concepts, singleton=True)
parser = BnfDefinitionParser()
return sheerka, context, parser, *updated
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("r'str'", RegExMatch("str")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(
StrMatch("1"),
OrderedChoice(StrMatch("2"), StrMatch("3")),
OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("r'str'=var", RegExMatch("str", rule_name="var")),
("r'foo'?=var", Optional(RegExMatch("foo"), rule_name="var")),
("(r'foo'?)=var", Optional(RegExMatch("foo"), rule_name="var")),
("r'foo'*=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'*)=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'+=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'+)=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'=var?", Optional(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)?", Optional(RegExMatch("foo", rule_name="var"))),
("r'foo'=var*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("r'foo'=var+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'*=x 'b'+=y)=z", Sequence(
ZeroOrMore(StrMatch("a"), rule_name="x"),
OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
("'--filter'",
Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
])
def test_i_can_parse_regex(self, expression, expected):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", c("foo")),
("foo*", ZeroOrMore(c("foo"))),
("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
("foo=f", c("foo", "f")),
("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
("c:foo:", c("foo")),
("c:|1001:", c("foo")),
])
def test_i_can_parse_regex_with_concept(self, expression, expected):
sheerka, context, parser, *concepts = self.init_parser("foo", "bar", "var", "def")
update_concepts_ids(sheerka, expected)
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
("x", VariableExpression("x")),
("x bar", Sequence(VariableExpression("x"), c("bar"))),
("bar x", Sequence(c("bar"), VariableExpression("x"))),
("x 'and' bar", Sequence(VariableExpression("x"), StrMatch("and"), c("bar"))),
("x | bar", OrderedChoice(VariableExpression("x"), c("bar"))),
("x*", ZeroOrMore(VariableExpression("x"))),
("x+", OneOrMore(VariableExpression("x"))),
("'str' = x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
("'str''='x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
("foo=x", VariableExpression("x")),
])
def test_i_can_parse_regex_with_variable(self, expression, expected):
# A variable is an identifier that cannot be resolved to a concept
sheerka, context, regex_parser, bar = self.init_parser("bar")
update_concepts_ids(sheerka, expected)
res = regex_parser.parse(self.get_context(), expression)
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", ConceptExpression("foo")),
("foo=f", ConceptExpression("foo", rule_name="f")),
])
def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self, expression, expected):
sheerka, context, parser = self.init_parser()
context.obj = ClassWithName("foo")
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEofParsingError()),
("1|", UnexpectedEofParsingError()),
("(1|)", UnexpectedTokenParsingError("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.RPAR])),
("1=", UnexpectedTokenParsingError("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.IDENTIFIER])),
])
def test_i_can_detect_errors(self, expression, error):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, Tokenizer(expression))
ret_value = res.body.body
assert parser.has_error
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert not res.status
assert ret_value[0] == error
def test_i_can_detect_lexer_error(self):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, Tokenizer("'name"))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == "Missing Trailing quote"
assert res.body.body[0].text == "'name"
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar")
for concept in [foo, bar]:
concept.get_metadata().definition_type = DEFINITION_TYPE_BNF
foo.set_bnf(regex_parser.parse(context, "'twenty' | 'thirty'").value.value)
bar.set_bnf(regex_parser.parse(context, "foo ('one' | 'two')").value.value)
bnf_parser = BnfNodeParser()
bnf_parser.init_from_concepts(context, [foo, bar])
res = bnf_parser.parse(context, ParserInput("twenty two"))
assert res.status
compare_with_test_object(res.value.body, [CN("bar", "twenty two", 0, 2)])
res = bnf_parser.parse(context, ParserInput("thirty one"))
assert res.status
compare_with_test_object(res.value.body, [CN("bar", "thirty one", 0, 2)])
res = bnf_parser.parse(context, ParserInput("twenty"))
assert res.status
compare_with_test_object(res.value.body, [CN("foo", "twenty", 0, 0)])
def test_i_cannot_parse_when_too_many_concepts(self):
sheerka, context, regex_parser, foo1, foo2 = self.init_parser(
Concept(name="foo", body="1"),
Concept(name="foo", body="2"))
res = regex_parser.parse(context, "foo")
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
assert res.value.body == ('key', 'foo')
@pytest.mark.parametrize("text, expected", [
("c:foo:", {'key': 'foo'}),
("c:|1001:", {'id': '1001'}),
("c:foo|1001:", {'key': 'foo', 'id': '1001'}),
])
def test_i_cannot_parse_when_unknown_concept(self, text, expected):
sheerka, context, regex_parser = self.init_parser()
res = regex_parser.parse(self.get_context(), text)
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == expected
def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self):
sheerka, context, parser, one, two, number, twenties = self.init_parser(
"one",
"two",
"number",
"twenties")
sheerka.set_isa(context, sheerka.new("one"), number)
sheerka.set_isa(context, sheerka.new("two"), number)
sheerka.set_isa(context, sheerka.new("twenties"), number)
sub_context = context.push(BuiltinConcepts.INIT_BNF, twenties)
expression = "'twenty' number=n1"
res = parser.parse(sub_context, Tokenizer(expression))
assert not parser.has_error
assert res.status
pexpression = res.value.value
assert pexpression == Sequence(StrMatch('twenty'), ConceptExpression(number, "n1"))