239 lines
10 KiB
Python
239 lines
10 KiB
Python
import pytest
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.concept import Concept, DEFINITION_TYPE_BNF
|
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
|
from core.tokenizer import Tokenizer, TokenKind, LexerError, Token
|
|
from parsers.BaseNodeParser import cnode
|
|
from parsers.BaseParser import UnexpectedTokenErrorNode
|
|
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
|
|
BnfNodeParser, ConceptExpression
|
|
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
|
|
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
|
|
|
|
class ClassWithName:
    """Minimal object that only exposes a ``name`` attribute.

    The tests assign an instance to ``context.obj`` to stand in for a concept
    that is still being defined when the BNF expression is parsed.
    """

    def __init__(self, name):
        """Store *name* on the instance."""
        self.name = name
|
|
|
|
|
|
def c(name, rule_name=None):
    """Build a ``ConceptExpression`` around a freshly created concept.

    Args:
        name: Name given to the new ``Concept``.
        rule_name: Rule name of the resulting expression. When omitted (or
            falsy) the concept name is used instead.

    Returns:
        A ``ConceptExpression`` wrapping ``Concept(name).init_key()``.
    """
    concept = Concept(name).init_key()
    return ConceptExpression(concept, rule_name=rule_name or name)
|
|
|
|
|
|
def update_concepts_ids(sheerka, parsing_expression):
    """Fill in missing concept ids throughout a parsing-expression tree.

    Every ``ConceptExpression`` whose concept has no id yet gets the id of the
    concept that *sheerka* returns for the same key. The expression is
    modified in place.

    Args:
        sheerka: Object providing ``get_by_key(key)``.
        parsing_expression: Root of the expression tree to update; must
            expose an ``elements`` iterable.
    """
    needs_id = (
        isinstance(parsing_expression, ConceptExpression)
        and not parsing_expression.concept.id
    )
    if needs_id:
        concept = sheerka.get_by_key(parsing_expression.concept.key)
        parsing_expression.concept.metadata.id = concept.id

    # Recurse for every kind of expression, not only ConceptExpression, so
    # that concepts nested inside Sequence / OrderedChoice / ... are reached.
    for pe in parsing_expression.elements:
        update_concepts_ids(sheerka, pe)
|
|
|
|
|
|
# EOF token used as the offending token in the expected UnexpectedTokenErrorNode
# values of TestBnfParser.test_i_can_detect_errors.
# NOTE(review): the three trailing zeros are presumably position fields
# (index / line / column) - confirm against core.tokenizer.Token.
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)
|
|
|
|
|
|
class TestBnfParser(TestUsingMemoryBasedSheerka):
    """Tests for ``BnfParser``: parsing BNF-like text into parsing expressions."""

    def init_parser(self, *concepts):
        """Create the fixtures shared by the tests.

        The given concepts are registered through ``self.init_concepts`` (with
        ``singleton=True``) and a fresh ``BnfParser`` is created.

        Returns:
            ``(sheerka, context, parser, *concepts)`` where the trailing items
            are whatever ``init_concepts`` returned after the context.
        """
        sheerka, context, *updated = self.init_concepts(*concepts, singleton=True)
        parser = BnfParser()

        return sheerka, context, parser, *updated

    @staticmethod
    def _assert_parsed(parser, res, expected, expression):
        """Assert that *res* is an error-free parse of *expression* into *expected*."""
        assert not parser.has_error
        assert res.status
        assert res.value.value == expected
        assert res.value.source == expression

    @pytest.mark.parametrize("expression, expected", [
        ("'str'", StrMatch("str")),
        ("1", StrMatch("1")),
        (" 1", StrMatch("1")),
        (",", StrMatch(",")),
        ("'foo'?", Optional(StrMatch("foo"))),
        ("'foo'*", ZeroOrMore(StrMatch("foo"))),
        ("'foo'+", OneOrMore(StrMatch("foo"))),
        ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
        ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
        ("1 2 | 3 4+", OrderedChoice(
            Sequence(StrMatch("1"), StrMatch("2")),
            Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
        ("1 (2 | 3) 4+", Sequence(
            StrMatch("1"),
            OrderedChoice(StrMatch("2"), StrMatch("3")),
            OneOrMore(StrMatch("4")))),
        ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
        ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
        ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
        ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
        ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
        ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
        ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
        ("(1 )", StrMatch("1")),
        ("'str'=var", StrMatch("str", rule_name="var")),
        ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
        ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
        ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
        ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
        ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
        ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
        ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
        ("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
        ("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
        ("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'*=x 'b'+=y)=z", Sequence(
            ZeroOrMore(StrMatch("a"), rule_name="x"),
            OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
        ("'--filter'",
         Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
    ])
    def test_i_can_parse_regex(self, expression, expected):
        """Expressions made only of literals parse into the expected tree."""
        _sheerka, context, parser = self.init_parser()

        res = parser.parse(context, Tokenizer(expression))

        self._assert_parsed(parser, res, expected, expression)

    @pytest.mark.parametrize("expression, expected", [
        ("foo", c("foo")),
        ("foo*", ZeroOrMore(c("foo"))),
        ("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
        ("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
        ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("foo=f", c("foo", "f")),
        ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
        ("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
    ])
    def test_i_can_parse_regex_with_concept(self, expression, expected):
        """Identifiers naming known concepts parse into ``ConceptExpression`` nodes."""
        sheerka, context, parser, *_concepts = self.init_parser("foo", "bar", "var", "def")

        # The expected tree is built at import time, before the concepts are
        # registered, so its concept ids must be filled in before comparing.
        update_concepts_ids(sheerka, expected)

        res = parser.parse(context, Tokenizer(expression))

        self._assert_parsed(parser, res, expected, expression)

    @pytest.mark.parametrize("expression, expected", [
        ("foo", ConceptExpression("foo")),
        ("foo=f", ConceptExpression("foo", rule_name="f")),
    ])
    def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self, expression, expected):
        """A concept named by ``context.obj`` can be referenced without being registered."""
        _sheerka, context, parser = self.init_parser()
        context.obj = ClassWithName("foo")

        res = parser.parse(context, Tokenizer(expression))

        self._assert_parsed(parser, res, expected, expression)

    @pytest.mark.parametrize("expression, error", [
        ("1 ", UnexpectedEndOfFileError()),
        ("1|", UnexpectedEndOfFileError()),
        ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.RPAR])),
        ("1=", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", eof_token, [TokenKind.IDENTIFIER])),
    ])
    def test_i_can_detect_errors(self, expression, error):
        """Malformed expressions yield an ERROR concept whose first item is *error*."""
        sheerka, context, parser = self.init_parser()

        res = parser.parse(context, Tokenizer(expression))
        ret_value = res.body.body

        assert parser.has_error
        assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
        assert not res.status
        assert ret_value[0] == error

    def test_i_can_detect_lexer_error(self):
        """An unterminated quoted string is reported as a ``LexerError``."""
        sheerka, context, parser = self.init_parser()

        res = parser.parse(context, Tokenizer("'name"))

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)

        lexer_error = res.body.body[0]
        assert isinstance(lexer_error, LexerError)
        assert lexer_error.message == "Missing Trailing quote"
        assert lexer_error.text == "'name"

    def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
        """Expressions produced by ``BnfParser`` can drive a ``BnfNodeParser``."""
        _sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar")

        for concept in [foo, bar]:
            concept.metadata.definition_type = DEFINITION_TYPE_BNF

        foo.bnf = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
        bar.bnf = regex_parser.parse(context, "foo ('one' | 'two')").value.value

        bnf_parser = BnfNodeParser()
        bnf_parser.init_from_concepts(context, [foo, bar])

        res = bnf_parser.parse(context, ParserInput("twenty two"))
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "twenty two")]

        res = bnf_parser.parse(context, ParserInput("thirty one"))
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "thirty one")]

        res = bnf_parser.parse(context, ParserInput("twenty"))
        assert res.status
        assert res.value.body == [cnode("foo", 0, 0, "twenty")]

    def test_i_cannot_parse_when_too_many_concepts(self):
        """Parsing fails when two registered concepts share the referenced name."""
        _sheerka, context, regex_parser, _foo1, _foo2 = self.init_parser(
            Concept(name="foo", body="1"),
            Concept(name="foo", body="2"))

        res = regex_parser.parse(context, "foo")

        assert not res.status
        assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
        assert res.value.body == ('key', 'foo')

    def test_i_cannot_parse_when_unknown_concept(self):
        """Parsing fails when the referenced concept is not registered."""
        _sheerka, context, regex_parser = self.init_parser()
        # NOTE(review): every other test passes the ``context`` returned by
        # init_parser(); this one parses with self.get_context() instead.
        # Left unchanged - confirm whether the difference is intentional.
        res = regex_parser.parse(self.get_context(), "foo")

        assert not res.status
        assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
        assert res.value.body == ('key', 'foo')

    def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self):
        """A concept reference parsed while initialising a BNF gets a ``recurse_id``."""
        sheerka, context, parser, _one, _two, number, twenties = self.init_parser(
            "one",
            "two",
            "number",
            "twenties")

        sheerka.set_isa(context, sheerka.new("one"), number)
        sheerka.set_isa(context, sheerka.new("two"), number)
        sheerka.set_isa(context, sheerka.new("twenties"), number)

        sub_context = context.push(BuiltinConcepts.INIT_BNF, twenties)

        expression = "'twenty' number=n1"
        res = parser.parse(sub_context, Tokenizer(expression))

        assert not parser.has_error
        assert res.status
        pexpression = res.value.value
        assert pexpression == Sequence(StrMatch('twenty'), ConceptExpression(number, "n1"))
        # NOTE(review): the literal ids presumably correspond to the ids
        # assigned to ``number`` and ``twenties`` - confirm before relying on it.
        assert pexpression.elements[1].recurse_id == "1003#n1#1004"
|