Files
Sheerka-Old/tests/test_BnfParser.py
T

120 lines
5.5 KiB
Python

import pytest
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
ConceptLexerParser, ConceptNode, ConceptMatch, cnode
from sdp.sheerkaDataProvider import Event
def get_context():
sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://")
return ExecutionContext("sheerka", Event(), sheerka)
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
("foo", ConceptMatch("foo")),
("foo*", ZeroOrMore(ConceptMatch("foo"))),
("foo 'and' bar+", Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))),
("foo | bar?", OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
])
def test_i_can_parse_regex(expression, expected):
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.RPAR])),
("1=", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.IDENTIFIER])),
("'name", LexerError("Missing Trailing quote", "'name", 5, 1, 6))
])
def test_i_can_detect_errors(expression, error):
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo")
bar = Concept(name="bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = BnfParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [cnode("foo", 0, 0, "twenty")]