"""Tests for ``BnfParser``: turning BNF-like expressions into parsing nodes."""
import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind, LexerError, Token
from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfNodeParser import (
    StrMatch,
    Optional,
    ZeroOrMore,
    OrderedChoice,
    Sequence,
    OneOrMore,
    BnfNodeParser,
    ConceptExpression,
)
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka


class ClassWithName:
    """Minimal object exposing only a ``name`` attribute.

    Used below as ``context.obj`` to stand in for a concept that is still
    being defined.
    """

    def __init__(self, name):
        self.name = name


def c(name, rule_name=None):
    """Build the ``ConceptExpression`` expected for the concept called *name*.

    :param name: name of the concept to wrap
    :param rule_name: rule name of the expression; falls back to *name* when
        it is falsy
    """
    wrapped = Concept(name).init_key()
    label = rule_name if rule_name else name
    return ConceptExpression(wrapped, rule_name=label)


# End-of-file token used to build the expected "unexpected token" errors.
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)

# (expression, expected node) pairs made of literals only.
_LITERAL_CASES = [
    ("'str'", StrMatch("str")),
    ("1", StrMatch("1")),
    (" 1", StrMatch("1")),
    (",", StrMatch(",")),
    ("'foo'?", Optional(StrMatch("foo"))),
    ("'foo'*", ZeroOrMore(StrMatch("foo"))),
    ("'foo'+", OneOrMore(StrMatch("foo"))),
    ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
    ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
    ("1 2 | 3 4+", OrderedChoice(
        Sequence(StrMatch("1"), StrMatch("2")),
        Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
    ("1 (2 | 3) 4+", Sequence(
        StrMatch("1"),
        OrderedChoice(StrMatch("2"), StrMatch("3")),
        OneOrMore(StrMatch("4")))),
    ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
    ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
    ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
    ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
    ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
    ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
    ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
    ("(1 )", StrMatch("1")),

    # rule names attached with '='
    ("'str'=var", StrMatch("str", rule_name="var")),
    ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
    ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
    ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
    ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
    ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
    ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
    ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
    ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
    ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
    ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
    ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
    ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
    ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
    ("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
    ("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
    ("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
    ("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
    ("('a'*=x 'b'+=y)=z", Sequence(
        ZeroOrMore(StrMatch("a"), rule_name="x"),
        OneOrMore(StrMatch("b"), rule_name="y"),
        rule_name="z")),
    ("'--filter'", Sequence(
        StrMatch("-", skip_whitespace=False),
        StrMatch("-", skip_whitespace=False),
        StrMatch("filter"))),
]

# (expression, expected node) pairs that reference the concepts registered by
# the test (foo, bar, var, def).
_CONCEPT_CASES = [
    ("foo", c("foo")),
    ("foo*", ZeroOrMore(c("foo"))),
    ("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
    ("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
    ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
    ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
    ("foo=f", c("foo", "f")),
    ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
    ("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
]

# Expected nodes when the referenced concept is the one held by context.obj.
_CONCEPT_UNDER_DEFINITION_CASES = [
    ("foo", ConceptExpression("foo")),
    ("foo=f", ConceptExpression("foo", rule_name="f")),
]

# (expression, first expected error) pairs.
_ERROR_CASES = [
    ("1 ", UnexpectedEndOfFileError()),
    ("1|", UnexpectedEndOfFileError()),
    ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token()'", eof_token, [TokenKind.RPAR])),
    ("1=", UnexpectedTokenErrorNode("Unexpected token 'Token()'", eof_token, [TokenKind.IDENTIFIER])),
]


def _assert_parsed_as(parser, context, expression, expected):
    """Tokenize and parse *expression*, then check the successful outcome.

    Asserts that *parser* reports no error, that the result status is truthy,
    and that the result carries *expected* as its value and *expression* as
    its source.
    """
    outcome = parser.parse(context, Tokenizer(expression))

    assert not parser.has_error
    assert outcome.status
    assert outcome.value.value == expected
    assert outcome.value.source == expression


class TestBnfParser(TestUsingMemoryBasedSheerka):
    """Checks what ``BnfParser`` produces for valid and invalid expressions."""

    def init_parser(self, *concepts):
        """Register *concepts* and create a fresh ``BnfParser``.

        :return: ``(sheerka, context, parser, *registered_concepts)``
        """
        sheerka, context, *registered = self.init_concepts(*concepts, singleton=True)
        return (sheerka, context, BnfParser(), *registered)

    @pytest.mark.parametrize("expression, expected", _LITERAL_CASES)
    def test_i_can_parse_regex(self, expression, expected):
        """Expressions made of literals parse into the expected node tree."""
        _sheerka, context, parser = self.init_parser()

        _assert_parsed_as(parser, context, expression, expected)

    @pytest.mark.parametrize("expression, expected", _CONCEPT_CASES)
    def test_i_can_parse_regex_with_concept(self, expression, expected):
        """Names of registered concepts parse into concept expressions."""
        _sheerka, context, parser, _foo, _bar, _var, _def = self.init_parser("foo", "bar", "var", "def")

        _assert_parsed_as(parser, context, expression, expected)

    @pytest.mark.parametrize("expression, expected", _CONCEPT_UNDER_DEFINITION_CASES)
    def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self, expression, expected):
        """A concept named by ``context.obj`` can be referenced although no
        concept was registered through ``init_parser``."""
        _sheerka, context, parser = self.init_parser()
        context.obj = ClassWithName("foo")

        _assert_parsed_as(parser, context, expression, expected)

    @pytest.mark.parametrize("expression, error", _ERROR_CASES)
    def test_i_can_detect_errors(self, expression, error):
        """Malformed expressions yield an ERROR concept whose first entry is
        the expected error."""
        sheerka, context, parser = self.init_parser()

        outcome = parser.parse(context, Tokenizer(expression))
        reported = outcome.body.body

        assert parser.has_error
        assert sheerka.isinstance(outcome.body, BuiltinConcepts.ERROR)
        assert not outcome.status
        assert reported[0] == error

    def test_i_can_detect_lexer_error(self):
        """An unterminated quoted string surfaces as a ``LexerError``."""
        sheerka, context, parser = self.init_parser()

        outcome = parser.parse(context, Tokenizer("'name"))

        assert not outcome.status
        assert sheerka.isinstance(outcome.body, BuiltinConcepts.ERROR)

        lexer_error = outcome.body.body[0]
        assert isinstance(lexer_error, LexerError)
        assert lexer_error.message == "Missing Trailing quote"
        assert lexer_error.text == "'name"

    def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
        """Nodes produced by ``BnfParser`` can be installed as concept BNF
        definitions and then drive ``BnfNodeParser`` on plain text."""
        _sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar")

        for concept in (foo, bar):
            concept.metadata.definition_type = DEFINITION_TYPE_BNF

        foo.bnf = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
        bar.bnf = regex_parser.parse(context, "foo ('one' | 'two')").value.value

        bnf_parser = BnfNodeParser()
        bnf_parser.init_from_concepts(context, [foo, bar])

        # (text to parse, name of the recognised concept, expected end index)
        scenarios = (
            ("twenty two", "bar", 2),
            ("thirty one", "bar", 2),
            ("twenty", "foo", 0),
        )
        for text, concept_name, end in scenarios:
            outcome = bnf_parser.parse(context, ParserInput(text))
            assert outcome.status
            assert outcome.value.body == [cnode(concept_name, 0, end, text)]

    def test_i_cannot_parse_when_too_many_concepts(self):
        """Two concepts sharing the name ``foo`` make the reference
        unresolvable."""
        _sheerka, context, regex_parser, _foo1, _foo2 = self.init_parser(
            Concept(name="foo", body="1"),
            Concept(name="foo", body="2"))

        outcome = regex_parser.parse(context, "foo")

        assert not outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
        assert outcome.value.body == ('key', 'foo')

    def test_i_cannot_parse_when_unknown_concept(self):
        """Referencing a name that matches no concept is reported as
        UNKNOWN_CONCEPT."""
        _sheerka, context, regex_parser = self.init_parser()

        # The parse runs against a context obtained from get_context(), while
        # the assertion below goes through the one returned by init_parser.
        outcome = regex_parser.parse(self.get_context(), "foo")

        assert not outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.UNKNOWN_CONCEPT)
        assert outcome.value.body == ('key', 'foo')