import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, LexerError, Token
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
    ConceptLexerParser, ConceptExpression, cnode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka


class ClassWithName:
    """Minimal stand-in for any object that exposes a ``name`` attribute."""

    def __init__(self, name):
        self.name = name


def c(name, rule_name=None):
    """Build a ConceptExpression around a freshly keyed Concept.

    Shorthand used by the parametrize tables below.  The rule name
    defaults to the concept name when not given explicitly.
    """
    concept = Concept(name).init_key()
    return ConceptExpression(concept, rule_name=rule_name or name)


# Shared EOF token used when asserting on end-of-input parse errors.
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)


class TestBnfParser(TestUsingMemoryBasedSheerka):
    """Exercises BnfParser: expression grammar, concept resolution, errors."""

    @pytest.mark.parametrize("expression, expected", [
        # Literals and bare tokens.
        ("'str'", StrMatch("str")),
        ("1", StrMatch("1")),
        (" 1", StrMatch("1")),
        (",", StrMatch(",")),
        # Postfix repetition operators.
        ("'foo'?", Optional(StrMatch("foo"))),
        ("'foo'*", ZeroOrMore(StrMatch("foo"))),
        ("'foo'+", OneOrMore(StrMatch("foo"))),
        # Ordered choice, with and without surrounding whitespace.
        ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        # A quoted '|' is a plain string match, not a choice operator.
        ("1'|' 2 '|' 3",
         Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
        ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
        # Choice binds looser than sequence; parentheses override that.
        ("1 2 | 3 4+",
         OrderedChoice(
             Sequence(StrMatch("1"), StrMatch("2")),
             Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
        ("1 (2 | 3) 4+",
         Sequence(
             StrMatch("1"),
             OrderedChoice(StrMatch("2"), StrMatch("3")),
             OneOrMore(StrMatch("4")))),
        ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
        ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
        # A detached '*', '?' or '+' is an ordinary token, not an operator.
        ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
        ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
        ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
        ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
        ("1, :&",
         Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
        ("(1 )", StrMatch("1")),
        # '=name' attaches a rule name; placement relative to the repetition
        # operator decides whether the repetition or its inner match is named.
        ("'str'=var", StrMatch("str", rule_name="var")),
        ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
        ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
        ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
        ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("(1 | 2 | 3)=var",
         OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
        ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
        ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
        ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
        ("(1=a 2=b)=c",
         Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
        ("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
        ("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'*=x 'b'+=y)=z",
         Sequence(
             ZeroOrMore(StrMatch("a"), rule_name="x"),
             OneOrMore(StrMatch("b"), rule_name="y"),
             rule_name="z")),
    ])
    def test_i_can_parse_regex(self, expression, expected):
        """A BNF expression parses into the expected matcher tree."""
        parser = BnfParser()
        res = parser.parse(self.get_context(), Tokenizer(expression))
        assert not parser.has_error
        assert res.status
        assert res.value.value == expected
        assert res.value.source == expression

    @pytest.mark.parametrize("expression, expected", [
        ("foo", c("foo")),
        ("foo*", ZeroOrMore(c("foo"))),
        ("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
        ("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
        ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("foo=f", c("foo", "f")),
    ])
    def test_i_can_parse_regex_with_concept(self, expression, expected):
        """Identifiers resolve to concepts already present in the cache."""
        foo = Concept("foo")
        bar = Concept("bar")
        var = Concept("var")
        context = self.get_context()
        # NOTE: loop variable renamed from `c` — it shadowed the module-level
        # helper `c()` used by the parametrize table above.
        for concept in (foo, bar, var):
            context.sheerka.add_in_cache(concept)
        parser = BnfParser()
        res = parser.parse(context, Tokenizer(expression))
        assert not parser.has_error
        assert res.status
        assert res.value.value == expected
        assert res.value.source == expression

    @pytest.mark.parametrize("expression, expected", [
        ("foo", ConceptExpression("foo")),
        ("foo=f", ConceptExpression("foo", rule_name="f")),
    ])
    def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(
            self, expression, expected):
        """A self-reference to the concept being defined parses by name only."""
        context = self.get_context()
        # The object under definition carries the name being referenced.
        context.obj = ClassWithName("foo")
        parser = BnfParser()
        res = parser.parse(context, Tokenizer(expression))
        assert not parser.has_error
        assert res.status
        assert res.value.value == expected
        assert res.value.source == expression

    @pytest.mark.parametrize("expression, error", [
        ("1 ", UnexpectedEndOfFileError()),
        ("1|", UnexpectedEndOfFileError()),
        ("(1|)", UnexpectedTokenErrorNode(
            "Unexpected token 'Token()'", eof_token, [TokenKind.RPAR])),
        ("1=", UnexpectedTokenErrorNode(
            "Unexpected token 'Token()'", eof_token, [TokenKind.IDENTIFIER])),
        ("'name", LexerError("Missing Trailing quote", "'name", 5, 1, 6)),
    ])
    def test_i_can_detect_errors(self, expression, error):
        """Malformed expressions surface the expected error node."""
        parser = BnfParser()
        res = parser.parse(self.get_context(), Tokenizer(expression))
        ret_value = res.value.value
        assert parser.has_error
        assert not res.status
        assert ret_value[0] == error

    def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
        """BNF parse results feed a ConceptLexerParser that matches real text."""
        foo = Concept(name="foo")
        bar = Concept(name="bar")
        context = self.get_context()
        context.sheerka.add_in_cache(foo)
        context.sheerka.add_in_cache(bar)
        regex_parser = BnfParser()
        foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
        bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
        concepts = {bar: bar_definition, foo: foo_definition}
        concept_parser = ConceptLexerParser()
        concept_parser.initialize(context, concepts)
        # "bar" wins when its full pattern ("foo" then one/two) matches.
        res = concept_parser.parse(context, "twenty two")
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
        res = concept_parser.parse(context, "thirty one")
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
        # A bare "twenty" only satisfies "foo".
        res = concept_parser.parse(context, "twenty")
        assert res.status
        assert res.value.body == [cnode("foo", 0, 0, "twenty")]

    def test_i_cannot_parse_when_too_many_concepts(self):
        """An ambiguous key (two cached concepts) yields CANNOT_RESOLVE_CONCEPT."""
        foo1 = Concept(name="foo", body="1")
        foo2 = Concept(name="foo", body="2")
        context = self.get_context()
        context.sheerka.cache_by_key["foo"] = [foo1, foo2]
        regex_parser = BnfParser()
        res = regex_parser.parse(context, "foo")
        assert not res.status
        assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
        assert res.value.body == ('key', 'foo')

    def test_i_cannot_parse_when_unknown_concept(self):
        """An identifier with no cached concept yields UNKNOWN_CONCEPT."""
        context = self.get_context()
        regex_parser = BnfParser()
        # FIX: parse with the same `context` the assertions inspect; the
        # original passed a second, fresh self.get_context() here, unlike
        # the sibling too-many-concepts test.
        res = regex_parser.parse(context, "foo")
        assert not res.status
        assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
        assert res.value.body == ('key', 'foo')