"""Tests for ``BnfDefinitionParser``: turning BNF definition text into parsing expressions."""
import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import BnfNodeParser, RegExMatch, VariableExpression
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, \
    OneOrMore, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import CN, compare_with_test_object


class ClassWithName:
    """Minimal object exposing only a ``name`` attribute.

    One of the tests below assigns an instance to ``context.obj`` to stand in
    for a concept that is still being defined.
    """

    def __init__(self, name):
        self.name = name


def c(name, rule_name=None):
    """Return a ``ConceptExpression`` wrapping a fresh ``Concept`` called *name*.

    When *rule_name* is falsy, the concept name is used as the rule name.
    """
    return ConceptExpression(Concept(name).init_key(), rule_name=rule_name or name)


def update_concepts_ids(sheerka, parsing_expression):
    """Recursively copy concept ids from *sheerka* into *parsing_expression*.

    For every ``ConceptExpression`` whose concept has no id, the concept is
    looked up by key in *sheerka* and its id is written into the metadata of
    the expression's concept. All child ``elements`` are then visited.
    """
    lacks_id = (
        isinstance(parsing_expression, ConceptExpression)
        and not parsing_expression.concept.id
    )
    if lacks_id:
        resolved = sheerka.get_by_key(parsing_expression.concept.key)
        parsing_expression.concept.get_metadata().id = resolved.id

    for child in parsing_expression.elements:
        update_concepts_ids(sheerka, child)


# Value passed as the token argument of the expected UnexpectedTokenParsingError instances.
eof_token = ""


class TestBnfParser(TestUsingMemoryBasedSheerka):
    """Exercises ``BnfDefinitionParser`` on literals, concepts, variables and error cases."""

    def init_parser(self, *concepts):
        """Initialise *concepts* and return ``(sheerka, context, parser, *concepts)``."""
        sheerka, context, *created = self.init_concepts(*concepts, singleton=True)
        return (sheerka, context, BnfDefinitionParser(), *created)

    @staticmethod
    def _assert_parsed(parser, result, expected, source):
        """Assert that *result* is an error-free parse of *source* equal to *expected*."""
        assert not parser.has_error
        assert result.status
        assert result.value.value == expected
        assert result.value.source == source

    @pytest.mark.parametrize("expression, expected", [
        ("'str'", StrMatch("str")),
        ("1", StrMatch("1")),
        (" 1", StrMatch("1")),
        (",", StrMatch(",")),
        ("r'str'", RegExMatch("str")),
        ("'foo'?", Optional(StrMatch("foo"))),
        ("'foo'*", ZeroOrMore(StrMatch("foo"))),
        ("'foo'+", OneOrMore(StrMatch("foo"))),
        ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
        ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
        ("1 2 | 3 4+", OrderedChoice(
            Sequence(StrMatch("1"), StrMatch("2")),
            Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
        ("1 (2 | 3) 4+", Sequence(
            StrMatch("1"),
            OrderedChoice(StrMatch("2"), StrMatch("3")),
            OneOrMore(StrMatch("4")))),
        ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
        ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
        ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
        ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
        ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
        ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
        ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
        ("(1 )", StrMatch("1")),

        ("'str'=var", StrMatch("str", rule_name="var")),
        ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
        ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
        ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
        ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),

        ("r'str'=var", RegExMatch("str", rule_name="var")),
        ("r'foo'?=var", Optional(RegExMatch("foo"), rule_name="var")),
        ("(r'foo'?)=var", Optional(RegExMatch("foo"), rule_name="var")),
        ("r'foo'*=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
        ("(r'foo'*)=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
        ("r'foo'+=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
        ("(r'foo'+)=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
        ("r'foo'=var?", Optional(RegExMatch("foo", rule_name="var"))),
        ("(r'foo'=var)?", Optional(RegExMatch("foo", rule_name="var"))),
        ("r'foo'=var*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
        ("(r'foo'=var)*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
        ("r'foo'=var+", OneOrMore(RegExMatch("foo", rule_name="var"))),
        ("(r'foo'=var)+", OneOrMore(RegExMatch("foo", rule_name="var"))),

        ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
        ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
        ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
        ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
        ("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
        ("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
        ("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'*=x 'b'+=y)=z", Sequence(
            ZeroOrMore(StrMatch("a"), rule_name="x"),
            OneOrMore(StrMatch("b"), rule_name="y"),
            rule_name="z")),
        ("'--filter'", Sequence(
            StrMatch("-", skip_whitespace=False),
            StrMatch("-", skip_whitespace=False),
            StrMatch("filter")))
    ])
    def test_i_can_parse_regex(self, expression, expected):
        """Definitions made only of literals, regexes and operators parse to *expected*."""
        _sheerka, context, parser = self.init_parser()

        result = parser.parse(context, Tokenizer(expression))

        self._assert_parsed(parser, result, expected, expression)

    @pytest.mark.parametrize("expression, expected", [
        ("foo", c("foo")),
        ("foo*", ZeroOrMore(c("foo"))),
        ("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
        ("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
        ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("foo=f", c("foo", "f")),
        ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
        ("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
        ("c:foo:", c("foo")),
        ("c:|1001:", c("foo")),
    ])
    def test_i_can_parse_regex_with_concept(self, expression, expected):
        """Identifiers naming the initialised concepts parse to concept expressions."""
        sheerka, context, parser, *_concepts = self.init_parser("foo", "bar", "var", "def")
        # The expected expressions are built without ids; fill them in from sheerka.
        update_concepts_ids(sheerka, expected)

        result = parser.parse(context, Tokenizer(expression))

        self._assert_parsed(parser, result, expected, expression)

    @pytest.mark.parametrize("expression, expected", [
        ("x", VariableExpression("x")),
        ("x bar", Sequence(VariableExpression("x"), c("bar"))),
        ("bar x", Sequence(c("bar"), VariableExpression("x"))),
        ("x 'and' bar", Sequence(VariableExpression("x"), StrMatch("and"), c("bar"))),
        ("x | bar", OrderedChoice(VariableExpression("x"), c("bar"))),
        ("x*", ZeroOrMore(VariableExpression("x"))),
        ("x+", OneOrMore(VariableExpression("x"))),
        ("'str' = x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
        ("'str''='x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
        ("foo=x", VariableExpression("x")),
    ])
    def test_i_can_parse_regex_with_variable(self, expression, expected):
        """Identifiers that cannot be resolved to a concept parse to variable expressions."""
        # A variable is an identifier that cannot be resolved to a concept
        sheerka, _context, regex_parser, _bar = self.init_parser("bar")
        update_concepts_ids(sheerka, expected)

        # The raw string is parsed with a context obtained from get_context().
        result = regex_parser.parse(self.get_context(), expression)

        assert result.status
        assert result.value.value == expected
        assert result.value.source == expression

    @pytest.mark.parametrize("expression, expected", [
        ("foo", ConceptExpression("foo")),
        ("foo=f", ConceptExpression("foo", rule_name="f")),
    ])
    def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self, expression, expected):
        """With ``context.obj`` named "foo", "foo" parses to a name-based concept expression."""
        _sheerka, context, parser = self.init_parser()
        context.obj = ClassWithName("foo")

        result = parser.parse(context, Tokenizer(expression))

        self._assert_parsed(parser, result, expected, expression)

    @pytest.mark.parametrize("expression, error", [
        ("1 ", UnexpectedEofParsingError()),
        ("1|", UnexpectedEofParsingError()),
        ("(1|)", UnexpectedTokenParsingError("Unexpected token 'Token()'", eof_token, [TokenKind.RPAR])),
        ("1=", UnexpectedTokenParsingError("Unexpected token 'Token()'", eof_token, [TokenKind.IDENTIFIER])),
    ])
    def test_i_can_detect_errors(self, expression, error):
        """Malformed definitions produce an ERROR result whose first entry equals *error*."""
        sheerka, context, parser = self.init_parser()

        result = parser.parse(context, Tokenizer(expression))
        reported = result.body.body

        assert parser.has_error
        assert sheerka.isinstance(result.body, BuiltinConcepts.ERROR)
        assert not result.status
        assert reported[0] == error

    def test_i_can_detect_lexer_error(self):
        """An unterminated quote is reported as a ``LexerError`` inside an ERROR result."""
        sheerka, context, parser = self.init_parser()

        result = parser.parse(context, Tokenizer("'name"))

        assert not result.status
        assert sheerka.isinstance(result.body, BuiltinConcepts.ERROR)
        first_error = result.body.body[0]
        assert isinstance(first_error, LexerError)
        assert first_error.message == "Missing Trailing quote"
        assert first_error.text == "'name"

    def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
        """Parsed definitions can be installed as BNF and used by ``BnfNodeParser``."""
        _sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar")
        foo.get_metadata().definition_type = DEFINITION_TYPE_BNF
        bar.get_metadata().definition_type = DEFINITION_TYPE_BNF

        foo_bnf = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
        foo.set_bnf(foo_bnf)
        bar_bnf = regex_parser.parse(context, "foo ('one' | 'two')").value.value
        bar.set_bnf(bar_bnf)

        bnf_parser = BnfNodeParser()
        bnf_parser.init_from_concepts(context, [foo, bar])

        # (input text, name of the expected concept node, expected end position)
        scenarios = (
            ("twenty two", "bar", 2),
            ("thirty one", "bar", 2),
            ("twenty", "foo", 0),
        )
        for text, concept_name, end in scenarios:
            result = bnf_parser.parse(context, ParserInput(text))
            assert result.status
            compare_with_test_object(result.value.body, [CN(concept_name, text, 0, end)])

    def test_i_cannot_parse_when_too_many_concepts(self):
        """Two concepts sharing the name "foo" yield CANNOT_RESOLVE_CONCEPT."""
        first = Concept(name="foo", body="1")
        second = Concept(name="foo", body="2")
        _sheerka, context, regex_parser, _foo1, _foo2 = self.init_parser(first, second)

        result = regex_parser.parse(context, "foo")

        assert not result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
        assert result.value.body == ('key', 'foo')

    @pytest.mark.parametrize("text, expected", [
        ("c:foo:", {'key': 'foo'}),
        ("c:|1001:", {'id': '1001'}),
        ("c:foo|1001:", {'key': 'foo', 'id': '1001'}),
    ])
    def test_i_cannot_parse_when_unknown_concept(self, text, expected):
        """Explicit concept references with no initialised concept yield UNKNOWN_CONCEPT."""
        _sheerka, context, regex_parser = self.init_parser()

        # The raw string is parsed with a context obtained from get_context().
        result = regex_parser.parse(self.get_context(), text)

        assert not result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.UNKNOWN_CONCEPT)
        assert result.value.body == expected

    def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self):
        """A concept with isa members is still referenced as a plain ``ConceptExpression``."""
        sheerka, context, parser, _one, _two, number, twenties = self.init_parser(
            "one", "two", "number", "twenties")
        for member in ("one", "two", "twenties"):
            sheerka.set_isa(context, sheerka.new(member), number)

        # Parse inside a sub-context pushed for 'twenties'.
        sub_context = context.push(BuiltinConcepts.INIT_BNF, twenties)
        expression = "'twenty' number=n1"
        result = parser.parse(sub_context, Tokenizer(expression))

        assert not parser.has_error
        assert result.status
        parsed = result.value.value
        assert parsed == Sequence(StrMatch('twenty'), ConceptExpression(number, "n1"))