import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import BnfNodeParser
from parsers.BnfNodeParser import (
    StrMatch,
    Optional,
    ZeroOrMore,
    OrderedChoice,
    Sequence,
    OneOrMore,
    ConceptExpression,
)
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka


class ClassWithName:
    """Minimal object that only exposes a ``name`` attribute."""

    def __init__(self, name):
        self.name = name


def c(name, rule_name=None):
    """Build a ``ConceptExpression`` wrapping a freshly keyed ``Concept``.

    When ``rule_name`` is falsy, the concept name is used as the rule name.
    """
    keyed_concept = Concept(name).init_key()
    effective_rule_name = rule_name or name
    return ConceptExpression(keyed_concept, rule_name=effective_rule_name)


def update_concepts_ids(sheerka, parsing_expression):
    """Recursively fill in missing concept ids inside a parsing expression.

    For every ``ConceptExpression`` whose concept has no id, the concept is
    looked up by key in ``sheerka`` and its id is copied into the
    expression's concept metadata. All child elements are then visited.
    """
    lacks_id = (
        isinstance(parsing_expression, ConceptExpression)
        and not parsing_expression.concept.id
    )
    if lacks_id:
        known_concept = sheerka.get_by_key(parsing_expression.concept.key)
        parsing_expression.concept.get_metadata().id = known_concept.id

    for child in parsing_expression.elements:
        update_concepts_ids(sheerka, child)


eof_token = ""


class TestBnfParser(TestUsingMemoryBasedSheerka):
    """Tests for ``BnfDefinitionParser``."""

    def init_parser(self, *concepts):
        """Initialise the given concepts and return them with a new parser.

        Returns a tuple ``(sheerka, context, parser, *initialised_concepts)``.
        """
        sheerka, context, *initialised = self.init_concepts(*concepts, singleton=True)
        bnf_definition_parser = BnfDefinitionParser()
        return (sheerka, context, bnf_definition_parser, *initialised)

    @pytest.mark.parametrize("expression, expected", [
        ("'str'", StrMatch("str")),
        ("1", StrMatch("1")),
        (" 1", StrMatch("1")),
        (",", StrMatch(",")),
        ("'foo'?", Optional(StrMatch("foo"))),
        ("'foo'*", ZeroOrMore(StrMatch("foo"))),
        ("'foo'+", OneOrMore(StrMatch("foo"))),
        ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
        ("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
        ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
        ("1 2 | 3 4+", OrderedChoice(
            Sequence(StrMatch("1"), StrMatch("2")),
            Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
        ("1 (2 | 3) 4+", Sequence(
            StrMatch("1"),
            OrderedChoice(StrMatch("2"), StrMatch("3")),
            OneOrMore(StrMatch("4")))),
        ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
        ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
        ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
        ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
        ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
        ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
        ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
        ("(1 )", StrMatch("1")),

        ("'str'=var", StrMatch("str", rule_name="var")),
        ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
        ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
        ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
        ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
        ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
        ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
        ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
        ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
        ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
        ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
        ("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
        ("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
        ("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
        ("('a'*=x 'b'+=y)=z", Sequence(
            ZeroOrMore(StrMatch("a"), rule_name="x"),
            OneOrMore(StrMatch("b"), rule_name="y"),
            rule_name="z")),
        ("'--filter'", Sequence(
            StrMatch("-", skip_whitespace=False),
            StrMatch("-", skip_whitespace=False),
            StrMatch("filter"))),
    ])
    def test_i_can_parse_regex(self, expression, expected):
        """Literal-only definitions parse into the expected expression tree."""
        _sheerka, context, parser = self.init_parser()

        result = parser.parse(context, Tokenizer(expression))

        assert not parser.has_error
        assert result.status
        assert result.value.value == expected
        assert result.value.source == expression

    @pytest.mark.parametrize("expression, expected", [
        ("foo", c("foo")),
        ("foo*", ZeroOrMore(c("foo"))),
        ("foo 'and' bar+", Sequence(c("foo"), StrMatch("and"), OneOrMore(c("bar")))),
        ("foo | bar?", OrderedChoice(c("foo"), Optional(c("bar")))),
        ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
        ("foo=f", c("foo", "f")),
        ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
        ("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
    ])
    def test_i_can_parse_regex_with_concept(self, expression, expected):
        """Definitions that reference registered concepts parse as expected."""
        sheerka, context, parser, *_ = self.init_parser("foo", "bar", "var", "def")
        # The expected tree is built at import time, before the concepts are
        # registered, so their ids have to be filled in here.
        update_concepts_ids(sheerka, expected)

        result = parser.parse(context, Tokenizer(expression))

        assert not parser.has_error
        assert result.status
        assert result.value.value == expected
        assert result.value.source == expression

    @pytest.mark.parametrize("expression, expected", [
        ("foo", ConceptExpression("foo")),
        ("foo=f", ConceptExpression("foo", rule_name="f")),
    ])
    def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self, expression, expected):
        """A name matching ``context.obj`` parses although no concept is registered."""
        _sheerka, context, parser = self.init_parser()
        context.obj = ClassWithName("foo")

        result = parser.parse(context, Tokenizer(expression))

        assert not parser.has_error
        assert result.status
        assert result.value.value == expected
        assert result.value.source == expression

    @pytest.mark.parametrize("expression, error", [
        ("1 ", UnexpectedEofParsingError()),
        ("1|", UnexpectedEofParsingError()),
        ("(1|)", UnexpectedTokenParsingError("Unexpected token 'Token()'", eof_token, [TokenKind.RPAR])),
        ("1=", UnexpectedTokenParsingError("Unexpected token 'Token()'", eof_token, [TokenKind.IDENTIFIER])),
    ])
    def test_i_can_detect_errors(self, expression, error):
        """Malformed definitions yield an ERROR whose first entry is ``error``."""
        sheerka, context, parser = self.init_parser()

        result = parser.parse(context, Tokenizer(expression))
        reported_errors = result.body.body

        assert parser.has_error
        assert sheerka.isinstance(result.body, BuiltinConcepts.ERROR)
        assert not result.status
        assert reported_errors[0] == error

    def test_i_can_detect_lexer_error(self):
        """An unterminated quote is reported as a ``LexerError``."""
        sheerka, context, parser = self.init_parser()

        result = parser.parse(context, Tokenizer("'name"))

        assert not result.status
        assert sheerka.isinstance(result.body, BuiltinConcepts.ERROR)
        first_error = result.body.body[0]
        assert isinstance(first_error, LexerError)
        assert first_error.message == "Missing Trailing quote"
        assert first_error.text == "'name"

    def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
        """Parsed definitions can be installed as BNF and used by ``BnfNodeParser``."""
        _sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar")

        for bnf_concept in (foo, bar):
            bnf_concept.get_metadata().definition_type = DEFINITION_TYPE_BNF

        definitions = (
            (foo, "'twenty' | 'thirty'"),
            (bar, "foo ('one' | 'two')"),
        )
        for target, definition in definitions:
            target.set_bnf(regex_parser.parse(context, definition).value.value)

        bnf_parser = BnfNodeParser()
        bnf_parser.init_from_concepts(context, [foo, bar])

        # (input text, name of the matched concept, index of the last token)
        scenarios = (
            ("twenty two", "bar", 2),
            ("thirty one", "bar", 2),
            ("twenty", "foo", 0),
        )
        for text, concept_name, last_index in scenarios:
            result = bnf_parser.parse(context, ParserInput(text))
            assert result.status
            assert result.value.body == [cnode(concept_name, 0, last_index, text)]

    def test_i_cannot_parse_when_too_many_concepts(self):
        """Two concepts sharing the name ``foo`` make the reference unresolvable."""
        _sheerka, context, regex_parser, _foo1, _foo2 = self.init_parser(
            Concept(name="foo", body="1"),
            Concept(name="foo", body="2"))

        result = regex_parser.parse(context, "foo")

        assert not result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
        assert result.value.body == ('key', 'foo')

    def test_i_cannot_parse_when_unknown_concept(self):
        """Referencing a concept that was never registered is reported as unknown."""
        _sheerka, context, regex_parser = self.init_parser()

        # NOTE(review): this parses with a fresh context from get_context()
        # rather than the one returned by init_parser - confirm it is intended.
        result = regex_parser.parse(self.get_context(), "foo")

        assert not result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.UNKNOWN_CONCEPT)
        assert result.value.body == ('key', 'foo')

    def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self):
        """``number=n1`` parses to a ``ConceptExpression`` on the ``number`` concept."""
        sheerka, context, parser, _one, _two, number, twenties = self.init_parser(
            "one",
            "two",
            "number",
            "twenties")

        for member_name in ("one", "two", "twenties"):
            sheerka.set_isa(context, sheerka.new(member_name), number)

        sub_context = context.push(BuiltinConcepts.INIT_BNF, twenties)
        expression = "'twenty' number=n1"

        result = parser.parse(sub_context, Tokenizer(expression))

        assert not parser.has_error
        assert result.status
        parsed_expression = result.value.value
        assert parsed_expression == Sequence(StrMatch('twenty'), ConceptExpression(number, "n1"))