import pytest

from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import (
    StrMatch,
    Optional,
    ZeroOrMore,
    OrderedChoice,
    Sequence,
    OneOrMore,
    ConceptLexerParser,
    ConceptNode,
    ConceptMatch,
)


def get_context():
    """Return an ExecutionContext wrapping a Sheerka initialized on "mem://"."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("sheerka", "xxxx", engine)


# (expression, expected parsing expression) pairs for test_i_can_parse_regex.
_VALID_EXPRESSIONS = [
    # Plain terminals.
    ("'str'", StrMatch("str")),
    ("1", StrMatch("1")),
    (" 1", StrMatch("1")),
    (",", StrMatch(",")),

    # Repetition suffixes attached to a terminal.
    ("'foo'?", Optional(StrMatch("foo"))),
    ("'foo'*", ZeroOrMore(StrMatch("foo"))),
    ("'foo'+", OneOrMore(StrMatch("foo"))),

    # Choices, sequences and grouping.
    ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1'|' 2 '|' 3",
     Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
    ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
    ("1 2 | 3 4+",
     OrderedChoice(
         Sequence(StrMatch("1"), StrMatch("2")),
         Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
    ("1 (2 | 3) 4+",
     Sequence(
         StrMatch("1"),
         OrderedChoice(StrMatch("2"), StrMatch("3")),
         OneOrMore(StrMatch("4")))),
    ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
    ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),

    # A suffix character separated by a space is expected to be a literal.
    ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
    ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
    ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
    ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
    ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
    ("(1 )", StrMatch("1")),

    # Bare identifiers are expected to become concept references.
    ("foo", ConceptMatch("foo")),
    ("foo*", ZeroOrMore(ConceptMatch("foo"))),
    ("foo 'and' bar+",
     Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))),
    ("foo | bar?", OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))),

    # '=' surrounded by spaces, or quoted, is expected to be a literal.
    ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))),
    ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))),

    # '=name' glued to an element is expected to set that element's rule_name.
    ("'str'=var", StrMatch("str", rule_name="var")),
    ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
    ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
    ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
    ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
    ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
    ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
    ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
    ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
    ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
    ("(1 | 2 | 3)=var",
     OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
    ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
    ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
    ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
]

# (expression, first expected error) pairs for test_i_can_detect_errors.
_INVALID_EXPRESSIONS = [
    ("1 ", UnexpectedEndOfFileError()),
    ("1|", UnexpectedEndOfFileError()),
    ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token()'", [TokenKind.RPAR])),
    ("1=", UnexpectedTokenErrorNode("Unexpected token 'Token()'", [TokenKind.IDENTIFIER])),
]


@pytest.mark.parametrize("expression, expected", _VALID_EXPRESSIONS)
def test_i_can_parse_regex(expression, expected):
    """A valid expression parses cleanly into the expected parsing expression.

    The result must also carry the original expression text as its source.
    """
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(expression))

    assert not bnf_parser.has_error
    assert outcome.status
    assert outcome.value.value == expected
    assert outcome.value.source == expression


@pytest.mark.parametrize("expression, error", _INVALID_EXPRESSIONS)
def test_i_can_detect_errors(expression, error):
    """A malformed expression fails and reports the expected error first."""
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(expression))
    reported = outcome.value.value

    assert bnf_parser.has_error
    assert not outcome.status
    assert reported[0] == error


def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
    """Definitions produced by BnfParser can drive ConceptLexerParser."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")

    context = get_context()
    for concept in (foo, bar):
        context.sheerka.add_in_cache(concept)

    # Build both grammar definitions with a single BnfParser instance.
    regex_parser = BnfParser()
    foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
    bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value

    concept_parser = ConceptLexerParser()
    concept_parser.initialize(context, {bar: bar_definition, foo: foo_definition})

    # (input text, single entry expected in the parsed body)
    expectations = [
        ("twenty two", (bar, 0, 2, "twenty two")),
        ("thirty one", (bar, 0, 2, "thirty one")),
        ("twenty", (foo, 0, 0, "twenty")),
    ]
    for text, match in expectations:
        outcome = concept_parser.parse(context, text)
        assert outcome.status
        assert outcome.value.body == [match]