"""Tests for BnfParser: parsing BNF-like expressions into parsing expressions.

The resulting expressions (StrMatch, Sequence, OrderedChoice, ...) are also
fed to ConceptLexerParser to check that they can be used to parse text.
"""
import pytest

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import (
    StrMatch,
    Optional,
    ZeroOrMore,
    OrderedChoice,
    Sequence,
    OneOrMore,
    ConceptLexerParser,
    ConceptNode,
    ConceptExpression,
    cnode,
)
from sdp.sheerkaDataProvider import Event


def get_context():
    """Return a fresh ExecutionContext bound to a new Sheerka on "mem://"."""
    sheerka = Sheerka(skip_builtins_in_db=True)
    sheerka.initialize("mem://")
    return ExecutionContext("sheerka", Event(), sheerka)


class ClassWithName:
    """Minimal object exposing only a ``name`` attribute.

    Used as ``context.obj`` in the "concept still under definition" test.
    """

    def __init__(self, name):
        self.name = name


@pytest.mark.parametrize("expression, expected", [
    ("'str'", StrMatch("str")),
    ("1", StrMatch("1")),
    (" 1", StrMatch("1")),
    (",", StrMatch(",")),
    ("'foo'?", Optional(StrMatch("foo"))),
    ("'foo'*", ZeroOrMore(StrMatch("foo"))),
    ("'foo'+", OneOrMore(StrMatch("foo"))),
    ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
    ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
    ("1 2 | 3 4+", OrderedChoice(
        Sequence(StrMatch("1"), StrMatch("2")),
        Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
    ("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
    ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
    ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
    ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
    ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
    ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
    ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
    ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
    ("(1 )", StrMatch("1")),
    ("'str'=var", StrMatch("str", rule_name="var")),
    ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
    ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
    ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
    ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
    ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
    ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
    ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
    ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
    ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
    ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
    ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
    ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
    ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
])
def test_i_can_parse_regex(expression, expected):
    """Each expression parses without error into the expected parsing expression."""
    parser = BnfParser()
    res = parser.parse(get_context(), Tokenizer(expression))

    assert not parser.has_error
    assert res.status
    assert res.value.value == expected
    assert res.value.source == expression


@pytest.mark.parametrize("expression, expected", [
    ("foo", Concept("foo").init_key()),
    ("foo*", ZeroOrMore(Concept("foo").init_key())),
    ("foo 'and' bar+", Sequence(Concept("foo").init_key(), StrMatch("and"), OneOrMore(Concept("bar").init_key()))),
    ("foo | bar?", OrderedChoice(Concept("foo").init_key(), Optional(Concept("bar").init_key()))),
    ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), Concept("var").init_key())),
    ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), Concept("var").init_key())),
])
def test_i_can_parse_regex_with_concept(expression, expected):
    """Identifiers naming concepts present in the cache parse to those concepts."""
    foo = Concept("foo")
    bar = Concept("bar")
    var = Concept("var")
    context = get_context()
    for c in (foo, bar, var):
        context.sheerka.add_in_cache(c)

    parser = BnfParser()
    res = parser.parse(context, Tokenizer(expression))

    assert not parser.has_error
    assert res.status
    assert res.value.value == expected
    assert res.value.source == expression


def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition():
    """An identifier equal to ``context.obj.name`` parses to a ConceptExpression."""
    expression = "foo"
    expected = ConceptExpression("foo")
    context = get_context()
    # "foo" is not added to the cache; it is only reachable through context.obj.
    context.obj = ClassWithName("foo")

    parser = BnfParser()
    res = parser.parse(context, Tokenizer(expression))

    assert not parser.has_error
    assert res.status
    assert res.value.value == expected
    assert res.value.source == expression


@pytest.mark.parametrize("expression, error", [
    ("1 ", UnexpectedEndOfFileError()),
    ("1|", UnexpectedEndOfFileError()),
    ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token()'", [TokenKind.RPAR])),
    ("1=", UnexpectedTokenErrorNode("Unexpected token 'Token()'", [TokenKind.IDENTIFIER])),
    ("'name", LexerError("Missing Trailing quote", "'name", 5, 1, 6))
])
def test_i_can_detect_errors(expression, error):
    """Malformed expressions fail, and the first reported error is the expected one."""
    parser = BnfParser()
    res = parser.parse(get_context(), Tokenizer(expression))
    ret_value = res.value.value

    assert parser.has_error
    assert not res.status
    assert ret_value[0] == error


def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
    """Definitions produced by BnfParser can initialize a ConceptLexerParser."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    context = get_context()
    context.sheerka.add_in_cache(foo)
    context.sheerka.add_in_cache(bar)

    regex_parser = BnfParser()
    foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
    bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value

    concepts = {bar: bar_definition, foo: foo_definition}
    concept_parser = ConceptLexerParser()
    concept_parser.initialize(context, concepts)

    res = concept_parser.parse(context, "twenty two")
    assert res.status
    assert res.value.body == [cnode("bar", 0, 2, "twenty two")]

    res = concept_parser.parse(context, "thirty one")
    assert res.status
    assert res.value.body == [cnode("bar", 0, 2, "thirty one")]

    res = concept_parser.parse(context, "twenty")
    assert res.status
    assert res.value.body == [cnode("foo", 0, 0, "twenty")]


def test_i_cannot_parse_when_too_many_concepts():
    """Parsing fails with CANNOT_RESOLVE_CONCEPT when a key maps to several concepts."""
    foo1 = Concept(name="foo", body="1")
    foo2 = Concept(name="foo", body="2")
    context = get_context()
    # Register two concepts under the same key directly in the cache.
    context.sheerka.cache_by_key["foo"] = [foo1, foo2]

    regex_parser = BnfParser()
    res = regex_parser.parse(context, "foo")

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
    assert res.value.body == ('key', 'foo')


def test_i_cannot_parse_when_unknown_concept():
    """Parsing fails with UNKNOWN_CONCEPT when the identifier matches no concept."""
    context = get_context()

    regex_parser = BnfParser()
    # Parse with the same context that is used below to inspect the result,
    # instead of a second throwaway context.
    res = regex_parser.parse(context, "foo")

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body == ('key', 'foo')