"""Tests for BnfParser expression parsing and for using its output with ConceptLexerParser."""
import pytest

from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import (
    StrMatch,
    Optional,
    ZeroOrMore,
    OrderedChoice,
    Sequence,
    OneOrMore,
    ConceptLexerParser,
    ConceptNode,
    ConceptMatch,
)


def get_context():
    """Return a fresh ExecutionContext wrapping a Sheerka initialized on "mem://"."""
    instance = Sheerka(skip_builtins_in_db=True)
    instance.initialize("mem://")
    return ExecutionContext("sheerka", "xxxx", instance)


# (expression, expected parse tree) pairs for expressions that must parse cleanly.
_VALID_EXPRESSIONS = [
    # single literals
    ("'str'", StrMatch("str")),
    ("1", StrMatch("1")),
    (" 1", StrMatch("1")),
    (",", StrMatch(",")),
    # a modifier directly attached to a literal
    ("'foo'?", Optional(StrMatch("foo"))),
    ("'foo'*", ZeroOrMore(StrMatch("foo"))),
    ("'foo'+", OneOrMore(StrMatch("foo"))),
    # choices and sequences
    ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
    (
        "1 2 | 3 4+",
        OrderedChoice(
            Sequence(StrMatch("1"), StrMatch("2")),
            Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))),
        ),
    ),
    # parenthesised groups
    (
        "1 (2 | 3) 4+",
        Sequence(
            StrMatch("1"),
            OrderedChoice(StrMatch("2"), StrMatch("3")),
            OneOrMore(StrMatch("4")),
        ),
    ),
    ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
    ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
    # a modifier character separated by a space is expected as a literal
    ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
    ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
    ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
    ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
    # punctuation literals
    ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
    ("(1 )", StrMatch("1")),
]

# (expression, first expected error) pairs for expressions that must be rejected.
_INVALID_EXPRESSIONS = [
    ("1 ", UnexpectedEndOfFileError()),
    ("1|", UnexpectedEndOfFileError()),
    ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
]


@pytest.mark.parametrize("expression, expected", _VALID_EXPRESSIONS)
def test_i_can_parse_regex(expression, expected):
    """A valid expression parses without error into the expected tree and keeps its source."""
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(expression))

    assert not bnf_parser.has_error
    assert outcome.status
    assert outcome.value.value == expected
    assert outcome.value.source == expression


@pytest.mark.parametrize("expression, error", _INVALID_EXPRESSIONS)
def test_i_can_detect_errors(expression, error):
    """An invalid expression flags the parser and reports the expected error first."""
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(expression))
    reported = outcome.value.value

    assert bnf_parser.has_error
    assert not outcome.status
    assert reported[0] == error


def test_i_can_parse_regex_with_reference():
    """A bare identifier parses to a ConceptMatch on that name."""
    expression = "foo"

    outcome = BnfParser().parse(get_context(), Tokenizer(expression))

    assert outcome.status
    assert outcome.value.value == ConceptMatch("foo")
    assert outcome.value.source == expression


def test_i_can_parse_cross_ref_with_modifier():
    """A modifier attached to an identifier wraps the ConceptMatch."""
    expression = "foo*"

    outcome = BnfParser().parse(get_context(), Tokenizer(expression))

    assert outcome.status
    assert outcome.value.value == ZeroOrMore(ConceptMatch("foo"))
    assert outcome.value.source == expression


def test_i_can_parse_sequence_with_cross_ref():
    """Identifiers and quoted literals can be mixed inside one sequence."""
    expression = "foo 'and' bar+"
    expected = Sequence(
        ConceptMatch("foo"),
        StrMatch("and"),
        OneOrMore(ConceptMatch("bar")),
    )

    outcome = BnfParser().parse(get_context(), Tokenizer(expression))

    assert outcome.status
    assert outcome.value.value == expected
    assert outcome.value.source == expression


def test_i_can_parse_choice_with_cross_ref():
    """A choice between two cached concepts parses to an OrderedChoice of ConceptMatch."""
    context = get_context()
    for concept in (Concept("foo"), Concept("bar")):
        context.sheerka.add_in_cache(concept)

    expression = "foo | bar?"
    expected = OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))

    outcome = BnfParser().parse(context, Tokenizer(expression))

    assert outcome.status
    assert outcome.value.value == expected
    assert outcome.value.source == expression


def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
    """Definitions produced by BnfParser can drive a ConceptLexerParser over plain text."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    context = get_context()
    context.sheerka.add_in_cache(foo)
    context.sheerka.add_in_cache(bar)

    # The expressions are handed over as plain strings here, not wrapped in a Tokenizer.
    regex_parser = BnfParser()
    foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
    bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value

    concept_parser = ConceptLexerParser()
    concept_parser.initialize(context, {bar: bar_definition, foo: foo_definition})

    # (input text, expected body of the parse result), checked in this order.
    scenarios = [
        ("twenty two", [(bar, 0, 2, "twenty two")]),
        ("thirty one", [(bar, 0, 2, "thirty one")]),
        ("twenty", [(foo, 0, 0, "twenty")]),
    ]
    for text, expected_body in scenarios:
        outcome = concept_parser.parse(context, text)
        assert outcome.status
        assert outcome.value.body == expected_body