"""Tests for BaseCustomGrammarParser: keyword-part splitting (`get_parts`) and
indented-body extraction (`get_body`)."""
import pytest
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, TokenKind
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound
from parsers.BaseParser import UnexpectedEofParsingError, UnexpectedTokenParsingError
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka


def get_tokens(items):
    """Return the first token produced by tokenizing each item in *items*."""
    return [list(Tokenizer(item, yield_eof=False))[0] for item in items]


class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka):
    """Exercises `get_parts` / `get_body` of BaseCustomGrammarParser against an
    in-memory Sheerka instance."""

    @staticmethod
    def compare_results(actual, expected, compare_str=False):
        """Assert that parser output *actual* matches *expected*.

        Expected values may be raw strings (tokenized here, with the whitespace
        token right after the keyword dropped) or pre-built token lists.
        When *compare_str* is true, tokens are compared by `str_value` instead
        of `repr_value`.
        """
        resolved_expected = {}
        for keyword, spec in expected.items():
            if isinstance(spec, str):
                # case like {Keywords.DEF_VAR: "def_var var1 def_var var2"}
                tokens = list(Tokenizer(spec, yield_eof=False))
                # drop tokens[1]: the whitespace token following the keyword
                resolved_expected[keyword] = [tokens[0]] + tokens[2:]
            else:
                # case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}
                resolved_expected[keyword] = spec

        def get_better_representation(parts):
            # Map each keyword to [keyword repr, joined remaining tokens].
            better_repr = {}
            for keyword, tokens in parts.items():
                joined = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
                better_repr[keyword] = [tokens[0].repr_value, joined]
            return better_repr

        # it's easier to compare two lists of strings
        actual_to_compare = get_better_representation(actual)
        expected_to_compare = get_better_representation(resolved_expected)
        assert actual_to_compare == expected_to_compare

    def init_parser(self, text):
        """Build a fresh parser over *text*; return (sheerka, context, parser)."""
        sheerka, context = self.init_concepts()
        parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0)
        parser.reset_parser(context, ParserInput(text))
        parser.parser_input.next_token(False)  # do not skip starting whitespaces
        return sheerka, context, parser

    @pytest.mark.parametrize("text, strip_tokens, expected", [
        ("when xxx yyy", False, {Keywords.WHEN: "when xxx yyy"}),
        ("when uuu vvv print xxx yyy", False,
         {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
        ("print xxx yyy when uuu vvv", False,
         {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
        (" when xxx", False, {Keywords.WHEN: "when xxx"}),
        ("when xxx yyy", True, {Keywords.WHEN: "when xxx yyy"}),
        ("when uuu vvv print xxx yyy", True,
         {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
        ("print xxx yyy when uuu vvv", True,
         {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
        (" when xxx", True, {Keywords.WHEN: "when xxx"}),
    ])
    def test_i_can_get_parts(self, text, strip_tokens, expected):
        sheerka, context, parser = self.init_parser(text)
        res = parser.get_parts(["when", "print"], strip_tokens=strip_tokens)
        self.compare_results(res, expected)

    def test_i_can_get_parts_when_multilines(self):
        text = """when def func(x):
\treturn x+1
func(a)
"""
        expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"}
        sheerka, context, parser = self.init_parser(text)
        res = parser.get_parts(["when"])
        self.compare_results(res, expected)

    @pytest.mark.parametrize("text, allow_multiple, expected", [
        ("def_var var1 def_var var2", {},
         {Keywords.DEF_VAR: "def_var var1 def_var var2"}),
        ("def_var var1 def_var var2", {"def_var"},
         {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}),
        ("def_var x y z def_var var2", {"def_var"},
         {Keywords.DEF_VAR: get_tokens(["def_var", "'x y z'", "var2"])}),
        ("def_var 'x y z' def_var var2", {"def_var"},
         {Keywords.DEF_VAR: get_tokens(["def_var", "'x y z'", "var2"])}),
        ("def_var var1 def_var x y z def_var var2", {"def_var"},
         {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "'x y z'", "var2"])}),
    ])
    def test_i_can_get_parts_when_allow_multiple_is_set(self, text, allow_multiple, expected):
        sheerka, context, parser = self.init_parser(text)
        res = parser.get_parts(["def_var"], allow_multiple=allow_multiple)
        self.compare_results(res, expected)

    @pytest.mark.parametrize("text", [
        "",
        "no keyword",
        "anything before when xxx print yyy",
    ])
    def test_i_cannot_get_parts_when_no_keyword_found(self, text):
        sheerka, context, parser = self.init_parser(text)
        assert parser.get_parts(["when", "print"]) is None
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], KeywordNotFound)
        assert parser.error_sink[0].keywords == ['when', 'print']

    def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self):
        sheerka, context, parser = self.init_parser("when xxx print yyy")
        assert parser.get_parts(["when", "print"], Keywords.PRINT) is None
        assert parser.error_sink == [
            UnexpectedTokenParsingError("'print' keyword not found.", "when", [Keywords.PRINT])]

    @pytest.mark.parametrize("text", [
        "print",
        "print ",
        "when xxx print",
        "when xxx print ",
    ])
    def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text):
        sheerka, context, parser = self.init_parser(text)
        assert parser.get_parts(["print", "when"]) is not None
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], UnexpectedEofParsingError)
        assert parser.error_sink[0].message == "While parsing keyword 'print'."

    def test_i_can_double_quoted_strings_are_expanded(self):
        """
        When inside a double quote, the double quote is removed and its content is used as is.
        It allows usage of keywords within parts
        :return:
        """
        sheerka, context, parser = self.init_parser('print "when can be used" when True')
        expected = {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected)

    def test_single_quoted_strings_are_not_expanded(self):
        sheerka, context, parser = self.init_parser("print 'when can be used' when True")
        expected = {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected)

    def test_i_can_manage_colon(self):
        # NOTE(review): fixture indentation reconstructed from the expected
        # normalized output (one space per level) — verify against original.
        text = """when:
 xxx
 when
 print
print:
 xxx:
  when
  print
 yyy
"""
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy",
                    Keywords.WHEN: "when xxx\nwhen\nprint"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)

    def test_indentation_is_normalized_when_using_colon(self):
        # NOTE(review): wide indentation on purpose — the parser should
        # normalize it to one space per level; verify original fixture widths.
        text = """print:
    xxx:
        when
        print
    yyy
"""
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)

    def test_i_can_mix_parts_with_colon_and_parts_without_colon(self):
        # NOTE(review): fixture indentation reconstructed — verify spacing.
        text = """when:
 xxx
 when
 print
print xxx"""
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)

    @pytest.mark.parametrize("text", [
        "when:\nx x",
        "when: \nx x",
    ])
    def test_i_cannot_manage_colon_when_tab_is_missing(self, text):
        sheerka, context, parser = self.init_parser(text)
        assert parser.get_parts(["when"])
        assert parser.error_sink == [
            UnexpectedTokenParsingError("Indentation not found.", "x", [TokenKind.WHITESPACE])]

    @pytest.mark.parametrize("text", [
        "",
        "\n",
        " \n",
        "x",  # less than two characters
        "\n\t",
    ])
    def test_i_cannot_get_body_when_body_is_too_short(self, text):
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
        assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")]

    def test_a_new_line_is_expected_when_get_body(self):
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer("not a newline", yield_eof=False))) is None
        assert parser.error_sink == [
            UnexpectedTokenParsingError("New line not found.", "not", [TokenKind.NEWLINE])]

    @pytest.mark.parametrize("text", [
        "\nx x",
        " \nx x",
    ])
    def test_tab_is_mandatory_after_new_line_when_get_body(self, text):
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
        assert parser.error_sink == [
            UnexpectedTokenParsingError("Indentation not found.", "x", [TokenKind.WHITESPACE])]

    def test_i_can_detect_missing_tab_when_get_body(self):
        text = "\n\txxx\n\tyyy\nzzz"
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
        assert parser.error_sink == [
            UnexpectedTokenParsingError("Indentation not found.", "zzz", [TokenKind.WHITESPACE])]

    def test_i_can_detect_invalid_indentation_when_get_body(self):
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))) is None
        assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")]

    def test_i_can_get_body(self):
        sheerka, context, parser = self.init_parser("")
        res = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False)))
        expected = list(Tokenizer("xxx\n yyyy", yield_eof=False))
        expected[2].value = ""  # body extraction strips the indentation token's value
        assert [t.repr_value for t in res] == [t.repr_value for t in expected]
        assert parser.error_sink == []