import pytest

from parsers.ParserInput import ParserInput
from parsers.parser_utils import parse_parts, strip_tokens
from parsers.tokenizer import Keywords, Tokenizer


def compare_results(actual, expected, compare_str=False):
    resolved_expected = {}
    for k, v in expected.items():
        if isinstance(v, str):
            # Case like {Keywords.DEF_VAR: "def_var var1 def_var var2"}:
            # tokenize the string, keeping the keyword token and dropping
            # the separator token right after it.
            tokens = list(Tokenizer(v, yield_eof=False))
            resolved_expected[k] = [tokens[0]] + tokens[2:]
        else:
            # Case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}
            resolved_expected[k] = v

    def get_better_representation(parts):
        # Reduce each part to [keyword repr, joined body] so that failures
        # show readable strings instead of raw Token objects.
        better_repr = {}
        for k, tokens in parts.items():
            joined = "".join(
                [t.str_value if compare_str else t.repr_value for t in tokens[1:]]
            )
            better_repr[k] = [tokens[0].repr_value, joined]
        return better_repr

    # It is easier to compare two lists of strings.
    actual_to_compare = get_better_representation(actual)
    expected_to_compare = get_better_representation(resolved_expected)
    assert actual_to_compare == expected_to_compare


def get_tokens(lst):
    """
    Return the list of Tokens produced by tokenizing the given items,
    without the trailing EOF token and with positions removed.

    :param lst: the items to tokenize
    :type lst: list[str]
    :return: the resulting tokens
    :rtype: list
    """
    return list(Tokenizer(lst, yield_eof=False).remove_positions())


@pytest.mark.parametrize("input_as_list, expected_as_list", [
    ([" "], []),
    ([" ", "one"], ["one"]),
    (["one", " "], ["one"]),
    ([" ", "one", " "], ["one"]),
    (["\n", "one"], ["one"]),
    (["one", "\n"], ["one"]),
    (["\n", "one", "\n"], ["one"]),
    ([" ", "\n", "one"], ["one"]),
    (["one", " ", "\n"], ["one"]),
    ([" ", "\n", "one", " ", "\n"], ["one"]),
    (["\n", " ", "one"], ["one"]),
    (["one", "\n", " "], ["one"]),
    (["\n", " ", "one", "\n", " "], ["one"]),
    ([" ", "\n", " ", "one"], ["one"]),
    (["one", " ", "\n", " "], ["one"]),
    ([" ", "\n", " ", "one", " ", "\n", " "], ["one"]),
    (["\n", " ", "\n", "one"], ["one"]),
    (["one", "\n", " ", "\n"], ["one"]),
    (["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]),
])
def test_i_can_strip(input_as_list, expected_as_list):
    # A hedged reference sketch of the stripping contract checked here
    # appears at the end of this module.
    actual = strip_tokens(get_tokens(input_as_list))  # KSI 20201007 Why not use Tokenizer?! For perf?
    expected = get_tokens(expected_as_list)
    assert actual == expected


@pytest.mark.parametrize("text, strip, expected", [
    ("when xxx yyy", False, {Keywords.WHEN: "when xxx yyy"}),
    ("when uuu vvv print xxx yyy", False,
     {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
    ("print xxx yyy when uuu vvv", False,
     {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
    (" when xxx", False, {Keywords.WHEN: "when xxx"}),
    ("when xxx yyy", True, {Keywords.WHEN: "when xxx yyy"}),
    ("when uuu vvv print xxx yyy", True,
     {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
    ("print xxx yyy when uuu vvv", True,
     {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
    (" when xxx", True, {Keywords.WHEN: "when xxx"}),
])
def test_i_can_get_parts(text, strip, expected):
    parser_input = ParserInput(text)
    parser_input.init()
    parser_input.next_token()
    error_sink = []
    res = parse_parts(parser_input, error_sink, ["when", "print"], strip=strip)
    compare_results(res, expected)
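
# The parametrized cases in test_i_can_strip pin down the strip_tokens
# contract: leading and trailing whitespace/newline tokens are dropped,
# interior tokens are kept. The sketch below is NOT the real implementation
# (that lives in parsers.parser_utils); it is a minimal, hypothetical
# illustration that only assumes each Token exposes the `str_value`
# attribute already used by compare_results above.
def _strip_tokens_sketch(tokens):
    def _is_blank(token):
        # Assumed predicate: a token is blank when its string value
        # contains only whitespace (spaces, newlines, ...).
        return token.str_value.strip() == ""

    # Advance past blank tokens from the front, then back off blank
    # tokens from the end; return the interior slice untouched.
    start, end = 0, len(tokens)
    while start < end and _is_blank(tokens[start]):
        start += 1
    while end > start and _is_blank(tokens[end - 1]):
        end -= 1
    return tokens[start:end]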