"""Tests for BaseParser token-to-text rendering and BaseSplitIterParser splitting."""
import pytest

from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseParser import BaseParser, BaseSplitIterParser


@pytest.mark.parametrize("text, expected_text", [
    ("hello world", "hello world"),
    ("'hello' 'world'", "'hello' 'world'"),
    ("def concept a from", "def concept a from"),
    ("()[]{}1=1.5+-/*><&é", "()[]{}1=1.5+-/*><&é"),
    ("execute(c:concept_name:)", "execute(c:concept_name:)"),
])
def test_i_can_get_text_from_tokens(text, expected_text):
    # Tokenizing and rendering back should round-trip the original text.
    tokens = list(Tokenizer(text))
    assert BaseParser.get_text_from_tokens(tokens) == expected_text


@pytest.mark.parametrize("text, custom, expected_text", [
    (
        "execute(c:concept_name:)",
        {TokenKind.CONCEPT: lambda t: f"__C__{t.value[0]}"},
        "execute(__C__concept_name)",
    ),
])
def test_i_can_get_text_from_tokens_with_custom_switcher(text, custom, expected_text):
    # A custom switcher entry overrides how tokens of a given kind are rendered.
    tokens = list(Tokenizer(text))
    assert BaseParser.get_text_from_tokens(tokens, custom) == expected_text


@pytest.mark.parametrize("text, expected", [
    ("", [""]),
    ("one two -f --file", ["one", "two", "-f", "--file", ""]),
    # Quoting groups several words into a single token.
    ("one 'two three'", ["one", "two three", ""]),
    ('one "two three"', ["one", "two three", ""]),
    # A backslash escapes a space, a quote, or another backslash.
    ('one\\ two three"', ["one two", "three", ""]),
    ("one 'two\\' three'", ["one", "two' three", ""]),
    ("one\\\\two three", ["one\\two", "three", ""]),
    # Newlines split like spaces, except inside quotes.
    ("one\ntwo three", ["one", "two", "three", ""]),
    ("one \n two three", ["one", "two", "three", ""]),
    ("'one \n two' three", ["one \n two", "three", ""]),
    # `=` and `==` are standalone tokens, with or without surrounding spaces.
    ("a=b", ["a", "=", "b", ""]),
    ("a = b", ["a", "=", "b", ""]),
    ("a==b", ["a", "==", "b", ""]),
    ("a == b", ["a", "==", "b", ""]),
])
def test_i_can_split_using_base_split_iterparser_class(text, expected):
    parser = BaseSplitIterParser("BaseSplitIterParser", 0)
    parser.reset_parser(None, text)
    res = [t.value for t in parser.split()]
    assert res == expected


def test_i_can_test_split_iter_parser_indexes():
    parser = BaseSplitIterParser("BaseSplitIterParser", 0)
    text = "one two \n three = ==(),"
    parser.reset_parser(None, text)
    res = []
    while parser.next_token():
        res.append(parser.get_token())
    # Expected tokens carry the 0-based character offset and the 1-based
    # line/column at which each token starts.
    assert res[0] == Token(TokenKind.WORD, "one", 0, 1, 1)
    assert res[1] == Token(TokenKind.WORD, "two", 4, 1, 5)
    assert res[2] == Token(TokenKind.WORD, "three", 10, 2, 2)
    assert res[3] == Token(TokenKind.EQUALS, "=", 16, 2, 8)
    assert res[4] == Token(TokenKind.EQUALSEQUALS, "==", 18, 2, 10)
    assert res[5] == Token(TokenKind.LPAR, "(", 20, 2, 12)
    assert res[6] == Token(TokenKind.RPAR, ")", 21, 2, 13)
    assert res[7] == Token(TokenKind.COMMA, ",", 22, 2, 14)