# Sheerka-Old/tests/parsers/test_BaseParser.py
# Tests for BaseParser / BaseSplitIterParser (85 lines, Python).
import pytest
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseParser import BaseParser, BaseSplitIterParser
@pytest.mark.parametrize("text, expected_text", [
    ("hello world", "hello world"),
    ("'hello' 'world'", "'hello' 'world'"),
    ("def concept a from", "def concept a from"),
    ("()[]{}1=1.5+-/*><&é", "()[]{}1=1.5+-/*><&é"),
    ("execute(c:concept_name:)", "execute(c:concept_name:)"),
])
def test_i_can_get_text_from_tokens(text, expected_text):
    """Tokenizing input and rendering the tokens back yields the original text."""
    rendered = BaseParser.get_text_from_tokens(list(Tokenizer(text)))
    assert rendered == expected_text
@pytest.mark.parametrize("text, custom, expected_text", [
    ("execute(c:concept_name:)", {TokenKind.CONCEPT: lambda t: f"__C__{t.value[0]}"}, "execute(__C__concept_name)"),
])
def test_i_can_get_text_from_tokens_with_custom_switcher(text, custom, expected_text):
    """A custom per-kind switcher overrides the default rendering for that token kind."""
    token_stream = list(Tokenizer(text))
    assert BaseParser.get_text_from_tokens(token_stream, custom) == expected_text
@pytest.mark.parametrize("text, expected", [
    ("", ["<eof>"]),
    ("one two -f --file", ["one", "two", "-f", "--file", "<eof>"]),
    ("one 'two three'", ["one", "two three", "<eof>"]),
    ('one "two three"', ["one", "two three", "<eof>"]),
    ('one\\ two three"', ["one two", "three", "<eof>"]),
    ("one 'two\\' three'", ["one", "two' three", "<eof>"]),
    ("one\\\\two three", ["one\\two", "three", "<eof>"]),
    ("one\ntwo three", ["one", "two", "three", "<eof>"]),
    ("one \n two three", ["one", "two", "three", "<eof>"]),
    ("'one \n two' three", ["one \n two", "three", "<eof>"]),
    ("a=b", ["a", "=", "b", "<eof>"]),
    ("a = b", ["a", "=", "b", "<eof>"]),
    ("a==b", ["a", "==", "b", "<eof>"]),
    ("a == b", ["a", "==", "b", "<eof>"]),
])
def test_i_can_split_using_base_split_iterparser_class(text, expected):
    """split() honours quoting, escapes, newlines and =/== operators, ending in <eof>."""
    splitter = BaseSplitIterParser("BaseSplitIterParser", 0)
    splitter.reset_parser(None, text)
    assert [token.value for token in splitter.split()] == expected
def test_i_can_test_split_iter_parser_indexes():
    """Each emitted token carries the correct offset, line and column data."""
    splitter = BaseSplitIterParser("BaseSplitIterParser", 0)
    splitter.reset_parser(None, "one two \n three = ==(),")
    collected = []
    while splitter.next_token():
        collected.append(splitter.get_token())
    expected = [
        Token(TokenKind.WORD, "one", 0, 1, 1),
        Token(TokenKind.WORD, "two", 4, 1, 5),
        Token(TokenKind.WORD, "three", 10, 2, 2),
        Token(TokenKind.EQUALS, "=", 16, 2, 8),
        Token(TokenKind.EQUALSEQUALS, "==", 18, 2, 10),
        Token(TokenKind.LPAR, "(", 20, 2, 12),
        Token(TokenKind.RPAR, ")", 21, 2, 13),
        Token(TokenKind.COMMA, ",", 22, 2, 14),
    ]
    # Same checks as asserting each index individually: a short or mismatched
    # prefix fails, extra trailing tokens (if any) are not constrained.
    assert collected[:len(expected)] == expected
@pytest.mark.parametrize("tokens, expected", [
    (None, None),
    ([], (0, 0)),
    (list(Tokenizer("")), (0, 0)),
    (list(Tokenizer("", yield_eof=False)), (0, 0)),
    (list(Tokenizer(" a")), (1, 1)),
    (list(Tokenizer(" a", yield_eof=False)), (1, 1)),
    (list(Tokenizer("a ")), (0, 0)),
    (list(Tokenizer("a ", yield_eof=False)), (0, 0)),
    (list(Tokenizer(" a ")), (1, 1)),
    (list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
])
def test_i_can_get_tokens_boundaries(tokens, expected):
    """get_tokens_boundaries() yields (start, end) index pairs; None passes through."""
    boundaries = BaseParser.get_tokens_boundaries(tokens)
    assert boundaries == expected