Fixed #9: I can parse 'def concept'
This commit is contained in:
@@ -0,0 +1,95 @@
|
||||
import pytest
|
||||
|
||||
from parsers.ParserInput import ParserInput
|
||||
from parsers.parser_utils import parse_parts, strip_tokens
|
||||
from parsers.tokenizer import Keywords, Tokenizer
|
||||
|
||||
|
||||
def compare_results(actual, expected, compare_str=False):
    """Assert that an actual keyword->tokens mapping matches the expected one.

    ``expected`` values may be given in two shapes:

    * a plain string, e.g. ``{Keywords.DEF_VAR: "def_var var1 def_var var2"}`` —
      it is tokenized here, and the whitespace token sitting between the
      keyword and the rest (index 1) is dropped;
    * an already-built token list, e.g.
      ``{Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}`` —
      used as-is.

    :param actual: mapping of keyword -> list of tokens produced by the parser
    :param expected: mapping of keyword -> str or list of tokens (see above)
    :param compare_str: when True compare tokens by ``str_value``,
        otherwise by ``repr_value``
    :raises AssertionError: when the two mappings differ
    """
    resolved_expected = {}
    for key, spec in expected.items():
        if isinstance(spec, str):
            # case like {Keywords.DEF_VAR: "def_var var1 def_var var2"}
            tokens = list(Tokenizer(spec, yield_eof=False))
            # Skip tokens[1]: the whitespace separating the keyword from the rest.
            resolved_expected[key] = [tokens[0]] + tokens[2:]
        else:
            # case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}
            resolved_expected[key] = spec

    def get_better_representation(mapping):
        """Collapse each token list into ``[keyword_repr, joined_rest]``.

        NOTE: in the original code the parameter was named ``value`` and was
        reassigned inside the loop body — renamed here to avoid the shadowing.
        """
        better_repr = {}
        for key, tokens in mapping.items():
            joined = "".join(
                t.str_value if compare_str else t.repr_value for t in tokens[1:]
            )
            better_repr[key] = [tokens[0].repr_value, joined]
        return better_repr

    # it's easier to compare two lists of strings
    actual_to_compare = get_better_representation(actual)
    expected_to_compare = get_better_representation(resolved_expected)

    assert actual_to_compare == expected_to_compare
|
||||
|
||||
|
||||
def get_tokens(lst):
    """Return the tokens produced for *lst*, with positions removed.

    :param lst: the input to tokenize
    :type lst: list
    :return: the resulting tokens (no EOF token, positions stripped)
    :rtype: list
    """
    tokenizer = Tokenizer(lst, yield_eof=False)
    return list(tokenizer.remove_positions())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_as_list, expected_as_list",
    # whitespace-only input strips to nothing, then each wrapper is tried
    # as prefix, as suffix, and on both sides of "one"
    [([" "], [])] + [
        case
        for ws in ([" "], ["\n"], [" ", "\n"], ["\n", " "],
                   [" ", "\n", " "], ["\n", " ", "\n"])
        for case in (
            (ws + ["one"], ["one"]),
            (["one"] + ws, ["one"]),
            (ws + ["one"] + ws, ["one"]),
        )
    ])
def test_i_can_strip(input_as_list, expected_as_list):
    """strip_tokens removes leading/trailing whitespace and newline tokens."""
    actual = strip_tokens(get_tokens(input_as_list))  # KSI 20201007 Why not use Tokenizer ?!! For perf ?
    expected = get_tokens(expected_as_list)
    assert actual == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("text, strip, expected", [
    ("when xxx yyy", False, {Keywords.WHEN: "when xxx yyy"}),
    ("when uuu vvv print xxx yyy", False, {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
    ("print xxx yyy when uuu vvv", False, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
    (" when xxx", False, {Keywords.WHEN: "when xxx"}),

    # same inputs, but trailing whitespace is stripped from each part
    ("when xxx yyy", True, {Keywords.WHEN: "when xxx yyy"}),
    ("when uuu vvv print xxx yyy", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
    ("print xxx yyy when uuu vvv", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
    (" when xxx", True, {Keywords.WHEN: "when xxx"}),
])
def test_i_can_get_parts(text, strip, expected):
    """parse_parts splits *text* into one part per keyword, keyed by Keywords."""
    errors = []
    parser_input = ParserInput(text)
    parser_input.init()
    parser_input.next_token()

    result = parse_parts(parser_input, errors, ["when", "print"], strip=strip)
    compare_results(result, expected)
|
||||
Reference in New Issue
Block a user