# Recovered from a file-viewer paste; original path:
#   Sheerka-Old/tests/parsers/test_BaseCustomGrammarParser.py
import pytest
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, TokenKind
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound
from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka):
    """Tests for ``BaseCustomGrammarParser.get_parts`` and ``get_body``.

    NOTE(review): this file lost its leading whitespace in a copy/paste.
    Code indentation has been restored to PEP 8, and the indentation inside
    the multi-line fixture strings has been reconstructed from the expected
    values (which contain literal tab characters) -- verify those literals
    against version control.
    """

    @staticmethod
    def compare_results(actual, expected, compare_str=False):
        """Assert that parser output matches the expected keyword -> text map.

        ``expected`` maps a keyword to the raw source text of its part; the
        text is tokenized and the whitespace token immediately following the
        keyword is dropped, mirroring what the parser produces.
        """
        resolved_expected = {}
        for keyword, text in expected.items():
            tokens = list(Tokenizer(text, yield_eof=False))
            # Drop tokens[1], the whitespace right after the keyword token.
            resolved_expected[keyword] = [tokens[0]] + tokens[2:]

        def get_better_representation(parts):
            # Build {keyword: [keyword_repr, joined_content]} so an assertion
            # failure shows a readable diff.  (The original rebound its own
            # parameter inside this loop; renamed to avoid the shadowing.)
            better_repr = {}
            for keyword, tokens in parts.items():
                joined = "".join(t.str_value if compare_str else t.repr_value
                                 for t in tokens[1:])
                better_repr[keyword] = [tokens[0].repr_value, joined]
            return better_repr

        actual_to_compare = get_better_representation(actual)
        expected_to_compare = get_better_representation(resolved_expected)
        assert actual_to_compare == expected_to_compare

    def init_parser(self, text):
        """Return ``(sheerka, context, parser)`` with the parser positioned on
        the first token of ``text``."""
        sheerka, context = self.init_concepts()
        parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0)
        parser.reset_parser(context, ParserInput(text))
        parser.parser_input.next_token(False)  # do not skip starting whitespaces
        return sheerka, context, parser

    @pytest.mark.parametrize("text, expected", [
        ("when xxx yyy", {Keywords.WHEN: "when xxx yyy"}),
        ("when uuu vvv print xxx yyy", {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
        ("print xxx yyy when uuu vvv", {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
        (" when xxx", {Keywords.WHEN: "when xxx"}),
    ])
    def test_i_can_get_parts(self, text, expected):
        sheerka, context, parser = self.init_parser(text)
        res = parser.get_parts(["when", "print"])
        self.compare_results(res, expected)

    def test_i_can_get_parts_when_multilines(self):
        # NOTE(review): the tab before "return x+1" was reconstructed from the
        # expected value below -- TODO confirm against version control.
        text = "when\ndef func(x):\n\treturn x+1\nfunc(a)\n"
        expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"}
        sheerka, context, parser = self.init_parser(text)
        res = parser.get_parts(["when"])
        self.compare_results(res, expected)

    @pytest.mark.parametrize("text", [
        "",
        "no keyword",
        "anything before when xxx print yyy",
    ])
    def test_i_cannot_get_parts_when_no_keyword_found(self, text):
        sheerka, context, parser = self.init_parser(text)
        assert parser.get_parts(["when", "print"]) is None
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], KeywordNotFound)
        assert parser.error_sink[0].keywords == ['when', 'print']

    def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self):
        sheerka, context, parser = self.init_parser("when xxx print yyy")
        assert parser.get_parts(["when", "print"], Keywords.PRINT) is None
        # Plain string literal: the original used an f-string with no placeholders.
        assert parser.error_sink == [UnexpectedTokenErrorNode("'print' keyword not found.",
                                                              "when",
                                                              [Keywords.PRINT])]

    def test_i_can_detect_when_a_keyword_appears_several_times(self):
        sheerka, context, parser = self.init_parser("print hello when True print True")
        parser.get_parts(["print"])
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], SyntaxErrorNode)
        assert parser.error_sink[0].message == "Too many 'print' declarations."

    @pytest.mark.parametrize("text", [
        "print",
        "print ",
        "when xxx print",
        "when xxx print ",
    ])
    def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text):
        sheerka, context, parser = self.init_parser(text)
        assert parser.get_parts(["print", "when"]) is not None
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], UnexpectedEofNode)
        assert parser.error_sink[0].message == "While parsing keyword 'print'."

    def test_i_can_double_quoted_strings_are_expanded(self):
        """
        When inside a double quote, the double quote is removed and its
        content is used as is.  This allows usage of keywords within parts.
        """
        sheerka, context, parser = self.init_parser('print "when can be used" when True')
        expected = {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected)

    def test_single_quoted_strings_are_not_expanded(self):
        sheerka, context, parser = self.init_parser("print 'when can be used' when True")
        expected = {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected)

    def test_i_can_manage_colon(self):
        # NOTE(review): tab indentation inside this fixture was reconstructed
        # from the expected values (the "Indentation not found" tests prove a
        # colon part must be indented) -- TODO confirm against version control.
        text = ("when:\n"
                "\txxx\n"
                "\twhen\n"
                "\tprint\n"
                "print:\n"
                "\txxx:\n"
                "\t\twhen\n"
                "\t\tprint\n"
                "\tyyy\n")
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy", Keywords.WHEN: "when xxx\nwhen\nprint"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)

    def test_indentation_is_normalized_when_using_colon(self):
        # NOTE(review): indentation reconstructed (see test_i_can_manage_colon);
        # TODO confirm against version control.
        text = ("print:\n"
                "\txxx:\n"
                "\t\twhen\n"
                "\t\tprint\n"
                "\tyyy\n")
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)

    def test_i_can_mix_parts_with_colon_and_parts_without_colon(self):
        # NOTE(review): indentation reconstructed (see test_i_can_manage_colon);
        # TODO confirm against version control.
        text = ("when:\n"
                "\txxx\n"
                "\twhen\n"
                "\tprint\n"
                "print xxx")
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)

    @pytest.mark.parametrize("text", [
        "when:\nx x",
        "when: \nx x",
    ])
    def test_i_cannot_manage_colon_when_tab_is_missing(self, text):
        sheerka, context, parser = self.init_parser(text)
        assert parser.get_parts(["when"])
        assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]

    @pytest.mark.parametrize("text", [
        "",
        "\n",
        " \n",
        "x",  # less than two characters
        "\n\t",
    ])
    def test_i_cannot_get_body_when_body_is_too_short(self, text):
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
        assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")]

    def test_a_new_line_is_expected_when_get_body(self):
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer("not a newline", yield_eof=False))) is None
        assert parser.error_sink == [UnexpectedTokenErrorNode("New line not found.", "not", [TokenKind.NEWLINE])]

    @pytest.mark.parametrize("text", [
        "\nx x",
        " \nx x",
    ])
    def test_tab_is_mandatory_after_new_line_when_get_body(self, text):
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
        assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]

    def test_i_can_detect_missing_tab_when_get_body(self):
        text = "\n\txxx\n\tyyy\nzzz"
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
        assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "zzz", [TokenKind.WHITESPACE])]

    def test_i_can_detect_invalid_indentation_when_get_body(self):
        sheerka, context, parser = self.init_parser("")
        assert parser.get_body(list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))) is None
        assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")]

    def test_i_can_get_body(self):
        sheerka, context, parser = self.init_parser("")
        res = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False)))
        expected = list(Tokenizer("xxx\n yyyy", yield_eof=False))
        # One indentation level is stripped: the whitespace token becomes empty.
        expected[2].value = ""
        assert [t.repr_value for t in res] == [t.repr_value for t in expected]
        assert parser.error_sink == []