Files
Sheerka-Old/tests/parsers/test_BaseCustomGrammarParser.py
kodjo 54e5681c5a Fixed #109 : Mix python and concept. List comprehension
Fixed #110 : SheerkaDebugManager: add list_debug_settings
Fixed #111 : SheerkaDebugManager: Implement ListDebugLogger
Fixed #112 : SyaNodeParser: rewrite this parser
Fixed #113 : Sheerka.: Add enable_parser_caching to disable parsers caching
Fixed #114 : SyaNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #115 : BnfNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #116 : SequenceNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #117 : ResolveMultiplePluralAmbiguityEvaluator: Resolve Multiple plural ambiguity
2021-09-06 11:51:50 +02:00

250 lines
10 KiB
Python

import pytest
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, TokenKind
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound
from parsers.BaseParser import UnexpectedEofParsingError, UnexpectedTokenParsingError
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
def get_tokens(items):
    """Tokenize each string in `items` and return only the first token of each."""
    firsts = []
    for item in items:
        tokens = list(Tokenizer(item, yield_eof=False))
        firsts.append(tokens[0])
    return firsts
class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka):
@staticmethod
def compare_results(actual, expected, compare_str=False):
    """Assert that a parser result matches the expected parts.

    :param actual: dict mapping keyword -> token list, as returned by get_parts.
    :param expected: dict mapping keyword -> either a source string (tokenized
        here on the fly) or a pre-built token list (e.g. from get_tokens).
    :param compare_str: compare tokens via str_value instead of repr_value.
    """
    resolved_expected = {}
    for keyword, spec in expected.items():
        if isinstance(spec, str):
            # case like {Keywords.DEF_VAR: "def_var var1 def_var var2"}
            tokens = list(Tokenizer(spec, yield_eof=False))
            # tokens[1] is skipped -- presumably the whitespace right after
            # the keyword; TODO confirm against Tokenizer's output.
            resolved_expected[keyword] = [tokens[0]] + tokens[2:]
        else:
            # case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}
            resolved_expected[keyword] = spec

    def get_better_representation(parts):
        # Collapse each token list to [keyword_repr, joined_body].
        # (Fix: the original reused the name `value` for both the parameter
        # and the joined string built inside the loop.)
        better_repr = {}
        for keyword, tokens in parts.items():
            body = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
            better_repr[keyword] = [tokens[0].repr_value, body]
        return better_repr

    # it's easier to compare two lists of strings
    actual_to_compare = get_better_representation(actual)
    expected_to_compare = get_better_representation(resolved_expected)
    assert actual_to_compare == expected_to_compare
def init_parser(self, text):
    """Create a BaseCustomGrammarParser over `text`, positioned on the first token."""
    sheerka, context = self.init_concepts()
    parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0)
    parser.reset_parser(context, ParserInput(text))
    skip_whitespaces = False  # keep leading whitespaces visible to the tests
    parser.parser_input.next_token(skip_whitespaces)
    return sheerka, context, parser
@pytest.mark.parametrize("text, strip_tokens, expected", [
    ("when xxx yyy", False, {Keywords.WHEN: "when xxx yyy"}),
    ("when uuu vvv print xxx yyy", False, {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
    ("print xxx yyy when uuu vvv", False, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
    (" when xxx", False, {Keywords.WHEN: "when xxx"}),
    ("when xxx yyy", True, {Keywords.WHEN: "when xxx yyy"}),
    ("when uuu vvv print xxx yyy", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
    ("print xxx yyy when uuu vvv", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
    (" when xxx", True, {Keywords.WHEN: "when xxx"}),
])
def test_i_can_get_parts(self, text, strip_tokens, expected):
    """Each keyword opens its own part; strip_tokens trims trailing whitespace."""
    _, _, parser = self.init_parser(text)
    result = parser.get_parts(["when", "print"], strip_tokens=strip_tokens)
    self.compare_results(result, expected)
def test_i_can_get_parts_when_multilines(self):
    """A part keeps collecting tokens across line breaks until the next keyword."""
    text = """when
def func(x):
\treturn x+1
func(a)
"""
    expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"}
    _, _, parser = self.init_parser(text)
    self.compare_results(parser.get_parts(["when"]), expected)
@pytest.mark.parametrize("text, allow_multiple, expected", [
    ("def_var var1 def_var var2", {}, {Keywords.DEF_VAR: "def_var var1 def_var var2"}),
    ("def_var var1 def_var var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}),
    ("def_var x y z def_var var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "'x y z'", "var2"])}),
    ("def_var 'x y z' def_var var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "'x y z'", "var2"])}),
    ("def_var var1 def_var x y z def_var var2", {"def_var"},
     {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "'x y z'", "var2"])}),
])
def test_i_can_get_parts_when_allow_multiple_is_set(self, text, allow_multiple, expected):
    """Keywords listed in allow_multiple may repeat; their occurrences merge into one part."""
    _, _, parser = self.init_parser(text)
    result = parser.get_parts(["def_var"], allow_multiple=allow_multiple)
    self.compare_results(result, expected)
@pytest.mark.parametrize("text", [
    "",
    "no keyword",
    "anything before when xxx print yyy",
])
def test_i_cannot_get_parts_when_no_keyword_found(self, text):
    """When no expected keyword starts the input, get_parts fails with a single KeywordNotFound."""
    _, _, parser = self.init_parser(text)
    assert parser.get_parts(["when", "print"]) is None
    errors = parser.error_sink
    assert len(errors) == 1
    error = errors[0]
    assert isinstance(error, KeywordNotFound)
    assert error.keywords == ['when', 'print']
def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self):
    """A specific first keyword can be required: input starting with 'when' is rejected
    when Keywords.PRINT is demanded first, producing an UnexpectedTokenParsingError."""
    sheerka, context, parser = self.init_parser("when xxx print yyy")
    assert parser.get_parts(["when", "print"], Keywords.PRINT) is None
    # Plain string literal: the original used an f-string with no placeholders (ruff F541).
    assert parser.error_sink == [UnexpectedTokenParsingError("'print' keyword not found.",
                                                             "when",
                                                             [Keywords.PRINT])]
@pytest.mark.parametrize("text", [
    "print",
    "print ",
    "when xxx print",
    "when xxx print ",
])
def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text):
    """A keyword with nothing after it still returns parts but records an EOF error."""
    _, _, parser = self.init_parser(text)
    assert parser.get_parts(["print", "when"]) is not None
    errors = parser.error_sink
    assert len(errors) == 1
    assert isinstance(errors[0], UnexpectedEofParsingError)
    assert errors[0].message == "while parsing keyword 'print'"
def test_i_can_double_quoted_strings_are_expanded(self):
    """
    The content of a double-quoted string is inlined (quotes removed), so a
    keyword appearing inside it does not split the part.
    """
    _, _, parser = self.init_parser('print "when can be used" when True')
    result = parser.get_parts(["print", "when"])
    self.compare_results(result, {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"})
def test_single_quoted_strings_are_not_expanded(self):
    """Single quotes are kept verbatim; a keyword inside them still does not split the part."""
    _, _, parser = self.init_parser("print 'when can be used' when True")
    result = parser.get_parts(["print", "when"])
    self.compare_results(result, {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"})
def test_i_can_manage_colon(self):
    # A keyword followed by ':' collects the indented block below it as its part.
    # NOTE(review): the indentation inside this triple-quoted literal appears to
    # have been lost in transit -- the expected values below still contain
    # indented lines (e.g. " when") that the literal no longer shows. Restore
    # the original literal from VCS before editing this test.
    text = """when:
xxx
when
print
print:
xxx:
when
print
yyy
"""
    sheerka, context, parser = self.init_parser(text)
    expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy", Keywords.WHEN: "when xxx\nwhen\nprint"}
    res = parser.get_parts(["print", "when"])
    self.compare_results(res, expected, compare_str=True)
def test_indentation_is_normalized_when_using_colon(self):
    # Nested indentation below 'print:' is reduced by one level in the resulting part.
    # NOTE(review): the leading indentation inside this triple-quoted literal appears
    # to have been lost in transit (the expected value below still contains indented
    # lines) -- restore the original literal from VCS before relying on this text.
    text = """print:
xxx:
when
print
yyy
"""
    sheerka, context, parser = self.init_parser(text)
    expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"}
    res = parser.get_parts(["print", "when"])
    self.compare_results(res, expected, compare_str=True)
def test_i_can_mix_parts_with_colon_and_parts_without_colon(self):
    # A colon-style part ('when:' + indented block) can be followed by a plain
    # inline part ('print xxx') in the same input.
    # NOTE(review): the indentation inside this triple-quoted literal appears to
    # have been lost in transit (the expected WHEN part implies the lines under
    # 'when:' were indented) -- restore the original literal from VCS.
    text = """when:
xxx
when
print
print xxx"""
    sheerka, context, parser = self.init_parser(text)
    expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"}
    res = parser.get_parts(["print", "when"])
    self.compare_results(res, expected, compare_str=True)
@pytest.mark.parametrize("text", [
    "when:\nx x",
    "when: \nx x",
])
def test_i_cannot_manage_colon_when_tab_is_missing(self, text):
    """After 'keyword:' the next line must be indented; a flush-left line is reported."""
    _, _, parser = self.init_parser(text)
    assert parser.get_parts(["when"])
    expected_error = UnexpectedTokenParsingError("Indentation not found.", "x", [TokenKind.WHITESPACE])
    assert parser.error_sink == [expected_error]
@pytest.mark.parametrize("text", [
    "",
    "\n",
    " \n",
    "x",  # less than two characters
    "\n\t"
])
def test_i_cannot_get_body_when_body_is_too_short(self, text):
    """get_body rejects token streams too short to form a body."""
    _, _, parser = self.init_parser("")
    tokens = list(Tokenizer(text, yield_eof=False))
    assert parser.get_body(tokens) is None
    assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")]
def test_a_new_line_is_expected_when_get_body(self):
    """The first token of a body must be a newline."""
    _, _, parser = self.init_parser("")
    tokens = list(Tokenizer("not a newline", yield_eof=False))
    assert parser.get_body(tokens) is None
    expected_error = UnexpectedTokenParsingError("New line not found.", "not", [TokenKind.NEWLINE])
    assert parser.error_sink == [expected_error]
@pytest.mark.parametrize("text", [
    "\nx x",
    " \nx x",
])
def test_tab_is_mandatory_after_new_line_when_get_body(self, text):
    """After the opening newline, the body's first line must be indented."""
    _, _, parser = self.init_parser("")
    tokens = list(Tokenizer(text, yield_eof=False))
    assert parser.get_body(tokens) is None
    expected_error = UnexpectedTokenParsingError("Indentation not found.", "x", [TokenKind.WHITESPACE])
    assert parser.error_sink == [expected_error]
def test_i_can_detect_missing_tab_when_get_body(self):
    """Every body line must stay indented; the first unindented line ('zzz') is reported."""
    _, _, parser = self.init_parser("")
    tokens = list(Tokenizer("\n\txxx\n\tyyy\nzzz", yield_eof=False))
    assert parser.get_body(tokens) is None
    expected_error = UnexpectedTokenParsingError("Indentation not found.", "zzz", [TokenKind.WHITESPACE])
    assert parser.error_sink == [expected_error]
def test_i_can_detect_invalid_indentation_when_get_body(self):
    """A first body line indented deeper than the following line is invalid."""
    _, _, parser = self.init_parser("")
    tokens = list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))
    assert parser.get_body(tokens) is None
    assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")]
def test_i_can_get_body(self):
    """get_body strips one indentation level and returns the remaining tokens."""
    _, _, parser = self.init_parser("")
    result = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False)))
    expected = list(Tokenizer("xxx\n yyyy", yield_eof=False))
    # NOTE(review): token index 2 is presumably the whitespace before 'yyyy';
    # its value is blanked to match the de-indented body -- confirm with Tokenizer.
    expected[2].value = ""
    assert [t.repr_value for t in result] == [t.repr_value for t in expected]
    assert parser.error_sink == []