54e5681c5a
Fixed #110 : SheerkaDebugManager: add list_debug_settings Fixed #111 : SheerkaDebugManager: Implement ListDebugLogger Fixed #112 : SyaNodeParser: rewrite this parser Fixed #113 : Sheerka: Add enable_parser_caching to disable parser caching Fixed #114 : SyaNodeParser : Implement fast cache to resolve unrecognized tokens requests Fixed #115 : BnfNodeParser : Implement fast cache to resolve unrecognized tokens requests Fixed #116 : SequenceNodeParser : Implement fast cache to resolve unrecognized tokens requests Fixed #117 : ResolveMultiplePluralAmbiguityEvaluator: Resolve multiple plural ambiguity
250 lines
10 KiB
Python
250 lines
10 KiB
Python
import pytest
|
|
|
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
|
from core.tokenizer import Keywords, Tokenizer, TokenKind
|
|
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound
|
|
from parsers.BaseParser import UnexpectedEofParsingError, UnexpectedTokenParsingError
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
|
|
|
|
def get_tokens(items):
|
|
return [list(Tokenizer(item, yield_eof=False))[0] for item in items]
|
|
|
|
|
|
class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka):
|
|
|
|
@staticmethod
|
|
def compare_results(actual, expected, compare_str=False):
|
|
resolved_expected = {}
|
|
for k, v in expected.items():
|
|
if isinstance(v, str):
|
|
# case like {Keywords.DEF_VAR: "def_var var1 def_var var2"}
|
|
tokens = list(Tokenizer(v, yield_eof=False))
|
|
resolved_expected[k] = [tokens[0]] + tokens[2:]
|
|
else:
|
|
# case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}
|
|
resolved_expected[k] = v
|
|
|
|
def get_better_representation(value):
|
|
better_repr = {}
|
|
for k, tokens in value.items():
|
|
value = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
|
|
better_repr[k] = [tokens[0].repr_value, value]
|
|
return better_repr
|
|
|
|
# it's easier to compare two list of string
|
|
actual_to_compare = get_better_representation(actual)
|
|
expected_to_compare = get_better_representation(resolved_expected)
|
|
|
|
assert actual_to_compare == expected_to_compare
|
|
|
|
def init_parser(self, text):
|
|
sheerka, context = self.init_concepts()
|
|
|
|
parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0)
|
|
|
|
parser.reset_parser(context, ParserInput(text))
|
|
parser.parser_input.next_token(False) # do not skip starting whitespaces
|
|
|
|
return sheerka, context, parser
|
|
|
|
@pytest.mark.parametrize("text, strip_tokens, expected", [
|
|
("when xxx yyy", False, {Keywords.WHEN: "when xxx yyy"}),
|
|
("when uuu vvv print xxx yyy", False, {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
|
|
("print xxx yyy when uuu vvv", False, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
|
|
(" when xxx", False, {Keywords.WHEN: "when xxx"}),
|
|
|
|
("when xxx yyy", True, {Keywords.WHEN: "when xxx yyy"}),
|
|
("when uuu vvv print xxx yyy", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
|
|
("print xxx yyy when uuu vvv", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
|
|
(" when xxx", True, {Keywords.WHEN: "when xxx"}),
|
|
])
|
|
def test_i_can_get_parts(self, text, strip_tokens, expected):
|
|
sheerka, context, parser = self.init_parser(text)
|
|
|
|
res = parser.get_parts(["when", "print"], strip_tokens=strip_tokens)
|
|
self.compare_results(res, expected)
|
|
|
|
def test_i_can_get_parts_when_multilines(self):
|
|
text = """when
|
|
def func(x):
|
|
\treturn x+1
|
|
func(a)
|
|
"""
|
|
expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"}
|
|
sheerka, context, parser = self.init_parser(text)
|
|
|
|
res = parser.get_parts(["when"])
|
|
self.compare_results(res, expected)
|
|
|
|
@pytest.mark.parametrize("text, allow_multiple, expected", [
|
|
("def_var var1 def_var var2", {}, {Keywords.DEF_VAR: "def_var var1 def_var var2"}),
|
|
("def_var var1 def_var var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}),
|
|
("def_var x y z def_var var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "'x y z'", "var2"])}),
|
|
("def_var 'x y z' def_var var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "'x y z'", "var2"])}),
|
|
("def_var var1 def_var x y z def_var var2", {"def_var"},
|
|
{Keywords.DEF_VAR: get_tokens(["def_var", "var1", "'x y z'", "var2"])}),
|
|
])
|
|
def test_i_can_get_parts_when_allow_multiple_is_set(self, text, allow_multiple, expected):
|
|
sheerka, context, parser = self.init_parser(text)
|
|
|
|
res = parser.get_parts(["def_var"], allow_multiple=allow_multiple)
|
|
self.compare_results(res, expected)
|
|
|
|
@pytest.mark.parametrize("text", [
|
|
"",
|
|
"no keyword",
|
|
"anything before when xxx print yyy",
|
|
])
|
|
def test_i_cannot_get_parts_when_no_keyword_found(self, text):
|
|
sheerka, context, parser = self.init_parser(text)
|
|
|
|
assert parser.get_parts(["when", "print"]) is None
|
|
assert len(parser.error_sink) == 1
|
|
assert isinstance(parser.error_sink[0], KeywordNotFound)
|
|
assert parser.error_sink[0].keywords == ['when', 'print']
|
|
|
|
def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self):
|
|
sheerka, context, parser = self.init_parser("when xxx print yyy")
|
|
|
|
assert parser.get_parts(["when", "print"], Keywords.PRINT) is None
|
|
assert parser.error_sink == [UnexpectedTokenParsingError(f"'print' keyword not found.",
|
|
"when",
|
|
[Keywords.PRINT])]
|
|
|
|
@pytest.mark.parametrize("text", [
|
|
"print",
|
|
"print ",
|
|
"when xxx print",
|
|
"when xxx print ",
|
|
])
|
|
def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text):
|
|
sheerka, context, parser = self.init_parser(text)
|
|
|
|
assert parser.get_parts(["print", "when"]) is not None
|
|
assert len(parser.error_sink) == 1
|
|
assert isinstance(parser.error_sink[0], UnexpectedEofParsingError)
|
|
assert parser.error_sink[0].message == "while parsing keyword 'print'"
|
|
|
|
def test_i_can_double_quoted_strings_are_expanded(self):
|
|
"""
|
|
When inside a double quote, the double quote is removed and its content it used as is.
|
|
It allows usage of keywords withing parts
|
|
:return:
|
|
"""
|
|
sheerka, context, parser = self.init_parser('print "when can be used" when True')
|
|
expected = {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"}
|
|
|
|
res = parser.get_parts(["print", "when"])
|
|
self.compare_results(res, expected)
|
|
|
|
def test_single_quoted_strings_are_not_expanded(self):
|
|
sheerka, context, parser = self.init_parser("print 'when can be used' when True")
|
|
expected = {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"}
|
|
|
|
res = parser.get_parts(["print", "when"])
|
|
self.compare_results(res, expected)
|
|
|
|
def test_i_can_manage_colon(self):
|
|
text = """when:
|
|
xxx
|
|
when
|
|
print
|
|
print:
|
|
xxx:
|
|
when
|
|
print
|
|
yyy
|
|
"""
|
|
sheerka, context, parser = self.init_parser(text)
|
|
expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy", Keywords.WHEN: "when xxx\nwhen\nprint"}
|
|
|
|
res = parser.get_parts(["print", "when"])
|
|
self.compare_results(res, expected, compare_str=True)
|
|
|
|
def test_indentation_is_normalized_when_using_colon(self):
|
|
text = """print:
|
|
xxx:
|
|
when
|
|
print
|
|
yyy
|
|
"""
|
|
sheerka, context, parser = self.init_parser(text)
|
|
expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"}
|
|
|
|
res = parser.get_parts(["print", "when"])
|
|
self.compare_results(res, expected, compare_str=True)
|
|
|
|
def test_i_can_mix_parts_with_colon_and_parts_without_colon(self):
|
|
text = """when:
|
|
xxx
|
|
when
|
|
print
|
|
print xxx"""
|
|
sheerka, context, parser = self.init_parser(text)
|
|
expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"}
|
|
|
|
res = parser.get_parts(["print", "when"])
|
|
self.compare_results(res, expected, compare_str=True)
|
|
|
|
@pytest.mark.parametrize("text", [
|
|
"when:\nx x",
|
|
"when: \nx x",
|
|
])
|
|
def test_i_cannot_manage_colon_when_tab_is_missing(self, text):
|
|
sheerka, context, parser = self.init_parser(text)
|
|
|
|
assert parser.get_parts(["when"])
|
|
assert parser.error_sink == [UnexpectedTokenParsingError("Indentation not found.", "x", [TokenKind.WHITESPACE])]
|
|
|
|
@pytest.mark.parametrize("text", [
|
|
"",
|
|
"\n",
|
|
" \n",
|
|
"x", # less than two characters
|
|
"\n\t"
|
|
])
|
|
def test_i_cannot_get_body_when_body_is_too_short(self, text):
|
|
sheerka, context, parser = self.init_parser("")
|
|
|
|
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
|
|
assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")]
|
|
|
|
def test_a_new_line_is_expected_when_get_body(self):
|
|
sheerka, context, parser = self.init_parser("")
|
|
|
|
assert parser.get_body(list(Tokenizer("not a newline", yield_eof=False))) is None
|
|
assert parser.error_sink == [UnexpectedTokenParsingError("New line not found.", "not", [TokenKind.NEWLINE])]
|
|
|
|
@pytest.mark.parametrize("text", [
|
|
"\nx x",
|
|
" \nx x",
|
|
])
|
|
def test_tab_is_mandatory_after_new_line_when_get_body(self, text):
|
|
sheerka, context, parser = self.init_parser("")
|
|
|
|
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
|
|
assert parser.error_sink == [UnexpectedTokenParsingError("Indentation not found.", "x", [TokenKind.WHITESPACE])]
|
|
|
|
def test_i_can_detect_missing_tab_when_get_body(self):
|
|
text = "\n\txxx\n\tyyy\nzzz"
|
|
|
|
sheerka, context, parser = self.init_parser("")
|
|
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
|
|
assert parser.error_sink == [
|
|
UnexpectedTokenParsingError("Indentation not found.", "zzz", [TokenKind.WHITESPACE])]
|
|
|
|
def test_i_can_detect_invalid_indentation_when_get_body(self):
|
|
sheerka, context, parser = self.init_parser("")
|
|
assert parser.get_body(list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))) is None
|
|
assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")]
|
|
|
|
def test_i_can_get_body(self):
|
|
sheerka, context, parser = self.init_parser("")
|
|
res = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False)))
|
|
expected = list(Tokenizer("xxx\n yyyy", yield_eof=False))
|
|
expected[2].value = ""
|
|
|
|
assert [t.repr_value for t in res] == [t.repr_value for t in expected]
|
|
assert parser.error_sink == []
|