Refactored parsers: introduced BaseCustomGrammarParser; renamed DefaultParser to DefConceptParser.

This commit is contained in:
2020-10-02 04:45:47 +02:00
parent d100b7e8b3
commit e8f2705dcf
28 changed files with 1411 additions and 872 deletions
@@ -0,0 +1,227 @@
import pytest
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, TokenKind
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound
from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka):
@staticmethod
def compare_results(actual, expected, compare_str=False):
resolved_expected = {}
for k, v in expected.items():
tokens = list(Tokenizer(v, yield_eof=False))
resolved_expected[k] = [tokens[0]] + tokens[2:]
def get_better_representation(value):
better_repr = {}
for k, tokens in value.items():
value = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
better_repr[k] = [tokens[0].repr_value, value]
return better_repr
actual_to_compare = get_better_representation(actual)
expected_to_compare = get_better_representation(resolved_expected)
assert actual_to_compare == expected_to_compare
def init_parser(self, text):
sheerka, context = self.init_concepts()
parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0)
parser.reset_parser(context, ParserInput(text))
parser.parser_input.next_token(False) # do not skip starting whitespaces
return sheerka, context, parser
@pytest.mark.parametrize("text, expected", [
("when xxx yyy", {Keywords.WHEN: "when xxx yyy"}),
("when uuu vvv print xxx yyy", {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
("print xxx yyy when uuu vvv", {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
(" when xxx", {Keywords.WHEN: "when xxx"}),
])
def test_i_can_get_parts(self, text, expected):
sheerka, context, parser = self.init_parser(text)
res = parser.get_parts(["when", "print"])
self.compare_results(res, expected)
    def test_i_can_get_parts_when_multilines(self):
        # A part may span several lines; the collected part keeps the inner
        # indentation (note the literal tab before ``return`` in both the
        # source text and the expected value — TODO confirm exact whitespace).
        text = """when
def func(x):
	return x+1
func(a)
"""
        expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"}
        sheerka, context, parser = self.init_parser(text)
        res = parser.get_parts(["when"])
        self.compare_results(res, expected)
@pytest.mark.parametrize("text", [
"",
"no keyword",
"anything before when xxx print yyy",
])
def test_i_cannot_get_parts_when_no_keyword_found(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["when", "print"]) is None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], KeywordNotFound)
assert parser.error_sink[0].keywords == ['when', 'print']
def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self):
sheerka, context, parser = self.init_parser("when xxx print yyy")
assert parser.get_parts(["when", "print"], Keywords.PRINT) is None
assert parser.error_sink == [UnexpectedTokenErrorNode(f"'print' keyword not found.",
"when",
[Keywords.PRINT])]
def test_i_can_detect_when_a_keyword_appears_several_times(self):
sheerka, context, parser = self.init_parser("print hello when True print True")
parser.get_parts(["print"])
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], SyntaxErrorNode)
assert parser.error_sink[0].message == "Too many 'print' declarations."
@pytest.mark.parametrize("text", [
"print",
"print ",
"when xxx print",
"when xxx print ",
])
def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["print", "when"]) is not None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], UnexpectedEofNode)
assert parser.error_sink[0].message == "While parsing keyword 'print'."
def test_i_can_double_quoted_strings_are_expanded(self):
"""
When inside a double quote, the double quote is removed and its content it used as is.
It allows usage of keywords withing parts
:return:
"""
sheerka, context, parser = self.init_parser('print "when can be used" when True')
expected = {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected)
def test_single_quoted_strings_are_not_expanded(self):
sheerka, context, parser = self.init_parser("print 'when can be used' when True")
expected = {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected)
    def test_i_can_manage_colon(self):
        # A trailing colon introduces an indented sub-block; the collected
        # part has the indentation reduced (see ``expected``).
        # NOTE(review): the exact indentation characters inside this
        # triple-quoted string are significant — TODO confirm they are tabs.
        text = """when:
	xxx
	when
	print
print:
	xxx:
		when
		print
	yyy
"""
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy", Keywords.WHEN: "when xxx\nwhen\nprint"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)
    def test_indentation_is_normalized_when_using_colon(self):
        # Nested indentation inside a colon block is normalized in the
        # collected part (deeper levels become single spaces in ``expected``).
        # NOTE(review): the in-string indentation is significant — TODO
        # confirm the original characters (tabs assumed).
        text = """print:
	xxx:
		when
		print
	yyy
"""
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)
    def test_i_can_mix_parts_with_colon_and_parts_without_colon(self):
        # A colon-introduced block ("when:") can coexist with a plain
        # single-line part ("print xxx").
        # NOTE(review): in-string indentation is significant — TODO confirm
        # the original characters (tabs assumed).
        text = """when:
	xxx
	when
	print
print xxx"""
        sheerka, context, parser = self.init_parser(text)
        expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"}
        res = parser.get_parts(["print", "when"])
        self.compare_results(res, expected, compare_str=True)
@pytest.mark.parametrize("text", [
"when:\nx x",
"when: \nx x",
])
def test_i_cannot_manage_colon_when_tab_is_missing(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["when"])
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]
@pytest.mark.parametrize("text", [
"",
"\n",
" \n",
"x", # less than two characters
"\n\t"
])
def test_i_cannot_get_body_when_body_is_too_short(self, text):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")]
def test_a_new_line_is_expected_when_get_body(self):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer("not a newline", yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("New line not found.", "not", [TokenKind.NEWLINE])]
@pytest.mark.parametrize("text", [
"\nx x",
" \nx x",
])
def test_tab_is_mandatory_after_new_line_when_get_body(self, text):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]
def test_i_can_detect_missing_tab_when_get_body(self):
text = "\n\txxx\n\tyyy\nzzz"
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "zzz", [TokenKind.WHITESPACE])]
def test_i_can_detect_invalid_indentation_when_get_body(self):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))) is None
assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")]
def test_i_can_get_body(self):
sheerka, context, parser = self.init_parser("")
res = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False)))
expected = list(Tokenizer("xxx\n yyyy", yield_eof=False))
expected[2].value = ""
assert [t.repr_value for t in res] == [t.repr_value for t in expected]
assert parser.error_sink == []