Refactored Parsers. Introduced BaseCustomGrammarParser. Renamed DefaultParser to DefConceptParser

This commit is contained in:
2020-10-02 04:45:47 +02:00
parent d100b7e8b3
commit e8f2705dcf
28 changed files with 1411 additions and 872 deletions
@@ -0,0 +1,227 @@
import pytest
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, TokenKind
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound
from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
# Tests for BaseCustomGrammarParser: splitting input text into keyword-delimited
# parts (get_parts) and extracting colon/indentation-based bodies (get_body).
# NOTE(review): indentation and triple-quoted string layout in this view look
# mangled by extraction; code is kept byte-identical.
class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka):
@staticmethod
# Tokenize each expected string and compare keyword-by-keyword against `actual`.
# Token index 1 is dropped from each expected token list — presumably the
# whitespace token right after the leading keyword; TODO confirm.
def compare_results(actual, expected, compare_str=False):
resolved_expected = {}
for k, v in expected.items():
tokens = list(Tokenizer(v, yield_eof=False))
resolved_expected[k] = [tokens[0]] + tokens[2:]
# Reduce {keyword: tokens} to [keyword repr, joined remainder] so dicts
# can be compared with a single equality check.
def get_better_representation(value):
better_repr = {}
for k, tokens in value.items():
value = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
better_repr[k] = [tokens[0].repr_value, value]
return better_repr
actual_to_compare = get_better_representation(actual)
expected_to_compare = get_better_representation(resolved_expected)
assert actual_to_compare == expected_to_compare
# Build a fresh parser over `text` and prime its first token.
def init_parser(self, text):
sheerka, context = self.init_concepts()
parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0)
parser.reset_parser(context, ParserInput(text))
parser.parser_input.next_token(False) # do not skip starting whitespaces
return sheerka, context, parser
@pytest.mark.parametrize("text, expected", [
("when xxx yyy", {Keywords.WHEN: "when xxx yyy"}),
("when uuu vvv print xxx yyy", {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
("print xxx yyy when uuu vvv", {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
(" when xxx", {Keywords.WHEN: "when xxx"}),
])
# get_parts returns a {keyword: tokens} mapping, independent of keyword order
# in the input; leading whitespace before the first keyword is tolerated.
def test_i_can_get_parts(self, text, expected):
sheerka, context, parser = self.init_parser(text)
res = parser.get_parts(["when", "print"])
self.compare_results(res, expected)
def test_i_can_get_parts_when_multilines(self):
text = """when
def func(x):
return x+1
func(a)
"""
expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"}
sheerka, context, parser = self.init_parser(text)
res = parser.get_parts(["when"])
self.compare_results(res, expected)
@pytest.mark.parametrize("text", [
"",
"no keyword",
"anything before when xxx print yyy",
])
# Input must start with one of the requested keywords; otherwise get_parts
# returns None and records a KeywordNotFound error.
def test_i_cannot_get_parts_when_no_keyword_found(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["when", "print"]) is None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], KeywordNotFound)
assert parser.error_sink[0].keywords == ['when', 'print']
# The optional second argument of get_parts constrains which keyword must
# appear first; a mismatch yields an UnexpectedTokenErrorNode.
def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self):
sheerka, context, parser = self.init_parser("when xxx print yyy")
assert parser.get_parts(["when", "print"], Keywords.PRINT) is None
assert parser.error_sink == [UnexpectedTokenErrorNode(f"'print' keyword not found.",
"when",
[Keywords.PRINT])]
def test_i_can_detect_when_a_keyword_appears_several_times(self):
sheerka, context, parser = self.init_parser("print hello when True print True")
parser.get_parts(["print"])
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], SyntaxErrorNode)
assert parser.error_sink[0].message == "Too many 'print' declarations."
@pytest.mark.parametrize("text", [
"print",
"print ",
"when xxx print",
"when xxx print ",
])
# A keyword with nothing after it (end of file) is reported as UnexpectedEofNode,
# but get_parts still returns a (partial) result.
def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["print", "when"]) is not None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], UnexpectedEofNode)
assert parser.error_sink[0].message == "While parsing keyword 'print'."
def test_i_can_double_quoted_strings_are_expanded(self):
"""
When inside double quotes, the quotes are removed and the content is used as is.
This allows usage of keywords within parts.
"""
sheerka, context, parser = self.init_parser('print "when can be used" when True')
expected = {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected)
# Single quotes are kept verbatim (not expanded), but still shield the
# enclosed keyword from being treated as a part delimiter.
def test_single_quoted_strings_are_not_expanded(self):
sheerka, context, parser = self.init_parser("print 'when can be used' when True")
expected = {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected)
# `keyword:` followed by an indented block: the whole indented region belongs
# to that keyword, so other keywords inside it are not treated as delimiters.
def test_i_can_manage_colon(self):
text = """when:
xxx
when
print
print:
xxx:
when
print
yyy
"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy", Keywords.WHEN: "when xxx\nwhen\nprint"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
def test_indentation_is_normalized_when_using_colon(self):
text = """print:
xxx:
when
print
yyy
"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
def test_i_can_mix_parts_with_colon_and_parts_without_colon(self):
text = """when:
xxx
when
print
print xxx"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
@pytest.mark.parametrize("text", [
"when:\nx x",
"when: \nx x",
])
# After `keyword:` a newline must be followed by indentation (tab); the bad
# token reported is the first unindented word.
def test_i_cannot_manage_colon_when_tab_is_missing(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["when"])
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]
@pytest.mark.parametrize("text", [
"",
"\n",
" \n",
"x", # less than two characters
"\n\t"
])
# get_body needs at least a newline plus an indented token; shorter inputs
# are rejected with "Body is empty or too short."
def test_i_cannot_get_body_when_body_is_too_short(self, text):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")]
def test_a_new_line_is_expected_when_get_body(self):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer("not a newline", yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("New line not found.", "not", [TokenKind.NEWLINE])]
@pytest.mark.parametrize("text", [
"\nx x",
" \nx x",
])
def test_tab_is_mandatory_after_new_line_when_get_body(self, text):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]
# Every line of the body must be indented; the first unindented line breaks it.
def test_i_can_detect_missing_tab_when_get_body(self):
text = "\n\txxx\n\tyyy\nzzz"
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "zzz", [TokenKind.WHITESPACE])]
# A deeper indent on the first line than on later lines is invalid.
def test_i_can_detect_invalid_indentation_when_get_body(self):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))) is None
assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")]
def test_i_can_get_body(self):
sheerka, context, parser = self.init_parser("")
res = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False)))
expected = list(Tokenizer("xxx\n yyyy", yield_eof=False))
# The whitespace token after the newline is blanked in the returned body —
# presumably the leading tab is stripped; TODO confirm against get_body.
expected[2].value = ""
assert [t.repr_value for t in res] == [t.repr_value for t in expected]
assert parser.error_sink == []
+1 -1
View File
@@ -33,7 +33,7 @@ def update_concepts_ids(sheerka, parsing_expression):
update_concepts_ids(sheerka, pe)
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)
eof_token = "<EOF>"
class TestBnfParser(TestUsingMemoryBasedSheerka):
@@ -7,10 +7,11 @@ from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF, Concept, CV
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, LexerError
from parsers.BaseNodeParser import SCWC
from parsers.BaseParser import NotInitializedNode, UnexpectedEofNode
from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence
from parsers.BnfParser import BnfParser
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode
from parsers.DefConceptParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.FunctionParser import FunctionParser
from parsers.PythonParser import PythonParser, PythonNode
@@ -48,7 +49,7 @@ def get_concept_part(part):
if isinstance(part, str):
node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval"))
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part,
@@ -59,7 +60,7 @@ def get_concept_part(part):
# node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval"))
nodes = compute_expected_array({}, part.source, [SCWC(part.first, part.last, *part.content)])
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -70,7 +71,7 @@ def get_concept_part(part):
if isinstance(part, PN):
node = PythonNode(part.source.strip(), ast.parse(part.source.strip(), mode=part.mode))
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -79,7 +80,7 @@ def get_concept_part(part):
if isinstance(part, PythonNode):
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -110,13 +111,26 @@ class FN:
content: list
class TestDefaultParser(TestUsingMemoryBasedSheerka):
class TestDefConceptParser(TestUsingMemoryBasedSheerka):
def init_parser(self, *concepts):
sheerka, context, *updated = self.init_concepts(*concepts, singleton=True)
parser = DefaultParser()
parser = DefConceptParser()
return sheerka, context, parser, *updated
@pytest.mark.parametrize("text, error", [
("concept", UnexpectedTokenErrorNode("'def' keyword not found.", "concept", [Keywords.DEF])),
("hello word", UnexpectedTokenErrorNode("'def' keyword not found.", "hello", [Keywords.DEF])),
("def hello", UnexpectedTokenErrorNode("'concept' keyword not found.", "hello", [Keywords.CONCEPT])),
])
def test_i_can_detect_not_for_me(self, text, error):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME)
assert res.value.reason == [error]
@pytest.mark.parametrize("text, expected", [
("def concept hello", get_def_concept(name="hello")),
("def concept hello ", get_def_concept(name="hello")),
@@ -124,13 +138,11 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka):
("def concept a+b", get_def_concept(name="a + b")),
("def concept 'a+b'+c", get_def_concept(name="'a+b' + c")),
("def concept 'as if'", get_def_concept(name="'as if'")),
("def concept 'as' if", get_def_concept(name="'as if'")),
("def concept hello as 'hello'", get_def_concept(name="hello", body="'hello'")),
("def concept hello as 1", get_def_concept(name="hello", body="1")),
("def concept hello as 1 + 1", get_def_concept(name="hello", body="1 + 1")),
("def concept 'as' if", get_def_concept(name="'as' if")),
('def concept "as if"', get_def_concept(name="as if")),
])
def test_i_can_parse_def_concept(self, text, expected):
sheerka, context, parser = self.init_parser()
def test_i_can_parse_def_concept_name(self, text, expected):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
node = res.value.value
@@ -140,6 +152,113 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka):
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_name_is_mandatory(self):
text = "def concept as 'hello'"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Name is mandatory"
@pytest.mark.parametrize("text", [
"def concept hello\nmy friend",
"def concept hello \nmy friend",
"def concept hello\n my friend",
"def concept hello \n my friend",
"def concept hello from hello\nmy friend",
"def concept hello from def hello\nmy friend",
"def concept hello from bnf hello\nmy friend",
"def concept hello from:\n\thello\nmy friend",
"def concept hello from def:\n\thello\nmy friend",
"def concept hello from bnf:\n\thello\nmy friend",
])
def test_new_line_is_not_allowed_in_the_name(self, text):
text = "def concept hello \n my friend as 'hello'"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert return_value.body == [SyntaxErrorNode(None, "Newline are not allowed in name.")]
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(return_value.reason[0], UnexpectedTokenErrorNode)
assert return_value.reason[0].message == "'concept' keyword not found."
assert return_value.reason[0].expected_tokens == [Keywords.CONCEPT]
assert return_value.reason[0].token.value == "hello"
def test_i_can_detect_empty_declaration(self):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo as where True"
res = parser.parse(context, ParserInput(text))
error = res.body.body[0]
assert not res.status
assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
assert isinstance(error, SyntaxErrorNode)
assert error.message == "Empty 'as' declaration."
def test_empty_parts_are_not_initialized(self):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo"
res = parser.parse(context, ParserInput(text))
parser_result = res.body
node = res.body.body
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert isinstance(node, DefConceptNode)
assert node.body == NotInitializedNode()
assert node.where == NotInitializedNode()
assert node.pre == NotInitializedNode()
assert node.post == NotInitializedNode()
assert node.ret == NotInitializedNode()
@pytest.mark.parametrize("part", [
"as",
"pre",
"post",
"ret",
"where"
])
def test_i_can_parse_def_concept_parts(self, part):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo " + part + " True"
res = parser.parse(context, ParserInput(text))
node = res.value.value
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
part_mapping = "body" if part == "as" else part
args = {part_mapping: get_concept_part("True")}
expected = get_def_concept("foo", **args)
assert node == expected
def test_i_can_detect_error_in_declaration(self):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput("def concept hello where 1+"))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS)
def test_i_can_parse_complex_def_concept_statement(self):
text = """def concept a mult b
where a,b
@@ -148,7 +267,7 @@ post isinstance(res, a)
as res = a * b
ret a if isinstance(a, Concept) else self
"""
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
expected_concept = get_def_concept(
@@ -177,7 +296,7 @@ func(a)
body=PN("def func(x):\n return x+1\nfunc(a)\n", "exec")
)
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
@@ -199,7 +318,7 @@ def concept add one to a as:
ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec"))
)
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
@@ -208,156 +327,17 @@ def concept add one to a as:
assert return_value.value == expected_concept
@pytest.mark.parametrize("text", [
"def concept foo as:\npass",
"def concept foo where:\npass",
"def concept foo pre:\npass",
"def concept foo post:\npass",
"def concept foo from:\nanother definition",
"def concept foo from def:\nanother definition",
"def concept foo from bnf:\n'another' 'definition'",
"def concept name from bnf",
"def concept name from bnf ",
"def concept name from bnf as True",
])
def test_indentation_is_mandatory_after_a_colon(self, text):
sheerka, context, parser = self.init_parser()
def test_i_cannot_parse_empty_bnf_definition(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
error = res.body
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Indentation not found."
@pytest.mark.parametrize("text", [
"def concept plus from:\n\ta plus b",
"def concept plus from def:\n\ta plus b",
# space before the colon
"def concept plus from :\n\ta plus b",
"def concept plus from def :\n\ta plus b",
# space after the colon
"def concept plus from: \n\ta plus b",
"def concept plus from def: \n\ta plus b",
])
def test_i_can_use_colon_and_definition_together(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("a plus b", yield_eof=False)]
@pytest.mark.parametrize("text", [
"def concept plus from bnf:\n\t'a' 'plus' 'b'",
"def concept plus from bnf :\n\t'a' 'plus' 'b'",
"def concept plus from bnf: \n\t'a' 'plus' 'b'",
])
def test_i_can_use_colon_and_bnf_definition_together(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
assert res.status
assert defined_concept.definition.status
assert defined_concept.definition.body.body == Sequence(StrMatch("a"), StrMatch("plus"), StrMatch("b"))
def test_i_can_use_colon_to_protect_keyword(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from:
give me the date !
"""
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_i_can_use_colon_to_protect_keyword_2(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from give me the date !
"""
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_name_is_mandatory(self):
text = "def concept as 'hello'"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Name is mandatory"
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], UnexpectedTokenErrorNode)
assert return_value.body[0].message == "Syntax error."
assert return_value.body[0].expected_tokens == [Keywords.CONCEPT]
@pytest.mark.parametrize("text", [
"def concept hello where 1+",
"def concept hello pre 1+",
"def concept hello post 1+",
"def concept hello as 1+"
])
def test_i_can_detect_error_in_declaration(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS)
@pytest.mark.parametrize("text", [
"def concept hello\nmy friend",
"def concept hello \nmy friend",
"def concept hello\n my friend",
"def concept hello \n my friend",
"def concept hello from hello\nmy friend",
"def concept hello from def hello\nmy friend",
"def concept hello from bnf hello\nmy friend",
"def concept hello from:\n\thello\nmy friend",
"def concept hello from def:\n\thello\nmy friend",
"def concept hello from bnf:\n\thello\nmy friend",
])
def test_new_line_is_not_allowed_in_the_name(self, text):
text = "def concept hello \n my friend as 'hello'"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert return_value.body == [SyntaxErrorNode([], "Newline are not allowed in name.")]
assert sheerka.isinstance(error, BuiltinConcepts.ERROR)
assert error.body == [SyntaxErrorNode([], "Empty 'bnf' declaration")]
def test_i_can_parse_def_concept_from_bnf(self):
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
@@ -383,35 +363,40 @@ from give me the date !
assert not parser.has_error
@pytest.mark.parametrize("text", [
'def concept "def concept x"',
'def concept "def concept x" as x',
"def concept plus from bnf:\n\t'a' 'plus' 'b'",
"def concept plus from bnf :\n\t'a' 'plus' 'b'",
"def concept plus from bnf: \n\t'a' 'plus' 'b'",
])
def test_i_can_use_double_quotes_to_protect_keywords(self, text):
sheerka, context, parser = self.init_parser()
def test_i_can_use_colon_and_bnf_definition_together(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
concept_defined = res.value.value
defined_concept = res.body.body
assert res.status
assert concept_defined.name.tokens == list(Tokenizer("def concept x", yield_eof=False))
assert defined_concept.definition.status
assert defined_concept.definition.body.body == Sequence(StrMatch("a"), StrMatch("plus"), StrMatch("b"))
@pytest.mark.parametrize("text", [
"def concept name from bnf as here is my body",
"def concept name from def as here is my body",
"def concept name from as here is my body"
@pytest.mark.parametrize("text, error", [
("def concept name from def as True", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from def", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from def ", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from as True", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from", UnexpectedEofNode("While parsing keyword 'from'.")),
("def concept name from ", UnexpectedEofNode("While parsing keyword 'from'.")),
])
def test_i_can_detect_empty_bnf_declaration(self, text):
sheerka, context, parser = self.init_parser()
def test_i_can_detect_empty_def_declaration(self, text, error):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
assert res.value.body[0] == SyntaxErrorNode([], "Empty declaration")
assert res.value.body[0] == error
@pytest.mark.parametrize("text", [
"def concept addition from a plus b as a + b",
"def concept addition from def a plus b as a + b"])
def test_i_can_def_concept_from_definition(self, text):
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
expected = get_def_concept("addition", definition="a plus b", body="a + b")
node = res.value.value
@@ -422,6 +407,114 @@ from give me the date !
assert isinstance(res.value, ParserResultConcept)
assert node == expected
@pytest.mark.parametrize("text", [
"def concept plus from:\n\ta plus b",
"def concept plus from def:\n\ta plus b",
# space before the colon
"def concept plus from :\n\ta plus b",
"def concept plus from def :\n\ta plus b",
# space after the colon
"def concept plus from: \n\ta plus b",
"def concept plus from def: \n\ta plus b",
])
def test_i_can_use_colon_and_definition_together(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("a plus b", yield_eof=False)]
def test_i_can_use_colon_to_protect_keyword(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from:
give me the date !
"""
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_i_can_use_colon_to_protect_keyword_2(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from give me the date !
"""
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
@pytest.mark.parametrize("text", [
"def",
"def concept_name"
])
def test_i_cannot_parse_invalid_entries(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [
("'name", "Missing Trailing quote", "'name"),
("foo isa 'name", "Missing Trailing quote", "'name"),
("def concept 'name", "Missing Trailing quote", "'name"),
("def concept name as 'body", "Missing Trailing quote", "'body"),
("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"),
("def concept c::", "Concept identifiers not found", ""),
])
def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
text = "def concept name from bnf unknown"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ("key", "unknown")
@pytest.mark.parametrize("text", [
'def concept "def concept x"',
'def concept "def concept x" as x',
])
def test_i_can_use_double_quotes_to_protect_keywords(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
concept_defined = res.value.value
assert res.status
assert concept_defined.name.tokens == list(Tokenizer("def concept x", yield_eof=False))
def test_i_can_parse_when_ambiguity_in_where_pre_clause(self):
sheerka, context, parser, *concepts = self.init_parser(
Concept("x is a y", pre="in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)"),
@@ -430,7 +523,7 @@ from give me the date !
text = "def concept foo x y where x is a y"
res = parser.parse(context, ParserInput(text))
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.Default",
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.DefConcept",
parser="parsers.ExactConcept")
expected = get_def_concept("foo x y", where=expected_body)
node = res.value.value
@@ -443,7 +536,7 @@ from give me the date !
text = "def concept foo x y pre x is a y"
res = parser.parse(context, ParserInput(text))
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.Default",
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.DefConcept",
parser="parsers.ExactConcept")
expected = get_def_concept("foo x y", pre=expected_body)
node = res.value.value
@@ -454,63 +547,5 @@ from give me the date !
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_detect_not_for_me(self):
text = "hello world"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.value.body[0], CannotHandleErrorNode)
@pytest.mark.parametrize("text", [
"def",
"def concept_name"
])
def test_i_cannot_parse_invalid_entries(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text", [
"concept",
"isa number",
"name isa",
])
def test_i_cannot_parse_not_for_me_entries(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.body[0], CannotHandleErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [
    ("'name", "Missing Trailing quote", "'name"),
    ("foo isa 'name", "Missing Trailing quote", "'name"),
    ("def concept 'name", "Missing Trailing quote", "'name"),
    ("def concept name as 'body", "Missing Trailing quote", "'body"),
    ("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"),
    ("def concept c::", "Concept identifiers not found", ""),
])
def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text):
    """Lexer failures surface as an ERROR whose first body node is the LexerError."""
    sheerka, context, parser = self.init_parser()
    result = parser.parse(context, ParserInput(text))
    assert not result.status
    error = result.body
    assert sheerka.isinstance(error, BuiltinConcepts.ERROR)
    lexer_error = error.body[0]
    assert isinstance(lexer_error, LexerError)
    assert lexer_error.message == error_msg
    assert lexer_error.text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
    """A 'from bnf' body naming an undefined concept yields UNKNOWN_CONCEPT."""
    sheerka, context, parser = self.init_parser()
    res = parser.parse(context, ParserInput("def concept name from bnf unknown"))
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body == ("key", "unknown")
+21 -21
View File
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseParser import UnexpectedEof, UnexpectedTokenErrorNode
from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode
from parsers.ExpressionParser import PropertyEqualsNode, PropertyEqualsSequenceNode, PropertyContainsNode, AndNode, \
OrNode, NotNode, LambdaNode, IsaNode, NameExprNode, ExpressionParser, LeftPartNotFoundError, TrueifyVisitor
@@ -33,14 +33,14 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [
("one complicated expression", n("one complicated expression")),
("function_call(a,b,c)", n("function_call(a,b,c)")),
("one expression or another expression", OrNode(n("one expression"), n("another expression"))),
("one expression and another expression", AndNode(n("one expression"), n("another expression"))),
("one or two or three", OrNode(n("one"), n("two"), n("three"))),
("one and two and three", AndNode(n("one"), n("two"), n("three"))),
("one or two and three", OrNode(n("one"), AndNode(n("two"), n("three")))),
("one and two or three", OrNode(AndNode(n("one"), n("two")), n("three"))),
("one and (two or three)", AndNode(n("one"), OrNode(n("two"), n("three")))),
# ("function_call(a,b,c)", n("function_call(a,b,c)")),
# ("one expression or another expression", OrNode(n("one expression"), n("another expression"))),
# ("one expression and another expression", AndNode(n("one expression"), n("another expression"))),
# ("one or two or three", OrNode(n("one"), n("two"), n("three"))),
# ("one and two and three", AndNode(n("one"), n("two"), n("three"))),
# ("one or two and three", OrNode(n("one"), AndNode(n("two"), n("three")))),
# ("one and two or three", OrNode(AndNode(n("one"), n("two")), n("three"))),
# ("one and (two or three)", AndNode(n("one"), OrNode(n("two"), n("three")))),
])
def test_i_can_parse_expression(self, expression, expected):
sheerka, context, parser = self.init_parser()
@@ -54,12 +54,12 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
assert expressions == expected
@pytest.mark.parametrize("expression, expected_errors", [
("one or", [UnexpectedEof("When parsing 'or'")]),
("one and", [UnexpectedEof("When parsing 'and'")]),
("one or", [UnexpectedEofNode("When parsing 'or'")]),
("one and", [UnexpectedEofNode("When parsing 'and'")]),
("and one", [LeftPartNotFoundError()]),
("or one", [LeftPartNotFoundError()]),
("or", [LeftPartNotFoundError(), UnexpectedEof("When parsing 'or'")]),
("and", [LeftPartNotFoundError(), UnexpectedEof("When parsing 'and'")]),
("or", [LeftPartNotFoundError(), UnexpectedEofNode("When parsing 'or'")]),
("and", [LeftPartNotFoundError(), UnexpectedEofNode("When parsing 'and'")]),
])
def test_i_can_detect_error(self, expression, expected_errors):
sheerka, context, parser = self.init_parser()
@@ -74,17 +74,17 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
res = parser.parse(context, ParserInput("("))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
assert res.body.body[0].token.type == TokenKind.EOF
assert res.body.body[0].expected_tokens == [TokenKind.RPAR]
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
assert res.body.reason[0].token.type == TokenKind.EOF
assert res.body.reason[0].expected_tokens == [TokenKind.RPAR]
res = parser.parse(context, ParserInput(")"))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
assert res.body.body[0].token.type == TokenKind.RPAR
assert res.body.body[0].expected_tokens == []
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
assert res.body.reason[0].token.type == TokenKind.RPAR
assert res.body.reason[0].expected_tokens == []
res = parser.parse(context, ParserInput("one and two)"))
assert not res.status
+71
View File
@@ -0,0 +1,71 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseCustomGrammarParser import KeywordNotFound
from parsers.FormatRuleParser import FormatRuleParser, FormatAstRawText, FormatRuleNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
# Shared concept map for the whole test class; its values are fed to
# init_concepts() in init_parser(). Empty here, so the class-level sheerka
# is built without any extra concepts. Presumably keyed by concept name —
# TODO confirm against other test modules using the same pattern.
cmap = {}
class TestFormatRuleParser(TestUsingMemoryBasedSheerka):
    """Tests for FormatRuleParser ("when <condition> print <format>" rules)."""

    # Class-level sheerka built once in setup_class so individual tests can
    # reuse it instead of re-initialising the concept store every time.
    sheerka = None

    @classmethod
    def setup_class(cls):
        """Build the shared sheerka once for the whole class."""
        t = cls()
        cls.sheerka, context, _ = t.init_parser(cmap)

    def init_parser(self, concepts_map=None):
        """Return (sheerka, context, parser).

        With a concepts_map, a fresh sheerka is created from its values;
        otherwise the class-level sheerka is reused with a new context.
        """
        if concepts_map is not None:
            sheerka, context, *concepts = self.init_concepts(*concepts_map.values(), create_new=True)
        else:
            sheerka = TestFormatRuleParser.sheerka
            context = self.get_context(sheerka)
        parser = FormatRuleParser()
        return sheerka, context, parser

    def test_i_can_detect_empty_expression(self):
        """Empty input fails with an IS_EMPTY result."""
        sheerka, context, parser = self.init_parser()
        res = parser.parse(context, ParserInput(""))
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    def test_input_must_be_a_parser_input(self):
        """parse() returns None when the input is not a ParserInput."""
        sheerka, context, parser = self.init_parser()
        # BUG FIX: the comparison result was previously discarded (bare
        # `... is None` expression with no assert), so this test could
        # never fail regardless of what parse() returned.
        assert parser.parse(context, "not a parser input") is None

    def test_i_can_parse_a_simple_rule(self):
        """A well-formed rule yields a PARSER_RESULT wrapping a FormatRuleNode."""
        sheerka, context, parser = self.init_parser()
        text = "when isinstance(last_value(), Concept) print hello world!"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.body
        format_rule = res.body.body
        rule = format_rule.rule
        format_ast = format_rule.format_ast
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert isinstance(format_rule, FormatRuleNode)
        assert sheerka.isinstance(rule, BuiltinConcepts.RETURN_VALUE)
        assert format_ast == FormatAstRawText("hello world!")

    @pytest.mark.parametrize("text, error", [
        ("hello world", [KeywordNotFound(None, keywords=['when', 'print'])]),
        ("when True", [KeywordNotFound([], keywords=['print'])]),
        ("print True", [KeywordNotFound([], keywords=['when'])]),
    ])
    def test_cannot_parse_when_not_for_me(self, text, error):
        """Missing 'when'/'print' keywords produce NOT_FOR_ME with the exact reason."""
        sheerka, context, parser = self.init_parser()
        res = parser.parse(context, ParserInput(text))
        not_for_me = res.body
        assert not res.status
        assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME)
        assert not_for_me.reason == error
+2
View File
@@ -70,6 +70,8 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka):
sheerka, context, parser = self.init_parser()
parser.reset_parser(context, ParserInput(expression))
parser.parser_input.next_token()
res = parser.parse_function()
assert res == expected