import ast

import pytest

from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode
from parsers.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError
from parsers.DefaultParser import DefaultParser
from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode
from parsers.DefaultParser import Node, UnexpectedTokenErrorNode, DefConceptNode, NopNode
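

# Shorthand factories for the expected-node fixtures used throughout the
# tests below. Every DefaultParser node takes a leading list (empty in these
# fixtures) as its first constructor argument.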
def nop():
    return NopNode()


def n(number):
    return NumberNode([], number)


def s(string, quote="'"):
    return StringNode([], string, quote)


def v(name):
    return VariableNode([], name)


def t():
    return TrueNode([])


def f():
    return FalseNode([])


def null():
    return NullNode([])


def b(operator, left, right):
    return BinaryNode([], operator, left, right)
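

# The source below exercises every single-character token, the four newline
# conventions (\n, \n\r, \r, \r\n), identifiers, numbers, strings (one with an
# embedded newline) and '='. Expected tokens carry the absolute character
# index plus 1-based line and column numbers.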
def test_i_can_tokenize():
    source = "+*-/{}[]()    ,;:.?\n\n\r\r\r\nidentifier_0\t  \t10.15 10 'string\n' \"another string\"="
    tokens = list(Tokenizer(source))
    assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
    assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
    assert tokens[2] == Token(TokenKind.MINUS, "-", 2, 1, 3)
    assert tokens[3] == Token(TokenKind.SLASH, "/", 3, 1, 4)
    assert tokens[4] == Token(TokenKind.LBRACE, "{", 4, 1, 5)
    assert tokens[5] == Token(TokenKind.RBRACE, "}", 5, 1, 6)
    assert tokens[6] == Token(TokenKind.LBRACKET, "[", 6, 1, 7)
    assert tokens[7] == Token(TokenKind.RBRACKET, "]", 7, 1, 8)
    assert tokens[8] == Token(TokenKind.LPAR, "(", 8, 1, 9)
    assert tokens[9] == Token(TokenKind.RPAR, ")", 9, 1, 10)
    assert tokens[10] == Token(TokenKind.WHITESPACE, "    ", 10, 1, 11)
    assert tokens[11] == Token(TokenKind.COMMA, ",", 14, 1, 15)
    assert tokens[12] == Token(TokenKind.SEMICOLON, ";", 15, 1, 16)
    assert tokens[13] == Token(TokenKind.COLON, ":", 16, 1, 17)
    assert tokens[14] == Token(TokenKind.DOT, ".", 17, 1, 18)
    assert tokens[15] == Token(TokenKind.QMARK, "?", 18, 1, 19)
    assert tokens[16] == Token(TokenKind.NEWLINE, "\n", 19, 1, 20)
    assert tokens[17] == Token(TokenKind.NEWLINE, "\n\r", 20, 2, 1)
    assert tokens[18] == Token(TokenKind.NEWLINE, "\r", 22, 3, 1)
    assert tokens[19] == Token(TokenKind.NEWLINE, "\r\n", 23, 4, 1)
    assert tokens[20] == Token(TokenKind.IDENTIFIER, "identifier_0", 25, 5, 1)
    assert tokens[21] == Token(TokenKind.WHITESPACE, "\t  \t", 37, 5, 13)
    assert tokens[22] == Token(TokenKind.NUMBER, "10.15", 41, 5, 17)
    assert tokens[23] == Token(TokenKind.WHITESPACE, " ", 46, 5, 22)
    assert tokens[24] == Token(TokenKind.NUMBER, "10", 47, 5, 23)
    assert tokens[25] == Token(TokenKind.WHITESPACE, " ", 49, 5, 25)
    assert tokens[26] == Token(TokenKind.STRING, "'string\n'", 50, 5, 26)
    assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1)
    assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2)
    assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18)
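

# Hyphens and accented letters are valid inside identifiers here, but an
# identifier may not start with a digit or '-'.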
@pytest.mark.parametrize("text, expected", [
    ("_ident", True),
    ("ident", True),
    ("ident123", True),
    ("ident_123", True),
    ("ident-like-this", True),
    ("àèùéû", True),
    ("011254", False),
    ("0abcd", False),
    ("-abcd", False)
])
def test_i_can_tokenize_identifiers(text, expected):
    tokens = list(Tokenizer(text))
    comparison = tokens[0].type == TokenKind.IDENTIFIER
    assert comparison == expected
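

# An unterminated string raises LexerError, which records the offending text
# and the index/line/column just past the end of the input.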
@pytest.mark.parametrize("text, error_text, index, line, column", [
    ("'string", "'string", 7, 1, 8),
    ('"string', '"string', 7, 1, 8),
    ('"a" + "string', '"string', 13, 1, 14),
    ('"a"\n\n"string', '"string', 12, 3, 8),
])
def test_i_can_detect_unfinished_strings(text, error_text, index, line, column):
    with pytest.raises(LexerError) as e:
        list(Tokenizer(text))
    assert e.value.text == error_text
    assert e.value.index == index
    assert e.value.line == line
    assert e.value.column == column
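

# eat_string is exercised directly below. Note that the two-character
# sequences '\n\r' and '\r\n' each count as a single newline, and an
# unescaped quote of the opening kind ends the string even mid-input.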
@pytest.mark.parametrize("text, expected_text, expected_newlines", [
    ("'foo'", "'foo'", 0),
    ('"foo"', '"foo"', 0),
    ("'foo\rbar'", "'foo\rbar'", 1),
    ("'foo\nbar'", "'foo\nbar'", 1),
    ("'foo\n\rbar'", "'foo\n\rbar'", 1),
    ("'foo\r\nbar'", "'foo\r\nbar'", 1),
    ("'foo\r\rbar'", "'foo\r\rbar'", 2),
    ("'foo\n\nbar'", "'foo\n\nbar'", 2),
    ("'foo\r\n\n\rbar'", "'foo\r\n\n\rbar'", 2),
    ("'\rfoo\rbar\r'", "'\rfoo\rbar\r'", 3),
    ("'\nfoo\nbar\n'", "'\nfoo\nbar\n'", 3),
    ("'\n\rfoo\r\n'", "'\n\rfoo\r\n'", 2),
    (r"'foo\'bar'", r"'foo\'bar'", 0),
    (r'"foo\"bar"', r'"foo\"bar"', 0),
    ('"foo"bar"', '"foo"', 0),
    ("'foo'bar'", "'foo'", 0),
])
def test_i_can_parse_strings(text, expected_text, expected_newlines):
    lexer = Tokenizer(text)
    text_found, nb_of_newlines = lexer.eat_string(0, 1, 1)

    assert nb_of_newlines == expected_newlines
    assert text_found == expected_text
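

# Signed numbers and numbers with leading zeros lex as a single NUMBER token
# whose value is the verbatim source text.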
@pytest.mark.parametrize("text", [
    "1", "3.1415", "0.5", "01", "-5", "-5.10"
])
def test_i_can_parse_numbers(text):
    tokens = list(Tokenizer(text))
    assert tokens[0].type == TokenKind.NUMBER
    assert tokens[0].value == text
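

# Keyword tokens carry the Keywords enum member as their value rather than
# the raw source text.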
@pytest.mark.parametrize("text, expected", [
    ("def", Keywords.DEF),
    ("concept", Keywords.CONCEPT),
    ("as", Keywords.AS),
    ("pre", Keywords.PRE),
    ("post", Keywords.POST)
])
def test_i_can_recognize_keywords(text, expected):
    tokens = list(Tokenizer(text))
    assert tokens[0].type == TokenKind.KEYWORD
    assert tokens[0].value == expected
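

# Note the expected shapes: '+'/'-' chains associate to the right, and a
# binary minus is normalised into addition of a negated operand, so '2-3'
# parses as 2 + (-3).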
@pytest.mark.parametrize("text, expected", [
    ("1", n(1)),
    ("+1", n(1)),
    ("-1", n(-1)),
    ("'foo'", s("foo")),
    ("identifier", v("identifier")),
    ("true", t()),
    ("false", f()),
    ("null", null()),
    ("1 * 2", b(TokenKind.STAR, n(1), n(2))),
    ("1 * 2/3", b(TokenKind.STAR, n(1), b(TokenKind.SLASH, n(2), n(3)))),
    ("1 + 2", b(TokenKind.PLUS, n(1), n(2))),
    ("1 + 2 - 3", b(TokenKind.PLUS, n(1), b(TokenKind.MINUS, n(2), n(3)))),
    ("1 + 2-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
    ("1 + 2 +-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
    ("1 + 2 * 3", b(TokenKind.PLUS, n(1), b(TokenKind.STAR, n(2), n(3)))),
    ("1 * 2 + 3", b(TokenKind.PLUS, b(TokenKind.STAR, n(1), n(2)), n(3))),
    ("(1 + 2) * 3", b(TokenKind.STAR, b(TokenKind.PLUS, n(1), n(2)), n(3))),
    ("1 * (2 + 3)", b(TokenKind.STAR, n(1), b(TokenKind.PLUS, n(2), n(3)))),
])
def test_i_can_parse_simple_expression(text, expected):
    parser = DefaultParser(text, None)
    tree = parser.parse()
    assert tree.is_same(expected)
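

# Running out of input mid-expression records an error node in the parser's
# error_sink that lists every token kind (or keyword literal) which could
# have continued the expression.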
@pytest.mark.parametrize("text, token_found, expected_tokens", [
    ("1+", TokenKind.EOF,
     [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, 'true', 'false', 'null', TokenKind.LPAR]),
    ("(1+1", TokenKind.EOF, [TokenKind.RPAR])
])
def test_i_can_detect_unexpected_end_of_code(text, token_found, expected_tokens):
    parser = DefaultParser(text, None)
    parser.parse()

    assert parser.has_error
    assert parser.error_sink[0].tokens[0].type == token_found
    assert parser.error_sink[0].expected_tokens == expected_tokens
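

# Concept names may mix bare and quoted fragments; quoting lets operators and
# keywords appear verbatim in a name ('a+b', 'as if'), and fragments are
# joined with single spaces. Python bodies are compared structurally via
# ast.dump.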
@pytest.mark.parametrize("text, expected_name, expected_expr", [
    ("def concept hello", "hello", nop()),
    ("def concept hello ", "hello", nop()),
    ("def concept a+b", "a + b", nop()),
    ("def concept 'a+b'", "a+b", nop()),
    ("def concept 'a+b'+c", "a+b + c", nop()),
    ("def concept 'as if'", "as if", nop()),
    ("def concept 'as' if", "as if", nop()),
    ("def concept hello as 'hello'", "hello", ast.Expression(body=ast.Str(s='hello'))),
    ("def concept hello as 1", "hello", ast.Expression(body=ast.Num(n=1))),
    ("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))),
])
def test_i_can_parse_def_concept(text, expected_name, expected_expr):
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()
    assert isinstance(tree, DefConceptNode)
    assert tree.name == expected_name
    if isinstance(tree.body, PythonNode):
        assert ast.dump(tree.body.ast) == ast.dump(expected_expr)
    else:
        assert tree.body == expected_expr
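

# Structural AST comparison helper: dumps both trees and strips the implicit
# ', ctx=Load()' annotations so expected trees can be written without
# explicit contexts.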
def compare_ast(left, right):
    left_as_string = ast.dump(left)
    left_as_string = left_as_string.replace(", ctx=Load()", "")

    right_as_string = right if isinstance(right, str) else ast.dump(right)
    right_as_string = right_as_string.replace(", ctx=Load()", "")

    return left_as_string == right_as_string


def test_i_can_parse_complex_def_concept_statement():
    text = """def concept a plus b
where a,b
pre isinstance(a, int) and isinstance(b, float)
post isinstance(res, int)
as res = a + b
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()
    assert not parser.has_error
    assert isinstance(tree, DefConceptNode)
    assert tree.name == "a plus b"
    assert tree.where.source == "a,b"
    assert isinstance(tree.where.ast, ast.Expression)
    assert tree.pre.source == "isinstance(a, int) and isinstance(b, float)"
    assert isinstance(tree.pre.ast, ast.Expression)
    assert tree.post.source == "isinstance(res, int)"
    assert isinstance(tree.post.ast, ast.Expression)
    assert tree.body.source == "res = a + b"
    assert isinstance(tree.body.ast, ast.Module)
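

# The tests below cover the ':' form of 'as', which introduces an
# indentation-delimited Python block, mirroring Python's own suite syntax.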
def test_i_can_use_colon_to_declare_indentation():
    text = """
def concept add one to a as:
    def func(x):
        return x+1
    func(a)
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()
    assert not parser.has_error
    assert isinstance(tree, DefConceptNode)


def test_i_can_use_colon_to_declare_indentation2():
    text = """
def concept add one to a as:
    def func(x):
        return x+1
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()
    assert not parser.has_error
    assert isinstance(tree, DefConceptNode)


def test_without_colon_i_get_an_indent_error():
    text = """
def concept add one to a as
    def func(x):
        return x+1
    func(a)
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()
    assert parser.has_error
    assert isinstance(tree, DefConceptNode)
    assert isinstance(parser.error_sink[0].exception, IndentationError)


def test_i_can_detect_error():
    """
    func(b) is not indented even though a colon follows the 'as' keyword,
    so the parser reports it as an unexpected token.
    """

    text = """
def concept add one to a as:
    def func(x):
        return x+1
    func(a)
func(b)
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()
    assert parser.has_error
    assert isinstance(tree, DefConceptNode)
    assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
    # check that the error is caused by 'func(b)'
    assert parser.error_sink[0].tokens[0].line == 6
    assert parser.error_sink[0].tokens[0].column == 1
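

# expected_tokens may mix TokenKind members, Keywords values and placeholder
# strings such as "<name>".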
@pytest.mark.parametrize("text, token_found, expected_tokens", [
    ("def hello as 'hello'", "hello", [Keywords.CONCEPT]),
    ("def concept as", Keywords.AS, ["<name>"]),
])
def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens):
    parser = DefaultParser(text, PythonParser)
    parser.parse()

    assert parser.has_error
    assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
    assert parser.error_sink[0].tokens[0].value == token_found
    assert parser.error_sink[0].expected_tokens == expected_tokens
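

# A Python syntax error inside a where/pre/post/as clause surfaces as a
# PythonErrorNode produced by the embedded PythonParser.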
@pytest.mark.parametrize("text", [
    "def concept hello where 1+",
    "def concept hello pre 1+",
    "def concept hello post 1+",
    "def concept hello as 1+"
])
def test_i_can_detect_error_in_declaration(text):
    parser = DefaultParser(text, PythonParser)
    parser.parse()
    assert parser.has_error
    assert isinstance(parser.error_sink[0], PythonErrorNode)