import pytest

from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode
from parsers.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError
from parsers.DefaultParser import DefaultParser
from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode
from parsers.DefaultParser import Node, UnexpectedTokenErrorNode, DefConceptNode, NopNode
import ast


# Short node factories so the expected parse trees below stay readable.
def nop(): return NopNode()
def n(number): return NumberNode([], number)
def s(string, quote="'"): return StringNode([], string, quote)
def v(name): return VariableNode([], name)
def t(): return TrueNode([])
def f(): return FalseNode([])
def null(): return NullNode([])
def b(operator, left, right): return BinaryNode([], operator, left, right)


def compare_ast(left, right):
    # ast.dump renders ", ctx=Load()" and ", kind=None" only on some Python
    # versions; strip both so the dumps compare stably across versions.
    left_as_string = ast.dump(left)
    left_as_string = left_as_string.replace(", ctx=Load()", "")
    left_as_string = left_as_string.replace(", kind=None", "")
    right_as_string = right if isinstance(right, str) else ast.dump(right)
    right_as_string = right_as_string.replace(", ctx=Load()", "")
    right_as_string = right_as_string.replace(", kind=None", "")
    return left_as_string == right_as_string


def test_i_can_tokenize():
    source = "+*-/{}[]()    ,;:.?\n\n\r\r\r\nidentifier_0\t  \t10.15 10 'string\n' \"another string\"="
    tokens = list(Tokenizer(source))

    assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
    assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
    assert tokens[2] == Token(TokenKind.MINUS, "-", 2, 1, 3)
    assert tokens[3] == Token(TokenKind.SLASH, "/", 3, 1, 4)
    assert tokens[4] == Token(TokenKind.LBRACE, "{", 4, 1, 5)
    assert tokens[5] == Token(TokenKind.RBRACE, "}", 5, 1, 6)
    assert tokens[6] == Token(TokenKind.LBRACKET, "[", 6, 1, 7)
    assert tokens[7] == Token(TokenKind.RBRACKET, "]", 7, 1, 8)
    assert tokens[8] == Token(TokenKind.LPAR, "(", 8, 1, 9)
    assert tokens[9] == Token(TokenKind.RPAR, ")", 9, 1, 10)
    assert tokens[10] == Token(TokenKind.WHITESPACE, "    ", 10, 1, 11)
    assert tokens[11] == Token(TokenKind.COMMA, ",", 14, 1, 15)
    assert tokens[12] == Token(TokenKind.SEMICOLON, ";", 15, 1, 16)
    assert tokens[13] == Token(TokenKind.COLON, ":", 16, 1, 17)
    assert tokens[14] == Token(TokenKind.DOT, ".", 17, 1, 18)
    assert tokens[15] == Token(TokenKind.QMARK, "?", 18, 1, 19)
    assert tokens[16] == Token(TokenKind.NEWLINE, "\n", 19, 1, 20)
    assert tokens[17] == Token(TokenKind.NEWLINE, "\n\r", 20, 2, 1)
    assert tokens[18] == Token(TokenKind.NEWLINE, "\r", 22, 3, 1)
    assert tokens[19] == Token(TokenKind.NEWLINE, "\r\n", 23, 4, 1)
    assert tokens[20] == Token(TokenKind.IDENTIFIER, "identifier_0", 25, 5, 1)
    assert tokens[21] == Token(TokenKind.WHITESPACE, "\t  \t", 37, 5, 13)
    assert tokens[22] == Token(TokenKind.NUMBER, "10.15", 41, 5, 17)
    assert tokens[23] == Token(TokenKind.WHITESPACE, " ", 46, 5, 22)
    assert tokens[24] == Token(TokenKind.NUMBER, "10", 47, 5, 23)
    assert tokens[25] == Token(TokenKind.WHITESPACE, " ", 49, 5, 25)
    assert tokens[26] == Token(TokenKind.STRING, "'string\n'", 50, 5, 26)
    assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1)
    assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2)
    assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18)


@pytest.mark.parametrize("text, expected", [
    ("_ident", True),
    ("ident", True),
    ("ident123", True),
    ("ident_123", True),
    ("ident-like-this", True),
    ("àèùéû", True),
    ("011254", False),
    ("0abcd", False),
    ("-abcd", False),
])
def test_i_can_tokenize_identifiers(text, expected):
    tokens = list(Tokenizer(text))
    comparison = tokens[0].type == TokenKind.IDENTIFIER
    assert comparison == expected
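
# The next two test groups pin down string lexing. Judging from the assertions,
# Tokenizer.eat_string(index, line, column) returns the raw text consumed and
# the number of newlines crossed, counting "\n\r" and "\r\n" as single
# newlines, while an unterminated string makes the tokenizer raise LexerError
# carrying the offending text and its position.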


@pytest.mark.parametrize("text, error_text, index, line, column", [
    ("'string", "'string", 7, 1, 8),
    ('"string', '"string', 7, 1, 8),
    ('"a" + "string', '"string', 13, 1, 14),
    ('"a"\n\n"string', '"string', 12, 3, 8),
])
def test_i_can_detect_unfinished_strings(text, error_text, index, line, column):
    with pytest.raises(LexerError) as e:
        list(Tokenizer(text))

    assert e.value.text == error_text
    assert e.value.index == index
    assert e.value.line == line
    assert e.value.column == column


@pytest.mark.parametrize("text, expected_text, expected_newlines", [
    ("'foo'", "'foo'", 0),
    ('"foo"', '"foo"', 0),
    ("'foo\rbar'", "'foo\rbar'", 1),
    ("'foo\nbar'", "'foo\nbar'", 1),
    ("'foo\n\rbar'", "'foo\n\rbar'", 1),
    ("'foo\r\nbar'", "'foo\r\nbar'", 1),
    ("'foo\r\rbar'", "'foo\r\rbar'", 2),
    ("'foo\n\nbar'", "'foo\n\nbar'", 2),
    ("'foo\r\n\n\rbar'", "'foo\r\n\n\rbar'", 2),
    ("'\rfoo\rbar\r'", "'\rfoo\rbar\r'", 3),
    ("'\nfoo\nbar\n'", "'\nfoo\nbar\n'", 3),
    ("'\n\rfoo\r\n'", "'\n\rfoo\r\n'", 2),
    (r"'foo\'bar'", r"'foo\'bar'", 0),
    (r'"foo\"bar"', r'"foo\"bar"', 0),
    ('"foo"bar"', '"foo"', 0),
    ("'foo'bar'", "'foo'", 0),
])
def test_i_can_parse_strings(text, expected_text, expected_newlines):
    lexer = Tokenizer(text)
    text_found, nb_of_newlines = lexer.eat_string(0, 1, 1)

    assert nb_of_newlines == expected_newlines
    assert text_found == expected_text


@pytest.mark.parametrize("text", [
    "1", "3.1415", "0.5", "01", "-5", "-5.10"
])
def test_i_can_parse_numbers(text):
    tokens = list(Tokenizer(text))

    assert tokens[0].type == TokenKind.NUMBER
    assert tokens[0].value == text


@pytest.mark.parametrize("text, expected", [
    ("def", Keywords.DEF),
    ("concept", Keywords.CONCEPT),
    ("as", Keywords.AS),
    ("pre", Keywords.PRE),
    ("post", Keywords.POST),
])
def test_i_can_recognize_keywords(text, expected):
    tokens = list(Tokenizer(text))

    assert tokens[0].type == TokenKind.KEYWORD
    assert tokens[0].value == expected


# @pytest.mark.parametrize("text, expected", [
#     ("1", n(1)),
#     ("+1", n(1)),
#     ("-1", n(-1)),
#     ("'foo'", s("foo")),
#     ("identifier", v("identifier")),
#     ("true", t()),
#     ("false", f()),
#     ("null", null()),
#     ("1 * 2", b(TokenKind.STAR, n(1), n(2))),
#     ("1 * 2/3", b(TokenKind.STAR, n(1), b(TokenKind.SLASH, n(2), n(3)))),
#     ("1 + 2", b(TokenKind.PLUS, n(1), n(2))),
#     ("1 + 2 - 3", b(TokenKind.PLUS, n(1), b(TokenKind.MINUS, n(2), n(3)))),
#     ("1 + 2-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
#     ("1 + 2 +-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
#     ("1 + 2 * 3", b(TokenKind.PLUS, n(1), b(TokenKind.STAR, n(2), n(3)))),
#     ("1 * 2 + 3", b(TokenKind.PLUS, b(TokenKind.STAR, n(1), n(2)), n(3))),
#     ("(1 + 2) * 3", b(TokenKind.STAR, b(TokenKind.PLUS, n(1), n(2)), n(3))),
#     ("1 * (2 + 3)", b(TokenKind.STAR, n(1), b(TokenKind.PLUS, n(2), n(3)))),
# ])
# def test_i_can_parse_simple_expression(text, expected):
#     parser = DefaultParser(text, None)
#     ast = parser.parse()
#     assert ast.is_same(expected)
#
#
# @pytest.mark.parametrize("text, token_found, expected_tokens", [
#     ("1+", TokenKind.EOF,
#      [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, 'true', 'false', 'null', TokenKind.LPAR]),
#     ("(1+1", TokenKind.EOF, [TokenKind.RPAR])
# ])
# def test_i_can_detect_unexpected_end_of_code(text, token_found, expected_tokens):
#     parser = DefaultParser(text, None)
#     parser.parse()
#
#     assert parser.has_error
#     assert parser.error_sink[0].tokens[0].type == token_found
#     assert parser.error_sink[0].expected_tokens == expected_tokens
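
# The remaining tests target `def concept` declarations. As the cases below
# show, a declaration names a concept with a mix of identifiers and quoted
# strings, optionally constrains it with `where`/`pre`/`post` clauses, and
# attaches a Python body after `as`, either inline or as an indented block
# introduced by a colon; the embedded Python is handed off to PythonParser.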


@pytest.mark.parametrize("text, expected_name, expected_expr", [
    ("def concept hello", "hello", nop()),
    ("def concept hello ", "hello", nop()),
    ("def concept a+b", "a + b", nop()),
    ("def concept 'a+b'", "a+b", nop()),
    ("def concept 'a+b'+c", "a+b + c", nop()),
    ("def concept 'as if'", "as if", nop()),
    ("def concept 'as' if", "as if", nop()),
    ("def concept hello as 'hello'", "hello", ast.Expression(body=ast.Str(s='hello'))),
    ("def concept hello as 1", "hello", ast.Expression(body=ast.Num(n=1))),
    ("def concept h as 1 + 1", "h",
     ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))),
])
def test_i_can_parse_def_concept(text, expected_name, expected_expr):
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()

    assert isinstance(tree, DefConceptNode)
    assert tree.name == expected_name
    if isinstance(tree.body, PythonNode):
        assert compare_ast(tree.body.ast, expected_expr)
    else:
        assert tree.body == expected_expr


def test_i_can_parse_complex_def_concept_statement():
    text = """def concept a plus b
where a,b
pre isinstance(a, int) and isinstance(b, float)
post isinstance(res, int)
as res = a + b
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()

    assert not parser.has_error
    assert isinstance(tree, DefConceptNode)
    assert tree.name == "a plus b"
    assert tree.where.source == "a,b"
    assert isinstance(tree.where.ast, ast.Expression)
    assert tree.pre.source == "isinstance(a, int) and isinstance(b, float)"
    assert isinstance(tree.pre.ast, ast.Expression)
    assert tree.post.source == "isinstance(res, int)"
    assert isinstance(tree.post.ast, ast.Expression)
    assert tree.body.source == "res = a + b"
    assert isinstance(tree.body.ast, ast.Module)


def test_i_can_use_colon_to_declare_indentation():
    text = """
def concept add one to a as:
    def func(x):
        return x+1
    func(a)
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()

    assert not parser.has_error
    assert isinstance(tree, DefConceptNode)


def test_i_can_use_colon_to_declare_indentation2():
    text = """
def concept add one to a as:
    def func(x):
        return x+1
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()

    assert not parser.has_error
    assert isinstance(tree, DefConceptNode)


def test_without_colon_i_get_an_indent_error():
    text = """
def concept add one to a as
    def func(x):
        return x+1
    func(a)
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()

    assert parser.has_error
    assert isinstance(tree, DefConceptNode)
    assert isinstance(parser.error_sink[0].exception, IndentationError)


def test_i_can_detect_error():
    """
    In this test, func(b) is not correctly indented even though a colon was
    given after the 'as' keyword.
    """
    text = """
def concept add one to a as:
    def func(x):
        return x+1
    func(a)
func(b)
"""
    parser = DefaultParser(text, PythonParser)
    tree = parser.parse()

    assert parser.has_error
    assert isinstance(tree, DefConceptNode)
    assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
    # check that the error is caused by 'func(b)' on line 6, column 1
    assert parser.error_sink[0].tokens[0].line == 6
    assert parser.error_sink[0].tokens[0].column == 1


@pytest.mark.parametrize("text, token_found, expected_tokens", [
    ("def hello as 'hello'", "hello", [Keywords.CONCEPT]),
    ("def concept as", Keywords.AS, [""]),
])
def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens):
    parser = DefaultParser(text, PythonParser)
    parser.parse()

    assert parser.has_error
    assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
    assert parser.error_sink[0].tokens[0].value == token_found
    assert parser.error_sink[0].expected_tokens == expected_tokens
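
# As the last test shows, an invalid Python fragment (here a trailing "1+") in
# any clause, whether `where`, `pre`, `post`, or the `as` body, is reported as
# a PythonErrorNode rather than a generic parse error.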

@pytest.mark.parametrize("text", [
    "def concept hello where 1+",
    "def concept hello pre 1+",
    "def concept hello post 1+",
    "def concept hello as 1+",
])
def test_i_can_detect_error_in_declaration(text):
    parser = DefaultParser(text, PythonParser)
    parser.parse()

    assert parser.has_error
    assert isinstance(parser.error_sink[0], PythonErrorNode)