Added DefaultParser

2019-10-29 18:39:51 +01:00
parent 101319b8b6
commit 8107e149b9
18 changed files with 1581 additions and 376 deletions
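In short, the new DefaultParser consumes the concept DSL and delegates embedded expressions to a pluggable secondary parser. A minimal usage sketch, inferred from the calls the tests below make (the error-handling surface is an assumption based on the assertions, not documented API):

    from parsers.DefaultParser import DefaultParser
    from parsers.PythonParser import PythonParser

    # Parse a concept definition; Python snippets inside it are handed
    # to the secondary parser passed as the second argument.
    parser = DefaultParser("def concept h as 1 + 1", PythonParser)
    tree = parser.parse()  # a DefConceptNode on success

    # The parser recovers instead of raising: failures end up as error
    # nodes in parser.error_sink, flagged by parser.has_error.
    if parser.has_error:
        first = parser.error_sink[0]
        print(first.tokens[0], getattr(first, "expected_tokens", None))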
+284 -43
@@ -1,39 +1,78 @@
 import pytest
-from parsers.defaultparser import TokenIter, Token, Tokens
+from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode
+from parsers.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError
+from parsers.DefaultParser import DefaultParser
+from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode
+from parsers.DefaultParser import Node, UnexpectedTokenErrorNode, DefConceptNode, NopNode
+import ast
 
 
+def nop():
+    return NopNode()
+
+
+def n(number):
+    return NumberNode([], number)
+
+
+def s(string, quote="'"):
+    return StringNode([], string, quote)
+
+
+def v(name):
+    return VariableNode([], name)
+
+
+def t():
+    return TrueNode([])
+
+
+def f():
+    return FalseNode([])
+
+
+def null():
+    return NullNode([])
+
+
+def b(operator, left, right):
+    return BinaryNode([], operator, left, right)
+
+
 def test_i_can_tokenize():
-    source = "+*-/{}[]()    ,;:.?\n\n\r\r\r\nidentifier_0\t  \t10.15 10 'string\n' \"another string\""
-    tokens = list(TokenIter(source))
-    assert tokens[0] == Token(Tokens.PLUS, "+", 0, 1, 1)
-    assert tokens[1] == Token(Tokens.STAR, "*", 1, 1, 2)
-    assert tokens[2] == Token(Tokens.MINUS, "-", 2, 1, 3)
-    assert tokens[3] == Token(Tokens.SLASH, "/", 3, 1, 4)
-    assert tokens[4] == Token(Tokens.LBRACE, "{", 4, 1, 5)
-    assert tokens[5] == Token(Tokens.RBRACE, "}", 5, 1, 6)
-    assert tokens[6] == Token(Tokens.LBRACKET, "[", 6, 1, 7)
-    assert tokens[7] == Token(Tokens.RBRACKET, "]", 7, 1, 8)
-    assert tokens[8] == Token(Tokens.LPAR, "(", 8, 1, 9)
-    assert tokens[9] == Token(Tokens.RPAR, ")", 9, 1, 10)
-    assert tokens[10] == Token(Tokens.WHITESPACE, "    ", 10, 1, 11)
-    assert tokens[11] == Token(Tokens.COMMA, ",", 14, 1, 15)
-    assert tokens[12] == Token(Tokens.SEMICOLON, ";", 15, 1, 16)
-    assert tokens[13] == Token(Tokens.COLON, ":", 16, 1, 17)
-    assert tokens[14] == Token(Tokens.DOT, ".", 17, 1, 18)
-    assert tokens[15] == Token(Tokens.QMARK, "?", 18, 1, 19)
-    assert tokens[16] == Token(Tokens.NEWLINE, "\n", 19, 1, 20)
-    assert tokens[17] == Token(Tokens.NEWLINE, "\n\r", 20, 2, 1)
-    assert tokens[18] == Token(Tokens.NEWLINE, "\r", 22, 3, 1)
-    assert tokens[19] == Token(Tokens.NEWLINE, "\r\n", 23, 4, 1)
-    assert tokens[20] == Token(Tokens.IDENTIFIER, "identifier_0", 25, 5, 1)
-    assert tokens[21] == Token(Tokens.WHITESPACE, "\t  \t", 37, 5, 13)
-    assert tokens[22] == Token(Tokens.NUMBER, "10.15", 41, 5, 17)
-    assert tokens[23] == Token(Tokens.WHITESPACE, " ", 46, 5, 22)
-    assert tokens[24] == Token(Tokens.NUMBER, "10", 47, 5, 23)
-    assert tokens[25] == Token(Tokens.WHITESPACE, " ", 49, 5, 25)
-    assert tokens[26] == Token(Tokens.STRING, "'string\n'", 50, 5, 26)
-    assert tokens[27] == Token(Tokens.WHITESPACE, " ", 59, 6, 1)
-    assert tokens[28] == Token(Tokens.STRING, '"another string"', 60, 6, 2)
+    source = "+*-/{}[]()    ,;:.?\n\n\r\r\r\nidentifier_0\t  \t10.15 10 'string\n' \"another string\"="
+    tokens = list(Tokenizer(source))
+    assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
+    assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
+    assert tokens[2] == Token(TokenKind.MINUS, "-", 2, 1, 3)
+    assert tokens[3] == Token(TokenKind.SLASH, "/", 3, 1, 4)
+    assert tokens[4] == Token(TokenKind.LBRACE, "{", 4, 1, 5)
+    assert tokens[5] == Token(TokenKind.RBRACE, "}", 5, 1, 6)
+    assert tokens[6] == Token(TokenKind.LBRACKET, "[", 6, 1, 7)
+    assert tokens[7] == Token(TokenKind.RBRACKET, "]", 7, 1, 8)
+    assert tokens[8] == Token(TokenKind.LPAR, "(", 8, 1, 9)
+    assert tokens[9] == Token(TokenKind.RPAR, ")", 9, 1, 10)
+    assert tokens[10] == Token(TokenKind.WHITESPACE, "    ", 10, 1, 11)
+    assert tokens[11] == Token(TokenKind.COMMA, ",", 14, 1, 15)
+    assert tokens[12] == Token(TokenKind.SEMICOLON, ";", 15, 1, 16)
+    assert tokens[13] == Token(TokenKind.COLON, ":", 16, 1, 17)
+    assert tokens[14] == Token(TokenKind.DOT, ".", 17, 1, 18)
+    assert tokens[15] == Token(TokenKind.QMARK, "?", 18, 1, 19)
+    assert tokens[16] == Token(TokenKind.NEWLINE, "\n", 19, 1, 20)
+    assert tokens[17] == Token(TokenKind.NEWLINE, "\n\r", 20, 2, 1)
+    assert tokens[18] == Token(TokenKind.NEWLINE, "\r", 22, 3, 1)
+    assert tokens[19] == Token(TokenKind.NEWLINE, "\r\n", 23, 4, 1)
+    assert tokens[20] == Token(TokenKind.IDENTIFIER, "identifier_0", 25, 5, 1)
+    assert tokens[21] == Token(TokenKind.WHITESPACE, "\t  \t", 37, 5, 13)
+    assert tokens[22] == Token(TokenKind.NUMBER, "10.15", 41, 5, 17)
+    assert tokens[23] == Token(TokenKind.WHITESPACE, " ", 46, 5, 22)
+    assert tokens[24] == Token(TokenKind.NUMBER, "10", 47, 5, 23)
+    assert tokens[25] == Token(TokenKind.WHITESPACE, " ", 49, 5, 25)
+    assert tokens[26] == Token(TokenKind.STRING, "'string\n'", 50, 5, 26)
+    assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1)
+    assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2)
+    assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18)
 
 
 @pytest.mark.parametrize("text, expected", [
@@ -48,11 +87,26 @@ def test_i_can_tokenize():
("-abcd", False)
])
def test_i_can_tokenize_identifiers(text, expected):
tokens = list(TokenIter(text))
comparison = tokens[0].type == Tokens.IDENTIFIER
tokens = list(Tokenizer(text))
comparison = tokens[0].type == TokenKind.IDENTIFIER
assert comparison == expected
@pytest.mark.parametrize("text, error_text, index, line, column", [
("'string", "'string", 7, 1, 8),
('"string', '"string', 7, 1, 8),
('"a" + "string', '"string', 13, 1, 14),
('"a"\n\n"string', '"string', 12, 3, 8),
])
def test_i_can_detect_unfinished_strings(text, error_text, index, line, column):
with pytest.raises(LexerError) as e:
list(Tokenizer(text))
assert e.value.text == error_text
assert e.value.index == index
assert e.value.line == line
assert e.value.column == column
@pytest.mark.parametrize("text, expected_text, expected_newlines", [
("'foo'", "'foo'", 0),
('"foo"', '"foo"', 0),
@@ -72,8 +126,8 @@ def test_i_can_tokenize_identifiers(text, expected):
("'foo'bar'", "'foo'", 0),
])
def test_i_can_parse_strings(text, expected_text, expected_newlines):
lexer = TokenIter(text)
text_found, nb_of_newlines = lexer.eat_string(0)
lexer = Tokenizer(text)
text_found, nb_of_newlines = lexer.eat_string(0, 1, 1)
assert nb_of_newlines == expected_newlines
assert text_found == expected_text
@@ -83,14 +137,201 @@ def test_i_can_parse_strings(text, expected_text, expected_newlines):
"1", "3.1415", "0.5", "01", "-5", "-5.10"
])
def test_i_can_parse_numbers(text):
tokens = list(TokenIter(text))
assert tokens[0].type == Tokens.NUMBER
tokens = list(Tokenizer(text))
assert tokens[0].type == TokenKind.NUMBER
assert tokens[0].value == text
@pytest.mark.parametrize("text", [
"def", "concept", "as", "pre", "post"
@pytest.mark.parametrize("text, expected", [
("def", Keywords.DEF),
("concept", Keywords.CONCEPT),
("as", Keywords.AS),
("pre", Keywords.PRE),
("post", Keywords.POST)
])
def test_i_can_recognize_keywords(text):
tokens = list(TokenIter(text))
assert tokens[0].type == Tokens.KEYWORD
def test_i_can_recognize_keywords(text, expected):
tokens = list(Tokenizer(text))
assert tokens[0].type == TokenKind.KEYWORD
assert tokens[0].value == expected
@pytest.mark.parametrize("text, expected", [
("1", n(1)),
("+1", n(1)),
("-1", n(-1)),
("'foo'", s("foo")),
("identifier", v("identifier")),
("true", t()),
("false", f()),
("null", null()),
("1 * 2", b(TokenKind.STAR, n(1), n(2))),
("1 * 2/3", b(TokenKind.STAR, n(1), b(TokenKind.SLASH, n(2), n(3)))),
("1 + 2", b(TokenKind.PLUS, n(1), n(2))),
("1 + 2 - 3", b(TokenKind.PLUS, n(1), b(TokenKind.MINUS, n(2), n(3)))),
("1 + 2-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
("1 + 2 +-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
("1 + 2 * 3", b(TokenKind.PLUS, n(1), b(TokenKind.STAR, n(2), n(3)))),
("1 * 2 + 3", b(TokenKind.PLUS, b(TokenKind.STAR, n(1), n(2)), n(3))),
("(1 + 2) * 3", b(TokenKind.STAR, b(TokenKind.PLUS, n(1), n(2)), n(3))),
("1 * (2 + 3)", b(TokenKind.STAR, n(1), b(TokenKind.PLUS, n(2), n(3)))),
])
def test_i_can_parse_simple_expression(text, expected):
parser = DefaultParser(text, None)
ast = parser.parse()
assert ast.is_same(expected)
@pytest.mark.parametrize("text, token_found, expected_tokens", [
("1+", TokenKind.EOF,
[TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, 'true', 'false', 'null', TokenKind.LPAR]),
("(1+1", TokenKind.EOF, [TokenKind.RPAR])
])
def test_i_can_detect_unexpected_end_of_code(text, token_found, expected_tokens):
parser = DefaultParser(text, None)
parser.parse()
assert parser.has_error
assert parser.error_sink[0].tokens[0].type == token_found
assert parser.error_sink[0].expected_tokens == expected_tokens
@pytest.mark.parametrize("text, expected_name, expected_expr", [
("def concept hello", "hello", nop()),
("def concept hello ", "hello", nop()),
("def concept a+b", "a + b", nop()),
("def concept 'a+b'", "a+b", nop()),
("def concept 'a+b'+c", "a+b + c", nop()),
("def concept 'as if'", "as if", nop()),
("def concept 'as' if", "as if", nop()),
("def concept hello as 'hello'", "hello", ast.Expression(body=ast.Str(s='hello'))),
("def concept hello as 1", "hello", ast.Expression(body=ast.Num(n=1))),
("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))),
])
def test_i_can_parse_def_concept(text, expected_name, expected_expr):
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
assert isinstance(tree, DefConceptNode)
assert tree.name == expected_name
if isinstance(tree.body, PythonNode):
assert ast.dump(tree.body.ast) == ast.dump(expected_expr)
else:
assert tree.body == expected_expr
+
+
+def compare_ast(left, right):
+    left_as_string = ast.dump(left)
+    left_as_string = left_as_string.replace(", ctx=Load()", "")
+    right_as_string = right if isinstance(right, str) else ast.dump(right)
+    right_as_string = right_as_string.replace(", ctx=Load()", "")
+    return left_as_string == right_as_string
+
+
+def test_i_can_parse_complex_def_concept_statement():
+    text = """def concept a plus b
+    where a,b
+    pre isinstance(a, int) and isinstance(b, float)
+    post isinstance(res, int)
+    as res = a + b
+    """
+    parser = DefaultParser(text, PythonParser)
+    tree = parser.parse()
+    assert not parser.has_error
+    assert isinstance(tree, DefConceptNode)
+    assert tree.name == "a plus b"
+    assert tree.where.source == "a,b"
+    assert isinstance(tree.where.ast, ast.Expression)
+    assert tree.pre.source == "isinstance(a, int) and isinstance(b, float)"
+    assert isinstance(tree.pre.ast, ast.Expression)
+    assert tree.post.source == "isinstance(res, int)"
+    assert isinstance(tree.post.ast, ast.Expression)
+    assert tree.body.source == "res = a + b"
+    assert isinstance(tree.body.ast, ast.Module)
+
+
+def test_i_can_use_colon_to_declare_indentation():
+    text = """
+def concept add one to a as:
+    def func(x):
+        return x+1
+    func(a)
+"""
+    parser = DefaultParser(text, PythonParser)
+    tree = parser.parse()
+    assert not parser.has_error
+    assert isinstance(tree, DefConceptNode)
+
+
+def test_i_can_use_colon_to_declare_indentation2():
+    text = """
+def concept add one to a as:
+    def func(x):
+        return x+1
+"""
+    parser = DefaultParser(text, PythonParser)
+    tree = parser.parse()
+    assert not parser.has_error
+    assert isinstance(tree, DefConceptNode)
+
+
+def test_without_colon_i_get_an_indent_error():
+    text = """
+def concept add one to a as
+    def func(x):
+        return x+1
+    func(a)
+"""
+    parser = DefaultParser(text, PythonParser)
+    tree = parser.parse()
+    assert parser.has_error
+    assert isinstance(tree, DefConceptNode)
+    assert isinstance(parser.error_sink[0].exception, IndentationError)
+
+
+def test_i_can_detect_error():
"""
In this test, func(b) is not correctly indented while colon is specified after the 'as' keyword
"""
text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
func(b)
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
assert parser.has_error
assert isinstance(tree, DefConceptNode)
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
# check that the error is caused by 'func(b)'
assert parser.error_sink[0].tokens[0].line == 6
assert parser.error_sink[0].tokens[0].column == 1
@pytest.mark.parametrize("text, token_found, expected_tokens", [
("def hello as 'hello'", "hello", [Keywords.CONCEPT]),
("def concept as", Keywords.AS, ["<name>"]),
])
def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens):
parser = DefaultParser(text, PythonParser)
parser.parse()
assert parser.has_error
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
assert parser.error_sink[0].tokens[0].value == token_found
assert parser.error_sink[0].expected_tokens == expected_tokens
@pytest.mark.parametrize("text", [
"def concept hello where 1+",
"def concept hello pre 1+",
"def concept hello post 1+",
"def concept hello as 1+"
])
def test_i_can_detect_error_in_declaration(text):
parser = DefaultParser(text, PythonParser)
parser.parse()
assert parser.has_error
assert isinstance(parser.error_sink[0], PythonErrorNode)
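One detail worth noting from the simple-expression cases in this file: in the expected trees, chains of same-precedence operators nest to the right, and a minus sign glued to a digit is lexed as a negative number literal rather than a subtraction. A small restatement of two rows using the helpers defined at the top of the file (interpretation only; the grammar itself is not shown in this diff):

    # "1 + 2 - 3" is expected to group as 1 + (2 - 3), i.e. right-nested:
    expected = b(TokenKind.PLUS, n(1), b(TokenKind.MINUS, n(2), n(3)))

    # "1 + 2-3" is expected to lex "-3" as a number, so the tree becomes
    # 1 + (2 + (-3)) with PLUS substituted for the glued minus:
    expected = b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))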
+52 -15
@@ -1,12 +1,17 @@
+import ast
 import pytest
 import os
 from os import path
 import shutil
-from core.concept import Concept
+from core.concept import Concept, ConceptParts
 from core.sheerka import Sheerka
+from parsers.DefaultParser import DefConceptNode, DefaultParser
+from parsers.PythonParser import PythonParser
 
 tests_root = path.abspath("../build/tests")
+root_folder = "init_folder"
 
 
 @pytest.fixture(autouse=True)
@@ -25,8 +30,6 @@ def init_test():
 def test_root_folder_is_created_after_initialization():
-    root_folder = "init_folder"
-
     return_value = Sheerka().initialize(root_folder)
     assert return_value.status, "initialisation should be successful"
     assert Sheerka().concept_equals(return_value.value, Sheerka().get_concept("success"))
@@ -34,22 +37,56 @@ def test_root_folder_is_created_after_initialization():
 def test_lists_of_concepts_is_initialized():
-    root_folder = "init_folder"
-
     Sheerka().initialize(root_folder)
     assert len(Sheerka().concepts) > 1
 
 
-def test_null_concept_are_equals():
-    concept1 = Concept("test1")
-    concept2 = Concept("test2")
-    concept3 = Concept("test3")
-
+# def test_null_concept_are_equals():
+#     concept1 = Concept("test1")
+#     concept2 = Concept("test2")
+#     concept3 = Concept("test3")
+#
+#     assert not Sheerka.concept_equals(concept1, None)
+#     assert not Sheerka.concept_equals(None, concept1)
+#     assert not Sheerka.concept_equals(concept1, concept2)
+#     assert not Sheerka.concept_equals(concept1, concept3)
+#
+#     assert Sheerka.concept_equals(None, None)
+#     assert Sheerka.concept_equals(concept1, concept1)
-    assert not Sheerka.concept_equals(concept1, None)
-    assert not Sheerka.concept_equals(None, concept1)
-    assert not Sheerka.concept_equals(concept1, concept2)
-    assert not Sheerka.concept_equals(concept1, concept3)
+
+
+def get_concept():
+    text = """
+def concept a+b
+    where isinstance(a, int) and isinstance(b, int)
+    pre isinstance(a, int) and isinstance(b, int)
+    post isinstance(res, int)
+    as:
+        def func(x,y):
+            return x+y
+        func(a,b)
+"""
+    parser = DefaultParser(text, PythonParser)
+    return parser.parse()
-
-    assert Sheerka.concept_equals(None, None)
-    assert Sheerka.concept_equals(concept1, concept1)
+
+
+def test_i_can_add_a_concept():
+    concept = get_concept()
+    sheerka = Sheerka()
+    sheerka.initialize(root_folder)
+    res = sheerka.add_concept(concept)
+    assert res.status
+    assert res.value == Concept(
+        name="a + b",
+        where="isinstance(a, int) and isinstance(b, int)",
+        pre="isinstance(a, int) and isinstance(b, int)",
+        post="isinstance(res, int)",
+        body="def func(x,y):\n    return x+y\nfunc(a,b)")
+    assert isinstance(res.value.codes[ConceptParts.WHERE], ast.Expression)
+    assert isinstance(res.value.codes[ConceptParts.PRE], ast.Expression)
+    assert isinstance(res.value.codes[ConceptParts.POST], ast.Expression)
+    assert isinstance(res.value.codes[ConceptParts.BODY], ast.Module)
+
+
+# def test_i_cannot_add_the_same_concept_twice():
+#     concept1 = DefConceptNode(name="concept")
+#     sheerka = Sheerka
+84 -1
@@ -1,3 +1,5 @@
+import hashlib
 import pytest
 import os
 from os import path
@@ -6,6 +8,8 @@ from datetime import date, datetime
 import shutil
 import json
+from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer
 
 tests_root = path.abspath("../build/tests")
@@ -70,6 +74,33 @@ class ObjNoKey:
return f"ObjNoKey({self.a}, {self.b})"
class ObjDumpJson:
def __init__(self, key, value):
self.key = key
self.value = value
def __eq__(self, obj):
return isinstance(obj, ObjDumpJson) and \
self.key == obj.key and \
self.value == obj.value
def __repr__(self):
return f"ObjDumpJson({self.key}, {self.value})"
def get_key(self):
return self.key
def get_digest(self):
return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest()
def to_dict(self):
return self.__dict__
def from_dict(self, as_dict):
self.value = as_dict["value"]
self.key = as_dict["key"]
@pytest.fixture(autouse=True)
def init_test():
if path.exists(tests_root):
@@ -570,4 +601,56 @@ def test_i_can_test_than_an_entry_exits():
     assert not sdp.exists("entry")
     sdp.add(Event("event"), "entry", "value")
-    assert sdp.exists("entry")
+    assert sdp.exists("entry")
+
+
+def test_i_can_save_and_load_object_with_history():
+    sdp = SheerkaDataProvider(".sheerka")
+    obj = ObjDumpJson("my_key", "value1")
+    sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(obj)))
+    entry, key = sdp.add_ref("Obj", obj)
+    loaded = sdp.get(entry, key)
+    history = getattr(loaded, Serializer.HISTORY)
+    assert key == obj.key
+    assert entry == "Obj"
+    assert loaded.key == obj.key
+    assert loaded.value == obj.value
+    assert getattr(history, Serializer.USERNAME) == "kodjo"
+    assert getattr(history, Serializer.MODIFICATION_DATE) != ""
+    assert getattr(history, Serializer.PARENTS) == []
+    assert os.path.exists(sdp.get_obj_path(sdp.ObjectsFolder, obj.get_digest()))
+
+    # save a second time with no modification
+    previous_modification_time = getattr(history, Serializer.MODIFICATION_DATE)
+    previous_parents = getattr(history, Serializer.PARENTS)
+    sdp.add_ref("Obj", loaded)
+    loaded = sdp.get(entry, key)
+    history = getattr(loaded, Serializer.HISTORY)
+    assert getattr(history, Serializer.MODIFICATION_DATE) == previous_modification_time
+    assert getattr(history, Serializer.PARENTS) == previous_parents
+
+    # save again, but with a modification
+    previous_digest = loaded.get_digest()
+    loaded.value = "value2"
+    sdp.add_ref("Obj", loaded)
+    loaded2 = sdp.get(entry, key)
+    history2 = getattr(loaded2, Serializer.HISTORY)
+    assert loaded2.key == loaded.key
+    assert loaded2.value == loaded.value
+    assert getattr(history2, Serializer.USERNAME) == "kodjo"
+    assert getattr(history2, Serializer.MODIFICATION_DATE) != ""
+    assert getattr(history2, Serializer.PARENTS) == [previous_digest]
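The three phases of this test pin down the history model: an object's content digest doubles as its identity in the object store, an unchanged save is a no-op, and a modified save records the previous digest as the new revision's parent. A small self-contained sketch of that digest chain, reusing only the ObjDumpJson recipe from this diff (the chaining itself is inferred from the assertions, not taken from SheerkaDataProvider):

    import hashlib

    def digest(key, value):
        # same recipe as ObjDumpJson.get_digest above
        return hashlib.sha256(f"Concept:{key}{value}".encode("utf-8")).hexdigest()

    d1 = digest("my_key", "value1")
    d2 = digest("my_key", "value2")  # changed payload, changed digest
    assert d1 != d2
    parents = {d1: [], d2: [d1]}  # the second revision points back to the first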
+43 -3
@@ -1,16 +1,56 @@
 import pytest
+from dataclasses import dataclass
 from sdp.sheerkaDataProvider import Event
-from sdp.sheerkaSerializer import Serializer
+from sdp.sheerkaSerializer import Serializer, ObjectSerializer, SerializerContext, BaseSerializer
 from datetime import datetime
+
+
+@dataclass()
+class Obj:
+    key: str = ""
+    prop1: str = ""
+
+    def from_dict(self, json_object):
+        self.prop1 = json_object["prop1"]
+        self.key = json_object["key"]
+        return self
+
+    def to_dict(self):
+        return self.__dict__
 
 
 def test_i_can_serialize_an_event():
     event = Event("test", user="user", date=datetime.fromisoformat("2019-10-21T10:20:30.999"))
     serializer = Serializer()
-    stream = serializer.serialize(event)
-    loaded = serializer.deserialize(stream)
+    stream = serializer.serialize(event, None)
+    loaded = serializer.deserialize(stream, None)
     assert event.version == loaded.version
     assert event.user == loaded.user
     assert event.date == loaded.date
     assert event.message == loaded.message
+
+
+def test_i_can_serialize_an_object():
+    obj = Obj("10", "value")
+    serializer = Serializer()
+    serializer.register(ObjectSerializer("tests.test_sheerkaSerializer.Obj"))
+    context = SerializerContext("kodjo", "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b")
+    stream = serializer.serialize(obj, context)
+    loaded = serializer.deserialize(stream, context)
+    assert getattr(loaded, Serializer.HISTORY)[Serializer.USERNAME] == "kodjo"
+    assert getattr(loaded, Serializer.HISTORY)[Serializer.MODIFICATION_DATE] != ""
+    assert getattr(loaded, Serializer.HISTORY)[Serializer.PARENTS] == []
+    assert loaded.key == "10"
+    assert loaded.prop1 == "value"
+
+
+@pytest.mark.parametrize("obj, expected", [
+    (Obj("10", "value"), "tests.test_sheerkaSerializer.Obj")
+])
+def test_get_full_qualified_name(obj, expected):
+    assert expected == BaseSerializer.get_full_qualified_name(obj)
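The expected value here matches what Python's standard module and qualified-name attributes produce for a class instance, so get_full_qualified_name is presumably equivalent to the sketch below (an assumption; the real BaseSerializer implementation is not part of this diff):

    def get_full_qualified_name(obj):
        # Obj defined in tests/test_sheerkaSerializer.py -> "tests.test_sheerkaSerializer.Obj"
        return f"{type(obj).__module__}.{type(obj).__qualname__}"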