Added DefaultParser

2019-10-29 18:39:51 +01:00
parent 101319b8b6
commit 8107e149b9
18 changed files with 1581 additions and 376 deletions
+73 -10
@@ -1,29 +1,92 @@
import hashlib
from enum import Enum
class ConceptParts(Enum):
WHERE = "where"
PRE = "pre"
POST = "post"
BODY = "body"
class Concept:
"""
Default concept object
A concept is the base object of our universe
Everything is a concept
"""
props_to_serialize = ("id", "name", "where", "pre", "post", "body", "desc")
- concepts_id = 0
key_name = "concepts"
- def __init__(self, name, is_builtin=False):
def __init__(self, name=None, is_builtin=False, where=None, pre=None, post=None, body=None, desc=None):
self.name = name
self.is_builtin = is_builtin
- self.pre = None # list of pre conditions before calling the main function
- self.post = None # list of post conditions after calling the main function
- self.main = None # main method, can also be the value of the concept
- self.id = Concept.concepts_id
- Concept.concepts_id = Concept.concepts_id + 1
self.where = where # condition to recognize variables in name
self.pre = pre # list of pre conditions before calling the main function
self.post = post # list of post conditions after calling the main function
self.body = body # main method, can also be the value of the concept
self.desc = desc
self.key = None
self.parent = None
self.props = [] # list of Property for this concept
self.functions = {} # list of helper functions
self.codes = {}
- def __str__(self):
- return f"({self.id}){self.name}"
def __repr__(self):
- return f"({self.id}){self.name}"
return f"({self.key}){self.name}"
def __eq__(self, other):
if not isinstance(other, Concept):
return False
return self.name == other.name and \
self.where == other.where and \
self.pre == other.pre and \
self.post == other.post and \
self.body == other.body
def __hash__(self):
return hash(self.name)
def add_codes(self, codes):
"""
From a dict of <ConceptParts, AST>
fill the codes
:param codes:
:return:
"""
possibles_codes = set(item.value for item in ConceptParts)
if codes is None:
return
for key in codes:
if key in possibles_codes:
self.codes[ConceptParts(key)] = codes[key]
def get_digest(self):
"""
Returns the digest of the concept
:return: hex form of the sha256
"""
return hashlib.sha256(f"Concept:{self.name}{self.pre}{self.post}{self.body}".encode("utf-8")).hexdigest()
def to_dict(self):
props_as_dict = dict((prop, getattr(self, prop)) for prop in self.props_to_serialize)
return props_as_dict
def from_dict(self, as_dict):
for prop in self.props_to_serialize:
setattr(self, prop, as_dict[prop])
return self
class ErrorConcept(Concept):
def __init__(self, where=None, pre=None, post=None, body=None, desc=None):
Concept.__init__(self, "error", is_builtin=True, where=where, pre=pre, post=post, body=body, desc=desc)
def __repr__(self):
return f"{self.name} : {self.body}"
class Property:
+49 -9
@@ -1,8 +1,9 @@
import os
from dataclasses import dataclass
- from core.concept import Concept
from core.concept import Concept, ErrorConcept
from parsers.PythonParser import PythonParser
- from sdp.sheerkaDataProvider import SheerkaDataProvider
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event
from parsers.DefaultParser import DefaultParser, DefConceptNode
class Singleton(type):
@@ -54,6 +55,7 @@ class Sheerka(Concept, metaclass=Singleton):
self.create_builtin_concepts()
self.sdp = None
self.parsers = []
def create_builtin_concepts(self):
"""
@@ -76,11 +78,38 @@ class Sheerka(Concept, metaclass=Singleton):
try:
self.sdp = SheerkaDataProvider(root_folder)
self.parsers.append(lambda text: DefaultParser(text, PythonParser))
except IOError as e:
return ReturnValue(False, self.get_concept(Sheerka.ERROR_CONCEPT_NAME, True), e)
return ReturnValue(True, self.get_concept(Sheerka.SUCCESS_CONCEPT_NAME, True))
def eval(self, text):
#evt_digest = self.sdp.save_event(Event(text))
result = self.try_parse(text)
return_values = []
for parser_name, status, node in result:
if not status:
return_values.append(ReturnValue(False, ErrorConcept(body=node)))
elif status and isinstance(node, DefConceptNode):
return_values.append(self.add_concept(node))
return return_values
def try_parse(self, text):
result = []
for parser in self.parsers:
p = parser(text)
# try:
# tree = p.parse()
# result.append((p.name, tree))
# except Exception as e:
# result.append((p.name, e))
tree = p.parse()
result.append((p.name, not p.has_error, p.error_sink if p.has_error else tree))
return result
def get_concept(self, name, is_builtin=False):
"""
Given a concept name, tries to find it
@@ -93,6 +122,22 @@ class Sheerka(Concept, metaclass=Singleton):
return concept
return self.concepts[1]
def add_concept(self, def_concept_node: DefConceptNode):
"""
Adds a new concept to the system
:param def_concept_node: DefConceptNode
:return: ReturnValue holding the new concept
"""
concept = Concept(def_concept_node.name)
for prop in ("where", "pre", "post", "body"):
concept_part_node = getattr(def_concept_node, prop)
value = concept_part_node.source if hasattr(concept_part_node, "source") else ""
setattr(concept, prop, value)
concept.add_codes(def_concept_node.get_codes())
return ReturnValue(True, concept)
@staticmethod
def concept_equals(concept1, concept2):
"""True if the two concepts refer to the same concept"""
@@ -102,9 +147,4 @@ class Sheerka(Concept, metaclass=Singleton):
if concept1 is None or concept2 is None:
return False
- return concept1.id == concept2.id
return concept1.key == concept2.key
- def record_event(self, event):
- self.sdp.save_event(event)
+1 -1
@@ -84,7 +84,7 @@ concept is_the_opposite:
a, b
test:
- a.pre == not b.pre && a.post == b.post
a.pre == not b.pre && a.post == not b.post
print all concepts
+19
@@ -18,3 +18,22 @@ def concept a is a number as :
--> adds concept a is a number
--> add the pre condition to the concept a plus b
```
# Define a new concept in one line
```
def concept words
def concept words [where whereclause] [as expression] [pre precond] [post postcond]
```
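For example, this one-liner defines a concept named `hello` whose body is the expression `'hello'` (one of the inputs exercised by the parser tests below):
```
def concept hello as 'hello'
```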
# Define a complicated concept
```
def concept
as:
...
where:
...
pre:
...
post:
...
```
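A filled-in example, adapted from the test suite (the clauses may also be given inline, one per line, without colons):
```
def concept a+b
where isinstance(a, int) and isinstance(b, int)
pre isinstance(a, int) and isinstance(b, int)
post isinstance(res, int)
as:
    def func(x,y):
        return x+y
    func(a,b)
```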
+17
@@ -0,0 +1,17 @@
```
> "hello
-> unfinished quote "
> def concept unfinished quote q
... where:
...... q in ('"', '"')
... desc:
...... "Error detected by the default parser where the trailing quote is missing"
... input = sheerka.last_input
> when unfinished quote q as c:
... add rule as:
...... if q in sheerka.input:
......... sheerka.resume(c, c.input + input)
......... remove rule
```
+3 -5
@@ -1,7 +1,7 @@
import sys
from core.utils import sysarg_to_string
from core.sheerka import Sheerka
- from sdp.sheerkaDataProvider import Event
def main():
@@ -10,12 +10,10 @@ def main():
# first, record the event
event_as_string = sysarg_to_string(sys.argv[1:])
- evt_digest = sheerka.record_event(Event(event_as_string))
result = sheerka.eval(event_as_string)
- # launch the parsers
# execute the concepts
- print(event_as_string)
print(result)
return True
+41
@@ -0,0 +1,41 @@
from dataclasses import dataclass, field
from parsers.tokenizer import TokenKind, Keywords
@dataclass()
class Node:
pass
@dataclass()
class NopNode(Node):
pass
def __repr__(self):
return "nop"
@dataclass()
class ErrorNode(Node):
pass
class BaseParser:
def __init__(self, name, text):
self.name = name
self.text = text
self.has_error = False
self.error_sink = []
def parse(self):
pass
@staticmethod
def get_text_from_tokens(tokens):
if tokens is None:
return ""
res = ""
for token in tokens:
value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value
res += value
return res
+383
@@ -0,0 +1,383 @@
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
@dataclass()
class DefaultParserNode(Node):
tokens: list = field(compare=False)
def is_same(self, other):
if type(self) != type(other):
return False
if hasattr(self, "value") and self.value != other.value:
return False
return True
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
pass
@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
message: str
expected_tokens: list
@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
message: str
pass
@dataclass()
class DefConceptNode(DefaultParserNode):
name: str
where: Node = None
pre: Node = None
post: Node = None
body: Node = NopNode
def get_codes(self):
codes = {}
for prop in ["where", "pre", "post", "body"]:
prop_value = getattr(self, prop)
if hasattr(prop_value, "ast"):
codes[prop] = prop_value.ast
return codes
@dataclass()
class NumberNode(DefaultParserNode):
value: object
def __repr__(self):
return str(self.value)
@dataclass()
class StringNode(DefaultParserNode):
value: str
quote: str
def is_same(self, other):
if not super(StringNode, self).is_same(other):
return False
return self.quote == other.quote
def __repr__(self):
return self.quote + self.value + self.quote
@dataclass()
class VariableNode(DefaultParserNode):
value: str
def __repr__(self):
return self.value
@dataclass()
class TrueNode(DefaultParserNode):
pass
def __repr__(self):
return "true"
@dataclass()
class FalseNode(DefaultParserNode):
pass
def __repr__(self):
return "false"
@dataclass()
class NullNode(DefaultParserNode):
pass
def __repr__(self):
return "null"
@dataclass()
class BinaryNode(DefaultParserNode):
operator: TokenKind
left: Node
right: Node
def is_same(self, other):
if not super(BinaryNode, self).is_same(other):
return False
if self.operator != other.operator:
return False
if not self.left.is_same(other.left):
return False
return self.right.is_same(other.right)
def __repr__(self):
return f"({self.left} {self.operator} {self.right})"
class DefaultParser(BaseParser):
def __init__(self, text, sub_parser):
BaseParser.__init__(self, "Default", text)
self.sub_parser = sub_parser
self.lexer = Tokenizer(text)
self.lexer_iter = iter(Tokenizer(text))
self._current = None
self.next_token()
def collect_tokens(self, *args):
result = []
for item in args:
if isinstance(item, Node):
result.extend(item.tokens)
else:
result.append(item)
return result
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=True):
try:
self._current = next(self.lexer_iter)
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
except StopIteration:
self._current = None
return
@staticmethod
def get_concept_name(tokens):
name = ""
first = True
for token in tokens:
if token.type == TokenKind.EOF:
break
if not first:
name += " "
name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
first = False
return name
@staticmethod
def fix_indentation(tokens):
"""
In the following example
def concept add one to a as:
def func(x):
return x+1
func(a)
indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error
:param tokens:
:return:
"""
if tokens[1].type != TokenKind.COLON:
return tokens[1:]
if len(tokens) < 3:
return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
if tokens[2].type != TokenKind.NEWLINE:
return UnexpectedTokenErrorNode([tokens[2]], "Unexpected token after colon", [TokenKind.NEWLINE])
if tokens[3].type != TokenKind.WHITESPACE:
return SyntaxErrorNode([tokens[3]], "Indentation not found")
indent_size = len(tokens[3].value)
# now fix the other indentations
i = 4
while i < len(tokens) - 1:
if tokens[i].type == TokenKind.NEWLINE:
if tokens[i + 1].type != TokenKind.WHITESPACE:
return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
if len(tokens[i + 1].value) < indent_size:
return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
i += 1
return tokens[4:]
def parse(self):
return self.parse_statement()
def parse_statement(self):
token = self.get_token()
if token.value == Keywords.DEF:
self.next_token()
return self.parse_def_concept()
else:
return self.parse_expression()
def parse_def_concept(self):
"""
def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
"""
def_concept_parts = [Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
token = self.get_token()
if token.value != Keywords.CONCEPT:
return self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
self.next_token()
token = self.get_token()
if token.value in (Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST):
return self.add_error(UnexpectedTokenErrorNode([token], "Concept name is missing.", ["<name>"]))
name_as_tokens = []
while token.type != TokenKind.EOF and token.value not in def_concept_parts:
name_as_tokens.append(token)
self.next_token()
token = self.get_token()
name = self.get_concept_name(name_as_tokens)
# try to parse as, where, pre and post declarations
tokens = {
Keywords.AS: None,
Keywords.WHERE: None,
Keywords.PRE: None,
Keywords.POST: None,
}
current_part = None
while token.type != TokenKind.EOF:
if token.value in def_concept_parts:
keyword = token.value
if tokens[keyword]:
return self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
tokens[keyword] = [token] # first element of the list is the keyword
current_part = keyword
self.next_token()
else:
if current_part is None:
return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", def_concept_parts))
else:
tokens[current_part].append(token)
self.next_token(False)
token = self.get_token()
asts = {
Keywords.AS: NopNode(),
Keywords.WHERE: NopNode(),
Keywords.PRE: NopNode(),
Keywords.POST: NopNode(),
}
# check for empty declarations
for keyword in tokens:
current_tokens = tokens[keyword]
if current_tokens is not None:
if len(current_tokens) == 1: # only the keyword means empty decl
return self.add_error(SyntaxErrorNode([current_tokens[0]], "Empty declaration"), False)
else:
current_tokens = self.fix_indentation(current_tokens)
if isinstance(current_tokens, ErrorNode):
self.add_error(current_tokens)
continue
# start = current_tokens[0].index
# end = current_tokens[-1].index + len(current_tokens[-1].value)
sub_parser = self.sub_parser(current_tokens, source=keyword.value)
sub_tree = sub_parser.parse()
if isinstance(sub_tree, ErrorNode):
self.add_error(sub_tree, False)
asts[keyword] = sub_tree
return DefConceptNode([], name,
asts[Keywords.WHERE],
asts[Keywords.PRE],
asts[Keywords.POST],
asts[Keywords.AS])
def parse_expression(self):
return self.parse_addition()
def parse_addition(self):
left = self.parse_multiply()
token = self.get_token()
if token is None or token.type == TokenKind.EOF:
return left
if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5
right = self.parse_addition()
return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
return left
self.next_token()
right = self.parse_addition()
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
def parse_multiply(self):
left = self.parse_atom()
token = self.get_token()
if token is None or token.type == TokenKind.EOF:
return left
if token.type not in (TokenKind.STAR, TokenKind.SLASH):
return left
self.next_token()
right = self.parse_multiply()
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
def parse_atom(self):
token = self.get_token()
if token.type == TokenKind.NUMBER:
self.next_token()
return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
elif token.type == TokenKind.STRING:
self.next_token()
return StringNode([token], token.value[1:-1], token.value[0])
elif token.type == TokenKind.IDENTIFIER:
if token.value == "true":
self.next_token()
return TrueNode([token])
elif token.value == "false":
self.next_token()
return FalseNode([token])
elif token.value == "null":
self.next_token()
return NullNode([token])
else:
self.next_token()
return VariableNode([token], token.value)
elif token.type == TokenKind.LPAR:
self.next_token()
exp = self.parse_expression()
token = self.get_token()
self.next_token()
if token.type != TokenKind.RPAR:
error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
self.add_error(error)
return error
return exp
else:
error = UnexpectedTokenErrorNode([token], "Unexpected token",
[TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
"null", TokenKind.LPAR])
return self.add_error(error)
+75
@@ -0,0 +1,75 @@
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
import ast
import copy
@dataclass()
class PythonErrorNode(ErrorNode):
source: str
exception: Exception
@dataclass()
class PythonNode(Node):
source: str
ast: ast.AST
def __repr__(self):
return "PythonNode(" + ast.dump(self.ast) + ")"
#return "PythonNode(" + self.source + ")"
class PythonParser(BaseParser):
def __init__(self, text, source="<undef>"):
text = text if isinstance(text, str) else self.get_text_from_tokens(text)
text = text.strip()
BaseParser.__init__(self, "PythonParser", text)
self.source = source
def parse(self):
# first, try to parse an expression
res, tree, error = self.try_parse_expression()
if not res:
# then try to parse a statement
res, tree, error = self.try_parse_statement()
if not res:
self.has_error = True
error_node = PythonErrorNode(self.text, error)
self.error_sink.append(error_node)
return error_node
return PythonNode(self.text, tree)
def try_parse_expression(self):
try:
return True, ast.parse(self.text, f"<{self.source}>", 'eval'), None
except Exception as error:
return False, None, error
def try_parse_statement(self):
try:
return True, ast.parse(self.text, f"<{self.source}>", 'exec'), None
except Exception as error:
return False, None, error
def expr_to_expression(self, expr):
expr.lineno = 0
expr.col_offset = 0
result = ast.Expression(expr.value, lineno=0, col_offset=0)
return result
def exec_with_return(self, code):
code_ast = ast.parse(code)
# split the module: execute everything except the last statement,
# then handle the last statement separately so its value can be returned
init_ast = copy.deepcopy(code_ast)
init_ast.body = code_ast.body[:-1]
last_ast = copy.deepcopy(code_ast)
last_ast.body = code_ast.body[-1:]
exec(compile(init_ast, "<ast>", "exec"), globals())
if type(last_ast.body[0]) == ast.Expr:
# the last statement is an expression: evaluate it and return its value
return eval(compile(self.expr_to_expression(last_ast.body[0]), "<ast>", "eval"), globals())
else:
exec(compile(last_ast, "<ast>", "exec"), globals())
-249
@@ -1,249 +0,0 @@
from dataclasses import dataclass
@dataclass(frozen=True)
class Token:
type: str
value: str
index: int
line: int
column: int
@dataclass(frozen=True)
class LexerError(Exception):
message: str
text: str
index: int
line: int
column: int
class Tokens:
EOF = "eof"
WHITESPACE = "whitespace"
NEWLINE = "newline"
KEYWORD = "keyword"
IDENTIFIER = "identifier"
STRING = "string"
NUMBER = "number"
TRUE = "true"
FALSE = "false"
LPAR = "lpar"
RPAR = "rpar"
LBRACKET = "lbrace"
RBRACKET = "rbracket"
LBRACE = "lbrace"
RBRACE = "rbrace"
PLUS = "plus"
MINUS = "minus"
STAR = "star"
SLASH = "slash"
PERCENT = "percent"
COMMA = "comma"
SEMICOLON = "semicolon"
COLON = "colon"
DOT = "dot"
QMARK = "qmark"
VBAR = "vbar"
AMPER = "amper"
class TokenIter:
KEYWORDS = ("def", "concept", "as", "pre", "post")
"""
Class that can iterate on the tokens
"""
def __init__(self, text):
self.text = text
self.text_len = len(text)
def __iter__(self):
i = 0
line = 1
column = 1
while i < self.text_len:
c = self.text[i]
if c == "+":
yield Token(Tokens.PLUS, "+", i, line, column)
i += 1
column += 1
elif c == "-":
if i + 1 < self.text_len and self.text[i + 1].isdigit():
number = self.eat_number(i)
yield Token(Tokens.NUMBER, number, i, line, column)
i += len(number)
column += len(number)
else:
yield Token(Tokens.MINUS, "-", i, line, column)
i += 1
column += 1
elif c == "/":
yield Token(Tokens.SLASH, "/", i, line, column)
i += 1
column += 1
elif c == "*":
yield Token(Tokens.STAR, "*", i, line, column)
i += 1
column += 1
elif c == "{":
yield Token(Tokens.LBRACE, "{", i, line, column)
i += 1
column += 1
elif c == "}":
yield Token(Tokens.RBRACE, "}", i, line, column)
i += 1
column += 1
elif c == "(":
yield Token(Tokens.LPAR, "(", i, line, column)
i += 1
column += 1
elif c == ")":
yield Token(Tokens.RPAR, ")", i, line, column)
i += 1
column += 1
elif c == "[":
yield Token(Tokens.LBRACKET, "[", i, line, column)
i += 1
column += 1
elif c == "]":
yield Token(Tokens.RBRACKET, "]", i, line, column)
i += 1
column += 1
elif c == " " or c == "\t":
whitespace = self.eat_whitespace(i)
yield Token(Tokens.WHITESPACE, whitespace, i, line, column)
i += len(whitespace)
column += len(whitespace)
elif c == ",":
yield Token(Tokens.COMMA, ",", i, line, column)
i += 1
column += 1
elif c == ".":
yield Token(Tokens.DOT, ".", i, line, column)
i += 1
column += 1
elif c == ";":
yield Token(Tokens.SEMICOLON, ";", i, line, column)
i += 1
column += 1
elif c == ":":
yield Token(Tokens.COLON, ":", i, line, column)
i += 1
column += 1
elif c == "?":
yield Token(Tokens.QMARK, "?", i, line, column)
i += 1
column += 1
elif c == "\n" or c == "\r":
newline = self.eat_newline(i)
yield Token(Tokens.NEWLINE, newline, i, line, column)
i += len(newline)
column = 1
line += 1
elif c.isalpha() or c == "_":
identifier = self.eat_identifier(i)
type = Tokens.KEYWORD if identifier in self.KEYWORDS else Tokens.IDENTIFIER
yield Token(type, identifier, i, line, column)
i += len(identifier)
column += len(identifier)
elif c.isdigit():
number = self.eat_number(i)
yield Token(Tokens.NUMBER, number, i, line, column)
i += len(number)
column += len(number)
elif c == "'" or c == '"':
string, newlines = self.eat_string(i)
yield Token(Tokens.STRING, string, i, line, column)
i += len(string)
column = 1 if newlines > 0 else column + len(string)
line += newlines
else:
raise LexerError(f"Unknown token '{c}'", self.text, i, line, column)
yield Token(Tokens.EOF, "", i, line, column)
def eat_whitespace(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c == " " or c == "\t":
result += c
i += 1
else:
break
return result
def eat_newline(self, start):
if start + 1 == self.text_len:
return self.text[start]
current = self.text[start]
next = self.text[start + 1]
if current == "\n" and next == "\r" or current == "\r" and next == "\n":
return current + next
return current
def eat_identifier(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c.isalpha() or c == "_" or c == "-" or c.isdigit():
result += c
i += 1
else:
break
return result
def eat_number(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c.isdigit() or c == ".":
result += c
i += 1
else:
break
return result
def eat_string(self, start):
quote = self.text[start]
result = self.text[start]
lines_count = 0
i = start + 1
escape = False
newline = None
while i < self.text_len:
c = self.text[i]
result += c
i += 1
if newline:
lines_count += 1
newline = c if c == newline else None
else:
if c == "\r" or c == "\n":
newline = c
if c == "\\":
escape = True
elif c == quote and not escape:
break
else:
escape = False
if newline:
lines_count += 1
return result, lines_count
+297
@@ -0,0 +1,297 @@
from dataclasses import dataclass
from enum import Enum
class TokenKind(Enum):
EOF = "eof"
WHITESPACE = "whitespace"
NEWLINE = "newline"
KEYWORD = "keyword"
IDENTIFIER = "identifier"
STRING = "string"
NUMBER = "number"
TRUE = "true"
FALSE = "false"
LPAR = "lpar"
RPAR = "rpar"
LBRACKET = "lbrace"
RBRACKET = "rbracket"
LBRACE = "lbrace"
RBRACE = "rbrace"
PLUS = "plus"
MINUS = "minus"
STAR = "star"
SLASH = "slash"
PERCENT = "percent"
COMMA = "comma"
SEMICOLON = "semicolon"
COLON = "colon"
DOT = "dot"
QMARK = "qmark"
VBAR = "vbar"
AMPER = "amper"
EQUALS = "="
@dataclass()
class Token:
type: TokenKind
value: object
index: int
line: int
column: int
@dataclass()
class LexerError(Exception):
message: str
text: str
index: int
line: int
column: int
class Keywords(Enum):
DEF = "def"
CONCEPT = "concept"
AS = "as"
WHERE = "where"
PRE = "pre"
POST = "post"
class Tokenizer:
"""
Class that can iterate on the tokens
"""
KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text):
self.text = text
self.text_len = len(text)
self.column = 1
self.line = 1
self.i = 0
def __iter__(self):
while self.i < self.text_len:
c = self.text[self.i]
if c == "+":
if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
number = self.eat_number(self.i)
yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
self.i += len(number)
self.column += len(number)
else:
yield Token(TokenKind.PLUS, "+", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "-":
if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
number = self.eat_number(self.i)
yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
self.i += len(number)
self.column += len(number)
else:
yield Token(TokenKind.MINUS, "-", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "/":
yield Token(TokenKind.SLASH, "/", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "*":
yield Token(TokenKind.STAR, "*", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "{":
yield Token(TokenKind.LBRACE, "{", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "}":
yield Token(TokenKind.RBRACE, "}", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "(":
yield Token(TokenKind.LPAR, "(", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ")":
yield Token(TokenKind.RPAR, ")", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "[":
yield Token(TokenKind.LBRACKET, "[", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "]":
yield Token(TokenKind.RBRACKET, "]", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "=":
yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == " " or c == "\t":
whitespace = self.eat_whitespace(self.i)
yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
self.i += len(whitespace)
self.column += len(whitespace)
elif c == ",":
yield Token(TokenKind.COMMA, ",", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ".":
yield Token(TokenKind.DOT, ".", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ";":
yield Token(TokenKind.SEMICOLON, ";", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ":":
yield Token(TokenKind.COLON, ":", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "?":
yield Token(TokenKind.QMARK, "?", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\n" or c == "\r":
newline = self.eat_newline(self.i)
yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
self.i += len(newline)
self.column = 1
self.line += 1
elif c.isalpha() or c == "_":
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
elif c.isdigit():
number = self.eat_number(self.i)
yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
self.i += len(number)
self.column += len(number)
elif c == "'" or c == '"':
string, newlines = self.eat_string(self.i, self.line, self.column)
yield Token(TokenKind.STRING, string, self.i, self.line, self.column)
self.i += len(string)
self.column = 1 if newlines > 0 else self.column + len(string)
self.line += newlines
else:
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_whitespace(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c == " " or c == "\t":
result += c
i += 1
else:
break
return result
def eat_newline(self, start):
if start + 1 == self.text_len:
return self.text[start]
current = self.text[start]
next = self.text[start + 1]
if current == "\n" and next == "\r" or current == "\r" and next == "\n":
return current + next
return current
def eat_identifier(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c.isalpha() or c == "_" or c == "-" or c.isdigit():
result += c
i += 1
else:
break
return result
def eat_number(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c.isdigit() or c == ".":
result += c
i += 1
else:
break
return result
def eat_string(self, start_index, start_line, start_column):
quote = self.text[start_index]
result = self.text[start_index]
lines_count = 0
i = start_index + 1
escape = False
newline = None
while i < self.text_len:
c = self.text[i]
result += c
i += 1
if newline:
lines_count += 1
newline = c if c == newline else None
else:
if c == "\r" or c == "\n":
newline = c
if c == "\\":
escape = True
elif c == quote and not escape:
break
else:
escape = False
# add trailing new line if needed
if newline:
lines_count += 1
if result[-1] != quote:
raise LexerError("Missing Trailing quote", result, i, start_line + lines_count,
1 if lines_count > 0 else start_column + len(result))
return result, lines_count
def seek(self, words):
if self.i == self.text_len:
return 0
# init
offsets = {}
start_index = self.i
buffer = ""
while self.i < self.text_len:
c = self.text[self.i]
# skip white space
if c in (" ", "\t"):
self.i += 1
continue
for word in words:
if c == word[offset]:
os
+34
@@ -1,5 +1,39 @@
# How to serialize?
## General rule
- 1 byte: type of object code
- int: version of the encoder
- data: can be the json representation of the object
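A minimal sketch of writing and reading that header; it assumes the `HEADER_FORMAT = "cH"` used by `Serializer` in this commit (one type byte plus an unsigned-short version):
```python
import struct

HEADER_FORMAT = "cH"  # 1-byte type code + unsigned-short version

def pack_header(type_code: str, version: int) -> bytes:
    # e.g. pack_header("E", 1) for the event serializer
    return struct.pack(HEADER_FORMAT, bytes(type_code, "utf-8"), version)

def unpack_header(stream):
    # read back (type_code, version) from the front of a stream
    raw = stream.read(struct.calcsize(HEADER_FORMAT))
    type_code, version = struct.unpack(HEADER_FORMAT, raw)
    return type_code.decode("utf-8"), version
```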
### Current supported types
- E : events
- O : object (with history management)
- P : pickle
## How are concepts serialized?
- get the id of the concept
- get the hash of the concept > it will be its unique key
Structure of the serialization:
```json
{
"id" : "id",
"parent": <hash code of the previous version of the concept> or "",
"name": <name of the concept>,
"where": "",
"pre": "",
"post": "",
"body": "",
"desc": "",
...
}
```
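The hash used as the unique key is the concept digest; a sketch mirroring `Concept.get_digest` from this commit:
```python
import hashlib

def concept_digest(name, pre, post, body):
    # sha256 over a "Concept:" prefix and the salient parts,
    # exactly as Concept.get_digest does
    payload = f"Concept:{name}{pre}{post}{body}".encode("utf-8")
    return hashlib.sha256(payload).hexdigest()
```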
## Idea to manage ObjectSerializer
Problem:
During serialization there is no issue: the match() method is the unique way to get the correct serializer.
During deserialization, all ObjectSerializers have type = 'O' and version = 1.
So how do we choose the correct one?
A possible solution would be to add the type of the object to deserialize to the saved stream
--> SHA256 for every object. Too much data saved.
The id lets the Serializer increment the version automatically (during registration) and keeps the mapping within sdp.state.
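For now the lookup in `Serializer.deserialize` matches on the (name, version) pair read from the header, so giving each `ObjectSerializer` subclass its own name code (as `ConceptSerializer` does with "C") keeps it unambiguous. A minimal sketch of that lookup:
```python
def find_serializer(serializers, name, version):
    # pick the registered serializer whose header code matches;
    # distinct one-byte names ("E", "P", "C", ...) avoid ambiguity
    candidates = [s for s in serializers
                  if s.name == name and s.version == version]
    if not candidates:
        raise TypeError(f"Don't know serializer name={name}, version={version}")
    return candidates[0]
```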
+41 -13
@@ -4,7 +4,7 @@ from datetime import datetime, date
import hashlib
import json
import zlib
- from sdp.sheerkaSerializer import Serializer
from sdp.sheerkaSerializer import Serializer, SerializerContext
def json_default_converter(o):
@@ -38,15 +38,15 @@ class Event(object):
if not isinstance(self.message, str):
raise NotImplementedError
- return hashlib.sha256(f"{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
return hashlib.sha256(f"Event:{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
- def to_json(self):
def to_dict(self):
- return json.dumps(self.__dict__, default=json_default_converter)
return self.__dict__
- def from_json(self, json_message):
def from_dict(self, as_dict):
- self.user = json_message["user"]
self.user = as_dict["user"]
- self.date = datetime.fromisoformat(json_message["date"])
self.date = datetime.fromisoformat(as_dict["date"])
- self.message = json_message["message"]
self.message = as_dict["message"]
class State:
@@ -120,6 +120,7 @@ class SheerkaDataProvider:
EventFolder = "events"
StateFolder = "state"
ObjectsFolder = "objects"
CacheFolder = "cache"
HeadFile = "HEAD"
KeysFile = "keys"
@@ -135,6 +136,9 @@ class SheerkaDataProvider:
self.serializer = Serializer()
def get_obj_path(self, object_type, digest):
return path.join(self.root, object_type, digest[:24], digest)
def add(self, event: Event, entry, obj):
"""
Adds obj to the entry 'entry'
@@ -366,7 +370,7 @@ class SheerkaDataProvider:
os.makedirs(path.dirname(target_path))
with open(target_path, "wb") as f:
- f.write(self.serializer.serialize(event).read())
f.write(self.serializer.serialize(event, None).read())
return digest
@@ -378,7 +382,7 @@ class SheerkaDataProvider:
""" """
target_path = path.join(self.root, SheerkaDataProvider.EventFolder, digest[:24], digest) target_path = path.join(self.root, SheerkaDataProvider.EventFolder, digest[:24], digest)
with open(target_path, "rb") as f: with open(target_path, "rb") as f:
return self.serializer.deserialize(f) return self.serializer.deserialize(f, None)
def save_state(self, state: State):
digest = state.get_digest()
@@ -390,7 +394,7 @@ class SheerkaDataProvider:
os.makedirs(path.dirname(target_path))
with open(target_path, "wb") as f:
- f.write(self.serializer.serialize(state).read())
f.write(self.serializer.serialize(state, None).read())
return digest
@@ -400,7 +404,32 @@ class SheerkaDataProvider:
target_path = path.join(self.root, SheerkaDataProvider.StateFolder, digest[:24], digest)
with open(target_path, "rb") as f:
- return self.serializer.deserialize(f)
return self.serializer.deserialize(f, None)
def save_obj(self, obj):
if hasattr(obj, "key") and hasattr(obj, "key_name") and obj.key is None:
obj.key = self.get_next_key(obj.key_name)
digest = obj.get_digest()
target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
if path.exists(target_path):
return digest
if not path.exists(path.dirname(target_path)):
os.makedirs(path.dirname(target_path))
with open(target_path, "wb") as f:
f.write(self.serializer.serialize(obj, SerializerContext("kodjo", digest)).read())
return digest
def load_obj(self, digest):
if digest is None:
return State()
target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
with open(target_path, "rb") as f:
return self.serializer.deserialize(f, SerializerContext("kodjo", digest))
def get_cache_params(self, category, key):
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
@@ -507,4 +536,3 @@ class SheerkaDataProvider:
keys[entry] = value
self.save_keys(keys)
return str(value)
+84 -26
@@ -3,6 +3,7 @@ import pickle
import datetime
import struct
import io
from dataclasses import dataclass
def json_default_converter(o):
@@ -17,15 +18,27 @@ def json_default_converter(o):
return o.isoformat()
@dataclass()
class SerializerContext:
user_name: str
origin: str
class Serializer:
HEADER_FORMAT = "cH"
USERNAME = "user_name" # key to store the user that has committed the snapshot
MODIFICATION_DATE = "modification_date"
PARENTS = "parents"
ORIGIN = "origin"
HISTORY = "##history##"
def __init__(self):
self._cache = []
# add builtin serializers
- self._cache.append(EventSerializer())
self.register(EventSerializer())
- self._cache.append(PickleSerializer())
self.register(PickleSerializer())
self.register(ConceptSerializer())
def register(self, serializer):
"""
@@ -35,9 +48,10 @@ class Serializer:
""" """
self._cache.append(serializer) self._cache.append(serializer)
- def serialize(self, obj):
def serialize(self, obj, context):
"""
Get the stream representation of an object
:param context:
:param obj:
:return:
"""
@@ -52,11 +66,12 @@ class Serializer:
header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
stream.write(header)
- return serializer.dump(stream, obj)
return serializer.dump(stream, obj, context)
- def deserialize(self, stream):
def deserialize(self, stream, context):
"""
Loads an object from its stream representation
:param context:
:param stream:
:return:
"""
@@ -67,7 +82,7 @@ class Serializer:
raise TypeError(f"Don't know how serializer name={header[0]}, version={header[1]}")
serializer = serializers[0]
- return serializer.load(stream)
return serializer.load(stream, context)
class BaseSerializer:
@@ -82,8 +97,7 @@ class BaseSerializer:
self.name = name
self.version = version
- @staticmethod
- def match(obj):
def match(self, obj):
""" """
Returns true if self can serialize obj Returns true if self can serialize obj
:param obj: :param obj:
@@ -91,26 +105,32 @@ class BaseSerializer:
""" """
pass pass
- def dump(self, stream, obj):
def dump(self, stream, obj, context):
"""
Returns the byte representation of how the object should be serialized
:param stream: to write to
- :param obj:
:param obj: obj to serialize
:param context: additional info needed to dump
:return: stream of bytes
"""
pass
- def load(self, stream):
def load(self, stream, context):
"""
From a stream of bytes, create the object
:param stream:
:param context: additional info needed to load
:return: object
"""
pass
@staticmethod
def get_class(kls):
"""
Loads a class from its fully qualified string name
:param kls:
:return:
"""
parts = kls.split('.')
module = ".".join(parts[:-1])
m = __import__(module)
@@ -120,6 +140,11 @@ class BaseSerializer:
@staticmethod
def get_full_qualified_name(obj):
"""
Returns the fully qualified name of a class (including its module name)
:param obj:
:return:
"""
module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
@@ -128,40 +153,73 @@ class BaseSerializer:
class EventSerializer(BaseSerializer):
- @staticmethod
- def match(obj):
- return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
def __init__(self):
BaseSerializer.__init__(self, "E", 1)
- def dump(self, stream, obj):
- stream.write(obj.to_json().encode("utf-8"))
def match(self, obj):
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
def dump(self, stream, obj, context):
stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8"))
stream.seek(0)
return stream
- def load(self, stream):
def load(self, stream, context):
json_stream = stream.read().decode("utf-8")
- json_message = json.loads(json_stream)
as_dict = json.loads(json_stream)
event = BaseSerializer.get_class("sdp.sheerkaDataProvider.Event")()
- event.from_json(json_message)
event.from_dict(as_dict)
return event
class ObjectSerializer(BaseSerializer):
def __init__(self, fully_qualified_name, name="O", version=1):
BaseSerializer.__init__(self, name, version)
self.fully_qualified_name = fully_qualified_name
def match(self, obj):
return BaseSerializer.get_full_qualified_name(obj) == self.fully_qualified_name
def dump(self, stream, obj, context):
as_json = obj.to_dict()
as_json.update({
Serializer.HISTORY: {
Serializer.USERNAME: context.user_name,
Serializer.MODIFICATION_DATE: datetime.datetime.now().isoformat(),
Serializer.PARENTS: [getattr(obj, Serializer.ORIGIN)] if hasattr(obj, Serializer.ORIGIN) else []
}})
stream.write(json.dumps(as_json, default=json_default_converter).encode("utf-8"))
stream.seek(0)
return stream
def load(self, stream, context):
json_stream = stream.read().decode("utf-8")
json_message = json.loads(json_stream)
obj = BaseSerializer.get_class(self.fully_qualified_name)()
obj.from_dict(json_message)
setattr(obj, Serializer.HISTORY, json_message[Serializer.HISTORY])
return obj
class PickleSerializer(BaseSerializer):
- @staticmethod
- def match(obj):
- return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
def __init__(self):
BaseSerializer.__init__(self, "P", 1)
- def dump(self, stream, obj):
def match(self, obj):
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
def dump(self, stream, obj, context):
stream.write(pickle.dumps(obj))
stream.seek(0)
return stream
- def load(self, stream):
def load(self, stream, context):
return pickle.loads(stream.read())
class ConceptSerializer(ObjectSerializer):
def __init__(self):
ObjectSerializer.__init__(self, "core.concept.Concept", "C", 1)
+284 -43
@@ -1,39 +1,78 @@
import pytest
- from parsers.defaultparser import TokenIter, Token, Tokens
from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode
from parsers.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError
from parsers.DefaultParser import DefaultParser
from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode
from parsers.DefaultParser import Node, UnexpectedTokenErrorNode, DefConceptNode, NopNode
import ast
def nop():
return NopNode()
def n(number):
return NumberNode([], number)
def s(string, quote="'"):
return StringNode([], string, quote)
def v(name):
return VariableNode([], name)
def t():
return TrueNode([])
def f():
return FalseNode([])
def null():
return NullNode([])
def b(operator, left, right):
return BinaryNode([], operator, left, right)
def test_i_can_tokenize():
- source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\""
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"="
tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
assert tokens[2] == Token(TokenKind.MINUS, "-", 2, 1, 3)
assert tokens[3] == Token(TokenKind.SLASH, "/", 3, 1, 4)
assert tokens[4] == Token(TokenKind.LBRACE, "{", 4, 1, 5)
assert tokens[5] == Token(TokenKind.RBRACE, "}", 5, 1, 6)
assert tokens[6] == Token(TokenKind.LBRACKET, "[", 6, 1, 7)
assert tokens[7] == Token(TokenKind.RBRACKET, "]", 7, 1, 8)
assert tokens[8] == Token(TokenKind.LPAR, "(", 8, 1, 9)
assert tokens[9] == Token(TokenKind.RPAR, ")", 9, 1, 10)
assert tokens[10] == Token(TokenKind.WHITESPACE, "    ", 10, 1, 11)
assert tokens[11] == Token(TokenKind.COMMA, ",", 14, 1, 15)
assert tokens[12] == Token(TokenKind.SEMICOLON, ";", 15, 1, 16)
assert tokens[13] == Token(TokenKind.COLON, ":", 16, 1, 17)
assert tokens[14] == Token(TokenKind.DOT, ".", 17, 1, 18)
assert tokens[15] == Token(TokenKind.QMARK, "?", 18, 1, 19)
assert tokens[16] == Token(TokenKind.NEWLINE, "\n", 19, 1, 20)
assert tokens[17] == Token(TokenKind.NEWLINE, "\n\r", 20, 2, 1)
assert tokens[18] == Token(TokenKind.NEWLINE, "\r", 22, 3, 1)
assert tokens[19] == Token(TokenKind.NEWLINE, "\r\n", 23, 4, 1)
assert tokens[20] == Token(TokenKind.IDENTIFIER, "identifier_0", 25, 5, 1)
assert tokens[21] == Token(TokenKind.WHITESPACE, "\t \t", 37, 5, 13)
assert tokens[22] == Token(TokenKind.NUMBER, "10.15", 41, 5, 17)
assert tokens[23] == Token(TokenKind.WHITESPACE, " ", 46, 5, 22)
assert tokens[24] == Token(TokenKind.NUMBER, "10", 47, 5, 23)
assert tokens[25] == Token(TokenKind.WHITESPACE, " ", 49, 5, 25)
assert tokens[26] == Token(TokenKind.STRING, "'string\n'", 50, 5, 26)
assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1)
assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2)
assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18)
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
@@ -48,11 +87,26 @@ def test_i_can_tokenize():
("-abcd", False) ("-abcd", False)
]) ])
def test_i_can_tokenize_identifiers(text, expected): def test_i_can_tokenize_identifiers(text, expected):
tokens = list(TokenIter(text)) tokens = list(Tokenizer(text))
comparison = tokens[0].type == Tokens.IDENTIFIER comparison = tokens[0].type == TokenKind.IDENTIFIER
assert comparison == expected assert comparison == expected
@pytest.mark.parametrize("text, error_text, index, line, column", [
("'string", "'string", 7, 1, 8),
('"string', '"string', 7, 1, 8),
('"a" + "string', '"string', 13, 1, 14),
('"a"\n\n"string', '"string', 12, 3, 8),
])
def test_i_can_detect_unfinished_strings(text, error_text, index, line, column):
with pytest.raises(LexerError) as e:
list(Tokenizer(text))
assert e.value.text == error_text
assert e.value.index == index
assert e.value.line == line
assert e.value.column == column
@pytest.mark.parametrize("text, expected_text, expected_newlines", [ @pytest.mark.parametrize("text, expected_text, expected_newlines", [
("'foo'", "'foo'", 0), ("'foo'", "'foo'", 0),
('"foo"', '"foo"', 0), ('"foo"', '"foo"', 0),
@@ -72,8 +126,8 @@ def test_i_can_tokenize_identifiers(text, expected):
("'foo'bar'", "'foo'", 0), ("'foo'bar'", "'foo'", 0),
]) ])
def test_i_can_parse_strings(text, expected_text, expected_newlines): def test_i_can_parse_strings(text, expected_text, expected_newlines):
lexer = TokenIter(text) lexer = Tokenizer(text)
text_found, nb_of_newlines = lexer.eat_string(0) text_found, nb_of_newlines = lexer.eat_string(0, 1, 1)
assert nb_of_newlines == expected_newlines assert nb_of_newlines == expected_newlines
assert text_found == expected_text assert text_found == expected_text
@@ -83,14 +137,201 @@ def test_i_can_parse_strings(text, expected_text, expected_newlines):
"1", "3.1415", "0.5", "01", "-5", "-5.10" "1", "3.1415", "0.5", "01", "-5", "-5.10"
]) ])
def test_i_can_parse_numbers(text): def test_i_can_parse_numbers(text):
tokens = list(TokenIter(text)) tokens = list(Tokenizer(text))
assert tokens[0].type == Tokens.NUMBER assert tokens[0].type == TokenKind.NUMBER
assert tokens[0].value == text assert tokens[0].value == text
@pytest.mark.parametrize("text", [ @pytest.mark.parametrize("text, expected", [
"def", "concept", "as", "pre", "post" ("def", Keywords.DEF),
("concept", Keywords.CONCEPT),
("as", Keywords.AS),
("pre", Keywords.PRE),
("post", Keywords.POST)
])
def test_i_can_recognize_keywords(text, expected):
tokens = list(Tokenizer(text))
assert tokens[0].type == TokenKind.KEYWORD
assert tokens[0].value == expected
@pytest.mark.parametrize("text, expected", [
("1", n(1)),
("+1", n(1)),
("-1", n(-1)),
("'foo'", s("foo")),
("identifier", v("identifier")),
("true", t()),
("false", f()),
("null", null()),
("1 * 2", b(TokenKind.STAR, n(1), n(2))),
("1 * 2/3", b(TokenKind.STAR, n(1), b(TokenKind.SLASH, n(2), n(3)))),
("1 + 2", b(TokenKind.PLUS, n(1), n(2))),
("1 + 2 - 3", b(TokenKind.PLUS, n(1), b(TokenKind.MINUS, n(2), n(3)))),
("1 + 2-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
("1 + 2 +-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
("1 + 2 * 3", b(TokenKind.PLUS, n(1), b(TokenKind.STAR, n(2), n(3)))),
("1 * 2 + 3", b(TokenKind.PLUS, b(TokenKind.STAR, n(1), n(2)), n(3))),
("(1 + 2) * 3", b(TokenKind.STAR, b(TokenKind.PLUS, n(1), n(2)), n(3))),
("1 * (2 + 3)", b(TokenKind.STAR, n(1), b(TokenKind.PLUS, n(2), n(3)))),
])
def test_i_can_parse_simple_expression(text, expected):
parser = DefaultParser(text, None)
ast = parser.parse()
assert ast.is_same(expected)
@pytest.mark.parametrize("text, token_found, expected_tokens", [
("1+", TokenKind.EOF,
[TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, 'true', 'false', 'null', TokenKind.LPAR]),
("(1+1", TokenKind.EOF, [TokenKind.RPAR])
])
def test_i_can_detect_unexpected_end_of_code(text, token_found, expected_tokens):
parser = DefaultParser(text, None)
parser.parse()
assert parser.has_error
assert parser.error_sink[0].tokens[0].type == token_found
assert parser.error_sink[0].expected_tokens == expected_tokens
@pytest.mark.parametrize("text, expected_name, expected_expr", [
("def concept hello", "hello", nop()),
("def concept hello ", "hello", nop()),
("def concept a+b", "a + b", nop()),
("def concept 'a+b'", "a+b", nop()),
("def concept 'a+b'+c", "a+b + c", nop()),
("def concept 'as if'", "as if", nop()),
("def concept 'as' if", "as if", nop()),
("def concept hello as 'hello'", "hello", ast.Expression(body=ast.Str(s='hello'))),
("def concept hello as 1", "hello", ast.Expression(body=ast.Num(n=1))),
("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))),
])
def test_i_can_parse_def_concept(text, expected_name, expected_expr):
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
assert isinstance(tree, DefConceptNode)
assert tree.name == expected_name
if isinstance(tree.body, PythonNode):
assert ast.dump(tree.body.ast) == ast.dump(expected_expr)
else:
assert tree.body == expected_expr
def compare_ast(left, right):
left_as_string = ast.dump(left)
left_as_string = left_as_string.replace(", ctx=Load()", "")
right_as_string = right if isinstance(right, str) else ast.dump(right)
right_as_string = right_as_string.replace(", ctx=Load()", "")
return left_as_string == right_as_string
def test_i_can_parse_complex_def_concept_statement():
text = """def concept a plus b
where a,b
pre isinstance(a, int) and isinstance(b, float)
post isinstance(res, int)
as res = a + b
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
assert not parser.has_error
assert isinstance(tree, DefConceptNode)
assert tree.name == "a plus b"
assert tree.where.source == "a,b"
assert isinstance(tree.where.ast, ast.Expression)
assert tree.pre.source == "isinstance(a, int) and isinstance(b, float)"
assert isinstance(tree.pre.ast, ast.Expression)
assert tree.post.source == "isinstance(res, int)"
assert isinstance(tree.post.ast, ast.Expression)
assert tree.body.source == "res = a + b"
assert isinstance(tree.body.ast, ast.Module)
def test_i_can_use_colon_to_declare_indentation():
text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
assert not parser.has_error
assert isinstance(tree, DefConceptNode)
def test_i_can_use_colon_to_declare_indentation2():
text = """
def concept add one to a as:
def func(x):
return x+1
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
assert not parser.has_error
assert isinstance(tree, DefConceptNode)
def test_without_colon_i_get_an_indent_error():
text = """
def concept add one to a as
def func(x):
return x+1
func(a)
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
assert parser.has_error
assert isinstance(tree, DefConceptNode)
assert isinstance(parser.error_sink[0].exception, IndentationError)
def test_i_can_detect_error():
"""
In this test, func(b) is not correctly indented while colon is specified after the 'as' keyword
"""
text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
func(b)
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
assert parser.has_error
assert isinstance(tree, DefConceptNode)
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
# check that the error is caused by 'func(b)'
assert parser.error_sink[0].tokens[0].line == 6
assert parser.error_sink[0].tokens[0].column == 1
@pytest.mark.parametrize("text, token_found, expected_tokens", [
("def hello as 'hello'", "hello", [Keywords.CONCEPT]),
("def concept as", Keywords.AS, ["<name>"]),
])
def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens):
parser = DefaultParser(text, PythonParser)
parser.parse()
assert parser.has_error
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
assert parser.error_sink[0].tokens[0].value == token_found
assert parser.error_sink[0].expected_tokens == expected_tokens
@pytest.mark.parametrize("text", [
"def concept hello where 1+",
"def concept hello pre 1+",
"def concept hello post 1+",
"def concept hello as 1+"
])
def test_i_can_detect_error_in_declaration(text):
parser = DefaultParser(text, PythonParser)
parser.parse()
assert parser.has_error
assert isinstance(parser.error_sink[0], PythonErrorNode)
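

# Aside: once where/pre/post/as are parsed, a natural next step is enforcing
# them around the body, design-by-contract style. A minimal sketch under
# assumed names (run_concept and env are hypothetical, not part of this
# commit):
def run_concept(pre_src, body_src, post_src, env):
    if pre_src and not eval(compile(pre_src, "<pre>", "eval"), env):
        raise AssertionError(f"pre condition failed: {pre_src}")
    exec(compile(body_src, "<body>", "exec"), env)  # body may bind 'res'
    if post_src and not eval(compile(post_src, "<post>", "eval"), env):
        raise AssertionError(f"post condition failed: {post_src}")
    return env.get("res")


# run_concept("isinstance(a, int)", "res = a + int(b)",
#             "isinstance(res, int)", {"a": 1, "b": 2.0})  # -> 3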
+52 -15
View File
@@ -1,12 +1,17 @@
+import ast
 import pytest
 import os
 from os import path
 import shutil

-from core.concept import Concept
+from core.concept import Concept, ConceptParts
 from core.sheerka import Sheerka
+from parsers.DefaultParser import DefConceptNode, DefaultParser
+from parsers.PythonParser import PythonParser

 tests_root = path.abspath("../build/tests")
+root_folder = "init_folder"

 @pytest.fixture(autouse=True)
@@ -25,8 +30,6 @@ def init_test():
 def test_root_folder_is_created_after_initialization():
-    root_folder = "init_folder"
-
     return_value = Sheerka().initialize(root_folder)
     assert return_value.status, "initialisation should be successful"
     assert Sheerka().concept_equals(return_value.value, Sheerka().get_concept("success"))
@@ -34,22 +37,56 @@ def test_root_folder_is_created_after_initialization():
 def test_lists_of_concepts_is_initialized():
-    root_folder = "init_folder"
-
     Sheerka().initialize(root_folder)
     assert len(Sheerka().concepts) > 1


-def test_null_concept_are_equals():
-    concept1 = Concept("test1")
-    concept2 = Concept("test2")
-    concept3 = Concept("test3")
-
-    assert not Sheerka.concept_equals(concept1, None)
-    assert not Sheerka.concept_equals(None, concept1)
-    assert not Sheerka.concept_equals(concept1, concept2)
-    assert not Sheerka.concept_equals(concept1, concept3)
-
-    assert Sheerka.concept_equals(None, None)
-    assert Sheerka.concept_equals(concept1, concept1)
+# def test_null_concept_are_equals():
+#     concept1 = Concept("test1")
+#     concept2 = Concept("test2")
+#     concept3 = Concept("test3")
+#
+#     assert not Sheerka.concept_equals(concept1, None)
+#     assert not Sheerka.concept_equals(None, concept1)
+#     assert not Sheerka.concept_equals(concept1, concept2)
+#     assert not Sheerka.concept_equals(concept1, concept3)
+#
+#     assert Sheerka.concept_equals(None, None)
+#     assert Sheerka.concept_equals(concept1, concept1)
+
+
+def get_concept():
+    text = """
+def concept a+b
+where isinstance(a, int) and isinstance(b, int)
+pre isinstance(a, int) and isinstance(b, int)
+post isinstance(res, int)
+as:
+    def func(x,y):
+        return x+y
+    func(a,b)
+"""
+    parser = DefaultParser(text, PythonParser)
+    return parser.parse()
+
+
+def test_i_can_add_a_concept():
+    concept = get_concept()
+    sheerka = Sheerka()
+    sheerka.initialize(root_folder)
+    res = sheerka.add_concept(concept)
+    assert res.status
+    assert res.value == Concept(
+        name="a + b",
+        where="isinstance(a, int) and isinstance(b, int)",
+        pre="isinstance(a, int) and isinstance(b, int)",
+        post="isinstance(res, int)",
+        body="def func(x,y):\n    return x+y\nfunc(a,b)")
+    assert isinstance(res.value.codes[ConceptParts.WHERE], ast.Expression)
+    assert isinstance(res.value.codes[ConceptParts.PRE], ast.Expression)
+    assert isinstance(res.value.codes[ConceptParts.POST], ast.Expression)
+    assert isinstance(res.value.codes[ConceptParts.BODY], ast.Module)
+
+# def test_i_cannot_add_the_same_concept_twice():
+#     concept1 = DefConceptNode(name="concept")
+#     sheerka = Sheerka
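
# Why the WHERE/PRE/POST codes above come back as ast.Expression while BODY
# is an ast.Module: single conditions presumably compile in "eval" mode and
# the body in "exec" mode (a sketch of the compilation step, not the actual
# add_concept implementation):
#
#     ast.parse("isinstance(a, int)", mode="eval")  # -> ast.Expression
#     ast.parse("res = a + b", mode="exec")         # -> ast.Module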
+83
View File
@@ -1,3 +1,5 @@
+import hashlib
 import pytest
 import os
 from os import path
@@ -6,6 +8,8 @@ from datetime import date, datetime
 import shutil
 import json

+from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer
+
 tests_root = path.abspath("../build/tests")
@@ -70,6 +74,33 @@ class ObjNoKey:
return f"ObjNoKey({self.a}, {self.b})" return f"ObjNoKey({self.a}, {self.b})"
class ObjDumpJson:
def __init__(self, key, value):
self.key = key
self.value = value
def __eq__(self, obj):
return isinstance(obj, ObjDumpJson) and \
self.key == obj.key and \
self.value == obj.value
def __repr__(self):
return f"ObjDumpJson({self.key}, {self.value})"
def get_key(self):
return self.key
def get_digest(self):
return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest()
def to_dict(self):
return self.__dict__
def from_dict(self, as_dict):
self.value = as_dict["value"]
self.key = as_dict["key"]
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def init_test(): def init_test():
if path.exists(tests_root): if path.exists(tests_root):
@@ -571,3 +602,55 @@ def test_i_can_test_than_an_entry_exits():
     assert not sdp.exists("entry")
     sdp.add(Event("event"), "entry", "value")
     assert sdp.exists("entry")
+
+
+def test_i_can_save_and_load_object_with_history():
+    sdp = SheerkaDataProvider(".sheerka")
+    obj = ObjDumpJson("my_key", "value1")
+    sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(obj)))
+    entry, key = sdp.add_ref("Obj", obj)
+    loaded = sdp.get(entry, key)
+    history = getattr(loaded, Serializer.HISTORY)
+
+    assert key == obj.key
+    assert entry == "Obj"
+    assert loaded.key == obj.key
+    assert loaded.value == obj.value
+    assert getattr(history, Serializer.USERNAME) == "kodjo"
+    assert getattr(history, Serializer.MODIFICATION_DATE) != ""
+    assert getattr(history, Serializer.PARENTS) == []
+    assert os.path.exists(sdp.get_obj_path(sdp.ObjectsFolder, obj.get_digest()))
+
+    # save a second time with no modification
+    previous_modification_time = getattr(history, Serializer.MODIFICATION_DATE)
+    previous_parents = getattr(history, Serializer.PARENTS)
+    sdp.add_ref("Obj", loaded)
+    loaded = sdp.get(entry, key)
+    history = getattr(loaded, Serializer.HISTORY)
+    assert getattr(history, Serializer.MODIFICATION_DATE) == previous_modification_time
+    assert getattr(history, Serializer.PARENTS) == previous_parents
+
+    # save again, but with a modification
+    previous_digest = loaded.get_digest()
+    loaded.value = "value2"
+    sdp.add_ref("Obj", loaded)
+    loaded2 = sdp.get(entry, key)
+    history2 = getattr(loaded2, Serializer.HISTORY)  # read history from the freshly loaded object
+    assert loaded2.key == loaded.key
+    assert loaded2.value == loaded.value
+    assert getattr(history2, Serializer.USERNAME) == "kodjo"
+    assert getattr(history2, Serializer.MODIFICATION_DATE) != ""
+    assert getattr(history2, Serializer.PARENTS) == [previous_digest]
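
# A small sketch of the content-addressed history exercised above: each
# saved state is identified by the sha256 digest of its content (the
# "Concept:{key}{value}" format mirrors ObjDumpJson.get_digest), re-saving
# unchanged content is a no-op, and a modification records the previous
# digest as its parent. sketch_digest is a hypothetical helper.
def sketch_digest(key, value):
    return hashlib.sha256(f"Concept:{key}{value}".encode("utf-8")).hexdigest()


# d1 = sketch_digest("my_key", "value1")
# d2 = sketch_digest("my_key", "value2")   # d1 != d2
# parents_of_d2 = [d1]                     # history points back at the prior state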
+43 -3
View File
@@ -1,16 +1,56 @@
+import pytest
+from dataclasses import dataclass
+
 from sdp.sheerkaDataProvider import Event
-from sdp.sheerkaSerializer import Serializer
+from sdp.sheerkaSerializer import Serializer, ObjectSerializer, SerializerContext, BaseSerializer
 from datetime import datetime


+@dataclass()
+class Obj:
+    key: str = ""
+    prop1: str = ""
+
+    def from_dict(self, json_object):
+        self.prop1 = json_object["prop1"]
+        self.key = json_object["key"]
+        return self
+
+    def to_dict(self):
+        return self.__dict__
+
+
 def test_i_can_serialize_an_event():
     event = Event("test", user="user", date=datetime.fromisoformat("2019-10-21T10:20:30.999"))
     serializer = Serializer()
-    stream = serializer.serialize(event)
-    loaded = serializer.deserialize(stream)
+    stream = serializer.serialize(event, None)
+    loaded = serializer.deserialize(stream, None)
     assert event.version == loaded.version
     assert event.user == loaded.user
     assert event.date == loaded.date
     assert event.message == loaded.message
+
+
+def test_i_can_serialize_an_object():
+    obj = Obj("10", "value")
+    serializer = Serializer()
+    serializer.register(ObjectSerializer("tests.test_sheerkaSerializer.Obj"))
+    context = SerializerContext("kodjo", "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b")
+    stream = serializer.serialize(obj, context)
+    loaded = serializer.deserialize(stream, context)
+
+    assert getattr(loaded, Serializer.HISTORY)[Serializer.USERNAME] == "kodjo"
+    assert getattr(loaded, Serializer.HISTORY)[Serializer.MODIFICATION_DATE] != ""
+    assert getattr(loaded, Serializer.HISTORY)[Serializer.PARENTS] == []
+    assert loaded.key == "10"
+    assert loaded.prop1 == "value"
+
+
+@pytest.mark.parametrize("obj, expected", [
+    (Obj("10", "value"), "tests.test_sheerkaSerializer.Obj")
+])
+def test_get_full_qualified_name(obj, expected):
+    assert expected == BaseSerializer.get_full_qualified_name(obj)
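
# A guess at what BaseSerializer.get_full_qualified_name computes (the
# parametrized case above maps an Obj instance to its module-qualified
# class name); sketch_full_qualified_name is a hypothetical stand-in:
def sketch_full_qualified_name(obj):
    cls = type(obj)
    return f"{cls.__module__}.{cls.__qualname__}"  # e.g. "tests.test_sheerkaSerializer.Obj"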