Added DefaultParser
This commit is contained in:
+73
-10
@@ -1,29 +1,92 @@
|
|||||||
|
import hashlib
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class ConceptParts(Enum):
|
||||||
|
WHERE = "where"
|
||||||
|
PRE = "pre"
|
||||||
|
POST = "post"
|
||||||
|
BODY = "body"
|
||||||
|
|
||||||
|
|
||||||
class Concept:
|
class Concept:
|
||||||
"""
|
"""
|
||||||
Default concept object
|
Default concept object
|
||||||
A concept is a the base object of our universe
|
A concept is a the base object of our universe
|
||||||
Everything is a concept
|
Everything is a concept
|
||||||
"""
|
"""
|
||||||
|
props_to_serialize = ("id", "name", "where", "pre", "post", "body", "desc")
|
||||||
|
|
||||||
concepts_id = 0
|
key_name = "concepts"
|
||||||
|
|
||||||
def __init__(self, name, is_builtin=False):
|
def __init__(self, name=None, is_builtin=False, where=None, pre=None, post=None, body=None, desc=None):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.is_builtin = is_builtin
|
self.is_builtin = is_builtin
|
||||||
self.pre = None # list of pre conditions before calling the main function
|
self.where = where # condition to recognize variables in name
|
||||||
self.post = None # list of post conditions after calling the main function
|
self.pre = pre # list of pre conditions before calling the main function
|
||||||
self.main = None # main method, can also be the value of the concept
|
self.post = post # list of post conditions after calling the main function
|
||||||
self.id = Concept.concepts_id
|
self.body = body # main method, can also be the value of the concept
|
||||||
Concept.concepts_id = Concept.concepts_id + 1
|
self.desc = desc
|
||||||
|
self.key = None
|
||||||
|
self.parent = None
|
||||||
|
|
||||||
self.props = [] # list of Property for this concept
|
self.props = [] # list of Property for this concept
|
||||||
self.functions = {} # list of helper functions
|
self.functions = {} # list of helper functions
|
||||||
|
|
||||||
def __str__(self):
|
self.codes = {}
|
||||||
return f"({self.id}){self.name}"
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"({self.id}){self.name}"
|
return f"({self.key}){self.name}"
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
if not isinstance(other, Concept):
|
||||||
|
return False
|
||||||
|
return self.name == other.name and \
|
||||||
|
self.where == other.where and \
|
||||||
|
self.pre == other.pre and \
|
||||||
|
self.post == other.post and \
|
||||||
|
self.body == other.body
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash(self.name)
|
||||||
|
|
||||||
|
def add_codes(self, codes):
|
||||||
|
"""
|
||||||
|
From a dict of <ConceptParts, AST>
|
||||||
|
fill the codes
|
||||||
|
:param codes:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
possibles_codes = set(item.value for item in ConceptParts)
|
||||||
|
if codes is None:
|
||||||
|
return
|
||||||
|
for key in codes:
|
||||||
|
if key in possibles_codes:
|
||||||
|
self.codes[ConceptParts(key)] = codes[key]
|
||||||
|
|
||||||
|
def get_digest(self):
|
||||||
|
"""
|
||||||
|
Returns the digest of the event
|
||||||
|
:return: hexa form of the sha256
|
||||||
|
"""
|
||||||
|
return hashlib.sha256(f"Concept:{self.name}{self.pre}{self.post}{self.body}".encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
props_as_dict = dict((prop, getattr(self, prop)) for prop in self.props_to_serialize)
|
||||||
|
return props_as_dict
|
||||||
|
|
||||||
|
def from_dict(self, as_dict):
|
||||||
|
for prop in self.props_to_serialize:
|
||||||
|
setattr(self, prop, as_dict[prop])
|
||||||
|
return self
|
||||||
|
|
||||||
|
|
||||||
|
class ErrorConcept(Concept):
|
||||||
|
def __init__(self, where=None, pre=None, post=None, body=None, desc=None):
|
||||||
|
Concept.__init__(self, "error", is_builtin=True, where=where, pre=pre, post=post, body=body, desc=desc)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"{self.name} : {self.body}"
|
||||||
|
|
||||||
|
|
||||||
class Property:
|
class Property:
|
||||||
|
|||||||
+49
-9
@@ -1,8 +1,9 @@
|
|||||||
import os
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from core.concept import Concept
|
from core.concept import Concept, ErrorConcept
|
||||||
from sdp.sheerkaDataProvider import SheerkaDataProvider
|
from parsers.PythonParser import PythonParser
|
||||||
|
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event
|
||||||
|
from parsers.DefaultParser import DefaultParser, DefConceptNode
|
||||||
|
|
||||||
|
|
||||||
class Singleton(type):
|
class Singleton(type):
|
||||||
@@ -54,6 +55,7 @@ class Sheerka(Concept, metaclass=Singleton):
|
|||||||
self.create_builtin_concepts()
|
self.create_builtin_concepts()
|
||||||
|
|
||||||
self.sdp = None
|
self.sdp = None
|
||||||
|
self.parsers = []
|
||||||
|
|
||||||
def create_builtin_concepts(self):
|
def create_builtin_concepts(self):
|
||||||
"""
|
"""
|
||||||
@@ -76,11 +78,38 @@ class Sheerka(Concept, metaclass=Singleton):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
self.sdp = SheerkaDataProvider(root_folder)
|
self.sdp = SheerkaDataProvider(root_folder)
|
||||||
|
self.parsers.append(lambda text: DefaultParser(text, PythonParser))
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
return ReturnValue(False, self.get_concept(Sheerka.ERROR_CONCEPT_NAME, True), e)
|
return ReturnValue(False, self.get_concept(Sheerka.ERROR_CONCEPT_NAME, True), e)
|
||||||
|
|
||||||
return ReturnValue(True, self.get_concept(Sheerka.SUCCESS_CONCEPT_NAME, True))
|
return ReturnValue(True, self.get_concept(Sheerka.SUCCESS_CONCEPT_NAME, True))
|
||||||
|
|
||||||
|
def eval(self, text):
|
||||||
|
#evt_digest = self.sdp.save_event(Event(text))
|
||||||
|
result = self.try_parse(text)
|
||||||
|
|
||||||
|
return_values = []
|
||||||
|
for parser_name, status, node in result:
|
||||||
|
if not status:
|
||||||
|
return_values.append(ReturnValue(False, ErrorConcept(body=node)))
|
||||||
|
elif status and isinstance(node, DefConceptNode):
|
||||||
|
return_values.append(self.add_concept(node))
|
||||||
|
|
||||||
|
return return_values
|
||||||
|
|
||||||
|
def try_parse(self, text):
|
||||||
|
result = []
|
||||||
|
for parser in self.parsers:
|
||||||
|
p = parser(text)
|
||||||
|
# try:
|
||||||
|
# tree = p.parse()
|
||||||
|
# result.append((p.name, tree))
|
||||||
|
# except Exception as e:
|
||||||
|
# result.append((p.name, e))
|
||||||
|
tree = p.parse()
|
||||||
|
result.append((p.name, not p.has_error, p.error_sink if p.has_error else tree))
|
||||||
|
return result
|
||||||
|
|
||||||
def get_concept(self, name, is_builtin=False):
|
def get_concept(self, name, is_builtin=False):
|
||||||
"""
|
"""
|
||||||
Given a concept name, tries to find it
|
Given a concept name, tries to find it
|
||||||
@@ -93,6 +122,22 @@ class Sheerka(Concept, metaclass=Singleton):
|
|||||||
return concept
|
return concept
|
||||||
return self.concepts[1]
|
return self.concepts[1]
|
||||||
|
|
||||||
|
def add_concept(self, def_concept_node: DefConceptNode):
|
||||||
|
"""
|
||||||
|
Adds a new concept to the system
|
||||||
|
:param def_concept_node: DefConceptNode
|
||||||
|
:return: digest of the new concept
|
||||||
|
"""
|
||||||
|
|
||||||
|
concept = Concept(def_concept_node.name)
|
||||||
|
for prop in ("where", "pre", "post", "body"):
|
||||||
|
concept_part_node = getattr(def_concept_node, prop)
|
||||||
|
value = concept_part_node.source if hasattr(concept_part_node, "source") else ""
|
||||||
|
setattr(concept, prop, value)
|
||||||
|
|
||||||
|
concept.add_codes(def_concept_node.get_codes())
|
||||||
|
return ReturnValue(True, concept)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def concept_equals(concept1, concept2):
|
def concept_equals(concept1, concept2):
|
||||||
"""True if the two concepts refer to the same concept"""
|
"""True if the two concepts refer to the same concept"""
|
||||||
@@ -102,9 +147,4 @@ class Sheerka(Concept, metaclass=Singleton):
|
|||||||
if concept1 is None or concept2 is None:
|
if concept1 is None or concept2 is None:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return concept1.id == concept2.id
|
return concept1.key == concept2.key
|
||||||
|
|
||||||
def record_event(self, event):
|
|
||||||
self.sdp.save_event(event)
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -84,7 +84,7 @@ concept is_the_opposite:
|
|||||||
a, b
|
a, b
|
||||||
|
|
||||||
test:
|
test:
|
||||||
a.pre == not b.pre && a.post == b.post
|
a.pre == not b.pre && a.post == not b.post
|
||||||
|
|
||||||
|
|
||||||
print all concepts
|
print all concepts
|
||||||
|
|||||||
@@ -18,3 +18,22 @@ def concept a is a number as :
|
|||||||
--> adds concept a is a number
|
--> adds concept a is a number
|
||||||
--> add the pre condition to the concept a plus b
|
--> add the pre condition to the concept a plus b
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# Define a new concept in one line
|
||||||
|
```
|
||||||
|
def concept words
|
||||||
|
def concept words [where whereclause] [as expression] [pre precond] [post postcond]
|
||||||
|
```
|
||||||
|
|
||||||
|
# Define a complicated concept
|
||||||
|
```
|
||||||
|
def concept
|
||||||
|
as:
|
||||||
|
...
|
||||||
|
where:
|
||||||
|
...
|
||||||
|
pre:
|
||||||
|
...
|
||||||
|
post:
|
||||||
|
...
|
||||||
|
```
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
```
|
||||||
|
> "hello
|
||||||
|
-> unfinished quote "
|
||||||
|
> def concept unfinished quote q
|
||||||
|
... where:
|
||||||
|
...... q in ('"', '"')
|
||||||
|
... desc:
|
||||||
|
...... "Error detected by the default parser where the trailing quote is missing"
|
||||||
|
... input = sheerka.last_input
|
||||||
|
|
||||||
|
|
||||||
|
> when unfinished quote q as c:
|
||||||
|
... add rule as:
|
||||||
|
...... if q in sheerka.input:
|
||||||
|
......... sheerka.resume(c, c.input + input)
|
||||||
|
......... remove rule
|
||||||
|
```
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import sys
|
import sys
|
||||||
from core.utils import sysarg_to_string
|
from core.utils import sysarg_to_string
|
||||||
from core.sheerka import Sheerka
|
from core.sheerka import Sheerka
|
||||||
from sdp.sheerkaDataProvider import Event
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -10,12 +10,10 @@ def main():
|
|||||||
|
|
||||||
# first, record the event
|
# first, record the event
|
||||||
event_as_string = sysarg_to_string(sys.argv[1:])
|
event_as_string = sysarg_to_string(sys.argv[1:])
|
||||||
evt_digest = sheerka.record_event(Event(event_as_string))
|
result = sheerka.eval(event_as_string)
|
||||||
|
|
||||||
# launch the parsers
|
|
||||||
|
|
||||||
# execute the concepts
|
# execute the concepts
|
||||||
print(event_as_string)
|
print(result)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,41 @@
|
|||||||
|
from dataclasses import dataclass, field
|
||||||
|
from parsers.tokenizer import TokenKind, Keywords
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class Node:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class NopNode(Node):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "nop"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class ErrorNode(Node):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class BaseParser:
|
||||||
|
def __init__(self, name, text):
|
||||||
|
self.name = name
|
||||||
|
self.text = text
|
||||||
|
self.has_error = False
|
||||||
|
self.error_sink = []
|
||||||
|
|
||||||
|
def parse(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_text_from_tokens(tokens):
|
||||||
|
if tokens is None:
|
||||||
|
return ""
|
||||||
|
res = ""
|
||||||
|
for token in tokens:
|
||||||
|
value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value
|
||||||
|
res += value
|
||||||
|
return res
|
||||||
@@ -0,0 +1,383 @@
|
|||||||
|
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
|
||||||
|
from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class DefaultParserNode(Node):
|
||||||
|
tokens: list = field(compare=False)
|
||||||
|
|
||||||
|
def is_same(self, other):
|
||||||
|
if type(self) != type(other):
|
||||||
|
return False
|
||||||
|
|
||||||
|
if hasattr(self, "value") and self.value != other.value:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
|
||||||
|
message: str
|
||||||
|
expected_tokens: list
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class SyntaxErrorNode(DefaultParserErrorNode):
|
||||||
|
message: str
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class DefConceptNode(DefaultParserNode):
|
||||||
|
name: str
|
||||||
|
where: Node = None
|
||||||
|
pre: Node = None
|
||||||
|
post: Node = None
|
||||||
|
body: Node = NopNode
|
||||||
|
|
||||||
|
def get_codes(self):
|
||||||
|
codes = {}
|
||||||
|
for prop in ["where", "pre", "post", "body"]:
|
||||||
|
prop_value = getattr(self, prop)
|
||||||
|
if hasattr(prop_value, "ast"):
|
||||||
|
codes[prop] = prop_value.ast
|
||||||
|
return codes
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class NumberNode(DefaultParserNode):
|
||||||
|
value: object
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return str(self.value)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class StringNode(DefaultParserNode):
|
||||||
|
value: str
|
||||||
|
quote: str
|
||||||
|
|
||||||
|
def is_same(self, other):
|
||||||
|
if not super(StringNode, self).is_same(other):
|
||||||
|
return False
|
||||||
|
return self.quote == other.quote
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return self.quote + self.value + self.quote
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class VariableNode(DefaultParserNode):
|
||||||
|
value: str
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return self.value
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class TrueNode(DefaultParserNode):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "true"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class FalseNode(DefaultParserNode):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "false"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class NullNode(DefaultParserNode):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "null"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class BinaryNode(DefaultParserNode):
|
||||||
|
operator: TokenKind
|
||||||
|
left: Node
|
||||||
|
right: Node
|
||||||
|
|
||||||
|
def is_same(self, other):
|
||||||
|
if not super(BinaryNode, self).is_same(other):
|
||||||
|
return False
|
||||||
|
if self.operator != other.operator:
|
||||||
|
return False
|
||||||
|
if not self.left.is_same(other.left):
|
||||||
|
return False
|
||||||
|
return self.right.is_same(other.right)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"({self.left} {self.operator} {self.right})"
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultParser(BaseParser):
|
||||||
|
def __init__(self, text, sub_parser):
|
||||||
|
BaseParser.__init__(self, "Default", text)
|
||||||
|
self.sub_parser = sub_parser
|
||||||
|
self.lexer = Tokenizer(text)
|
||||||
|
self.lexer_iter = iter(Tokenizer(text))
|
||||||
|
self._current = None
|
||||||
|
|
||||||
|
self.next_token()
|
||||||
|
|
||||||
|
def collect_tokens(self, *args):
|
||||||
|
result = []
|
||||||
|
for item in args:
|
||||||
|
if isinstance(item, Node):
|
||||||
|
result.extend(item.tokens)
|
||||||
|
else:
|
||||||
|
result.append(item)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def add_error(self, error, next_token=True):
|
||||||
|
self.has_error = True
|
||||||
|
self.error_sink.append(error)
|
||||||
|
if next_token:
|
||||||
|
self.next_token()
|
||||||
|
return error
|
||||||
|
|
||||||
|
def get_token(self) -> Token:
|
||||||
|
return self._current
|
||||||
|
|
||||||
|
def next_token(self, skip_whitespace=True):
|
||||||
|
try:
|
||||||
|
self._current = next(self.lexer_iter)
|
||||||
|
if skip_whitespace:
|
||||||
|
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
|
||||||
|
self._current = next(self.lexer_iter)
|
||||||
|
except StopIteration:
|
||||||
|
self._current = None
|
||||||
|
return
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_concept_name(tokens):
|
||||||
|
name = ""
|
||||||
|
first = True
|
||||||
|
for token in tokens:
|
||||||
|
if token.type == TokenKind.EOF:
|
||||||
|
break
|
||||||
|
if not first:
|
||||||
|
name += " "
|
||||||
|
name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
|
||||||
|
first = False
|
||||||
|
|
||||||
|
return name
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def fix_indentation(tokens):
|
||||||
|
"""
|
||||||
|
In the following example
|
||||||
|
def concept add one to a as:
|
||||||
|
def func(x):
|
||||||
|
return x+1
|
||||||
|
func(a)
|
||||||
|
indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error
|
||||||
|
:param tokens:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
if tokens[1].type != TokenKind.COLON:
|
||||||
|
return tokens[1:]
|
||||||
|
|
||||||
|
if len(tokens) < 3:
|
||||||
|
return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
|
||||||
|
|
||||||
|
if tokens[2].type != TokenKind.NEWLINE:
|
||||||
|
return UnexpectedTokenErrorNode([tokens[2]], "Unexpected token after colon", [TokenKind.NEWLINE])
|
||||||
|
|
||||||
|
if tokens[3].type != TokenKind.WHITESPACE:
|
||||||
|
return SyntaxErrorNode([tokens[3]], "Indentation not found")
|
||||||
|
indent_size = len(tokens[3].value)
|
||||||
|
|
||||||
|
# now fix the other indentations
|
||||||
|
i = 4
|
||||||
|
while i < len(tokens) - 1:
|
||||||
|
if tokens[i].type == TokenKind.NEWLINE:
|
||||||
|
if tokens[i + 1].type != TokenKind.WHITESPACE:
|
||||||
|
return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
|
||||||
|
|
||||||
|
if len(tokens[i + 1].value) < indent_size:
|
||||||
|
return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
|
||||||
|
|
||||||
|
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
return tokens[4:]
|
||||||
|
|
||||||
|
def parse(self):
|
||||||
|
return self.parse_statement()
|
||||||
|
|
||||||
|
def parse_statement(self):
|
||||||
|
token = self.get_token()
|
||||||
|
if token.value == Keywords.DEF:
|
||||||
|
self.next_token()
|
||||||
|
return self.parse_def_concept()
|
||||||
|
else:
|
||||||
|
return self.parse_expression()
|
||||||
|
|
||||||
|
def parse_def_concept(self):
|
||||||
|
"""
|
||||||
|
def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
|
||||||
|
"""
|
||||||
|
|
||||||
|
def_concept_parts = [Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
|
||||||
|
|
||||||
|
token = self.get_token()
|
||||||
|
if token.value != Keywords.CONCEPT:
|
||||||
|
return self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
|
||||||
|
|
||||||
|
self.next_token()
|
||||||
|
token = self.get_token()
|
||||||
|
|
||||||
|
if token.value in (Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST):
|
||||||
|
return self.add_error(UnexpectedTokenErrorNode([token], "Concept name is missing.", ["<name>"]))
|
||||||
|
|
||||||
|
name_as_tokens = []
|
||||||
|
while token.type != TokenKind.EOF and token.value not in def_concept_parts:
|
||||||
|
name_as_tokens.append(token)
|
||||||
|
self.next_token()
|
||||||
|
token = self.get_token()
|
||||||
|
name = self.get_concept_name(name_as_tokens)
|
||||||
|
|
||||||
|
# try to parse as, where, pre and post declarations
|
||||||
|
tokens = {
|
||||||
|
Keywords.AS: None,
|
||||||
|
Keywords.WHERE: None,
|
||||||
|
Keywords.PRE: None,
|
||||||
|
Keywords.POST: None,
|
||||||
|
}
|
||||||
|
current_part = None
|
||||||
|
while token.type != TokenKind.EOF:
|
||||||
|
if token.value in def_concept_parts:
|
||||||
|
keyword = token.value
|
||||||
|
if tokens[keyword]:
|
||||||
|
return self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
|
||||||
|
tokens[keyword] = [token] # first element of the list is the keyword
|
||||||
|
current_part = keyword
|
||||||
|
self.next_token()
|
||||||
|
else:
|
||||||
|
if current_part is None:
|
||||||
|
return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", def_concept_parts))
|
||||||
|
else:
|
||||||
|
tokens[current_part].append(token)
|
||||||
|
self.next_token(False)
|
||||||
|
|
||||||
|
token = self.get_token()
|
||||||
|
|
||||||
|
asts = {
|
||||||
|
Keywords.AS: NopNode(),
|
||||||
|
Keywords.WHERE: NopNode(),
|
||||||
|
Keywords.PRE: NopNode(),
|
||||||
|
Keywords.POST: NopNode(),
|
||||||
|
}
|
||||||
|
|
||||||
|
# check for empty declarations
|
||||||
|
for keyword in tokens:
|
||||||
|
current_tokens = tokens[keyword]
|
||||||
|
if current_tokens is not None:
|
||||||
|
if len(current_tokens) == 0: # only one element means empty decl
|
||||||
|
return self.add_error(SyntaxErrorNode([current_tokens[0]], "Empty declaration"), False)
|
||||||
|
else:
|
||||||
|
current_tokens = self.fix_indentation(current_tokens)
|
||||||
|
if isinstance(current_tokens, ErrorNode):
|
||||||
|
self.add_error(current_tokens)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# start = current_tokens[0].index
|
||||||
|
# end = current_tokens[-1].index + len(current_tokens[-1].value)
|
||||||
|
sub_parser = self.sub_parser(current_tokens, source=keyword.value)
|
||||||
|
sub_tree = sub_parser.parse()
|
||||||
|
if isinstance(sub_tree, ErrorNode):
|
||||||
|
self.add_error(sub_tree, False)
|
||||||
|
asts[keyword] = sub_tree
|
||||||
|
|
||||||
|
return DefConceptNode([], name,
|
||||||
|
asts[Keywords.WHERE],
|
||||||
|
asts[Keywords.PRE],
|
||||||
|
asts[Keywords.POST],
|
||||||
|
asts[Keywords.AS])
|
||||||
|
|
||||||
|
def parse_expression(self):
|
||||||
|
return self.parse_addition()
|
||||||
|
|
||||||
|
def parse_addition(self):
|
||||||
|
left = self.parse_multiply()
|
||||||
|
token = self.get_token()
|
||||||
|
if token is None or token.type == TokenKind.EOF:
|
||||||
|
return left
|
||||||
|
|
||||||
|
if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5
|
||||||
|
right = self.parse_addition()
|
||||||
|
return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
|
||||||
|
|
||||||
|
if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
|
||||||
|
return left
|
||||||
|
|
||||||
|
self.next_token()
|
||||||
|
right = self.parse_addition()
|
||||||
|
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
|
||||||
|
|
||||||
|
def parse_multiply(self):
|
||||||
|
left = self.parse_atom()
|
||||||
|
token = self.get_token()
|
||||||
|
if token is None or token.type == TokenKind.EOF:
|
||||||
|
return left
|
||||||
|
|
||||||
|
if token.type not in (TokenKind.STAR, TokenKind.SLASH):
|
||||||
|
return left
|
||||||
|
|
||||||
|
self.next_token()
|
||||||
|
right = self.parse_multiply()
|
||||||
|
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
|
||||||
|
|
||||||
|
def parse_atom(self):
|
||||||
|
token = self.get_token()
|
||||||
|
if token.type == TokenKind.NUMBER:
|
||||||
|
self.next_token()
|
||||||
|
return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
|
||||||
|
elif token.type == TokenKind.STRING:
|
||||||
|
self.next_token()
|
||||||
|
return StringNode([token], token.value[1:-1], token.value[0])
|
||||||
|
elif token.type == TokenKind.IDENTIFIER:
|
||||||
|
if token.value == "true":
|
||||||
|
self.next_token()
|
||||||
|
return TrueNode([token])
|
||||||
|
elif token.value == "false":
|
||||||
|
self.next_token()
|
||||||
|
return FalseNode([token])
|
||||||
|
elif token.value == "null":
|
||||||
|
self.next_token()
|
||||||
|
return NullNode([token])
|
||||||
|
else:
|
||||||
|
self.next_token()
|
||||||
|
return VariableNode([token], token.value)
|
||||||
|
elif token.type == TokenKind.LPAR:
|
||||||
|
self.next_token()
|
||||||
|
exp = self.parse_expression()
|
||||||
|
token = self.get_token()
|
||||||
|
self.next_token()
|
||||||
|
|
||||||
|
if token.type != TokenKind.RPAR:
|
||||||
|
error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
|
||||||
|
self.add_error(error)
|
||||||
|
return error
|
||||||
|
|
||||||
|
return exp
|
||||||
|
else:
|
||||||
|
error = UnexpectedTokenErrorNode([token], "Unexpected token",
|
||||||
|
[TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
|
||||||
|
"null", TokenKind.LPAR])
|
||||||
|
return self.add_error(error)
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import ast
|
||||||
|
import copy
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class PythonErrorNode(ErrorNode):
|
||||||
|
source: str
|
||||||
|
exception: Exception
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class PythonNode(Node):
|
||||||
|
source: str
|
||||||
|
ast: ast.AST
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "PythonNode(" + ast.dump(self.ast) + ")"
|
||||||
|
#return "PythonNode(" + self.source + ")"
|
||||||
|
|
||||||
|
|
||||||
|
class PythonParser(BaseParser):
|
||||||
|
def __init__(self, text, source="<undef>"):
|
||||||
|
text = text if isinstance(text, str) else self.get_text_from_tokens(text)
|
||||||
|
text = text.strip()
|
||||||
|
BaseParser.__init__(self, "PythonParser", text)
|
||||||
|
self.source = source
|
||||||
|
|
||||||
|
def parse(self):
|
||||||
|
# first, try to parse an expression
|
||||||
|
res, tree, error = self.try_parse_expression()
|
||||||
|
if not res:
|
||||||
|
# then try to parse a statement
|
||||||
|
res, tree, error = self.try_parse_statement()
|
||||||
|
if not res:
|
||||||
|
self.has_error = True
|
||||||
|
error_node = PythonErrorNode(self.text, error)
|
||||||
|
self.error_sink.append(error_node)
|
||||||
|
return error_node
|
||||||
|
return PythonNode(self.text, tree)
|
||||||
|
|
||||||
|
def try_parse_expression(self):
|
||||||
|
try:
|
||||||
|
return True, ast.parse(self.text, f"<{self.source}>", 'eval'), None
|
||||||
|
except Exception as error:
|
||||||
|
return False, None, error
|
||||||
|
|
||||||
|
def try_parse_statement(self):
|
||||||
|
try:
|
||||||
|
return True, ast.parse(self.text, f"<{self.source}>", 'exec'), None
|
||||||
|
except Exception as error:
|
||||||
|
return False, None, error
|
||||||
|
|
||||||
|
def expr_to_expression(self, expr):
|
||||||
|
expr.lineno = 0
|
||||||
|
expr.col_offset = 0
|
||||||
|
result = ast.Expression(expr.value, lineno=0, col_offset=0)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
def exec_with_return(self, code):
|
||||||
|
code_ast = ast.parse(code)
|
||||||
|
|
||||||
|
init_ast = copy.deepcopy(code_ast)
|
||||||
|
init_ast.body = code_ast.body[:-1]
|
||||||
|
|
||||||
|
last_ast = copy.deepcopy(code_ast)
|
||||||
|
last_ast.body = code_ast.body[-1:]
|
||||||
|
|
||||||
|
exec(compile(init_ast, "<ast>", "exec"), globals())
|
||||||
|
if type(last_ast.body[0]) == ast.Expr:
|
||||||
|
return eval(compile(self.expr_to_expression(last_ast.body[0]), "<ast>", "eval"), globals())
|
||||||
|
else:
|
||||||
|
exec(compile(last_ast, "<ast>", "exec"), globals())
|
||||||
@@ -1,249 +0,0 @@
|
|||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class Token:
|
|
||||||
type: str
|
|
||||||
value: str
|
|
||||||
index: int
|
|
||||||
line: int
|
|
||||||
column: int
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class LexerError(Exception):
|
|
||||||
message: str
|
|
||||||
text: str
|
|
||||||
index: int
|
|
||||||
line: int
|
|
||||||
column: int
|
|
||||||
|
|
||||||
|
|
||||||
class Tokens:
|
|
||||||
EOF = "eof"
|
|
||||||
WHITESPACE = "whitespace"
|
|
||||||
NEWLINE = "newline"
|
|
||||||
KEYWORD = "keyword"
|
|
||||||
IDENTIFIER = "identifier"
|
|
||||||
STRING = "string"
|
|
||||||
NUMBER = "number"
|
|
||||||
TRUE = "true"
|
|
||||||
FALSE = "false"
|
|
||||||
LPAR = "lpar"
|
|
||||||
RPAR = "rpar"
|
|
||||||
LBRACKET = "lbrace"
|
|
||||||
RBRACKET = "rbracket"
|
|
||||||
LBRACE = "lbrace"
|
|
||||||
RBRACE = "rbrace"
|
|
||||||
PLUS = "plus"
|
|
||||||
MINUS = "minus"
|
|
||||||
STAR = "star"
|
|
||||||
SLASH = "slash"
|
|
||||||
PERCENT = "percent"
|
|
||||||
COMMA = "comma"
|
|
||||||
SEMICOLON = "semicolon"
|
|
||||||
COLON = "colon"
|
|
||||||
DOT = "dot"
|
|
||||||
QMARK = "qmark"
|
|
||||||
VBAR = "vbar"
|
|
||||||
AMPER = "amper"
|
|
||||||
|
|
||||||
|
|
||||||
class TokenIter:
|
|
||||||
KEYWORDS = ("def", "concept", "as", "pre", "post")
|
|
||||||
|
|
||||||
"""
|
|
||||||
Class that can iterate on the tokens
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, text):
|
|
||||||
self.text = text
|
|
||||||
self.text_len = len(text)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
|
|
||||||
i = 0
|
|
||||||
line = 1
|
|
||||||
column = 1
|
|
||||||
while i < self.text_len:
|
|
||||||
c = self.text[i]
|
|
||||||
if c == "+":
|
|
||||||
yield Token(Tokens.PLUS, "+", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "-":
|
|
||||||
if i + 1 < self.text_len and self.text[i + 1].isdigit():
|
|
||||||
number = self.eat_number(i)
|
|
||||||
yield Token(Tokens.NUMBER, number, i, line, column)
|
|
||||||
i += len(number)
|
|
||||||
column += len(number)
|
|
||||||
else:
|
|
||||||
yield Token(Tokens.MINUS, "-", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "/":
|
|
||||||
yield Token(Tokens.SLASH, "/", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "*":
|
|
||||||
yield Token(Tokens.STAR, "*", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "{":
|
|
||||||
yield Token(Tokens.LBRACE, "{", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "}":
|
|
||||||
yield Token(Tokens.RBRACE, "}", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "(":
|
|
||||||
yield Token(Tokens.LPAR, "(", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == ")":
|
|
||||||
yield Token(Tokens.RPAR, ")", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "[":
|
|
||||||
yield Token(Tokens.LBRACKET, "[", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "]":
|
|
||||||
yield Token(Tokens.RBRACKET, "]", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == " " or c == "\t":
|
|
||||||
whitespace = self.eat_whitespace(i)
|
|
||||||
yield Token(Tokens.WHITESPACE, whitespace, i, line, column)
|
|
||||||
i += len(whitespace)
|
|
||||||
column += len(whitespace)
|
|
||||||
elif c == ",":
|
|
||||||
yield Token(Tokens.COMMA, ",", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == ".":
|
|
||||||
yield Token(Tokens.DOT, ".", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == ";":
|
|
||||||
yield Token(Tokens.SEMICOLON, ";", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == ":":
|
|
||||||
yield Token(Tokens.COLON, ":", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "?":
|
|
||||||
yield Token(Tokens.QMARK, "?", i, line, column)
|
|
||||||
i += 1
|
|
||||||
column += 1
|
|
||||||
elif c == "\n" or c == "\r":
|
|
||||||
newline = self.eat_newline(i)
|
|
||||||
yield Token(Tokens.NEWLINE, newline, i, line, column)
|
|
||||||
i += len(newline)
|
|
||||||
column = 1
|
|
||||||
line += 1
|
|
||||||
elif c.isalpha() or c == "_":
|
|
||||||
identifier = self.eat_identifier(i)
|
|
||||||
type = Tokens.KEYWORD if identifier in self.KEYWORDS else Tokens.IDENTIFIER
|
|
||||||
yield Token(type, identifier, i, line, column)
|
|
||||||
i += len(identifier)
|
|
||||||
column += len(identifier)
|
|
||||||
elif c.isdigit():
|
|
||||||
number = self.eat_number(i)
|
|
||||||
yield Token(Tokens.NUMBER, number, i, line, column)
|
|
||||||
i += len(number)
|
|
||||||
column += len(number)
|
|
||||||
elif c == "'" or c == '"':
|
|
||||||
string, newlines = self.eat_string(i)
|
|
||||||
yield Token(Tokens.STRING, string, i, line, column)
|
|
||||||
i += len(string)
|
|
||||||
column = 1 if newlines > 0 else column + len(string)
|
|
||||||
line += newlines
|
|
||||||
else:
|
|
||||||
raise LexerError(f"Unknown token '{c}'", self.text, i, line, column)
|
|
||||||
|
|
||||||
yield Token(Tokens.EOF, "", i, line, column)
|
|
||||||
|
|
||||||
def eat_whitespace(self, start):
    """Consume the run of spaces/tabs that begins at ``start`` and return it.

    The character at ``start`` is always included, matching the caller's
    contract (it only calls this when positioned on whitespace).
    """
    end = start + 1
    while end < self.text_len and self.text[end] in (" ", "\t"):
        end += 1
    return self.text[start:end]
|
|
||||||
|
|
||||||
def eat_newline(self, start):
    """Return the newline sequence at ``start``.

    Recognizes the two-character pairs "\\r\\n" and "\\n\\r" as a single
    newline; otherwise the single character at ``start`` is returned.
    """
    first = self.text[start]
    if start + 1 == self.text_len:
        # Last character of the text: nothing can follow it.
        return first
    pair = first + self.text[start + 1]
    return pair if pair in ("\n\r", "\r\n") else first
|
|
||||||
|
|
||||||
def eat_identifier(self, start):
    """Consume an identifier starting at ``start``.

    After the first character, letters, digits, '_' and '-' are all accepted.
    """
    end = start + 1
    while end < self.text_len:
        ch = self.text[end]
        if not (ch.isalpha() or ch.isdigit() or ch in ("_", "-")):
            break
        end += 1
    return self.text[start:end]
|
|
||||||
|
|
||||||
def eat_number(self, start):
    """Consume a numeric lexeme starting at ``start`` (digits and '.')."""
    end = start + 1
    while end < self.text_len and (self.text[end].isdigit() or self.text[end] == "."):
        end += 1
    return self.text[start:end]
|
|
||||||
|
|
||||||
def eat_string(self, start):
    """Consume a quoted string starting at ``start``.

    Returns ``(lexeme, lines_count)`` where ``lexeme`` includes both quote
    characters and ``lines_count`` is the number of newlines seen inside the
    string (the caller uses it to advance ``line`` and reset ``column``).
    """
    quote = self.text[start]          # opening quote character (' or ")
    result = self.text[start]
    lines_count = 0

    i = start + 1
    escape = False                    # True right after a backslash
    newline = None                    # newline char seen, count still pending
    while i < self.text_len:
        c = self.text[i]
        result += c
        i += 1

        # A line is counted once the character AFTER a newline char is
        # consumed; a repeated identical newline char keeps the state armed.
        if newline:
            lines_count += 1
            newline = c if c == newline else None
        else:
            if c == "\r" or c == "\n":
                newline = c

        # NOTE(review): a backslash always re-arms `escape`, so an escaped
        # backslash directly followed by the quote ("...\\\\'" ) does NOT
        # terminate the string here -- looks like a latent bug; confirm
        # whether that behavior is intended before relying on it.
        if c == "\\":
            escape = True
        elif c == quote and not escape:
            break                     # unescaped closing quote ends the string
        else:
            escape = False

    # Account for a newline that was still pending when the loop ended.
    if newline:
        lines_count += 1

    return result, lines_count
|
|
||||||
@@ -0,0 +1,297 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class TokenKind(Enum):
    """Kinds of lexical tokens produced by the Tokenizer."""
    EOF = "eof"
    WHITESPACE = "whitespace"
    NEWLINE = "newline"
    KEYWORD = "keyword"
    IDENTIFIER = "identifier"
    STRING = "string"
    NUMBER = "number"
    TRUE = "true"
    FALSE = "false"
    LPAR = "lpar"
    RPAR = "rpar"
    # BUG FIX: LBRACKET previously reused the value "lbrace".  Enum members
    # with duplicate values become *aliases* of the first member, so
    # TokenKind.LBRACE was literally TokenKind.LBRACKET and '{' / '['
    # produced indistinguishable tokens.
    LBRACKET = "lbracket"
    RBRACKET = "rbracket"
    LBRACE = "lbrace"
    RBRACE = "rbrace"
    PLUS = "plus"
    MINUS = "minus"
    STAR = "star"
    SLASH = "slash"
    PERCENT = "percent"
    COMMA = "comma"
    SEMICOLON = "semicolon"
    COLON = "colon"
    DOT = "dot"
    QMARK = "qmark"
    VBAR = "vbar"
    AMPER = "amper"
    EQUALS = "="
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
class Token:
    """A single lexical token together with its position in the source."""
    type: TokenKind   # which kind of token this is (see TokenKind)
    value: object     # the lexeme text; for keywords, a Keywords member
    index: int        # absolute character offset in the source text
    line: int         # 1-based line number where the token starts
    column: int       # 1-based column number where the token starts
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
class LexerError(Exception):
    """Raised by the Tokenizer when it meets a character it cannot lex.

    NOTE(review): @dataclass on an Exception replaces Exception.__init__
    with the generated one, so ``e.args`` stays empty -- confirm callers
    only read the fields below (str(e) will not show the message).
    """
    message: str  # human-readable description of the failure
    text: str     # the text being tokenized (or the relevant fragment)
    index: int    # absolute offset of the offending character
    line: int     # 1-based line number of the offending character
    column: int   # 1-based column number of the offending character
|
||||||
|
|
||||||
|
|
||||||
|
class Keywords(Enum):
    """Reserved words of the language; each value is the literal source spelling."""
    DEF = "def"
    CONCEPT = "concept"
    AS = "as"
    WHERE = "where"
    PRE = "pre"
    POST = "post"
|
||||||
|
|
||||||
|
|
||||||
|
class Tokenizer:
|
||||||
|
"""
|
||||||
|
Class that can iterate on the tokens
|
||||||
|
"""
|
||||||
|
|
||||||
|
KEYWORDS = set(x.value for x in Keywords)
|
||||||
|
|
||||||
|
def __init__(self, text):
    """Prepare to tokenize ``text``; position starts at offset 0, line 1, column 1."""
    self.text = text            # the full source text to tokenize
    self.text_len = len(text)   # cached length, checked by every eat_* loop
    self.column = 1             # 1-based column of the next character
    self.line = 1               # 1-based line of the next character
    self.i = 0                  # absolute offset of the next character
|
||||||
|
|
||||||
|
def __iter__(self):
    """Yield a Token for each lexeme in the text, ending with one EOF token.

    Raises LexerError on a character no rule matches.  Refactored: the
    fourteen copy-pasted single-character branches are now one dispatch
    dict; '+' and '-' stay special because they may start a signed number.
    """
    # 1:1 single-character tokens.
    simple = {
        "/": TokenKind.SLASH, "*": TokenKind.STAR,
        "{": TokenKind.LBRACE, "}": TokenKind.RBRACE,
        "(": TokenKind.LPAR, ")": TokenKind.RPAR,
        "[": TokenKind.LBRACKET, "]": TokenKind.RBRACKET,
        "=": TokenKind.EQUALS, ",": TokenKind.COMMA,
        ".": TokenKind.DOT, ";": TokenKind.SEMICOLON,
        ":": TokenKind.COLON, "?": TokenKind.QMARK,
    }
    while self.i < self.text_len:
        c = self.text[self.i]
        if c in "+-":
            # A sign immediately followed by a digit starts a signed number.
            if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
                number = self.eat_number(self.i)
                yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
                self.i += len(number)
                self.column += len(number)
            else:
                kind = TokenKind.PLUS if c == "+" else TokenKind.MINUS
                yield Token(kind, c, self.i, self.line, self.column)
                self.i += 1
                self.column += 1
        elif c in simple:
            yield Token(simple[c], c, self.i, self.line, self.column)
            self.i += 1
            self.column += 1
        elif c == " " or c == "\t":
            whitespace = self.eat_whitespace(self.i)
            yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
            self.i += len(whitespace)
            self.column += len(whitespace)
        elif c == "\n" or c == "\r":
            newline = self.eat_newline(self.i)
            yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
            self.i += len(newline)
            self.column = 1
            self.line += 1
        elif c.isalpha() or c == "_":
            identifier = self.eat_identifier(self.i)
            is_keyword = identifier in self.KEYWORDS
            token_type = TokenKind.KEYWORD if is_keyword else TokenKind.IDENTIFIER
            # Keywords carry the Keywords member as value, not the raw text.
            value = Keywords(identifier) if is_keyword else identifier
            yield Token(token_type, value, self.i, self.line, self.column)
            self.i += len(identifier)
            self.column += len(identifier)
        elif c.isdigit():
            number = self.eat_number(self.i)
            yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
            self.i += len(number)
            self.column += len(number)
        elif c == "'" or c == '"':
            string, newlines = self.eat_string(self.i, self.line, self.column)
            yield Token(TokenKind.STRING, string, self.i, self.line, self.column)
            self.i += len(string)
            # A multi-line string resets the column, otherwise advance past it.
            self.column = 1 if newlines > 0 else self.column + len(string)
            self.line += newlines
        else:
            raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)

    yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
|
||||||
|
|
||||||
|
def eat_whitespace(self, start):
    """Consume the run of spaces and tabs beginning at ``start``.

    The character at ``start`` is included unconditionally; scanning stops
    at the first non-whitespace character or the end of the text.
    """
    end = start + 1
    while end < self.text_len and self.text[end] in (" ", "\t"):
        end += 1
    return self.text[start:end]
|
||||||
|
|
||||||
|
def eat_newline(self, start):
    """Return the newline sequence beginning at ``start``.

    "\\r\\n" and "\\n\\r" are consumed as one two-character newline;
    anything else yields the single character at ``start``.
    """
    first = self.text[start]
    if start + 1 == self.text_len:
        # Newline is the final character of the text.
        return first
    pair = first + self.text[start + 1]
    return pair if pair in ("\n\r", "\r\n") else first
|
||||||
|
|
||||||
|
def eat_identifier(self, start):
    """Consume an identifier beginning at ``start``.

    Continuation characters are letters, digits, '_' and '-'.
    """
    end = start + 1
    while end < self.text_len:
        ch = self.text[end]
        if not (ch.isalpha() or ch.isdigit() or ch in ("_", "-")):
            break
        end += 1
    return self.text[start:end]
|
||||||
|
|
||||||
|
def eat_number(self, start):
    """Consume a numeric literal starting at ``start``.

    The character at ``start`` (a digit, '+' or '-') is always included.
    FIX: at most one decimal point is accepted; a second '.' now ends the
    number.  The previous version happily produced invalid lexemes such as
    "1.2.3" that no later stage could interpret.
    """
    result = self.text[start]
    seen_dot = result == "."
    i = start + 1
    while i < self.text_len:
        c = self.text[i]
        if c.isdigit():
            result += c
        elif c == "." and not seen_dot:
            seen_dot = True
            result += c
        else:
            break
        i += 1
    return result
|
||||||
|
|
||||||
|
def eat_string(self, start_index, start_line, start_column):
    """Consume a quoted string starting at ``start_index``.

    Returns ``(lexeme, lines_count)``: the lexeme includes both quote
    characters and ``lines_count`` is the number of newlines inside the
    string.  Raises LexerError when the text ends before the closing quote.

    FIXES vs previous version:
    - escape handling: a backslash used to *always* re-arm the escape flag,
      so an escaped backslash followed by the closing quote ("...\\\\'")
      never terminated the string.  The flag now applies to exactly one
      following character.
    - a lone opening quote at end of text used to pass the trailing-quote
      check (result[-1] was the opening quote itself); it now raises.
    """
    quote = self.text[start_index]
    result = self.text[start_index]
    lines_count = 0

    i = start_index + 1
    escape = False       # True for exactly one character after a backslash
    newline = None       # newline char seen, line count still pending
    while i < self.text_len:
        c = self.text[i]
        result += c
        i += 1

        # A line is counted once the character after a newline char is
        # consumed; a repeated identical newline char keeps the state armed.
        if newline:
            lines_count += 1
            newline = c if c == newline else None
        else:
            if c == "\r" or c == "\n":
                newline = c

        if escape:
            escape = False            # this char was escaped, consume it
        elif c == "\\":
            escape = True             # escape applies to the next char only
        elif c == quote:
            break                     # unescaped closing quote: done

    # add trailing new line if needed
    if newline:
        lines_count += 1

    # len(result) >= 2 guards against a lone opening quote at end of text.
    if len(result) < 2 or result[-1] != quote:
        raise LexerError("Missing Trailing quote", result, i, start_line + lines_count,
                         1 if lines_count > 0 else start_column + len(result))

    return result, lines_count
|
||||||
|
|
||||||
|
def seek(self, words):
|
||||||
|
if self.i == self.text_len:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# init
|
||||||
|
offsets = {}
|
||||||
|
start_index = self.i
|
||||||
|
|
||||||
|
buffer = ""
|
||||||
|
while self.i < self.text_len:
|
||||||
|
c = self.text[self.i]
|
||||||
|
|
||||||
|
# skip white space
|
||||||
|
if c in (" ", "\t"):
|
||||||
|
self.i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
for word in words:
|
||||||
|
if c == word[offset]:
|
||||||
|
os
|
||||||
@@ -1,5 +1,39 @@
|
|||||||
# How to serialize ?
|
# How to serialize ?
|
||||||
|
|
||||||
|
## General rule
|
||||||
- 1 byte : type of object code
|
- 1 byte : type of object code
|
||||||
- int : version of the encoder
|
- int : version of the encoder
|
||||||
- data : can be the json representation of the object
|
- data : can be the json representation of the object
|
||||||
|
|
||||||
|
### Current supported types
|
||||||
|
- E : events
|
||||||
|
- O : object (with history management)
|
||||||
|
- P : pickle
|
||||||
|
|
||||||
|
## How concepts are serialized ?
|
||||||
|
- get the id of the concept
|
||||||
|
- get the hash of the concept -> it will be its unique key
|
||||||
|
structure of the serialisation:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id" : "id",
|
||||||
|
"parent": <hash code of the previous version of the concept> or "",
|
||||||
|
"name": <name of the concept>,
|
||||||
|
"where": "",
|
||||||
|
"pre": "",
|
||||||
|
"post": "",
|
||||||
|
"body": "",
|
||||||
|
"desc": "",
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Idea to manage ObjectSerializer
|
||||||
|
Problem:
|
||||||
|
During serialization, there is no issue: the match() method is the unique way to get the correct serializer.
|
||||||
|
During deserialization, all ObjectSerializer instances have type = 'O' and version = 1.
|
||||||
|
So how to choose the correct one ?
|
||||||
|
A possible solution will be to add the type of the object to deserialize to the saved stream
|
||||||
|
--> SHA256 for every object. Too much data saved.
|
||||||
|
The id lets the Serializer increment the version automatically (during registration) and keeps the mapping within sdp.state.
|
||||||
|
|
||||||
|
|||||||
+41
-13
@@ -4,7 +4,7 @@ from datetime import datetime, date
|
|||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import zlib
|
import zlib
|
||||||
from sdp.sheerkaSerializer import Serializer
|
from sdp.sheerkaSerializer import Serializer, SerializerContext
|
||||||
|
|
||||||
|
|
||||||
def json_default_converter(o):
|
def json_default_converter(o):
|
||||||
@@ -38,15 +38,15 @@ class Event(object):
|
|||||||
if not isinstance(self.message, str):
|
if not isinstance(self.message, str):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
return hashlib.sha256(f"{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
|
return hashlib.sha256(f"Event:{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
def to_json(self):
|
def to_dict(self):
|
||||||
return json.dumps(self.__dict__, default=json_default_converter)
|
return self.__dict__
|
||||||
|
|
||||||
def from_json(self, json_message):
|
def from_dict(self, as_dict):
|
||||||
self.user = json_message["user"]
|
self.user = as_dict["user"]
|
||||||
self.date = datetime.fromisoformat(json_message["date"])
|
self.date = datetime.fromisoformat(as_dict["date"])
|
||||||
self.message = json_message["message"]
|
self.message = as_dict["message"]
|
||||||
|
|
||||||
|
|
||||||
class State:
|
class State:
|
||||||
@@ -120,6 +120,7 @@ class SheerkaDataProvider:
|
|||||||
|
|
||||||
EventFolder = "events"
|
EventFolder = "events"
|
||||||
StateFolder = "state"
|
StateFolder = "state"
|
||||||
|
ObjectsFolder = "objects"
|
||||||
CacheFolder = "cache"
|
CacheFolder = "cache"
|
||||||
HeadFile = "HEAD"
|
HeadFile = "HEAD"
|
||||||
KeysFile = "keys"
|
KeysFile = "keys"
|
||||||
@@ -135,6 +136,9 @@ class SheerkaDataProvider:
|
|||||||
|
|
||||||
self.serializer = Serializer()
|
self.serializer = Serializer()
|
||||||
|
|
||||||
|
def get_obj_path(self, object_type, digest):
    """Return the on-disk path of an object: <root>/<object_type>/<digest[:24]>/<digest>.

    BUG FIX: the path.join result was computed but never returned, so this
    method always returned None.
    """
    return path.join(self.root, object_type, digest[:24], digest)
|
||||||
|
|
||||||
def add(self, event: Event, entry, obj):
|
def add(self, event: Event, entry, obj):
|
||||||
"""
|
"""
|
||||||
Adds obj to the entry 'entry'
|
Adds obj to the entry 'entry'
|
||||||
@@ -366,7 +370,7 @@ class SheerkaDataProvider:
|
|||||||
os.makedirs(path.dirname(target_path))
|
os.makedirs(path.dirname(target_path))
|
||||||
|
|
||||||
with open(target_path, "wb") as f:
|
with open(target_path, "wb") as f:
|
||||||
f.write(self.serializer.serialize(event).read())
|
f.write(self.serializer.serialize(event, None).read())
|
||||||
|
|
||||||
return digest
|
return digest
|
||||||
|
|
||||||
@@ -378,7 +382,7 @@ class SheerkaDataProvider:
|
|||||||
"""
|
"""
|
||||||
target_path = path.join(self.root, SheerkaDataProvider.EventFolder, digest[:24], digest)
|
target_path = path.join(self.root, SheerkaDataProvider.EventFolder, digest[:24], digest)
|
||||||
with open(target_path, "rb") as f:
|
with open(target_path, "rb") as f:
|
||||||
return self.serializer.deserialize(f)
|
return self.serializer.deserialize(f, None)
|
||||||
|
|
||||||
def save_state(self, state: State):
|
def save_state(self, state: State):
|
||||||
digest = state.get_digest()
|
digest = state.get_digest()
|
||||||
@@ -390,7 +394,7 @@ class SheerkaDataProvider:
|
|||||||
os.makedirs(path.dirname(target_path))
|
os.makedirs(path.dirname(target_path))
|
||||||
|
|
||||||
with open(target_path, "wb") as f:
|
with open(target_path, "wb") as f:
|
||||||
f.write(self.serializer.serialize(state).read())
|
f.write(self.serializer.serialize(state, None).read())
|
||||||
|
|
||||||
return digest
|
return digest
|
||||||
|
|
||||||
@@ -400,7 +404,32 @@ class SheerkaDataProvider:
|
|||||||
|
|
||||||
target_path = path.join(self.root, SheerkaDataProvider.StateFolder, digest[:24], digest)
|
target_path = path.join(self.root, SheerkaDataProvider.StateFolder, digest[:24], digest)
|
||||||
with open(target_path, "rb") as f:
|
with open(target_path, "rb") as f:
|
||||||
return self.serializer.deserialize(f)
|
return self.serializer.deserialize(f, None)
|
||||||
|
|
||||||
|
def save_obj(self, obj):
    """Serialize ``obj`` into the content-addressed objects store.

    Returns the object's digest, which is also its storage key.
    """
    # Allocate a sequential key for key-aware objects that don't have one yet.
    if hasattr(obj, "key") and hasattr(obj, "key_name") and obj.key is None:
        obj.key = self.get_next_key(obj.key_name)

    digest = obj.get_digest()
    target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
    # Content-addressed: a file with this digest already holds identical data.
    if path.exists(target_path):
        return digest

    if not path.exists(path.dirname(target_path)):
        os.makedirs(path.dirname(target_path))

    with open(target_path, "wb") as f:
        # NOTE(review): the context user is hard-coded to "kodjo" -- confirm
        # whether this should come from configuration/session instead.
        f.write(self.serializer.serialize(obj, SerializerContext("kodjo", digest)).read())

    return digest
|
||||||
|
|
||||||
|
def load_obj(self, digest):
    """Load an object from the objects store by its digest.

    A ``None`` digest returns a fresh, empty State (bootstrap case).
    """
    if digest is None:
        return State()

    target_path = path.join(self.root, SheerkaDataProvider.ObjectsFolder, digest[:24], digest)
    with open(target_path, "rb") as f:
        # NOTE(review): user "kodjo" is hard-coded here as in save_obj.
        return self.serializer.deserialize(f, SerializerContext("kodjo", digest))
|
||||||
|
|
||||||
def get_cache_params(self, category, key):
|
def get_cache_params(self, category, key):
|
||||||
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
|
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
|
||||||
@@ -507,4 +536,3 @@ class SheerkaDataProvider:
|
|||||||
keys[entry] = value
|
keys[entry] = value
|
||||||
self.save_keys(keys)
|
self.save_keys(keys)
|
||||||
return str(value)
|
return str(value)
|
||||||
|
|
||||||
|
|||||||
+84
-26
@@ -3,6 +3,7 @@ import pickle
|
|||||||
import datetime
|
import datetime
|
||||||
import struct
|
import struct
|
||||||
import io
|
import io
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
def json_default_converter(o):
|
def json_default_converter(o):
|
||||||
@@ -17,15 +18,27 @@ def json_default_converter(o):
|
|||||||
return o.isoformat()
|
return o.isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class SerializerContext:
|
||||||
|
user_name: str
|
||||||
|
origin: str
|
||||||
|
|
||||||
|
|
||||||
class Serializer:
|
class Serializer:
|
||||||
HEADER_FORMAT = "cH"
|
HEADER_FORMAT = "cH"
|
||||||
|
USERNAME = "user_name" # key to store user that as committed the snapshot
|
||||||
|
MODIFICATION_DATE = "modification_date" #
|
||||||
|
PARENTS = "parents"
|
||||||
|
ORIGIN = "origin"
|
||||||
|
HISTORY = "##history##"
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._cache = []
|
self._cache = []
|
||||||
|
|
||||||
# add builtin serializers
|
# add builtin serializers
|
||||||
self._cache.append(EventSerializer())
|
self.register(EventSerializer())
|
||||||
self._cache.append(PickleSerializer())
|
self.register(PickleSerializer())
|
||||||
|
self.register(ConceptSerializer())
|
||||||
|
|
||||||
def register(self, serializer):
|
def register(self, serializer):
|
||||||
"""
|
"""
|
||||||
@@ -35,9 +48,10 @@ class Serializer:
|
|||||||
"""
|
"""
|
||||||
self._cache.append(serializer)
|
self._cache.append(serializer)
|
||||||
|
|
||||||
def serialize(self, obj):
|
def serialize(self, obj, context):
|
||||||
"""
|
"""
|
||||||
Get the stream representation of an object
|
Get the stream representation of an object
|
||||||
|
:param context:
|
||||||
:param obj:
|
:param obj:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
@@ -52,11 +66,12 @@ class Serializer:
|
|||||||
header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
|
header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
|
||||||
stream.write(header)
|
stream.write(header)
|
||||||
|
|
||||||
return serializer.dump(stream, obj)
|
return serializer.dump(stream, obj, context)
|
||||||
|
|
||||||
def deserialize(self, stream):
|
def deserialize(self, stream, context):
|
||||||
"""
|
"""
|
||||||
Loads an object from its stream representation
|
Loads an object from its stream representation
|
||||||
|
:param context:
|
||||||
:param stream:
|
:param stream:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
@@ -67,7 +82,7 @@ class Serializer:
|
|||||||
raise TypeError(f"Don't know how serializer name={header[0]}, version={header[1]}")
|
raise TypeError(f"Don't know how serializer name={header[0]}, version={header[1]}")
|
||||||
|
|
||||||
serializer = serializers[0]
|
serializer = serializers[0]
|
||||||
return serializer.load(stream)
|
return serializer.load(stream, context)
|
||||||
|
|
||||||
|
|
||||||
class BaseSerializer:
|
class BaseSerializer:
|
||||||
@@ -82,8 +97,7 @@ class BaseSerializer:
|
|||||||
self.name = name
|
self.name = name
|
||||||
self.version = version
|
self.version = version
|
||||||
|
|
||||||
@staticmethod
|
def match(self, obj):
|
||||||
def match(obj):
|
|
||||||
"""
|
"""
|
||||||
Returns true if self can serialize obj
|
Returns true if self can serialize obj
|
||||||
:param obj:
|
:param obj:
|
||||||
@@ -91,26 +105,32 @@ class BaseSerializer:
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def dump(self, stream, obj):
|
def dump(self, stream, obj, context):
|
||||||
"""
|
"""
|
||||||
Returns the byte representation of how the object should be serialized
|
Returns the byte representation of how the object should be serialized
|
||||||
|
|
||||||
:param stream: to write to
|
:param stream: to write to
|
||||||
:param obj:
|
:param obj: obj to serialize
|
||||||
|
:param context: additional info needed to dump
|
||||||
:return: stream of bytes
|
:return: stream of bytes
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def load(self, stream):
|
def load(self, stream, context):
|
||||||
"""
|
"""
|
||||||
From a stream of bytes, create the object
|
From a stream of bytes, create the object
|
||||||
:param stream:
|
:param stream:
|
||||||
|
:param context: additional info needed to load
|
||||||
:return: object
|
:return: object
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_class(kls):
|
def get_class(kls):
|
||||||
|
"""
|
||||||
|
Loads a class from its string full qualified name
|
||||||
|
:param kls:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
parts = kls.split('.')
|
parts = kls.split('.')
|
||||||
module = ".".join(parts[:-1])
|
module = ".".join(parts[:-1])
|
||||||
m = __import__(module)
|
m = __import__(module)
|
||||||
@@ -120,6 +140,11 @@ class BaseSerializer:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_full_qualified_name(obj):
|
def get_full_qualified_name(obj):
|
||||||
|
"""
|
||||||
|
Returns the full qualified name of a class (including its module name )
|
||||||
|
:param obj:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
module = obj.__class__.__module__
|
module = obj.__class__.__module__
|
||||||
if module is None or module == str.__class__.__module__:
|
if module is None or module == str.__class__.__module__:
|
||||||
return obj.__class__.__name__ # Avoid reporting __builtin__
|
return obj.__class__.__name__ # Avoid reporting __builtin__
|
||||||
@@ -128,40 +153,73 @@ class BaseSerializer:
|
|||||||
|
|
||||||
|
|
||||||
class EventSerializer(BaseSerializer):
|
class EventSerializer(BaseSerializer):
|
||||||
@staticmethod
|
|
||||||
def match(obj):
|
|
||||||
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
BaseSerializer.__init__(self, "E", 1)
|
BaseSerializer.__init__(self, "E", 1)
|
||||||
|
|
||||||
def dump(self, stream, obj):
|
def match(self, obj):
|
||||||
stream.write(obj.to_json().encode("utf-8"))
|
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
|
||||||
|
|
||||||
|
def dump(self, stream, obj, context):
|
||||||
|
stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8"))
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
return stream
|
return stream
|
||||||
|
|
||||||
def load(self, stream):
|
def load(self, stream, context):
|
||||||
json_stream = stream.read().decode("utf-8")
|
json_stream = stream.read().decode("utf-8")
|
||||||
json_message = json.loads(json_stream)
|
as_dict = json.loads(json_stream)
|
||||||
event = BaseSerializer.get_class("sdp.sheerkaDataProvider.Event")()
|
event = BaseSerializer.get_class("sdp.sheerkaDataProvider.Event")()
|
||||||
event.from_json(json_message)
|
event.from_dict(as_dict)
|
||||||
return event
|
return event
|
||||||
|
|
||||||
|
|
||||||
|
class ObjectSerializer(BaseSerializer):
    """Serializer for objects exposing to_dict()/from_dict().

    On dump it appends a history entry (user, modification date, parents)
    built from the SerializerContext; on load that entry is set back on the
    object under the Serializer.HISTORY attribute name.
    """

    def __init__(self, fully_qualified_name, name="O", version=1):
        # fully_qualified_name identifies the single class this serializer
        # handles (e.g. "core.concept.Concept").
        BaseSerializer.__init__(self, name, version)
        self.fully_qualified_name = fully_qualified_name

    def match(self, obj):
        # Match strictly on the object's fully qualified class name.
        return BaseSerializer.get_full_qualified_name(obj) == self.fully_qualified_name

    def dump(self, stream, obj, context):
        """Write the object's dict plus a history block as UTF-8 JSON; rewind and return the stream."""
        as_json = obj.to_dict()
        as_json.update({
            Serializer.HISTORY: {
                Serializer.USERNAME: context.user_name,
                Serializer.MODIFICATION_DATE: datetime.datetime.now().isoformat(),
                # Parent digest: the object's "origin" attribute when present.
                Serializer.PARENTS: [getattr(obj, Serializer.ORIGIN)] if hasattr(obj, Serializer.ORIGIN) else []
            }})
        stream.write(json.dumps(as_json, default=json_default_converter).encode("utf-8"))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        """Rebuild the object from its JSON stream, then re-attach the history block."""
        json_stream = stream.read().decode("utf-8")
        json_message = json.loads(json_stream)
        obj = BaseSerializer.get_class(self.fully_qualified_name)()
        obj.from_dict(json_message)
        setattr(obj, Serializer.HISTORY, json_message[Serializer.HISTORY])

        return obj
||||||
|
|
||||||
|
|
||||||
class PickleSerializer(BaseSerializer):
|
class PickleSerializer(BaseSerializer):
|
||||||
@staticmethod
|
|
||||||
def match(obj):
|
|
||||||
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
BaseSerializer.__init__(self, "P", 1)
|
BaseSerializer.__init__(self, "P", 1)
|
||||||
|
|
||||||
def dump(self, stream, obj):
|
def match(self, obj):
|
||||||
|
return BaseSerializer.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"
|
||||||
|
|
||||||
|
def dump(self, stream, obj, context):
|
||||||
stream.write(pickle.dumps(obj))
|
stream.write(pickle.dumps(obj))
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
return stream
|
return stream
|
||||||
|
|
||||||
def load(self, stream):
|
def load(self, stream, context):
|
||||||
return pickle.loads(stream.read())
|
return pickle.loads(stream.read())
|
||||||
|
|
||||||
|
|
||||||
|
class ConceptSerializer(ObjectSerializer):
    """ObjectSerializer specialized for core.concept.Concept (code 'C', version 1)."""

    def __init__(self):
        super().__init__("core.concept.Concept", "C", 1)
|
||||||
|
|||||||
+284
-43
@@ -1,39 +1,78 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from parsers.defaultparser import TokenIter, Token, Tokens
|
|
||||||
|
from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode
|
||||||
|
from parsers.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError
|
||||||
|
from parsers.DefaultParser import DefaultParser
|
||||||
|
from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode
|
||||||
|
from parsers.DefaultParser import Node, UnexpectedTokenErrorNode, DefConceptNode, NopNode
|
||||||
|
import ast
|
||||||
|
|
||||||
|
|
||||||
|
def nop():
    """Shorthand: an empty NopNode."""
    return NopNode()


def n(number):
    """Shorthand: a NumberNode for ``number`` (empty first arg per the Node constructor)."""
    return NumberNode([], number)


def s(string, quote="'"):
    """Shorthand: a StringNode for ``string``, remembering its quote character."""
    return StringNode([], string, quote)


def v(name):
    """Shorthand: a VariableNode named ``name``."""
    return VariableNode([], name)


def t():
    """Shorthand: a TrueNode literal."""
    return TrueNode([])


def f():
    """Shorthand: a FalseNode literal."""
    return FalseNode([])


def null():
    """Shorthand: a NullNode literal."""
    return NullNode([])


def b(operator, left, right):
    """Shorthand: a BinaryNode ``left operator right``."""
    return BinaryNode([], operator, left, right)
|
|
||||||
|
|
||||||
def test_i_can_tokenize():
|
def test_i_can_tokenize():
|
||||||
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\""
|
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"="
|
||||||
tokens = list(TokenIter(source))
|
tokens = list(Tokenizer(source))
|
||||||
assert tokens[0] == Token(Tokens.PLUS, "+", 0, 1, 1)
|
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
|
||||||
assert tokens[1] == Token(Tokens.STAR, "*", 1, 1, 2)
|
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
|
||||||
assert tokens[2] == Token(Tokens.MINUS, "-", 2, 1, 3)
|
assert tokens[2] == Token(TokenKind.MINUS, "-", 2, 1, 3)
|
||||||
assert tokens[3] == Token(Tokens.SLASH, "/", 3, 1, 4)
|
assert tokens[3] == Token(TokenKind.SLASH, "/", 3, 1, 4)
|
||||||
assert tokens[4] == Token(Tokens.LBRACE, "{", 4, 1, 5)
|
assert tokens[4] == Token(TokenKind.LBRACE, "{", 4, 1, 5)
|
||||||
assert tokens[5] == Token(Tokens.RBRACE, "}", 5, 1, 6)
|
assert tokens[5] == Token(TokenKind.RBRACE, "}", 5, 1, 6)
|
||||||
assert tokens[6] == Token(Tokens.LBRACKET, "[", 6, 1, 7)
|
assert tokens[6] == Token(TokenKind.LBRACKET, "[", 6, 1, 7)
|
||||||
assert tokens[7] == Token(Tokens.RBRACKET, "]", 7, 1, 8)
|
assert tokens[7] == Token(TokenKind.RBRACKET, "]", 7, 1, 8)
|
||||||
assert tokens[8] == Token(Tokens.LPAR, "(", 8, 1, 9)
|
assert tokens[8] == Token(TokenKind.LPAR, "(", 8, 1, 9)
|
||||||
assert tokens[9] == Token(Tokens.RPAR, ")", 9, 1, 10)
|
assert tokens[9] == Token(TokenKind.RPAR, ")", 9, 1, 10)
|
||||||
assert tokens[10] == Token(Tokens.WHITESPACE, " ", 10, 1, 11)
|
assert tokens[10] == Token(TokenKind.WHITESPACE, " ", 10, 1, 11)
|
||||||
assert tokens[11] == Token(Tokens.COMMA, ",", 14, 1, 15)
|
assert tokens[11] == Token(TokenKind.COMMA, ",", 14, 1, 15)
|
||||||
assert tokens[12] == Token(Tokens.SEMICOLON, ";", 15, 1, 16)
|
assert tokens[12] == Token(TokenKind.SEMICOLON, ";", 15, 1, 16)
|
||||||
assert tokens[13] == Token(Tokens.COLON, ":", 16, 1, 17)
|
assert tokens[13] == Token(TokenKind.COLON, ":", 16, 1, 17)
|
||||||
assert tokens[14] == Token(Tokens.DOT, ".", 17, 1, 18)
|
assert tokens[14] == Token(TokenKind.DOT, ".", 17, 1, 18)
|
||||||
assert tokens[15] == Token(Tokens.QMARK, "?", 18, 1, 19)
|
assert tokens[15] == Token(TokenKind.QMARK, "?", 18, 1, 19)
|
||||||
assert tokens[16] == Token(Tokens.NEWLINE, "\n", 19, 1, 20)
|
assert tokens[16] == Token(TokenKind.NEWLINE, "\n", 19, 1, 20)
|
||||||
assert tokens[17] == Token(Tokens.NEWLINE, "\n\r", 20, 2, 1)
|
assert tokens[17] == Token(TokenKind.NEWLINE, "\n\r", 20, 2, 1)
|
||||||
assert tokens[18] == Token(Tokens.NEWLINE, "\r", 22, 3, 1)
|
assert tokens[18] == Token(TokenKind.NEWLINE, "\r", 22, 3, 1)
|
||||||
assert tokens[19] == Token(Tokens.NEWLINE, "\r\n", 23, 4, 1)
|
assert tokens[19] == Token(TokenKind.NEWLINE, "\r\n", 23, 4, 1)
|
||||||
assert tokens[20] == Token(Tokens.IDENTIFIER, "identifier_0", 25, 5, 1)
|
assert tokens[20] == Token(TokenKind.IDENTIFIER, "identifier_0", 25, 5, 1)
|
||||||
assert tokens[21] == Token(Tokens.WHITESPACE, "\t \t", 37, 5, 13)
|
assert tokens[21] == Token(TokenKind.WHITESPACE, "\t \t", 37, 5, 13)
|
||||||
assert tokens[22] == Token(Tokens.NUMBER, "10.15", 41, 5, 17)
|
assert tokens[22] == Token(TokenKind.NUMBER, "10.15", 41, 5, 17)
|
||||||
assert tokens[23] == Token(Tokens.WHITESPACE, " ", 46, 5, 22)
|
assert tokens[23] == Token(TokenKind.WHITESPACE, " ", 46, 5, 22)
|
||||||
assert tokens[24] == Token(Tokens.NUMBER, "10", 47, 5, 23)
|
assert tokens[24] == Token(TokenKind.NUMBER, "10", 47, 5, 23)
|
||||||
assert tokens[25] == Token(Tokens.WHITESPACE, " ", 49, 5, 25)
|
assert tokens[25] == Token(TokenKind.WHITESPACE, " ", 49, 5, 25)
|
||||||
assert tokens[26] == Token(Tokens.STRING, "'string\n'", 50, 5, 26)
|
assert tokens[26] == Token(TokenKind.STRING, "'string\n'", 50, 5, 26)
|
||||||
assert tokens[27] == Token(Tokens.WHITESPACE, " ", 59, 6, 1)
|
assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1)
|
||||||
assert tokens[28] == Token(Tokens.STRING, '"another string"', 60, 6, 2)
|
assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2)
|
||||||
|
assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("text, expected", [
|
@pytest.mark.parametrize("text, expected", [
|
||||||
@@ -48,11 +87,26 @@ def test_i_can_tokenize():
|
|||||||
("-abcd", False)
|
("-abcd", False)
|
||||||
])
|
])
|
||||||
def test_i_can_tokenize_identifiers(text, expected):
|
def test_i_can_tokenize_identifiers(text, expected):
|
||||||
tokens = list(TokenIter(text))
|
tokens = list(Tokenizer(text))
|
||||||
comparison = tokens[0].type == Tokens.IDENTIFIER
|
comparison = tokens[0].type == TokenKind.IDENTIFIER
|
||||||
assert comparison == expected
|
assert comparison == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text, error_text, index, line, column", [
|
||||||
|
("'string", "'string", 7, 1, 8),
|
||||||
|
('"string', '"string', 7, 1, 8),
|
||||||
|
('"a" + "string', '"string', 13, 1, 14),
|
||||||
|
('"a"\n\n"string', '"string', 12, 3, 8),
|
||||||
|
])
|
||||||
|
def test_i_can_detect_unfinished_strings(text, error_text, index, line, column):
|
||||||
|
with pytest.raises(LexerError) as e:
|
||||||
|
list(Tokenizer(text))
|
||||||
|
assert e.value.text == error_text
|
||||||
|
assert e.value.index == index
|
||||||
|
assert e.value.line == line
|
||||||
|
assert e.value.column == column
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("text, expected_text, expected_newlines", [
|
@pytest.mark.parametrize("text, expected_text, expected_newlines", [
|
||||||
("'foo'", "'foo'", 0),
|
("'foo'", "'foo'", 0),
|
||||||
('"foo"', '"foo"', 0),
|
('"foo"', '"foo"', 0),
|
||||||
@@ -72,8 +126,8 @@ def test_i_can_tokenize_identifiers(text, expected):
|
|||||||
("'foo'bar'", "'foo'", 0),
|
("'foo'bar'", "'foo'", 0),
|
||||||
])
|
])
|
||||||
def test_i_can_parse_strings(text, expected_text, expected_newlines):
|
def test_i_can_parse_strings(text, expected_text, expected_newlines):
|
||||||
lexer = TokenIter(text)
|
lexer = Tokenizer(text)
|
||||||
text_found, nb_of_newlines = lexer.eat_string(0)
|
text_found, nb_of_newlines = lexer.eat_string(0, 1, 1)
|
||||||
|
|
||||||
assert nb_of_newlines == expected_newlines
|
assert nb_of_newlines == expected_newlines
|
||||||
assert text_found == expected_text
|
assert text_found == expected_text
|
||||||
@@ -83,14 +137,201 @@ def test_i_can_parse_strings(text, expected_text, expected_newlines):
|
|||||||
"1", "3.1415", "0.5", "01", "-5", "-5.10"
|
"1", "3.1415", "0.5", "01", "-5", "-5.10"
|
||||||
])
|
])
|
||||||
def test_i_can_parse_numbers(text):
|
def test_i_can_parse_numbers(text):
|
||||||
tokens = list(TokenIter(text))
|
tokens = list(Tokenizer(text))
|
||||||
assert tokens[0].type == Tokens.NUMBER
|
assert tokens[0].type == TokenKind.NUMBER
|
||||||
assert tokens[0].value == text
|
assert tokens[0].value == text
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("text", [
|
@pytest.mark.parametrize("text, expected", [
|
||||||
"def", "concept", "as", "pre", "post"
|
("def", Keywords.DEF),
|
||||||
|
("concept", Keywords.CONCEPT),
|
||||||
|
("as", Keywords.AS),
|
||||||
|
("pre", Keywords.PRE),
|
||||||
|
("post", Keywords.POST)
|
||||||
])
|
])
|
||||||
def test_i_can_recognize_keywords(text):
|
def test_i_can_recognize_keywords(text, expected):
|
||||||
tokens = list(TokenIter(text))
|
tokens = list(Tokenizer(text))
|
||||||
assert tokens[0].type == Tokens.KEYWORD
|
assert tokens[0].type == TokenKind.KEYWORD
|
||||||
|
assert tokens[0].value == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text, expected", [
|
||||||
|
("1", n(1)),
|
||||||
|
("+1", n(1)),
|
||||||
|
("-1", n(-1)),
|
||||||
|
("'foo'", s("foo")),
|
||||||
|
("identifier", v("identifier")),
|
||||||
|
("true", t()),
|
||||||
|
("false", f()),
|
||||||
|
("null", null()),
|
||||||
|
("1 * 2", b(TokenKind.STAR, n(1), n(2))),
|
||||||
|
("1 * 2/3", b(TokenKind.STAR, n(1), b(TokenKind.SLASH, n(2), n(3)))),
|
||||||
|
("1 + 2", b(TokenKind.PLUS, n(1), n(2))),
|
||||||
|
("1 + 2 - 3", b(TokenKind.PLUS, n(1), b(TokenKind.MINUS, n(2), n(3)))),
|
||||||
|
("1 + 2-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
|
||||||
|
("1 + 2 +-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))),
|
||||||
|
("1 + 2 * 3", b(TokenKind.PLUS, n(1), b(TokenKind.STAR, n(2), n(3)))),
|
||||||
|
("1 * 2 + 3", b(TokenKind.PLUS, b(TokenKind.STAR, n(1), n(2)), n(3))),
|
||||||
|
("(1 + 2) * 3", b(TokenKind.STAR, b(TokenKind.PLUS, n(1), n(2)), n(3))),
|
||||||
|
("1 * (2 + 3)", b(TokenKind.STAR, n(1), b(TokenKind.PLUS, n(2), n(3)))),
|
||||||
|
])
|
||||||
|
def test_i_can_parse_simple_expression(text, expected):
|
||||||
|
parser = DefaultParser(text, None)
|
||||||
|
ast = parser.parse()
|
||||||
|
assert ast.is_same(expected)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text, token_found, expected_tokens", [
|
||||||
|
("1+", TokenKind.EOF,
|
||||||
|
[TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, 'true', 'false', 'null', TokenKind.LPAR]),
|
||||||
|
("(1+1", TokenKind.EOF, [TokenKind.RPAR])
|
||||||
|
])
|
||||||
|
def test_i_can_detect_unexpected_end_of_code(text, token_found, expected_tokens):
|
||||||
|
parser = DefaultParser(text, None)
|
||||||
|
parser.parse()
|
||||||
|
|
||||||
|
assert parser.has_error
|
||||||
|
assert parser.error_sink[0].tokens[0].type == token_found
|
||||||
|
assert parser.error_sink[0].expected_tokens == expected_tokens
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text, expected_name, expected_expr", [
|
||||||
|
("def concept hello", "hello", nop()),
|
||||||
|
("def concept hello ", "hello", nop()),
|
||||||
|
("def concept a+b", "a + b", nop()),
|
||||||
|
("def concept 'a+b'", "a+b", nop()),
|
||||||
|
("def concept 'a+b'+c", "a+b + c", nop()),
|
||||||
|
("def concept 'as if'", "as if", nop()),
|
||||||
|
("def concept 'as' if", "as if", nop()),
|
||||||
|
("def concept hello as 'hello'", "hello", ast.Expression(body=ast.Str(s='hello'))),
|
||||||
|
("def concept hello as 1", "hello", ast.Expression(body=ast.Num(n=1))),
|
||||||
|
("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))),
|
||||||
|
])
|
||||||
|
def test_i_can_parse_def_concept(text, expected_name, expected_expr):
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
tree = parser.parse()
|
||||||
|
assert isinstance(tree, DefConceptNode)
|
||||||
|
assert tree.name == expected_name
|
||||||
|
if isinstance(tree.body, PythonNode):
|
||||||
|
assert ast.dump(tree.body.ast) == ast.dump(expected_expr)
|
||||||
|
else:
|
||||||
|
assert tree.body == expected_expr
|
||||||
|
|
||||||
|
|
||||||
|
def compare_ast(left, right):
|
||||||
|
left_as_string = ast.dump(left)
|
||||||
|
left_as_string = left_as_string.replace(", ctx=Load()", "")
|
||||||
|
|
||||||
|
right_as_string = right if isinstance(right, str) else ast.dump(right)
|
||||||
|
right_as_string = right_as_string.replace(", ctx=Load()", "")
|
||||||
|
|
||||||
|
return left_as_string == right_as_string
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_parse_complex_def_concept_statement():
|
||||||
|
text = """def concept a plus b
|
||||||
|
where a,b
|
||||||
|
pre isinstance(a, int) and isinstance(b, float)
|
||||||
|
post isinstance(res, int)
|
||||||
|
as res = a + b
|
||||||
|
"""
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
tree = parser.parse()
|
||||||
|
assert not parser.has_error
|
||||||
|
assert isinstance(tree, DefConceptNode)
|
||||||
|
assert tree.name == "a plus b"
|
||||||
|
assert tree.where.source == "a,b"
|
||||||
|
assert isinstance(tree.where.ast, ast.Expression)
|
||||||
|
assert tree.pre.source == "isinstance(a, int) and isinstance(b, float)"
|
||||||
|
assert isinstance(tree.pre.ast, ast.Expression)
|
||||||
|
assert tree.post.source == "isinstance(res, int)"
|
||||||
|
assert isinstance(tree.post.ast, ast.Expression)
|
||||||
|
assert tree.body.source == "res = a + b"
|
||||||
|
assert isinstance(tree.body.ast, ast.Module)
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_use_colon_to_declare_indentation():
|
||||||
|
text = """
|
||||||
|
def concept add one to a as:
|
||||||
|
def func(x):
|
||||||
|
return x+1
|
||||||
|
func(a)
|
||||||
|
"""
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
tree = parser.parse()
|
||||||
|
assert not parser.has_error
|
||||||
|
assert isinstance(tree, DefConceptNode)
|
||||||
|
|
||||||
|
def test_i_can_use_colon_to_declare_indentation2():
|
||||||
|
text = """
|
||||||
|
def concept add one to a as:
|
||||||
|
def func(x):
|
||||||
|
return x+1
|
||||||
|
"""
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
tree = parser.parse()
|
||||||
|
assert not parser.has_error
|
||||||
|
assert isinstance(tree, DefConceptNode)
|
||||||
|
|
||||||
|
|
||||||
|
def test_without_colon_i_get_an_indent_error():
|
||||||
|
text = """
|
||||||
|
def concept add one to a as
|
||||||
|
def func(x):
|
||||||
|
return x+1
|
||||||
|
func(a)
|
||||||
|
"""
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
tree = parser.parse()
|
||||||
|
assert parser.has_error
|
||||||
|
assert isinstance(tree, DefConceptNode)
|
||||||
|
assert isinstance(parser.error_sink[0].exception, IndentationError)
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_detect_error():
|
||||||
|
"""
|
||||||
|
In this test, func(b) is not correctly indented while colon is specified after the 'as' keyword
|
||||||
|
"""
|
||||||
|
|
||||||
|
text = """
|
||||||
|
def concept add one to a as:
|
||||||
|
def func(x):
|
||||||
|
return x+1
|
||||||
|
func(a)
|
||||||
|
func(b)
|
||||||
|
"""
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
tree = parser.parse()
|
||||||
|
assert parser.has_error
|
||||||
|
assert isinstance(tree, DefConceptNode)
|
||||||
|
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
|
||||||
|
# check that the error is caused by 'func(b)'
|
||||||
|
assert parser.error_sink[0].tokens[0].line == 6
|
||||||
|
assert parser.error_sink[0].tokens[0].column == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text, token_found, expected_tokens", [
|
||||||
|
("def hello as 'hello'", "hello", [Keywords.CONCEPT]),
|
||||||
|
("def concept as", Keywords.AS, ["<name>"]),
|
||||||
|
])
|
||||||
|
def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens):
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
parser.parse()
|
||||||
|
|
||||||
|
assert parser.has_error
|
||||||
|
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
|
||||||
|
assert parser.error_sink[0].tokens[0].value == token_found
|
||||||
|
assert parser.error_sink[0].expected_tokens == expected_tokens
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text", [
|
||||||
|
"def concept hello where 1+",
|
||||||
|
"def concept hello pre 1+",
|
||||||
|
"def concept hello post 1+",
|
||||||
|
"def concept hello as 1+"
|
||||||
|
])
|
||||||
|
def test_i_can_detect_error_in_declaration(text):
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
parser.parse()
|
||||||
|
assert parser.has_error
|
||||||
|
assert isinstance(parser.error_sink[0], PythonErrorNode)
|
||||||
|
|||||||
+52
-15
@@ -1,12 +1,17 @@
|
|||||||
|
import ast
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from os import path
|
from os import path
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from core.concept import Concept
|
from core.concept import Concept, ConceptParts
|
||||||
from core.sheerka import Sheerka
|
from core.sheerka import Sheerka
|
||||||
|
from parsers.DefaultParser import DefConceptNode, DefaultParser
|
||||||
|
from parsers.PythonParser import PythonParser
|
||||||
|
|
||||||
tests_root = path.abspath("../build/tests")
|
tests_root = path.abspath("../build/tests")
|
||||||
|
root_folder = "init_folder"
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
@@ -25,8 +30,6 @@ def init_test():
|
|||||||
|
|
||||||
|
|
||||||
def test_root_folder_is_created_after_initialization():
|
def test_root_folder_is_created_after_initialization():
|
||||||
root_folder = "init_folder"
|
|
||||||
|
|
||||||
return_value = Sheerka().initialize(root_folder)
|
return_value = Sheerka().initialize(root_folder)
|
||||||
assert return_value.status, "initialisation should be successful"
|
assert return_value.status, "initialisation should be successful"
|
||||||
assert Sheerka().concept_equals(return_value.value, Sheerka().get_concept("success"))
|
assert Sheerka().concept_equals(return_value.value, Sheerka().get_concept("success"))
|
||||||
@@ -34,22 +37,56 @@ def test_root_folder_is_created_after_initialization():
|
|||||||
|
|
||||||
|
|
||||||
def test_lists_of_concepts_is_initialized():
|
def test_lists_of_concepts_is_initialized():
|
||||||
root_folder = "init_folder"
|
|
||||||
|
|
||||||
Sheerka().initialize(root_folder)
|
Sheerka().initialize(root_folder)
|
||||||
assert len(Sheerka().concepts) > 1
|
assert len(Sheerka().concepts) > 1
|
||||||
|
|
||||||
|
|
||||||
def test_null_concept_are_equals():
|
# def test_null_concept_are_equals():
|
||||||
concept1 = Concept("test1")
|
# concept1 = Concept("test1")
|
||||||
concept2 = Concept("test2")
|
# concept2 = Concept("test2")
|
||||||
concept3 = Concept("test3")
|
# concept3 = Concept("test3")
|
||||||
|
#
|
||||||
|
# assert not Sheerka.concept_equals(concept1, None)
|
||||||
|
# assert not Sheerka.concept_equals(None, concept1)
|
||||||
|
# assert not Sheerka.concept_equals(concept1, concept2)
|
||||||
|
# assert not Sheerka.concept_equals(concept1, concept3)
|
||||||
|
#
|
||||||
|
# assert Sheerka.concept_equals(None, None)
|
||||||
|
# assert Sheerka.concept_equals(concept1, concept1)
|
||||||
|
|
||||||
assert not Sheerka.concept_equals(concept1, None)
|
def get_concept():
|
||||||
assert not Sheerka.concept_equals(None, concept1)
|
text = """
|
||||||
assert not Sheerka.concept_equals(concept1, concept2)
|
def concept a+b
|
||||||
assert not Sheerka.concept_equals(concept1, concept3)
|
where isinstance(a, int) and isinstance(b, int)
|
||||||
|
pre isinstance(a, int) and isinstance(b, int)
|
||||||
|
post isinstance(res, int)
|
||||||
|
as:
|
||||||
|
def func(x,y):
|
||||||
|
return x+y
|
||||||
|
func(a,b)
|
||||||
|
"""
|
||||||
|
parser = DefaultParser(text, PythonParser)
|
||||||
|
return parser.parse()
|
||||||
|
|
||||||
assert Sheerka.concept_equals(None, None)
|
|
||||||
assert Sheerka.concept_equals(concept1, concept1)
|
|
||||||
|
|
||||||
|
def test_i_can_add_a_concept():
|
||||||
|
concept = get_concept()
|
||||||
|
sheerka = Sheerka()
|
||||||
|
sheerka.initialize(root_folder)
|
||||||
|
res = sheerka.add_concept(concept)
|
||||||
|
|
||||||
|
assert res.status
|
||||||
|
assert res.value == Concept(
|
||||||
|
name="a + b",
|
||||||
|
where="isinstance(a, int) and isinstance(b, int)",
|
||||||
|
pre="isinstance(a, int) and isinstance(b, int)",
|
||||||
|
post="isinstance(res, int)",
|
||||||
|
body="def func(x,y):\n return x+y\nfunc(a,b)")
|
||||||
|
assert isinstance(res.value.codes[ConceptParts.WHERE], ast.Expression)
|
||||||
|
assert isinstance(res.value.codes[ConceptParts.PRE], ast.Expression)
|
||||||
|
assert isinstance(res.value.codes[ConceptParts.POST], ast.Expression)
|
||||||
|
assert isinstance(res.value.codes[ConceptParts.BODY], ast.Module)
|
||||||
|
|
||||||
|
# def test_i_cannot_add_the_same_concept_twice():
|
||||||
|
# concept1 = DefConceptNode(name="concept")
|
||||||
|
# sheerka = Sheerka
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import hashlib
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from os import path
|
from os import path
|
||||||
@@ -6,6 +8,8 @@ from datetime import date, datetime
|
|||||||
import shutil
|
import shutil
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
from sdp.sheerkaSerializer import ObjectSerializer, BaseSerializer, Serializer
|
||||||
|
|
||||||
tests_root = path.abspath("../build/tests")
|
tests_root = path.abspath("../build/tests")
|
||||||
|
|
||||||
|
|
||||||
@@ -70,6 +74,33 @@ class ObjNoKey:
|
|||||||
return f"ObjNoKey({self.a}, {self.b})"
|
return f"ObjNoKey({self.a}, {self.b})"
|
||||||
|
|
||||||
|
|
||||||
|
class ObjDumpJson:
|
||||||
|
def __init__(self, key, value):
|
||||||
|
self.key = key
|
||||||
|
self.value = value
|
||||||
|
|
||||||
|
def __eq__(self, obj):
|
||||||
|
return isinstance(obj, ObjDumpJson) and \
|
||||||
|
self.key == obj.key and \
|
||||||
|
self.value == obj.value
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"ObjDumpJson({self.key}, {self.value})"
|
||||||
|
|
||||||
|
def get_key(self):
|
||||||
|
return self.key
|
||||||
|
|
||||||
|
def get_digest(self):
|
||||||
|
return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
return self.__dict__
|
||||||
|
|
||||||
|
def from_dict(self, as_dict):
|
||||||
|
self.value = as_dict["value"]
|
||||||
|
self.key = as_dict["key"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def init_test():
|
def init_test():
|
||||||
if path.exists(tests_root):
|
if path.exists(tests_root):
|
||||||
@@ -571,3 +602,55 @@ def test_i_can_test_than_an_entry_exits():
|
|||||||
assert not sdp.exists("entry")
|
assert not sdp.exists("entry")
|
||||||
sdp.add(Event("event"), "entry", "value")
|
sdp.add(Event("event"), "entry", "value")
|
||||||
assert sdp.exists("entry")
|
assert sdp.exists("entry")
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_save_and_load_object_with_history():
|
||||||
|
sdp = SheerkaDataProvider(".sheerka")
|
||||||
|
obj = ObjDumpJson("my_key", "value1")
|
||||||
|
sdp.serializer.register(ObjectSerializer(BaseSerializer.get_full_qualified_name(obj)))
|
||||||
|
|
||||||
|
entry, key = sdp.add_ref("Obj", obj)
|
||||||
|
loaded = sdp.get(entry, key)
|
||||||
|
history = getattr(loaded, Serializer.HISTORY)
|
||||||
|
|
||||||
|
assert key == obj.key
|
||||||
|
assert entry == "Obj"
|
||||||
|
assert loaded.key == obj.key
|
||||||
|
assert loaded.value == obj.value
|
||||||
|
|
||||||
|
assert getattr(history, Serializer.USERNAME) == "kodjo"
|
||||||
|
assert getattr(history, Serializer.MODIFICATION_DATE) != ""
|
||||||
|
assert getattr(history, Serializer.PARENTS) == []
|
||||||
|
|
||||||
|
assert os.path.exists(sdp.get_obj_path(sdp.ObjectsFolder, obj.get_digest()))
|
||||||
|
|
||||||
|
# save a second type with no modification
|
||||||
|
previous_modification_time = getattr(history, Serializer.MODIFICATION_DATE)
|
||||||
|
previous_parents = getattr(history, Serializer.PARENTS)
|
||||||
|
|
||||||
|
sdp.add_ref("Obj", loaded)
|
||||||
|
loaded = sdp.get(entry, key)
|
||||||
|
history = getattr(loaded, Serializer.HISTORY)
|
||||||
|
|
||||||
|
assert getattr(history, Serializer.MODIFICATION_DATE) == previous_modification_time
|
||||||
|
assert getattr(history, Serializer.PARENTS) == previous_parents
|
||||||
|
|
||||||
|
# save again, but with a modification
|
||||||
|
previous_digest = loaded.get_digest()
|
||||||
|
loaded.value = "value2"
|
||||||
|
|
||||||
|
sdp.add_ref("Obj", loaded)
|
||||||
|
loaded2 = sdp.get(entry, key)
|
||||||
|
history2 = getattr(loaded, Serializer.HISTORY)
|
||||||
|
|
||||||
|
assert loaded2.key == loaded.key
|
||||||
|
assert loaded2.value == loaded.value
|
||||||
|
|
||||||
|
assert getattr(history2, Serializer.USERNAME) == "kodjo"
|
||||||
|
assert getattr(history2, Serializer.MODIFICATION_DATE) != ""
|
||||||
|
assert getattr(history2, Serializer.PARENTS) == [previous_digest]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,56 @@
|
|||||||
|
import pytest
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from sdp.sheerkaDataProvider import Event
|
from sdp.sheerkaDataProvider import Event
|
||||||
from sdp.sheerkaSerializer import Serializer
|
from sdp.sheerkaSerializer import Serializer, ObjectSerializer, SerializerContext, BaseSerializer
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class Obj:
|
||||||
|
key: str = ""
|
||||||
|
prop1: str = ""
|
||||||
|
|
||||||
|
def from_dict(self, json_object):
|
||||||
|
self.prop1 = json_object["prop1"]
|
||||||
|
self.key = json_object["key"]
|
||||||
|
return self
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
return self.__dict__
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_serialize_an_event():
|
def test_i_can_serialize_an_event():
|
||||||
event = Event("test", user="user", date=datetime.fromisoformat("2019-10-21T10:20:30.999"))
|
event = Event("test", user="user", date=datetime.fromisoformat("2019-10-21T10:20:30.999"))
|
||||||
serializer = Serializer()
|
serializer = Serializer()
|
||||||
|
|
||||||
stream = serializer.serialize(event)
|
stream = serializer.serialize(event, None)
|
||||||
loaded = serializer.deserialize(stream)
|
loaded = serializer.deserialize(stream, None)
|
||||||
|
|
||||||
assert event.version == loaded.version
|
assert event.version == loaded.version
|
||||||
assert event.user == loaded.user
|
assert event.user == loaded.user
|
||||||
assert event.date == loaded.date
|
assert event.date == loaded.date
|
||||||
assert event.message == loaded.message
|
assert event.message == loaded.message
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_serialize_an_object():
|
||||||
|
obj = Obj("10", "value")
|
||||||
|
serializer = Serializer()
|
||||||
|
serializer.register(ObjectSerializer("tests.test_sheerkaSerializer.Obj"))
|
||||||
|
context = SerializerContext("kodjo", "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b")
|
||||||
|
|
||||||
|
stream = serializer.serialize(obj, context)
|
||||||
|
loaded = serializer.deserialize(stream, context)
|
||||||
|
|
||||||
|
assert getattr(loaded, Serializer.HISTORY)[Serializer.USERNAME] == "kodjo"
|
||||||
|
assert getattr(loaded, Serializer.HISTORY)[Serializer.MODIFICATION_DATE] != ""
|
||||||
|
assert getattr(loaded, Serializer.HISTORY)[Serializer.PARENTS] == []
|
||||||
|
assert loaded.key == "10"
|
||||||
|
assert loaded.prop1 == "value"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("obj, expected", [
|
||||||
|
(Obj("10", "value"), "tests.test_sheerkaSerializer.Obj")
|
||||||
|
])
|
||||||
|
def test_get_full_qualified_name(obj, expected):
|
||||||
|
assert expected == BaseSerializer.get_full_qualified_name(obj)
|
||||||
|
|||||||
Reference in New Issue
Block a user