Added ExactConceptParser

This commit is contained in:
2019-11-09 17:29:50 +01:00
parent a636198222
commit 576ce77740
12 changed files with 603 additions and 169 deletions
+76 -5
View File
@@ -2,6 +2,8 @@ import hashlib
from enum import Enum
import logging
from core.tokenizer import Tokenizer, TokenKind
log = logging.getLogger(__name__)
@@ -20,6 +22,8 @@ class Concept:
"""
props_to_serialize = ("id", "is_builtin", "name", "where", "pre", "post", "body", "desc")
PROPERTY_PREFIX = "__var__"
def __init__(self, name=None, is_builtin=False, where=None, pre=None, post=None, body=None, desc=None, key=None):
self.name = name
self.is_builtin = is_builtin
@@ -31,7 +35,7 @@ class Concept:
self.id = None
self.key = key
self.props = [] # list of Property for this concept
self.props = {} # list of Property for this concept
self.functions = {} # list of helper functions
self.codes = {} # cached ast for the where, pre, post and body parts
@@ -54,10 +58,48 @@ class Concept:
def get_key(self):
return self.key
def init_key(self, tokens=None):
    """
    Create the key for this concept.

    Must be called only when the concept is fully initialized.
    The method is not called set_key to make sure that no other class
    sets the key by mistake.
    :param tokens: optional pre-tokenized name; when None, self.name is tokenized
    :return: self (fluent interface)
    """
    if self.key is not None:
        # Already initialized: keep the fluent contract and return self,
        # not the key (callers chain on the concept, never on the key).
        return self
    if tokens is None:
        tokens = iter(Tokenizer(self.name))
    # Property names in insertion order, so each variable gets a stable index.
    variables = list(self.props.keys())
    parts = []
    for token in tokens:
        if token.type == TokenKind.EOF:
            break
        if token.type == TokenKind.WHITESPACE:
            continue
        if token.value in variables:
            parts.append(self.PROPERTY_PREFIX + str(variables.index(token.value)))
        elif token.type == TokenKind.STRING:
            # strip the surrounding quotes
            parts.append(token.value[1:-1])
        else:
            parts.append(token.value)
    self.key = " ".join(parts)
    return self
def add_codes(self, codes):
"""
From a dict of <ConceptParts, AST>
fill the codes
Gets the ASTs for 'where', 'pre', 'post' and 'body'
These ASTs are known when the concept is freshly parsed.
So the values are kept in cache.
For concepts loaded from sdp, these ASTs must be created again
:param codes:
:return:
"""
@@ -68,6 +110,8 @@ class Concept:
if key in possibles_codes:
self.codes[ConceptParts(key)] = codes[key]
return self
def get_digest(self):
"""
Returns the digest of the event
@@ -76,23 +120,47 @@ class Concept:
return hashlib.sha256(f"Concept:{self.name}{self.pre}{self.post}{self.body}".encode("utf-8")).hexdigest()
def to_dict(self):
    """
    Returns a dict representing 'self'.

    Serialized attributes come from props_to_serialize; the dynamic
    concept properties are flattened to (name, value) pairs so the
    result stays serialization-friendly.
    :return: dict
    """
    props_as_dict = {prop: getattr(self, prop) for prop in self.props_to_serialize}
    # self.props maps name -> Property; keep only (name, value) pairs.
    # (The stale list-based assignment left over from the refactor is removed.)
    props_as_dict["props"] = [(name, self.props[name].value) for name in self.props]
    return props_as_dict
def from_dict(self, as_dict):
    """
    Initializes 'self' from a dict (inverse of to_dict).
    :param as_dict: dict produced by to_dict
    :return: self (fluent interface)
    """
    for prop in self.props_to_serialize:
        if prop in as_dict:
            setattr(self, prop, as_dict[prop])
    # Rebuild the Property objects from the serialized (name, value) pairs.
    for name, value in as_dict.get("props", []):
        self.set_prop(name, value)
    return self
def update_from(self, other):
    """
    Update self using the properties of another concept.

    This mimics the class-to-instance pattern: 'other' is the template
    (the "class") and 'self' is the freshly created instance.
    :param other: the template concept to copy from
    :return: self (fluent interface)
    """
    for attribute in self.props_to_serialize:
        value = getattr(other, attribute)
        setattr(self, attribute, value)
    return self
def set_prop(self, prop_name, prop_value):
    """Create (or replace) the Property stored under prop_name."""
    prop = Property(prop_name, prop_value)
    self.props[prop.name] = prop
def set_prop_by_index(self, index, prop_value):
    """Set the value of the property at the given insertion-order position."""
    prop_name = list(self.props)[index]
    self.set_prop(prop_name, prop_value)
class ErrorConcept(Concept):
NAME = "Error"
@@ -132,3 +200,6 @@ class Property:
def __init__(self, name, value):
self.name = name
self.value = value
def __repr__(self):
return f"{self.name}={self.value}"
+42 -20
View File
@@ -1,9 +1,9 @@
from dataclasses import dataclass
from core.concept import Concept, ErrorConcept, Property, TooManySuccessConcept, ReturnValueConcept
from parsers.PythonParser import PythonParser, PythonGetNamesVisitor, PythonNode
from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError
from parsers.DefaultParser import DefaultParser, DefConceptNode
from parsers.DefaultParser import DefConceptNode, DefaultParser
import core.utils
import logging
@@ -50,6 +50,7 @@ class Sheerka(Concept):
NAME = "Sheerka"
UNKNOWN_CONCEPT_NAME = "Unknown Concept"
SUCCESS_CONCEPT_NAME = "Success"
CONCEPT_TOO_LONG_CONCEPT_NAME = "Concept too long"
CONCEPTS_ENTRY = "All_Concepts"
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
@@ -60,6 +61,8 @@ class Sheerka(Concept):
super().__init__(Sheerka.NAME)
# cache of the most used concepts
# Note that these are only templates
# They are used as a footprint for instantiation
self.concepts_cache = {}
# a concept can be instantiated
@@ -91,8 +94,9 @@ class Sheerka(Concept):
try:
self.init_logging()
self.sdp = SheerkaDataProvider(root_folder)
self.parsers.append(lambda text: DefaultParser(text, PythonParser))
self.parsers.append(lambda text: PythonParser(text))
self.parsers.append(core.utils.get_class("parsers.DefaultParser.DefaultParser"))
self.parsers.append(core.utils.get_class("parsers.PythonParser.PythonParser"))
#self.parsers.append(core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser"))
self.evaluators.append(core.utils.get_object("evaluators.DefaultEvaluator.DefaultEvaluator"))
self.evaluators.append(core.utils.get_object("evaluators.AddConceptEvaluator.AddConceptEvaluator"))
@@ -103,7 +107,7 @@ class Sheerka(Concept):
self.create_builtin_concepts()
except IOError as e:
return ReturnValue(self, False, self.get(Sheerka.ERROR_CONCEPT_NAME), e)
return ReturnValue(self, False, self.get(ErrorConcept.NAME), e)
return ReturnValue(self, True, self.get(Sheerka.SUCCESS_CONCEPT_NAME))
@@ -129,12 +133,15 @@ class Sheerka(Concept):
self,
Concept(Sheerka.UNKNOWN_CONCEPT_NAME, key=Sheerka.UNKNOWN_CONCEPT_NAME),
Concept(Sheerka.SUCCESS_CONCEPT_NAME, key=Sheerka.SUCCESS_CONCEPT_NAME),
Concept(Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME, key=Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME),
ErrorConcept(),
TooManySuccessConcept(),
ReturnValueConcept(),
]
for concept in builtins:
self.add_in_cache(concept)
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
if from_db is None:
log.debug(f"'{concept.name}' concept is not found. Adding.")
@@ -143,7 +150,6 @@ class Sheerka(Concept):
else:
log.debug(f"Found concept '{from_db}'. Updating.")
concept.update_from(from_db)
self.concepts_cache[concept.key] = concept
def init_logging(self):
if self.debug:
@@ -158,7 +164,7 @@ class Sheerka(Concept):
def eval(self, text):
evt_digest = self.sdp.save_event(Event(text))
exec_context = ExecutionContext(self, evt_digest)
return_values = self.try_parse(text)
return_values = self.try_parse(exec_context, text)
return_values = self.try_eval(exec_context, return_values)
# return_values = []
@@ -172,17 +178,17 @@ class Sheerka(Concept):
return return_values
def try_parse(self, context, text):
    """
    Run every registered parser on the input text.

    Each registered entry is a parser class; a fresh instance is created
    per run. Failures are reported through the parser's error sink rather
    than raised.
    :param context: the current ExecutionContext
    :param text: the raw input to parse
    :return: list of ReturnValue, one per parser
    """
    result = []
    log.debug(f"Parsing '{text}'")
    for parser in self.parsers:
        p = parser()
        tree = p.parse(context, text)
        result.append(ReturnValue(p.name, not p.has_error, p.error_sink if p.has_error else tree))
    return result
@@ -235,11 +241,12 @@ class Sheerka(Concept):
setattr(concept, prop, source)
# try to find variables (eg props)
# Note that with this method, the variables will be created in the order of appearance
for token in def_concept_node.tokens["name"]:
if token.value in get_names_visitor.names:
concept.props.append(Property(token.value, None))
concept.set_prop(token.value, None)
concept.key = DefaultParser.get_concept_name(def_concept_node.tokens["name"], [p.name for p in concept.props])
concept.init_key(def_concept_node.tokens["name"])
concept.add_codes(def_concept_node.get_codes())
self.set_id_if_needed(concept, False)
@@ -249,22 +256,34 @@ class Sheerka(Concept):
return ReturnValue(self.add_concept.__name__, False, ErrorConcept(body=error), error.args[0])
return ReturnValue(self.add_concept.__name__, True, concept)
def get(self, concept_name):
def add_in_cache(self, concept):
    """
    Adds a concept template in cache.

    The cache is used as a proxy before looking at sdp; entries are
    keyed by the concept's key.
    :param concept: the template concept to cache
    :return: None
    """
    self.concepts_cache.update({concept.key: concept})
def get(self, concept_key):
"""
Tries to find a concept
:param concept_name:
TODO: how to manage single vs multiple instances
:param concept_key:
:return:
"""
# first search in cache
if concept_name in self.concepts_cache:
return self.concepts_cache[concept_name]
if concept_key in self.concepts_cache:
return self.concepts_cache[concept_key]
return self.sdp.get(self.CONCEPTS_ENTRY, concept_name)
return self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key) or \
self.new(self.UNKNOWN_CONCEPT_NAME, body=concept_key)
def new(self, concept, **kwargs):
"""
Returns an instance of a new concept
TODO: Checks if the concept is supposed to be unique (ex Sheerka, or the number 'one' for example)
:param concept:
:param kwargs:
:return:
@@ -287,11 +306,14 @@ class Sheerka(Concept):
:return:
"""
if not isinstance(a, Concept) or not isinstance(b, Concept):
return False
if not isinstance(a, Concept):
raise SyntaxError("The first parameter of isinstance MUST be a concept")
b_key = b if isinstance(b, str) else b.key
# TODO : manage when a is the list of all possible b
return a.key == b.key
# for example, if a is a color, it will be found the entry 'All_Colors'
return a.key == b_key
@staticmethod
def test():
@@ -31,6 +31,20 @@ class TokenKind(Enum):
VBAR = "vbar"
AMPER = "amper"
EQUALS = "="
AT = "at"
BACK_QUOTE = "bquote" # `
BACK_SLASH = "bslash" # \
CARAT = "carat" # ^
DOLLAR = "dollar" # $
EMARK = "emark" # !
GREATER = "greater" # >
LESS = "less" # <
HASH = "HASH" # #
TILDE = "tilde" # ~
UNDERSCORE = "underscore" # _
DEGREE = "degree" # °
@dataclass()
@@ -159,6 +173,14 @@ class Tokenizer:
yield Token(TokenKind.QMARK, "?", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "|":
yield Token(TokenKind.VBAR, "|", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "&":
yield Token(TokenKind.AMPER, "&", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\n" or c == "\r":
newline = self.eat_newline(self.i)
yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
+2 -2
View File
@@ -1,5 +1,5 @@
from core.concept import TooManySuccessConcept
from core.sheerka import ReturnValue
from core.sheerka import Sheerka
from evaluators.BaseEvaluator import BaseEvaluator
import logging
@@ -32,7 +32,7 @@ class DefaultEvaluator(BaseEvaluator):
log.debug(f"{number_of_successful} / {total_items} good items. Too many success")
return ReturnValue(self.name,
False,
context.sheerka.new(Sheerka.TOO_MANY_SUCCESS_CONCEPT_NAME, body=items))
context.sheerka.new(TooManySuccessConcept.NAME, body=items))
# only errors, i cannot help you
log.debug(f"{total_items} items. Only errors")
+4 -5
View File
@@ -1,5 +1,5 @@
from dataclasses import dataclass, field
from parsers.tokenizer import TokenKind, Keywords
from dataclasses import dataclass
from core.tokenizer import TokenKind, Keywords
@dataclass()
@@ -21,13 +21,12 @@ class ErrorNode(Node):
class BaseParser:
def __init__(self, name):
    """
    :param name: display name of the parser, used in ReturnValue reports
    """
    self.name = name
    self.has_error = False  # set to True when any error is recorded
    self.error_sink = []    # collected error nodes / exceptions
def parse(self, context, text):
    """
    Parse 'text' within 'context'.

    Base implementation does nothing; subclasses override and return a
    tree (or an error node).
    :param context: the current ExecutionContext
    :param text: the raw input to parse
    :return: None in the base class
    """
    pass
@staticmethod
+102 -94
View File
@@ -1,5 +1,5 @@
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
import logging
@@ -147,24 +147,29 @@ class BinaryNode(DefaultParserNode):
class DefaultParser(BaseParser):
    """
    Parse sheerka specific grammar (like def concept)
    """
    def __init__(self, sub_parser=None):
        BaseParser.__init__(self, "DefaultParser")
        # Sub-parser class used for the 'as/where/pre/post' bodies;
        # may be replaced from the context in reset_parser().
        self.sub_parser = sub_parser
        self.lexer_iter = None  # iterator over the tokens of the current text
        self._current = None    # current look-ahead token
        self.context = None     # ExecutionContext of the current parse
        self.text = None        # raw text being parsed
def reset_parser(self, context, text):
    """Prepare the parser for a fresh run over 'text' in 'context'."""
    self.context = context
    self.text = text
    # hack before implementing all the sub parsers
    if context:
        self.sub_parser = context.sheerka.parsers[1]
    self.lexer_iter = iter(Tokenizer(text))
    self._current = None
    self.next_token()
def collect_tokens(self, *args):
    """Flatten a mix of Nodes and bare tokens into a single token list."""
    return [
        token
        for item in args
        for token in (item.tokens if isinstance(item, Node) else (item,))
    ]
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
@@ -186,21 +191,23 @@ class DefaultParser(BaseParser):
return
@staticmethod
def get_concept_key(tokens, variables=None):
    """
    Build a canonical concept key from a token stream.

    Whitespace tokens are dropped, string tokens lose their quotes, and
    any token whose value matches a variable name is replaced by
    '__var__<index>'.
    NOTE(review): duplicates Concept.init_key / Concept.PROPERTY_PREFIX —
    consider sharing a single implementation.
    :param tokens: iterable of tokens (stops at EOF)
    :param variables: optional list of variable names to substitute
    :return: the space-separated key string
    """
    parts = []
    for token in tokens:
        if token.type == TokenKind.EOF:
            break
        if token.type == TokenKind.WHITESPACE:
            continue
        if variables is not None and token.value in variables:
            parts.append("__var__" + str(variables.index(token.value)))
        else:
            parts.append(token.value[1:-1] if token.type == TokenKind.STRING else token.value)
    return " ".join(parts)
@staticmethod
def fix_indentation(tokens):
@@ -242,7 +249,8 @@ class DefaultParser(BaseParser):
return tokens[4:]
def parse(self, context, text):
    """Reset the parser state, then parse 'text' as a statement."""
    self.reset_parser(context, text)
    return self.parse_statement()
def parse_statement(self):
@@ -277,7 +285,7 @@ class DefaultParser(BaseParser):
name_as_tokens.append(token)
self.next_token()
token = self.get_token()
name = self.get_concept_name(name_as_tokens)
name = self.get_concept_key(name_as_tokens)
tokens_found["name"] = name_as_tokens
# try to parse as, where, pre and post declarations
@@ -328,8 +336,8 @@ class DefaultParser(BaseParser):
# start = current_tokens[0].index
# end = current_tokens[-1].index + len(current_tokens[-1].value)
sub_parser = self.sub_parser(current_tokens, source=keyword.value)
sub_tree = sub_parser.parse()
sub_parser = self.sub_parser(source=keyword.value)
sub_tree = sub_parser.parse(self.context, current_tokens)
if isinstance(sub_tree, ErrorNode):
self.add_error(sub_tree, False)
asts[keyword] = sub_tree
@@ -344,74 +352,74 @@ class DefaultParser(BaseParser):
log.debug(f"Found DefConcept node '{def_concept_node}'")
return def_concept_node
def parse_expression(self):
return self.parse_addition()
def parse_addition(self):
left = self.parse_multiply()
token = self.get_token()
if token is None or token.type == TokenKind.EOF:
return left
if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5
right = self.parse_addition()
return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
return left
self.next_token()
right = self.parse_addition()
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
def parse_multiply(self):
left = self.parse_atom()
token = self.get_token()
if token is None or token.type == TokenKind.EOF:
return left
if token.type not in (TokenKind.STAR, TokenKind.SLASH):
return left
self.next_token()
right = self.parse_multiply()
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
def parse_atom(self):
token = self.get_token()
if token.type == TokenKind.NUMBER:
self.next_token()
return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
elif token.type == TokenKind.STRING:
self.next_token()
return StringNode([token], token.value[1:-1], token.value[0])
elif token.type == TokenKind.IDENTIFIER:
if token.value == "true":
self.next_token()
return TrueNode([token])
elif token.value == "false":
self.next_token()
return FalseNode([token])
elif token.value == "null":
self.next_token()
return NullNode([token])
else:
self.next_token()
return VariableNode([token], token.value)
elif token.type == TokenKind.LPAR:
self.next_token()
exp = self.parse_expression()
token = self.get_token()
self.next_token()
if token.type != TokenKind.RPAR:
error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
self.add_error(error)
return error
return exp
else:
error = UnexpectedTokenErrorNode([token], "Unexpected token",
[TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
"null", TokenKind.LPAR])
return self.add_error(error)
# def parse_expression(self):
# return self.parse_addition()
#
# def parse_addition(self):
# left = self.parse_multiply()
# token = self.get_token()
# if token is None or token.type == TokenKind.EOF:
# return left
#
# if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5
# right = self.parse_addition()
# return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
#
# if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
# return left
#
# self.next_token()
# right = self.parse_addition()
# return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
#
# def parse_multiply(self):
# left = self.parse_atom()
# token = self.get_token()
# if token is None or token.type == TokenKind.EOF:
# return left
#
# if token.type not in (TokenKind.STAR, TokenKind.SLASH):
# return left
#
# self.next_token()
# right = self.parse_multiply()
# return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
#
# def parse_atom(self):
# token = self.get_token()
# if token.type == TokenKind.NUMBER:
# self.next_token()
# return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
# elif token.type == TokenKind.STRING:
# self.next_token()
# return StringNode([token], token.value[1:-1], token.value[0])
# elif token.type == TokenKind.IDENTIFIER:
# if token.value == "true":
# self.next_token()
# return TrueNode([token])
# elif token.value == "false":
# self.next_token()
# return FalseNode([token])
# elif token.value == "null":
# self.next_token()
# return NullNode([token])
# else:
# self.next_token()
# return VariableNode([token], token.value)
# elif token.type == TokenKind.LPAR:
# self.next_token()
# exp = self.parse_expression()
# token = self.get_token()
# self.next_token()
#
# if token.type != TokenKind.RPAR:
# error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
# self.add_error(error)
# return error
#
# return exp
# else:
# error = UnexpectedTokenErrorNode([token], "Unexpected token",
# [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
# "null", TokenKind.LPAR])
# return self.add_error(error)
+107
View File
@@ -0,0 +1,107 @@
from core.sheerka import ReturnValue
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind
from core.concept import Concept
class ExactConceptParser(BaseParser):
    """
    Tries to recognize a single concept
    """
    MAX_WORDS_SIZE = 10

    def __init__(self):
        BaseParser.__init__(self, "ConceptParser")

    def parse(self, context, text):
        """Match 'text' (and its placeholder variants) against known concepts."""
        sheerka = context.sheerka
        words = self.get_words(text)
        if len(words) > self.MAX_WORDS_SIZE:
            return ReturnValue(self.name, False, sheerka.new(sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME))
        matches = []
        for combination in self.combinations(words):
            concept_key = " ".join(combination)
            # Very important question to think about later
            # Must we return a new instance or the existing one
            # That will depend on the context
            # Let's return a new one for now and see if it works
            concept = sheerka.new(concept_key)
            if sheerka.isinstance(concept, sheerka.UNKNOWN_CONCEPT_NAME):
                continue
            # update the properties if needed
            for position, part in enumerate(combination):
                if part.startswith(Concept.PROPERTY_PREFIX):
                    prop_index = int(part[len(Concept.PROPERTY_PREFIX):])
                    concept.set_prop_by_index(prop_index, words[position])
            matches.append(ReturnValue(self.name, True, concept))
        if matches:
            return matches
        return ReturnValue(self.name, False, sheerka.new(sheerka.UNKNOWN_CONCEPT_NAME, body=text))

    @staticmethod
    def get_words(text):
        """Tokenize 'text', dropping whitespace/newlines, keeping raw word values."""
        words = []
        for token in iter(Tokenizer(text)):
            if token.type == TokenKind.EOF:
                break
            if token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
                continue
            words.append(token.value.value if isinstance(token.value, Keywords) else token.value)
        return words

    def combinations(self, iterable):
        # combinations('foo', 'bar', 'baz') -->
        # ('foo', 'bar', 'baz'),
        # ('__var__0', 'bar', 'baz'),
        # ('foo', '__var__0', 'baz'),
        # ('foo', 'bar', '__var__0'),
        # ('__var__0', '__var__1', 'baz'),
        # ('__var__0', 'bar', '__var__1'),
        # ('foo', '__var__0', '__var__1'),
        # ('__var__0', '__var__1', '__var__2')]
        pool = tuple(iterable)
        size = len(pool)
        variants = set()
        for r in range(size + 1):
            # first index combination of this size: [0, 1, ..., r-1]
            indices = list(range(r))
            variants.add(self.get_tuple(pool, indices))
            while True:
                # advance 'indices' to the next combination, rightmost first
                for i in reversed(range(r)):
                    if indices[i] != i + size - r:
                        break
                else:
                    break
                indices[i] += 1
                for j in range(i + 1, r):
                    indices[j] = indices[j - 1] + 1
                variants.add(self.get_tuple(pool, indices))
        return variants

    @staticmethod
    def get_tuple(pool, indices):
        """Replace the words at 'indices' by numbered placeholders (duplicate words share one)."""
        placeholders = {}
        for i in indices:
            word = pool[i]
            if word not in placeholders:
                placeholders[word] = f"{Concept.PROPERTY_PREFIX}{len(placeholders)}"
        return tuple(placeholders.get(word, word) for word in pool)
+18 -13
View File
@@ -26,36 +26,41 @@ class PythonNode(Node):
class PythonParser(BaseParser):
    """
    Parse Python scripts
    """
    def __init__(self, source="<undef>"):
        BaseParser.__init__(self, "PythonParser")
        self.source = source  # label used in the pseudo-filename given to ast.parse

    def parse(self, context, text):
        """
        Parse 'text' as Python code.

        Accepts either a string or a token list (converted back to text).
        Tries an expression first, then a statement; on failure the error
        is recorded in the error sink and a PythonErrorNode is returned.
        :param context: the current ExecutionContext (unused here)
        :param text: source string or token list
        :return: PythonNode on success, PythonErrorNode on failure
        """
        text = text if isinstance(text, str) else self.get_text_from_tokens(text)
        text = text.strip()
        # first, try to parse an expression
        res, tree, error = self.try_parse_expression(text)
        if not res:
            # then try to parse a statement
            res, tree, error = self.try_parse_statement(text)
            if not res:
                self.has_error = True
                error_node = PythonErrorNode(text, error)
                self.error_sink.append(error_node)
                return error_node
        log.debug("Recognized python code.")
        return PythonNode(text, tree)

    def try_parse_expression(self, text):
        """Try ast.parse in 'eval' mode; return (ok, tree, error)."""
        try:
            return True, ast.parse(text, f"<{self.source}>", 'eval'), None
        except Exception as error:
            return False, None, error

    def try_parse_statement(self, text):
        """Try ast.parse in 'exec' mode; return (ok, tree, error)."""
        try:
            return True, ast.parse(text, f"<{self.source}>", 'exec'), None
        except Exception as error:
            return False, None, error
+152
View File
@@ -0,0 +1,152 @@
import pytest
from os import path
import shutil
import os
from core.concept import Concept, Property
from core.sheerka import Sheerka, ExecutionContext
from parsers.DefaultParser import DefaultParser
from parsers.ExactConceptParser import ExactConceptParser
tests_root = path.abspath("../build/tests")
root_folder = "init_folder"
@pytest.fixture(autouse=True)
def init_test():
    """Run each test inside a freshly recreated build/tests working directory."""
    if path.exists(tests_root):
        shutil.rmtree(tests_root)
    # After rmtree the folder is guaranteed absent, so create unconditionally
    # (the old re-check of path.exists was a dead branch).
    os.makedirs(tests_root)
    current_pwd = os.getcwd()
    os.chdir(tests_root)
    yield None
    os.chdir(current_pwd)
def test_i_can_compute_combinations():
    """All placeholder substitutions of a 3-word phrase are generated."""
    expected = {
        ('foo', 'bar', 'baz'),
        ('__var__0', 'bar', 'baz'),
        ('foo', '__var__0', 'baz'),
        ('foo', 'bar', '__var__0'),
        ('__var__0', '__var__1', 'baz'),
        ('__var__0', 'bar', '__var__1'),
        ('foo', '__var__0', '__var__1'),
        ('__var__0', '__var__1', '__var__2'),
    }
    assert ExactConceptParser().combinations(["foo", "bar", "baz"]) == expected
def test_i_can_compute_combinations_with_duplicates():
    """Duplicate words share a single placeholder index."""
    expected = {
        ('foo', 'bar', 'foo'),
        ('__var__0', 'bar', '__var__0'),
        ('foo', '__var__0', 'foo'),
        ('__var__0', '__var__1', '__var__0'),
        ('__var__1', '__var__0', '__var__1'),
    }
    assert ExactConceptParser().combinations(["foo", "bar", "foo"]) == expected
    # TODO: the last tuple is not possible, so the algo can be improved
def test_i_can_recognize_a_simple_concept():
    """A cached concept whose key matches the input text is recognized."""
    sheerka = get_sheerka()
    concept = get_concept("hello world", [])
    sheerka.add_in_cache(concept)
    source = "hello world"
    context = ExecutionContext(sheerka, "xxxx")
    results = ExactConceptParser().parse(context, source)
    assert len(results) == 1
    assert results[0].status
    assert results[0].value.key == concept.key
def test_i_can_recognize_concepts_defined_several_times():
    """Both the literal form and the variable form of a phrase are returned."""
    sheerka = get_sheerka()
    sheerka.add_in_cache(get_concept("hello world", []))
    sheerka.add_in_cache(get_concept("hello a", ["a"]))
    source = "hello world"
    context = ExecutionContext(sheerka, "xxxx")
    results = ExactConceptParser().parse(context, source)
    assert len(results) == 2
    results = sorted(results, key=lambda x: x.value.name)  # because of the usage of sets
    assert results[0].status
    assert results[0].value.name == "hello a"
    assert results[0].value.props["a"].value == "world"
    assert results[1].status
    assert results[1].value.name == "hello world"
def test_i_can_recognize_a_concept_with_variables():
    """Placeholder positions in the key are bound back to the input words."""
    sheerka = get_sheerka()
    concept = get_concept("a + b", ["a", "b"])
    sheerka.concepts_cache[concept.key] = concept
    source = "10 + 5"
    context = ExecutionContext(sheerka, "xxxx")
    results = ExactConceptParser().parse(context, source)
    assert len(results) == 1
    assert results[0].status
    assert results[0].value.key == concept.key
    assert results[0].value.props["a"].value == "10"
    assert results[0].value.props["b"].value == "5"
def test_i_can_recognize_a_concept_with_duplicate_variables():
    """A variable appearing twice in the name maps to one shared placeholder."""
    sheerka = get_sheerka()
    concept = get_concept("a + b + a", ["a", "b"])
    sheerka.concepts_cache[concept.key] = concept
    source = "10 + 5 + 10"
    context = ExecutionContext(sheerka, "xxxx")
    results = ExactConceptParser().parse(context, source)
    assert len(results) == 1
    assert results[0].status
    assert results[0].value.key == concept.key
    assert results[0].value.props["a"].value == "10"
    assert results[0].value.props["b"].value == "5"
def test_i_can_manage_unknown_concept():
    """Unmatched input yields a failed ReturnValue wrapping an Unknown Concept."""
    sheerka = get_sheerka()
    source = "def concept hello world"  # this is not a concept by itself
    context = ExecutionContext(sheerka, "xxxx")
    res = ExactConceptParser().parse(context, source)
    assert not res.status
    assert sheerka.isinstance(res.value, Sheerka.UNKNOWN_CONCEPT_NAME)
def test_i_can_detect_concepts_too_long():
    """Input longer than MAX_WORDS_SIZE is rejected with 'Concept too long'."""
    sheerka = get_sheerka()
    source = "a very very long concept that cannot be an unique one"
    context = ExecutionContext(sheerka, "xxxx")
    res = ExactConceptParser().parse(context, source)
    assert not res.status
    assert sheerka.isinstance(res.value, Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME)
def get_concept(name, variables):
    """Build a Concept with the given variable properties and a computed key."""
    concept = Concept(name=name)
    for variable in variables or []:
        concept.props[variable] = Property(variable, None)
    concept.init_key()
    return concept
def get_sheerka():
    """Return a Sheerka instance initialized on the test root folder."""
    instance = Sheerka()
    instance.initialize(root_folder)
    return instance
+38
View File
@@ -0,0 +1,38 @@
import pytest
from core.concept import Concept
@pytest.mark.parametrize("name, variables, expected", [
    ("my name is a", ["a"], "my name is __var__0"),
    ("a b c d", ["b", "c"], "a __var__0 __var__1 d"),
    ("a 'b c' d", ["b", "c"], "a b c d"),
    ("a | b", ["a", "b"], "__var__0 | __var__1"),
    ("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"),
    ("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"),
])
def test_i_can_get_concept_key(name, variables, expected):
    """init_key replaces each declared property by its positional placeholder."""
    concept = Concept(name)
    for v in variables:
        concept.set_prop(v, None)
    concept.init_key()
    assert concept.key == expected
def test_i_can_serialize():
    """
    Test concept.to_dict()
    :return:
    """
    # TODO: implement once the serialization format is frozen
    pass


def test_i_can_deserialize():
    """
    Test concept.from_dict()
    :return:
    """
    # TODO: implement once the serialization format is frozen
    pass
+27 -22
View File
@@ -1,10 +1,11 @@
import pytest
from parsers.ExactConceptParser import ExactConceptParser
from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode
from parsers.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError
from core.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError
from parsers.DefaultParser import DefaultParser
from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode
from parsers.DefaultParser import Node, UnexpectedTokenErrorNode, DefConceptNode, NopNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode, NopNode
import ast
@@ -39,6 +40,7 @@ def null():
def b(operator, left, right):
    """Shorthand: build a BinaryNode with no backing tokens."""
    return BinaryNode([], operator, left, right)
def compare_ast(left, right):
left_as_string = ast.dump(left)
left_as_string = left_as_string.replace(", ctx=Load()", "")
@@ -51,9 +53,8 @@ def compare_ast(left, right):
return left_as_string == right_as_string
def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"="
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&"
tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -85,6 +86,8 @@ def test_i_can_tokenize():
assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1)
assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2)
assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18)
assert tokens[30] == Token(TokenKind.VBAR, '|', 77, 6, 19)
assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20)
@pytest.mark.parametrize("text, expected", [
@@ -220,8 +223,8 @@ def test_i_can_recognize_keywords(text, expected):
("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))),
])
def test_i_can_parse_def_concept(text, expected_name, expected_expr):
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
parser = DefaultParser(PythonParser)
tree = parser.parse(None, text)
assert isinstance(tree, DefConceptNode)
assert tree.name == expected_name
if isinstance(tree.body, PythonNode):
@@ -230,8 +233,6 @@ def test_i_can_parse_def_concept(text, expected_name, expected_expr):
assert tree.body == expected_expr
def test_i_can_parse_complex_def_concept_statement():
text = """def concept a plus b
where a,b
@@ -239,8 +240,8 @@ def test_i_can_parse_complex_def_concept_statement():
post isinstance(res, int)
as res = a + b
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
parser = DefaultParser(PythonParser)
tree = parser.parse(None, text)
assert not parser.has_error
assert isinstance(tree, DefConceptNode)
assert tree.name == "a plus b"
@@ -261,19 +262,20 @@ def concept add one to a as:
return x+1
func(a)
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
parser = DefaultParser(PythonParser)
tree = parser.parse(None, text)
assert not parser.has_error
assert isinstance(tree, DefConceptNode)
def test_i_can_use_colon_to_declare_indentation2():
text = """
def concept add one to a as:
def func(x):
return x+1
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
parser = DefaultParser(PythonParser)
tree = parser.parse(None, text)
assert not parser.has_error
assert isinstance(tree, DefConceptNode)
@@ -285,8 +287,8 @@ def concept add one to a as
return x+1
func(a)
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
parser = DefaultParser(PythonParser)
tree = parser.parse(None, text)
assert parser.has_error
assert isinstance(tree, DefConceptNode)
assert isinstance(parser.error_sink[0].exception, IndentationError)
@@ -304,8 +306,8 @@ def concept add one to a as:
func(a)
func(b)
"""
parser = DefaultParser(text, PythonParser)
tree = parser.parse()
parser = DefaultParser(PythonParser)
tree = parser.parse(None, text)
assert parser.has_error
assert isinstance(tree, DefConceptNode)
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
@@ -319,8 +321,8 @@ func(b)
("def concept as", Keywords.AS, ["<name>"]),
])
def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens):
parser = DefaultParser(text, PythonParser)
parser.parse()
parser = DefaultParser(PythonParser)
parser.parse(None, text)
assert parser.has_error
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
@@ -335,7 +337,10 @@ def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, e
"def concept hello as 1+"
])
def test_i_can_detect_error_in_declaration(text):
parser = DefaultParser(text, PythonParser)
parser.parse()
parser = DefaultParser(PythonParser)
parser.parse(None, text)
assert parser.has_error
assert isinstance(parser.error_sink[0], PythonErrorNode)
+13 -8
View File
@@ -37,8 +37,7 @@ def test_root_folder_is_created_after_initialization():
def test_lists_of_concepts_is_initialized():
    """After initialization the cache contains the builtin concepts."""
    sheerka = get_sheerka()
    assert len(sheerka.concepts_cache) > 1
@@ -53,14 +52,13 @@ def get_concept():
return x+y
func(a,b)
"""
parser = DefaultParser(text, PythonParser)
return parser.parse()
parser = DefaultParser(PythonParser)
return parser.parse(None, text)
def test_i_can_add_a_concept():
sheerka = get_sheerka()
concept = get_concept()
sheerka = Sheerka()
sheerka.initialize(root_folder)
res = sheerka.add_concept(ExecutionContext(sheerka, "xxx"), concept)
concept_found = res.value
@@ -76,7 +74,7 @@ def test_i_can_add_a_concept():
assert isinstance(concept_found.codes[ConceptParts.POST], ast.Expression)
assert isinstance(concept_found.codes[ConceptParts.BODY], ast.Module)
all_props = [p.name for p in concept_found.props]
all_props = list(concept_found.props.keys())
assert all_props == ["a", "b"]
assert concept_found.key == "__var__0 + __var__1"
@@ -123,7 +121,14 @@ def test_i_can_instantiate_a_concept():
"""
Test the new() functionality
make sure that some Concepts are singletons (e.g. Sheerka, True, False)
but some others need a new instance every time
otherwise, make sure that new() returns a **new** instance
:return:
"""
pass
def get_sheerka():
    """Create and initialize a Sheerka rooted at the test folder."""
    s = Sheerka()
    s.initialize(root_folder)
    return s