First steps of ConceptLexer. Need to update DefaultParser before continuing

This commit is contained in:
2019-11-29 17:26:50 +01:00
parent 5d37addc7d
commit 5e539a4b28
21 changed files with 1409 additions and 55 deletions
+48 -9
View File
@@ -1,6 +1,7 @@
from core.builtin_concepts import BuiltinConcepts, ListConcept from core.builtin_concepts import BuiltinConcepts, ListConcept
from core.concept import Concept from core.concept import Concept
import ast import ast
import core.utils
import logging import logging
@@ -45,18 +46,18 @@ class NodeParent:
class NodeConcept(Concept): class NodeConcept(Concept):
def __init__(self, key, parent: NodeParent): def __init__(self, key, node_type, parent: NodeParent):
super().__init__(key, True, False, key) super().__init__(key, True, False, key)
self.parent = parent self.parent = parent
self.node_type = node_type
def get_node_type(self): def get_node_type(self):
return self.key return self.node_type
class GenericNodeConcept(NodeConcept): class GenericNodeConcept(NodeConcept):
def __init__(self, node_type, parent): def __init__(self, node_type, parent):
super().__init__(BuiltinConcepts.GENERIC_NODE, parent) super().__init__(BuiltinConcepts.GENERIC_NODE, node_type, parent)
self.node_type = node_type
def __repr__(self): def __repr__(self):
return "Generic:" + self.node_type return "Generic:" + self.node_type
@@ -74,17 +75,25 @@ class GenericNodeConcept(NodeConcept):
return self.body return self.body
class IdentifierConcept(NodeConcept): class IdentifierNodeConcept(NodeConcept):
def __init__(self, parent, name): def __init__(self, parent, name):
super().__init__(BuiltinConcepts.IDENTIFIER_NODE, parent) super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Name", parent)
self.body = name self.body = name
def transform(node): class CallNodeConcept(NodeConcept):
def __init__(self, parent=None):
super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Call", parent)
def get_args_names(self, sheerka):
return sheerka.values(self.get_prop("args"))
def python_to_concept(python_node):
""" """
Transform Python AST node into concept nodes Transform Python AST node into concept nodes
for better usage for better usage
:param node: :param python_node:
:return: :return:
""" """
@@ -107,4 +116,34 @@ def transform(node):
concept.set_prop(field, value) concept.set_prop(field, value)
return concept return concept
return _transform(node, None) return _transform(python_node, None)
def concept_to_python(concept_node):
    """
    Transform a concept-node tree back into a Python AST node.

    :param concept_node: root NodeConcept to convert
    :return: the equivalent ``_ast`` object
    """
    def _to_ast(node):
        # Instantiate the matching _ast class from the stored node type.
        ast_object = core.utils.new_object("_ast." + node.get_node_type())
        for field in node.props:
            # Only copy properties the AST class actually declares.
            if field not in ast_object._fields:
                continue
            value = node.get_prop(field)
            is_list_concept = isinstance(value, Concept) and value.key == str(BuiltinConcepts.LIST)
            if isinstance(value, list) or is_list_concept:
                # Sequence field: convert each child recursively.
                setattr(ast_object, field, [_to_ast(child) for child in value])
            elif isinstance(value, NodeConcept):
                # Single nested node.
                setattr(ast_object, field, _to_ast(value))
            else:
                # Plain value (identifier name, constant, ...).
                setattr(ast_object, field, value)
        return ast_object

    return _to_ast(concept_node)
+11 -3
View File
@@ -57,13 +57,13 @@ class UnreferencedNamesVisitor(ConceptNodeVisitor):
def visit_Name(self, node): def visit_Name(self, node):
parents = get_parents(node) parents = get_parents(node)
if ("For", "target") in parents: # variable used by the 'for' iteration if ("For", "target") in parents: # variable used by the 'for' iteration
return return
if ("Call", "func") in parents: # name of the function if ("Call", "func") in parents: # name of the function
return return
if ("Assign", "targets") in parents: # variable which is assigned if ("Assign", "targets") in parents: # variable which is assigned
return return
if self.can_be_discarded(self.sheerka.value(node), parents): if self.can_be_discarded(self.sheerka.value(node), parents):
@@ -91,6 +91,14 @@ class UnreferencedNamesVisitor(ConceptNodeVisitor):
return False return False
class ExtractPredicateVisitor(ConceptNodeVisitor):
def __init__(self, variable_name):
self.predicates = []
self.variable_name = variable_name
def get_parents(node): def get_parents(node):
if node.parent is None: if node.parent is None:
return [] return []
+109
View File
@@ -1,3 +1,8 @@
import ast
import core.ast.nodes
from core.ast.nodes import CallNodeConcept, GenericNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
@@ -81,3 +86,107 @@ def expect_one(context, return_values):
False, False,
sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, obj=return_values), sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, obj=return_values),
parents=return_values) parents=return_values)
def get_names(sheerka, concept_node):
    """
    Collect every name referenced by concept_node.

    :param sheerka: active Sheerka instance
    :param concept_node: root node to scan
    :return: list of the referenced names
    """
    visitor = UnreferencedNamesVisitor(sheerka)
    visitor.visit(concept_node)
    return list(visitor.names)
def extract_predicates(sheerka, expression, variables_to_include, variables_to_exclude):
    """
    From expression, tries to find all the predicates referencing at least one
    variable of variables_to_include and none of variables_to_exclude.
    For example
    exp : isinstance(a, int) and isinstance(b, str)
    will return 'isinstance(a, int)' if variables_to_include == ['a']
    :param sheerka:
    :param expression: source string of the expression to analyse
    :param variables_to_include: names a predicate must reference
    :param variables_to_exclude: names a predicate must not reference
    :return: list of predicates as ast.Expression nodes
    :raises NotImplementedError: if expression is not a string
    """
    if len(variables_to_include) == 0:
        return []

    def _get_predicates(_nodes):
        # Convert each matching concept node back into a runnable ast.Expression.
        _predicates = []
        for _node in _nodes:
            python_node = ast.Expression(body=core.ast.nodes.concept_to_python(_node))
            python_node = ast.fix_missing_locations(python_node)
            _predicates.append(python_node)
        return _predicates

    if isinstance(expression, str):
        node = ast.parse(expression, mode="eval")
    else:
        # Bug fix: was 'return NotImplementedError()', which handed the
        # exception instance back to the caller instead of raising it.
        raise NotImplementedError()

    concept_node = core.ast.nodes.python_to_concept(node)
    main_op = concept_node.get_prop("body")
    return _get_predicates(_extract_predicates(sheerka, main_op, variables_to_include, variables_to_exclude))
def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclude):
    # Recursive worker for extract_predicates: walks a concept-node expression
    # tree and collects the sub-expressions ("predicates") that reference the
    # included variables while avoiding the excluded ones.
    predicates = []

    def _matches(_names, to_include, to_exclude):
        # True if at least one name is included and none is excluded,
        # False as soon as any name is excluded, None if nothing matched.
        _res = None
        for n in _names:
            if n in to_include and _res is None:
                _res = True
            if n in to_exclude:
                _res = False
        return _res

    if node.node_type == "Compare":
        if node.get_prop("left").node_type == "Name":
            """Simple case of one comparison"""
            comparison_name = sheerka.value(node.get_prop("left"))
            if comparison_name in variables_to_include and comparison_name not in variables_to_exclude:
                predicates.append(node)
        else:
            """The left part is an expression"""
            # Keep the whole comparison if its left sub-expression matches.
            res = _extract_predicates(sheerka, node.get_prop("left"), variables_to_include, variables_to_exclude)
            if len(res) > 0:
                predicates.append(node)
    elif node.node_type == "Call":
        """Simple case predicate"""
        # Ensure we have the CallNodeConcept helper API (get_args_names).
        call_node = node if isinstance(node, CallNodeConcept) else CallNodeConcept().update_from(node)
        args = list(call_node.get_args_names(sheerka))
        if _matches(args, variables_to_include, variables_to_exclude):
            predicates.append(node)
    elif node.node_type == "UnaryOp" and node.get_prop("op").node_type == "Not":
        """Simple case of negation"""
        # 'not X': keep the negation if X itself yields a predicate.
        res = _extract_predicates(sheerka, node.get_prop("operand"), variables_to_include, variables_to_exclude)
        if len(res) > 0:
            predicates.append(node)
    elif node.node_type == "BinOp":
        # Arithmetic/binary expression: keep it whole when its names match.
        names = get_names(sheerka, node)
        if _matches(names, variables_to_include, variables_to_exclude):
            predicates.append(node)
    elif node.node_type == "BoolOp":
        # 'and'/'or': if every operand matched, keep the whole BoolOp,
        # otherwise keep only the operands that matched individually.
        all_op = True
        temp_res = []
        for op in node.get_prop("values"):
            res = _extract_predicates(sheerka, op, variables_to_include, variables_to_exclude)
            if len(res) == 0:
                all_op = False
            else:
                temp_res.extend(res)
        if all_op:
            predicates.append(node)
        else:
            for res in temp_res:
                predicates.append(res)
    return predicates
+16 -10
View File
@@ -23,7 +23,7 @@ class Sheerka(Concept):
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
USER_CONCEPTS_KEYS = "User_Concepts" USER_CONCEPTS_KEYS = "User_Concepts"
def __init__(self, debug=False): def __init__(self, debug=False, skip_builtins_in_db=False):
log.debug("Starting Sheerka.") log.debug("Starting Sheerka.")
super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA) super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
@@ -53,6 +53,7 @@ class Sheerka(Concept):
self.parsers_prefix = None self.parsers_prefix = None
self.debug = debug self.debug = debug
self.skip_builtins_in_db = skip_builtins_in_db
def initialize(self, root_folder=None): def initialize(self, root_folder=None):
""" """
@@ -109,14 +110,15 @@ class Sheerka(Concept):
if not concept.is_unique and str(key) in builtins_classes: if not concept.is_unique and str(key) in builtins_classes:
self.builtin_cache[key] = builtins_classes[str(key)] self.builtin_cache[key] = builtins_classes[str(key)]
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key) if not self.skip_builtins_in_db:
if from_db is None: from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
log.debug(f"'{concept.name}' concept is not found in db. Adding.") if from_db is None:
self.set_id_if_needed(concept, True) log.debug(f"'{concept.name}' concept is not found in db. Adding.")
self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True) self.set_id_if_needed(concept, True)
else: self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
log.debug(f"Found concept '{from_db}' in db. Updating.") else:
concept.update_from(from_db) log.debug(f"Found concept '{from_db}' in db. Updating.")
concept.update_from(from_db)
self.add_in_cache(concept) self.add_in_cache(concept)
@@ -125,7 +127,11 @@ class Sheerka(Concept):
Init the parsers Init the parsers
:return: :return:
""" """
for parser in core.utils.get_sub_classes("parsers", "parsers.BaseParser.BaseParser"): base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
for parser in core.utils.get_sub_classes("parsers", base_class):
if parser.__module__ == base_class.__module__:
continue
log.debug(f"Adding builtin parser '{parser.__name__}'") log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers.append(parser) self.parsers.append(parser)
+7 -5
View File
@@ -54,12 +54,14 @@ class Token:
column: int column: int
def __repr__(self): def __repr__(self):
if type == TokenKind.IDENTIFIER: if self.type == TokenKind.IDENTIFIER:
value = "ident:" + str(self.value) value = str(self.value)
elif type == TokenKind.WHITESPACE: elif self.type == TokenKind.WHITESPACE:
value = " " value = "<ws>"
elif type == TokenKind.NEWLINE: elif self.type == TokenKind.NEWLINE:
value = r"\n" value = r"\n"
elif self.type == TokenKind.EOF:
value = "<EOF>"
else: else:
value = self.value value = self.value
+27 -6
View File
@@ -105,27 +105,28 @@ def get_classes_from_package(package_name):
yield c yield c
def get_sub_classes(package_name, base_class_name): def get_sub_classes(package_name, base_class):
pkg = __import__(package_name) pkg = __import__(package_name)
prefix = pkg.__name__ + "." prefix = pkg.__name__ + "."
for (module_loader, name, ispkg) in pkgutil.iter_modules(pkg.__path__, prefix): for (module_loader, name, ispkg) in pkgutil.iter_modules(pkg.__path__, prefix):
importlib.import_module(name) importlib.import_module(name)
base_class = get_class(base_class_name) base_class = get_class(base_class) if isinstance(base_class, str) else base_class
return base_class.__subclasses__() return set(base_class.__subclasses__()).union(
[s for c in base_class.__subclasses__() for s in get_sub_classes(package_name, c)])
def remove_from_list(lst, to_remove): def remove_from_list(lst, to_remove_predicate):
""" """
Removes elements from a list if they exist Removes elements from a list if they exist
:param lst: :param lst:
:param to_remove: :param to_remove_predicate:
:return: :return:
""" """
flagged = [] flagged = []
for item in lst: for item in lst:
if to_remove(item): if to_remove_predicate(item):
flagged.append(item) flagged.append(item)
for item in flagged: for item in flagged:
@@ -134,3 +135,23 @@ def remove_from_list(lst, to_remove):
return lst return lst
def product(a, b):
    """
    Cartesian-style product between a list of lists 'a' and a flat list 'b':
    every combination in 'a' is extended by one element of 'b'.
    When either side is empty/None, the other one is returned unchanged.
    """
    if a is None or len(a) == 0:
        return b
    if b is None or len(b) == 0:
        return a
    # For each element of b, append it to every partial combination of a.
    return [item_a + [item_b] for item_b in b for item_a in a]
+1 -1
View File
@@ -47,7 +47,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
for prop_name, prop_value in context.obj.props.items(): for prop_name, prop_value in context.obj.props.items():
my_locals[prop_name] = prop_value.value my_locals[prop_name] = prop_value.value
node_concept = core.ast.nodes.transform(ast_) node_concept = core.ast.nodes.python_to_concept(ast_)
unreferenced_names_visitor = UnreferencedNamesVisitor(context.sheerka) unreferenced_names_visitor = UnreferencedNamesVisitor(context.sheerka)
unreferenced_names_visitor.visit(node_concept) unreferenced_names_visitor.visit(node_concept)
+4
View File
@@ -51,6 +51,10 @@ class BaseParser:
if tokens is None: if tokens is None:
return "" return ""
res = "" res = ""
if not hasattr(tokens, "__iter__"):
tokens = [tokens]
for token in tokens: for token in tokens:
value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value
res += value res += value
+495
View File
@@ -0,0 +1,495 @@
#####################################################################################################
# This part of code is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# I don't directly use the project, but it helped me figure out
# what to do.
# Dejanović I., Milosavljević G., Vaderna R.:
# Arpeggio: A flexible PEG parser for Python,
# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
#####################################################################################################
from dataclasses import field, dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils
import logging
log = logging.getLogger(__name__)
def flatten(iterable):
    """
    Flatten a parse tree: keep nodes whose expression carries a rule name,
    hoisting the children of anonymous intermediate nodes up one level.
    """
    if iterable is None:
        return []
    result = []
    for node in iterable:
        if node.parsing_expression.rule_name:
            # Named node: keep it, flattening its own subtree in place.
            if hasattr(node, "children"):
                node.children = flatten(node.children)
            result.append(node)
        elif hasattr(node, "children"):
            # Anonymous non-terminal: splice its flattened children in.
            result.extend(flatten(node.children))
        else:
            # Anonymous terminal: kept as-is.
            result.append(node)
    return result
@dataclass()
class LexerNode(Node):
    # Base class for all lexer parse-tree nodes.
    # start/end are token indexes (inclusive) into the parser's token stream.
    start: int
    end: int
class ConceptNode(LexerNode):
    """Parse-tree node marking a span of tokens recognized as one concept."""

    def __init__(self, concept, start, end, tokens=None, source=None, children=None):
        super().__init__(start, end)
        self.concept = concept
        self.tokens = tokens
        self.source = source
        self.children = children
        # Derive the textual source from the tokens when not supplied.
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        # Positions must match first (base-class equality), then concept/text.
        if not super().__eq__(other):
            return False
        if not isinstance(other, ConceptNode):
            return False
        return self.concept == other.concept and \
               self.source == other.source

    def __hash__(self):
        # Keep the hash aligned with the fields used by __eq__.
        return hash((self.concept, self.start, self.end, self.source))
class NonTerminalNode(LexerNode):
    """Inner parse-tree node produced by a composite parsing expression."""

    def __init__(self, parsing_expression, start, end, children=None):
        super().__init__(start, end)
        self.parsing_expression = parsing_expression
        self.children = children

    def __repr__(self):
        # Prefer the grammar rule name; fall back to the expression class name.
        label = self.parsing_expression.rule_name or type(self.parsing_expression).__name__
        if len(self.children) > 0:
            inner = ",".join(repr(child) for child in self.children)
            return label + "(" + inner + ")"
        return label
class TerminalNode(LexerNode):
    """Leaf parse-tree node holding the literal value that was matched."""

    def __init__(self, parsing_expression, start, end, value):
        super().__init__(start, end)
        self.parsing_expression = parsing_expression
        self.value = value

    def __repr__(self):
        label = self.parsing_expression.rule_name or ""
        return f"{label}'{self.value}'"
@dataclass()
class GrammarErrorNode(ErrorNode):
    # Error produced while building the grammar model (not while parsing input).
    message: str
class ParsingExpression:
    """
    Base class of the grammar model.
    'elements' holds the raw sub-expressions as written by the user;
    'nodes' holds their resolved model counterparts.
    """

    def __init__(self, *args, **kwargs):
        self.elements = args
        nodes = kwargs.get('nodes', [])
        # Normalize a single node into a one-element list.
        self.nodes = nodes if hasattr(nodes, '__iter__') else [nodes]
        self.rule_name = kwargs.get('rule_name', '')

    def parse(self, parser):
        # Delegate to the subclass-specific matcher.
        return self._parse(parser)
class Sequence(ParsingExpression):
    """
    Will match sequence of parser expressions in exact order they are defined.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = parser.pos
        children = []
        for e in self.nodes:
            node = e.parse(parser)
            if node is None:
                # One element failed -> the whole sequence fails.
                return None
            else:
                if node.end != -1:  # because Optional returns -1 when no match
                    children.append(node)
                    end_pos = node.end
        return NonTerminalNode(self, init_pos, end_pos, children)
class OrderedChoice(ParsingExpression):
    """
    Will match one among multiple
    It will stop at the first match (so the order of definition is important)
    """

    def _parse(self, parser):
        init_pos = parser.pos
        for e in self.nodes:
            node = e.parse(parser)
            if node:
                # First alternative that matches wins.
                return NonTerminalNode(self, init_pos, node.end, [node])
            parser.seek(init_pos)  # backtrack
        return None
class Optional(ParsingExpression):
    """
    Will match or not the elements
    if many matches, will choose longest one
    If you need order, use Optional(OrderedChoice)
    """

    def _parse(self, parser):
        init_pos = parser.pos
        # Sentinel with end == -1 means "matched nothing" (always a success).
        selected_node = NonTerminalNode(self, parser.pos, -1, [])
        for e in self.nodes:
            node = e.parse(parser)
            if node:
                # Keep the longest match among all alternatives.
                if node.end > selected_node.end:
                    selected_node = node
            parser.seek(init_pos)  # backtrack
        if selected_node.end != -1:
            parser.seek(selected_node.end)
            parser.next_token()  # eat the tokens found
        return selected_node
class Match(ParsingExpression):
    """
    Base class for all classes that will try to match something from the input.
    """

    def __init__(self, rule_name, root=False):
        # 'root' is accepted for API symmetry; ParsingExpression ignores it.
        super(Match, self).__init__(rule_name=rule_name, root=root)

    def parse(self, parser):
        result = self._parse(parser)
        return result
class StrMatch(Match):
    """
    Matches a literal
    """

    def __init__(self, to_match, rule_name="", root=False, ignore_case=None):
        # NOTE: super(Match, self) deliberately skips Match.__init__ and goes
        # straight to ParsingExpression.__init__.
        super(Match, self).__init__(rule_name=rule_name, root=root)
        self.to_match = to_match
        # None means "inherit the parser-wide ignore_case setting later".
        self.ignore_case = ignore_case

    def __repr__(self):
        return f"StrMatch('{self.to_match}')"

    def _parse(self, parser):
        token = parser.get_token()
        # str() lets non-string token values (numbers, ...) be compared
        # case-insensitively too.
        m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
            else token.value == self.to_match
        if m:
            node = TerminalNode(self, parser.pos, parser.pos, token.value)
            parser.next_token()
            return node
        return None
class CrossRef:
    """
    During the creation of the model,
    Creates reference to a concept, as it may not be resolved yet
    """

    def __init__(self, concept):
        # Concept whose grammar will be substituted during resolution.
        self.concept = concept
class ConceptLexerParser(BaseParser):
    """
    PEG-style parser that recognizes user-defined concepts in a token stream.

    Grammars are registered through initialize() as a mapping of
    Concept -> grammar definition (plain strings, StrMatch, Sequence,
    OrderedChoice, Optional, or other Concepts as cross references).
    """

    def __init__(self):
        super().__init__("ConceptLexer")
        # Concept -> resolved grammar model (ParsingExpression tree).
        self.concepts_dict = {}
        self.ignore_case = True
        # Current token / position in self.tokens (-1 = before the first one).
        self.token = None
        self.pos = -1
        self.tokens = None
        self.context = None
        self.text = None
        self.sheerka = None

    def add_error(self, error, next_token=True):
        # Record an error and optionally skip the offending token.
        # NOTE(review): has_error/error_sink are presumably initialized by
        # BaseParser — confirm.
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def reset_parser(self, context, text):
        # Prepare the parser for a fresh run over 'text' (a string to
        # tokenize, or an already-produced iterable of tokens).
        self.context = context
        self.sheerka = context.sheerka
        self.text = text
        if isinstance(text, str):
            self.tokens = list(Tokenizer(text))
        else:
            self.tokens = list(text)
        self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1))  # make sure to finish with end of file token
        self.token = None
        self.pos = -1
        self.next_token()

    def get_token(self) -> Token:
        # Current token; never advances.
        return self.token

    def next_token(self, skip_whitespace=True):
        # Advance to the next (by default non-whitespace) token.
        # Returns False once EOF is reached, True otherwise.
        if self.token and self.token.type == TokenKind.EOF:
            return False
        self.pos += 1
        self.token = self.tokens[self.pos]
        if skip_whitespace:
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]
        return self.token.type != TokenKind.EOF

    def seek(self, pos):
        # Jump to an absolute token position (used for backtracking).
        self.pos = pos
        self.token = self.tokens[self.pos]
        return True

    def rewind(self, offset, skip_whitespace=True):
        # Move relatively by 'offset', then back up over any whitespace.
        self.pos += offset
        self.token = self.tokens[self.pos]
        if skip_whitespace:
            while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
                self.pos -= 1
                self.token = self.tokens[self.pos]

    def initialize(self, dict):
        """
        Adds a bunch of concepts, and how they can be recognized
        :param dict: dictionary of concept; concept_definition
            (note: this parameter shadows the builtin 'dict')
        :return:
        """
        nodes_to_resolve = []
        concepts_to_resolve = set()
        # ## Gets the grammars
        for concept, concept_def in dict.items():
            concept.init_key()  # make sure that the key is initialized
            grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve)
            self.concepts_dict[concept] = grammar
        # ## Removes concepts with infinite recursions
        concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
        for concept in concepts_to_remove:
            concepts_to_resolve.remove(concept)
            del self.concepts_dict[concept]
        # ## Resolves cross references and remove grammar with unresolved references
        self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve)

    def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve):
        # Build the grammar model for one concept definition, collecting
        # unresolved cross references into the given accumulators.
        def inner_get_model(expression):
            if isinstance(expression, Concept):
                # Reference to another concept: resolved in a later pass.
                ret = CrossRef(expression)
                concepts_to_resolve.add(concept)
                nodes_to_resolve.append(ret)
            elif isinstance(expression, str):
                ret = StrMatch(expression, ignore_case=self.ignore_case)
            elif isinstance(expression, StrMatch):
                ret = expression
                if ret.ignore_case is None:
                    # Inherit the parser-wide case sensitivity.
                    ret.ignore_case = self.ignore_case
            elif isinstance(expression, Sequence) or \
                    isinstance(expression, OrderedChoice) or \
                    isinstance(expression, Optional):
                ret = expression
                # Recursively model the raw sub-expressions.
                ret.nodes.extend([inner_get_model(e) for e in ret.elements])
                if any((isinstance(x, CrossRef) for x in ret.nodes)):
                    concepts_to_resolve.add(concept)
                    nodes_to_resolve.append(ret)
            else:
                ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."))
            return ret

        model = inner_get_model(concept_def)
        if isinstance(model, CrossRef):
            concepts_to_resolve.add(concept)
        model.rule_name = concept.key
        return model

    def detect_infinite_recursion(self, concepts_to_resolve):
        # Return the concepts whose grammar would recurse into itself forever
        # (left recursion through cross references).
        # infinite recursion matcher
        def _is_infinite_recursion(ref_concept, node):
            if isinstance(node, CrossRef):
                if node.concept == ref_concept:
                    return True
                return _is_infinite_recursion(ref_concept, self.concepts_dict[node.concept])
            if isinstance(node, OrderedChoice):
                # Only the first alternative is tried first -> only it can
                # cause immediate recursion.
                return _is_infinite_recursion(ref_concept, node.nodes[0])
            if isinstance(node, Sequence):
                for node in node.nodes:
                    if _is_infinite_recursion(ref_concept, node):
                        return True
                return False
            return False

        removed_concepts = []
        for e in concepts_to_resolve:
            to_resolve = self.concepts_dict[e]
            if _is_infinite_recursion(e, to_resolve):
                removed_concepts.append(e)
        return removed_concepts

    # Cross-ref resolving
    def resolve_cross_references(self, concepts_to_resolve, nodes_to_resolve):
        # Replace CrossRef placeholders by the actual concept grammars,
        # repeating until concept-to-concept aliases are fully resolved.
        repeat = True
        while repeat:
            repeat = False
            for e in concepts_to_resolve:
                to_resolve = self.concepts_dict[e]
                if isinstance(to_resolve, CrossRef):
                    repeat = True
                    self.concepts_dict[e] = self.concepts_dict[to_resolve.concept]
            for e in nodes_to_resolve:
                if not isinstance(e, ParsingExpression):
                    continue  # cases when a concept directly references another concept
                for i, node in enumerate(e.nodes):
                    if isinstance(node, CrossRef):
                        if node.concept in self.concepts_dict:
                            e.nodes[i] = self.concepts_dict[node.concept]

    def parse(self, context, text):
        # Parse 'text' and return ReturnValue(s) describing the recognized
        # concepts, or an error result when part of the input is unknown.
        if text == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )
        self.reset_parser(context, text)
        concepts_found = [[]]
        # actually list of list
        # The first dimension is the number of possibilities found
        # The second dimension is the number of concepts found, under one possibility
        #
        # Example 1
        # concept foo : 'one' 'two'
        # concept bar : 'one' 'two'
        # input 'one two' -> will produce two possibilities (foo and bar).
        #
        # Example 2
        # concept foo : 'one'
        # concept bar : 'two'
        # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
        while True:
            init_pos = self.pos
            res = []
            for concept, grammar in self.concepts_dict.items():
                # Every grammar is tried from the same starting position.
                self.seek(init_pos)
                node = grammar.parse(self)
                if node is not None:
                    concept_node = ConceptNode(concept, node.start, node.end, self.tokens[node.start: node.end + 1])
                    if hasattr(node, "children"):
                        concept_node.children = node.children
                    res.append(concept_node)
            if len(res) == 0:  # not recognized
                self.seek(init_pos)
                not_recognized = self.get_text_from_tokens(self.get_token())
                self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized))
                break
            res = self.get_bests(res)  # only keep the concept that eat the more tokens
            for r in res:
                r.children = flatten(r.children)
            # Combine this step's alternatives with every possibility so far.
            concepts_found = core.utils.product(concepts_found, res)
            # loop
            self.seek(res[0].end)
            if not self.next_token():
                break
        # manage when nothing is recognized (or other error)
        if self.has_error:
            return self.sheerka.ret(
                self.name,
                False,
                self.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=text,
                    body=self.error_sink,
                    try_parsed=concepts_found[0] if len(concepts_found) == 1 else concepts_found))
        # else
        # returns as many ReturnValue than choices found
        ret = []
        for choice in concepts_found:
            ret.append(
                self.sheerka.ret(
                    self.name,
                    True,
                    self.sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=text,
                        body=choice,
                        try_parsed=choice)))
        return ret[0] if len(ret) == 1 else ret

    @staticmethod
    def get_bests(results):
        """
        Returns the result that is the longest
        :param results:
        :return:
        """
        by_end_pos = defaultdict(list)
        for result in results:
            by_end_pos[result.end].append(result)
        # All the results sharing the furthest end position.
        return by_end_pos[max(by_end_pos)]
+2 -2
View File
@@ -129,7 +129,7 @@ class State:
if digest is None: if digest is None:
return return
if not isinstance(items, list): if not hasattr(items, "__iter__"):
items = [items] items = [items]
for item in items: for item in items:
@@ -575,9 +575,9 @@ class SheerkaDataProvider:
def exists(self, entry, key=None, digest=None): def exists(self, entry, key=None, digest=None):
""" """
Returns true if the entry is defined Returns true if the entry is defined
:param digest:
:param key: :param key:
:param entry: :param entry:
:param digest: digest of the object, when several entries share the same key
:return: :return:
""" """
snapshot = self.get_snapshot() snapshot = self.get_snapshot()
+1 -1
View File
@@ -8,7 +8,7 @@ from parsers.BaseParser import BaseParser
def get_context(): def get_context():
sheerka = Sheerka() sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://") sheerka.initialize("mem://")
return ExecutionContext("test", "xxx", sheerka) return ExecutionContext("test", "xxx", sheerka)
+560
View File
@@ -0,0 +1,560 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
CrossRef
@pytest.mark.parametrize("match, text", [
    ("foo", "foo"),
    ("'foo'", "'foo'"),
    ("1", "1"),
    ("3.14", "3.14"),
    ("+", "+"),
    (StrMatch("foo"), "foo"),
    (StrMatch("'foo'"), "'foo'"),
    (StrMatch("1"), "1"),
    (StrMatch("3.14"), "3.14"),
    (StrMatch("+"), "+"),
])
def test_i_can_match_simple_tokens(match, text):
    """A single literal (plain string or StrMatch) is recognized as its concept."""
    context = get_context()
    foo = Concept(name="foo")
    # Bug fix: the grammar must be the parametrized 'match' (otherwise the
    # StrMatch cases silently degrade to plain-string grammars and are
    # never actually exercised).
    concepts = {foo: match}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, text)
    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.value == [ConceptNode(foo, 0, 0, source=text)]
def test_i_can_match_multiple_concepts_in_one_input():
    """Several concepts can be recognized one after the other in the input."""
    context = get_context()
    one = Concept(name="one")
    two = Concept(name="two")
    concepts = {one: "one", two: "two"}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, "one two one")
    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # Positions are token indexes; whitespace tokens occupy the odd slots.
    assert res.value.value == [
        ConceptNode(one, 0, 0, source="one"),
        ConceptNode(two, 2, 2, source="two"),
        ConceptNode(one, 4, 4, source="one"),
    ]
def test_i_cannot_match_an_unknown_input():
    """Without any registered grammar, parsing reports UNKNOWN_CONCEPT."""
    context = get_context()
    parser = ConceptLexerParser()  # no grammar registered
    res = parser.parse(context, "foo")
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "foo"
def test_i_cannot_match_when_part_of_the_input_is_unknown():
    """Recognized prefixes are kept in try_parsed, the unknown tail is an error."""
    context = get_context()
    one = Concept(name="one")
    two = Concept(name="two")
    concepts = {one: "one", two: "two"}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, "one two three")
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.try_parsed == [
        ConceptNode(one, 0, 0, source="one"),
        ConceptNode(two, 2, 2, source="two")]  # these two were recognized
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "three"
def test_i_can_match_sequence():
    """A Sequence grammar matches its literals in order as one concept."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, "one two three")
    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # end == 4: the span covers tokens 0..4 (literals plus whitespace).
    assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]
def test_wrong_sequence_is_not_matched():
    """Trailing input that restarts the sequence but cannot finish it fails."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, "one two three one")
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # The full sequence at the start was recognized before the failure.
    assert res.value.try_parsed == [ConceptNode(foo, 0, 4, source="one two three")]
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "one"
def test_i_cannot_match_sequence_if_end_of_file():
    """A sequence truncated by end of input is not matched at all."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, "one two")
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # Nothing was recognized: the sequence failed from its very first token.
    assert res.value.try_parsed == []
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "one"
def test_i_always_choose_the_longest_match():
    """When two grammars match at the same position, the longest one wins."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, "one two three")
    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # foo eats more tokens than bar, so only foo is kept.
    assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]
def test_i_can_match_several_sequences():
    """Different sequence concepts can be matched back to back."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, "one two three one two")
    assert res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    # First the longest match (foo), then the remaining tokens match bar.
    assert res.value.value == [
        ConceptNode(foo, 0, 4, source="one two three"),
        ConceptNode(bar, 6, 8, source="one two"),
    ]
def test_i_can_match_ordered_choice():
    """Each alternative of an OrderedChoice matches; anything else fails
    and is reported as an unknown concept."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: OrderedChoice("one", "two")})
    # both alternatives succeed on their own
    for word in ("one", "two"):
        outcome = lexer.parse(ctx, word)
        assert outcome.status
        assert ctx.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [ConceptNode(foo, 0, 0, source=word)]
    # an unrelated token fails
    failed = lexer.parse(ctx, "three")
    assert not failed.status
    assert ctx.sheerka.isinstance(failed.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert failed.value.body[0].body == "three"
def test_i_cannot_match_ordered_choice_with_empty_alternative():
    """An empty-string alternative inside an OrderedChoice never lets the
    enclosing Sequence succeed on non-matching input."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence(OrderedChoice("one", ""), "two")}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    # "ok" matches neither "one" nor the empty alternative followed by "two"
    # NOTE(review): the original comment said the failing token "is 'two'" —
    # that does not match the input "ok"; confirm intended semantics.
    res = parser.parse(context, "ok")
    assert not res.status
def test_i_can_mix_sequences_and_ordered_choices():
    """An OrderedChoice nested at the head of a Sequence works for every
    alternative; incomplete input still fails."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")})
    for text in ("twenty one ok", "thirty one ok"):
        outcome = lexer.parse(ctx, text)
        assert outcome.status
        assert ctx.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [ConceptNode(foo, 0, 4, source=text)]
    # missing the trailing "ok": no match, nothing tentatively parsed
    failed = lexer.parse(ctx, "twenty one")
    assert not failed.status
    assert failed.value.body[0].body == "twenty"
    assert failed.value.try_parsed == []
def test_i_can_mix_ordered_choices_and_sequences():
    """A Sequence nested inside an OrderedChoice matches, and so does the
    bare second alternative."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: OrderedChoice(Sequence("twenty", "thirty"), "one")})
    assert lexer.parse(ctx, "twenty thirty").status
    assert lexer.parse(ctx, "one").status
def test_i_cannot_parse_empty_optional():
    """Parsing an empty input fails with an IS_EMPTY result, even when the
    only rule is an Optional."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: Optional("one")})
    outcome = lexer.parse(ctx, "")
    assert not outcome.status
    assert ctx.sheerka.isinstance(outcome.value, BuiltinConcepts.IS_EMPTY)
def test_i_can_parse_optional():
    """An Optional rule matches when its inner expression is present."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: Optional("one")})
    outcome = lexer.parse(ctx, "one")
    assert outcome.status
    assert outcome.value.value == [ConceptNode(foo, 0, 0, source="one")]
def test_i_can_parse_sequence_starting_with_optional():
    """A leading Optional may be present or absent; both inputs parse."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: Sequence(Optional("twenty"), "one")})
    with_prefix = lexer.parse(ctx, "twenty one")
    assert with_prefix.status
    assert with_prefix.value.body == [ConceptNode(foo, 0, 2, source="twenty one")]
    without_prefix = lexer.parse(ctx, "one")
    assert without_prefix.status
    assert without_prefix.value.body == [ConceptNode(foo, 0, 0, source="one")]
def test_i_can_parse_sequence_ending_with_optional():
    """A trailing Optional may be present or absent; both inputs parse."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: Sequence("one", "two", Optional("three"))})
    with_tail = lexer.parse(ctx, "one two three")
    assert with_tail.status
    assert with_tail.value.body == [ConceptNode(foo, 0, 4, source="one two three")]
    without_tail = lexer.parse(ctx, "one two")
    assert without_tail.status
    assert without_tail.value.body == [ConceptNode(foo, 0, 2, source="one two")]
def test_i_can_parse_sequence_with_optional_in_between():
    """An Optional in the middle of a Sequence may be present or absent."""
    ctx = get_context()
    foo = Concept(name="foo")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: Sequence("one", Optional("two"), "three")})
    with_middle = lexer.parse(ctx, "one two three")
    assert with_middle.status
    assert with_middle.value.body == [ConceptNode(foo, 0, 4, source="one two three")]
    without_middle = lexer.parse(ctx, "one three")
    assert without_middle.status
    assert without_middle.value.body == [ConceptNode(foo, 0, 2, source="one three")]
def test_i_can_use_reference():
    """A concept defined as a plain reference to another concept yields one
    parse result per matching concept (here: one for foo, one for bar)."""
    # The problem here is when there are multiple matches for the same input.
    # The parsing result is a list of all concepts found,
    # so it's already a list that represents a sequence, not a choice.
    # So I need to either create a choice concept, or
    # create the return value for every possible graph.
    # --> The latter seems to be the best as we don't defer the resolution of the problem to someone else
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {foo: Sequence("one", "two"), bar: foo}
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    res = parser.parse(context, "one two")
    assert len(res) == 2
    assert res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]
    assert res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]
def test_i_can_use_context_reference_with_multiple_levels():
    """Same as the single-reference test, but through a chain of references
    (baz -> bar -> foo): one result per concept in the chain."""
    ctx = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    lexer = ConceptLexerParser()
    lexer.initialize({foo: Sequence("one", "two"), bar: foo, baz: bar})
    results = lexer.parse(ctx, "one two")
    assert len(results) == 3
    for outcome, concept in zip(results, (foo, bar, baz)):
        assert outcome.status
        assert ctx.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [ConceptNode(concept, 0, 2, source="one two")]
def test_order_is_not_important_when_using_references():
    """A reference may be registered before the concept it points to."""
    ctx = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize({bar: foo, foo: Sequence("one", "two")})
    results = lexer.parse(ctx, "one two")
    assert len(results) == 2
    assert results[0].value.body == [ConceptNode(bar, 0, 2, source="one two")]
    assert results[1].value.body == [ConceptNode(foo, 0, 2, source="one two")]
def test_i_can_parse_when_reference():
    """A referenced concept can be embedded in a Sequence, and still match
    on its own when the rest of the sequence is absent."""
    ctx = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize({bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")})
    for text in ("twenty two", "thirty one"):
        outcome = lexer.parse(ctx, text)
        assert outcome.status
        assert outcome.value.body == [ConceptNode(bar, 0, 2, source=text)]
    # foo alone still matches when bar's tail is missing
    alone = lexer.parse(ctx, "twenty")
    assert alone.status
    assert alone.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
def test_i_can_detect_duplicates_when_reference():
    """When bar (through its optional tail) and foo both match the same
    input, one result per concept is returned."""
    ctx = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence(foo, Optional(OrderedChoice("one", "two"))),
        foo: OrderedChoice("twenty", "thirty")
    }
    lexer = ConceptLexerParser()
    lexer.initialize(grammar)
    results = lexer.parse(ctx, "twenty")
    assert len(results) == 2
    for outcome, concept in zip(results, (bar, foo)):
        assert outcome.status
        assert ctx.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [ConceptNode(concept, 0, 0, source="twenty")]
def test_i_can_detect_infinite_recursion():
    """Mutually recursive references are dropped from the parser's concept
    table at initialization."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    lexer = ConceptLexerParser()
    lexer.initialize({bar: foo, foo: bar})
    assert bar not in lexer.concepts_dict
    assert foo not in lexer.concepts_dict
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
    """Recursion through the FIRST alternative of an OrderedChoice is fatal
    and both concepts are dropped; recursion through a LATER alternative is
    fine because the earlier alternative can terminate."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    # bar -> foo -> OrderedChoice(bar, ...) recurses before any terminal is tried
    concepts = {
        bar: foo,
        foo: OrderedChoice(bar, "foo")
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    assert foo not in parser.concepts_dict  # removed because of the infinite recursion
    assert bar not in parser.concepts_dict  # removed because of the infinite recursion
    # the other way around is possible: the terminal "foo" comes first
    context = get_context()
    concepts = {
        bar: foo,
        foo: OrderedChoice("foo", bar)
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    assert foo in parser.concepts_dict
    assert bar in parser.concepts_dict
    res = parser.parse(context, "foo")
    # both bar (via the reference) and foo match the same token
    assert len(res) == 2
    assert res[0].status
    assert res[0].value.body == [ConceptNode(bar, 0, 0, source="foo")]
    assert res[1].status
    assert res[1].value.body == [ConceptNode(foo, 0, 0, source="foo")]
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
    """A concept that recurses into itself through a Sequence element is
    rejected at initialization, along with the concept referencing it."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {
        bar: foo,
        foo: Sequence("one", bar, "two")
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    assert foo not in parser.concepts_dict  # removed because of the infinite recursion
    assert bar not in parser.concepts_dict  # removed because of the infinite recursion
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
    """Recursion hidden inside an OrderedChoice that is itself nested in a
    Sequence is still detected and both concepts are dropped."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    concepts = {
        bar: foo,
        foo: Sequence("one", OrderedChoice(bar, "other"), "two")
    }
    parser = ConceptLexerParser()
    parser.initialize(concepts)
    assert foo not in parser.concepts_dict  # removed because of the infinite recursion
    assert bar not in parser.concepts_dict  # removed because of the infinite recursion
def test_i_can_detect_indirect_infinite_recursion_with_optional():
    """Placeholder: recursion hidden behind an Optional is not covered yet."""
    # TODO infinite recursion with optional
    pass
#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
# context = get_context()
# add = Concept(name="add")
# mult = Concept(name="mult")
# atom = Concept(name="atom")
#
# concepts = {
# add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))),
# mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))),
# atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
# }
#
# parser = ConceptLexerParser()
# parser.register(concepts)
#
# # res = parser.parse(context, "1")
# # assert len(res) == 3 # add, mult, atom
# #
# # res = parser.parse(context, "1 * 2")
# # assert len(res) == 2 # add and mult
# #
# # res = parser.parse(context, "1 + 2")
# # assert res.status
# # assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2")]
#
# res = parser.parse(context, "1 * 2 + 3")
# assert res.status
# assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")]
def test_i_can_register_concepts_with_the_same_name():
    """Placeholder: behavior for name collisions is not specified yet."""
    # TODO : concepts are registered by name,
    # what when two concepts have the same name ?
    pass
def test_i_can_parse_very_very_long_input():
    """Placeholder: streaming tokenization is not implemented yet."""
    # TODO: In the current implementation, all the tokens are loaded in memory
    # It's clearly not the good approach
    pass
def get_context():
    """Build a fresh in-memory Sheerka (builtins skipped for speed) and wrap
    it in an execution context for the tests above."""
    backing = Sheerka(skip_builtins_in_db=True)
    backing.initialize("mem://")
    return ExecutionContext("sheerka", "xxxx", backing)
+1 -1
View File
@@ -67,7 +67,7 @@ def get_concept(name, where=None, pre=None, post=None, body=None):
def get_context(): def get_context():
sheerka = Sheerka() sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://") sheerka.initialize("mem://")
return ExecutionContext("test", "xxx", sheerka) return ExecutionContext("test", "xxx", sheerka)
+1 -1
View File
@@ -125,7 +125,7 @@ def test_i_can_detect_concept_from_tokens():
def get_context(): def get_context():
sheerka = Sheerka() sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://") sheerka.initialize("mem://")
return ExecutionContext("sheerka", "xxxx", sheerka) return ExecutionContext("sheerka", "xxxx", sheerka)
+1 -1
View File
@@ -7,7 +7,7 @@ from parsers.BaseParser import BaseParser
def get_context(): def get_context():
sheerka = Sheerka() sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://") sheerka.initialize("mem://")
return ExecutionContext("test", "xxx", sheerka) return ExecutionContext("test", "xxx", sheerka)
+1 -1
View File
@@ -8,7 +8,7 @@ from parsers.PythonParser import PythonNode, PythonParser
def get_context(): def get_context():
sheerka = Sheerka() sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://") sheerka.initialize("mem://")
return ExecutionContext("test", "xxx", sheerka) return ExecutionContext("test", "xxx", sheerka)
+1 -1
View File
@@ -9,7 +9,7 @@ from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode
def get_context(): def get_context():
sheerka = Sheerka() sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://") sheerka.initialize("mem://")
return ExecutionContext("test", "xxx", sheerka) return ExecutionContext("test", "xxx", sheerka)
+29 -4
View File
@@ -8,7 +8,7 @@ from core.sheerka import Sheerka
def get_sheerka(): def get_sheerka():
sheerka = Sheerka() sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://") sheerka.initialize("mem://")
return sheerka return sheerka
@@ -34,7 +34,7 @@ def my_function(a,b):
return a return a
""" """
tree = ast.parse(source) tree = ast.parse(source)
tree_as_concept = core.ast.nodes.transform(tree) tree_as_concept = core.ast.nodes.python_to_concept(tree)
sheerka = get_sheerka() sheerka = get_sheerka()
assert tree_as_concept.node_type == "Module" assert tree_as_concept.node_type == "Module"
@@ -87,7 +87,7 @@ def my_function(a,b):
""" """
node = ast.parse(source) node = ast.parse(source)
concept_node = core.ast.nodes.transform(node) concept_node = core.ast.nodes.python_to_concept(node)
visitor = TestNameVisitor() visitor = TestNameVisitor()
visitor.visit(concept_node) visitor.visit(concept_node)
@@ -115,7 +115,7 @@ my_function(x,y)
sheerka = get_sheerka() sheerka = get_sheerka()
node = ast.parse(source) node = ast.parse(source)
concept_node = core.ast.nodes.transform(node) concept_node = core.ast.nodes.python_to_concept(node)
visitor = UnreferencedNamesVisitor(sheerka) visitor = UnreferencedNamesVisitor(sheerka)
visitor.visit(concept_node) visitor.visit(concept_node)
@@ -129,3 +129,28 @@ my_function(x,y)
def test_i_can_compare_NodeParent_with_tuple(): def test_i_can_compare_NodeParent_with_tuple():
node_parent = NodeParent(GenericNodeConcept("For", None), "target") node_parent = NodeParent(GenericNodeConcept("For", None), "target")
assert node_parent == ("For", "target") assert node_parent == ("For", "target")
def test_i_can_transform_back():
source = """
def my_function(a,b):
for i in range(b):
a = a + b
return a
my_function(x, y)
"""
node = ast.parse(source)
concept_node = core.ast.nodes.python_to_concept(node)
transformed_back = core.ast.nodes.concept_to_python(concept_node)
assert dump_ast(transformed_back) == dump_ast(node)
def dump_ast(node):
dump = ast.dump(node)
for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]:
dump = dump.replace(to_remove, "")
return dump
+51 -1
View File
@@ -1,3 +1,7 @@
import ast
import pytest
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from core.sheerka import Sheerka, ExecutionContext from core.sheerka import Sheerka, ExecutionContext
import core.builtin_helpers import core.builtin_helpers
@@ -110,8 +114,47 @@ def test_i_can_use_expect_one_when_not_a_list_false():
assert res == item assert res == item
@pytest.mark.parametrize("expression, vars_to_include, vars_to_exclude, expected_expr", [
("a == 1", [], [], []),
("a == 1", ["a"], [], ["a == 1"]),
("a == 1", [], ["a"], []),
("predicate(a)", [], [], []),
("predicate(a)", ["a"], [], ["predicate(a)"]),
("predicate(a, b)", ["a"], [], ["predicate(a, b)"]),
("predicate(a, b)", ["b"], [], ["predicate(a, b)"]),
("predicate(a, b)", ["a", "b"], [], ["predicate(a, b)"]),
("predicate(a, b)", ["a"], ["b"], []),
("a + b == 1", [], [], []),
("a + b == 1", ["a"], [], ["a + b == 1"]),
("a + b == 1", ["a"], ["b"], []),
("a + b == 1", ["b"], [], ["a + b == 1"]),
("a + b == 1", ["a", "b"], [], ["a + b == 1"]),
("a == 1 and b == 2", [], [], []),
("a == 1 and b == 2", ["a"], [], ["a == 1"]),
("a == 1 and b == 2", ["b"], [], ["b == 2"]),
("a == 1 and b == 2", ["a"], ["b"], ["a == 1"]),
("a == 1 and b == 2", ["a", "b"], [], ["a == 1 and b == 2"]),
("predicate(a,c) and predicate(b,c)", ["a", "b"], [], ["predicate(a,c) and predicate(b,c)"]),
("not(a == 1)", [], [], []),
("not(a == 1)", ["a"], [], ["not(a==1)"]),
("a == 1 or b == 2", [], [], []),
("a == 1 or b == 2", ["a"], [], ["a == 1"]),
("a == 1 or b == 2", ["b"], [], ["b == 2"]),
("a == 1 or b == 2", ["a", "b"], [], ["a == 1 or b == 2"]),
("predicate(a,c) or predicate(b,c)", ["a", "b"], [], ["predicate(a,c) or predicate(b,c)"]),
])
def test_i_can_extract_predicates(expression, vars_to_include, vars_to_exclude, expected_expr):
sheerka = get_sheerka()
expected = [ast.parse(expr, mode="eval") for expr in expected_expr]
actual = core.builtin_helpers.extract_predicates(sheerka, expression, vars_to_include, vars_to_exclude)
assert len(actual) == len(expected)
for i in range(len(actual)):
assert dump_ast(actual[i]) == dump_ast(expected[i])
def get_sheerka(): def get_sheerka():
sheerka = Sheerka() sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://") sheerka.initialize("mem://")
return sheerka return sheerka
@@ -119,3 +162,10 @@ def get_sheerka():
def get_context(sheerka): def get_context(sheerka):
return ExecutionContext("test", "xxx", sheerka) return ExecutionContext("test", "xxx", sheerka)
def dump_ast(node):
dump = ast.dump(node)
for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]:
dump = dump.replace(to_remove, "")
return dump
+23 -8
View File
@@ -50,7 +50,7 @@ def test_i_can_list_builtin_concepts():
def test_builtin_concepts_are_initialized(): def test_builtin_concepts_are_initialized():
sheerka = get_sheerka() sheerka = get_sheerka(skip_builtins_in_db=False)
assert len(sheerka.concepts_cache) == len(BuiltinConcepts) assert len(sheerka.concepts_cache) == len(BuiltinConcepts)
for concept_name in BuiltinConcepts: for concept_name in BuiltinConcepts:
assert str(concept_name) in sheerka.concepts_cache assert str(concept_name) in sheerka.concepts_cache
@@ -61,7 +61,7 @@ def test_builtin_concepts_are_initialized():
def test_builtin_concepts_can_be_updated(): def test_builtin_concepts_can_be_updated():
sheerka = get_sheerka(root_folder) sheerka = get_sheerka(root_folder, skip_builtins_in_db=False)
loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
loaded_sheerka.desc = "I have a description" loaded_sheerka.desc = "I have a description"
sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka) sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka)
@@ -89,7 +89,8 @@ def test_i_can_add_a_concept():
assert concept_found.id == "1001" assert concept_found.id == "1001"
assert concept.key in sheerka.concepts_cache assert concept.key in sheerka.concepts_cache
assert sheerka.sdp.io.exists(sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_found.get_digest())) assert sheerka.sdp.io.exists(
sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_found.get_digest()))
def test_i_cannot_add_the_same_concept_twice(): def test_i_cannot_add_the_same_concept_twice():
@@ -414,7 +415,8 @@ as:
assert getattr(concept_saved, prop) == getattr(expected, prop) assert getattr(concept_saved, prop) == getattr(expected, prop)
assert concept_saved.key in sheerka.concepts_cache assert concept_saved.key in sheerka.concepts_cache
assert sheerka.sdp.io.exists(sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest())) assert sheerka.sdp.io.exists(
sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest()))
def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept(): def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept():
@@ -443,7 +445,8 @@ def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept()
assert getattr(concept_saved, prop) == getattr(expected, prop) assert getattr(concept_saved, prop) == getattr(expected, prop)
assert concept_saved.key in sheerka.concepts_cache assert concept_saved.key in sheerka.concepts_cache
assert sheerka.sdp.io.exists(sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest())) assert sheerka.sdp.io.exists(
sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest()))
def test_i_cannot_eval_the_same_def_concept_twice(): def test_i_cannot_eval_the_same_def_concept_twice():
@@ -551,12 +554,24 @@ def test_i_can_manage_concepts_with_the_same_key_when_values_are_the_same():
res = sheerka.eval("hello 'foo'") res = sheerka.eval("hello 'foo'")
assert len(res) == 1 assert len(res) == 1
assert res[0].status assert res[0].status
assert res[0].value, "hello foo" assert res[0].value == "hello foo"
assert res[0].who == sheerka.get_evaluator_name(MultipleSameSuccessEvaluator.NAME) assert res[0].who == sheerka.get_evaluator_name(MultipleSameSuccessEvaluator.NAME)
def get_sheerka(root="mem://"): def test_i_can_create_concepts_on_python_codes():
sheerka = Sheerka() sheerka = get_sheerka()
context = get_context(sheerka)
sheerka.create_new_concept(context, Concept(name="concepts", body="sheerka.concepts()"))
res = sheerka.eval("concepts")
assert len(res) == 1
assert res[0].status
assert isinstance(res[0].value, list)
def get_sheerka(root="mem://", skip_builtins_in_db=True):
sheerka = Sheerka(skip_builtins_in_db)
sheerka.initialize(root) sheerka.initialize(root)
return sheerka return sheerka
+20
View File
@@ -51,8 +51,28 @@ def test_i_can_get_sub_classes():
default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser") default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser")
exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser") exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser")
python_parser = core.utils.get_class("parsers.PythonParser.PythonParser") python_parser = core.utils.get_class("parsers.PythonParser.PythonParser")
concept_lexer_parser = core.utils.get_class("parsers.ConceptLexerParser.ConceptLexerParser")
assert base_parser not in sub_classes assert base_parser not in sub_classes
assert default_parser in sub_classes assert default_parser in sub_classes
assert exact_concept_parser in sub_classes assert exact_concept_parser in sub_classes
assert python_parser in sub_classes assert python_parser in sub_classes
assert concept_lexer_parser in sub_classes
@pytest.mark.parametrize("a,b, expected", [
([], [], []),
([], ['a'], ['a']),
([[]], ['a'], [['a']]),
(['a'], [], ['a']),
([['a']], [], [['a']]),
([['a']], ['b'], [['a', 'b']]),
([['a'], ['b']], ['c'], [['a', 'c'], ['b', 'c']]),
([['a1', 'a2'], ['b1', 'b2', 'b3']], ['c'], [['a1', 'a2', 'c'], ['b1', 'b2', 'b3', 'c']]),
([[]], ['a', 'b'], [['a'], ['b']]),
([['a'], ['b']], ['c', 'd', 'e'], [['a', 'c'], ['b', 'c'], ['a', 'd'], ['b', 'd'], ['a', 'e'], ['b', 'e']]),
])
def test_i_can_product(a, b, expected):
res = core.utils.product(a, b)
assert res == expected