diff --git a/core/ast/nodes.py b/core/ast/nodes.py index f88c209..715afd8 100644 --- a/core/ast/nodes.py +++ b/core/ast/nodes.py @@ -1,6 +1,7 @@ from core.builtin_concepts import BuiltinConcepts, ListConcept from core.concept import Concept import ast +import core.utils import logging @@ -45,18 +46,18 @@ class NodeParent: class NodeConcept(Concept): - def __init__(self, key, parent: NodeParent): + def __init__(self, key, node_type, parent: NodeParent): super().__init__(key, True, False, key) self.parent = parent + self.node_type = node_type def get_node_type(self): - return self.key + return self.node_type class GenericNodeConcept(NodeConcept): def __init__(self, node_type, parent): - super().__init__(BuiltinConcepts.GENERIC_NODE, parent) - self.node_type = node_type + super().__init__(BuiltinConcepts.GENERIC_NODE, node_type, parent) def __repr__(self): return "Generic:" + self.node_type @@ -74,17 +75,25 @@ class GenericNodeConcept(NodeConcept): return self.body -class IdentifierConcept(NodeConcept): +class IdentifierNodeConcept(NodeConcept): def __init__(self, parent, name): - super().__init__(BuiltinConcepts.IDENTIFIER_NODE, parent) + super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Name", parent) self.body = name -def transform(node): +class CallNodeConcept(NodeConcept): + def __init__(self, parent=None): + super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Call", parent) + + def get_args_names(self, sheerka): + return sheerka.values(self.get_prop("args")) + + +def python_to_concept(python_node): """ Transform Python AST node into concept nodes for better usage - :param node: + :param python_node: :return: """ @@ -107,4 +116,34 @@ def transform(node): concept.set_prop(field, value) return concept - return _transform(node, None) + return _transform(python_node, None) + + +def concept_to_python(concept_node): + """ + Transform back concept_node to Python AST node + :param concept_node: + :return: + """ + + def _transform(node): + node_type = node.get_node_type() + ast_object = core.utils.new_object("_ast." 
+ node_type)
+        for field in node.props:
+            if field not in ast_object._fields:
+                continue
+
+            value = node.get_prop(field)
+            if isinstance(value, list) or isinstance(value, Concept) and value.key == str(BuiltinConcepts.LIST):
+                lst = []
+                for i in value:
+                    lst.append(_transform(i))
+                setattr(ast_object, field, lst)
+            elif isinstance(value, NodeConcept):
+                setattr(ast_object, field, _transform(value))
+            else:
+                setattr(ast_object, field, value)
+        return ast_object
+
+    res = _transform(concept_node)
+    return res
diff --git a/core/ast/visitors.py b/core/ast/visitors.py
index 9af0378..6d1cb4b 100644
--- a/core/ast/visitors.py
+++ b/core/ast/visitors.py
@@ -57,13 +57,13 @@ class UnreferencedNamesVisitor(ConceptNodeVisitor):
     def visit_Name(self, node):
         parents = get_parents(node)
 
-        if ("For", "target") in parents: # variable used by the 'for' iteration
+        if ("For", "target") in parents:  # variable used by the 'for' iteration
             return
 
-        if ("Call", "func") in parents: # name of the function
+        if ("Call", "func") in parents:  # name of the function
             return
 
-        if ("Assign", "targets") in parents: # variable which is assigned
+        if ("Assign", "targets") in parents:  # variable which is assigned
             return
 
         if self.can_be_discarded(self.sheerka.value(node), parents):
@@ -91,6 +91,14 @@ class UnreferencedNamesVisitor(ConceptNodeVisitor):
         return False
 
 
+class ExtractPredicateVisitor(ConceptNodeVisitor):
+    def __init__(self, variable_name):
+        self.predicates = []
+        self.variable_name = variable_name
+
+
+
+
 def get_parents(node):
     if node.parent is None:
         return []
diff --git a/core/builtin_helpers.py b/core/builtin_helpers.py
index 144acbf..481a458 100644
--- a/core/builtin_helpers.py
+++ b/core/builtin_helpers.py
@@ -1,3 +1,8 @@
+import ast
+import core.ast.nodes
+from core.ast.nodes import CallNodeConcept, GenericNodeConcept
+from core.ast.visitors import UnreferencedNamesVisitor
+
 from core.builtin_concepts import BuiltinConcepts
 
 
@@ -81,3 +86,107 @@ def expect_one(context, return_values):
             False,
             sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, obj=return_values),
             parents=return_values)
+
+
+def get_names(sheerka, concept_node):
+    """
+    Finds all the names referenced by the concept_node
+    :param sheerka:
+    :param concept_node:
+    :return:
+    """
+    unreferenced_names_visitor = UnreferencedNamesVisitor(sheerka)
+    unreferenced_names_visitor.visit(concept_node)
+    return list(unreferenced_names_visitor.names)
+
+
+def extract_predicates(sheerka, expression, variables_to_include, variables_to_exclude):
+    """
+    From expression, tries to find all the predicates that reference the included variables, and only those variables.
+    For example, with
+    exp : isinstance(a, int) and isinstance(b, str)
+    it will return 'isinstance(a, int)' if variables_to_include == ['a']
+    :param sheerka:
+    :param expression:
+    :param variables_to_include: a predicate must reference at least one of these variables to be kept
+    :param variables_to_exclude: a predicate referencing any of these variables is dropped
+    :return: list of predicates, as ast.Expression nodes
+    """
+
+    if len(variables_to_include) == 0:
+        return []
+
+    def _get_predicates(_nodes):
+        _predicates = []
+        for _node in _nodes:
+            python_node = ast.Expression(body=core.ast.nodes.concept_to_python(_node))
+            python_node = ast.fix_missing_locations(python_node)
+            _predicates.append(python_node)
+        return _predicates
+
+    if isinstance(expression, str):
+        node = ast.parse(expression, mode="eval")
+    else:
+        raise NotImplementedError()
+
+    concept_node = core.ast.nodes.python_to_concept(node)
+    main_op = concept_node.get_prop("body")
+
+    return _get_predicates(_extract_predicates(sheerka, main_op, variables_to_include, variables_to_exclude))
+
+
+def _extract_predicates(sheerka, 
node, variables_to_include, variables_to_exclude): + predicates = [] + + def _matches(_names, to_include, to_exclude): + _res = None + for n in _names: + if n in to_include and _res is None: + _res = True + if n in to_exclude: + _res = False + return _res + + if node.node_type == "Compare": + if node.get_prop("left").node_type == "Name": + """Simple case of one comparison""" + comparison_name = sheerka.value(node.get_prop("left")) + if comparison_name in variables_to_include and comparison_name not in variables_to_exclude: + predicates.append(node) + else: + """The left part is an expression""" + res = _extract_predicates(sheerka, node.get_prop("left"), variables_to_include, variables_to_exclude) + if len(res) > 0: + predicates.append(node) + elif node.node_type == "Call": + """Simple case predicate""" + call_node = node if isinstance(node, CallNodeConcept) else CallNodeConcept().update_from(node) + args = list(call_node.get_args_names(sheerka)) + if _matches(args, variables_to_include, variables_to_exclude): + predicates.append(node) + elif node.node_type == "UnaryOp" and node.get_prop("op").node_type == "Not": + """Simple case of negation""" + res = _extract_predicates(sheerka, node.get_prop("operand"), variables_to_include, variables_to_exclude) + if len(res) > 0: + predicates.append(node) + elif node.node_type == "BinOp": + names = get_names(sheerka, node) + if _matches(names, variables_to_include, variables_to_exclude): + predicates.append(node) + elif node.node_type == "BoolOp": + all_op = True + temp_res = [] + for op in node.get_prop("values"): + res = _extract_predicates(sheerka, op, variables_to_include, variables_to_exclude) + if len(res) == 0: + all_op = False + else: + temp_res.extend(res) + + if all_op: + predicates.append(node) + else: + for res in temp_res: + predicates.append(res) + + return predicates diff --git a/core/sheerka.py b/core/sheerka.py index 328abc7..fbd8613 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -23,7 +23,7 @@ class Sheerka(Concept): BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" USER_CONCEPTS_KEYS = "User_Concepts" - def __init__(self, debug=False): + def __init__(self, debug=False, skip_builtins_in_db=False): log.debug("Starting Sheerka.") super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA) @@ -53,6 +53,7 @@ class Sheerka(Concept): self.parsers_prefix = None self.debug = debug + self.skip_builtins_in_db = skip_builtins_in_db def initialize(self, root_folder=None): """ @@ -109,14 +110,15 @@ class Sheerka(Concept): if not concept.is_unique and str(key) in builtins_classes: self.builtin_cache[key] = builtins_classes[str(key)] - from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key) - if from_db is None: - log.debug(f"'{concept.name}' concept is not found in db. Adding.") - self.set_id_if_needed(concept, True) - self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True) - else: - log.debug(f"Found concept '{from_db}' in db. Updating.") - concept.update_from(from_db) + if not self.skip_builtins_in_db: + from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key) + if from_db is None: + log.debug(f"'{concept.name}' concept is not found in db. Adding.") + self.set_id_if_needed(concept, True) + self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True) + else: + log.debug(f"Found concept '{from_db}' in db. 
Updating.") + concept.update_from(from_db) self.add_in_cache(concept) @@ -125,7 +127,11 @@ class Sheerka(Concept): Init the parsers :return: """ - for parser in core.utils.get_sub_classes("parsers", "parsers.BaseParser.BaseParser"): + base_class = core.utils.get_class("parsers.BaseParser.BaseParser") + for parser in core.utils.get_sub_classes("parsers", base_class): + if parser.__module__ == base_class.__module__: + continue + log.debug(f"Adding builtin parser '{parser.__name__}'") self.parsers.append(parser) diff --git a/core/tokenizer.py b/core/tokenizer.py index 83072ad..44aee50 100644 --- a/core/tokenizer.py +++ b/core/tokenizer.py @@ -54,12 +54,14 @@ class Token: column: int def __repr__(self): - if type == TokenKind.IDENTIFIER: - value = "ident:" + str(self.value) - elif type == TokenKind.WHITESPACE: - value = " " - elif type == TokenKind.NEWLINE: + if self.type == TokenKind.IDENTIFIER: + value = str(self.value) + elif self.type == TokenKind.WHITESPACE: + value = "" + elif self.type == TokenKind.NEWLINE: value = r"\n" + elif self.type == TokenKind.EOF: + value = "" else: value = self.value diff --git a/core/utils.py b/core/utils.py index 882924f..6e777eb 100644 --- a/core/utils.py +++ b/core/utils.py @@ -105,27 +105,28 @@ def get_classes_from_package(package_name): yield c -def get_sub_classes(package_name, base_class_name): +def get_sub_classes(package_name, base_class): pkg = __import__(package_name) prefix = pkg.__name__ + "." for (module_loader, name, ispkg) in pkgutil.iter_modules(pkg.__path__, prefix): importlib.import_module(name) - base_class = get_class(base_class_name) - return base_class.__subclasses__() + base_class = get_class(base_class) if isinstance(base_class, str) else base_class + return set(base_class.__subclasses__()).union( + [s for c in base_class.__subclasses__() for s in get_sub_classes(package_name, c)]) -def remove_from_list(lst, to_remove): +def remove_from_list(lst, to_remove_predicate): """ Removes elements from a list if they exist :param lst: - :param to_remove: + :param to_remove_predicate: :return: """ flagged = [] for item in lst: - if to_remove(item): + if to_remove_predicate(item): flagged.append(item) for item in flagged: @@ -134,3 +135,23 @@ def remove_from_list(lst, to_remove): return lst +def product(a, b): + """ + Kind of cartesian product between list a and b + knowing that a is also a list + + So it's a cartesian product between a list of list and a list + """ + + if a is None or len(a) == 0: + return b + if b is None or len(b) == 0: + return a + + res = [] + for item_b in b: + for item_a in a: + items = item_a + [item_b] + res.append(items) + + return res diff --git a/evaluators/PythonEvaluator.py b/evaluators/PythonEvaluator.py index a20292d..633f739 100644 --- a/evaluators/PythonEvaluator.py +++ b/evaluators/PythonEvaluator.py @@ -47,7 +47,7 @@ class PythonEvaluator(OneReturnValueEvaluator): for prop_name, prop_value in context.obj.props.items(): my_locals[prop_name] = prop_value.value - node_concept = core.ast.nodes.transform(ast_) + node_concept = core.ast.nodes.python_to_concept(ast_) unreferenced_names_visitor = UnreferencedNamesVisitor(context.sheerka) unreferenced_names_visitor.visit(node_concept) diff --git a/parsers/BaseParser.py b/parsers/BaseParser.py index 399a7af..4c8693b 100644 --- a/parsers/BaseParser.py +++ b/parsers/BaseParser.py @@ -51,6 +51,10 @@ class BaseParser: if tokens is None: return "" res = "" + + if not hasattr(tokens, "__iter__"): + tokens = [tokens] + for token in tokens: value = Keywords(token.value).value 
if token.type == TokenKind.KEYWORD else token.value res += value diff --git a/parsers/ConceptLexerParser.py b/parsers/ConceptLexerParser.py new file mode 100644 index 0000000..437cfe4 --- /dev/null +++ b/parsers/ConceptLexerParser.py @@ -0,0 +1,495 @@ +##################################################################################################### +# This part of code is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio) +# I don't directly use the project, but it helped me figure out +# what to do. +# Dejanović I., Milosavljević G., Vaderna R.: +# Arpeggio: A flexible PEG parser for Python, +# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004 +##################################################################################################### +from dataclasses import field, dataclass +from collections import defaultdict +from core.builtin_concepts import BuiltinConcepts +from core.concept import Concept +from core.tokenizer import TokenKind, Tokenizer, Token +from parsers.BaseParser import BaseParser, Node, ErrorNode +import core.utils +import logging + +log = logging.getLogger(__name__) + + +def flatten(iterable): + if iterable is None: + return [] + + result = [] + for e in iterable: + if e.parsing_expression.rule_name is not None and e.parsing_expression.rule_name != "": + if hasattr(e, "children"): + e.children = flatten(e.children) + result.append(e) + elif hasattr(e, "children"): + result.extend(flatten(e.children)) + else: + result.append(e) + return result + + +@dataclass() +class LexerNode(Node): + start: int + end: int + + +class ConceptNode(LexerNode): + def __init__(self, concept, start, end, tokens=None, source=None, children=None): + super().__init__(start, end) + self.concept = concept + self.tokens = tokens + self.source = source + self.children = children + + if self.source is None: + self.source = BaseParser.get_text_from_tokens(self.tokens) + + def __eq__(self, other): + if not super().__eq__(other): + return False + + if not isinstance(other, ConceptNode): + return False + + return self.concept == other.concept and \ + self.source == other.source + + def __hash__(self): + return hash((self.concept, self.start, self.end, self.source)) + + +class NonTerminalNode(LexerNode): + def __init__(self, parsing_expression, start, end, children=None): + super().__init__(start, end) + self.parsing_expression = parsing_expression + self.children = children + + def __repr__(self): + name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__ + if len(self.children) > 0: + sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")" + else: + sub_names = "" + return name + sub_names + + +class TerminalNode(LexerNode): + def __init__(self, parsing_expression, start, end, value): + super().__init__(start, end) + self.parsing_expression = parsing_expression + self.value = value + + def __repr__(self): + name = self.parsing_expression.rule_name or "" + return name + f"'{self.value}'" + + +@dataclass() +class GrammarErrorNode(ErrorNode): + message: str + + +class ParsingExpression: + def __init__(self, *args, **kwargs): + self.elements = args + + nodes = kwargs.get('nodes', []) + if not hasattr(nodes, '__iter__'): + nodes = [nodes] + self.nodes = nodes + + self.rule_name = kwargs.get('rule_name', '') + + def parse(self, parser): + return self._parse(parser) + + +class Sequence(ParsingExpression): + """ + Will match sequence of parser expressions in exact order they are defined. 
+ """ + + def _parse(self, parser): + init_pos = parser.pos + end_pos = parser.pos + + children = [] + for e in self.nodes: + node = e.parse(parser) + if node is None: + return None + else: + if node.end != -1: # because Optional returns -1 when no match + children.append(node) + end_pos = node.end + + return NonTerminalNode(self, init_pos, end_pos, children) + + +class OrderedChoice(ParsingExpression): + """ + Will match one among multiple + It will stop at the first match (so the order of definition is important) + """ + + def _parse(self, parser): + init_pos = parser.pos + + for e in self.nodes: + node = e.parse(parser) + if node: + return NonTerminalNode(self, init_pos, node.end, [node]) + + parser.seek(init_pos) # backtrack + + return None + + +class Optional(ParsingExpression): + """ + Will match or not the elements + if many matches, will choose longest one + If you need order, use Optional(OrderedChoice) + """ + + def _parse(self, parser): + init_pos = parser.pos + selected_node = NonTerminalNode(self, parser.pos, -1, []) + + for e in self.nodes: + node = e.parse(parser) + if node: + if node.end > selected_node.end: + selected_node = node + + parser.seek(init_pos) # backtrack + + if selected_node.end != -1: + parser.seek(selected_node.end) + parser.next_token() # eat the tokens found + + return selected_node + + +class Match(ParsingExpression): + """ + Base class for all classes that will try to match something from the input. + """ + + def __init__(self, rule_name, root=False): + super(Match, self).__init__(rule_name=rule_name, root=root) + + def parse(self, parser): + result = self._parse(parser) + return result + + +class StrMatch(Match): + """ + Matches a literal + """ + + def __init__(self, to_match, rule_name="", root=False, ignore_case=None): + super(Match, self).__init__(rule_name=rule_name, root=root) + self.to_match = to_match + self.ignore_case = ignore_case + + def __repr__(self): + return f"StrMatch('{self.to_match}')" + + def _parse(self, parser): + token = parser.get_token() + m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \ + else token.value == self.to_match + + if m: + node = TerminalNode(self, parser.pos, parser.pos, token.value) + parser.next_token() + return node + + return None + + +class CrossRef: + """ + During the creation of the model, + Creates reference to a concept, as it may not be resolved yet + """ + + def __init__(self, concept): + self.concept = concept + + +class ConceptLexerParser(BaseParser): + def __init__(self): + super().__init__("ConceptLexer") + self.concepts_dict = {} + self.ignore_case = True + + self.token = None + self.pos = -1 + self.tokens = None + + self.context = None + self.text = None + self.sheerka = None + + def add_error(self, error, next_token=True): + self.has_error = True + self.error_sink.append(error) + if next_token: + self.next_token() + return error + + def reset_parser(self, context, text): + self.context = context + self.sheerka = context.sheerka + self.text = text + + if isinstance(text, str): + self.tokens = list(Tokenizer(text)) + else: + self.tokens = list(text) + self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token + + self.token = None + self.pos = -1 + self.next_token() + + def get_token(self) -> Token: + return self.token + + def next_token(self, skip_whitespace=True): + if self.token and self.token.type == TokenKind.EOF: + return False + + self.pos += 1 + self.token = self.tokens[self.pos] + + if skip_whitespace: + while 
self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: + self.pos += 1 + self.token = self.tokens[self.pos] + + return self.token.type != TokenKind.EOF + + def seek(self, pos): + self.pos = pos + self.token = self.tokens[self.pos] + return True + + def rewind(self, offset, skip_whitespace=True): + self.pos += offset + self.token = self.tokens[self.pos] + + if skip_whitespace: + while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE): + self.pos -= 1 + self.token = self.tokens[self.pos] + + def initialize(self, dict): + """ + Adds a bunch of concepts, and how they can be recognized + :param dict: dictionary of concept; concept_definition + :return: + """ + + nodes_to_resolve = [] + concepts_to_resolve = set() + + # ## Gets the grammars + for concept, concept_def in dict.items(): + concept.init_key() # make sure that the key is initialized + grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve) + self.concepts_dict[concept] = grammar + + # ## Removes concepts with infinite recursions + concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve) + for concept in concepts_to_remove: + concepts_to_resolve.remove(concept) + del self.concepts_dict[concept] + + # ## Resolves cross references and remove grammar with unresolved references + self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve) + + def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve): + def inner_get_model(expression): + if isinstance(expression, Concept): + ret = CrossRef(expression) + concepts_to_resolve.add(concept) + nodes_to_resolve.append(ret) + elif isinstance(expression, str): + ret = StrMatch(expression, ignore_case=self.ignore_case) + elif isinstance(expression, StrMatch): + ret = expression + if ret.ignore_case is None: + ret.ignore_case = self.ignore_case + elif isinstance(expression, Sequence) or \ + isinstance(expression, OrderedChoice) or \ + isinstance(expression, Optional): + ret = expression + ret.nodes.extend([inner_get_model(e) for e in ret.elements]) + if any((isinstance(x, CrossRef) for x in ret.nodes)): + concepts_to_resolve.add(concept) + nodes_to_resolve.append(ret) + else: + ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'.")) + return ret + + model = inner_get_model(concept_def) + if isinstance(model, CrossRef): + concepts_to_resolve.add(concept) + + model.rule_name = concept.key + return model + + def detect_infinite_recursion(self, concepts_to_resolve): + + # infinite recursion matcher + def _is_infinite_recursion(ref_concept, node): + if isinstance(node, CrossRef): + if node.concept == ref_concept: + return True + return _is_infinite_recursion(ref_concept, self.concepts_dict[node.concept]) + + if isinstance(node, OrderedChoice): + return _is_infinite_recursion(ref_concept, node.nodes[0]) + + if isinstance(node, Sequence): + for node in node.nodes: + if _is_infinite_recursion(ref_concept, node): + return True + return False + + return False + + removed_concepts = [] + for e in concepts_to_resolve: + to_resolve = self.concepts_dict[e] + if _is_infinite_recursion(e, to_resolve): + removed_concepts.append(e) + return removed_concepts + + # Cross-ref resolving + def resolve_cross_references(self, concepts_to_resolve, nodes_to_resolve): + + repeat = True + while repeat: + repeat = False + for e in concepts_to_resolve: + to_resolve = self.concepts_dict[e] + if isinstance(to_resolve, CrossRef): + repeat = True + 
self.concepts_dict[e] = self.concepts_dict[to_resolve.concept]
+
+        for e in nodes_to_resolve:
+            if not isinstance(e, ParsingExpression):
+                continue  # case where a concept directly references another concept
+
+            for i, node in enumerate(e.nodes):
+                if isinstance(node, CrossRef):
+                    if node.concept in self.concepts_dict:
+                        e.nodes[i] = self.concepts_dict[node.concept]
+
+    def parse(self, context, text):
+        if text == "":
+            return context.sheerka.ret(
+                self.name,
+                False,
+                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
+            )
+
+        self.reset_parser(context, text)
+
+        concepts_found = [[]]
+        # actually a list of lists
+        # The first dimension is the number of possibilities found
+        # The second dimension is the number of concepts found, under one possibility
+        #
+        # Example 1
+        # concept foo : 'one' 'two'
+        # concept bar : 'one' 'two'
+        # input 'one two' -> will produce two possibilities (foo and bar).
+        #
+        # Example 2
+        # concept foo : 'one'
+        # concept bar : 'two'
+        # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
+
+        while True:
+            init_pos = self.pos
+            res = []
+            for concept, grammar in self.concepts_dict.items():
+                self.seek(init_pos)
+                node = grammar.parse(self)
+                if node is not None:
+                    concept_node = ConceptNode(concept, node.start, node.end, self.tokens[node.start: node.end + 1])
+                    if hasattr(node, "children"):
+                        concept_node.children = node.children
+                    res.append(concept_node)
+
+            if len(res) == 0:  # not recognized
+                self.seek(init_pos)
+                not_recognized = self.get_text_from_tokens(self.get_token())
+                self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized))
+                break
+
+            res = self.get_bests(res)  # only keep the concepts that consume the most tokens
+            for r in res:
+                r.children = flatten(r.children)
+            concepts_found = core.utils.product(concepts_found, res)
+
+            # loop
+            self.seek(res[0].end)
+            if not self.next_token():
+                break
+
+        # manage the case where nothing was recognized (or another error occurred)
+        if self.has_error:
+            return self.sheerka.ret(
+                self.name,
+                False,
+                self.sheerka.new(
+                    BuiltinConcepts.PARSER_RESULT,
+                    parser=self,
+                    source=text,
+                    body=self.error_sink,
+                    try_parsed=concepts_found[0] if len(concepts_found) == 1 else concepts_found))
+
+        # else
+        # returns as many ReturnValues as choices found
+        ret = []
+        for choice in concepts_found:
+            ret.append(
+                self.sheerka.ret(
+                    self.name,
+                    True,
+                    self.sheerka.new(
+                        BuiltinConcepts.PARSER_RESULT,
+                        parser=self,
+                        source=text,
+                        body=choice,
+                        try_parsed=choice)))
+
+        return ret[0] if len(ret) == 1 else ret
+
+    @staticmethod
+    def get_bests(results):
+        """
+        Returns the results that consumed the most tokens (the longest matches)
+        :param results:
+        :return:
+        """
+        by_end_pos = defaultdict(list)
+        for result in results:
+            by_end_pos[result.end].append(result)
+
+        return by_end_pos[max(by_end_pos)]
diff --git a/sdp/sheerkaDataProvider.py b/sdp/sheerkaDataProvider.py
index 3e51e05..2ebd9fa 100644
--- a/sdp/sheerkaDataProvider.py
+++ b/sdp/sheerkaDataProvider.py
@@ -129,7 +129,7 @@ class State:
         if digest is None:
             return
 
-        if not isinstance(items, list):
+        if not hasattr(items, "__iter__"):
            items = [items]
 
        for item in items:
@@ -575,9 +575,9 @@ class SheerkaDataProvider:
    def exists(self, entry, key=None, digest=None):
        """
        Returns true if the entry is defined
-        :param digest:
        :param key:
        :param entry:
+        :param digest: digest of the object, when several entries share the same key
        :return:
        """
        snapshot = self.get_snapshot()
diff --git a/tests/test_ConceptEvaluator.py b/tests/test_ConceptEvaluator.py
index ed32a7c..af273d6 100644
--- a/tests/test_ConceptEvaluator.py +++ b/tests/test_ConceptEvaluator.py @@ -8,7 +8,7 @@ from parsers.BaseParser import BaseParser def get_context(): - sheerka = Sheerka() + sheerka = Sheerka(skip_builtins_in_db=True) sheerka.initialize("mem://") return ExecutionContext("test", "xxx", sheerka) diff --git a/tests/test_ConceptLexerParser.py b/tests/test_ConceptLexerParser.py new file mode 100644 index 0000000..bb808c3 --- /dev/null +++ b/tests/test_ConceptLexerParser.py @@ -0,0 +1,560 @@ +import pytest +from core.builtin_concepts import BuiltinConcepts +from core.concept import Concept +from core.sheerka import Sheerka, ExecutionContext +from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ + CrossRef + + +@pytest.mark.parametrize("match, text", [ + ("foo", "foo"), + ("'foo'", "'foo'"), + ("1", "1"), + ("3.14", "3.14"), + ("+", "+"), + (StrMatch("foo"), "foo"), + (StrMatch("'foo'"), "'foo'"), + (StrMatch("1"), "1"), + (StrMatch("3.14"), "3.14"), + (StrMatch("+"), "+"), +]) +def test_i_can_match_simple_tokens(match, text): + context = get_context() + foo = Concept(name="foo") + concepts = {foo: text} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, text) + + assert res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.value == [ConceptNode(foo, 0, 0, source=text)] + + +def test_i_can_match_multiple_concepts_in_one_input(): + context = get_context() + one = Concept(name="one") + two = Concept(name="two") + concepts = {one: "one", two: "two"} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two one") + + assert res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.value == [ + ConceptNode(one, 0, 0, source="one"), + ConceptNode(two, 2, 2, source="two"), + ConceptNode(one, 4, 4, source="one"), + ] + + +def test_i_cannot_match_an_unknown_input(): + context = get_context() + parser = ConceptLexerParser() # no grammar registered + + res = parser.parse(context, "foo") + + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body[0].body == "foo" + + +def test_i_cannot_match_when_part_of_the_input_is_unknown(): + context = get_context() + one = Concept(name="one") + two = Concept(name="two") + concepts = {one: "one", two: "two"} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two three") + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.try_parsed == [ + ConceptNode(one, 0, 0, source="one"), + ConceptNode(two, 2, 2, source="two")] # these two were recognized + assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body[0].body == "three" + + +def test_i_can_match_sequence(): + context = get_context() + foo = Concept(name="foo") + concepts = {foo: Sequence("one", "two", "three")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two three") + + assert res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")] + + +def test_wrong_sequence_is_not_matched(): + context 
= get_context() + foo = Concept(name="foo") + concepts = {foo: Sequence("one", "two", "three")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two three one") + + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.try_parsed == [ConceptNode(foo, 0, 4, source="one two three")] + assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body[0].body == "one" + + +def test_i_cannot_match_sequence_if_end_of_file(): + context = get_context() + foo = Concept(name="foo") + concepts = {foo: Sequence("one", "two", "three")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two") + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.try_parsed == [] + assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body[0].body == "one" + + +def test_i_always_choose_the_longest_match(): + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} + + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two three") + + assert res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")] + + +def test_i_can_match_several_sequences(): + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} + + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two three one two") + + assert res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.value == [ + ConceptNode(foo, 0, 4, source="one two three"), + ConceptNode(bar, 6, 8, source="one two"), + ] + + +def test_i_can_match_ordered_choice(): + context = get_context() + foo = Concept(name="foo") + concepts = {foo: OrderedChoice("one", "two")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res1 = parser.parse(context, "one") + assert res1.status + assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) + assert res1.value.body == [ConceptNode(foo, 0, 0, source="one")] + + res2 = parser.parse(context, "two") + assert res2.status + assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) + assert res2.value.body == [ConceptNode(foo, 0, 0, source="two")] + + res3 = parser.parse(context, "three") + assert not res3.status + assert context.sheerka.isinstance(res3.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res3.value.body[0].body == "three" + + +def test_i_cannot_match_ordered_choice_with_empty_alternative(): + context = get_context() + foo = Concept(name="foo") + concepts = {foo: Sequence(OrderedChoice("one", ""), "two")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "ok") # because token[0] is not "one" and not "" (it is 'two') + assert not res.status + + +def test_i_can_mix_sequences_and_ordered_choices(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")} + parser = ConceptLexerParser() + 
parser.initialize(concepts) + + res1 = parser.parse(context, "twenty one ok") + assert res1.status + assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) + assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok")] + + res2 = parser.parse(context, "thirty one ok") + assert res2.status + assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) + assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok")] + + res3 = parser.parse(context, "twenty one") + assert not res3.status + assert res3.value.body[0].body == "twenty" + assert res3.value.try_parsed == [] + + +def test_i_can_mix_ordered_choices_and_sequences(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "twenty thirty") + assert res.status + + res = parser.parse(context, "one") + assert res.status + + +def test_i_cannot_parse_empty_optional(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: Optional("one")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "") + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.IS_EMPTY) + + +def test_i_can_parse_optional(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: Optional("one")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one") + assert res.status + assert res.value.value == [ConceptNode(foo, 0, 0, source="one")] + + +def test_i_can_parse_sequence_starting_with_optional(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: Sequence(Optional("twenty"), "one")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "twenty one") + assert res.status + assert res.value.body == [ConceptNode(foo, 0, 2, source="twenty one")] + + res = parser.parse(context, "one") + assert res.status + assert res.value.body == [ConceptNode(foo, 0, 0, source="one")] + + +def test_i_can_parse_sequence_ending_with_optional(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: Sequence("one", "two", Optional("three"))} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two three") + assert res.status + assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")] + + res = parser.parse(context, "one two") + assert res.status + assert res.value.body == [ConceptNode(foo, 0, 2, source="one two")] + + +def test_i_can_parse_sequence_with_optional_in_between(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: Sequence("one", Optional("two"), "three")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two three") + assert res.status + assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")] + + res = parser.parse(context, "one three") + assert res.status + assert res.value.body == [ConceptNode(foo, 0, 2, source="one three")] + + +def test_i_can_use_reference(): + # The problem here is when there are multiple match for the same input + # The parsing result is a list of all concepts found + # So it's already a list that represents a sequence, not a choice + # So I need to create a choice concept + # create the return value for every possible graph + # --> The latter seems 
to be the best as we don't defer the resolution of the problem to someone else + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = {foo: Sequence("one", "two"), bar: foo} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two") + assert len(res) == 2 + + assert res[0].status + assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) + assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")] + + assert res[1].status + assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) + assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")] + + +def test_i_can_use_context_reference_with_multiple_levels(): + """ + Same than previous one, but with reference of reference + :return: + """ + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + baz = Concept(name="baz") + + concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two") + assert len(res) == 3 + + assert res[0].status + assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) + assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")] + + assert res[1].status + assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) + assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")] + + assert res[2].status + assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) + assert res[2].value.body == [ConceptNode(baz, 0, 2, source="one two")] + + +def test_order_is_not_important_when_using_references(): + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = {bar: foo, foo: Sequence("one", "two")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "one two") + assert len(res) == 2 + assert res[0].value.body == [ConceptNode(bar, 0, 2, source="one two")] + assert res[1].value.body == [ConceptNode(foo, 0, 2, source="one two")] + + +def test_i_can_parse_when_reference(): + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "twenty two") + assert res.status + assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")] + + res = parser.parse(context, "thirty one") + assert res.status + assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")] + + res = parser.parse(context, "twenty") + assert res.status + assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")] + + +def test_i_can_detect_duplicates_when_reference(): + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = { + bar: Sequence(foo, Optional(OrderedChoice("one", "two"))), + foo: OrderedChoice("twenty", "thirty") + } + parser = ConceptLexerParser() + parser.initialize(concepts) + + res = parser.parse(context, "twenty") + assert len(res) == 2 + assert res[0].status + assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) + assert res[0].value.body == [ConceptNode(bar, 0, 0, source="twenty")] + + assert res[1].status + assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) + assert 
res[1].value.body == [ConceptNode(foo, 0, 0, source="twenty")] + + +def test_i_can_detect_infinite_recursion(): + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = { + bar: foo, + foo: bar + } + parser = ConceptLexerParser() + parser.initialize(concepts) + + assert bar not in parser.concepts_dict + assert foo not in parser.concepts_dict + + +def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = { + bar: foo, + foo: OrderedChoice(bar, "foo") + } + parser = ConceptLexerParser() + parser.initialize(concepts) + + assert foo not in parser.concepts_dict # removed because of the infinite recursion + assert bar not in parser.concepts_dict # removed because of the infinite recursion + + # the other way around is possible + context = get_context() + concepts = { + bar: foo, + foo: OrderedChoice("foo", bar) + } + parser = ConceptLexerParser() + parser.initialize(concepts) + assert foo in parser.concepts_dict + assert bar in parser.concepts_dict + + res = parser.parse(context, "foo") + assert len(res) == 2 + assert res[0].status + assert res[0].value.body == [ConceptNode(bar, 0, 0, source="foo")] + assert res[1].status + assert res[1].value.body == [ConceptNode(foo, 0, 0, source="foo")] + + +def test_i_can_detect_indirect_infinite_recursion_with_sequence(): + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = { + bar: foo, + foo: Sequence("one", bar, "two") + } + parser = ConceptLexerParser() + parser.initialize(concepts) + + assert foo not in parser.concepts_dict # removed because of the infinite recursion + assert bar not in parser.concepts_dict # removed because of the infinite recursion + + +def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(): + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = { + bar: foo, + foo: Sequence("one", OrderedChoice(bar, "other"), "two") + } + parser = ConceptLexerParser() + parser.initialize(concepts) + + assert foo not in parser.concepts_dict # removed because of the infinite recursion + assert bar not in parser.concepts_dict # removed because of the infinite recursion + + +def test_i_can_detect_indirect_infinite_recursion_with_optional(): + # TODO infinite recursion with optional + pass + +# +# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(): +# context = get_context() +# add = Concept(name="add") +# mult = Concept(name="mult") +# atom = Concept(name="atom") +# +# concepts = { +# add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))), +# mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))), +# atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')), +# } +# +# parser = ConceptLexerParser() +# parser.register(concepts) +# +# # res = parser.parse(context, "1") +# # assert len(res) == 3 # add, mult, atom +# # +# # res = parser.parse(context, "1 * 2") +# # assert len(res) == 2 # add and mult +# # +# # res = parser.parse(context, "1 + 2") +# # assert res.status +# # assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2")] +# +# res = parser.parse(context, "1 * 2 + 3") +# assert res.status +# assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")] + + +def test_i_can_register_concepts_with_the_same_name(): + # TODO : concepts are registered by name, + # what when two concepts have the same name ? 
+ pass + + +def test_i_can_parse_very_very_long_input(): + # TODO: In the current implementation, all the tokens are loaded in memory + # It's clearly not the good approach + pass + + +def get_context(): + sheerka = Sheerka(skip_builtins_in_db=True) + sheerka.initialize("mem://") + + return ExecutionContext("sheerka", "xxxx", sheerka) diff --git a/tests/test_DefaultParser.py b/tests/test_DefaultParser.py index 860fbe8..ac5b22d 100644 --- a/tests/test_DefaultParser.py +++ b/tests/test_DefaultParser.py @@ -67,7 +67,7 @@ def get_concept(name, where=None, pre=None, post=None, body=None): def get_context(): - sheerka = Sheerka() + sheerka = Sheerka(skip_builtins_in_db=True) sheerka.initialize("mem://") return ExecutionContext("test", "xxx", sheerka) diff --git a/tests/test_ExactConceptParser.py b/tests/test_ExactConceptParser.py index 3dd875b..bff0f69 100644 --- a/tests/test_ExactConceptParser.py +++ b/tests/test_ExactConceptParser.py @@ -125,7 +125,7 @@ def test_i_can_detect_concept_from_tokens(): def get_context(): - sheerka = Sheerka() + sheerka = Sheerka(skip_builtins_in_db=True) sheerka.initialize("mem://") return ExecutionContext("sheerka", "xxxx", sheerka) diff --git a/tests/test_MultipleSameSuccessEvaluator.py b/tests/test_MultipleSameSuccessEvaluator.py index 006bfca..34d449f 100644 --- a/tests/test_MultipleSameSuccessEvaluator.py +++ b/tests/test_MultipleSameSuccessEvaluator.py @@ -7,7 +7,7 @@ from parsers.BaseParser import BaseParser def get_context(): - sheerka = Sheerka() + sheerka = Sheerka(skip_builtins_in_db=True) sheerka.initialize("mem://") return ExecutionContext("test", "xxx", sheerka) diff --git a/tests/test_PyhtonEvaluator.py b/tests/test_PyhtonEvaluator.py index e919af6..81bb3ce 100644 --- a/tests/test_PyhtonEvaluator.py +++ b/tests/test_PyhtonEvaluator.py @@ -8,7 +8,7 @@ from parsers.PythonParser import PythonNode, PythonParser def get_context(): - sheerka = Sheerka() + sheerka = Sheerka(skip_builtins_in_db=True) sheerka.initialize("mem://") return ExecutionContext("test", "xxx", sheerka) diff --git a/tests/test_PythonParser.py b/tests/test_PythonParser.py index e6b0850..ed48618 100644 --- a/tests/test_PythonParser.py +++ b/tests/test_PythonParser.py @@ -9,7 +9,7 @@ from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode def get_context(): - sheerka = Sheerka() + sheerka = Sheerka(skip_builtins_in_db=True) sheerka.initialize("mem://") return ExecutionContext("test", "xxx", sheerka) diff --git a/tests/test_ast.py b/tests/test_ast.py index 5e7afaf..ef6ff57 100644 --- a/tests/test_ast.py +++ b/tests/test_ast.py @@ -8,7 +8,7 @@ from core.sheerka import Sheerka def get_sheerka(): - sheerka = Sheerka() + sheerka = Sheerka(skip_builtins_in_db=True) sheerka.initialize("mem://") return sheerka @@ -34,7 +34,7 @@ def my_function(a,b): return a """ tree = ast.parse(source) - tree_as_concept = core.ast.nodes.transform(tree) + tree_as_concept = core.ast.nodes.python_to_concept(tree) sheerka = get_sheerka() assert tree_as_concept.node_type == "Module" @@ -87,7 +87,7 @@ def my_function(a,b): """ node = ast.parse(source) - concept_node = core.ast.nodes.transform(node) + concept_node = core.ast.nodes.python_to_concept(node) visitor = TestNameVisitor() visitor.visit(concept_node) @@ -115,7 +115,7 @@ my_function(x,y) sheerka = get_sheerka() node = ast.parse(source) - concept_node = core.ast.nodes.transform(node) + concept_node = core.ast.nodes.python_to_concept(node) visitor = UnreferencedNamesVisitor(sheerka) visitor.visit(concept_node) @@ -129,3 +129,28 @@ 
my_function(x,y) def test_i_can_compare_NodeParent_with_tuple(): node_parent = NodeParent(GenericNodeConcept("For", None), "target") assert node_parent == ("For", "target") + + +def test_i_can_transform_back(): + source = """ +def my_function(a,b): + for i in range(b): + a = a + b + return a + + +my_function(x, y) + """ + + node = ast.parse(source) + concept_node = core.ast.nodes.python_to_concept(node) + + transformed_back = core.ast.nodes.concept_to_python(concept_node) + assert dump_ast(transformed_back) == dump_ast(node) + + +def dump_ast(node): + dump = ast.dump(node) + for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]: + dump = dump.replace(to_remove, "") + return dump diff --git a/tests/test_builtin_helpers.py b/tests/test_builtin_helpers.py index 10856b0..d8a7082 100644 --- a/tests/test_builtin_helpers.py +++ b/tests/test_builtin_helpers.py @@ -1,3 +1,7 @@ +import ast + +import pytest + from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts from core.sheerka import Sheerka, ExecutionContext import core.builtin_helpers @@ -110,8 +114,47 @@ def test_i_can_use_expect_one_when_not_a_list_false(): assert res == item +@pytest.mark.parametrize("expression, vars_to_include, vars_to_exclude, expected_expr", [ + ("a == 1", [], [], []), + ("a == 1", ["a"], [], ["a == 1"]), + ("a == 1", [], ["a"], []), + ("predicate(a)", [], [], []), + ("predicate(a)", ["a"], [], ["predicate(a)"]), + ("predicate(a, b)", ["a"], [], ["predicate(a, b)"]), + ("predicate(a, b)", ["b"], [], ["predicate(a, b)"]), + ("predicate(a, b)", ["a", "b"], [], ["predicate(a, b)"]), + ("predicate(a, b)", ["a"], ["b"], []), + ("a + b == 1", [], [], []), + ("a + b == 1", ["a"], [], ["a + b == 1"]), + ("a + b == 1", ["a"], ["b"], []), + ("a + b == 1", ["b"], [], ["a + b == 1"]), + ("a + b == 1", ["a", "b"], [], ["a + b == 1"]), + ("a == 1 and b == 2", [], [], []), + ("a == 1 and b == 2", ["a"], [], ["a == 1"]), + ("a == 1 and b == 2", ["b"], [], ["b == 2"]), + ("a == 1 and b == 2", ["a"], ["b"], ["a == 1"]), + ("a == 1 and b == 2", ["a", "b"], [], ["a == 1 and b == 2"]), + ("predicate(a,c) and predicate(b,c)", ["a", "b"], [], ["predicate(a,c) and predicate(b,c)"]), + ("not(a == 1)", [], [], []), + ("not(a == 1)", ["a"], [], ["not(a==1)"]), + ("a == 1 or b == 2", [], [], []), + ("a == 1 or b == 2", ["a"], [], ["a == 1"]), + ("a == 1 or b == 2", ["b"], [], ["b == 2"]), + ("a == 1 or b == 2", ["a", "b"], [], ["a == 1 or b == 2"]), + ("predicate(a,c) or predicate(b,c)", ["a", "b"], [], ["predicate(a,c) or predicate(b,c)"]), +]) +def test_i_can_extract_predicates(expression, vars_to_include, vars_to_exclude, expected_expr): + sheerka = get_sheerka() + expected = [ast.parse(expr, mode="eval") for expr in expected_expr] + + actual = core.builtin_helpers.extract_predicates(sheerka, expression, vars_to_include, vars_to_exclude) + assert len(actual) == len(expected) + for i in range(len(actual)): + assert dump_ast(actual[i]) == dump_ast(expected[i]) + + def get_sheerka(): - sheerka = Sheerka() + sheerka = Sheerka(skip_builtins_in_db=True) sheerka.initialize("mem://") return sheerka @@ -119,3 +162,10 @@ def get_sheerka(): def get_context(sheerka): return ExecutionContext("test", "xxx", sheerka) + + +def dump_ast(node): + dump = ast.dump(node) + for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]: + dump = dump.replace(to_remove, "") + return dump diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index 4993e76..d72a66c 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py 
@@ -50,7 +50,7 @@ def test_i_can_list_builtin_concepts(): def test_builtin_concepts_are_initialized(): - sheerka = get_sheerka() + sheerka = get_sheerka(skip_builtins_in_db=False) assert len(sheerka.concepts_cache) == len(BuiltinConcepts) for concept_name in BuiltinConcepts: assert str(concept_name) in sheerka.concepts_cache @@ -61,7 +61,7 @@ def test_builtin_concepts_are_initialized(): def test_builtin_concepts_can_be_updated(): - sheerka = get_sheerka(root_folder) + sheerka = get_sheerka(root_folder, skip_builtins_in_db=False) loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) loaded_sheerka.desc = "I have a description" sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka) @@ -89,7 +89,8 @@ def test_i_can_add_a_concept(): assert concept_found.id == "1001" assert concept.key in sheerka.concepts_cache - assert sheerka.sdp.io.exists(sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_found.get_digest())) + assert sheerka.sdp.io.exists( + sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_found.get_digest())) def test_i_cannot_add_the_same_concept_twice(): @@ -414,7 +415,8 @@ as: assert getattr(concept_saved, prop) == getattr(expected, prop) assert concept_saved.key in sheerka.concepts_cache - assert sheerka.sdp.io.exists(sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest())) + assert sheerka.sdp.io.exists( + sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest())) def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept(): @@ -443,7 +445,8 @@ def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept() assert getattr(concept_saved, prop) == getattr(expected, prop) assert concept_saved.key in sheerka.concepts_cache - assert sheerka.sdp.io.exists(sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest())) + assert sheerka.sdp.io.exists( + sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest())) def test_i_cannot_eval_the_same_def_concept_twice(): @@ -551,12 +554,24 @@ def test_i_can_manage_concepts_with_the_same_key_when_values_are_the_same(): res = sheerka.eval("hello 'foo'") assert len(res) == 1 assert res[0].status - assert res[0].value, "hello foo" + assert res[0].value == "hello foo" assert res[0].who == sheerka.get_evaluator_name(MultipleSameSuccessEvaluator.NAME) -def get_sheerka(root="mem://"): - sheerka = Sheerka() +def test_i_can_create_concepts_on_python_codes(): + sheerka = get_sheerka() + context = get_context(sheerka) + + sheerka.create_new_concept(context, Concept(name="concepts", body="sheerka.concepts()")) + res = sheerka.eval("concepts") + + assert len(res) == 1 + assert res[0].status + assert isinstance(res[0].value, list) + + +def get_sheerka(root="mem://", skip_builtins_in_db=True): + sheerka = Sheerka(skip_builtins_in_db) sheerka.initialize(root) return sheerka diff --git a/tests/test_utils.py b/tests/test_utils.py index 8288ed5..88ae4c5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -51,8 +51,28 @@ def test_i_can_get_sub_classes(): default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser") exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser") python_parser = core.utils.get_class("parsers.PythonParser.PythonParser") + concept_lexer_parser = core.utils.get_class("parsers.ConceptLexerParser.ConceptLexerParser") assert base_parser not in sub_classes 
    assert default_parser in sub_classes
    assert exact_concept_parser in sub_classes
    assert python_parser in sub_classes
+    assert concept_lexer_parser in sub_classes
+
+
+@pytest.mark.parametrize("a, b, expected", [
+    ([], [], []),
+    ([], ['a'], ['a']),
+    ([[]], ['a'], [['a']]),
+    (['a'], [], ['a']),
+    ([['a']], [], [['a']]),
+
+    ([['a']], ['b'], [['a', 'b']]),
+    ([['a'], ['b']], ['c'], [['a', 'c'], ['b', 'c']]),
+    ([['a1', 'a2'], ['b1', 'b2', 'b3']], ['c'], [['a1', 'a2', 'c'], ['b1', 'b2', 'b3', 'c']]),
+    ([[]], ['a', 'b'], [['a'], ['b']]),
+    ([['a'], ['b']], ['c', 'd', 'e'], [['a', 'c'], ['b', 'c'], ['a', 'd'], ['b', 'd'], ['a', 'e'], ['b', 'e']]),
+])
+def test_i_can_product(a, b, expected):
+    res = core.utils.product(a, b)
+    assert res == expected
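
Illustrative usage sketch (not part of the patch above): how the new extract_predicates helper can be consumed end to end, mirroring the in-memory Sheerka setup used by tests/test_builtin_helpers.py in this diff; the sample expression and the variable bindings passed to eval are made up for illustration.

import ast

import core.builtin_helpers
from core.sheerka import Sheerka

# In-memory Sheerka instance, as in the tests added by this diff.
sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://")

# Keep only the predicates that constrain 'a' and do not mention 'b'.
# Expected result: the 'isinstance(a, int)' clause, with 'b == 2' filtered out.
predicates = core.builtin_helpers.extract_predicates(
    sheerka, "isinstance(a, int) and b == 2", ["a"], ["b"])

for predicate in predicates:
    # Each predicate is an ast.Expression with locations fixed,
    # so it can be compiled and evaluated directly.
    print(ast.dump(predicate))
    print(eval(compile(predicate, "<predicate>", "eval"), {"a": 1}))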