Implemented a first and basic version of a Rete rule engine

2021-02-09 16:06:32 +01:00
parent 821dbed189
commit a2a8d5c5e5
110 changed files with 7301 additions and 1654 deletions
@@ -1,183 +1,64 @@
-from dataclasses import dataclass
-from typing import List, Tuple, Callable
+from itertools import product

 from core.builtin_concepts import BuiltinConcepts
-from core.concept import Concept
+from core.builtin_helpers import only_successful, parse_unrecognized, get_inner_body, parse_python, \
+    get_lexer_nodes_using_positions
 from core.sheerka.services.SheerkaExecute import ParserInput
-from core.tokenizer import TokenKind, Token
-from parsers.BaseParser import Node, BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, ParsingError
+from core.sheerka.services.sheerka_service import FailedToCompileError
+from core.tokenizer import TokenKind, Tokenizer, Keywords
+from core.utils import get_text_from_tokens
+from parsers.BaseNodeParser import UnrecognizedTokensNode
+from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError
+from parsers.PythonWithConceptsParser import PythonWithConceptsParser
+from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \
+    ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode
+from sheerkarete.common import V
+from sheerkarete.conditions import Condition, AndConditions


-class ExprNode(Node):
-    """
-    Base ExprNode
-    eval() must be overridden
-    """
+class ReteConditionsEmitter:

-    def eval(self, obj):
-        return True
+    def __init__(self, context):
+        from parsers.ComparisonParser import ComparisonParser
+        self.context = context
+        self.comparison_parser = ComparisonParser()
+        self.var_counter = 0
+        self.variables = {}

+    def add_variable(self, target):
+        var_name = f"__x_{self.var_counter:02}__"
+        self.var_counter += 1
+        self.variables[target] = var_name
+        return var_name

-@dataclass()
-class LeftPartNotFoundError(ParsingError):
-    """
-    When the expression starts with 'or' or 'and'
-    """
-    pass
+    def init_variable_if_needed(self, node, res):
+        if node.name not in self.variables:
+            var_name = self.add_variable(node.name)
+            res.append(Condition(V(var_name), "__name__", node.name))

+        return V(self.variables[node.name])

-class NameExprNode(ExprNode):
-    def __init__(self, tokens):
-        self.tokens = tokens
-        self.value = "".join([t.str_value for t in self.tokens])
+    def get_conditions(self, expr_nodes):
+        conditions = []
+        for expr_node in expr_nodes:
+            parsed_ret = self.comparison_parser.parse(self.context, expr_node.tokens)
+            if not parsed_ret.status:
+                raise FailedToCompileError(parsed_ret.body)
+            tree = parsed_ret.body.body

-    def eval(self, obj):
-        return self.value
+            if isinstance(tree, VariableNode):
+                var_name = self.init_variable_if_needed(tree, conditions)
+                if tree.attributes_str is not None:
+                    conditions.append(Condition(var_name, tree.attributes_str, True))

-    def __repr__(self):
-        return f"NameExprNode('{self.value}')"
+            elif isinstance(tree, ComparisonNode):
+                if isinstance(tree.left, VariableNode):
+                    left = self.init_variable_if_needed(tree.left, conditions)
+                    attr = tree.left.attributes_str or "__self__"
+                    right = eval(get_text_from_tokens(tree.right.tokens))
+                    conditions.append(Condition(left, attr, right))

-    def __str__(self):
-        return self.value
-
-
-@dataclass
-class PropertyEqualsNode(ExprNode):
-    prop: str
-    value: object
-
-    def eval(self, obj):
-        if hasattr(obj, self.prop):
-            return str(getattr(obj, self.prop)) == self.value
-
-        return False
-
-
-@dataclass()
-class PropertyContainsNode(ExprNode):
-    prop: str
-    value: object
-
-    def eval(self, obj):
-        if hasattr(obj, self.prop):
-            return self.value in str(getattr(obj, self.prop))
-
-        return False
-
-
-@dataclass
-class PropertyEqualsSequenceNode(ExprNode):
-    """
-    To use when the test must be done across parent and child
-    """
-    props: List[str]
-    values: List[object]
-
-    def eval(self, obj):
-        index = len(self.props) - 1
-
-        while True:
-            if not hasattr(obj, self.props[index]) or getattr(obj, self.props[index]) != self.values[index]:
-                return False
-
-            if index == 0:
-                break
-
-            index -= 1
-            obj = obj.get_parent() if hasattr(obj, "get_parent") else obj.parent
-            if obj is None:
-                return False
-
-        return True
-
-
-@dataclass()
-class IsaNode(ExprNode):
-    """
-    To use to replicate instanceof, sheerka.instanceof,
-    """
-    obj_class: object
-
-    def eval(self, obj):
-        if isinstance(self.obj_class, type):
-            return isinstance(obj, self.obj_class)
-
-        if isinstance(self.obj_class, (BuiltinConcepts, str)):
-            return isinstance(obj, Concept) and str(self.obj_class) == obj.key
-
-        return False
-
-
-@dataclass()
-class LambdaNode(ExprNode):
-    """
-    Generic expression to ease the tests
-    """
-    lambda_exp: Callable[[object], bool]
-
-    def eval(self, obj):
-        try:
-            return self.lambda_exp(obj)
-        except Exception:
-            pass
-
-
-@dataclass(init=False)
-class AndNode(ExprNode):
-    parts: Tuple[ExprNode]
-
-    def __init__(self, *parts: ExprNode):
-        self.parts = parts
-
-    def eval(self, obj):
-        res = self.parts[0].eval(obj) and self.parts[1].eval(obj)
-        for part in self.parts[2:]:
-            res &= part.eval(obj)
-        return res
-
-    def __repr__(self):
-        return f"AndNode(" + ", ".join([repr(p) for p in self.parts]) + ")"
-
-    def __str__(self):
-        return " and ".join([str(p) for p in self.parts])
-
-
-@dataclass(init=False)
-class OrNode(ExprNode):
-    parts: Tuple[ExprNode]
-
-    def __init__(self, *parts: ExprNode):
-        self.parts = parts
-
-    def eval(self, obj):
-        res = self.parts[0].eval(obj) or self.parts[1].eval(obj)
-        for part in self.parts[2:]:
-            res |= part.eval(obj)
-        return res
-
-    def __repr__(self):
-        return f"OrNode(" + ", ".join([repr(p) for p in self.parts]) + ")"
-
-    def __str__(self):
-        return " or ".join([str(p) for p in self.parts])
-
-
-@dataclass()
-class NotNode(ExprNode):
-    node: ExprNode
-
-    def eval(self, obj):
-        return not self.node.eval(obj)
-
-
-class FalseNode(ExprNode):
-    def eval(self, obj):
-        return False
-
-
-class TrueNode(ExprNode):
-    def eval(self, obj):
-        return True
+        return [AndConditions(conditions)]


 class ExpressionParser(BaseParser):
@@ -191,6 +72,15 @@ class ExpressionParser(BaseParser):

    def __init__(self, **kwargs):
        super().__init__("Expression", 50, False, yield_eof=True)
+        self.and_tokens = list(Tokenizer(" and ", yield_eof=False))
+        self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False))
+        self.not_tokens = list(Tokenizer("not ", yield_eof=False))
+
+    @staticmethod
+    def clean_parenthesis_nodes(nodes):
+        for i, node in enumerate(nodes):
+            if isinstance(node, ParenthesisNode):
+                nodes[i] = node.node

    def parse(self, context, parser_input: ParserInput):
        """
@@ -232,6 +122,7 @@ class ExpressionParser(BaseParser):
        return ret

    def parse_or(self):
+        start = self.parser_input.pos
        expr = self.parse_and()
        token = self.parser_input.token
        if token.type != TokenKind.IDENTIFIER or token.value != "or":
@@ -243,14 +134,19 @@ class ExpressionParser(BaseParser):
            expr = self.parse_and()
            if expr is None:
                self.add_error(UnexpectedEofParsingError("When parsing 'or'"))
-                return OrNode(*parts)
+                end = self.parser_input.pos
+                self.clean_parenthesis_nodes(parts)
+                return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
            parts.append(expr)
            token = self.parser_input.token

-        return OrNode(*parts)
+        end = parts[-1].end
+        self.clean_parenthesis_nodes(parts)
+        return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)

    def parse_and(self):
-        expr = self.parse_names()
+        start = self.parser_input.pos
+        expr = self.parse_not()
        token = self.parser_input.token
        if token.type != TokenKind.IDENTIFIER or token.value != "and":
            return expr
@@ -258,27 +154,46 @@ class ExpressionParser(BaseParser):
        parts = [expr]
        while token.type == TokenKind.IDENTIFIER and token.value == "and":
            self.parser_input.next_token()
-            expr = self.parse_names()
+            expr = self.parse_not()
            if expr is None:
                self.add_error(UnexpectedEofParsingError("When parsing 'and'"))
-                return AndNode(*parts)
+                end = self.parser_input.pos
+                self.clean_parenthesis_nodes(parts)
+                return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
            parts.append(expr)
            token = self.parser_input.token

-        return AndNode(*parts)
+        end = parts[-1].end
+        self.clean_parenthesis_nodes(parts)
+        return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
+
+    def parse_not(self):
+        token = self.parser_input.token
+        start = self.parser_input.pos
+        if token.type == TokenKind.IDENTIFIER and token.value == "not":
+            self.parser_input.next_token()
+            parsed = self.parse_not()
+            node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed
+            return NotNode(start,
+                           parsed.end,
+                           self.parser_input.tokens[start: parsed.end + 1],
+                           node)
+        else:
+            return self.parse_names()

    def parse_names(self):

        def stop():
            return token.type == TokenKind.EOF or \
                   paren_count == 0 and token.type == TokenKind.RPAR or \
-                   token.type == TokenKind.IDENTIFIER and token.value in ("and", "or")
+                   token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not")

        token = self.parser_input.token
        if token.type == TokenKind.EOF:
            return None

        if token.type == TokenKind.LPAR:
+            start = self.parser_input.pos
            self.parser_input.next_token()
            expr = self.parse_or()
            token = self.parser_input.token
@@ -286,14 +201,18 @@ class ExpressionParser(BaseParser):
                self.error_sink.append(
                    UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
                return expr
+            end = self.parser_input.pos
            self.parser_input.next_token()
-            return expr
+            return ParenthesisNode(start, end, None, expr)

        buffer = []
        paren_count = 0
+        last_paren = None
+        start = self.parser_input.pos
        while not stop():
            buffer.append(token)
            if token.type == TokenKind.LPAR:
+                last_paren = token
                paren_count += 1
            if token.type == TokenKind.RPAR:
                paren_count -= 1
@@ -305,65 +224,106 @@ class ExpressionParser(BaseParser):
                self.error_sink.append(LeftPartNotFoundError())
            return None

+        if paren_count != 0:
+            self.error_sink.append(ParenthesisMismatchError(last_paren))
+            return None
+
        if buffer[-1].type == TokenKind.WHITESPACE:
            buffer.pop()

-        return NameExprNode(buffer)
+        end = start + len(buffer) - 1
+        return NameExprNode(start, end, buffer)

+    def compile_conjunctions(self, context, conjunctions, who):
+        """
+        Transform a list of conjunctions (AND and OR) into one or multiple CompiledExpr
+        :param context:
+        :param conjunctions: list of ExprNode
+        :param who: service that calls the method
+        :returns: tuple (list of CompiledExpr, list of Rete conditions)
+        May raise FailedToCompileError if a conjunction cannot be recognized
+        """
+        recognized = []
+        for conjunction in conjunctions:
+            # try to recognize conjunction, one by one
+            # negative conjunction can be a concept starting with 'not'
+            parsed_ret = parse_unrecognized(
+                context,
+                conjunction.get_value(),  # we remove the 'NOT' part when needed to ease the recognition
+                parsers="all",
+                who=who,
+                prop=Keywords.WHEN,
+                filter_func=only_successful)

-class ExpressionVisitor:
-    """
-    Pyhtonic implementation of visitors for ExprNode
-    """
+            if parsed_ret.status:
+                recognized.append(get_inner_body(context, parsed_ret.body))
+            else:
+                raise FailedToCompileError(parsed_ret.body)

-    def visit(self, expr_node):
-        name = expr_node.__class__.__name__
+        # for each conjunction, we have a list of recognized concepts (or python node)
+        # we need a cartesian product of the results
+        # Explanation for later
+        # conjunction[0] : 'x is a y' that can be resolved with two concepts c:|1001: and c:|1002:
+        # conjunction[1] : 'y is an z' that can also be resolved with two concepts (c:|1003: and c:|1004:)
+        # so to understand the full question 'x is a y and y is an z'
+        # we can have c:|1001: then c:|1003:
+        # or          c:|1001: then c:|1004:
+        # or          c:|1002: then c:|1003:
+        # or          c:|1002: then c:|1004:
+        # if one of these combinations works, it means that the question 'x is a y and y is an z' was matched
+        # hence the cartesian product
+        product_of_recognized = list(product(*recognized))

-        method = 'visit_' + name
-        visitor = getattr(self, method, self.generic_visit)
-        return visitor(expr_node)
+        return_values = []
+        for recognized_conjunctions in product_of_recognized:
+            if len(recognized_conjunctions) == 1 and not isinstance(conjunctions[0], NotNode):
+                return_values.append(recognized_conjunctions[0])
+            elif len(recognized_conjunctions) == 1 and recognized_conjunctions[0].who == "parsers.Python":
+                # it is a negated python Node. Need to parse again
+                ret = parse_python(context, source=str(conjunctions[0]))
+                if ret.status:
+                    return_values.append(ret)
+                else:
+                    # find a way to track the failure
+                    pass
+            else:
+                # complex result. Use PythonWithConceptsParser
+                lexer_nodes = get_lexer_nodes_using_positions(recognized_conjunctions,
+                                                              self._get_positions(conjunctions))

-    def generic_visit(self, expr_node):
-        """Called if no explicit visitor function exists for a node."""
-        for field, value in expr_node.__dict__.items():
-            if isinstance(value, (list, tuple)):
-                for item in value:
-                    if isinstance(item, ExprNode):
-                        self.visit(item)
-            elif isinstance(value, ExprNode):
-                self.visit(value)
+                # put back the 'and' / 'and not' tokens between the recognized nodes
+                for i in range(len(lexer_nodes) - 1, 0, -1):
+                    end = lexer_nodes[i].start - 1
+                    start = lexer_nodes[i - 1].end + 1
+                    if isinstance(conjunctions[i], NotNode):
+                        lexer_nodes.insert(i, UnrecognizedTokensNode(start, end, self.and_not_tokens))
+                    else:
+                        lexer_nodes.insert(i, UnrecognizedTokensNode(start, end, self.and_tokens))

+                # add the starting 'not' if needed
+                # and reindex the following positions
+                if isinstance(conjunctions[0], NotNode):
+                    lexer_nodes[0].start = 2
+                    lexer_nodes.insert(0, UnrecognizedTokensNode(0, 1, self.not_tokens))

-class TrueifyVisitor(ExpressionVisitor):
-    """
-    Visit an ExprNode
-    replace all the nodes containing a variable to 'trueify' with True
-    The node containing both variables to trueify and to skip are skipped
-    """
+                python_with_concept_node_ret = PythonWithConceptsParser().parse_nodes(context, lexer_nodes)
+                if not python_with_concept_node_ret.status:
+                    # find a way to track the failure
+                    pass
+                return_values.append(python_with_concept_node_ret)

-    def __init__(self, to_trueify, to_skip):
-        self.to_trueify = to_trueify
-        self.to_skip = to_skip
+        rete_cond_emitter = ReteConditionsEmitter(context)
+        rete_disjunctions = rete_cond_emitter.get_conditions(conjunctions)

-    def visit_AndNode(self, expr_node):
-        parts = []
-        for part in expr_node.parts:
-            parts.append(self.visit(part))
-        return AndNode(*parts)
+        return return_values, rete_disjunctions

-    def visit_OrNode(self, expr_node):
-        parts = []
-        for part in expr_node.parts:
-            parts.append(self.visit(part))
-        return OrNode(*parts)
-
-    def visit_NameExprNode(self, expr_node):
-        return_true = False
-        for t in expr_node.tokens:
-            if t.type == TokenKind.IDENTIFIER:
-                if t.value in self.to_skip:
-                    return expr_node
-                if t.value in self.to_trueify:
-                    return_true = True
-
-        return NameExprNode([Token(TokenKind.IDENTIFIER, "True", -1, -1, -1)]) if return_true else expr_node
+    @staticmethod
+    def _get_positions(expr_nodes):
+        """
+        simply manage NotNodes to address the fact that the 'not' part is removed
+        """
+        for expr in expr_nodes:
+            if isinstance(expr, NotNode):
+                yield ExprNode(expr.start + 2, expr.end, expr.tokens[2:])
+            else:
+                yield expr