Implemented a first, basic version of a Rete rule engine

This commit is contained in:
2021-02-09 16:06:32 +01:00
parent 821dbed189
commit a2a8d5c5e5
110 changed files with 7301 additions and 1654 deletions
+273 -5
View File
@@ -1,8 +1,193 @@
from core.concept import CC, Concept, ConceptParts, DoNotResolve, CIO
import ast
from dataclasses import dataclass
from core.builtin_helpers import CreateObjectIdentifiers
from core.concept import CC, Concept, ConceptParts, DoNotResolve, CIO, CMV
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN, \
SCN, RN
from core.utils import get_text_from_tokens, tokens_index
from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, CN, UTN, \
SCN, RN, UnrecognizedTokensNode, SourceCodeNode
from parsers.PythonParser import PythonNode
from parsers.SyaNodeParser import SyaConceptParserHelper
from parsers.expressions import NameExprNode, AndNode, OrNode, NotNode, VariableNode, ComparisonNode, ComparisonType
from sheerkarete.common import V
from sheerkarete.conditions import Condition, AndConditions
@dataclass
class Obj:
    """Generic test object: three free-form properties and an optional parent link."""
    prop_a: object
    prop_b: object = None
    prop_c: object = None
    parent: object = None
class AND:
    """Test stand-in for AndNode: holds the conjoined sub-parts and an optional source text."""
    def __init__(self, *parts, source=None):
        # Keep the sub-expressions exactly as passed (a tuple) and remember
        # the optional exact source string used for position resolution.
        self.source = source
        self.parts = parts
class OR:
    """Test stand-in for OrNode: holds the disjoined sub-parts and an optional source text."""
    def __init__(self, *parts, source=None):
        # Sub-expressions stay a tuple; ``source`` is the optional exact
        # source string used for position resolution.
        self.source = source
        self.parts = parts
@dataclass
class NOT:
    """ Test class for NotNode"""
    # The negated sub-expression (another test stand-in node).
    expr: object
    # Optional exact source text used to locate the node's tokens.
    source: str = None
@dataclass
class EXPR:
    """Test class for NameExprNode. EXPR stands for Expression"""
    # Exact source text of the expression, located in the full token stream.
    source: str
@dataclass
class VAR:
    """Test class for VariableNode"""
    # Possibly dotted variable name ("head.attr1.attr2").
    full_name: str
    # Optional exact source text; falls back to full_name for token lookup.
    source: str = None
@dataclass
class EQ:
    """Test class for ComparisonNode with ComparisonType.EQUALS."""
    left: object
    right: object
    # Annotated so ``source`` is a real dataclass field (settable via the
    # constructor), consistent with NOT and VAR; previously it was a bare
    # class attribute that __init__ did not accept.
    source: object = None
@dataclass
class NEQ:
    """Test class for ComparisonNode with ComparisonType.NOT_EQUAlS."""
    left: object
    right: object
    # Annotated so ``source`` is a real dataclass field (settable via the
    # constructor), consistent with NOT and VAR; previously it was a bare
    # class attribute that __init__ did not accept.
    source: object = None
@dataclass
class GT:
    """Test class for ComparisonNode with ComparisonType.GREATER_THAN."""
    left: object
    right: object
    # Annotated so ``source`` is a real dataclass field (settable via the
    # constructor), consistent with NOT and VAR; previously it was a bare
    # class attribute that __init__ did not accept.
    source: object = None
@dataclass
class GTE:
    """Test class for ComparisonNode with ComparisonType.GREATER_THAN_OR_EQUALS."""
    left: object
    right: object
    # Annotated so ``source`` is a real dataclass field (settable via the
    # constructor), consistent with NOT and VAR; previously it was a bare
    # class attribute that __init__ did not accept.
    source: object = None
@dataclass
class LT:
    """Test class for ComparisonNode with ComparisonType.LESS_THAN."""
    left: object
    right: object
    # Annotated so ``source`` is a real dataclass field (settable via the
    # constructor), consistent with NOT and VAR; previously it was a bare
    # class attribute that __init__ did not accept.
    source: object = None
@dataclass
class LTE:
    """Test class for ComparisonNode with ComparisonType.LESS_THAN_OR_EQUALS."""
    left: object
    right: object
    # Annotated so ``source`` is a real dataclass field (settable via the
    # constructor), consistent with NOT and VAR; previously it was a bare
    # class attribute that __init__ did not accept.
    source: object = None
@dataclass
class IN:
    """Test class for ComparisonNode with ComparisonType.IN."""
    left: object
    right: object
    # Annotated so ``source`` is a real dataclass field (settable via the
    # constructor), consistent with NOT and VAR; previously it was a bare
    # class attribute that __init__ did not accept.
    source: object = None
@dataclass
class NIN:  # for NOT IN
    """Test class for ComparisonNode with ComparisonType.NOT_IN."""
    left: object
    right: object
    # Annotated so ``source`` is a real dataclass field (settable via the
    # constructor), consistent with NOT and VAR; previously it was a bare
    # class attribute that __init__ did not accept.
    source: object = None
# Maps a comparison stand-in class name (EQ, NEQ, ...) to the corresponding
# real ComparisonType member. NOTE(review): the ``NOT_EQUAlS`` spelling
# mirrors the member name declared in parsers.expressions.
comparison_type_mapping = dict(
    EQ=ComparisonType.EQUALS,
    NEQ=ComparisonType.NOT_EQUAlS,
    LT=ComparisonType.LESS_THAN,
    LTE=ComparisonType.LESS_THAN_OR_EQUALS,
    GT=ComparisonType.GREATER_THAN,
    GTE=ComparisonType.GREATER_THAN_OR_EQUALS,
    IN=ComparisonType.IN,
    NIN=ComparisonType.NOT_IN,
)
def get_expr_node_from_test_node(full_text, test_node):
    """
    Returns EXPR, OR, NOT, AND object to ease the comparison with the real ExprNode

    Recursively converts a tree of test stand-in objects (EXPR, AND, OR, NOT,
    VAR and the comparison classes EQ/NEQ/GT/GTE/LT/LTE/IN/NIN) into the
    corresponding real expression nodes (NameExprNode, AndNode, OrNode,
    NotNode, VariableNode, ComparisonNode), resolving every node's token
    positions against ``full_text``.
    """
    # All start/end positions produced below are indices into this token list.
    full_text_as_tokens = list(Tokenizer(full_text, yield_eof=False))
    def get_pos(nodes):
        # Smallest token span covering all the already-built child nodes.
        start, end = None, None
        for n in nodes:
            if start is None or start > n.start:
                start = n.start
            if end is None or end < n.end:
                end = n.end
        return start, end
    def get_pos_from_source(source):
        # Token span of the literal ``source`` text inside the full stream.
        source_as_node = list(Tokenizer(source, yield_eof=False))
        start = tokens_index(full_text_as_tokens, source_as_node)
        end = start + len(source_as_node) - 1
        return start, end
    def get_expr_node(node):
        if isinstance(node, EXPR):
            # Plain name expression: locate its tokens in the full stream.
            value_as_tokens = list(Tokenizer(node.source, yield_eof=False))
            start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
            end = start + len(value_as_tokens) - 1
            return NameExprNode(start, end, full_text_as_tokens[start: end + 1])
        if isinstance(node, AND):
            parts = [get_expr_node(part) for part in node.parts]
            # Prefer an explicit source span when given; otherwise cover the parts.
            start, end = get_pos_from_source(node.source) if node.source else get_pos(parts)
            return AndNode(start, end, full_text_as_tokens[start: end + 1], *parts)
        if isinstance(node, OR):
            parts = [get_expr_node(part) for part in node.parts]
            start, end = get_pos_from_source(node.source) if node.source else get_pos(parts)
            return OrNode(start, end, full_text_as_tokens[start: end + 1], *parts)
        if isinstance(node, NOT):
            part = get_expr_node(node.expr)
            # NOTE(review): without an explicit source the node is assumed to
            # start two tokens before its operand — TODO confirm which two
            # tokens the negation contributes.
            start, end = get_pos_from_source(node.source) if node.source else (part.start - 2, part.end)
            return NotNode(start, end, full_text_as_tokens[start: end + 1], part)
        if isinstance(node, VAR):
            # Use the explicit source text when present, else the dotted name.
            value_as_tokens = list(Tokenizer(node.source or node.full_name, yield_eof=False))
            start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
            end = start + len(value_as_tokens) - 1
            # A dotted name becomes a VariableNode of head name + attribute path.
            parts = node.full_name.split(".")
            if len(parts) == 1:
                return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0])
            else:
                return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0], *parts[1:])
        if isinstance(node, (EQ, NEQ, GT, GTE, LT, LTE, IN, NIN)):
            # The stand-in class name selects the real ComparisonType.
            node_type = comparison_type_mapping[type(node).__name__]
            left_node, right_node = get_expr_node(node.left), get_expr_node(node.right)
            start, end = get_pos_from_source(node.source) if node.source else get_pos([left_node, right_node])
            return ComparisonNode(start, end, full_text_as_tokens[start: end + 1],
                                  node_type, left_node, right_node)
    return get_expr_node(test_node)
def _index(tokens, expr, index):
@@ -101,7 +286,7 @@ def get_node(
sub_expr.fix_pos(sub_expr.first)
sub_expr.fix_pos(sub_expr.last)
return sub_expr
#return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source()
# return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source()
if isinstance(sub_expr, SCN):
node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya)
@@ -128,7 +313,8 @@ def get_node(
sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
if hasattr(sub_expr, "compiled"):
for k, v in sub_expr.compiled.items():
node = get_node(concepts_map, expression_as_tokens, v, sya=sya, exclude_body=exclude_body) # need to get start and end positions
node = get_node(concepts_map, expression_as_tokens, v, sya=sya,
exclude_body=exclude_body) # need to get start and end positions
if isinstance(v, str) and v in concepts_map:
new_value_concept = concepts_map[v]
new_value = CC(Concept().update_from(new_value_concept), exclude_body=exclude_body)
@@ -214,3 +400,85 @@ def compute_expected_array(concepts_map, expression, expected, sya=False, init_e
sya=sya,
init_empty_body=init_empty_body,
exclude_body=exclude_body) for sub_expr in expected]
def get_unrecognized_node(start, text):
    """Wrap ``text`` in an UnrecognizedTokensNode whose first token sits at index ``start``."""
    token_list = list(Tokenizer(text, yield_eof=False))
    last = start + len(token_list) - 1
    return UnrecognizedTokensNode(start, last, token_list)
def get_source_code_node(start, text, concepts_map, id_manager=None):
    """
    Build a SourceCodeNode for ``text``, rewriting every concept name into a
    generated identifier before compiling the text as a Python expression.

    :param start: token index of the first token of ``text`` in the enclosing stream
    :param text: raw source code to wrap
    :param concepts_map: mapping of concept name -> concept object
    :param id_manager: optional identifier generator (a fresh
        CreateObjectIdentifiers is used when omitted)
    """
    manager = id_manager or CreateObjectIdentifiers()
    name_to_id = {}
    concepts_by_id = {}
    # Assign a generated "__C__" identifier to every concept and keep both
    # directions of the mapping.
    for name, concept in concepts_map.items():
        identifier = manager.get_identifier(concept, "__C__")
        name_to_id[name] = identifier
        concepts_by_id[identifier] = concept
    # Rewrite identifier tokens that refer to a known concept; every other
    # token passes through untouched.
    original_tokens = list(Tokenizer(text, yield_eof=False))
    rewritten_tokens = [
        Token(TokenKind.IDENTIFIER, name_to_id[tok.value], -1, -1, -1)
        if tok.type == TokenKind.IDENTIFIER and tok.value in name_to_id
        else tok
        for tok in original_tokens
    ]
    compilable_text = get_text_from_tokens(rewritten_tokens)
    # Compile the rewritten text as a single Python expression.
    tree = ast.parse(compilable_text, "<source>", 'eval')
    python_node = PythonNode(compilable_text, tree, text)
    python_node.objects = concepts_by_id
    return SourceCodeNode(start, start + len(original_tokens) - 1, original_tokens, text, python_node)
def resolve_test_concept(concept_map, hint):
    """
    Resolve a test hint into a concrete concept object.

    ``hint`` may be a plain concept name (str), a CC wrapper whose compiled
    values are resolved recursively, or a CMV wrapper. Any other hint kind
    (e.g. CV, CIO) is not supported yet.
    """
    if isinstance(hint, str):
        # A bare name: straight lookup in the map.
        return concept_map[hint]
    elif isinstance(hint, CC):
        base = concept_map[hint.concept_key]
        # Resolve every compiled value recursively before rebuilding the CC.
        resolved = {key: resolve_test_concept(concept_map, value)
                    for key, value in hint.compiled.items()}
        return CC(base, source=hint.source, exclude_body=hint.exclude_body, **resolved)
    elif isinstance(hint, CMV):
        base = concept_map[hint.concept_key]
        return CMV(base, **hint.variables)
    # CV / CIO (and any other) hints still need an implementation.
    raise NotImplementedError()
def get_rete_conditions(*conditions_as_string):
    """
    Transform a list of strings into a list of Condition (Rete conditions),
    wrapped in an AndConditions.

    :param conditions_as_string: conditions in the form 'identifier|attribute|value';
        when the identifier part starts with "#" it denotes a variable — the "#"
        marker is stripped before wrapping the name in V()
        ex : "#__x_00__|__name__|'__ret'" -> Condition(V('__x_00__'), '__name__', '__ret')
    Caution, the value part is evaluated:
        "identifier|__name__|'True'" -> Condition(identifier, '__name__', 'True') # the string 'True'
        "identifier|__name__|True" -> Condition(identifier, '__name__', True) # the bool True
        any other value is parsed as an int
    :return: an AndConditions over the parsed Condition objects
    """
    res = []
    for as_string in conditions_as_string:
        identifier, attribute, value = as_string.split("|")
        if identifier.startswith("#"):
            # "#name" marks a variable: drop the marker, wrap in V().
            identifier = V(identifier[1:])
        if value.startswith("'"):
            # Quoted value: keep it as a string with the quotes stripped.
            value = value[1:-1]
        elif value in ("True", "False"):
            value = (value == "True")
        else:
            # Anything else must be an integer literal.
            value = int(value)
        res.append(Condition(identifier, attribute, value))
    return AndConditions(res)