Implemented a first and basic version of a Rete rule engine

This commit is contained in:
2021-02-09 16:06:32 +01:00
parent 821dbed189
commit a2a8d5c5e5
110 changed files with 7301 additions and 1654 deletions
+184 -224
View File
@@ -1,183 +1,64 @@
from dataclasses import dataclass
from typing import List, Tuple, Callable
from itertools import product
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.builtin_helpers import only_successful, parse_unrecognized, get_inner_body, parse_python, \
get_lexer_nodes_using_positions
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import Node, BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, ParsingError
from core.sheerka.services.sheerka_service import FailedToCompileError
from core.tokenizer import TokenKind, Tokenizer, Keywords
from core.utils import get_text_from_tokens
from parsers.BaseNodeParser import UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \
ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode
from sheerkarete.common import V
from sheerkarete.conditions import Condition, AndConditions
class ExprNode(Node):
"""
Base ExprNode
eval() must be overridden
"""
class ReteConditionsEmitter:
def eval(self, obj):
return True
def __init__(self, context):
from parsers.ComparisonParser import ComparisonParser
self.context = context
self.comparison_parser = ComparisonParser()
self.var_counter = 0
self.variables = {}
def add_variable(self, target):
var_name = f"__x_{self.var_counter:02}__"
self.var_counter += 1
self.variables[target] = var_name
return var_name
@dataclass()
class LeftPartNotFoundError(ParsingError):
"""
When the expression starts with 'or' or 'and'
"""
pass
def init_variable_if_needed(self, node, res):
if node.name not in self.variables:
var_name = self.add_variable(node.name)
res.append(Condition(V(var_name), "__name__", node.name))
return V(self.variables[node.name])
class NameExprNode(ExprNode):
def __init__(self, tokens):
self.tokens = tokens
self.value = "".join([t.str_value for t in self.tokens])
def get_conditions(self, expr_nodes):
conditions = []
for expr_node in expr_nodes:
parsed_ret = self.comparison_parser.parse(self.context, expr_node.tokens)
if not parsed_ret.status:
raise FailedToCompileError(parsed_ret.body)
tree = parsed_ret.body.body
def eval(self, obj):
return self.value
if isinstance(tree, VariableNode):
var_name = self.init_variable_if_needed(tree, conditions)
if tree.attributes_str is not None:
conditions.append(Condition(var_name, tree.attributes_str, True))
def __repr__(self):
return f"NameExprNode('{self.value}')"
elif isinstance(tree, ComparisonNode):
if isinstance(tree.left, VariableNode):
left = self.init_variable_if_needed(tree.left, conditions)
attr = tree.left.attributes_str or "__self__"
right = eval(get_text_from_tokens(tree.right.tokens))
conditions.append(Condition(left, attr, right))
def __str__(self):
return self.value
@dataclass
class PropertyEqualsNode(ExprNode):
    """
    Matches objects whose attribute ``prop``, once converted with ``str()``,
    is equal to ``value``.
    """
    prop: str
    value: object

    def eval(self, obj):
        """Return False when ``obj`` has no ``prop`` attribute, else the equality result."""
        if not hasattr(obj, self.prop):
            return False
        actual = str(getattr(obj, self.prop))
        return actual == self.value
@dataclass()
class PropertyContainsNode(ExprNode):
    """
    Matches objects whose attribute ``prop``, once converted with ``str()``,
    contains ``value``.
    """
    prop: str
    value: object

    def eval(self, obj):
        """Return False when ``obj`` has no ``prop`` attribute, else the containment result."""
        if not hasattr(obj, self.prop):
            return False
        haystack = str(getattr(obj, self.prop))
        return self.value in haystack
@dataclass
class PropertyEqualsSequenceNode(ExprNode):
    """
    To use when the test must be done across parent and child.

    The last ``props`` / ``values`` pair is checked against the evaluated
    object, the previous pair against its parent, and so on up the chain.
    """
    props: List[str]
    values: List[object]

    def eval(self, obj):
        """
        Return True when every (prop, value) pair matches along the parent chain.

        An empty ``props`` list matches vacuously. A missing attribute, a
        differing value or a missing ancestor makes the whole test fail.
        """
        current = obj
        last = len(self.props) - 1
        for index in range(last, -1, -1):
            if index != last:
                # Move one level up before testing the previous pair.
                if hasattr(current, "get_parent"):
                    current = current.get_parent()
                else:
                    # An object without any parent link cannot satisfy the
                    # remaining pairs: treat it like a missing ancestor.
                    current = getattr(current, "parent", None)
                if current is None:
                    return False

            prop = self.props[index]
            if not hasattr(current, prop) or getattr(current, prop) != self.values[index]:
                return False

        return True
@dataclass()
class IsaNode(ExprNode):
    """
    Type test: behaves like ``isinstance`` when ``obj_class`` is a Python
    class, and compares against the concept key when it is a builtin concept
    or a string.
    """
    obj_class: object

    def eval(self, obj):
        """Return True when ``obj`` is of the expected class or concept."""
        expected = self.obj_class
        if isinstance(expected, type):
            return isinstance(obj, expected)
        if not isinstance(expected, (BuiltinConcepts, str)):
            return False
        return isinstance(obj, Concept) and str(expected) == obj.key
@dataclass()
class LambdaNode(ExprNode):
    """
    Generic expression to ease the tests: delegates the evaluation to an
    arbitrary callable.
    """
    lambda_exp: Callable[[object], bool]

    def eval(self, obj):
        """
        Return the result of ``lambda_exp(obj)``.

        A callable that raises is treated as a non-match. False is returned
        explicitly (rather than an implicit None) so that the result can be
        combined with ``&`` / ``|`` by AndNode and OrNode.
        """
        try:
            return self.lambda_exp(obj)
        except Exception:
            # Deliberately broad: the callable is user-supplied test code and
            # any failure simply means "does not match".
            return False
@dataclass(init=False)
class AndNode(ExprNode):
    """Conjunction of several expression nodes."""
    parts: Tuple[ExprNode]

    def __init__(self, *parts: ExprNode):
        self.parts = parts

    def eval(self, obj):
        """
        Evaluate the first two parts with short-circuit semantics, then fold
        every remaining part in with ``&``.
        """
        outcome = self.parts[0].eval(obj)
        if outcome:
            # Same as `first and second`: the second part is only looked at
            # when the first one is truthy.
            outcome = self.parts[1].eval(obj)
        for extra in self.parts[2:]:
            outcome &= extra.eval(obj)
        return outcome

    def __repr__(self):
        inner = ", ".join(repr(part) for part in self.parts)
        return "AndNode(" + inner + ")"

    def __str__(self):
        return " and ".join(str(part) for part in self.parts)
@dataclass(init=False)
class OrNode(ExprNode):
    """Disjunction of several expression nodes."""
    parts: Tuple[ExprNode]

    def __init__(self, *parts: ExprNode):
        self.parts = parts

    def eval(self, obj):
        """
        Evaluate the first two parts with short-circuit semantics, then fold
        every remaining part in with ``|``.
        """
        outcome = self.parts[0].eval(obj)
        if not outcome:
            # Same as `first or second`: the second part is only looked at
            # when the first one is falsy.
            outcome = self.parts[1].eval(obj)
        for extra in self.parts[2:]:
            outcome |= extra.eval(obj)
        return outcome

    def __repr__(self):
        inner = ", ".join(repr(part) for part in self.parts)
        return "OrNode(" + inner + ")"

    def __str__(self):
        return " or ".join(str(part) for part in self.parts)
@dataclass()
class NotNode(ExprNode):
    """Logical negation of the wrapped expression node."""
    node: ExprNode

    def eval(self, obj):
        """Return the boolean negation of the wrapped node's evaluation."""
        inner_result = self.node.eval(obj)
        return not inner_result
class FalseNode(ExprNode):
    """Expression node that never matches."""

    def eval(self, obj):
        """Return False whatever ``obj`` is."""
        return False
class TrueNode(ExprNode):
    """Expression node that always matches."""

    def eval(self, obj):
        """Return True whatever ``obj`` is."""
        return True
return [AndConditions(conditions)]
class ExpressionParser(BaseParser):
@@ -191,6 +72,15 @@ class ExpressionParser(BaseParser):
def __init__(self, **kwargs):
    """
    Set up the parser and pre-tokenize the connector snippets (' and ',
    ' and not ', 'not ') that are re-inserted between lexer nodes when
    conjunctions are compiled.
    """
    super().__init__("Expression", 50, False, yield_eof=True)

    def tokenize(text):
        # Connector snippets never carry an EOF token.
        return list(Tokenizer(text, yield_eof=False))

    self.and_tokens = tokenize(" and ")
    self.and_not_tokens = tokenize(" and not ")
    self.not_tokens = tokenize("not ")
@staticmethod
def clean_parenthesis_nodes(nodes):
    """
    Replace, in place, every ParenthesisNode of ``nodes`` by the node it wraps.

    :param nodes: mutable list of expression nodes, modified in place
    """
    nodes[:] = [
        item.node if isinstance(item, ParenthesisNode) else item
        for item in nodes
    ]
def parse(self, context, parser_input: ParserInput):
"""
@@ -232,6 +122,7 @@ class ExpressionParser(BaseParser):
return ret
def parse_or(self):
start = self.parser_input.pos
expr = self.parse_and()
token = self.parser_input.token
if token.type != TokenKind.IDENTIFIER or token.value != "or":
@@ -243,14 +134,19 @@ class ExpressionParser(BaseParser):
expr = self.parse_and()
if expr is None:
self.add_error(UnexpectedEofParsingError("When parsing 'or'"))
return OrNode(*parts)
end = self.parser_input.pos
self.clean_parenthesis_nodes(parts)
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
parts.append(expr)
token = self.parser_input.token
return OrNode(*parts)
end = parts[-1].end
self.clean_parenthesis_nodes(parts)
return OrNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
def parse_and(self):
expr = self.parse_names()
start = self.parser_input.pos
expr = self.parse_not()
token = self.parser_input.token
if token.type != TokenKind.IDENTIFIER or token.value != "and":
return expr
@@ -258,27 +154,46 @@ class ExpressionParser(BaseParser):
parts = [expr]
while token.type == TokenKind.IDENTIFIER and token.value == "and":
self.parser_input.next_token()
expr = self.parse_names()
expr = self.parse_not()
if expr is None:
self.add_error(UnexpectedEofParsingError("When parsing 'and'"))
return AndNode(*parts)
end = self.parser_input.pos
self.clean_parenthesis_nodes(parts)
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
parts.append(expr)
token = self.parser_input.token
return AndNode(*parts)
end = parts[-1].end
self.clean_parenthesis_nodes(parts)
return AndNode(start, end, self.parser_input.tokens[start: end + 1], *parts)
def parse_not(self):
    """
    Parse an optional chain of 'not' keywords followed by a name expression.

    :returns: a NotNode wrapping the negated expression, the result of
        parse_names() when the current token is not 'not', or None when
        nothing could be parsed after 'not'
    """
    token = self.parser_input.token
    if token.type != TokenKind.IDENTIFIER or token.value != "not":
        return self.parse_names()

    start = self.parser_input.pos
    self.parser_input.next_token()
    parsed = self.parse_not()
    if parsed is None:
        # Nothing usable after 'not'. When the input simply ended, report it;
        # otherwise parse_names() has already recorded the reason in the
        # error sink. Either way there is no node to negate.
        if self.parser_input.token.type == TokenKind.EOF:
            self.add_error(UnexpectedEofParsingError("When parsing 'not'"))
        return None

    # The parenthesis wrapper is only positional: negate the inner node.
    node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed
    return NotNode(start,
                   parsed.end,
                   self.parser_input.tokens[start: parsed.end + 1],
                   node)
def parse_names(self):
def stop():
return token.type == TokenKind.EOF or \
paren_count == 0 and token.type == TokenKind.RPAR or \
token.type == TokenKind.IDENTIFIER and token.value in ("and", "or")
token.type == TokenKind.IDENTIFIER and token.value in ("and", "or", "not")
token = self.parser_input.token
if token.type == TokenKind.EOF:
return None
if token.type == TokenKind.LPAR:
start = self.parser_input.pos
self.parser_input.next_token()
expr = self.parse_or()
token = self.parser_input.token
@@ -286,14 +201,18 @@ class ExpressionParser(BaseParser):
self.error_sink.append(
UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
return expr
end = self.parser_input.pos
self.parser_input.next_token()
return expr
return ParenthesisNode(start, end, None, expr)
buffer = []
paren_count = 0
last_paren = None
start = self.parser_input.pos
while not stop():
buffer.append(token)
if token.type == TokenKind.LPAR:
last_paren = token
paren_count += 1
if token.type == TokenKind.RPAR:
paren_count -= 1
@@ -305,65 +224,106 @@ class ExpressionParser(BaseParser):
self.error_sink.append(LeftPartNotFoundError())
return None
if paren_count != 0:
self.error_sink.append(ParenthesisMismatchError(last_paren))
return None
if buffer[-1].type == TokenKind.WHITESPACE:
buffer.pop()
return NameExprNode(buffer)
end = start + len(buffer) - 1
return NameExprNode(start, end, buffer)
def compile_conjunctions(self, context, conjunctions, who):
"""
Transform a list of conjunctions (AND and OR) into one or multiple CompiledExpr
:param context:
:param conjunctions: list of ExprNode
:param who: service that calls the method
:returns: List Of CompiledExpr
Raises FailedToCompileError if a conjunction cannot be parsed
"""
recognized = []
for conjunction in conjunctions:
# try to recognize conjunction, one by one
# negative conjunction can be a concept starting with 'not'
parsed_ret = parse_unrecognized(
context,
conjunction.get_value(), # we remove the 'NOT' part when needed to ease the recognition
parsers="all",
who=who,
prop=Keywords.WHEN,
filter_func=only_successful)
class ExpressionVisitor:
"""
Pythonic implementation of visitors for ExprNode
"""
if parsed_ret.status:
recognized.append(get_inner_body(context, parsed_ret.body))
else:
raise FailedToCompileError(parsed_ret.body)
def visit(self, expr_node):
name = expr_node.__class__.__name__
# for each conjunction, we have a list of recognized concepts (or python node)
# we need a cartesian product of the results
# Explanation for later
# conjunction[0] : 'x is a y' that can be resolved with two concepts c:|1001: and c:|1002:
# conjunction[1] : 'y is an z' that can also be resolved with two concepts (c:|1003: and c:|1004)
# so to understand the full question 'x is a y and y is an z'
# we can have c:|1001: then c:|1003:
# or c:|1001: then c:|1004:
# or c:|1002: then c:|1003:
# or c:|1002: then c:|1004:
# if one of these combinations works, it means that the question 'x is a y and y is an z' was matched
# hence the cartesian product
product_of_recognized = list(product(*recognized))
method = 'visit_' + name
visitor = getattr(self, method, self.generic_visit)
return visitor(expr_node)
return_values = []
for recognized_conjunctions in product_of_recognized:
if len(recognized_conjunctions) == 1 and not isinstance(conjunctions[0], NotNode):
return_values.append(recognized_conjunctions[0])
elif len(recognized_conjunctions) == 1 and recognized_conjunctions[0].who == "parsers.Python":
# it is a negated python Node. Need to parse again
ret = parse_python(context, source=str(conjunctions[0]))
if ret.status:
return_values.append(ret)
else:
# find a way to track the failure
pass
else:
# complex result. Use PythonWithNode
lexer_nodes = get_lexer_nodes_using_positions(recognized_conjunctions,
self._get_positions(conjunctions))
def generic_visit(self, expr_node):
"""Called if no explicit visitor function exists for a node."""
for field, value in expr_node.__dict__.items():
if isinstance(value, (list, tuple)):
for item in value:
if isinstance(item, ExprNode):
self.visit(item)
elif isinstance(value, ExprNode):
self.visit(value)
# put back the 'and' / 'not' node
for i in range(len(lexer_nodes) - 1, 0, -1):
end = lexer_nodes[i].start - 1
start = lexer_nodes[i - 1].end + 1
if isinstance(conjunctions[i], NotNode):
lexer_nodes.insert(i, UnrecognizedTokensNode(start, end, self.and_not_tokens))
else:
lexer_nodes.insert(i, UnrecognizedTokensNode(start, end, self.and_tokens))
# add the starting 'not' if needed
# and reindex the following positions
if isinstance(conjunctions[0], NotNode):
lexer_nodes[0].start = 2
lexer_nodes.insert(0, UnrecognizedTokensNode(0, 1, self.not_tokens))
class TrueifyVisitor(ExpressionVisitor):
"""
Visit an ExprNode
replace all the nodes containing a variable to 'trueify' with True
The node containing both variables to trueify and to skip are skipped
"""
python_with_concept_node_ret = PythonWithConceptsParser().parse_nodes(context, lexer_nodes)
if not python_with_concept_node_ret.status:
# find a way to track the failure
pass
return_values.append(python_with_concept_node_ret)
def __init__(self, to_trueify, to_skip):
self.to_trueify = to_trueify
self.to_skip = to_skip
rete_cond_emitter = ReteConditionsEmitter(context)
rete_disjunctions = rete_cond_emitter.get_conditions(conjunctions)
def visit_AndNode(self, expr_node):
parts = []
for part in expr_node.parts:
parts.append(self.visit(part))
return AndNode(*parts)
return return_values, rete_disjunctions
def visit_OrNode(self, expr_node):
parts = []
for part in expr_node.parts:
parts.append(self.visit(part))
return OrNode(*parts)
def visit_NameExprNode(self, expr_node):
return_true = False
for t in expr_node.tokens:
if t.type == TokenKind.IDENTIFIER:
if t.value in self.to_skip:
return expr_node
if t.value in self.to_trueify:
return_true = True
return NameExprNode([Token(TokenKind.IDENTIFIER, "True", -1, -1, -1)]) if return_true else expr_node
@staticmethod
def _get_positions(expr_nodes):
    """
    Yield the nodes to use as position references for ``expr_nodes``.

    NotNodes are replaced by an ExprNode whose start and tokens skip the first
    two tokens (presumably the 'not' keyword and the whitespace after it),
    because the 'not' part is removed before recognition. Every other node is
    yielded unchanged.
    """
    for node in expr_nodes:
        if not isinstance(node, NotNode):
            yield node
            continue
        yield ExprNode(node.start + 2, node.end, node.tokens[2:])