Refactored sheerka class: split it into sub-handlers. Refactored unit tests to use classes.

This commit is contained in:
2020-01-22 17:49:28 +01:00
parent 821614a6c4
commit c489a38ebc
120 changed files with 7947 additions and 8190 deletions
+152
View File
@@ -0,0 +1,152 @@
from core.builtin_concepts import BuiltinConcepts, ListConcept
from core.concept import Concept
import ast
import core.utils
import logging
log = logging.getLogger(__name__)
class NodeParent:
"""
Class that represents the ancestor of a node.
For example, the 'For' node has three fields (target, iter and body).
So, for a node under For.iter:
node -> For
field -> iter
"""
def __init__(self, node, field):
self.node = node
self.field = field
def __repr__(self):
if self.node is None:
return "None"
if self.field is None:
return self.node.get_node_type()
return self.node.get_node_type() + "." + self.field
def __eq__(self, other):
# allow comparison with a (node_type, field) tuple for convenience
if isinstance(other, tuple):
return self.node.get_node_type() == other[0] and self.field == other[1]
# normal equals implementation
if not isinstance(other, NodeParent):
return False
return self.node.get_node_type() == other.node.get_node_type() and self.field == other.field
def __hash__(self):
return hash((self.node.get_node_type(), self.field))
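# Illustrative sketch (hypothetical node): a NodeParent compares equal to a
# ("NodeType", "field") tuple, which is how the visitors use it, e.g. ("For", "target") in get_parents(node):
#   parent = NodeParent(for_concept, "target")   # for_concept: a GenericNodeConcept("For", ...)
#   parent == ("For", "target")                  # -> True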
class NodeConcept(Concept):
def __init__(self, key, node_type, parent: NodeParent):
super().__init__(key, True, False, key)
self.parent = parent
self.node_type = node_type
def get_node_type(self):
return self.node_type
class GenericNodeConcept(NodeConcept):
def __init__(self, node_type, parent):
super().__init__(BuiltinConcepts.GENERIC_NODE, node_type, parent)
def __repr__(self):
return "Generic:" + self.node_type
def get_node_type(self):
return self.node_type
def get_value(self):
if self.node_type == "Name":
return self.get_prop("id")
if self.node_type == "arg":
return self.get_prop("arg")
return self.body
class IdentifierNodeConcept(NodeConcept):
def __init__(self, parent, name):
super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Name", parent)
self.body = name
class CallNodeConcept(NodeConcept):
def __init__(self, parent=None):
super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Call", parent)
def get_args_names(self, sheerka):
return sheerka.get_values(self.get_prop("args"))
def python_to_concept(python_node):
"""
Transforms a Python AST node into a tree of concept nodes
for easier manipulation
:param python_node:
:return:
"""
def _transform(node, parent):
node_type = node.__class__.__name__
concept = GenericNodeConcept(node_type, parent).init_key()
for field in node._fields:
if not hasattr(node, field):
continue
value = getattr(node, field)
concept.def_prop(field)
if isinstance(value, list):
lst = ListConcept().init_key()
for i in value:
lst.append(_transform(i, NodeParent(concept, field)))
concept.set_prop(field, lst)
elif isinstance(value, ast.AST):
concept.set_prop(field, _transform(value, NodeParent(concept, field)))
else:
concept.set_prop(field, value)
concept.metadata.is_evaluated = True
return concept
return _transform(python_node, None)
def concept_to_python(concept_node):
"""
Transforms a concept node back into a Python AST node
:param concept_node:
:return:
"""
def _transform(node):
node_type = node.get_node_type()
ast_object = core.utils.new_object("_ast." + node_type)
for field in node.props:
if field not in ast_object._fields:
continue
value = node.get_prop(field)
if isinstance(value, list) or isinstance(value, Concept) and value.key == str(BuiltinConcepts.LIST):
lst = []
for i in value:
lst.append(_transform(i))
setattr(ast_object, field, lst)
elif isinstance(value, NodeConcept):
setattr(ast_object, field, _transform(value))
else:
setattr(ast_object, field, value)
return ast_object
res = _transform(concept_node)
return res
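# Round-trip sketch (illustrative; assumes core.utils.new_object can resolve "_ast.*" classes):
#   tree = ast.parse("a + 1", mode="eval")
#   concept = python_to_concept(tree)        # Expression -> tree of GenericNodeConcept
#   restored = concept_to_python(concept)    # back to an _ast.Expression node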
+130
View File
@@ -0,0 +1,130 @@
from core.ast.nodes import GenericNodeConcept, NodeConcept
from core.builtin_concepts import ListConcept
class ConceptNodeVisitor:
"""
Base class to visit NodeConcept objects.
It is shamelessly inspired by Python's ast.NodeVisitor class.
"""
def visit(self, node):
"""Visit a node."""
name = node.node_type if isinstance(node, GenericNodeConcept) else node.name
name = str(name).capitalize()
method = 'visit_' + name
visitor = getattr(self, method, self.generic_visit)
return visitor(node)
def generic_visit(self, node):
"""Called if no explicit visitor function exists for a node."""
for field, value in iter_props(node):
if isinstance(value, ListConcept):
for item in value:
if isinstance(item, NodeConcept):
self.visit(item)
elif isinstance(value, NodeConcept):
self.visit(value)
def visit_Constant(self, node):
value = node.get_prop("value")
type_name = _const_node_type_names.get(type(value))
if type_name is None:
for cls, name in _const_node_type_names.items():
if isinstance(value, cls):
type_name = name
break
if type_name is not None:
method = 'visit_' + type_name
try:
visitor = getattr(self, method)
except AttributeError:
pass
else:
import warnings
warnings.warn(f"{method} is deprecated; add visit_Constant",
PendingDeprecationWarning, 2)
return visitor(node)
return self.generic_visit(node)
class UnreferencedNamesVisitor(ConceptNodeVisitor):
def __init__(self, sheerka):
self.names = set()
self.sheerka = sheerka
def visit_Name(self, node):
parents = get_parents(node)
if ("For", "target") in parents: # variable used by the 'for' iteration
return
if ("Call", "func") in parents: # name of the function
return
if ("Assign", "targets") in parents: # variable which is assigned
return
if self.can_be_discarded(self.sheerka.value(node), parents):
return
self.names.add(self.sheerka.value(node))
def can_be_discarded(self, variable_name, parents):
for node in (parent.node for parent in parents):
if node is None:
return False
if node.get_node_type() == "For" and self.sheerka.value(node.get_prop("target")) == variable_name:
# variable used by the loop
return True
if node.get_node_type() == "FunctionDef":
# variable defined as a function parameter
args = node.get_prop("args")
args_values = list(self.sheerka.get_values(args.get_prop("args")))
if variable_name in args_values:
return True
return False
class ExtractPredicateVisitor(ConceptNodeVisitor):
def __init__(self, variable_name):
self.predicates = []
self.variable_name = variable_name
def get_parents(node):
if node.parent is None:
return []
res = []
while True:
if node.parent is None:
break
res.append(node.parent)
node = node.parent.node
return res
def iter_props(node):
for p in node.props:
yield p, node.props[p].value
_const_node_type_names = {
bool: 'NameConstant', # should be before int
type(None): 'NameConstant',
int: 'Num',
float: 'Num',
complex: 'Num',
str: 'Str',
bytes: 'Bytes',
type(...): 'Ellipsis',
}
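# Usage sketch (assumes a configured Sheerka instance; the expression is illustrative):
#   import ast, core.ast.nodes
#   tree = core.ast.nodes.python_to_concept(ast.parse("f(x) and y > 1", mode="eval"))
#   v = UnreferencedNamesVisitor(sheerka)
#   v.visit(tree)
#   v.names   # -> {'x', 'y'} here; 'f' is skipped because it is the called function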
+387
View File
@@ -0,0 +1,387 @@
from enum import Enum
from core.concept import Concept, ConceptParts
class BuiltinConcepts(Enum):
"""
List of builtin concepts that do not need any specific implementation.
Please note that the value of each enum member is informal; it is not used in the system.
For example, the concept 'NODE' DOES NOT store a key, an id or anything else here.
The key is the name of the concept.
The id is a sequential number assigned just before the concept is saved in sdp.
"""
SHEERKA = "sheerka"
BEFORE_PARSING = "before parsing" # activated before evaluation by the parsers
PARSING = "parsing" # activated during the parsing. It contains the text to parse
AFTER_PARSING = "after parsing" # after parsing
BEFORE_EVALUATION = "before evaluation" # before evaluation
EVALUATION = "evaluation" # activated when the parsing process seems to be finished
AFTER_EVALUATION = "after evaluation" # activated when the parsing process seems to be finished
BEFORE_RENDERING = "before rendering" # activate before the output is rendered
RENDERING = "rendering" # rendering the response from sheerka
AFTER_RENDERING = "after rendering" # rendering the response from sheerka
USER_INPUT = "user input" # represent an input from an user
SUCCESS = "success"
ERROR = "error"
UNKNOWN_CONCEPT = "unknown concept" # the request concept is not recognized
CANNOT_RESOLVE_CONCEPT = "cannot resolve concept" # when too many concepts with the same name
RETURN_VALUE = "return value" # a value is returned
CONCEPT_TOO_LONG = "concept too long" # concept cannot be processed by exactConcept parser
NEW_CONCEPT = "new concept" # when a new concept is added
UNKNOWN_PROPERTY = "unknown property" # when requesting an unknown property
PARSER_RESULT = "parser result"
TOO_MANY_SUCCESS = "too many success" # when expecting a limited number of successful return values
TOO_MANY_ERRORS = "too many errors" # when expecting a limited number of return values and only errors were found
NOT_FOR_ME = "not for me" # a parser recognize that the entry is not meant for it
IS_EMPTY = "is empty" # when a set is empty
INVALID_RETURN_VALUE = "invalid return value" # the return value of an evaluator is not correct
CONCEPT_ALREADY_DEFINED = "concept already defined" # when you try to add the same concept twice
NOP = "no operation" # no operation concept. Does nothing
CONCEPT_EVAL_ERROR = "concept evaluation error" # cannot evaluate a property or metadata of a concept
ENUMERATION = "enum" # represents a list or a set
LIST = "list" # represents a list
CONCEPT_ALREADY_IN_SET = "concept already in set"
EVALUATOR_PRE_PROCESS = "evaluator pre process" # used to modify / tweak the behaviour of evaluators
CONCEPT_EVAL_REQUESTED = "concept eval requested"
REDUCE_REQUESTED = "reduce requested" # remove meaningless error when possible
NOT_A_SET = "not a set" # the concept has no entry in sets
NODE = "node"
GENERIC_NODE = "generic node"
IDENTIFIER_NODE = "identifier node"
def __repr__(self):
return "__" + self.name
def __str__(self):
return "__" + self.name
BuiltinUnique = [
BuiltinConcepts.BEFORE_PARSING,
BuiltinConcepts.PARSING,
BuiltinConcepts.AFTER_PARSING,
BuiltinConcepts.BEFORE_EVALUATION,
BuiltinConcepts.EVALUATION,
BuiltinConcepts.AFTER_EVALUATION,
BuiltinConcepts.BEFORE_RENDERING,
BuiltinConcepts.RENDERING,
BuiltinConcepts.AFTER_RENDERING,
BuiltinConcepts.SUCCESS,
BuiltinConcepts.NOP,
BuiltinConcepts.CONCEPT_EVAL_REQUESTED,
BuiltinConcepts.REDUCE_REQUESTED,
]
BuiltinErrors = [str(e) for e in {
BuiltinConcepts.ERROR,
BuiltinConcepts.UNKNOWN_CONCEPT,
BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
BuiltinConcepts.CONCEPT_TOO_LONG,
BuiltinConcepts.UNKNOWN_PROPERTY,
BuiltinConcepts.TOO_MANY_SUCCESS,
BuiltinConcepts.TOO_MANY_ERRORS,
BuiltinConcepts.INVALID_RETURN_VALUE,
BuiltinConcepts.CONCEPT_ALREADY_DEFINED,
BuiltinConcepts.CONCEPT_EVAL_ERROR,
BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
BuiltinConcepts.NOT_A_SET,
}]
"""
Some concepts have a specific implementation.
This is mainly to ease their usage.
"""
class UserInputConcept(Concept):
def __init__(self, text=None, user_name=None):
super().__init__(BuiltinConcepts.USER_INPUT, True, False, BuiltinConcepts.USER_INPUT)
self.set_metadata_value(ConceptParts.BODY, text)
self.set_prop("user_name", user_name)
self.metadata.is_evaluated = True
@property
def text(self):
return self.body
@property
def user_name(self):
return self.props["user_name"].value
def __repr__(self):
return f"({self.id}){self.name}: '{self.body}'"
class ErrorConcept(Concept):
def __init__(self, error=None):
super().__init__(BuiltinConcepts.ERROR, True, False, BuiltinConcepts.ERROR)
self.set_metadata_value(ConceptParts.BODY, error)
self.metadata.is_evaluated = True
def __repr__(self):
return f"({self.id}){self.name}: {self.body}"
class UnknownConcept(Concept):
def __init__(self, metadata=None):
super().__init__(BuiltinConcepts.UNKNOWN_CONCEPT, True, False, BuiltinConcepts.UNKNOWN_CONCEPT)
self.set_metadata_value(ConceptParts.BODY, metadata)
self.metadata.is_evaluated = True
def __repr__(self):
return f"({self.id}){self.name}: {self.body}"
class ReturnValueConcept(Concept):
"""
This class represents the result of a data flow processing
It's the main input for the evaluators
"""
def __init__(self, who=None, status=None, value=None, message=None, parents=None):
super().__init__(BuiltinConcepts.RETURN_VALUE, True, False, BuiltinConcepts.RETURN_VALUE)
self.set_metadata_value(ConceptParts.BODY, value)
self.set_prop("who", who)
self.set_prop("status", status)
self.set_prop("message", message)
self.set_prop("parents", parents)
self.metadata.is_evaluated = True
@property
def who(self):
return self.props["who"].value
@who.setter
def who(self, value):
self.set_prop("who", value)
@property
def status(self):
return self.props["status"].value
@status.setter
def status(self, value):
self.set_prop("status", value)
@property
def value(self):
return self.body
@value.setter
def value(self, value):
self.set_metadata_value(ConceptParts.BODY, value)
@property
def message(self):
return self.props["message"].value
@message.setter
def message(self, value):
self.set_prop("message", value)
@property
def parents(self):
return self.props["parents"].value
@parents.setter
def parents(self, value):
self.set_prop("parents", value)
def __repr__(self):
return f"ReturnValue(who={self.who}, status={self.status}, value={self.value}, message={self.message})"
def __eq__(self, other):
if not isinstance(other, ReturnValueConcept):
return False
return self.who == other.who and \
self.status == other.status and \
self.value == other.value and \
self.message == other.message
def __hash__(self):
if hasattr(self.value, "__iter__") and not isinstance(self.value, str):
value_hash = hash(tuple(self.value))
else:
value_hash = hash(self.value)
return hash((self.who, self.status, value_hash))
class UnknownPropertyConcept(Concept):
"""
This error is raised when, during sheerka.new(), an unknown property is requested
"""
def __init__(self, property_name=None, concept=None):
super().__init__(BuiltinConcepts.UNKNOWN_PROPERTY, True, False, BuiltinConcepts.UNKNOWN_PROPERTY)
self.set_metadata_value(ConceptParts.BODY, property_name)
self.set_prop("concept", concept)
self.metadata.is_evaluated = True
def __repr__(self):
return f"UnknownProperty(property={self.property_name}, concept={self.concept})"
@property
def concept(self):
return self.props["concept"].value
@property
def property_name(self):
return self.body
class ParserResultConcept(Concept):
"""
Result of a parsing
"""
def __init__(self, parser=None, source=None, value=None, try_parsed=None):
super().__init__(BuiltinConcepts.PARSER_RESULT, True, False, BuiltinConcepts.PARSER_RESULT)
self.set_metadata_value(ConceptParts.BODY, value)
self.set_prop("parser", parser)
self.set_prop("source", source)
self.set_prop("try_parsed", try_parsed) # in case of error, what was found before the error
self.metadata.is_evaluated = True
def __repr__(self):
text = f"ParserResult(parser={self.props['parser'].value}"
source = self.props['source'].value
text += f", source='{source}')" if source else f", body='{self.body}')"
return text
def __eq__(self, other):
if not isinstance(other, ParserResultConcept):
return False
return self.source == other.source and \
self.parser == other.parser and \
self.body == other.body and \
self.try_parsed == other.try_parsed
def __hash__(self):
return hash(self.metadata.name)
@property
def value(self):
return self.body
@property
def try_parsed(self):
return self.props["try_parsed"].value
@property
def source(self):
return self.props["source"].value
@property
def parser(self):
return self.props["parser"].value
class InvalidReturnValueConcept(Concept):
"""
Error returned when an evaluator is not correctly coded
The accepted return values are:
a ReturnValueConcept, a list of ReturnValueConcept, or None
"""
def __init__(self, return_value=None, evaluator=None):
super().__init__(
BuiltinConcepts.INVALID_RETURN_VALUE,
True,
False,
BuiltinConcepts.INVALID_RETURN_VALUE)
self.set_metadata_value(ConceptParts.BODY, return_value)
self.set_prop("evaluator", evaluator)
self.metadata.is_evaluated = True
class ConceptEvalError(Concept):
def __init__(self, error=None, concept=None, property_name=None):
super().__init__(BuiltinConcepts.CONCEPT_EVAL_ERROR,
True,
False,
BuiltinConcepts.CONCEPT_EVAL_ERROR)
self.set_metadata_value(ConceptParts.BODY, error)
self.set_prop("concept", concept)
self.set_prop("property_name", property_name)
self.metadata.is_evaluated = True
def __repr__(self):
return f"ConceptEvalError(error={self.error}, concept={self.concept}, property={self.property_name})"
@property
def error(self):
return self.body
@property
def concept(self):
return self.props["concept"].value
@property
def property_name(self):
return self.props["property_name"].value
class EnumerationConcept(Concept):
def __init__(self, iteration=None):
super().__init__(BuiltinConcepts.ENUMERATION, True, False, BuiltinConcepts.ENUMERATION)
self.set_metadata_value(ConceptParts.BODY, iteration)
self.metadata.is_evaluated = True
def __iter__(self):
return iter(self.body)
class ListConcept(Concept):
def __init__(self, items=None):
super().__init__(BuiltinConcepts.LIST, True, False, BuiltinConcepts.LIST)
self.set_metadata_value(ConceptParts.BODY, items or [])
self.metadata.is_evaluated = True
def append(self, obj):
self.body.append(obj)
def __len__(self):
return len(self.body)
def __getitem__(self, key):
return self.body[key]
def __setitem__(self, key, value):
self.body[key] = value
def __iter__(self):
return iter(self.body)
def __contains__(self, item):
return item in self.body
class ConceptAlreadyInSet(Concept):
def __init__(self, concept=None, concept_set=None):
super().__init__(BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
True,
False,
BuiltinConcepts.CONCEPT_ALREADY_IN_SET)
self.set_metadata_value(ConceptParts.BODY, concept)
self.set_prop("concept_set", concept_set)
self.metadata.is_evaluated = True
def __repr__(self):
return f"ConceptAlreadyInSet(concept={self.concept}, concept_set={self.concept_set})"
@property
def concept(self):
return self.body
@property
def concept_set(self):
return self.props["concept_set"].value
+214
View File
@@ -0,0 +1,214 @@
import ast
import logging
import core.ast.nodes
from core.ast.nodes import CallNodeConcept, GenericNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts
def is_same_success(sheerka, return_values):
"""
Returns True if all return values are successful and have the same value
:param sheerka:
:param return_values:
:return:
"""
assert isinstance(return_values, list)
if not return_values[0].status:
return False
reference = sheerka.value(return_values[0].value)
for return_value in return_values[1:]:
if not return_value.status:
return False
actual = sheerka.value(return_value.value)
if actual != reference:
return False
return True
def expect_one(context, return_values, logger=None):
"""
Checks that there is at least one successful return value.
If there is more than one, checks whether they all carry the same value.
:param context:
:param return_values:
:param logger:
:return:
"""
if not isinstance(return_values, list):
return return_values
sheerka = context.sheerka
if len(return_values) == 0:
return sheerka.ret(
context.who,
False,
sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values),
parents=return_values)
successful_results = [item for item in return_values if item.status]
number_of_successful = len(successful_results)
# total_items = len(return_values)
# remove errors when a winner is found
if number_of_successful == 1:
return sheerka.ret(
context.who,
True,
successful_results[0].body,
parents=return_values)
# too many winners, which one to choose ?
if number_of_successful > 1:
if is_same_success(sheerka, successful_results):
return sheerka.ret(
context.who,
True,
successful_results[0].value,
parents=return_values)
else:
if logger and logger.isEnabledFor(logging.DEBUG):
context.log(logger, f"Too many successful results found by expect_one()", context.who)
for s in successful_results:
context.log(logger, f"-> {s}", context.who)
return sheerka.ret(
context.who,
False,
sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS, body=successful_results),
parents=return_values)
# only errors, i cannot help you
if logger and logger.isEnabledFor(logging.DEBUG):
context.log(logger, f"Too many errors found by expect_one()", context.who)
for s in return_values:
context.log(logger, f"-> {s}", context.who)
if len(return_values) == 1:
return sheerka.ret(
context.who,
False,
return_values[0],
parents=return_values)
else:
return sheerka.ret(
context.who,
False,
sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values),
parents=return_values)
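# Behaviour sketch of expect_one() (ok(x)/err(e) stand for successful/failed return values):
#   []                -> err(IS_EMPTY)
#   [ok(x)]           -> ok(x)
#   [ok(x), ok(x)]    -> ok(x)                  # identical winners, errors are dropped
#   [ok(x), ok(y)]    -> err(TOO_MANY_SUCCESS)
#   [err(a)]          -> err(a)
#   [err(a), err(b)]  -> err(TOO_MANY_ERRORS)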
def get_names(sheerka, concept_node):
"""
Finds all the names referenced by the concept_node
:param sheerka:
:param concept_node:
:return:
"""
unreferenced_names_visitor = UnreferencedNamesVisitor(sheerka)
unreferenced_names_visitor.visit(concept_node)
return list(unreferenced_names_visitor.names)
def extract_predicates(sheerka, expression, variables_to_include, variables_to_exclude):
"""
From a given expression and a list of variables,
tries to find all the predicates referencing those variables, and only those variables.
For example, for the expression
exp : isinstance(a, int) and isinstance(b, str)
it will return 'isinstance(a, int)' if variables_to_include == ['a']
:param sheerka:
:param expression:
:param variables_to_include:
:param variables_to_exclude:
:return: list of predicates
"""
if len(variables_to_include) == 0:
return []
def _get_predicates(_nodes):
_predicates = []
for _node in _nodes:
python_node = ast.Expression(body=core.ast.nodes.concept_to_python(_node))
python_node = ast.fix_missing_locations(python_node)
_predicates.append(python_node)
return _predicates
if isinstance(expression, str):
node = ast.parse(expression, mode="eval")
else:
raise NotImplementedError()
concept_node = core.ast.nodes.python_to_concept(node)
main_op = concept_node.get_prop("body")
return _get_predicates(_extract_predicates(sheerka, main_op, variables_to_include, variables_to_exclude))
def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclude):
predicates = []
def _matches(_names, to_include, to_exclude):
_res = None
for n in _names:
if n in to_include and _res is None:
_res = True
if n in to_exclude:
_res = False
return _res
if node.node_type == "Compare":
if node.get_prop("left").node_type == "Name":
"""Simple case of one comparison"""
comparison_name = sheerka.value(node.get_prop("left"))
if comparison_name in variables_to_include and comparison_name not in variables_to_exclude:
predicates.append(node)
else:
"""The left part is an expression"""
res = _extract_predicates(sheerka, node.get_prop("left"), variables_to_include, variables_to_exclude)
if len(res) > 0:
predicates.append(node)
elif node.node_type == "Call":
"""Simple case predicate"""
call_node = node if isinstance(node, CallNodeConcept) else CallNodeConcept().update_from(node)
args = list(call_node.get_args_names(sheerka))
if _matches(args, variables_to_include, variables_to_exclude):
predicates.append(node)
elif node.node_type == "UnaryOp" and node.get_prop("op").node_type == "Not":
"""Simple case of negation"""
res = _extract_predicates(sheerka, node.get_prop("operand"), variables_to_include, variables_to_exclude)
if len(res) > 0:
predicates.append(node)
elif node.node_type == "BinOp":
names = get_names(sheerka, node)
if _matches(names, variables_to_include, variables_to_exclude):
predicates.append(node)
elif node.node_type == "BoolOp":
all_op = True
temp_res = []
for op in node.get_prop("values"):
res = _extract_predicates(sheerka, op, variables_to_include, variables_to_exclude)
if len(res) == 0:
all_op = False
else:
temp_res.extend(res)
if all_op:
predicates.append(node)
else:
for res in temp_res:
predicates.append(res)
return predicates
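# Usage sketch (assumes a configured Sheerka instance; mirrors the docstring example):
#   preds = extract_predicates(sheerka,
#                              "isinstance(a, int) and isinstance(b, str)",
#                              variables_to_include=["a"],
#                              variables_to_exclude=[])
#   # -> a single ast.Expression wrapping the 'isinstance(a, int)' call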
+405
View File
@@ -0,0 +1,405 @@
import hashlib
from collections import namedtuple
from dataclasses import dataclass, field
from enum import Enum
from core.sheerka_logger import get_logger
import core.utils
from core.tokenizer import Tokenizer, TokenKind
PROPERTIES_FOR_DIGEST = ("name", "key",
"definition", "definition_type",
"is_builtin", "is_unique",
"where", "pre", "post", "body",
"desc", "props")
PROPERTIES_TO_SERIALIZE = PROPERTIES_FOR_DIGEST + tuple(["id"])
PROPERTIES_FOR_NEW = ("where", "pre", "post", "body", "desc")
VARIABLE_PREFIX = "__var__"
class ConceptParts(Enum):
"""
Lists metadata that can contains some code
"""
WHERE = "where"
PRE = "pre"
POST = "post"
BODY = "body"
@staticmethod
def get_parts():
return set(item.value for item in ConceptParts)
@dataclass
class ConceptMetadata:
name: str
is_builtin: bool
is_unique: bool
key: str # name of the concept, where props are replaced, to ease search
body: str # main method, can also be the value of the concept
where: str # condition to recognize variables in name
pre: str # list of pre conditions before calling the main function
post: str # list of post conditions after calling the main function
definition: str # regex used to define the concept
definition_type: str # definition can be done with something else than regex
desc: str # possible description for the concept
id: str # unique identifier for a concept. The id will never be modified (but the key can)
props: list # list of properties, with their default values
is_evaluated: bool = False # True if the concept is evaluated by sheerka.eval_concept()
simplec = namedtuple("concept", "name body") # for simple concept (tests purposes only)
class Concept:
"""
Default concept object
A concept is the base object of our universe
Everything is a concept
"""
def __init__(self, name=None,
is_builtin=False,
is_unique=False,
key=None,
body=None,
where=None,
pre=None,
post=None,
definition=None,
definition_type=None,
desc=None,
id=None,
props=None):
metadata = ConceptMetadata(
str(name) if name else None,
is_builtin,
is_unique,
str(key) if key else None,
body,
where,
pre,
post,
definition,
definition_type,
desc,
id,
props or []
)
self.metadata = metadata
self.compiled = {} # cached ast for the where, pre, post and body parts
self.values = {} # values of metadata once resolved
self.props = {} # resolved properties of this concept
self.bnf = None
self.log = get_logger("core." + self.__class__.__name__)
self.init_log = get_logger("init.core." + self.__class__.__name__)
def __repr__(self):
return f"({self.metadata.id}){self.metadata.name}"
def __eq__(self, other):
if isinstance(other, simplec):
return self.name == other.name and self.body == other.body
if id(self) == id(other):
return True
if not isinstance(other, Concept):
return False
# check the metadata
for prop in PROPERTIES_TO_SERIALIZE:
# print(prop)  # useful to know which prop does not match
my_value = getattr(self.metadata, prop)
other_value = getattr(other.metadata, prop)
if isinstance(my_value, Concept) and isinstance(other_value, Concept):
# need to check if circular references
if id(self) == id(other):
continue
sub_value = getattr(other_value.metadata, prop)
while isinstance(sub_value, Concept):
if id(self) == id(sub_value):
return False # circular reference
sub_value = getattr(sub_value.metadata, prop)
if my_value != other_value:
return False
else:
if my_value != other_value:
return False
# checks the values
if len(self.values) != len(other.values):
return False
for metadata in self.values:
if self.get_metadata_value(metadata) != other.get_metadata_value(metadata):
return False
if len(self.props) != len(other.props):
return False
for prop in self.props:
if self.get_prop(prop) != other.get_prop(prop):
return False
return True
def __hash__(self):
return hash(self.metadata.name)
def __getattr__(self, item):
# I have this complicated implementation because of the usage of Pickle
if 'props' in vars(self) and item in self.props:
return self.props[item].value
name = self.name if 'metadata' in vars(self) else 'Concept'
raise AttributeError(f"'{name}' concept has no attribute '{item}'")
def def_prop(self, prop_name: str, default_value=None):
"""
Adds a property to the metadata
:param prop_name:
:param default_value:
:return:
"""
assert default_value is None or isinstance(default_value, str) # default properties will have to be evaluated
self.metadata.props.append((prop_name, default_value))
self.props[prop_name] = Property(prop_name, None) # do not set the default value
# why not set props to the default values?
# Because they may not be the real values, as metadata.props still needs to be evaluated
return self
def def_prop_by_index(self, index: int, value):
"""
Re-assign a value to a property (mainly used by ExactConceptParser)
:param index:
:param value:
:return:
"""
assert value is None or isinstance(value, str) # default properties will have to be evaluated
prop = self.metadata.props[index]
self.metadata.props[index] = (prop[0], value)
return self
@property
def name(self):
return self.metadata.name
@property
def id(self):
return self.metadata.id
@property
def key(self):
return self.metadata.key
def init_key(self, tokens=None):
"""
Create the key for this concept.
Must be called only when the concept is fully initialized.
The method is not called set_key to make sure that no other class sets the key by mistake.
:param tokens:
:return:
"""
if self.metadata.key is not None:
return self
if tokens is None:
tokens = list(Tokenizer(self.metadata.name))
variables = [p[0] for p in self.metadata.props] if len(core.utils.strip_tokens(tokens, True)) > 1 else []
key = ""
first = True
for token in tokens:
if token.type == TokenKind.EOF:
break
if token.type == TokenKind.WHITESPACE:
continue
if not first:
key += " " # spaces are normalized
if token.value in variables:
key += VARIABLE_PREFIX + str(variables.index(token.value))
else:
key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
first = False
self.metadata.key = key
return self
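# Key construction sketch (hypothetical concept; the exact output depends on the Tokenizer):
#   c = Concept(name="say 'hello' to user", props=[("user", None)])
#   c.init_key().key   # -> something like "say hello to __var__0"
#   # quotes stripped, whitespace normalized, the 'user' prop replaced by an indexed placeholder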
@property
def body(self):
return self.values[ConceptParts.BODY] if ConceptParts.BODY in self.values else None
def add_codes(self, codes):
"""
Gets the ASTs for 'where', 'pre', 'post' and 'body'
These ASTs are known when the concept is freshly parsed,
so the values are kept in cache.
For concepts loaded from sdp, these ASTs must be created again.
TODO : Seems to be a service method. Could be put somewhere else.
:param codes:
:return:
"""
if codes is None:
return
for key in codes:
self.compiled[key] = codes[key]
return self
def get_digest(self):
"""
Returns the digest of the concept
:return: hex form of the sha256
"""
return hashlib.sha256(f"Concept:{self.to_dict(PROPERTIES_FOR_DIGEST)}".encode("utf-8")).hexdigest()
def to_dict(self, props_to_use=None):
"""
Returns a dict representing 'self'
:return:
"""
props_to_use = props_to_use or PROPERTIES_TO_SERIALIZE
props_as_dict = dict((prop, getattr(self.metadata, prop)) for prop in props_to_use)
return props_as_dict
def from_dict(self, as_dict):
"""
Initializes 'self' from a dict
:param as_dict:
:return:
"""
for prop in PROPERTIES_TO_SERIALIZE:
if prop in as_dict:
if prop == "props":
for name, value in as_dict[prop]:
self.def_prop(name, value)
else:
setattr(self.metadata, prop, as_dict[prop])
return self
def update_from(self, other):
"""
Updates self using the properties of another concept.
This method mimics the class-to-instance pattern:
'other' is the class (the template) and 'self' is a new instance.
:param other:
:return:
"""
if other is None:
return self
if id(other) == id(self):
return self
# update metadata
self.from_dict(other.to_dict())
# update values
for k, v in other.values.items():
self.values[k] = v
# update properties
for k, v in other.props.items():
self.set_prop(k, v.value)
return self
def set_prop(self, prop_name: str, prop_value):
"""Directly sets a value to a property"""
self.props[prop_name] = Property(prop_name, prop_value)
return self
def get_prop(self, prop_name: str):
return self.props[prop_name].value
def set_metadata_value(self, metadata: ConceptParts, value):
"""
Set the resolved value of a metadata (not the metadata itself)
:param metadata:
:param value:
:return:
"""
self.values[metadata] = value
def get_metadata_value(self, metadata: ConceptParts):
"""
Gets the resolved value of a metadata
:param metadata:
:return:
"""
return self.values[metadata]
def auto_init(self):
"""
Sometimes (for test purposes)
you don't need the full evaluation process to get the values of the concept.
Directly use the values of the metadata.
:return:
"""
if self.metadata.is_evaluated:
return self
for metadata in ConceptParts:
value = getattr(self.metadata, metadata.value)
if value is not None:
self.values[metadata] = value
for prop, value in self.metadata.props:
self.set_prop(prop, value)
self.metadata.is_evaluated = True
return self
class Property:
"""
Defines the variables of a concept.
It has its own class because, from experience,
property management is more complex than a key/value pair.
"""
def __init__(self, name, value):
self.name = name
self.value = value
def __repr__(self):
return f"{self.name}={self.value}"
def __eq__(self, other):
if not isinstance(other, Property):
return False
return self.name == other.name and self.value == other.value
def __hash__(self):
return hash((self.name, self.value))
@dataclass()
class DoNotResolve:
"""
This class is used to indicate that the metadata (or the prop) of the concept must not be evaluated
through sheerka.execute.
For example, if you want to set a value for the BODY that will not change
when the concept is evaluated,
set concept.compiled[BODY] to DoNotResolve(value)
"""
value: object
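# Sketch, following the docstring above (illustrative value):
#   c = Concept(name="answer").auto_init()
#   c.compiled[ConceptParts.BODY] = DoNotResolve(42)   # keep 42 as-is, never re-evaluated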
+203
View File
@@ -0,0 +1,203 @@
import logging
import time
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from sdp.sheerkaDataProvider import Event
DEBUG_TAB_SIZE = 4
class ExecutionContext:
"""
To keep track of the execution of a request
"""
ids = {}
@staticmethod
def get_id(event_digest):
if event_digest in ExecutionContext.ids:
ExecutionContext.ids[event_digest] += 1
else:
ExecutionContext.ids[event_digest] = 0
return ExecutionContext.ids[event_digest]
def __init__(self,
who,
event: Event,
sheerka,
desc: str = None,
**kwargs):
self._parent = None
self._id = ExecutionContext.get_id(event.get_digest())
self._tab = ""
self._bag = {} # other variables
self._start = 0
self._stop = 0
self.who = who # who is asking
self.event = event # what was the (original) trigger
self.sheerka = sheerka # sheerka
self.desc = desc # human description of what is going on
self.children = []
self.preprocess = None
self.inputs = {} # what were the parameters of the execution context
self.values = {} # what was produced by the execution context
self.obj = kwargs.pop("obj", None)
self.concepts = kwargs.pop("concepts", {})
# update the other elements
for k, v in kwargs.items():
self._bag[k] = v
@property
def elapsed(self):
if self._start == 0:
return 0
return (self._stop if self._stop > 0 else time.time_ns()) - self._start
@property
def elapsed_str(self):
nano_sec = self.elapsed
dt = nano_sec / 1e6
return f"{dt} ms" if dt < 1000 else f"{dt / 1000} s"
@property
def id(self):
return self._id
def __getattr__(self, item):
if item in self._bag:
return self._bag[item]
raise AttributeError(f"'ExecutionContext' object has no attribute '{item}'")
def __enter__(self):
self._start = time.time_ns()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self._stop = time.time_ns()
def __repr__(self):
msg = f"ExecutionContext(who={self.who}, id={self._id}"
if self.desc:
msg += f", desc='{self.desc}'"
msg += ")"
return msg
def add_preprocess(self, name, **kwargs):
preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS)
preprocess.set_prop("name", name)
for k, v in kwargs.items():
preprocess.set_prop(k, v)
if not self.preprocess:
self.preprocess = set()
self.preprocess.add(preprocess)
return self
def add_inputs(self, **kwargs):
for k, v in kwargs.items():
self.inputs[k] = v
return self
def add_values(self, **kwargs):
for k, v in kwargs.items():
self.values[k] = v
return self
def get_concept(self, key):
# search in obj
if isinstance(self.obj, Concept):
if self.obj.key == key:
return self.obj
for prop in self.obj.props:
if prop == key:
value = self.obj.props[prop].value
if isinstance(value, Concept):
return value
# search in concepts
if self.concepts:
for k, c in self.concepts.items():
if k == key:
return c
return self.sheerka.get(key)
def new_concept(self, key, **kwargs):
# search in obj
if self.obj:
if self.obj.key == key:
return self.sheerka.new_from_template(self.obj, key, **kwargs)
for prop in self.obj.props:
if prop == key:
value = self.obj.props[prop].value
if isinstance(value, Concept):
return self.sheerka.new_from_template(value, key, **kwargs)
else:
return value
if self.concepts:
for k, c in self.concepts.items():
if k == key:
return self.sheerka.new_from_template(c, key, **kwargs)
return self.sheerka.new(key, **kwargs)
def push(self, who=None, desc=None, **kwargs):
who = who or self.who
_kwargs = {"obj": self.obj, "concepts": self.concepts}
_kwargs.update(self._bag)
_kwargs.update(kwargs)
new = ExecutionContext(
who,
self.event,
self.sheerka,
desc,
**_kwargs,
)
new._parent = self
new._tab = self._tab + " " * DEBUG_TAB_SIZE
new.preprocess = self.preprocess
self.children.append(new)
return new
def log_new(self, logger):
logger.debug(f"[{self._id:2}]" + self._tab + str(self))
def log(self, logger, message, who=None):
logger.debug(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))
def log_error(self, logger, message, who=None):
logger.exception(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))
def log_result(self, logger, return_values):
if not logger.isEnabledFor(logging.DEBUG):
return
if len(return_values) == 0:
logger.debug(self._tab + "No return value")
for r in return_values:
to_str = self.return_value_to_str(r)
logger.debug(f"[{self._id:2}]" + self._tab + "-> " + to_str)
def to_dict(self):
from core.sheerka_transform import SheerkaTransform
st = SheerkaTransform(self.sheerka)
return st.to_dict(self)
@staticmethod
def return_value_to_str(r):
value = str(r.value)
if len(value) > 50:
value = value[:47] + "..."
to_str = f"ReturnValue(who={r.who}, status={r.status}, value={value})"
return to_str
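# Usage sketch (assumes an Event and a Sheerka instance are already available):
#   with ExecutionContext("caller", event, sheerka, desc="outer step") as ctx:
#       with ctx.push(desc="inner step") as sub:
#           sub.add_inputs(text="hello")
#       print(ctx.elapsed_str)   # time spent so far in the outer context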
+600
View File
@@ -0,0 +1,600 @@
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept, BuiltinErrors, BuiltinUnique, \
UnknownConcept
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_NEW
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka.SheerkaCreateNewConcept import SheerkaCreateNewConcept
from core.sheerka.SheerkaDump import SheerkaDump
from core.sheerka.SheerkaEvaluateConcept import SheerkaEvaluateConcept
from core.sheerka.SheerkaExecute import SheerkaExecute
from core.sheerka.SheerkaSetsManager import SheerkaSetsManager
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event
import core.utils
import core.builtin_helpers
from core.sheerka_logger import console_handler
import logging
# CONCEPT_EVALUATION_STEPS = [
# BuiltinConcepts.BEFORE_EVALUATION,
# BuiltinConcepts.EVALUATION,
# BuiltinConcepts.AFTER_EVALUATION]
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
class Sheerka(Concept):
"""
Main controller for the project
"""
CONCEPTS_ENTRY = "All_Concepts" # to store all the concepts
CONCEPTS_BY_ID_ENTRY = "Concepts_By_ID"
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
def __init__(self, skip_builtins_in_db=False, debug=False, loggers=None):
self.init_logging(debug, loggers)
super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
self.log.debug("Starting Sheerka.")
# cache of the most used concepts
# Note that these are only templates
# They are used as a footprint for instantiation
# except, of course, when the concept is supposed to be unique
# the dict key is the key of the concept (not the name or the id)
self.cache_by_key = {}
self.cache_by_id = {}
# cache for concept definitions,
# Primarily used for unit tests that do not have access to sdp
self.concepts_definition_cache = {}
#
# cache for concepts grammars
# a grammar is a resolved BNF
self.concepts_grammars = {}
# a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance
# TODO: manage contexts
self.instances = []
# List of the known rules by the system
# ex: hello => say('hello')
self.rules = []
self.sdp: SheerkaDataProvider = None # SheerkaDataProvider
self.builtin_cache = {} # cache for builtin concepts
self.parsers = {} # cache for builtin parsers
self.evaluators = [] # cache for builtin evaluators
self.evaluators_prefix: str = None
self.parsers_prefix: str = None
self.skip_builtins_in_db = skip_builtins_in_db
self.execute_handler = SheerkaExecute(self)
self.create_new_concept_handler = SheerkaCreateNewConcept(self)
self.dump_handler = SheerkaDump(self)
self.sets_handler = SheerkaSetsManager(self)
self.evaluate_concept_handler = SheerkaEvaluateConcept(self)
def initialize(self, root_folder: str = None):
"""
Starting Sheerka
Loads the current configuration
Note that the first time it runs, it also creates the needed working folders
:param root_folder: root configuration folder
:return: ReturnValue(Success or Error)
"""
try:
self.sdp = SheerkaDataProvider(root_folder)
if self.sdp.first_time:
self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)
event = Event("Initializing Sheerka.")
self.sdp.save_event(event)
exec_context = ExecutionContext(self.key, event, self)
self.initialize_builtin_concepts()
self.initialize_builtin_parsers()
self.initialize_builtin_evaluators()
self.initialize_concepts_definitions(exec_context)
except IOError as e:
return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e)
return ReturnValueConcept(self, True, self)
def initialize_builtin_concepts(self):
"""
Initializes the builtin concepts
:return: None
"""
self.init_log.debug("Initializing builtin concepts")
builtins_classes = self.get_builtins_classes_as_dict()
# all this initialization of the builtins seems to be a little bit complicated
# why do we need to update it from the DB ?
for key in BuiltinConcepts:
concept = self if key == BuiltinConcepts.SHEERKA \
else builtins_classes[str(key)]() if str(key) in builtins_classes \
else Concept(key, True, False, key)
if key in BuiltinUnique:
concept.metadata.is_unique = True
concept.metadata.is_evaluated = True
if not concept.metadata.is_unique and str(key) in builtins_classes:
self.builtin_cache[key] = builtins_classes[str(key)]
if not self.skip_builtins_in_db:
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key)
if from_db is None:
self.init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
self.set_id_if_needed(concept, True)
self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
else:
self.init_log.debug(f"Found concept '{from_db}' in db. Updating.")
concept.update_from(from_db)
self.add_in_cache(concept)
def initialize_builtin_parsers(self):
"""
Init the parsers
:return:
"""
core.utils.init_package_import("parsers")
base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
for parser in core.utils.get_sub_classes("parsers", base_class):
if parser.__module__ == base_class.__module__:
continue
self.init_log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
def initialize_builtin_evaluators(self):
"""
Init the evaluators
:return:
"""
core.utils.init_package_import("evaluators")
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"):
self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"):
self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)
def initialize_concepts_definitions(self, execution_context):
self.init_log.debug("Initializing concepts definitions")
definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)
if definitions is None:
self.init_log.debug("No BNF defined")
return
lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS]()
ret_val = lexer_parser.initialize(execution_context, definitions)
if not ret_val.status:
self.init_log.error("Failed to initialize concepts definitions " + str(ret_val.body))
return
self.concepts_grammars = lexer_parser.concepts_grammars
def reset_cache(self, filter_to_use=None):
"""
reset the different cache that exists
:param filter_to_use:
:return:
"""
if filter_to_use is None:
self.cache_by_key = {}
self.cache_by_id = {}
else:
raise NotImplementedError()
return self
def evaluate_user_input(self, text: str, user_name="kodjo"):
"""
Note to KSI: If you try to add an execution context to this function,
you may end up in an infinite loop
:param text:
:param user_name:
:return:
"""
self.log.debug(f"Processing user input '{text}', {user_name=}.")
event = Event(text, user_name)
evt_digest = self.sdp.save_event(event)
self.log.debug(f"{evt_digest=}")
with ExecutionContext(self.key, event, self, f"Evaluating '{text}'") as execution_context:
user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name))
reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED))
steps = [
BuiltinConcepts.BEFORE_PARSING,
BuiltinConcepts.PARSING,
BuiltinConcepts.AFTER_PARSING,
BuiltinConcepts.BEFORE_EVALUATION,
BuiltinConcepts.EVALUATION,
BuiltinConcepts.AFTER_EVALUATION
]
ret = self.execute(execution_context, [user_input, reduce_requested], steps)
execution_context.add_values(return_values=ret)
if not self.skip_builtins_in_db:
self.sdp.save_result(execution_context)
return ret
def execute(self, execution_context, return_values, execution_steps, logger=None):
"""
Executes process for all initial contexts
:param execution_context:
:param return_values:
:param execution_steps:
:param logger: logger to use (if not directly called by sheerka)
:return:
"""
return self.execute_handler.execute(execution_context, return_values, execution_steps, logger)
def set_id_if_needed(self, obj: Concept, is_builtin: bool):
"""
Set the id for the concept if needed
For test purposes only !!!!!
:param obj:
:param is_builtin:
:return:
"""
if obj.metadata.id is not None:
return
entry = self.BUILTIN_CONCEPTS_KEYS if is_builtin else self.USER_CONCEPTS_KEYS
obj.metadata.id = self.sdp.get_next_key(entry)
self.log.debug(f"Setting id '{obj.metadata.id}' to concept '{obj.metadata.name}'.")
def create_new_concept(self, context, concept: Concept, logger=None):
"""
Adds a new concept to the system
:param context:
:param concept: DefConceptNode
:param logger
:return: digest of the new concept
"""
return self.create_new_concept_handler.create_new_concept(context, concept, logger)
def add_concept_to_set(self, context, concept, concept_set, logger=None):
"""
Add an entry in sdp to tell that concept isa concept_set
:param context:
:param concept:
:param concept_set:
:param logger:
:return:
"""
return self.sets_handler.add_concept_to_set(context, concept, concept_set, logger)
def get_set_elements(self, concept):
"""
Concept is supposed to be a set
Returns all elements of the set
:param concept:
:return:
"""
return self.sets_handler.get_set_elements(concept)
def evaluate_concept(self, context, concept: Concept, logger=None):
"""
Evaluates a concept.
This means that if the where clause is True, the body will be evaluated.
:param context:
:param concept:
:param logger:
:return: value of the evaluation or error
"""
return self.evaluate_concept_handler.evaluate_concept(context, concept, logger)
def add_in_cache(self, concept: Concept):
"""
Adds a concept template in cache.
The cache is used as a proxy before looking at sdp
:param concept:
:return:
"""
# sanity check
if concept.key is None:
concept.init_key()
if concept.key is None:
raise KeyError()
self.cache_by_key[concept.key] = concept
if concept.id:
self.cache_by_id[concept.id] = concept
return concept
def get(self, concept_key, concept_id=None):
"""
Tries to find a concept
What is returned must be used as a template for another concept.
You must not modify the returned concept.
:param concept_key: key of the concept
:param concept_id: when multiple concepts with the same key, use the id
:return:
"""
if concept_key is None:
return ErrorConcept("Concept key is undefined.")
if isinstance(concept_key, BuiltinConcepts):
concept_key = str(concept_key)
# first search in cache
result = self.cache_by_key[concept_key] if concept_key in self.cache_by_key else \
self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key)
if result and (concept_id is None or not isinstance(result, list)):
return result
if isinstance(result, list):
if concept_id:
for c in result:
if c.id == concept_id:
return c
else:
return result
metadata = [("key", concept_key), ("id", concept_id)] if concept_id else ("key", concept_key)
return self._get_unknown(metadata)
def get_by_id(self, concept_id):
if concept_id is None:
return ErrorConcept("Concept id is undefined.")
# first search in cache
result = self.cache_by_id[concept_id] if concept_id in self.cache_by_id else \
self.sdp.get_safe(self.CONCEPTS_BY_ID_ENTRY, concept_id)
return result or self._get_unknown(('id', concept_id))
def get_concept_definition(self):
if self.concepts_definition_cache:
return self.concepts_definition_cache
self.concepts_definition_cache = self.sdp.get_safe(
self.CONCEPTS_DEFINITIONS_ENTRY,
load_origin=False) or {}
return self.concepts_definition_cache
def new(self, concept_key, **kwargs):
"""
Returns an instance of a new concept
When the concept is supposed to be unique, returns the same instance
:param concept_key:
:param kwargs:
:return:
"""
if isinstance(concept_key, tuple):
concept_key, concept_id = concept_key[0], concept_key[1]
else:
concept_id = None
template = self.get(concept_key, concept_id)
# manage concept not found
if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \
concept_key != BuiltinConcepts.UNKNOWN_CONCEPT:
return template
if isinstance(template, list):
# if template is a list, it means that there are multiple concepts under the same key
concepts = [self.new_from_template(t, concept_key, **kwargs) for t in template]
return concepts
else:
return self.new_from_template(template, concept_key, **kwargs)
def new_from_template(self, template, key, **kwargs):
# manage singleton
if template.metadata.is_unique:
return template
# otherwise, create another instance
concept = self.builtin_cache[key]() if key in self.builtin_cache else Concept()
concept.update_from(template)
if len(kwargs) == 0:
return concept
# update the properties, values, attributes
# Not quite sure that this is the correct process order
for k, v in kwargs.items():
if k in concept.props:
concept.set_prop(k, v)
elif k in PROPERTIES_FOR_NEW:
concept.values[ConceptParts(k)] = v
elif hasattr(concept, k):
setattr(concept, k, v)
else:
return self.new(BuiltinConcepts.UNKNOWN_PROPERTY, body=k, concept=concept)
# TODO : add the concept to the list of known concepts (self.instances)
concept.metadata.is_evaluated = True
return concept
def ret(self, who: str, status: bool, value, message=None, parents=None):
"""
Creates and returns a ReturnValue concept
:param who:
:param status:
:param value:
:param message:
:param parents:
:return:
"""
return self.new(
BuiltinConcepts.RETURN_VALUE,
who=who,
status=status,
value=value,
message=message,
parents=parents)
def value(self, obj, reduce_simple_list=False):
if obj is None:
return None
if hasattr(obj, "get_value"):
return obj.get_value()
if not isinstance(obj, Concept):
return obj
if obj.body is None:
return obj
if reduce_simple_list and (isinstance(obj.body, list) or isinstance(obj.body, set)) and len(obj.body) == 1:
body_to_use = obj.body[0]
else:
body_to_use = obj.body
return self.value(body_to_use)
def get_values(self, objs):
if not (isinstance(objs, list) or
self.isinstance(objs, BuiltinConcepts.LIST) or
self.isinstance(objs, BuiltinConcepts.ENUMERATION)):
objs = [objs]
return (self.value(obj) for obj in objs)
def is_success(self, obj):
if isinstance(obj, bool): # quick win
return obj
if isinstance(obj, ReturnValueConcept):
return obj.status
if isinstance(obj, Concept) and obj.metadata.is_builtin and obj.key in BuiltinErrors:
return False
return obj
def is_known(self, obj):
if not isinstance(obj, Concept):
return True
return obj.key != str(BuiltinConcepts.UNKNOWN_CONCEPT)
def isinstance(self, a, b):
"""
Returns True if the concept a is an instance of the concept b
:param a:
:param b:
:return:
"""
if isinstance(a, BuiltinConcepts): # common KSI error ;-)
raise SyntaxError("Remember that the first parameter of isinstance MUST be a concept")
if not isinstance(a, Concept):
return False
b_key = b.key if isinstance(b, Concept) else str(b)
return a.key == b_key
def isa(self, a, b):
return self.sets_handler.isa(a, b)
def isagroup(self, concept):
return self.sets_handler.isagroup(concept)
def get_evaluator_name(self, name):
if self.evaluators_prefix is None:
base_evaluator_class = core.utils.get_class("evaluators.BaseEvaluator.BaseEvaluator")
self.evaluators_prefix = base_evaluator_class.PREFIX
return self.evaluators_prefix + name
def get_parser_name(self, name):
if self.parsers_prefix is None:
base_parser_class = core.utils.get_class("parsers.BaseParser.BaseParser")
self.parsers_prefix = base_parser_class.PREFIX
return self.parsers_prefix + name
def concepts(self):
res = []
lst = self.sdp.list(self.CONCEPTS_ENTRY)
for item in lst:
if isinstance(item, list):
res.extend(item)
else:
res.append(item)
return sorted(res, key=lambda i: int(i.id))
def test(self):
return f"I have access to Sheerka !"
def test_error(self):
raise Exception("I can raise an error")
@staticmethod
def _get_unknown(metadata):
"""
Returns the concept 'UnknownConcept' for a requested id or key
Note that I don't call the new() method to prevent cyclic call
:param metadata:
:return:
"""
# metadata is a list of tuples that contains the known metadata for this concept
# ex : ('key', 'not_found')
# or
# ('id', invalid_id)
#
# the metadata can be a list, if several attributes were given
# ('key', 'not_found'), ('id', invalid_id)
unknown_concept = UnknownConcept()
unknown_concept.set_metadata_value(ConceptParts.BODY, metadata)
for meta in (metadata if isinstance(metadata, list) else [metadata]):
unknown_concept.set_prop(meta[0], meta[1])
unknown_concept.metadata.is_evaluated = True
return unknown_concept
@staticmethod
def get_builtins_classes_as_dict():
res = {}
for c in core.utils.get_classes("core.builtin_concepts"):
if issubclass(c, Concept) and c != Concept:
res[c().metadata.key] = c
return res
@staticmethod
def init_logging(debug, loggers):
core.sheerka_logger.set_enabled(loggers)
if debug:
# log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
log_format = "%(asctime)s [%(levelname)s] %(message)s"
log_level = logging.DEBUG
else:
log_format = "%(message)s"
log_level = logging.INFO
logging.basicConfig(format=log_format, level=log_level, handlers=[console_handler])
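# Typical top-level flow (sketch; the inputs are illustrative):
#   sheerka = Sheerka(debug=True)
#   ret = sheerka.initialize()                    # sdp, builtin concepts, parsers, evaluators
#   if sheerka.is_success(ret):
#       results = sheerka.evaluate_user_input("hello")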
@@ -0,0 +1,99 @@
from core.builtin_concepts import BuiltinConcepts, ErrorConcept
from core.concept import Concept
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
class SheerkaCreateNewConcept:
"""
Manage the creation of a new concept
"""
def __init__(self, sheerka):
self.sheerka = sheerka
self.logger_name = self.create_new_concept.__name__
def create_new_concept(self, context, concept: Concept, logger=None):
"""
Adds a new concept to the system
:param context:
:param concept: DefConceptNode
:param logger
:return: digest of the new concept
"""
logger = logger or self.sheerka.log
concept.init_key()
concepts_definitions = None
init_ret_value = None
# checks for duplicate concepts
# TODO checks if it exists in cache first
if self.sheerka.sdp.exists(self.sheerka.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
error = SheerkaDataProviderDuplicateKeyError(self.sheerka.CONCEPTS_ENTRY + "." + concept.key, concept)
return self.sheerka.ret(
self.logger_name,
False,
self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept),
error.args[0])
# set id before saving in db
self.sheerka.set_id_if_needed(concept, False)
# add the BNF if known
if concept.bnf:
concepts_definitions = self.sheerka.get_concept_definition()
concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules
concept_lexer_parser = self.sheerka.parsers[CONCEPT_LEXER_PARSER_CLASS]()
with context.push(self.sheerka.name, desc=f"Initializing concept definition for {concept}") as sub_context:
sub_context.concepts[concept.key] = concept # the concept is not in the real cache yet
sub_context.log_new(logger)
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
sub_context.add_values(return_values=init_ret_value)
if not init_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
# save the new concept in sdp
try:
# TODO : needs to make these calls atomic (or at least one single call)
self.sheerka.sdp.add(
context.event.get_digest(),
self.sheerka.CONCEPTS_ENTRY,
concept,
use_ref=True)
self.sheerka.sdp.add(
context.event.get_digest(),
self.sheerka.CONCEPTS_BY_ID_ENTRY,
{concept.id: concept.get_digest()},
is_ref=True)
if concepts_definitions is not None:
self.sheerka.sdp.set(
context.event.get_digest(),
self.sheerka.CONCEPTS_DEFINITIONS_ENTRY,
concepts_definitions,
use_ref=True)
except SheerkaDataProviderDuplicateKeyError as error:
context.log_error(logger, "Failed to create a new concept.", who=self.logger_name)
return self.sheerka.ret(
self.logger_name,
False,
self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept),
error.args[0])
# Updates the caches
self.sheerka.cache_by_key[concept.key] = self.sheerka.sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key)
self.sheerka.cache_by_id[concept.id] = concept
if init_ret_value is not None and init_ret_value.status:
self.sheerka.concepts_grammars = init_ret_value.body
# process the return value if needed
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
@@ -0,0 +1,44 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
class SheerkaDump:
def __init__(self, sheerka):
self.sheerka = sheerka
def dump_concepts(self):
lst = self.sheerka.sdp.list(self.sheerka.CONCEPTS_ENTRY)
for item in lst:
if hasattr(item, "__iter__"):
for i in item:
self.sheerka.log.info(i)
else:
self.sheerka.log.info(item)
def dump_definitions(self):
defs = self.sheerka.sdp.get(self.sheerka.CONCEPTS_DEFINITIONS_ENTRY)
self.sheerka.log.info(defs)
def dump_desc(self, *concept_names):
first = True
for concept_name in concept_names:
if isinstance(concept_name, Concept):
concepts = concept_name
else:
concepts = self.sheerka.get(concept_name)
if self.sheerka.isinstance(concepts, BuiltinConcepts.UNKNOWN_CONCEPT):
self.sheerka.log.error(f"Concept '{concept_name}' is unknown")
return False
if not hasattr(concepts, "__iter__"):
concepts = [concepts]
for c in concepts:
if not first:
self.sheerkalog.info("")
self.sheerkalog.info(f"name : {c.name}")
self.sheerkalog.info(f"bnf : {c.metadata.definition}")
self.sheerkalog.info(f"key : {c.key}")
self.sheerkalog.info(f"body : {c.body}")
self.sheerkalog.info(f"digest : {c.get_digest()}")
first = False
@@ -0,0 +1,195 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DoNotResolve, ConceptParts
import core.builtin_helpers
CONCEPT_EVALUATION_STEPS = [
BuiltinConcepts.BEFORE_EVALUATION,
BuiltinConcepts.EVALUATION,
BuiltinConcepts.AFTER_EVALUATION]
class SheerkaEvaluateConcept:
def __init__(self, sheerka):
self.sheerka = sheerka
self.logger_name = self.evaluate_concept.__name__
def initialize_concept_asts(self, context, concept: Concept, logger=None):
"""
Updates the codes of the newly created concept
Basically, it runs the parsers on all parts
:param concept:
:param context:
:param logger:
:return:
"""
steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
for part_key in ConceptParts:
if part_key in concept.compiled:
continue
source = getattr(concept.metadata, part_key.value)
if source is None or not isinstance(source, str):
continue
if source.strip() == "":
concept.compiled[part_key] = DoNotResolve(source)
else:
with context.push(desc=f"Initializing compiled for {part_key}") as sub_context:
sub_context.log_new(logger)
sub_context.add_inputs(source=source)
to_parse = self.sheerka.ret(context.who, True,
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=source))
res = self.sheerka.execute(sub_context, to_parse, steps, logger)
concept.compiled[part_key] = res
sub_context.add_values(return_values=res)
for prop, default_value in concept.metadata.props:
if prop in concept.compiled:
continue
if default_value is None or not isinstance(default_value, str):
continue
if default_value.strip() == "":
concept.compiled[prop] = DoNotResolve(default_value)
else:
with context.push(desc=f"Initializing AST for property {prop}") as sub_context:
sub_context.log_new(logger)
sub_context.add_inputs(source=default_value)
to_parse = self.sheerka.ret(context.who, True,
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=default_value))
res = self.sheerka.execute(sub_context, to_parse, steps, logger)
concept.compiled[prop] = res
sub_context.add_values(return_values=res)
# Updates the cache of concepts when possible
if concept.key in self.sheerka.cache_by_key:
entry = self.sheerka.cache_by_key[concept.key]
if isinstance(entry, list):
# TODO : manage when there are multiple entries
pass
else:
self.sheerka.cache_by_key[concept.key].compiled = concept.compiled
def resolve(self, context, to_resolve, current_prop, current_concept, logger):
if isinstance(to_resolve, DoNotResolve):
return to_resolve.value
desc = f"Evaluating {current_prop} (concept={current_concept})"
context.log(logger, desc, self.logger_name)
with context.push(desc=desc, obj=current_concept) as sub_context:
sub_context.log_new(logger)
# when it's a concept, evaluate it
if isinstance(to_resolve, Concept) and \
not context.sheerka.isinstance(to_resolve, BuiltinConcepts.RETURN_VALUE):
evaluated = self.evaluate_concept(sub_context, to_resolve, logger)
sub_context.add_values(return_values=evaluated)
if evaluated.key == to_resolve.key:
return evaluated
else:
error = evaluated
# otherwise, execute all return values to find out what is the value
else:
r = self.sheerka.execute(sub_context, to_resolve, CONCEPT_EVALUATION_STEPS, logger)
one_r = core.builtin_helpers.expect_one(context, r)
sub_context.add_values(return_values=one_r)
if one_r.status:
return one_r.value
else:
error = one_r.value
return self.sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR,
body=error,
concept=current_concept,
property_name=current_prop)
def resolve_list(self, context, list_to_resolve, current_prop, current_concept, logger):
"""When dealing with a list, there are two possibilities"""
# It may be a list of ReturnValueConcept to execute (always the case for metadata)
# or a list of single values (may be the case for properties)
# in this latter case, all values are to be processed one by one and a list should be returned
if len(list_to_resolve) == 0:
return []
if self.sheerka.isinstance(list_to_resolve[0], BuiltinConcepts.RETURN_VALUE):
return self.resolve(context, list_to_resolve, current_prop, current_concept, logger)
res = []
for to_resolve in list_to_resolve:
# sanity check
if self.sheerka.isinstance(to_resolve, BuiltinConcepts.RETURN_VALUE):
return self.sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR,
body="Mix between real values and return values",
concept=current_concept,
property_name=current_prop)
r = self.resolve(context, to_resolve, current_prop, current_concept, logger)
if self.sheerka.isinstance(r, BuiltinConcepts.CONCEPT_EVAL_ERROR):
return r
res.append(r)
return res
def evaluate_concept(self, context, concept: Concept, logger=None):
"""
Evaluates a concept
It means that if the where clause is True, the body will be evaluated
:param context:
:param concept:
:param logger:
:return: value of the evaluation or error
"""
logger = logger or self.sheerka.log
if concept.metadata.is_evaluated:
return concept
# WHERE condition should already be validated by the parser.
# It's a mandatory condition for the concept before it can be recognized
#
# TODO : Validate the PRE condition
#
self.initialize_concept_asts(context, concept, logger)
# to make sure of the order, it doesn't use ConceptParts.get_parts()
# props must be evaluated first
all_metadata_to_eval = ["props", "where", "pre", "post", "body"]
for metadata_to_eval in all_metadata_to_eval:
if metadata_to_eval == "props":
for prop_name in (p for p in concept.props if p in concept.compiled):
prop_ast = concept.compiled[prop_name]
if isinstance(prop_ast, list):
# Do not send the current concept for the properties
resolved = self.resolve_list(context, prop_ast, prop_name, None, logger)
else:
# Do not send the current concept for the properties
resolved = self.resolve(context, prop_ast, prop_name, None, logger)
if context.sheerka.isinstance(resolved, BuiltinConcepts.CONCEPT_EVAL_ERROR):
resolved.set_prop("concept", concept) # since current concept was not sent
return resolved
else:
concept.set_prop(prop_name, resolved)
else:
part_key = ConceptParts(metadata_to_eval)
if part_key in concept.compiled and concept.compiled[part_key] is not None:
metadata_ast = concept.compiled[part_key]
resolved = self.resolve(context, metadata_ast, part_key, concept, logger)
if context.sheerka.isinstance(resolved, BuiltinConcepts.CONCEPT_EVAL_ERROR):
return resolved
else:
concept.values[part_key] = resolved
#
# TODO : Validate the POST condition
#
concept.init_key() # only does it if needed
concept.metadata.is_evaluated = True
return concept
@@ -0,0 +1,254 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
import core.utils
class SheerkaExecute:
"""
Manage the execution of a process flow
"""
def __init__(self, sheerka):
self.sheerka = sheerka
def call_parsers(self, execution_context, return_values, logger=None):
# return_values must be a list
if not isinstance(return_values, list):
return_values = [return_values]
# first, distinguish between what is for the parsers and what is not
result = []
to_process = []
for r in return_values:
if not r.status or not self.sheerka.isinstance(r.body, BuiltinConcepts.USER_INPUT):
result.append(r)
else:
to_process.append(r)
if not to_process:
return result
# keep track of the original user inputs, as they need to be removed at the end
user_inputs = to_process[:]
# group the parsers by priorities
instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
grouped_parsers = {}
for parser in [p for p in instantiated_parsers if p.enabled]:
if logger:
parser.log = logger
grouped_parsers.setdefault(parser.priority, []).append(parser)
sorted_priorities = sorted(grouped_parsers.keys(), reverse=True)
stop_processing = False
for priority in sorted_priorities:
inputs_for_this_group = to_process[:]
for parser in grouped_parsers[priority]:
return_value_success_found = False
for return_value in inputs_for_this_group:
to_parse = return_value.body.body \
if self.sheerka.isinstance(return_value.body, BuiltinConcepts.USER_INPUT) \
else return_value.body
# if self.sheerka.log.isEnabledFor(logging.DEBUG):
# debug_text = "'" + to_parse + "'" if isinstance(to_parse, str) \
# else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
# execution_context.log(logger or self.sheerka.log, f"Parsing {debug_text}")
with execution_context.push(desc=f"Parsing using {parser.name}") as sub_context:
sub_context.add_inputs(to_parse=to_parse)
res = parser.parse(sub_context, to_parse)
if res is not None:
if hasattr(res, "__iter__"):
for r in res:
if r is None:
continue
r.parents = [return_value]
result.append(r)
if self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT):
to_process.append(r)
if r.status:
return_value_success_found = True
else:
res.parents = [return_value]
result.append(res)
if self.sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT):
to_process.append(res)
if res.status:
return_value_success_found = True
sub_context.add_values(return_values=res)
if return_value_success_found:
stop_processing = True
break # Stop the other return_values (but not the other parsers with the same priority)
if stop_processing:
break # Do not try the other priorities if a match is found
result = core.utils.remove_list_from_list(result, user_inputs)
return result
def call_evaluators(self, execution_context, return_values, process_step, evaluation_context=None, logger=None):
# return_values must be a list
if not isinstance(return_values, list):
return_values = [return_values]
# Evaluation contexts are contexts that may modify the behaviour of the execution
# For example, a concept to indicate that the value is not wanted
# Or a concept to indicate that we want the letter form of the response
# But first, they need to be transformed into return values
if evaluation_context is None:
evaluation_return_values = []
else:
evaluation_return_values = [self.sheerka.ret(execution_context.who, True, c) for c in evaluation_context]
# add the current step as part of the evaluation context
evaluation_return_values.append(self.sheerka.ret(execution_context.who, True, self.sheerka.new(process_step)))
# the pool of return values is the mix of both
return_values.extend(evaluation_return_values)
# group the evaluators by priority and sort them
# The first one to be applied will be the one with the highest priority
grouped_evaluators = {}
instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators]
# pre-process evaluators if needed
instantiated_evaluators = self._preprocess_evaluators(execution_context, instantiated_evaluators)
for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]:
if logger:
evaluator.log = logger
grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
# order the groups by priority, the higher first
sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)
# process
iteration = 0
while True:
with execution_context.push(desc=f"iteration #{iteration}", iteration=iteration) as iteration_context:
simple_digest = return_values[:]
iteration_context.add_inputs(return_values=simple_digest)
for priority in sorted_priorities:
original_items = return_values[:]
evaluated_items = []
to_delete = []
for evaluator in grouped_evaluators[priority]:
evaluator = self._preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy
sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
with iteration_context.push(desc=sub_context_desc) as sub_context:
sub_context.add_inputs(return_values=original_items)
# process evaluators that work on one simple return value at the time
from evaluators.BaseEvaluator import OneReturnValueEvaluator
if isinstance(evaluator, OneReturnValueEvaluator):
debug_result = []
for item in original_items:
if evaluator.matches(sub_context, item):
result = evaluator.eval(sub_context, item)
if result is None:
debug_result.append({"input": item, "return_value": None})
continue
to_delete.append(item)
if isinstance(result, list):
evaluated_items.extend(result)
elif isinstance(result, ReturnValueConcept):
evaluated_items.append(result)
else:
error = self.sheerka.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result,
evaluator=evaluator)
result = self.sheerka.ret("sheerka.process", False, error, parents=[item])
evaluated_items.append(result)
debug_result.append({"input": item, "return_value": result})
else:
debug_result.append({"input": item, "return_value": "** No Match **"})
sub_context.add_values(return_values=debug_result)
# process evaluators that work on all return values
else:
if evaluator.matches(sub_context, original_items):
results = evaluator.eval(sub_context, original_items)
if results is None:
continue
if not isinstance(results, list):
results = [results]
for result in results:
evaluated_items.append(result)
to_delete.extend(result.parents)
sub_context.add_values(return_values=results)
else:
sub_context.add_values(return_values="** No Match **")
return_values = evaluated_items
return_values.extend([item for item in original_items if item not in to_delete])
iteration_context.add_values(return_values=return_values[:])
# have we done something ?
to_compare = return_values[:]
if simple_digest == to_compare:
break
# inc the iteration and continue
iteration += 1
# remove all evaluation context that are not reduced
return_values = core.utils.remove_list_from_list(return_values, evaluation_return_values)
return return_values
def execute(self, execution_context, return_values, execution_steps, logger=None):
"""
Executes process for all initial contexts
:param execution_context:
:param return_values:
:param execution_steps:
:param logger: logger to use (if not directly called by sheerka)
:return:
"""
for step in execution_steps:
copy = return_values[:] if hasattr(return_values, "__iter__") else [return_values]
with execution_context.push(step=step, iteration=0, desc=f"{step=}", return_values=copy) as sub_context:
sub_context.log(logger or self.sheerka.log, f"{step=}, context='{sub_context}'")
if step == BuiltinConcepts.PARSING:
return_values = self.call_parsers(sub_context, return_values, logger)
else:
return_values = self.call_evaluators(sub_context, return_values, step, None, logger)
if copy != return_values:
sub_context.log_result(logger or self.sheerka.log, return_values)
sub_context.add_values(return_values=return_values)
return return_values
def _preprocess_evaluators(self, context, evaluators):
if not context.preprocess:
return evaluators
if not hasattr(evaluators, "__iter__"):
single_one = True
evaluators = [evaluators]
else:
single_one = False
for preprocess in context.preprocess:
for e in evaluators:
if preprocess.props["name"].value == e.name:
for prop, value in preprocess.props.items():
if prop == "name":
continue
if hasattr(e, prop):
setattr(e, prop, value.value)
return evaluators[0] if single_one else evaluators
@@ -0,0 +1,83 @@
from core.builtin_concepts import BuiltinConcepts, ErrorConcept
from core.concept import Concept
GROUP_PREFIX = 'All_'
class SheerkaSetsManager:
def __init__(self, sheerka):
self.sheerka = sheerka
self.logger_name = self.add_concept_to_set.__name__
def add_concept_to_set(self, context, concept, concept_set, logger=None):
"""
Add an entry in sdp to tell that concept isa concept_set
:param context:
:param concept:
:param concept_set:
:param logger:
:return:
"""
logger = logger or self.sheerka.log
context.log(logger, f"Adding concept {concept} to set {concept_set}", who=self.logger_name)
assert concept.id
assert concept_set.id
try:
ret = self.sheerka.sdp.add_unique(context.event.get_digest(), GROUP_PREFIX + concept_set.id, concept.id)
if ret == (None, None): # concept already in set
return self.sheerka.ret(
self.logger_name,
False,
self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set))
else:
return self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
except Exception as error:
context.log_error(logger, "Failed to add to set.", who=self.logger_name)
return self.sheerka.ret(self.logger_name, False, ErrorConcept(error), error.args[0])
def get_set_elements(self, concept):
"""
Concept is supposed to be a set
Returns all elements of the set
:param concept:
:return:
"""
assert concept.id
ids = self.sheerka.sdp.get_safe(GROUP_PREFIX + concept.id)
if ids is None:
return self.sheerka.new(BuiltinConcepts.NOT_A_SET, body=concept)
elements = [self.sheerka.get_by_id(element_id) for element_id in ids]
return elements
def isa(self, a, b):
"""
Returns True if the concept a is a b
Will be extended when the 'isa' keyword is implemented
:param a:
:param b:
:return:
"""
if isinstance(a, BuiltinConcepts): # common KSI error ;-)
raise SyntaxError("Remember that the first parameter of isinstance MUST be a concept")
assert isinstance(a, Concept)
assert isinstance(b, Concept)
# TODO, first check the 'isa' property of a
return self.sheerka.sdp.exists(GROUP_PREFIX + b.id, a.id)
def isagroup(self, concept):
"""True if exists All_<concept_id> in sdp"""
if not concept.id:
return None
res = self.sheerka.sdp.get_safe(GROUP_PREFIX + concept.id)
return res is not None
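# Illustrative sketch (not part of the original source); the concepts and ids below are
# hypothetical, only the "All_<id>" key scheme comes from the code above.
#
#   # add_concept_to_set(ctx, cat, animal) stores cat.id under the sdp key "All_" + animal.id,
#   # so with cat.id == "12" and animal.id == "7":
#   sets_manager = SheerkaSetsManager(sheerka)
#   sets_manager.isa(cat, animal)          # -> True  (looks up "All_7" for "12")
#   sets_manager.isagroup(animal)          # -> True  ("All_7" exists)
#   sets_manager.get_set_elements(animal)  # -> [cat]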
@@ -0,0 +1,50 @@
import logging
import sys
enabled = []
disabled = ["init", "sdp", "parsers", "evaluators", "verbose"]
console_handler = logging.StreamHandler(sys.stdout)
all_loggers = {}
def set_enabled(to_enable):
if to_enable is None:
return
if not hasattr(to_enable, "__iter__"):
to_enable = [to_enable]
enabled.extend(to_enable)
def to_discard(logger_class):
if logger_class is None:
return False
if logger_class in enabled or logger_class.strip(".") in enabled:
return False
if logger_class not in disabled:
return False
return True
def get_logger(logger_name):
if logger_name in all_loggers:
return all_loggers[logger_name]
logger = logging.getLogger(logger_name)
all_loggers[logger_name] = logger
for d in disabled:
if logger_name.startswith(d + ".") and to_discard(d):
logger.disabled = True
for e in enabled:
if logger_name.startswith("verbose." + e):
logger.disabled = False
return logger
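# Illustrative usage sketch (not part of the original source):
#
#   import core.sheerka_logger as sheerka_logger
#   sheerka_logger.set_enabled(["sdp"])
#   sheerka_logger.get_logger("sdp.redis").disabled     # -> False ("sdp" was enabled)
#   sheerka_logger.get_logger("init.parsers").disabled  # -> True  ("init" stays disabled)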
@@ -0,0 +1,161 @@
import dataclasses
from enum import Enum
from core.concept import Concept, PROPERTIES_TO_SERIALIZE
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Token
from evaluators.BaseEvaluator import BaseEvaluator
from parsers.BaseParser import BaseParser, Node
from parsers.BnfParser import BnfParser
from parsers.ConceptLexerParser import UnrecognizedTokensNode, ParsingExpression
from parsers.PythonParser import PythonNode
from sdp.sheerkaDataProvider import Event
OBJ_TYPE_KEY = "__type__"
OBJ_ID_KEY = "__id__"
OBJ_NAME_KEY = "__name__"
default_concept = Concept()
class SheerkaTransformType(Enum):
Concept = 1
Reference = 2
ExecutionContext = 3
Event = 4
Node = 5
Exception = 6
def __repr__(self):
return self.__class__.__name__ + "." + self.name
class SheerkaTransform:
def __init__(self, sheerka):
self.ids = {}
self.sheerka = sheerka
self.id_count = -1
def to_dict(self, obj):
if isinstance(obj, (Concept, ExecutionContext, Event)):
exists, _id = self.exist(obj)
if exists:
return {
OBJ_TYPE_KEY: SheerkaTransformType.Reference,
OBJ_ID_KEY: _id
}
else:
self.id_count = self.id_count + 1
self.ids[obj] = self.id_count
if isinstance(obj, Concept):
return self.concept_to_dict(obj)
elif isinstance(obj, ExecutionContext):
return self.execution_context_to_dict(obj)
elif isinstance(obj, Event):
return {
OBJ_TYPE_KEY: SheerkaTransformType.Event,
OBJ_ID_KEY: self.id_count,
'digest': obj.get_digest()}
elif isinstance(obj, (BaseParser, BaseEvaluator, BnfParser)):
return obj.name
elif isinstance(obj, Token):
return obj.__dict__
elif isinstance(obj, PythonNode):
return {
OBJ_TYPE_KEY: SheerkaTransformType.Node,
OBJ_NAME_KEY: "PythonNode",
'source': obj.source,
'ast_': obj.get_dump(obj.ast_)
}
elif isinstance(obj, Node):
to_dict = {
OBJ_TYPE_KEY: SheerkaTransformType.Node,
OBJ_NAME_KEY: obj.__class__.__name__,
}
for k, v in obj.__dict__.items():
to_dict[k] = self.to_dict(v)
return to_dict
elif isinstance(obj, Exception):
to_dict = {
OBJ_TYPE_KEY: SheerkaTransformType.Exception,
OBJ_NAME_KEY: obj.__class__.__name__,
}
for k, v in obj.__dict__.items():
to_dict[k] = self.to_dict(v)
return to_dict
elif isinstance(obj, ParsingExpression):
return obj.__repr__()
elif isinstance(obj, dict):
return dict((str(k) if isinstance(k, Concept) else k, self.to_dict(v)) for k, v in obj.items())
elif hasattr(obj, "__iter__") and not isinstance(obj, str):
return list(self.to_dict(o) for o in obj)
else:
return obj
def concept_to_dict(self, obj: Concept):
to_dict = {
OBJ_TYPE_KEY: SheerkaTransformType.Concept,
OBJ_ID_KEY: self.id_count,
}
if obj.id:
ref = self.sheerka.get(obj.key, obj.id)
to_dict["id"] = obj.id
else:
ref = default_concept
# transform metadata
for prop in PROPERTIES_TO_SERIALIZE:
value = getattr(obj.metadata, prop)
ref_value = getattr(ref.metadata, prop)
if value != ref_value:
to_dict["meta." + prop] = self.to_dict(value)
# transform value
for metadata, value in obj.values.items():
ref_value = ref.values[metadata] if metadata in ref.values else None
if value != ref_value:
to_dict[metadata.value] = self.to_dict(value)
# transform properties
for prop in obj.props:
value = obj.props[prop].value
if prop not in ref.props or value != ref.props[prop].value:
if "props" not in to_dict:
to_dict["props"] = []
to_dict["props"].append((prop, self.to_dict(value)))
return to_dict
def execution_context_to_dict(self, obj: ExecutionContext):
to_dict = {
OBJ_TYPE_KEY: SheerkaTransformType.ExecutionContext,
OBJ_ID_KEY: self.id_count
}
for property_name in obj.__dict__:
if property_name == "sheerka":
continue
to_dict[property_name] = self.to_dict(getattr(obj, property_name))
return to_dict
def exist(self, obj):
for k, v in self.ids.items():
if id(k) == id(obj) or k == obj:
return True, v
return False, None
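# Illustrative sketch (not part of the original source); `sheerka` stands for a
# hypothetical Sheerka instance.
#
#   transform = SheerkaTransform(sheerka)
#   c = Concept()
#   transform.to_dict(c)   # first call  -> full dict ({__type__: SheerkaTransformType.Concept, __id__: 0, ...})
#   transform.to_dict(c)   # second call -> reference only ({__type__: SheerkaTransformType.Reference, __id__: 0})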
@@ -0,0 +1,411 @@
from dataclasses import dataclass
from enum import Enum
class TokenKind(Enum):
EOF = "eof"
WHITESPACE = "whitespace"
NEWLINE = "newline"
KEYWORD = "keyword"
IDENTIFIER = "identifier"
CONCEPT = "concept"
STRING = "string"
NUMBER = "number"
TRUE = "true"
FALSE = "false"
LPAR = "lpar"
RPAR = "rpar"
LBRACKET = "lbrace"
RBRACKET = "rbracket"
LBRACE = "lbrace"
RBRACE = "rbrace"
PLUS = "plus"
MINUS = "minus"
STAR = "star"
SLASH = "slash"
PERCENT = "percent"
COMMA = "comma"
SEMICOLON = "semicolon"
COLON = "colon"
DOT = "dot"
QMARK = "qmark"
VBAR = "vbar"
AMPER = "amper"
EQUALS = "="
AT = "at"
BACK_QUOTE = "bquote" # `
BACK_SLASH = "bslash" # \
CARAT = "carat" # ^
DOLLAR = "dollar" # $
EURO = "dollar" # €
STERLING = "steling" # £
EMARK = "emark" # !
GREATER = "greater" # >
LESS = "less" # <
HASH = "HASH" # #
TILDE = "tilde" # ~
UNDERSCORE = "underscore" # _
DEGREE = "degree" # °
@dataclass()
class Token:
type: TokenKind
value: object
index: int
line: int
column: int
def __repr__(self):
if self.type == TokenKind.IDENTIFIER:
value = str(self.value)
elif self.type == TokenKind.WHITESPACE:
value = "<ws>"
elif self.type == TokenKind.NEWLINE:
value = r"\n"
elif self.type == TokenKind.EOF:
value = "<EOF>"
else:
value = self.value
return f"Token({value})"
@dataclass()
class LexerError(Exception):
message: str
text: str
index: int
line: int
column: int
class Keywords(Enum):
DEF = "def"
CONCEPT = "concept"
FROM = "from"
BNF = "bnf"
AS = "as"
WHERE = "where"
PRE = "pre"
POST = "post"
ISA = "isa"
class Tokenizer:
"""
Class that can iterate on the tokens
"""
KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text):
self.text = text
self.text_len = len(text)
self.column = 1
self.line = 1
self.i = 0
def __iter__(self):
while self.i < self.text_len:
c = self.text[self.i]
if c == "+":
if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
number = self.eat_number(self.i)
yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
self.i += len(number)
self.column += len(number)
else:
yield Token(TokenKind.PLUS, "+", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "-":
if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
number = self.eat_number(self.i)
yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
self.i += len(number)
self.column += len(number)
else:
yield Token(TokenKind.MINUS, "-", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "_":
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
else:
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "/":
yield Token(TokenKind.SLASH, "/", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "*":
yield Token(TokenKind.STAR, "*", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "{":
yield Token(TokenKind.LBRACE, "{", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "}":
yield Token(TokenKind.RBRACE, "}", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "(":
yield Token(TokenKind.LPAR, "(", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ")":
yield Token(TokenKind.RPAR, ")", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "[":
yield Token(TokenKind.LBRACKET, "[", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "]":
yield Token(TokenKind.RBRACKET, "]", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "=":
yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == " " or c == "\t":
whitespace = self.eat_whitespace(self.i)
yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
self.i += len(whitespace)
self.column += len(whitespace)
elif c == ",":
yield Token(TokenKind.COMMA, ",", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ".":
yield Token(TokenKind.DOT, ".", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ";":
yield Token(TokenKind.SEMICOLON, ";", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ":":
yield Token(TokenKind.COLON, ":", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "?":
yield Token(TokenKind.QMARK, "?", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "|":
yield Token(TokenKind.VBAR, "|", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "&":
yield Token(TokenKind.AMPER, "&", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "<":
yield Token(TokenKind.LESS, "<", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == ">":
yield Token(TokenKind.GREATER, ">", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "!":
yield Token(TokenKind.EMARK, "!", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "`":
yield Token(TokenKind.BACK_QUOTE, "`", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\\":
yield Token(TokenKind.BACK_SLASH, "\\", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "^":
yield Token(TokenKind.CARAT, "^", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "$":
yield Token(TokenKind.DOLLAR, "$", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "":
yield Token(TokenKind.EURO, "", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "£":
yield Token(TokenKind.STERLING, "£", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "#":
yield Token(TokenKind.HASH, "#", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "°":
yield Token(TokenKind.DEGREE, "°", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "~":
yield Token(TokenKind.TILDE, "~", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\n" or c == "\r":
newline = self.eat_newline(self.i)
yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
self.i += len(newline)
self.column = 1
self.line += 1
elif c == "c" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":":
concept_name = self.eat_concept_name(self.i + 2, self.line, self.column)
yield Token(TokenKind.CONCEPT, concept_name, self.i, self.line, self.column)
self.i += len(concept_name) + 3
self.column += len(concept_name) + 3
elif c.isalpha() or c == "_":
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
elif c.isdigit():
number = self.eat_number(self.i)
yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
self.i += len(number)
self.column += len(number)
elif c == "'" or c == '"':
string, newlines = self.eat_string(self.i, self.line, self.column)
yield Token(TokenKind.STRING, string, self.i, self.line, self.column) # quotes are kept
self.i += len(string)
self.column = 1 if newlines > 0 else self.column + len(string)
self.line += newlines
elif c == "_":
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1
self.column += 1
else:
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_concept_name(self, start, line, column):
result = ""
i = start
end_colon_found = False
while i < self.text_len:
c = self.text[i]
if c == "\n":
raise LexerError(f"New line is forbidden in concept name", result, i, line, column + 2 + len(result))
if c == ":":
end_colon_found = True
break
result += c
i += 1
if not end_colon_found:
raise LexerError(f"Missing ending colon", result, i, line, column + 2 + len(result))
if result == "":
raise LexerError(f"Concept name not found", result, start, line, column + 2 + len(result))
return result
def eat_whitespace(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c == " " or c == "\t":
result += c
i += 1
else:
break
return result
def eat_newline(self, start):
if start + 1 == self.text_len:
return self.text[start]
current = self.text[start]
next = self.text[start + 1]
if current == "\n" and next == "\r" or current == "\r" and next == "\n":
return current + next
return current
def eat_identifier(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c.isalpha() or c == "_" or c == "-" or c.isdigit():
result += c
i += 1
else:
break
return result
def eat_number(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c.isdigit() or c == ".":
result += c
i += 1
else:
break
return result
def eat_string(self, start_index, start_line, start_column):
quote = self.text[start_index]
result = self.text[start_index]
lines_count = 0
i = start_index + 1
escape = False
newline = None
while i < self.text_len:
c = self.text[i]
result += c
i += 1
if newline:
lines_count += 1
newline = c if c == newline else None
else:
if c == "\r" or c == "\n":
newline = c
if c == "\\":
escape = True
elif c == quote and not escape:
break
else:
escape = False
# add trailing new line if needed
if newline:
lines_count += 1
if result[-1] != quote:
raise LexerError("Missing Trailing quote", result, i, start_line + lines_count,
1 if lines_count > 0 else start_column + len(result))
return result, lines_count
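# Illustrative usage sketch (not part of the original source):
#
#   tokens = list(Tokenizer("def foo"))
#   [t.type for t in tokens]
#   # -> [TokenKind.KEYWORD, TokenKind.WHITESPACE, TokenKind.IDENTIFIER, TokenKind.EOF]
#   tokens[0].value   # -> Keywords.DEF
#   tokens[2].value   # -> "foo"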
@@ -0,0 +1,238 @@
import importlib
import inspect
import pkgutil
from core.tokenizer import TokenKind
def sysarg_to_string(argv):
"""
Transform a list of strings into a single string
Add quotes if needed
:return:
"""
if argv is None or not argv:
return ""
result = ""
first = True
for s in argv:
if not first:
result += " "
result += '"' + s + '"' if " " in s else s
first = False
if result[0] in ('"', "'"):
result = result[1:-1] # strip quotes
return result
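# Illustrative sketch (not part of the original source):
#
#   sysarg_to_string(["add", "concept foo"])   # -> 'add "concept foo"'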
def get_class(qname):
"""
Loads a class from its full qualified name
:param qname:
:return:
"""
parts = qname.split('.')
module = ".".join(parts[:-1])
m = __import__(module)
for comp in parts[1:]:
m = getattr(m, comp)
return m
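# Illustrative sketch (not part of the original source); the qualified name below is
# just an example taken from this code base:
#
#   get_class("core.tokenizer.Tokenizer")   # -> the Tokenizer class object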
def get_module(qname):
"""
Loads a module from its full qualified name
:param qname:
:return:
"""
parts = qname.split('.')
m = __import__(qname)
for comp in parts[1:]:
m = getattr(m, comp)
return m
def new_object(kls, *args, **kwargs):
"""
New instance of an object
:param kls:
:param args:
:param kwargs:
:return:
"""
obj_type = get_class(kls)
return obj_type(*args, **kwargs)
def get_full_qualified_name(obj):
"""
Returns the fully qualified name of a class (including its module name)
:param obj:
:return:
"""
if obj.__class__ == type:
module = obj.__module__
if module is None or module == str.__class__.__module__:
return obj.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__name__
else:
module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__class__.__name__
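# Illustrative sketch (not part of the original source), assuming core.tokenizer.Tokenizer:
#
#   get_full_qualified_name(Tokenizer("x"))   # -> "core.tokenizer.Tokenizer"
#   get_full_qualified_name("text")           # -> "str" (builtins are reported without module)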
def get_classes(module_name):
"""
Gets all classes, for a given module_name
:param module_name: name of the module
:return:
"""
mod = get_module(module_name)
for name in dir(mod):
obj = getattr(mod, name)
if inspect.isclass(obj):
yield obj
def get_classes_from_package(package_name):
"""
Gets all classes, for a given package
:param package_name: name of the package
:return:
"""
pkg = __import__(package_name)
prefix = pkg.__name__ + "."
for importer, modname, ispkg in pkgutil.iter_modules(pkg.__path__, prefix):
for c in get_classes(modname):
yield c
def init_package_import(package_name):
pkg = __import__(package_name)
prefix = pkg.__name__ + "."
for (module_loader, name, ispkg) in pkgutil.iter_modules(pkg.__path__, prefix):
importlib.import_module(name)
def get_sub_classes(package_name, base_class):
base_class = get_class(base_class) if isinstance(base_class, str) else base_class
all_class = set(base_class.__subclasses__()).union(
[s for c in base_class.__subclasses__() for s in get_sub_classes(package_name, c)])
# limit to the classes of the package
return [c for c in all_class if c.__module__.startswith(package_name)]
def remove_from_list(lst, to_remove_predicate):
"""
Removes elements from a list if they exist
:param lst:
:param to_remove_predicate:
:return:
"""
flagged = []
for item in lst:
if to_remove_predicate(item):
flagged.append(item)
for item in flagged:
lst.remove(item)
return lst
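# Illustrative sketch (not part of the original source):
#
#   remove_from_list([1, 2, 3, 4], lambda x: x % 2 == 0)   # -> [1, 3] (the list is modified in place)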
def remove_list_from_list(lst, to_remove):
# https://stackoverflow.com/questions/2514961/remove-all-values-within-one-list-from-another-list/30353802
# explains that list comprehension is not the best approach
for item in to_remove:
try:
lst.remove(item)
except ValueError:
pass
return lst
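# Illustrative sketch (not part of the original source):
#
#   remove_list_from_list([1, 2, 2, 3], [2, 4])   # -> [1, 2, 3] (one occurrence removed, missing items ignored)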
def product(a, b):
"""
Kind of cartesian product between lists a and b,
knowing that each item of a is also a list.
So it's a cartesian product between a list of lists and a list
"""
if a is None or len(a) == 0:
return b
if b is None or len(b) == 0:
return a
res = []
for item_b in b:
for item_a in a:
items = item_a + [item_b]
res.append(items)
return res
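# Illustrative sketch (not part of the original source):
#
#   product([[1], [2]], ["a", "b"])   # -> [[1, 'a'], [2, 'a'], [1, 'b'], [2, 'b']]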
def strip_quotes(text):
if not isinstance(text, str):
return text
if text == "":
return ""
if text[0] == "'" or text[0] == '"':
return text[1:-1]
return text
def strip_tokens(tokens, strip_eof=False):
"""
Remove the leading and trailing whitespace and newline tokens
"""
if tokens is None:
return None
start = 0
length = len(tokens)
while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
start += 1
if start == length:
return []
end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
if strip_eof \
else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
end = length - 1
while end > 0 and tokens[end].type in end_tokens:
end -= 1
return tokens[start: end + 1]
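# Illustrative sketch (not part of the original source), reusing the Tokenizer above:
#
#   tokens = list(Tokenizer("  foo  "))
#   strip_tokens(tokens, strip_eof=True)   # -> [Token(foo)]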
def escape_char(text, to_escape):
res = ""
for c in text:
res += ("\\" + c) if c in to_escape else c
return res
def pp(items):
if not hasattr(items, "__iter__"):
return str(items)
if len(items) == 0:
return str(items)
return " \n" + " \n".join(str(item) for item in items)
@@ -0,0 +1,131 @@
from core.ast.nodes import python_to_concept
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.builtin_helpers import get_names
from core.concept import Concept
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseParser import NotInitializedNode
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefaultParser import DefConceptNode
from parsers.PythonParser import PythonNode
class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
"""
Gets the concepts referenced by BNF
If a rule_name is given, it will also be considered as a potential property
"""
def __init__(self):
self.names = set()
def visit_ConceptExpression(self, node):
if node.rule_name:
self.names.add(node.rule_name)
elif isinstance(node.concept, Concept):
self.names.add(node.concept.name)
else:
self.names.add(node.concept)
def visit_all(self, node):
if node.rule_name:
self.names.add(node.rule_name)
class AddConceptEvaluator(OneReturnValueEvaluator):
"""
Used to add a new concept
"""
NAME = "AddNewConcept"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)
def matches(self, context, return_value):
return return_value.status and \
isinstance(return_value.value, ParserResultConcept) and \
isinstance(return_value.value.value, DefConceptNode)
def eval(self, context, return_value):
context.log(self.log, "Adding a new concept", self.name)
def_concept_node = return_value.value.value
sheerka = context.sheerka
# validate the node
props_found = set()
concept = Concept(def_concept_node.name)
for prop in ("definition", "where", "pre", "post", "body"):
# put back the sources
part_ret_val = getattr(def_concept_node, prop)
if not isinstance(part_ret_val, ReturnValueConcept) or not part_ret_val.status:
continue  # Nothing to do if it is not initialized
# update the parts
source = self.get_source(part_ret_val)
setattr(concept.metadata, prop, source)
# try to find what can be a property
concept_name = [part.value for part in def_concept_node.name.tokens]
for p in self.get_props(sheerka, part_ret_val, concept_name):
props_found.add(p)
# add props order by appearance when possible
for token in def_concept_node.name.tokens:
if token.value in props_found:
concept.def_prop(token.value, None)
# add the remaining properties
for p in props_found:
if p not in concept.props:
concept.def_prop(p, None)
# finish initialisation
concept.init_key(def_concept_node.name.tokens)
if not isinstance(def_concept_node.definition, NotInitializedNode) and \
sheerka.is_success(def_concept_node.definition):
concept.bnf = def_concept_node.definition.value.value
ret = sheerka.create_new_concept(context, concept, self.verbose_log)
if not ret.status:
error_cause = sheerka.value(ret.body)
context.log(self.log, f"Failed to add concept '{concept.name}'. Reason: {error_cause}", self.name)
return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value])
@staticmethod
def get_source(ret_value):
return ret_value.value.source
@staticmethod
def get_props(sheerka, ret_value, concept_name):
"""
Try to find out the variables
This function can only be a draft, as there may be tons of different situations
I guess that it can only be complete when we have access to Sheerka memory
"""
#
# Case of python code
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode):
python_node = ret_value.value.value
as_concept_node = python_to_concept(python_node.ast_)
variables = get_names(sheerka, as_concept_node)
variables = filter(lambda x: x in concept_name, variables)
return list(variables)
#
# case of concept
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, Concept):
return list(ret_value.value.value.props.keys())
#
# case of BNF
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, ParsingExpression):
visitor = ConceptOrRuleNameVisitor()
visitor.visit(ret_value.value.value)
return sorted(list(visitor.names))
return []
@@ -0,0 +1,77 @@
import core.builtin_helpers
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.DefaultParser import IsaConceptNode
ALL_STEPS = [
BuiltinConcepts.BEFORE_PARSING,
BuiltinConcepts.PARSING,
BuiltinConcepts.EVALUATION,
BuiltinConcepts.AFTER_EVALUATION
]
class AddConceptInSetEvaluator(OneReturnValueEvaluator):
"""
Tells that a concept is a part of a set
"""
NAME = "AddConceptInSet"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)
def matches(self, context, return_value):
return return_value.status and \
isinstance(return_value.value, ParserResultConcept) and \
isinstance(return_value.value.value, IsaConceptNode)
def eval(self, context, return_value):
def _resolve(name_node):
ret_val = sheerka.ret(
self.name,
True,
sheerka.new(BuiltinConcepts.USER_INPUT, body=name_node.tokens, user_name="N/A"))
with context.push(desc=f"Recognizing '{name_node}'") as sub_context:
r = sheerka.execute(sub_context, ret_val, ALL_STEPS, self.verbose_log)
one_r = core.builtin_helpers.expect_one(context, r)
sub_context.add_values(return_values=one_r)
return one_r
isa_node = return_value.value.value
sheerka = context.sheerka
context.log(self.log, f"Adding a concept {isa_node.concept} to set {isa_node.set}", self.name)
# Try to recognize the concept
res = _resolve(isa_node.concept)
if not res.status:
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=str(isa_node.concept)),
parents=[return_value])
concept = res.value
res = _resolve(isa_node.set)
if not res.status:
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=str(isa_node.set)),
parents=[return_value])
concept_set = res.value
res = sheerka.add_concept_to_set(context, concept, concept_set, self.verbose_log)
if not res.status:
context.log(self.log, f"Failed. Reason: {sheerka.value(res.body)}.", self.name)
else:
context.log(self.log, f"Concept added.", self.name)
return sheerka.ret(
self.name,
res.status,
res.body,
parents=[return_value])
@@ -0,0 +1,52 @@
from core.sheerka.Sheerka import ExecutionContext
from core.sheerka_logger import get_logger
class BaseEvaluator:
"""
Base class to evaluate ReturnValues
"""
PREFIX = "evaluators."
def __init__(self, name, steps, priority: int, enabled=True):
self.log = get_logger(self.PREFIX + self.__class__.__name__)
self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__)
self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__)
self.name = self.PREFIX + name
self.steps = steps
self.priority = priority
self.enabled = enabled
def __repr__(self):
return f"{self.name} ({self.priority})"
class OneReturnValueEvaluator(BaseEvaluator):
"""
Evaluate one specific return value
"""
def matches(self, context: ExecutionContext, return_value):
pass
def eval(self, context: ExecutionContext, return_value):
pass
class AllReturnValuesEvaluator(BaseEvaluator):
"""
Evaluates the groups of ReturnValues
"""
def __init__(self, name, steps, priority: int, enabled=True):
super().__init__(name, steps, priority, enabled)
self.eaten = []
def matches(self, context: ExecutionContext, return_values):
pass
def eval(self, context: ExecutionContext, return_values):
pass
@@ -0,0 +1,53 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept, ConceptParts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
class ConceptEvaluator(OneReturnValueEvaluator):
"""
The concept evaluator is the main class that knows what to do with a concept
It verifies the PRE
If OK, it may or may not execute the BODY
Then checks the POST conditions
"""
NAME = "Concept"
def __init__(self, return_body=False):
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)
self.return_body = return_body
def matches(self, context, return_value):
return return_value.status and \
isinstance(return_value.value, ParserResultConcept) and \
isinstance(return_value.value.value, Concept)
def eval(self, context, return_value):
sheerka = context.sheerka
concept = return_value.value.value
context.log(self.verbose_log, f"Evaluating concept {concept}.", self.name)
# If the concept that is requested is in the context (at least its name), drop the call.
# Why ?
# If we evaluate Concept("foo", body="a").set_prop("a", "'property_a'")
# The body should be 'property_a', and not a concept called 'a'
if context.obj and concept.name in context.obj.props:
value = context.obj.props[concept.name].value
context.log(self.verbose_log, f"{concept.name} is a property. Returning value '{value}'.", self.name)
return sheerka.ret(self.name, True, value, parents=[return_value])
evaluated = sheerka.evaluate_concept(context, concept, self.verbose_log)
if evaluated.key != concept.key:
# evaluated.key != concept.key means that we have transformed the concept
# When you successfully evaluate an error, the status should not be false
return sheerka.ret(
self.name,
False,
evaluated,
parents=[return_value])
if not self.return_body or ConceptParts.BODY not in evaluated.compiled:
return sheerka.ret(self.name, True, evaluated, parents=[return_value])
else:
return sheerka.ret(self.name, True, evaluated.body, parents=[return_value])
@@ -0,0 +1,43 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
class EvalEvaluator(AllReturnValuesEvaluator):
"""
Returns the body of all successful concepts
"""
NAME = "Eval"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 80)
self.eval_requested = None
def matches(self, context, return_values):
sheerka = context.sheerka
for ret in return_values:
if ret.status and sheerka.isinstance(ret.body, BuiltinConcepts.CONCEPT_EVAL_REQUESTED):
self.eval_requested = ret
return True
return False
def eval(self, context, return_values):
sheerka = context.sheerka
result = []
for ret_val in return_values:
if ret_val.status and isinstance(ret_val.body, Concept) and ret_val.body.body:
context.log(self.verbose_log, f"Evaluating {ret_val}", who=self)
result.append(sheerka.ret(self.name, True, ret_val.body.body, parents=[ret_val, self.eval_requested]))
if len(result) > 0:
return result
else:
# suppress the successful BuiltinConcepts.CONCEPT_EVAL_REQUESTED
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.CONCEPT_EVAL_REQUESTED),
parents=[self.eval_requested])
@@ -0,0 +1,102 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
class LexerNodeEvaluator(OneReturnValueEvaluator):
"""
After a BNF is recognized, generates the concept or the list of concepts
"""
NAME = "LexerNode"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60)
self.identifiers = {} # cache for already created identifier (the key is id(concept))
self.identifiers_key = {} # number of identifiers with the same root (prefix)
def matches(self, context, return_value):
if not return_value.status:
return False
if not isinstance(return_value.value, ParserResultConcept):
return False
value = return_value.value.value
if isinstance(value, (ConceptNode, SourceCodeNode)):
return True
if hasattr(value, "__iter__"):
for node in value:
if not isinstance(node, (ConceptNode, SourceCodeNode)):
return False
return True
return False
def eval(self, context, return_value):
"""
From a concept node, creates a new concept
and makes sure that the properties are correctly set
"""
nodes = return_value.value.value
if not hasattr(nodes, "__iter__"):
nodes = [nodes]
context.log(self.verbose_log, f"{nodes=}", self.name)
for node in nodes:
if isinstance(node, SourceCodeNode):
ret = self.evaluate_python_code(context, nodes)
break
else:
ret = self.evaluate_concepts_only(context, nodes)
ret.parents = [return_value]
return ret
def evaluate_concepts_only(self, context, nodes):
concepts = []
source = ""
sheerka = context.sheerka
for node in nodes:
if isinstance(node, ConceptNode):
source += node.source if source == "" else (" " + node.source)
concepts.append(node.concept)
if len(concepts) == 1:
return sheerka.ret(
self.name,
True,
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=concepts[0],
try_parsed=None))
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=nodes))
def evaluate_python_code(self, context, nodes):
sheerka = context.sheerka
helper = LexerNodeParserHelperForPython()
result = helper.parse(context, nodes)
if isinstance(result, PythonNode):
return sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=result.source,
body=result,
try_parsed=None))
else:
return sheerka.ret(
self.name,
False,
result.body)
@@ -0,0 +1,83 @@
from core.builtin_concepts import BuiltinConcepts
import core.builtin_helpers
from core.concept import Concept
from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
from evaluators.ConceptEvaluator import ConceptEvaluator
from evaluators.PythonEvaluator import PythonEvaluator
from parsers.BaseParser import BaseParser
class MultipleSameSuccessEvaluator(AllReturnValuesEvaluator):
"""
Used to filter the responses
It has a low priority to let other evaluators try to resolve the errors
It reduces the responses when several evaluators give the same answer
"""
NAME = "MultipleSameSuccess"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 50)
self.success = []
def matches(self, context, return_values):
nb_successful_evaluators = 0
only_parsers_in_error = True
to_process = False
for ret in return_values:
if ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
to_process = True
self.eaten.append(ret)
elif ret.who.startswith(BaseEvaluator.PREFIX):
if ret.status:
nb_successful_evaluators += 1
self.success.append(ret)
self.eaten.append(ret)
elif ret.who.startswith(BaseParser.PREFIX):
self.eaten.append(ret)
if ret.status:
only_parsers_in_error = False
return to_process and nb_successful_evaluators > 1 and only_parsers_in_error
def eval(self, context, return_values):
sheerka = context.sheerka
context.log(self.verbose_log, f"{len(self.success)} successful return value(s)", who=self)
for s in self.success:
context.log(self.verbose_log, f"{s}", who=self)
if not core.builtin_helpers.is_same_success(sheerka, self.success):
return None
# ######################################
# !!!!! W A R N I N G !!!!!!!!
# I have a massive issue with how I implement this feature
# I have forced an arbitrary order between Concept evaluator and Python evaluator
# I gave a random order to the others
#
# I guess that we need a proper algorithm to elect which return value to use if they have the same result
# My gut feeling is that it will depend on the intent of the user
# So it depends on the context
# try to return a concept if possible
# give the priority to the ConceptEvaluator
for s in self.success:
if isinstance(s.value, Concept) and s.who == ConceptEvaluator().name:
return sheerka.ret(self.name, True, s.value, parents=self.eaten)
# Then the PythonEvaluator
for s in self.success:
if isinstance(s.value, Concept) and s.who == PythonEvaluator().name:
return sheerka.ret(self.name, True, s.value, parents=self.eaten)
# Then the first concept.
# It's not predictable, so I guess that it's not a good implementation choice
for s in self.success:
if isinstance(s.value, Concept):
return sheerka.ret(self.name, True, s.value, parents=self.eaten)
return sheerka.ret(self.name, True, self.success[0].value, parents=self.eaten)
@@ -0,0 +1,42 @@
from core.builtin_concepts import BuiltinConcepts
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
from parsers.BaseParser import BaseParser
class OneErrorEvaluator(AllReturnValuesEvaluator):
"""
Used to reduce when there is only one evaluator in error
The rest of the return values must be parsers in error
"""
NAME = "OneError"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 30)
self.return_value_in_error = None
def matches(self, context, return_values):
nb_evaluators_in_error = 0
to_process = False
for ret in return_values:
if ret.status and (ret.who.startswith(self.PREFIX) or ret.who.startswith(BaseParser.PREFIX)):
return False
elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
to_process = True
self.eaten.append(ret)
elif not ret.status and ret.who.startswith(self.PREFIX):
nb_evaluators_in_error += 1
self.return_value_in_error = ret
self.eaten.append(ret)
elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
self.eaten.append(ret)
return to_process and nb_evaluators_in_error == 1
def eval(self, context, return_values):
context.log(self.verbose_log, f"1 return value in error, {len(self.eaten)} item(s) eaten", who=self)
context.log(self.verbose_log, f"{self.return_value_in_error}", who=self)
sheerka = context.sheerka
return sheerka.ret(self.name, False, self.return_value_in_error.value, parents=self.eaten)
@@ -0,0 +1,44 @@
from core.builtin_concepts import BuiltinConcepts
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
from parsers.BaseParser import BaseParser
class OneSuccessEvaluator(AllReturnValuesEvaluator):
"""
Used to filter the responses
It has a low priority to let other evaluators try to resolve the errors
Make sure that there is only one successful answer
"""
NAME = "OneSuccess"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 60) # before MultipleSameSuccess
self.successful_return_value = None
def matches(self, context, return_values):
nb_successful_evaluators = 0
to_process = False
for ret in return_values:
if ret.status and ret.who.startswith(BaseParser.PREFIX):
return False
elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
to_process = True
self.eaten.append(ret)
elif ret.status and ret.who.startswith(self.PREFIX):
nb_successful_evaluators += 1
self.successful_return_value = ret
self.eaten.append(ret)
elif not ret.status:
self.eaten.append(ret)
return to_process and nb_successful_evaluators == 1
def eval(self, context, return_values):
context.log(self.verbose_log, f"1 successful return value, {len(self.eaten)} item(s) eaten", who=self)
context.log(self.verbose_log, f"{self.successful_return_value}", who=self)
sheerka = context.sheerka
return sheerka.ret(self.name, True, self.successful_return_value.value, parents=self.eaten)
+40
View File
@@ -0,0 +1,40 @@
from core.builtin_concepts import BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
class PrepareEvalEvaluator(OneReturnValueEvaluator):
"""
To parse evaluation requests
"""
NAME = "PrepareEval"
def __init__(self, **kwargs):
super().__init__(self.NAME, [BuiltinConcepts.BEFORE_PARSING], 90)
self.text = None
def matches(self, context, return_value):
if not (return_value.status and
context.sheerka.isinstance(return_value.body, BuiltinConcepts.USER_INPUT) and
isinstance(return_value.body.body, str)):
return False
text = return_value.body.body.strip()
if not text.startswith("eval "):
return False
self.text = text
return True
def eval(self, context, return_value):
sheerka = context.sheerka
new_text_to_parse = sheerka.ret(
self.name,
True, sheerka.new(BuiltinConcepts.USER_INPUT, body=self.text[5:], user_name=context.event.user))
evaluation_requested = sheerka.ret(
self.name,
True, sheerka.new(BuiltinConcepts.CONCEPT_EVAL_REQUESTED))
return [new_text_to_parse, evaluation_requested]
+189
View File
@@ -0,0 +1,189 @@
import copy
from enum import Enum
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import ConceptParts, Concept
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.PythonParser import PythonNode
import ast
import core.ast.nodes
class PythonEvaluator(OneReturnValueEvaluator):
NAME = "Python"
"""
Evaluate a Python node, ie, evaluate some Python code
"""
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)
self.locals = {}
def matches(self, context, return_value):
return return_value.status and \
isinstance(return_value.value, ParserResultConcept) and \
isinstance(return_value.value.value, PythonNode)
def eval(self, context, return_value):
sheerka = context.sheerka
node = return_value.value.value
try:
context.log(self.verbose_log, f"Evaluating python node {node}.", self.name)
# Do not evaluate if the ast refers to a concept (leave it to ConceptEvaluator)
if isinstance(node.ast_, ast.Expression) and isinstance(node.ast_.body, ast.Name):
c = context.sheerka.get(node.ast_.body.id)
if not context.sheerka.isinstance(c, BuiltinConcepts.UNKNOWN_CONCEPT):
context.log(self.verbose_log, "It's a simple concept. Not for me.", self.name)
not_for_me = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=node)
return sheerka.ret(self.name, False, not_for_me, parents=[return_value])
my_locals = self.get_locals(context, node)
context.log(self.verbose_log, f"locals={my_locals}", self.name)
if isinstance(node.ast_, ast.Expression):
context.log(self.verbose_log, "Evaluating using 'eval'.", self.name)
compiled = compile(node.ast_, "<string>", "eval")
evaluated = eval(compiled, {}, my_locals)
else:
context.log(self.verbose_log, "Evaluating using 'exec'.", self.name)
evaluated = self.exec_with_return(node.ast_, my_locals)
context.log(self.verbose_log, f"{evaluated=}", self.name)
return sheerka.ret(self.name, True, evaluated, parents=[return_value])
except Exception as error:
context.log_error(self.verbose_log, error, self.name)
error = sheerka.new(BuiltinConcepts.ERROR, body=error)
return sheerka.ret(self.name, False, error, parents=[return_value])
def get_locals(self, context, node):
my_locals = {
"sheerka": context.sheerka,
"desc": context.sheerka.dump_handler.dump_desc,
"concepts": context.sheerka.dump_handler.dump_concepts,
"definitions": context.sheerka.dump_handler.dump_definitions,
}
if context.obj:
context.log(self.verbose_log,
f"Concept '{context.obj}' is in context. Adding its properties to locals if any.", self.name)
for prop_name, prop_value in context.obj.props.items():
if not isinstance(prop_value.value, Concept):
my_locals[prop_name] = prop_value.value
else:
my_locals[prop_name] = context.sheerka.value(prop_value.value)
node_concept = core.ast.nodes.python_to_concept(node.ast_)
unreferenced_names_visitor = UnreferencedNamesVisitor(context.sheerka)
unreferenced_names_visitor.visit(node_concept)
for name in unreferenced_names_visitor.names:
context.log(self.verbose_log, f"Resolving '{name}'.", self.name)
if name in node.concepts:
context.log(self.verbose_log, f"Using value from node.", self.name)
concept = node.concepts[name]
return_concept = False
else:
concept_key, concept_id, return_concept = self.resolve_name(context, name)
if concept_key in my_locals:
context.log(self.verbose_log, f"Using value from property.", self.name)
continue
context.log(self.verbose_log, f"Instantiating new concept.", self.name)
concept = context.sheerka.new((concept_key, concept_id))
if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
context.log(self.verbose_log, f"'{concept_key}' is not a concept. Skipping.", self.name)
continue
context.log(self.verbose_log, f"Evaluating '{concept}'", self.name)
with context.push(self.name, desc=f"Evaluating '{concept}'", obj=concept) as sub_context:
sub_context.log_new(self.verbose_log)
evaluated = context.sheerka.evaluate_concept(sub_context, concept, self.verbose_log)
sub_context.add_values(return_values=evaluated)
if evaluated.key == concept.key:
my_locals[name] = evaluated if return_concept else context.sheerka.value(evaluated)
if self.locals:
my_locals.update(self.locals)
return my_locals
def resolve_name(self, context, to_resolve):
"""
Try to match
__C__concept_key__C__
or
__C__concept_key__concept_id__C__
:param context:
:param to_resolve:
:return:
"""
if not to_resolve.startswith("__C__"):
return to_resolve, None, False
context.log(self.verbose_log, f"Resolving name '{to_resolve}'.", self.name)
if len(to_resolve) >= 18 and to_resolve[:18] == "__C__USE_CONCEPT__":
use_concept = True
index = 18
else:
use_concept = False
index = 5
try:
next_index = to_resolve.index("__", index)
if next_index == index:
context.log(self.verbose_log, f"Error: no key between '__'.", self.name)
return None, None, False  # keep the (concept_key, concept_id, use_concept) shape expected by the caller
concept_key = to_resolve[index: next_index]
except ValueError:
context.log(self.verbose_log, f"Error: Missing trailing '__'.", self.name)
return None, None, False
if next_index == len(to_resolve) - 5:
context.log(self.verbose_log, f"Recognized concept '{concept_key}'", self.name)
return concept_key, None, use_concept
index = next_index + 2
try:
next_index = to_resolve.index("__", index)
if next_index == index:
context.log(self.verbose_log, f"Error: no id between '__'.", self.name)
return None, None, False
concept_id = to_resolve[index: next_index]
except ValueError:
context.log(self.verbose_log, f"Recognized concept '{concept_key}'.", self.name)
return concept_key, None, use_concept
context.log(self.verbose_log, f"Recognized concept '{concept_key}' (id='{concept_id}').", self.name)
return concept_key, concept_id, use_concept
@staticmethod
def expr_to_expression(expr):
expr.lineno = 0
expr.col_offset = 0
result = ast.Expression(expr.value, lineno=0, col_offset=0)
return result
def exec_with_return(self, code_ast, my_locals):
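# Runs every statement except the last one with 'exec', then, when the last statement is an
# expression, evaluates it with 'eval' so that its value can be returned to the caller
# (otherwise the last statement is simply executed and None is returned implicitly).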
init_ast = copy.deepcopy(code_ast)
init_ast.body = code_ast.body[:-1]
last_ast = copy.deepcopy(code_ast)
last_ast.body = code_ast.body[-1:]
exec(compile(init_ast, "<ast>", "exec"), {}, my_locals)
if type(last_ast.body[0]) == ast.Expr:
return eval(compile(self.expr_to_expression(last_ast.body[0]), "<ast>", "eval"), {}, my_locals)
else:
exec(compile(last_ast, "<ast>", "exec"), {}, my_locals)
+54
View File
@@ -0,0 +1,54 @@
import logging
from core.builtin_concepts import BuiltinConcepts
import core.builtin_helpers
from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
from parsers.BaseParser import BaseParser
class TooManySuccessEvaluator(AllReturnValuesEvaluator):
"""
Used to filter the responses
It has a low priority to let other evaluators try to resolve the errors
Raises an error when there are several successful answers with different values
"""
NAME = "TooManySuccess"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 60)
self.success = []
def matches(self, context, return_values):
to_process = False
for ret in return_values:
if ret.status and ret.who.startswith(BaseParser.PREFIX):
return False
elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
to_process = True
self.eaten.append(ret)
elif ret.status and ret.who.startswith(self.PREFIX):
self.success.append(ret)
self.eaten.append(ret)
elif not ret.status:
self.eaten.append(ret)
return to_process and len(self.success) > 1
def eval(self, context, return_values):
sheerka = context.sheerka
if self.verbose_log.isEnabledFor(logging.DEBUG):
for s in self.success:
context.log(self.verbose_log, s, self.name)
context.log(self.verbose_log, f"value={sheerka.value(s.value)}", self.name)
if not core.builtin_helpers.is_same_success(sheerka, self.success):
context.log(self.verbose_log,
f"Values are different. Raising {BuiltinConcepts.TOO_MANY_SUCCESS}.", self.name)
too_many_success = sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS, body=self.success)
return sheerka.ret(self.name, False, too_many_success, parents=self.eaten)
context.log(self.verbose_log, f"Values are the same. Nothing to do.", self.name)
return None
+120
View File
@@ -0,0 +1,120 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords
from core.sheerka_logger import get_logger
import logging
@dataclass()
class Node:
pass
@dataclass()
class NopNode(Node):
def __repr__(self):
return "nop"
class NotInitializedNode(Node):
def __repr__(self):
return "**N/A**"
@dataclass()
class ErrorNode(Node):
pass
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
message: str
expected_tokens: list
class BaseParser:
PREFIX = "parsers."
def __init__(self, name, priority: int, enabled=True):
self.log = get_logger("parsers." + self.__class__.__name__)
self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__)
self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__)
self.name = self.PREFIX + name
self.priority = priority
self.enabled = enabled
self.has_error = False
self.error_sink = []
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return self.name == other.name
def __hash__(self):
return hash(self.name)
def __repr__(self):
return self.name
def parse(self, context, text):
pass
def log_result(self, context, source, ret):
if not self.log.isEnabledFor(logging.DEBUG):
return
if ret.status:
value = context.return_value_to_str(ret)
context.log(self.log, f"Recognized '{source}' as {value}", self.name)
else:
context.log(self.log, f"Failed to recognize '{source}'", self.name)
def log_multiple_results(self, context, source, list_of_ret):
if not self.log.isEnabledFor(logging.DEBUG):
return
context.log(self.log, f"Recognized '{source}' as multiple concepts", self.name)
for r in list_of_ret:
value = context.return_value_to_str(r)
context.log(self.log, f" Recognized '{value}'", self.name)
def get_return_value_body(self, sheerka, source, tree, try_parse):
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
return self.error_sink[0]
return sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=self.error_sink if self.has_error else tree,
try_parsed=try_parse)
@staticmethod
def get_text_from_tokens(tokens, custom_switcher=None):
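# Rebuilds a readable string from a token (or an iterable of tokens): keyword tokens are rendered
# through the Keywords enum, concept tokens as "c:<value>:", everything else as its raw value;
# a custom_switcher dict can override the rendering per TokenKind.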
if tokens is None:
return ""
res = ""
if not hasattr(tokens, "__iter__"):
tokens = [tokens]
switcher = {
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
TokenKind.CONCEPT: lambda t: "c:" + t.value + ":",
}
if custom_switcher:
switcher.update(custom_switcher)
for token in tokens:
value = switcher.get(token.type, lambda t: t.value)(token)
res += value
return res
+270
View File
@@ -0,0 +1,270 @@
from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, StrMatch
@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
pass
class BnfParser(BaseParser):
"""
Parser used to transform a literal grammar definition into a ParsingExpression
example :
a | b, c -> Sequence(OrderedChoice(a, b), c)
'|' (pipe) is used for OrderedChoice
',' (comma) is used for Sequence
'?' (question mark) is used for Optional
'*' (star) is used for ZeroOrMore
'+' (plus) is used for OneOrMore
"""
def __init__(self, **kwargs):
super().__init__("Bnf", 50, False)
# self.has_error = False
# self.error_sink = []
# self.name = BaseParser.PREFIX + "Bnf"
self.lexer_iter = None
self._current = None
self.after_current = None
self.nb_open_par = 0
self.context = None
self.source = ""
self.sheerka = None
def __eq__(self, other):
if not isinstance(other, BnfParser):
return False
return True
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
self._current = None
self.after_current = None
self.nb_open_par = 0
self.next_token()
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=False):
if self._current and self._current.type == TokenKind.EOF:
return
try:
self._current = self.after_current or next(self.lexer_iter)
self.source += str(self._current.value)
self.after_current = None
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
def next_after(self):
if self.after_current is not None:
return self.after_current
try:
self.after_current = next(self.lexer_iter)
# self.source += str(self.after_current.value)
return self.after_current
except StopIteration:
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
return self.after_current
def eat_white_space(self):
if self.after_current is not None:
self._current = self.after_current
self.source += str(self._current.value)
self.after_current = None
try:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = None
def maybe_sequence(self, first, second):
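# True when the current token is of kind 'second', or of kind 'first' and immediately followed by
# a token of kind 'second' (typically used to spot "<whitespace><separator>" without consuming it).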
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
tree = None
try:
self.reset_parser(context, text)
tree = self.parser_outer_rule_name()
token = self.get_token()
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
except LexerError as e:
self.add_error(e, False)
value = self.get_return_value_body(context.sheerka, self.source, tree, tree)
ret = self.sheerka.ret(
self.name,
not self.has_error,
value)
return ret
def parser_outer_rule_name(self):
return self.parser_rule_name(self.parse_choice)
def parse_choice(self):
sequence = self.parse_sequence()
self.eat_white_space()
token = self.get_token()
if token is None or token.type != TokenKind.VBAR:
return sequence
elements = [sequence]
while True:
# maybe eat the vertical bar
self.eat_white_space()
token = self.get_token()
if token is None or token.type != TokenKind.VBAR:
break
self.next_token(skip_whitespace=True)
sequence = self.parse_sequence()
elements.append(sequence)
return OrderedChoice(*elements)
def parse_sequence(self):
expr_and_modifier = self.parse_modifier()
token = self.get_token()
if token is None or \
token.type == TokenKind.EOF or \
token.type == TokenKind.EQUALS or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
return expr_and_modifier
elements = [expr_and_modifier]
while True:
token = self.get_token()
if token is None or \
token.type == TokenKind.EOF or \
token.type == TokenKind.EQUALS or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
break
self.eat_white_space()
sequence = self.parse_modifier()
elements.append(sequence)
return Sequence(*elements)
def parse_modifier(self):
expression = self.parser_inner_rule_name()
token = self.get_token()
if token.type == TokenKind.QMARK:
self.next_token()
return Optional(expression)
if token.type == TokenKind.STAR:
self.next_token()
return ZeroOrMore(expression)
if token.type == TokenKind.PLUS:
self.next_token()
return OneOrMore(expression)
return expression
def parser_inner_rule_name(self):
return self.parser_rule_name(self.parse_expression)
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
self.add_error(UnexpectedEndOfFileError(), False)
if token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expression = self.parse_choice()
token = self.get_token()
if token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return expression
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR]))
return expression
if token.type == TokenKind.IDENTIFIER:
self.next_token()
concept_name = str(token.value)
# we are trying to match against a concept which is still under construction !
# (for example of recursive bnf definition)
if self.context.obj and hasattr(self.context.obj, "name"):
if concept_name == str(self.context.obj.name):
return ConceptExpression(concept_name)
concept = self.context.get_concept(concept_name)
if not self.sheerka.is_known(concept):
self.add_error(concept)
return None
elif hasattr(concept, "__iter__"):
self.add_error(
self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
body=("key", concept_name)))
return None
else:
return concept
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
def parser_rule_name(self, next_to_parse):
expression = next_to_parse()
token = self.get_token()
if token is None or token.type != TokenKind.EQUALS:
return expression
self.next_token() # eat equals
token = self.get_token()
if token is None or token.type != TokenKind.IDENTIFIER:
return self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.IDENTIFIER]))
expression.rule_name = token.value
self.next_token()
return expression
+994
View File
@@ -0,0 +1,994 @@
#####################################################################################################
# This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# I don't directly use the project, but it helped me figure out
# what to do.
# Dejanović I., Milosavljević G., Vaderna R.:
# Arpeggio: A flexible PEG parser for Python,
# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
#####################################################################################################
from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils
@dataclass()
class LexerNode(Node):
start: int # starting index in the tokens list
end: int # ending index in the tokens list
tokens: list = None # tokens
source: str = None # string representation of what was parsed
def __post_init__(self):
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if not isinstance(other, LexerNode):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.tokens == other.tokens
class UnrecognizedTokensNode(LexerNode):
def __init__(self, start, end, tokens):
super().__init__(start, end, tokens)
def add_token(self, token, pos):
self.tokens.append(token)
self.end = pos
def fix_source(self):
self.source = BaseParser.get_text_from_tokens(self.tokens)
def not_whitespace(self):
return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))
def __eq__(self, other):
if isinstance(other, utnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, UnrecognizedTokensNode):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
class ConceptNode(LexerNode):
"""
Returned by the ConceptLexerParser
It represents a recognized concept
"""
def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
super().__init__(start, end, tokens, source)
self.concept = concept
self.underlying = underlying
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if isinstance(other, cnode):
return self.concept.key == other.concept_key and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
if isinstance(other, short_cnode):
return self.concept.key == other.concept_key and self.source == other.source
if not isinstance(other, ConceptNode):
return False
return self.concept == other.concept and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.underlying == other.underlying
def __hash__(self):
return hash((self.concept, self.start, self.end, self.source, self.underlying))
def __repr__(self):
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeNode(LexerNode):
"""
Returned when some source code (like Python source code) is recognized
"""
def __init__(self, node, start, end, tokens=None, source=None):
super().__init__(start, end, tokens, source)
self.node = node # The PythonNode (or whatever language node) that is found
def __eq__(self, other):
if isinstance(other, scnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, SourceCodeNode):
return False
return self.node == other.node and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")
class NonTerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
"""
def __init__(self, parsing_expression, start, end, tokens, children=None):
super().__init__(start, end, tokens)
self.parsing_expression = parsing_expression
self.children = children
def __repr__(self):
name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
if len(self.children) > 0:
sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
else:
sub_names = ""
return name + sub_names
def __eq__(self, other):
if not isinstance(other, NonTerminalNode):
return False
return self.parsing_expression == other.parsing_expression and \
self.start == other.start and \
self.end == other.end and \
self.children == other.children
def __hash__(self):
return hash((self.parsing_expression, self.start, self.end, tuple(self.children or ())))
class TerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
"""
def __init__(self, parsing_expression, start, end, value):
super().__init__(start, end, source=value)
self.parsing_expression = parsing_expression
self.value = value
def __repr__(self):
name = self.parsing_expression.rule_name or ""
return name + f"'{self.value}'"
def __eq__(self, other):
if not isinstance(other, TerminalNode):
return False
return self.parsing_expression == other.parsing_expression and \
self.start == other.start and \
self.end == other.end and \
self.value == other.value
def __hash__(self):
return hash((self.parsing_expression, self.start, self.end, self.value))
@dataclass()
class GrammarErrorNode(ErrorNode):
message: str
@dataclass()
class UnknownConceptNode(ErrorNode):
concept_key: str
@dataclass()
class TooManyConceptNode(ErrorNode):
concept_key: str
class ParsingExpression:
def __init__(self, *args, **kwargs):
self.elements = args
nodes = kwargs.get('nodes', [])
if not hasattr(nodes, '__iter__'):
nodes = [nodes]
self.nodes = nodes
self.rule_name = kwargs.get('rule_name', '')
def __eq__(self, other):
if not isinstance(other, ParsingExpression):
return False
return self.rule_name == other.rule_name and self.elements == other.elements
def __hash__(self):
return hash((self.rule_name, self.elements))
def parse(self, parser):
return self._parse(parser)
class ConceptExpression(ParsingExpression):
"""
Will match a concept
It is used only for rule definitions
When the grammar is created, it is replaced by the actual concept
"""
def __init__(self, concept, rule_name=""):
super().__init__(rule_name=rule_name)
self.concept = concept
def __repr__(self):
return f"{self.concept}"
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, ConceptExpression):
return False
if isinstance(self.concept, Concept):
return self.concept.name == other.concept.name
# when it's only the name of the concept
return self.concept == other.concept
def __hash__(self):
return hash((self.concept, self.rule_name))
@staticmethod
def get_parsing_expression_from_name(name):
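# Fallback grammar built from the concept's own name: every token of the name but the last one
# (presumably the end-of-input token) becomes a StrMatch, and several tokens are wrapped in a Sequence.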
tokens = Tokenizer(name)
nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
if len(nodes) == 1:
return nodes[0]
else:
sequence = Sequence(nodes)
sequence.nodes = nodes
return sequence
def _parse(self, parser):
to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
return None
self.concept = to_match # Memoize
if to_match not in parser.concepts_grammars:
# Try to match the concept using its name
expr = self.get_parsing_expression_from_name(to_match.name)
node = expr.parse(parser)
else:
node = parser.concepts_grammars[to_match].parse(parser)
if node is None:
return None
return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
class ConceptGroupExpression(ConceptExpression):
def _parse(self, parser):
to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
return None
self.concept = to_match # Memoize
if to_match not in parser.concepts_grammars:
concepts_in_group = parser.sheerka.get_set_elements(self.concept)
nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
expr = OrderedChoice(nodes)
expr.nodes = nodes
node = expr.parse(parser)
else:
node = parser.concepts_grammars[to_match].parse(parser)
if node is None:
return None
return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
class Sequence(ParsingExpression):
"""
Will match a sequence of parser expressions in the exact order they are defined.
"""
def _parse(self, parser):
init_pos = parser.pos
end_pos = parser.pos
children = []
for e in self.nodes:
node = e.parse(parser)
if node is None:
return None
else:
if node.end != -1: # because returns -1 when no match
children.append(node)
end_pos = node.end
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})"
class OrderedChoice(ParsingExpression):
"""
Will match one expression among multiple
It stops at the first match (so the order of definition is important)
"""
def _parse(self, parser):
init_pos = parser.pos
for e in self.nodes:
node = e.parse(parser)
if node:
return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
parser.seek(init_pos) # backtrack
return None
def __repr__(self):
to_str = "| ".join(repr(n) for n in self.elements)
return f"({to_str})"
class Optional(ParsingExpression):
"""
Matches the elements or not
If several elements match, the longest match is chosen
If you need order, use Optional(OrderedChoice)
"""
def _parse(self, parser):
init_pos = parser.pos
selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
for e in self.nodes:
node = e.parse(parser)
if node:
if node.end > selected_node.end:
selected_node = NonTerminalNode(
self,
node.start,
node.end,
parser.tokens[node.start: node.end + 1],
[node])
parser.seek(init_pos) # backtrack
if selected_node.end != -1:
parser.seek(selected_node.end)
parser.next_token() # eat the tokens found
return selected_node
def __repr__(self):
if len(self.elements) == 1:
return f"{self.elements[0]}?"
else:
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})?"
class Repetition(ParsingExpression):
"""
Base class for all repetition-like parser expressions (?,*,+)
Args:
sep: optional separator expression matched between the repeated elements.
"""
def __init__(self, *elements, **kwargs):
super(Repetition, self).__init__(*elements, **kwargs)
self.sep = kwargs.get('sep', None)
class ZeroOrMore(Repetition):
"""
ZeroOrMore will try to match parser expression specified zero or more
times. It will never fail.
"""
def _parse(self, parser):
init_pos = parser.pos
end_pos = -1
children = []
while True:
current_pos = parser.pos
# maybe eat the separator if needed
if self.sep and children:
sep_result = self.sep.parse(parser)
if sep_result is None:
parser.seek(current_pos)
break
# eat the ZeroOrMore
node = self.nodes[0].parse(parser)
if node is None:
parser.seek(current_pos)
break
else:
if node.end != -1: # because returns -1 when no match
children.append(node)
end_pos = node.end
if len(children) == 0:
return NonTerminalNode(self, init_pos, -1, [], [])
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})*"
class OneOrMore(Repetition):
"""
OneOrMore will try to match parser expression specified one or more times.
"""
def _parse(self, parser):
init_pos = parser.pos
end_pos = -1
children = []
while True:
current_pos = parser.pos
# maybe eat the separator if needed
if self.sep and children:
sep_result = self.sep.parse(parser)
if sep_result is None:
parser.seek(current_pos)
break
# eat the repeated element
node = self.nodes[0].parse(parser)
if node is None:
parser.seek(current_pos)
break
else:
if node.end != -1: # because returns -1 when no match
children.append(node)
end_pos = node.end
if len(children) == 0: # if nothing is found, it's an error
return None
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})+"
class UnorderedGroup(Repetition):
"""
Will try to match all of the parsing expressions in any order.
"""
def _parse(self, parser):
raise NotImplementedError()
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return f"({to_str})#"
class Match(ParsingExpression):
"""
Base class for all classes that will try to match something from the input.
"""
def __init__(self, rule_name, root=False):
super(Match, self).__init__(rule_name=rule_name, root=root)
def parse(self, parser):
result = self._parse(parser)
return result
class StrMatch(Match):
"""
Matches a literal
"""
def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
super(Match, self).__init__(rule_name=rule_name, root=root)
self.to_match = to_match
self.ignore_case = ignore_case
def __repr__(self):
return f"'{self.to_match}'"
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, StrMatch):
return False
return self.to_match == other.to_match and self.ignore_case == other.ignore_case
def _parse(self, parser):
token = parser.get_token()
m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
else token.value == self.to_match
if m:
node = TerminalNode(self, parser.pos, parser.pos, token.value)
parser.next_token()
return node
return None
class ConceptLexerParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptLexer", 50)
if 'grammars' in kwargs:
self.concepts_grammars = kwargs.get("grammars")
elif 'sheerka' in kwargs:
self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
else:
self.concepts_grammars = {}
self.ignore_case = True
self.token = None
self.pos = -1
self.tokens = None
self.context = None
self.text = None
self.sheerka = None
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.text = text
if isinstance(text, str):
try:
self.tokens = list(Tokenizer(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
else:
self.tokens = list(text)
self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token
self.token = None
self.pos = -1
self.next_token(False)
return True
def get_token(self) -> Token:
return self.token
def next_token(self, skip_whitespace=True):
if self.token and self.token.type == TokenKind.EOF:
return False
self.pos += 1
self.token = self.tokens[self.pos]
if skip_whitespace:
while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
self.pos += 1
self.token = self.tokens[self.pos]
return self.token.type != TokenKind.EOF
def seek(self, pos):
self.pos = pos
self.token = self.tokens[self.pos]
return True
def rewind(self, offset, skip_whitespace=True):
self.pos += offset
self.token = self.tokens[self.pos]
if skip_whitespace:
while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
self.pos -= 1
self.token = self.tokens[self.pos]
def initialize(self, context, concepts_definitions):
"""
Adds a bunch of concepts, and how they can be recognized
:param context: execution context
:param concepts_definitions: dictionary of concept, concept_definition
:return:
"""
self.context = context
self.sheerka = context.sheerka
concepts_to_resolve = set()
# ## Gets the grammars
for concept, concept_def in concepts_definitions.items():
concept.init_key() # make sure that the key is initialized
grammar = self.get_model(concept_def, concepts_to_resolve)
self.concepts_grammars[concept] = grammar
if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink)
# ## Removes concepts with infinite recursions
concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
for concept in concepts_to_remove:
concepts_to_resolve.remove(concept)
del self.concepts_grammars[concept]
if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink)
else:
return self.sheerka.ret(self.name, True, self.concepts_grammars)
def get_concept(self, concept_name):
if concept_name in self.context.concepts:
return self.context.concepts[concept_name]
return self.sheerka.get(concept_name)
def get_model(self, concept_def, concepts_to_resolve):
# TODO
# inner_get_model must not modify the initial ParsingExpression
# A copy must be created
def inner_get_model(expression):
if isinstance(expression, Concept):
if self.sheerka.isagroup(expression):
ret = ConceptGroupExpression(expression, rule_name=expression.name)
else:
ret = ConceptExpression(expression, rule_name=expression.name)
concepts_to_resolve.add(expression)
elif isinstance(expression, ConceptExpression):
if expression.rule_name is None or expression.rule_name == "":
expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
else expression.concept
concepts_to_resolve.add(expression.concept)
ret = expression
elif isinstance(expression, str):
ret = StrMatch(expression, ignore_case=self.ignore_case)
elif isinstance(expression, StrMatch):
ret = expression
if ret.ignore_case is None:
ret.ignore_case = self.ignore_case
elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \
isinstance(expression, ZeroOrMore) or \
isinstance(expression, OneOrMore) or \
isinstance(expression, Optional):
ret = expression
ret.nodes = [inner_get_model(e) for e in ret.elements]
else:
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
# Translate separator expression.
if isinstance(expression, Repetition) and expression.sep:
expression.sep = inner_get_model(expression.sep)
return ret
model = inner_get_model(concept_def)
return model
def detect_infinite_recursion(self, concepts_to_resolve):
# infinite recursion matcher
def _is_infinite_recursion(ref_concept, node):
if isinstance(node, ConceptExpression):
if node.concept == ref_concept:
return True
if isinstance(node.concept, str):
to_match = self.get_concept(node.concept)
if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
return False
else:
to_match = node.concept
if to_match not in self.concepts_grammars:
return False
return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
if isinstance(node, OrderedChoice):
return _is_infinite_recursion(ref_concept, node.nodes[0])
if isinstance(node, Sequence):
for node in node.nodes:
if _is_infinite_recursion(ref_concept, node):
return True
return False
return False
removed_concepts = []
for e in concepts_to_resolve:
if isinstance(e, str):
e = self.get_concept(e)
if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
continue
if e not in self.concepts_grammars:
continue
to_resolve = self.concepts_grammars[e]
if _is_infinite_recursion(e, to_resolve):
removed_concepts.append(e)
return removed_concepts
def parse(self, context, text):
if text == "":
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
)
if not self.reset_parser(context, text):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
concepts_found = [[]]
unrecognized_tokens = None
has_unrecognized = False
# actually a list of lists
# The first dimension is the number of possibilities found
# The second dimension is the number of concepts found, under one possibility
#
# Example 1
# concept foo : 'one' 'two'
# concept bar : 'one' 'two'
# input 'one two' -> will produce two possibilities (foo and bar).
#
# Example 2
# concept foo : 'one'
# concept bar : 'two'
# input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
while True:
init_pos = self.pos
res = []
for concept, grammar in self.concepts_grammars.items():
self.seek(init_pos)
node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
if node is not None and node.end != -1:
updated_concept = self.finalize_concept(context.sheerka, concept, node)
concept_node = ConceptNode(
updated_concept,
node.start,
node.end,
self.tokens[node.start: node.end + 1],
None,
node)
res.append(concept_node)
if len(res) == 0: # not recognized
self.seek(init_pos)
if unrecognized_tokens:
unrecognized_tokens.add_token(self.get_token(), init_pos)
else:
unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
if not self.next_token(False):
break
else: # some concepts are recognized
if unrecognized_tokens and unrecognized_tokens.not_whitespace():
unrecognized_tokens.fix_source()
concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
has_unrecognized = True
unrecognized_tokens = None
res = self.get_bests(res) # only keep the concepts that eat the most tokens
concepts_found = core.utils.product(concepts_found, res)
# loop
self.seek(res[0].end)
if not self.next_token(False):
break
# Fix the source for unrecognized tokens
if unrecognized_tokens and unrecognized_tokens.not_whitespace():
unrecognized_tokens.fix_source()
concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
has_unrecognized = True
# else
# returns as many ReturnValue than choices found
ret = []
for choice in concepts_found:
ret.append(
self.sheerka.ret(
self.name,
not has_unrecognized,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
body=choice,
try_parsed=choice)))
if len(ret) == 1:
self.log_result(context, text, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text, ret)
return ret
def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
"""
Updates the properties of the concept
Recurses if the property is a concept
"""
# this cache is to make sure that we return the same concept for the same ConceptExpression
_underlying_value_cache = {}
def _add_prop(_concept, prop_name, value):
"""
Adds a new entry,
makes a list if the property already exists
"""
if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
# new entry
_concept.compiled[prop_name] = value
else:
# make a list if there was a value
previous_value = _concept.compiled[prop_name]
if isinstance(previous_value, list):
previous_value.append(value)
else:
new_value = [previous_value, value]
_concept.compiled[prop_name] = new_value
def _look_for_concept_match(_underlying):
if isinstance(_underlying.parsing_expression, ConceptExpression):
return _underlying
if not isinstance(_underlying, NonTerminalNode):
return None
if len(_underlying.children) != 1:
return None
return _look_for_concept_match(_underlying.children[0])
def _get_underlying_value(_underlying):
concept_match_node = _look_for_concept_match(_underlying)
if concept_match_node:
if id(concept_match_node) in _underlying_value_cache:
result = _underlying_value_cache[id(concept_match_node)]
else:
ref_tpl = concept_match_node.parsing_expression.concept
result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
_underlying_value_cache[id(concept_match_node)] = result
else:
result = DoNotResolve(_underlying.source)
return result
def _process_rule_name(_concept, _underlying):
if _underlying.parsing_expression.rule_name:
value = _get_underlying_value(_underlying)
_add_prop(_concept, _underlying.parsing_expression.rule_name, value)
if isinstance(_underlying, NonTerminalNode):
for child in _underlying.children:
_process_rule_name(_concept, child)
key = (template.key, template.id) if template.id else template.key
concept = sheerka.new(key)
if init_empty_body and concept.metadata.body is None:
value = _get_underlying_value(underlying)
concept.compiled[ConceptParts.BODY] = value
if underlying.parsing_expression.rule_name:
_add_prop(concept, underlying.parsing_expression.rule_name, value)
if isinstance(underlying, NonTerminalNode):
for node in underlying.children:
_process_rule_name(concept, node)
return concept
@staticmethod
def get_bests(results):
"""
Returns the results that consume the most tokens (those with the furthest end position)
:param results:
:return:
"""
by_end_pos = defaultdict(list)
for result in results:
by_end_pos[result.end].append(result)
return by_end_pos[max(by_end_pos)]
class ParsingExpressionVisitor:
"""
visit ParsingExpression
"""
def visit(self, parsing_expression):
name = parsing_expression.__class__.__name__
method = 'visit_' + name
visitor = getattr(self, method, self.generic_visit)
return visitor(parsing_expression)
def generic_visit(self, parsing_expression):
if hasattr(self, "visit_all"):
self.visit_all(parsing_expression)
for node in parsing_expression.elements:
if isinstance(node, Concept):
self.visit(ConceptExpression(node.key or node.name))
elif isinstance(node, str):
self.visit(StrMatch(node))
else:
self.visit(node)
+110
View File
@@ -0,0 +1,110 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
import logging
multiple_concepts_parser = MultipleConceptsParser()
class ConceptsWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptsWithConcepts", 25)
@staticmethod
def get_tokens(nodes):
tokens = []
for node in nodes:
if isinstance(node, ConceptNode):
index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
else:
for token in node.tokens:
if token.type == TokenKind.EOF:
break
elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
continue
else:
tokens.append(token)
return tokens
@staticmethod
def get_key(nodes):
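# Builds the lookup key for the composite concept: unrecognized text is kept verbatim, while every
# recognized node is replaced by a numbered variable placeholder, e.g. (hypothetical nodes)
# ["turn ", <recognized concept>, " on"] -> "turn {VARIABLE_PREFIX}0 on".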
key = ""
index = 0
for node in nodes:
if key:
key += " "
if isinstance(node, UnrecognizedTokensNode):
key += node.source.strip()
else:
key += f"{VARIABLE_PREFIX}{index}"
index += 1
return key
def finalize_concept(self, context, concept, nodes):
index = 0
for node in nodes:
if isinstance(node, ConceptNode):
prop_name = list(concept.props.keys())[index]
concept.compiled[prop_name] = node.concept
context.log(
self.verbose_log,
f"Setting property '{prop_name}='{node.concept}'.",
self.name)
index += 1
elif isinstance(node, SourceCodeNode):
prop_name = list(concept.props.keys())[index]
sheerka = context.sheerka
value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
context.log(
self.verbose_log,
f"Setting property '{prop_name}'='Python({node.source})'.",
self.name)
index += 1
return concept
def parse(self, context, text):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
concept_key = self.get_key(nodes)
concept = sheerka.new(concept_key)
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
concepts = concept if hasattr(concept, "__iter__") else [concept]
for concept in concepts:
self.finalize_concept(context, concept, nodes)
res = []
for concept in concepts:
res.append(sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
body=concept,
try_parsed=None)))
return res[0] if len(res) == 1 else res
+430
View File
@@ -0,0 +1,430 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts
import core.builtin_helpers
import core.utils
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
from parsers.BnfParser import BnfParser
from core.sheerka.Sheerka import ExecutionContext
@dataclass()
class DefaultParserNode(Node):
"""
Base node for all default parser nodes
"""
tokens: list = field(compare=False, repr=False)
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
pass
@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
message: str
expected_tokens: list
@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
"""
The input is recognized, but there is a syntax error
"""
message: str
@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
"""
The input is not recognized
"""
text: str
@dataclass()
class NameNode(DefaultParserNode):
def get_name(self):
name = ""
first = True
for token in self.tokens:
if token.type == TokenKind.EOF:
break
if token.type == TokenKind.WHITESPACE:
continue
if not first:
name += " "
name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
first = False
return name
def __repr__(self):
return self.get_name()
def __eq__(self, other):
if not isinstance(other, NameNode):
return False
return self.get_name() == other.get_name()
def __hash__(self):
return hash(self.get_name())
@dataclass()
class DefConceptNode(DefaultParserNode):
name: NameNode = NotInitializedNode()
where: ReturnValueConcept = NotInitializedNode()
pre: ReturnValueConcept = NotInitializedNode()
post: ReturnValueConcept = NotInitializedNode()
body: ReturnValueConcept = NotInitializedNode()
definition: ReturnValueConcept = NotInitializedNode()
def get_asts(self):
asts = {}
for part_key in ConceptParts:
prop_value = getattr(self, part_key.value)
if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body,
ParserResultConcept) and hasattr(
prop_value.body.body, "ast_"):
asts[part_key] = prop_value
#asts[part_key] = prop_value.body.body.ast_
return asts
@dataclass()
class IsaConceptNode(DefaultParserNode):
concept: NameNode = NotInitializedNode()
set: NameNode = NotInitializedNode()
class DefaultParser(BaseParser):
"""
Parses sheerka-specific grammar (like 'def concept')
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "Default", 50)
self.lexer_iter = None
self._current = None
self.context: ExecutionContext = None
self.text = None
self.sheerka = None
@staticmethod
def fix_indentation(tokens):
"""
In the following example
def concept add one to a as:
def func(x):
return x+1
func(a)
indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error
:param tokens:
:return:
"""
if tokens[0].type != TokenKind.COLON:
return tokens
if len(tokens) < 3:
return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
if tokens[1].type != TokenKind.NEWLINE:
return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])
if tokens[2].type != TokenKind.WHITESPACE:
return SyntaxErrorNode([tokens[2]], "Indentation not found.")
indent_size = len(tokens[2].value)
# now fix the other indentations
i = 3
while i < len(tokens) - 1:
if tokens[i].type == TokenKind.NEWLINE:
if tokens[i + 1].type != TokenKind.WHITESPACE:
return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
if len(tokens[i + 1].value) < indent_size:
return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
i += 1
return tokens[3:]
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.text = text
self.lexer_iter = iter(Tokenizer(text))
self._current = None
self.next_token()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=True):
try:
self._current = next(self.lexer_iter)
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
except StopIteration:
self._current = None
return
def parse(self, context, text):
# default parser can only manage string text
if not isinstance(text, str):
ret = context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
self.log_result(context, text, ret)
return ret
tree = None
try:
self.reset_parser(context, text)
tree = self.parse_statement()
except core.tokenizer.LexerError as e:
self.add_error(e, False)
# If an error is found, it must be sent to error_sink
# tree must contain what was recognized
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
else:
body = self.get_return_value_body(context.sheerka, text, tree, tree)
# body = self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=text,
# body=self.error_sink if self.has_error else tree,
# try_parsed=tree)
ret = self.sheerka.ret(
self.name,
not self.has_error,
body)
self.log_result(context, text, ret)
return ret
def parse_statement(self):
token = self.get_token()
if token.value == Keywords.DEF:
self.next_token()
self.context.log(self.verbose_log, "Keyword DEF found.", self.name)
return self.parse_def_concept(token)
else:
return self.parse_isa_concept()
def parse_def_concept(self, def_token):
"""
def concept name [from xxx] [where xxx] [pre xxx] [post xxx] [as xxx]
"""
# init
keywords_tokens = [def_token]
concept_found = DefConceptNode(keywords_tokens)
# the definition of a concept consists of several parts
# Keywords.CONCEPT to get the name of the concept
# Keywords.FROM [Keywords.REGEX] to get the definition of the concept
# Keywords.AS to get the body
# Keywords.WHERE to get the conditions used to recognize the variables
# Keywords.PRE to get the conditions required before evaluating the concept
# Keywords.POST to get what must be applied or verified once the concept is executed
#
# Regroup the tokens by parts
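# e.g. (hypothetical) for "def concept foo as: 1", the CONCEPT bucket holds the 'concept' keyword
# followed by the 'foo' tokens, and the AS bucket holds the 'as' keyword followed by the remaining ': 1' tokens.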
first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)
if first_token.type == TokenKind.EOF:
return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))
# get the name
concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)
# get the definition
concept_found.definition = self.get_concept_definition(concept_found, tokens_found_by_parts)
# get the ASTs for the remaining parts
asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
concept_found.where = asts_found_by_parts[Keywords.WHERE]
concept_found.pre = asts_found_by_parts[Keywords.PRE]
concept_found.post = asts_found_by_parts[Keywords.POST]
concept_found.body = asts_found_by_parts[Keywords.AS]
return concept_found
def parse_isa_concept(self):
concept_name = self.parse_concept_name()
if isinstance(concept_name, DefaultParserErrorNode):
return concept_name
keyword = []
token = self.get_token()
if token.value != Keywords.ISA:
return self.add_error(CannotHandleErrorNode([token], ""))
keyword.append(token)
self.next_token()
set_name = self.parse_concept_name()
return IsaConceptNode(keyword, concept_name, set_name)
def parse_concept_name(self):
tokens = []
token = self.get_token()
while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD):
tokens.append(token)
self.next_token()
token = self.get_token()
if len(tokens) == 0:
return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
else:
return NameNode(tokens)
def regroup_tokens_by_parts(self, keywords_tokens):
def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
# tokens found, when trying to recognize the parts
tokens_found_by_parts = {
Keywords.CONCEPT: [],
Keywords.FROM: None,
Keywords.AS: None,
Keywords.WHERE: None,
Keywords.PRE: None,
Keywords.POST: None,
}
current_part = Keywords.CONCEPT
token = self.get_token()
first_token = token
# loop through the tokens and put them in the correct tokens_found_by_parts entry
while token.type != TokenKind.EOF:
if token.value in def_concept_parts:
keywords_tokens.append(token) # keep track of the keywords
keyword = token.value
if tokens_found_by_parts[keyword]:
# a part is defined more than once
self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
tokens_found_by_parts[current_part].append(token) # adds the token again
else:
tokens_found_by_parts[keyword] = [token]
current_part = keyword
self.next_token()
else:
tokens_found_by_parts[current_part].append(token)
self.next_token(False)
token = self.get_token()
return first_token, tokens_found_by_parts
def get_concept_name(self, first_token, tokens_found_by_parts):
name_first_token_index = 1
token = self.get_token()
if first_token.value != Keywords.CONCEPT:
self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
name_first_token_index = 0
name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
if len(name_tokens) == name_first_token_index:
self.add_error(SyntaxErrorNode([], "Name is mandatory"))
if name_tokens[-1].type == TokenKind.NEWLINE:
name_tokens = name_tokens[:-1] # strip trailing newlines
if TokenKind.NEWLINE in [t.type for t in name_tokens]:
self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newlines are not allowed in the name."))
name_node = NameNode(name_tokens[name_first_token_index:]) # skip the first token
return name_node
def get_concept_definition(self, current_concept_def, tokens_found_by_parts):
if tokens_found_by_parts[Keywords.FROM] is None:
return NotInitializedNode()
definition_tokens = tokens_found_by_parts[Keywords.FROM]
if definition_tokens[1].value != Keywords.BNF:
return NotInitializedNode()
tokens = core.utils.strip_tokens(definition_tokens[2:])
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
return NotInitializedNode()
bnf_parser = BnfParser()
with self.context.push(self.name, obj=current_concept_def) as sub_context:
parsing_result = bnf_parser.parse(sub_context, tokens)
sub_context.add_values(return_values=parsing_result)
if not parsing_result.status:
self.add_error(parsing_result.value)
return NotInitializedNode()
return parsing_result
def get_concept_parts(self, tokens_found_by_parts):
asts_found_by_parts = {
Keywords.AS: NotInitializedNode(),
Keywords.WHERE: NotInitializedNode(),
Keywords.PRE: NotInitializedNode(),
Keywords.POST: NotInitializedNode(),
}
for keyword in tokens_found_by_parts:
if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
continue # already done
tokens = tokens_found_by_parts[keyword]
if tokens is None:
continue # nothing to do
if len(tokens) == 1: # check for empty declarations
self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
continue
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
if isinstance(tokens, ErrorNode):
self.add_error(tokens)
continue
# ask the other parsers if they recognize the tokens
with self.context.push(self.name, desc=f"Parsing {keyword}") as sub_context:
sub_context.log_new(self.verbose_log)
to_parse = self.sheerka.ret(
sub_context.who,
True,
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
steps = [BuiltinConcepts.PARSING]
parsed = self.sheerka.execute(sub_context, to_parse, steps, self.verbose_log)
parsing_result = core.builtin_helpers.expect_one(sub_context, parsed, self.verbose_log)
sub_context.add_values(return_values=parsing_result)
if not parsing_result.status:
self.add_error(parsing_result.value)
continue
asts_found_by_parts[keyword] = parsing_result
return asts_found_by_parts
+28
View File
@@ -0,0 +1,28 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
class EmptyStringParser(BaseParser):
"""
To parse empty or blank strings
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "EmptyString", 90)
def parse(self, context, text):
sheerka = context.sheerka
if (isinstance(text, str) and text.strip() == "") or \
(isinstance(text, list) and text == []) or \
text is None:
ret = sheerka.ret(self.name, True, sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source="",
body=sheerka.new(BuiltinConcepts.NOP)))
else:
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
self.log_result(context, text, ret)
return ret
+150
View File
@@ -0,0 +1,150 @@
import logging
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
from core.concept import VARIABLE_PREFIX
class ExactConceptParser(BaseParser):
"""
Tries to recognize a single concept
"""
MAX_WORDS_SIZE = 10
def __init__(self, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80)
def parse(self, context, text):
"""
text can be a string, but it can also be a list of tokens
:param context:
:param text:
:return:
"""
context.log(self.verbose_log, f"Parsing '{text}'", self.name)
res = []
sheerka = context.sheerka
try:
words = self.get_words(text)
except LexerError as e:
context.log(self.verbose_log, f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
if len(words) > self.MAX_WORDS_SIZE:
context.log(self.verbose_log, f"Max words reached. Stopping.", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
recognized = False
for combination in self.combinations(words):
concept_key = " ".join(combination)
result = sheerka.new(concept_key)
if sheerka.isinstance(result, BuiltinConcepts.UNKNOWN_CONCEPT):
continue
# concepts = result.body if sheerka.isinstance(result, BuiltinConcepts.ENUMERATION) else [result]
concepts = result if isinstance(result, list) else [result]
for concept in concepts:
context.log(self.verbose_log, f"Recognized concept {concept}.", self.name)
# update the properties if needed
for i, token in enumerate(combination):
if token.startswith(VARIABLE_PREFIX):
index = int(token[len(VARIABLE_PREFIX):])
concept.def_prop_by_index(index, words[i])
if self.verbose_log.isEnabledFor(logging.DEBUG):
prop_name = list(concept.props.keys())[index]
context.log(
self.verbose_log,
f"Added property {index}: {prop_name}='{words[i]}'.",
self.name)
res.append(ReturnValueConcept(
self.name,
True,
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text if isinstance(text, str) else self.get_text_from_tokens(text),
body=concept,
try_parsed=concept)))
recognized = True
if recognized:
if len(res) == 1:
self.log_result(context, text, res[0])
else:
self.log_multiple_results(context, text, res)
return res
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
self.log_result(context, text, ret)
return ret
@staticmethod
def get_words(text):
tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
res = []
for t in tokens:
if t.type == TokenKind.EOF:
break
if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE:
continue
res.append(t.value.value if isinstance(t.value, Keywords) else t.value)
return res
def combinations(self, iterable):
# combinations('foo', 'bar', 'baz') -->
# ('foo', 'bar', 'baz'),
# ('__var__0', 'bar', 'baz'),
# ('foo', '__var__0', 'baz'),
# ('foo', 'bar', '__var__0'),
# ('__var__0', '__var__1', 'baz'),
# ('__var__0', 'bar', '__var__1'),
# ('foo', '__var__0', '__var__1'),
# ('__var__0', '__var__1', '__var__2')]
pool = tuple(iterable)
n = len(pool)
res = set()
for r in range(0, n + 1):
indices = list(range(r))
res.add(self.get_tuple(pool, indices))
while True:
for i in reversed(range(r)):
if indices[i] != i + n - r:
break
else:
break
indices[i] += 1
for j in range(i + 1, r):
indices[j] = indices[j - 1] + 1
res.add(self.get_tuple(pool, indices))
return res
@staticmethod
def get_tuple(pool, indices):
res = []
vars = {}
k = 0
# init vars
for i in indices:
value = pool[i]
if value not in vars:
vars[pool[i]] = f"{VARIABLE_PREFIX}{k}"
k += 1
# create tuple
for i in range(len(pool)):
value = pool[i]
res.append(vars[value] if value in vars else value)
return tuple(res)
+164
View File
@@ -0,0 +1,164 @@
import ast
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = ConceptLexerParser()
class MultipleConceptsParser(BaseParser):
"""
Parser that takes the result of ConceptLexerParser and
tries to resolve the unrecognized tokens, token by token.
It is a success when it returns a list of ConceptNode exclusively.
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
if not unrecognized_tokens:
return nodes_found, unrecognized_tokens
unrecognized_tokens.fix_source()
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
return nodes_found, None
@staticmethod
def create_or_add(unrecognized_tokens, token, index):
if unrecognized_tokens:
unrecognized_tokens.add_token(token, index)
else:
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens
def parse(self, context, text):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
return None
if text.parser != concept_lexer_parser:
return None
nodes = text.value
nodes_found = [[]]
concepts_only = True
for node in nodes:
if isinstance(node, UnrecognizedTokensNode):
unrecognized_tokens = None
i = 0
while i < len(node.tokens):
token_index = node.start + i
token = node.tokens[i]
concepts_nodes = self.get_concepts_nodes(context, token_index, token)
if concepts_nodes is not None:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, concepts_nodes)
i += 1
continue
source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
if source_code_node:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, [source_code_node])
i += len(source_code_node.tokens)
continue
# not a concept nor some source code
unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
i += 1
# finish processing if needed
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
else:
nodes_found = core.utils.product(nodes_found, [node])
ret = []
for choice in nodes_found:
ret.append(
sheerka.ret(
self.name,
concepts_only,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
self.log_result(context, text.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text.source, ret)
return ret
@staticmethod
def get_concepts_nodes(context, index, token):
"""
Tries to recognize a concept
from the universe of all known concepts
"""
if token.type != TokenKind.IDENTIFIER:
return None
concept = context.new_concept(token.value)
if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
concepts = concept if hasattr(concept, "__iter__") else [concept]
concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
return concepts_nodes
return None
@staticmethod
def get_source_code_node(context, index, tokens):
"""
Tries to recognize source code.
For the time being, only Python is supported
:param context:
:param tokens:
:param index:
:return:
"""
if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
return None
end_index = len(tokens)
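# greedy longest-prefix match: start with all the tokens and shrink the
# window by one token until a prefix parses and evaluates as a Python expression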
while end_index > 0:
parser = PythonParser()
tokens_to_parse = tokens[:end_index]
res = parser.parse(context, tokens_to_parse)
if res.status:
# only expressions are accepted
ast_ = res.value.value.ast_
if not isinstance(ast_, ast.Expression):
return None
try:
compiled = compile(ast_, "<string>", "eval")
eval(compiled, {}, {})
except Exception:
return None
source = BaseParser.get_text_from_tokens(tokens_to_parse)
return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
end_index -= 1
return None
+214
View File
@@ -0,0 +1,214 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass, field
import ast
import logging
from parsers.ConceptLexerParser import ConceptNode
log = logging.getLogger(__name__)
@dataclass()
class PythonErrorNode(ErrorNode):
source: str
exception: Exception
# def __post_init__(self):
# self.log.debug("-> PythonErrorNode: " + str(self.exception))
class PythonNode(Node):
def __init__(self, source, ast_=None, concepts=None):
self.source = source
self.ast_ = ast_ if ast_ else ast.parse(source, mode="eval") if source else None
self.concepts = concepts or {} # when concepts are recognized in the expression
# def __repr__(self):
# return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")"
def __repr__(self):
ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module"
return "PythonNode(" + ast_type + "='" + self.source + "')"
def __eq__(self, other):
if not isinstance(other, PythonNode):
return False
if self.source != other.source:
return False
self_dump = self.get_dump(self.ast_)
other_dump = self.get_dump(other.ast_)
return self_dump == other_dump
def __hash__(self):
return hash((self.source, self.get_dump(self.ast_)))
@staticmethod
def get_dump(ast_):
dump = ast.dump(ast_)
for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]:
dump = dump.replace(to_remove, "")
return dump
class PythonParser(BaseParser):
"""
Parse Python scripts
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "Python", 50)
self.source = kwargs.get("source", "<undef>")
def parse(self, context, text):
sheerka = context.sheerka
tree = None
python_switcher = {
TokenKind.CONCEPT: lambda t: f"__C__USE_CONCEPT__{t.value}__C__"
}
try:
if isinstance(text, str) and "c:" in text:
source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
elif isinstance(text, str):
source = text
else:
source = self.get_text_from_tokens(text, python_switcher)
source = source.strip()
text = text if isinstance(text, str) else source
# first, try to parse an expression
res, tree, error = self.try_parse_expression(source)
if not res:
# then try to parse a statement
res, tree, error = self.try_parse_statement(source)
if not res:
self.has_error = True
error_node = PythonErrorNode(text, error)
self.error_sink.append(error_node)
except LexerError as e:
self.has_error = True
self.error_sink.append(e)
ret = sheerka.ret(
self.name,
not self.has_error,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
body=self.error_sink if self.has_error else PythonNode(text, tree),
try_parsed=None))
self.log_result(context, text, ret)
return ret
def try_parse_expression(self, text):
try:
return True, ast.parse(text, f"<{self.source}>", 'eval'), None
except Exception as error:
return False, None, error
def try_parse_statement(self, text):
try:
return True, ast.parse(text, f"<{self.source}>", 'exec'), None
except Exception as error:
return False, None, error
class PythonGetNamesVisitor(ast.NodeVisitor):
"""
This visitor will find all the name declared in the ast
"""
def __init__(self):
self.names = set()
def visit_Name(self, node):
self.names.add(node.id)
class LexerNodeParserHelperForPython:
"""Helper class to parse mix of concepts and Python"""
def __init__(self):
self.identifiers = {} # cache for already created identifier (the key is id(concept))
self.identifiers_key = {} # number of identifiers with the same root (prefix)
def _get_identifier(self, concept):
"""
Get an identifier for a concept.
Return the same identifier for the same concept, and a different
identifier when two concepts share a name but are different objects.
:param concept:
:return:
"""
if id(concept) in self.identifiers:
return self.identifiers[id(concept)]
identifier = "__C__" + self._sanitize(concept.key or concept.name)
if concept.id:
identifier += "__" + concept.id
if identifier in self.identifiers_key:
self.identifiers_key[identifier] += 1
identifier += f"_{self.identifiers_key[identifier]}"
else:
self.identifiers_key[identifier] = 0
identifier += "__C__"
self.identifiers[id(concept)] = identifier
return identifier
@staticmethod
def _sanitize(identifier):
res = ""
for c in identifier:
res += c if c.isalnum() else "0"
return res
def parse(self, context, nodes):
source = ""
to_parse = ""
concepts = {} # the key is the Python identifier
for node in nodes:
if isinstance(node, ConceptNode):
source += node.source
if to_parse:
to_parse += " "
concept = node.concept
python_id = self._get_identifier(concept)
to_parse += python_id
concepts[python_id] = concept
else:
source += node.source
to_parse += node.source
with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
sub_context.add_inputs(to_parse=to_parse)
python_parser = PythonParser()
result = python_parser.parse(sub_context, to_parse)
sub_context.add_values(return_values=result)
if result.status:
python_node = result.body.body
python_node.source = source
python_node.concepts = concepts
return python_node
return result.body # the error
+105
View File
@@ -0,0 +1,105 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
multiple_concepts_parser = MultipleConceptsParser()
class PythonWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("PythonWithConcepts", 20)
self.identifiers = None
self.identifiers_key = None
@staticmethod
def sanitize(identifier):
res = ""
for c in identifier:
res += c if c.isalnum() else "0"
return res
def parse(self, context, text):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
return None
if text.parser != multiple_concepts_parser:
return None
nodes = text.body
source = ""
to_parse = ""
identifiers = {}
identifiers_key = {}
python_ids_mappings = {}
def _get_identifier(c):
"""
Get an identifier for a concept.
Make sure to return the same identifier for the same concept
Make sure to return a different identifier when two concepts share a name but differ
Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
to be instance variables
I would like to keep this parser as stateless as possible
:param c:
:return:
"""
if id(c) in identifiers:
return identifiers[id(c)]
identifier = "__C__" + self.sanitize(c.key or c.name)
if c.id:
identifier += "__" + c.id
if identifier in identifiers_key:
identifiers_key[identifier] += 1
identifier += f"_{identifiers_key[identifier]}"
else:
identifiers_key[identifier] = 0
identifier += "__C__"
identifiers[id(c)] = identifier
return identifier
for node in nodes:
if isinstance(node, ConceptNode):
source += node.source
if to_parse:
to_parse += " "
concept = node.concept
python_id = _get_identifier(concept)
to_parse += python_id
python_ids_mappings[python_id] = concept
else:
source += node.source
to_parse += node.source
with context.push(self, "Trying Python for '" + to_parse + "'") as sub_context:
python_parser = PythonParser()
result = python_parser.parse(sub_context, to_parse)
if result.status:
python_node = result.body.body
python_node.source = source
python_node.concepts = python_ids_mappings
return sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=result.body.body,
try_parsed=None))
else:
return sheerka.ret(
self.name,
False,
result.body)
View File
View File
+43
View File
@@ -0,0 +1,43 @@
# How to serialize?
## General rule
- 1 byte : type of object code
- int : version of the encoder
- data : can be the json representation of the object
### Current supported types
- E : events
- J : Json object (with history management)
- P : pickle (no history)
- S : state (history, but not managed by the serializer )
- C : concept (with history management)
- D : concept definitions (no history management)
- R : executionContext ('R' stands for Result or ReturnValue, no history management)
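Below is a minimal sketch of the layout described above (hypothetical helper names; the actual bytes are whatever the Serializer writes with its `"cH"` struct header), using 'C' (concept) as the example type code:
```python
import struct

HEADER_FORMAT = "cH"  # 1-byte type code + unsigned short encoder version

def pack_record(type_code: bytes, version: int, payload: bytes) -> bytes:
    # header first, then the data (e.g. the json representation of the object)
    return struct.pack(HEADER_FORMAT, type_code, version) + payload

def unpack_record(data: bytes):
    header_size = struct.calcsize(HEADER_FORMAT)
    type_code, version = struct.unpack(HEADER_FORMAT, data[:header_size])
    return type_code, version, data[header_size:]

record = pack_record(b"C", 1, b'{"id": "42", "name": "example"}')
print(unpack_record(record))  # (b'C', 1, b'{"id": "42", "name": "example"}')
```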
## How are concepts serialized?
- get the id of the concept
- get the hash of the concept: it will be its unique key
Structure of the serialization:
```json
{
"id" : "id",
"parent": <hash code of the previous version of the concept> or "",
"name": <name of the concept>,
"where": "",
"pre": "",
"post": "",
"body": "",
"desc": "",
...
}
```
## Idea to manage ObjectSerializer
Problem:
During serialization, there is no issue: the matches() method is the only way to pick the correct serializer.
During deserialization, however, all object serializers have type = '0' and version = 1.
So how do we choose the correct one?
A possible solution would be to add the type of the object to deserialize to the saved stream
--> a SHA256 for every object: too much data saved.
The id lets the Serializer increment the version automatically (during registration) and keeps the mapping within sdp.state
+877
View File
@@ -0,0 +1,877 @@
from datetime import datetime, date
import hashlib
import json
import zlib
from sdp.sheerkaDataProviderIO import SheerkaDataProviderIO
from sdp.sheerkaSerializer import Serializer, SerializerContext
from core.sheerka_logger import get_logger
def json_default_converter(o):
"""
Default formatter for json
It's used when the json serializer does not know
how to serialise a type
:param o:
:return:
"""
if isinstance(o, (date, datetime)):
return o.isoformat()
class Event(object):
"""
Class that represents something that modifies the state of the system
"""
def __init__(self, message="", user="", date=None):
self.version = 1
self.user = user
self.date = date if date is not None else datetime.now()
self.message = message
self._digest = None
def get_digest(self):
"""
Returns the digest of the event
:return: hex form of the sha256
"""
if self._digest:
return self._digest
if self.message == "" and self.user == "":
self._digest = "xxx" # to speed unit tests
return self._digest
if not isinstance(self.message, str):
raise NotImplementedError
self._digest = hashlib.sha256(f"Event:{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
return self._digest
def to_dict(self):
return self.__dict__
def from_dict(self, as_dict):
self.user = as_dict["user"]
self.date = datetime.fromisoformat(as_dict["date"])
self.message = as_dict["message"]
class ObjToUpdate:
"""
Internal key/value class that holds the key (and the value)
once it has been detected.
It exists to distinguish a wrapped object from a plain {key: value} mapping.
"""
def __init__(self, obj, key=None, digest=None):
self.obj = obj
self.has_key = None
self.has_digest = None
self._key = None
self._digest = None
if key is not None:
self.set_key(key)
if digest is not None:
self.set_digest(digest)
def get_key(self):
if self.has_key is None:
key = SheerkaDataProvider.get_obj_key(self.obj)
if key is None:
self.has_key = False
return None
else:
self.has_key = True
self._key = key
return key
elif not self.has_key:
return None
else:
return self._key
def get_digest(self):
if self.has_digest is None:
digest = SheerkaDataProvider.get_obj_digest(self.obj)
if digest is None:
self.has_digest = False
return None
else:
self.has_digest = True
self._digest = digest
return digest
elif not self.has_digest:
return None
else:
return self._digest
def set_digest(self, digest):
self.has_digest = True
self._digest = digest
def set_key(self, key):
self.has_key = True
self._key = key
class State:
"""
Class that represents the state of the system (dictionary of all known entries)
"""
def __init__(self):
self.version = 1
self.date = None
self.parents = []
self.events = []
self.data = {}
@staticmethod
def check_duplicate(items, obj: ObjToUpdate, key):
digest = obj.get_digest()
if digest is None:
return
if not hasattr(items, "__iter__"):
items = [items]
for item in items:
item_digest = SheerkaDataProvider.get_obj_digest(item)
if item_digest == digest:
raise SheerkaDataProviderDuplicateKeyError(key, obj.obj)
def update(self, entry, obj: ObjToUpdate, append=True):
"""
adds obj to entry
:param entry:
:param obj:
:param append: if True, duplicate keys will create lists
:return:
"""
obj_to_use = {obj.get_key(): obj.obj} if obj.has_key else obj.obj
if entry not in self.data:
self.data[entry] = obj_to_use
elif not append:
if isinstance(obj_to_use, dict):
self.data[entry].update(obj_to_use)
else:
self.data[entry] = obj_to_use
elif isinstance(self.data[entry], list):
self.check_duplicate(self.data[entry], obj, entry)
self.data[entry].append(obj.obj)
elif isinstance(obj_to_use, dict):
for k in obj_to_use:
if k not in self.data[entry]:
self.data[entry][k] = obj_to_use[k]
elif isinstance(self.data[entry][k], list):
self.check_duplicate(self.data[entry][k], obj, entry + "." + k)
self.data[entry][k].append(obj_to_use[k])
else:
self.check_duplicate(self.data[entry][k], obj, entry + "." + k)
self.data[entry][k] = [self.data[entry][k], obj_to_use[k]]
elif isinstance(self.data[entry], dict):
raise SheerkaDataProviderError(f"Cannot found key on '{obj.obj}' while all other elements have.", obj.obj)
else:
self.check_duplicate(self.data[entry], obj, entry)
self.data[entry] = [self.data[entry], obj_to_use]
def modify(self, entry, key, obj, obj_key):
# if the key changes, make sure to remove the previous entry
append = False
if obj_key != key:
self.remove(entry, lambda k, o: k == key) # modify from one object to another
append = True
self.update(entry, ObjToUpdate(obj, obj_key), append=append)
def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed):
found = False
to_remove = None
for i in range(len(self.data[entry][key])):
item, is_ref = load_ref_if_needed(self.data[entry][key][i])
if not hasattr(item, "get_digest"):
continue
if item.get_digest() == obj_origin:
obj = save_ref_if_needed(is_ref, obj)
if obj_key == key:
self.data[entry][key][i] = obj
else:
to_remove = i
self.update(entry, ObjToUpdate(obj, obj_key), append=True)
found = True
break
if not found:
raise (SheerkaDataProviderError(f"Cannot modify '{entry}.{key}'. Item '{obj_origin}' not found.", obj))
if to_remove is not None:
del self.data[entry][key][to_remove]
def remove(self, entry, filter):
if filter is None:
del (self.data[entry])
elif isinstance(self.data[entry], dict):
keys_to_remove = []
for key, element in self.data[entry].items():
if filter(key, element):
keys_to_remove.append(key)
for key in keys_to_remove:
del (self.data[entry][key])
elif not isinstance(self.data[entry], list):
if filter(self.data[entry]):
del (self.data[entry])
else:
for element in self.data[entry]:
if filter(element):
self.data[entry].remove(element)
def get_digest(self):
as_json = json.dumps(self.__dict__, default=json_default_converter)
return hashlib.sha256(as_json.encode("utf-8")).hexdigest()
def contains(self, entry, key):
"""
if key is None, returns True if entry exists
if key has a value, returns True if entry is a dict and contains key
:param entry:
:param key:
:return:
"""
if entry not in self.data:
return False
if key is None:
return entry in self.data
if not isinstance(self.data[entry], dict):
return False
return key in self.data[entry]
class SheerkaDataProviderError(Exception):
def __init__(self, message, obj):
Exception.__init__(self, message)
self.obj = obj
class SheerkaDataProviderDuplicateKeyError(Exception):
def __init__(self, key, obj):
Exception.__init__(self, "Duplicate object.")
self.key = key
self.obj = obj
class SheerkaDataProvider:
"""Manages the state of the system"""
EventFolder = "events"
StateFolder = "state"
ObjectsFolder = "objects"
CacheFolder = "cache"
HeadFile = "HEAD"
KeysFile = "keys"
REF_PREFIX = "##REF##:"
def __init__(self, root=None):
self.log = get_logger(__name__)
self.init_log = get_logger("init." + __name__)
self.init_log.debug("Initializing sdp.")
self.io = SheerkaDataProviderIO.get(root)
self.first_time = self.io.first_time
self.serializer = Serializer()
@staticmethod
def get_obj_key(obj):
"""
Tries to find the key of an object.
Looks for .key, then .get_key()
:param obj:
:return: string version of the key if found, None otherwise
"""
return str(obj.key) if hasattr(obj, "key") \
else str(obj.get_key()) if hasattr(obj, "get_key") \
else None
@staticmethod
def get_obj_digest(obj):
"""
Tries to find the digest of an object.
Looks for .digest, then .get_digest()
:param obj:
:return: digest if found, None otherwise
"""
if isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX):
return obj[len(SheerkaDataProvider.REF_PREFIX):]
return obj.digest if hasattr(obj, "digest") \
else obj.get_digest() if hasattr(obj, "get_digest") \
else None
@staticmethod
def get_obj_origin(obj):
"""
Get the digest used to save obj if set
"""
if isinstance(obj, dict) and Serializer.ORIGIN in obj:
return obj[Serializer.ORIGIN]
if hasattr(obj, Serializer.ORIGIN):
return getattr(obj, Serializer.ORIGIN)
return None
@staticmethod
def get_stream_digest(stream):
sha256_hash = hashlib.sha256()
for byte_block in iter(lambda: stream.read(4096), b""):
sha256_hash.update(byte_block)
stream.seek(0)
return sha256_hash.hexdigest()
@staticmethod
def is_reference(obj):
return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)
def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False, is_ref=False):
"""
Adds obj to the entry 'entry'
:param event_digest: digest of the event that triggers the modification of the state
:param entry: entry of the state to update
:param obj: obj to insert or add
:param allow_multiple: if set to true, the same key can be added several times.
All entries will be put in a list
:param use_ref: if True the actual object is saved under 'objects' folder,
only a reference is saved in the state
:param is_ref: obj is supposed to be a reference
:return: (entry, key) to retrieve the object
"""
if use_ref and is_ref:
raise SheerkaDataProviderError("Cannot use use_ref and is_ref at the same time", None)
if is_ref and not isinstance(obj, dict):
raise SheerkaDataProviderError("is_ref can only be used with dictionaries", obj)
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
self.log.debug(f"Adding obj '{obj}' in entry '{entry}' (allow_multiple={allow_multiple}, use_ref={use_ref})")
if not isinstance(obj, ObjToUpdate):
obj = ObjToUpdate(obj)
# check uniqueness, cannot add the same key twice if allow_multiple == False
key = obj.get_key()
self.log.debug(f"key found : '{key}'") if key else self.log.debug("No key found")
if not allow_multiple:
if isinstance(obj.obj, dict):
for k in obj.obj:
if state.contains(entry, k):
raise IndexError(f"{entry}.{k}")
else:
if state.contains(entry, key):
raise IndexError(f"{entry}.{key}" if key else entry)
state.parents = [] if snapshot is None else [snapshot]
state.events = [event_digest]
state.date = datetime.now()
if use_ref:
obj.set_digest(self.save_obj(obj.obj))
obj.obj = self.REF_PREFIX + obj.get_digest()
if is_ref:
for k, v in obj.obj.items():
obj.obj[k] = self.REF_PREFIX + v
state.update(entry, obj)
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
return entry, key
def add_with_auto_key(self, event_digest: str, entry, obj):
"""
Add obj to entry. An autogenerated key is created for obj
:param event_digest:
:param entry:
:param obj:
:return:
"""
next_key = self.get_next_key(entry)
if hasattr(obj, "set_key"):
obj.set_key(next_key)
self.add(event_digest, entry, ObjToUpdate(obj, next_key))
return entry, next_key
def add_unique(self, event_digest: str, entry, obj):
"""Add an entry and make sure it's unique"""
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
state.parents = [] if snapshot is None else [snapshot]
state.events = [event_digest]
state.date = datetime.now()
if entry not in state.data:
state.data[entry] = {obj}
already_exist = False
else:
already_exist = obj in state.data[entry]
if not already_exist:
state.data[entry].add(obj)
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
return (None if already_exist else entry), None
def set(self, event_digest, entry, obj, use_ref=False, is_ref=False):
"""
Add or replace an entry. The entry is reinitialized.
If the previous value was a dict, all keys are lost
:param event_digest:
:param entry:
:param obj:
:param use_ref: do not save obj in the State (save it under 'objects'); only a reference is kept in the State
:param is_ref: obj is supposed to be a reference
:return:
"""
if use_ref and is_ref:
raise SheerkaDataProviderError("Cannot use use_ref and is_ref at the same time", None)
if is_ref and not isinstance(obj, dict):
raise SheerkaDataProviderError("is_ref can only be used with dictionaries", obj)
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
state.parents = [] if snapshot is None else [snapshot]
state.events = [event_digest]
state.date = datetime.now()
key = self.get_obj_key(obj)
obj = self.save_ref_if_needed(use_ref, obj)
if is_ref:
for k, v in obj.items():
obj[k] = self.REF_PREFIX + v
state.data[entry] = obj if key is None else {key: obj}
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
return entry, key
def modify(self, event_digest, entry, key, obj):
"""
Replace an element
The key is mandatory (use set() to replace a whole entry)
:param event_digest:
:param entry:
:param key: key of the object to update
:param obj: new data
:return:
"""
if key is None:
raise SheerkaDataProviderError("Key is mandatory.", None)
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
if entry not in state.data:
raise IndexError(entry)
if key is not None and key not in state.data[entry]:
raise IndexError(f"{entry}.{key}")
state.parents = [] if snapshot is None else [snapshot]
state.events = [event_digest]
state.date = datetime.now()
# Gets obj original key, it will help to know if the key has changed
obj_key = self.get_obj_key(obj) or key
if isinstance(state.data[entry][key], list):
obj_origin = self.get_obj_origin(obj)
if obj_origin is None:
raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj))
state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed)
else:
obj = self.save_ref_if_needed(self.is_reference(state.data[entry][key]), obj)
state.modify(entry, key, obj, obj_key)
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
return entry, obj_key
def list(self, entry, filter=None):
"""
Lists elements of entry 'entry'
:param entry: name of the entry to list
:param filter: filter to use
:return: list of elements
"""
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
if entry not in state.data:
return []
elements = state.data[entry]
if isinstance(elements, dict):
# manage when elements have a key
filter_to_use = (lambda k, o: True) if filter is None else filter
for key, element in elements.items():
if filter_to_use(key, element):
if isinstance(element, list):
yield [self.load_ref_if_needed(e)[0] for e in element]
else:
yield self.load_ref_if_needed(element)[0]
else:
# manage when no key is defined for the elements
if not isinstance(elements, list) and not isinstance(elements, set):
elements = [elements]
filter_to_use = (lambda o: True) if filter is None else filter
for element in elements:
if filter_to_use(element):
yield self.load_ref_if_needed(element)[0]
def remove(self, event_digest, entry, filter=None):
"""
Removes elements under the entry 'entry'
:param event_digest: event that triggers the deletion
:param entry:
:param filter: filter to use
:return: new sha256 of the state
TODO: Remove by key
"""
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
if entry not in state.data:
raise IndexError(entry)
state.parents = [] if snapshot is None else [snapshot]
state.events = [event_digest]
state.date = datetime.now()
state.remove(entry, filter)
new_snapshot = self.save_state(state)
self.set_snapshot(new_snapshot)
return new_snapshot
def get(self, entry, key=None, load_origin=True):
"""
Retrieve an element by its key
:param entry:
:param key:
:return:
"""
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
if entry not in state.data:
raise IndexError(entry)
if key is not None and key not in state.data[entry]:
raise IndexError(f"{entry}.{key}")
item = state.data[entry] if key is None else state.data[entry][key]
if isinstance(item, list):
return [self.load_ref_if_needed(i, load_origin)[0] for i in item]
return self.load_ref_if_needed(item, load_origin)[0]
def get_safe(self, entry, key=None, load_origin=True):
"""
Retrieve an element by its key. Return None if the element does not exist
:param entry:
:param key:
:return:
"""
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
if entry not in state.data:
return None
if key is not None and key not in state.data[entry]:
return None
item = state.data[entry] if key is None else state.data[entry][key]
if isinstance(item, list):
return [self.load_ref_if_needed(i, load_origin)[0] for i in item]
return self.load_ref_if_needed(item, load_origin)[0]
def exists(self, entry, key=None, digest=None):
"""
Returns true if the entry is defined
:param key:
:param entry:
:param digest: digest of the object, when several entries share the same key
:return:
"""
snapshot = self.get_snapshot()
state = self.load_state(snapshot)
exist = entry in state.data
if not exist or key is None:
return exist
items = state.data[entry]
exist = key in items
if not exist or digest is None:
return exist
items = items[key]
if not isinstance(items, list):
items = [items]
for item in items:
item_digest = SheerkaDataProvider.get_obj_digest(item)
if item_digest == digest:
return True
return False
def save_event(self, event: Event):
"""
Save an event
:param event:
:return: digest of the event
"""
digest = event.get_digest()
target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest)
if self.io.exists(target_path):
return digest
self.io.write_binary(target_path, self.serializer.serialize(event, None).read())
return digest
def load_event(self, digest):
"""
return an event, given its digest
:param digest:
:return:
"""
target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest)
with self.io.open(target_path, "rb") as f:
return self.serializer.deserialize(f, None)
def save_result(self, execution_context):
"""
Save the execution context associated with an event
To make a long story short,
for every single user input, there is an event (which is the first thing that is created)
and a result (the ExecutionContext created by sheerka.evaluate_user_input())
:param execution_context:
:return:
"""
digest = execution_context.event.get_digest()
self.log.debug(f"Saving execution context. digest={digest}")
target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + "_result"
if self.io.exists(target_path):
return digest
self.io.write_binary(target_path, self.serializer.serialize(execution_context, None).read())
return digest
def load_result(self, digest):
target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + "_result"
with self.io.open(target_path, "rb") as f:
return self.serializer.deserialize(f, None)
def save_state(self, state: State):
digest = state.get_digest()
self.log.debug(f"Saving new state. digest={digest}")
target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest)
if self.io.exists(target_path):
return digest
self.io.write_binary(target_path, self.serializer.serialize(state, None).read())
return digest
def load_state(self, digest):
if digest is None:
return State()
target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest)
with self.io.open(target_path, "rb") as f:
return self.serializer.deserialize(f, None)
def save_obj(self, obj):
self.log.debug(f"Saving '{obj}' as reference...")
stream = self.serializer.serialize(obj, SerializerContext(user_name="kodjo"))
digest = obj.get_digest() if hasattr(obj, "get_digest") else self.get_stream_digest(stream)
target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest)
if self.io.exists(target_path):
self.log.debug(f"...already saved. digest is {digest}")
return digest
self.io.write_binary(target_path, stream.read())
self.log.debug(f"...digest={digest}.")
return digest
def load_obj(self, digest, add_origin=True):
if digest is None:
return None
target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest)
if not self.io.exists(target_path):
return None
with self.io.open(target_path, "rb") as f:
obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
# set the origin of the object
if add_origin:
if isinstance(obj, dict):
obj[Serializer.ORIGIN] = digest
elif not isinstance(obj, str):
setattr(obj, Serializer.ORIGIN, digest)
return obj
def load_ref_if_needed(self, obj, load_origin=True):
if not isinstance(obj, str):
return obj, False
if not obj.startswith(SheerkaDataProvider.REF_PREFIX):
return obj, False
resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin)
if resolved is None:
return obj, False
return resolved, True
def save_ref_if_needed(self, save_ref, obj):
if not save_ref:
return obj
digest = self.save_obj(obj)
return self.REF_PREFIX + digest
def get_cache_params(self, category, key):
digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest()
cache_path = self.io.get_obj_path(SheerkaDataProvider.CacheFolder, digest)
return digest, cache_path
def add_to_cache(self, category, key, obj, update=False):
"""
Save obj in the internal cache system
:param category:
:param key:
:param obj:
:param update:
:return:
"""
digest, cache_path = self.get_cache_params(category, key)
if self.io.exists(cache_path) and not update:
return digest
self.io.write_binary(cache_path, zlib.compress(obj.encode("utf-8"), 9))
return digest
def load_from_cache(self, category, key):
"""
Reload a compressed object from the cache
:param category:
:param key:
:return:
"""
digest, cache_path = self.get_cache_params(category, key)
if not self.io.exists(cache_path):
raise IndexError(f"{category}.{key}")
with self.io.open(cache_path, "rb") as f:
return zlib.decompress(f.read()).decode("utf-8")
def remove_from_cache(self, category, key):
"""
:param category:
:param key:
:return:
"""
digest, cache_path = self.get_cache_params(category, key)
if self.io.exists(cache_path):
self.io.remove(cache_path)
return digest
def in_cache(self, category, key):
"""
Returns true if the key is in cache
:param category:
:param key:
:return:
"""
digest, cache_path = self.get_cache_params(category, key)
return self.io.exists(cache_path)
def get_snapshot(self):
head_file = self.io.path_join(SheerkaDataProvider.HeadFile)
if not self.io.exists(head_file):
return None
return self.io.read_text(head_file)
# with open(head_file, "r") as f:
# return f.read()
def set_snapshot(self, digest):
head_file = self.io.path_join(SheerkaDataProvider.HeadFile)
return self.io.write_text(head_file, digest)
# with open(head_file, "w") as f:
# return f.write(digest)
def load_keys(self):
keys_file = self.io.path_join(SheerkaDataProvider.KeysFile)
if not self.io.exists(keys_file):
keys = {}
else:
with self.io.open(keys_file, "r") as f:
keys = json.load(f)
return keys
def save_keys(self, keys):
keys_file = self.io.path_join(SheerkaDataProvider.KeysFile)
with self.io.open(keys_file, "w") as f:
json.dump(keys, f)
def get_next_key(self, entry):
keys = self.load_keys()
next_key = keys.get(entry, 0) + 1
keys[entry] = next_key
self.save_keys(keys)
return str(next_key)
def set_key(self, entry, value):
keys = self.load_keys()
keys[entry] = value
self.save_keys(keys)
return str(value)
+192
View File
@@ -0,0 +1,192 @@
import io
from os import path
import os
from fs.memoryfs import MemoryFS
from core.sheerka_logger import get_logger
class SheerkaDataProviderIO:
def __init__(self, root):
self.root = root
self.log = get_logger(__name__)
self.init_log = get_logger("init." + __name__)
def exists(self, file_path):
pass
def open(self, file_path, mode):
pass
def read_text(self, file_path):
pass
def read_binary(self, file_path):
pass
def write_text(self, file_path, content):
pass
def write_binary(self, file_path, content):
pass
def remove(self, file_path):
pass
@staticmethod
def get(root):
if root == "mem://":
return SheerkaDataProviderDictionaryIO()
else:
return SheerkaDataProviderFileIO(root)
def get_obj_path(self, object_type, digest):
return path.join(self.root, object_type, digest[:24], digest)
def path_join(self, *paths):
return path.join(self.root, *paths)
class SheerkaDataProviderFileIO(SheerkaDataProviderIO):
def __init__(self, root):
root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \
if root is None \
else path.abspath(root)
super().__init__(root)
self.init_log.debug("root is set to '" + self.root + "'")
if not path.exists(self.root):
self.init_log.debug("root folder not found. Creating it.")
os.makedirs(self.root)
self.first_time = True
else:
self.first_time = False
def open(self, file_path, mode):
return open(file_path, mode)
def read_text(self, file_path):
with open(file_path) as f:
return f.read()
def read_binary(self, file_path):
with open(file_path, "rb") as f:
return f.read()
def write_text(self, file_path, content):
self._write(file_path, content, "w")
def write_binary(self, file_path, content):
self._write(file_path, content, "wb")
def exists(self, file_path):
return path.exists(file_path)
def remove(self, file_path):
os.remove(file_path)
@staticmethod
def _write(file_path, content, mode):
if not path.exists(path.dirname(file_path)):
os.makedirs(path.dirname(file_path))
with open(file_path, mode) as f:
f.write(content)
class SheerkaDataProviderMemoryIO(SheerkaDataProviderIO):
def __init__(self):
super().__init__("")
self.mem_fs = MemoryFS()
self.init_log.debug("Initializing memory file.")
self.first_time = True
def open(self, file_path, mode):
return self.mem_fs.open(file_path, mode)
def exists(self, file_path):
return self.mem_fs.exists(file_path)
def read_text(self, file_path):
return self.mem_fs.readtext(file_path)
def read_binary(self, file_path):
return self.mem_fs.readbytes(file_path)
def write_binary(self, file_path, content):
self._ensure_parent_folder(file_path)
self.mem_fs.writebytes(file_path, content)
def write_text(self, file_path, content):
self._ensure_parent_folder(file_path)
self.mem_fs.writetext(file_path, content)
def remove(self, file_path):
self.mem_fs.remove(file_path)
def _ensure_parent_folder(self, file_path):
if not self.mem_fs.exists(path.dirname(file_path)):
self.mem_fs.makedirs(path.dirname(file_path))
class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO):
def __init__(self):
super().__init__("")
self.cache = {}
self.init_log.debug("Initializing dictionary file.")
self.first_time = True
def exists(self, file_path):
if file_path == "":
return True
return file_path in self.cache
def read_text(self, file_path):
return self.cache[file_path]
def read_binary(self, file_path):
return self.cache[file_path]
def write_binary(self, file_path, content):
self.cache[file_path] = content
def write_text(self, file_path, content):
self.cache[file_path] = content
def remove(self, file_path):
del (self.cache[file_path])
def open(self, file_path, mode):
if "w" in mode:
stream = io.BytesIO() if "b" in mode else io.StringIO()
stream.close = on_close(self, file_path, stream)(stream.close)
return stream
return io.BytesIO(self.cache[file_path]) if "b" in mode else io.StringIO(self.cache[file_path])
def on_close(dictionary_io, file_path, stream):
"""
Decorator to intercept the close.
There are probably more elegant solutions
:param dictionary_io:
:param file_path:
:param stream:
:return:
"""
def decorator(func):
def wrapper(*args, **kwargs):
stream.seek(0)
dictionary_io.cache[file_path] = stream.read()
func(*args, **kwargs)
return wrapper
return decorator
+273
View File
@@ -0,0 +1,273 @@
import dataclasses
import json
import pickle
import datetime
import struct
import io
from dataclasses import dataclass
from core.sheerka_logger import get_logger
from enum import Enum
import core.utils
from core.concept import Concept
from core.tokenizer import Token
from parsers.BaseParser import Node
def json_default_converter(o):
"""
Default formatter for json
It's used when the json serializer does not know
how to serialise a type
:param o:
:return:
"""
if isinstance(o, (datetime.date, datetime.datetime)):
return o.isoformat()
if isinstance(o, Enum):
return o.name
raise Exception("Cannot serialize " + o.__class__.__name__)
# with open("json_encoding_error.txt", "a") as f:
# f.write(o.__class__.__name__ + "\n")
@dataclass()
class SerializerContext:
user_name: str = None
origin: str = None
class Serializer:
HEADER_FORMAT = "cH"
USERNAME = "user_name" # key to store user that as committed the snapshot
MODIFICATION_DATE = "modification_date" #
PARENTS = "parents"
ORIGIN = "##origin##"
HISTORY = "##history##"
def __init__(self):
self.log = get_logger(__name__)
self.init_log = get_logger("init." + __name__)
self.init_log.debug("Initializing serializers")
self._cache = []
# add builtin serializers
self.register(EventSerializer())
self.register(StateSerializer())
self.register(ConceptSerializer())
self.register(DictionarySerializer())
self.register(ExecutionContextSerializer())
def register(self, serializer):
"""
Register a serializer in the list of all known serializers
:param serializer:
:return:
"""
self.init_log.debug(f"Adding serializer {serializer}")
self._cache.append(serializer)
def serialize(self, obj, context):
"""
Get the stream representation of an object
:param context:
:param obj:
:return:
"""
serializers = [s for s in self._cache if s.matches(obj)]
if not serializers:
raise TypeError(f"Don't know how to serialize {type(obj)}")
serializer = serializers[0]
stream = io.BytesIO()
header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
stream.write(header)
return serializer.dump(stream, obj, context)
def deserialize(self, stream, context):
"""
Loads an object from its stream representation
:param context:
:param stream:
:return:
"""
header = struct.unpack(Serializer.HEADER_FORMAT, stream.read(struct.calcsize(Serializer.HEADER_FORMAT)))
serializers = [s for s in self._cache if s.name == header[0].decode("utf-8") and s.version == header[1]]
if not serializers:
raise TypeError(f"Don't know how serializer name={header[0]}, version={header[1]}")
serializer = serializers[0]
return serializer.load(stream, context)
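# Example round trip (hypothetical usage, assuming the Event class from
# sdp.sheerkaDataProvider):
#   from sdp.sheerkaDataProvider import Event
#   s = Serializer()
#   stream = s.serialize(Event("hello"), None)
#   restored = s.deserialize(stream, None)  # an Event with the same message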
class BaseSerializer:
def __init__(self, name, version):
"""
Create a serializer, given a name and a version
:param name:
:param version:
:return:
"""
self.name = name
self.version = version
def matches(self, obj):
"""
Returns true if self can serialize obj
:param obj:
:return:
"""
pass
def dump(self, stream, obj, context):
"""
Returns the byte representation of how the object should be serialized
:param stream: to write to
:param obj: obj to serialize
:param context: additional info needed to dump
:return: stream of bytes
"""
pass
def load(self, stream, context):
"""
From a stream of bytes, create the object
:param stream:
:param context: additional info needed to load
:return: object
"""
pass
def __repr__(self):
return self.__class__.__name__ + ' (' + self.name + ", version=" + str(self.version) + ")"
class EventSerializer(BaseSerializer):
def __init__(self):
BaseSerializer.__init__(self, "E", 1)
def matches(self, obj):
return core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"
def dump(self, stream, obj, context):
stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8"))
stream.seek(0)
return stream
def load(self, stream, context):
json_stream = stream.read().decode("utf-8")
as_dict = json.loads(json_stream)
event = core.utils.get_class("sdp.sheerkaDataProvider.Event")()
event.from_dict(as_dict)
return event
class JsonSerializer(BaseSerializer):
def __init__(self, fully_qualified_name, name="J", version=1):
BaseSerializer.__init__(self, name, version)
self.fully_qualified_name = fully_qualified_name
def matches(self, obj):
return core.utils.get_full_qualified_name(obj) == self.fully_qualified_name
def dump(self, stream, obj, context):
as_json = obj.to_dict()
as_json.update({
Serializer.HISTORY: {
Serializer.USERNAME: context.user_name,
Serializer.MODIFICATION_DATE: datetime.datetime.now().isoformat(),
Serializer.PARENTS: [getattr(obj, Serializer.ORIGIN)] if hasattr(obj, Serializer.ORIGIN) else []
}})
stream.write(json.dumps(as_json, default=json_default_converter).encode("utf-8"))
stream.seek(0)
return stream
def load(self, stream, context):
json_stream = stream.read().decode("utf-8")
json_message = json.loads(json_stream)
obj = core.utils.get_class(self.fully_qualified_name)()
obj.from_dict(json_message)
setattr(obj, Serializer.HISTORY, json_message[Serializer.HISTORY])
return obj
class PickleSerializer(BaseSerializer):
def __init__(self, predicate, name="P", version=1):
BaseSerializer.__init__(self, name, version)
self.predicate = predicate
def matches(self, obj):
return self.predicate(obj)
def dump(self, stream, obj, context):
stream.write(pickle.dumps(obj))
stream.seek(0)
return stream
def load(self, stream, context):
return pickle.loads(stream.read())
class StateSerializer(PickleSerializer):
def __init__(self):
PickleSerializer.__init__(
self,
lambda obj: core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State",
"S",
1)
class ConceptSerializer(JsonSerializer):
def __init__(self):
JsonSerializer.__init__(self, "core.concept.Concept", "C", 1)
def matches(self, obj):
return isinstance(obj, Concept)
class DictionarySerializer(PickleSerializer):
def __init__(self):
PickleSerializer.__init__(
self,
lambda obj: isinstance(obj, dict),
"D",
1)
class ExecutionContextSerializer(BaseSerializer):
def __init__(self):
BaseSerializer.__init__(self, "R", 1)
def matches(self, obj):
return core.utils.get_full_qualified_name(obj) == "core.sheerka.ExecutionContext.ExecutionContext"
def dump(self, stream, obj, context):
as_json = obj.to_dict()
stream.write(json.dumps(as_json, default=json_default_converter).encode("utf-8"))
stream.seek(0)
return stream
def load(self, stream, context):
json_stream = stream.read().decode("utf-8")
json_message = json.loads(json_stream)
obj = core.utils.get_class("core.sheerka.ExecutionContext")()
obj.from_dict(json_message)
return obj
#
# class SheerkaSerializer(ObjectSerializer):
# def __init__(self):
# ObjectSerializer.__init__(self, "core.sheerka.Sheerka", "C", 1)