Refactored sheerka class: split it into sub-handlers. Refactored unit tests to use classes.
This commit is contained in:
@@ -0,0 +1,152 @@
|
||||
from core.builtin_concepts import BuiltinConcepts, ListConcept
|
||||
from core.concept import Concept
|
||||
import ast
|
||||
import core.utils
|
||||
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NodeParent:
    """
    Ancestor link of a node: the parent node plus the field of the parent
    that holds the child.

    For example, a 'For' node has three fields (target, iter and body),
    so for a node stored under For.iter:
        node  -> the For node
        field -> "iter"
    """

    def __init__(self, node, field):
        self.node = node
        self.field = field

    def __repr__(self):
        # Bug fix: __repr__ must return a str. The previous code returned
        # None for a missing node, which made repr() raise TypeError.
        if self.node is None:
            return "None"

        if self.field is None:
            return self.node.get_node_type()

        return self.node.get_node_type() + "." + self.field

    def __eq__(self, other):
        # Allow comparison with a (node_type, field) tuple for simplification.
        if isinstance(other, tuple):
            return self.node.get_node_type() == other[0] and self.field == other[1]

        # normal equals implementation
        if not isinstance(other, NodeParent):
            return False

        return self.node.get_node_type() == other.node.get_node_type() and self.field == other.field

    def __hash__(self):
        return hash((self.node.get_node_type(), self.field))
|
||||
|
||||
|
||||
class NodeConcept(Concept):
    """Concept wrapping a single AST node."""

    def __init__(self, key, node_type, parent: NodeParent):
        # The key doubles as both the concept key and its name.
        super().__init__(key, True, False, key)
        self.parent = parent          # NodeParent link; None for the root
        self.node_type = node_type    # AST class name, e.g. "For"

    def get_node_type(self):
        """Return the AST class name of the wrapped node."""
        return self.node_type
|
||||
|
||||
|
||||
class GenericNodeConcept(NodeConcept):
    """NodeConcept used for any AST node without a dedicated subclass."""

    def __init__(self, node_type, parent):
        super().__init__(BuiltinConcepts.GENERIC_NODE, node_type, parent)

    def __repr__(self):
        return "Generic:" + self.node_type

    # NOTE: get_node_type() is inherited from NodeConcept; the previous
    # byte-identical override here was redundant and has been removed.

    def get_value(self):
        """
        Return the "natural" value of the node.

        Names and function arguments resolve to their identifier; every
        other node kind falls back to the concept body.
        """
        if self.node_type == "Name":
            return self.get_prop("id")

        if self.node_type == "arg":
            return self.get_prop("arg")

        return self.body
|
||||
|
||||
|
||||
class IdentifierNodeConcept(NodeConcept):
    """NodeConcept for a 'Name' node; the identifier is kept as the body."""

    def __init__(self, parent, name):
        super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Name", parent)
        self.body = name
|
||||
|
||||
|
||||
class CallNodeConcept(NodeConcept):
    """NodeConcept for a 'Call' node."""

    def __init__(self, parent=None):
        # NOTE(review): the key is IDENTIFIER_NODE although this wraps a
        # "Call" node — looks like a copy/paste from IdentifierNodeConcept;
        # confirm before changing, existing lookups may rely on it.
        super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Call", parent)

    def get_args_names(self, sheerka):
        """Resolve and return the names of the call arguments."""
        return sheerka.get_values(self.get_prop("args"))
|
||||
|
||||
|
||||
def python_to_concept(python_node):
    """
    Transform a Python AST node into concept nodes for easier usage.

    :param python_node: the root ast.AST node
    :return: the root GenericNodeConcept
    """

    def _transform(node, parent):
        kind = node.__class__.__name__
        concept = GenericNodeConcept(kind, parent).init_key()

        for field_name in node._fields:
            if not hasattr(node, field_name):
                continue

            raw = getattr(node, field_name)
            concept.def_prop(field_name)

            if isinstance(raw, list):
                items = ListConcept().init_key()
                for element in raw:
                    items.append(_transform(element, NodeParent(concept, field_name)))
                concept.set_prop(field_name, items)
            elif isinstance(raw, ast.AST):
                concept.set_prop(field_name, _transform(raw, NodeParent(concept, field_name)))
            else:
                # plain value (str, int, None, ...)
                concept.set_prop(field_name, raw)

        concept.metadata.is_evaluated = True
        return concept

    return _transform(python_node, None)
|
||||
|
||||
|
||||
def concept_to_python(concept_node):
    """
    Transform a concept node back into a Python AST node.

    :param concept_node: root NodeConcept
    :return: the rebuilt ast.AST object
    """

    def _transform(node):
        ast_object = core.utils.new_object("_ast." + node.get_node_type())

        for field in node.props:
            if field not in ast_object._fields:
                continue

            value = node.get_prop(field)
            is_list_concept = (isinstance(value, Concept)
                               and value.key == str(BuiltinConcepts.LIST))
            if isinstance(value, list) or is_list_concept:
                setattr(ast_object, field, [_transform(item) for item in value])
            elif isinstance(value, NodeConcept):
                setattr(ast_object, field, _transform(value))
            else:
                setattr(ast_object, field, value)

        return ast_object

    return _transform(concept_node)
|
||||
@@ -0,0 +1,130 @@
|
||||
from core.ast.nodes import GenericNodeConcept, NodeConcept
|
||||
from core.builtin_concepts import ListConcept
|
||||
|
||||
|
||||
class ConceptNodeVisitor:
    """
    Base class to visit NodeConcept trees.

    It is insolently inspired by the Python ast.NodeVisitor class.
    """

    def visit(self, node):
        """Dispatch *node* to visit_<Name> if defined, else generic_visit."""
        raw_name = node.node_type if isinstance(node, GenericNodeConcept) else node.name
        # NOTE(review): capitalize() lowercases everything after the first
        # letter ("BoolOp" -> "Boolop"); existing visitors rely on this.
        handler = getattr(self, 'visit_' + str(raw_name).capitalize(), self.generic_visit)
        return handler(node)

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node."""
        for _field, value in iter_props(node):
            if isinstance(value, ListConcept):
                for item in value:
                    if isinstance(item, NodeConcept):
                        self.visit(item)
            elif isinstance(value, NodeConcept):
                self.visit(value)

    def visit_Constant(self, node):
        """Route Constant nodes to legacy visit_Num/visit_Str/... handlers."""
        value = node.get_prop("value")
        type_name = _const_node_type_names.get(type(value))
        if type_name is None:
            # exact type not mapped: fall back to an isinstance() scan
            for cls, name in _const_node_type_names.items():
                if isinstance(value, cls):
                    type_name = name
                    break
        if type_name is not None:
            method = 'visit_' + type_name
            try:
                visitor = getattr(self, method)
            except AttributeError:
                pass
            else:
                import warnings
                warnings.warn(f"{method} is deprecated; add visit_Constant",
                              PendingDeprecationWarning, 2)
                return visitor(node)
        return self.generic_visit(node)
|
||||
|
||||
|
||||
class UnreferencedNamesVisitor(ConceptNodeVisitor):
    """Collects names that are read without being defined in the tree."""

    def __init__(self, sheerka):
        self.names = set()      # resolved unreferenced names
        self.sheerka = sheerka

    def visit_Name(self, node):
        ancestors = get_parents(node)

        # Skip names that are being *defined* rather than referenced.
        if ("For", "target") in ancestors:      # loop variable of a 'for'
            return
        if ("Call", "func") in ancestors:       # name of the called function
            return
        if ("Assign", "targets") in ancestors:  # assignment target
            return

        name = self.sheerka.value(node)
        if self.can_be_discarded(name, ancestors):
            return

        self.names.add(name)

    def can_be_discarded(self, variable_name, parents):
        """Return True when *variable_name* is bound by an enclosing scope."""
        for ancestor in (parent.node for parent in parents):
            if ancestor is None:
                return False

            node_type = ancestor.get_node_type()
            if node_type == "For" and \
                    self.sheerka.value(ancestor.get_prop("target")) == variable_name:
                # variable bound by the loop
                return True

            if node_type == "FunctionDef":
                # variable defined as a function parameter
                args = ancestor.get_prop("args")
                parameter_names = list(self.sheerka.get_values(args.get_prop("args")))
                if variable_name in parameter_names:
                    return True

        return False
|
||||
|
||||
|
||||
class ExtractPredicateVisitor(ConceptNodeVisitor):
    """Visitor meant to collect predicates mentioning a given variable."""

    def __init__(self, variable_name):
        self.predicates = []                 # predicates found so far
        self.variable_name = variable_name   # name to look for
|
||||
|
||||
|
||||
|
||||
|
||||
def get_parents(node):
    """Return the chain of NodeParent links from *node* up to the root."""
    chain = []
    while node.parent is not None:
        chain.append(node.parent)
        node = node.parent.node
    return chain
|
||||
|
||||
|
||||
def iter_props(node):
    """Yield (name, resolved value) pairs for every property of *node*."""
    for name, prop in node.props.items():
        yield name, prop.value
|
||||
|
||||
|
||||
_const_node_type_names = {
|
||||
bool: 'NameConstant', # should be before int
|
||||
type(None): 'NameConstant',
|
||||
int: 'Num',
|
||||
float: 'Num',
|
||||
complex: 'Num',
|
||||
str: 'Str',
|
||||
bytes: 'Bytes',
|
||||
type(...): 'Ellipsis',
|
||||
}
|
||||
@@ -0,0 +1,387 @@
|
||||
from enum import Enum
|
||||
|
||||
from core.concept import Concept, ConceptParts
|
||||
|
||||
|
||||
class BuiltinConcepts(Enum):
    """
    List of builtin concepts that do not need any specific implementation.

    Note that the value of each member is informational only; it is not used
    by the system. For example, the concept 'NODE' does NOT have "node" as
    its key or id:
      - the key is the name of the concept;
      - the id is a sequential number assigned just before the concept is
        saved in sdp.
    """
    SHEERKA = "sheerka"

    BEFORE_PARSING = "before parsing"  # activated before evaluation by the parsers
    PARSING = "parsing"  # activated during the parsing. It contains the text to parse
    AFTER_PARSING = "after parsing"  # after parsing
    BEFORE_EVALUATION = "before evaluation"  # before evaluation
    EVALUATION = "evaluation"  # activated when the parsing process seems to be finished
    AFTER_EVALUATION = "after evaluation"  # activated when the parsing process seems to be finished
    BEFORE_RENDERING = "before rendering"  # activated before the output is rendered
    RENDERING = "rendering"  # rendering the response from sheerka
    AFTER_RENDERING = "after rendering"  # after rendering the response from sheerka

    USER_INPUT = "user input"  # represents an input from a user
    SUCCESS = "success"
    ERROR = "error"
    UNKNOWN_CONCEPT = "unknown concept"  # the requested concept is not recognized
    CANNOT_RESOLVE_CONCEPT = "cannot resolve concept"  # when too many concepts with the same name
    RETURN_VALUE = "return value"  # a value is returned
    CONCEPT_TOO_LONG = "concept too long"  # concept cannot be processed by exactConcept parser
    NEW_CONCEPT = "new concept"  # when a new concept is added
    UNKNOWN_PROPERTY = "unknown property"  # when requesting an unknown property
    PARSER_RESULT = "parser result"
    TOO_MANY_SUCCESS = "too many success"  # when expecting a limited number of successful return values
    TOO_MANY_ERRORS = "too many errors"  # when expecting a limited number of failed return values
    NOT_FOR_ME = "not for me"  # a parser recognizes that the entry is not meant for it
    IS_EMPTY = "is empty"  # when a set is empty
    INVALID_RETURN_VALUE = "invalid return value"  # the return value of an evaluator is not correct
    CONCEPT_ALREADY_DEFINED = "concept already defined"  # when you try to add the same concept twice
    NOP = "no operation"  # no operation concept. Does nothing
    CONCEPT_EVAL_ERROR = "concept evaluation error"  # cannot evaluate a property or metadata of a concept
    ENUMERATION = "enum"  # represents a list or a set
    LIST = "list"  # represents a list
    CONCEPT_ALREADY_IN_SET = "concept already in set"
    EVALUATOR_PRE_PROCESS = "evaluator pre process"  # used to modify / tweak behaviour of evaluators
    CONCEPT_EVAL_REQUESTED = "concept eval requested"
    REDUCE_REQUESTED = "reduce requested"  # remove meaningless errors when possible
    NOT_A_SET = "not a set"  # the concept has no entry in sets

    NODE = "node"
    GENERIC_NODE = "generic node"
    IDENTIFIER_NODE = "identifier node"

    def __repr__(self):
        return "__" + self.name

    def __str__(self):
        return "__" + self.name
|
||||
|
||||
|
||||
# Concepts that must exist at most once in the system.
BuiltinUnique = [
    # parsing / evaluation / rendering phases
    BuiltinConcepts.BEFORE_PARSING,
    BuiltinConcepts.PARSING,
    BuiltinConcepts.AFTER_PARSING,
    BuiltinConcepts.BEFORE_EVALUATION,
    BuiltinConcepts.EVALUATION,
    BuiltinConcepts.AFTER_EVALUATION,
    BuiltinConcepts.BEFORE_RENDERING,
    BuiltinConcepts.RENDERING,
    BuiltinConcepts.AFTER_RENDERING,
    # miscellaneous singletons
    BuiltinConcepts.SUCCESS,
    BuiltinConcepts.NOP,
    BuiltinConcepts.CONCEPT_EVAL_REQUESTED,
    BuiltinConcepts.REDUCE_REQUESTED,
]
|
||||
|
||||
# Keys (as strings) of all builtin error concepts.
# Bug fix: this list was built by iterating a *set* literal of enum members,
# so its order was nondeterministic across interpreter runs; a tuple keeps
# the same membership with a stable, deterministic order.
BuiltinErrors = [str(e) for e in (
    BuiltinConcepts.ERROR,
    BuiltinConcepts.UNKNOWN_CONCEPT,
    BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
    BuiltinConcepts.CONCEPT_TOO_LONG,
    BuiltinConcepts.UNKNOWN_PROPERTY,
    BuiltinConcepts.TOO_MANY_SUCCESS,
    BuiltinConcepts.TOO_MANY_ERRORS,
    BuiltinConcepts.INVALID_RETURN_VALUE,
    BuiltinConcepts.CONCEPT_ALREADY_DEFINED,
    BuiltinConcepts.CONCEPT_EVAL_ERROR,
    BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
    BuiltinConcepts.NOT_A_SET,
)]
|
||||
|
||||
"""
|
||||
Some concepts have a specific implementation
|
||||
It's mainly to ease the usage
|
||||
"""
|
||||
|
||||
|
||||
class UserInputConcept(Concept):
    """Concept representing a text input typed by a user."""

    def __init__(self, text=None, user_name=None):
        key = BuiltinConcepts.USER_INPUT
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, text)
        self.set_prop("user_name", user_name)
        self.metadata.is_evaluated = True

    @property
    def text(self):
        """The raw user text (stored as the concept body)."""
        return self.body

    @property
    def user_name(self):
        """Name of the user who produced the input."""
        return self.props["user_name"].value

    def __repr__(self):
        return f"({self.id}){self.name}: '{self.body}'"
|
||||
|
||||
|
||||
class ErrorConcept(Concept):
    """Concept carrying an error payload as its body."""

    def __init__(self, error=None):
        key = BuiltinConcepts.ERROR
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, error)
        self.metadata.is_evaluated = True

    def __repr__(self):
        return f"({self.id}){self.name}: {self.body}"
|
||||
|
||||
|
||||
class UnknownConcept(Concept):
    """Concept returned when a requested concept is not recognized."""

    def __init__(self, metadata=None):
        key = BuiltinConcepts.UNKNOWN_CONCEPT
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, metadata)
        self.metadata.is_evaluated = True

    def __repr__(self):
        return f"({self.id}){self.name}: {self.body}"
|
||||
|
||||
|
||||
class ReturnValueConcept(Concept):
    """
    Result of a data-flow processing step.

    It is the main input for the evaluators.
    """

    def __init__(self, who=None, status=None, value=None, message=None, parents=None):
        key = BuiltinConcepts.RETURN_VALUE
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, value)
        self.set_prop("who", who)
        self.set_prop("status", status)
        self.set_prop("message", message)
        self.set_prop("parents", parents)
        self.metadata.is_evaluated = True

    @property
    def who(self):
        """Originator of this return value."""
        return self.props["who"].value

    @who.setter
    def who(self, value):
        self.set_prop("who", value)

    @property
    def status(self):
        """Success flag of the processing step."""
        return self.props["status"].value

    @status.setter
    def status(self, value):
        self.set_prop("status", value)

    @property
    def value(self):
        """Payload of the return value (stored as the body)."""
        return self.body

    @value.setter
    def value(self, value):
        self.set_metadata_value(ConceptParts.BODY, value)

    @property
    def message(self):
        """Optional human-readable message."""
        return self.props["message"].value

    @message.setter
    def message(self, value):
        self.set_prop("message", value)

    @property
    def parents(self):
        """Return values this one was derived from."""
        return self.props["parents"].value

    @parents.setter
    def parents(self, value):
        self.set_prop("parents", value)

    def __repr__(self):
        return f"ReturnValue(who={self.who}, status={self.status}, value={self.value}, message={self.message})"

    def __eq__(self, other):
        if not isinstance(other, ReturnValueConcept):
            return False

        return (self.who == other.who
                and self.status == other.status
                and self.value == other.value
                and self.message == other.message)

    def __hash__(self):
        # message is deliberately left out of the hash (hashing a subset of
        # the equality fields is allowed); iterables are folded into a tuple.
        value = self.value
        if hasattr(value, "__iter__") and not isinstance(value, str):
            value_hash = hash(tuple(value))
        else:
            value_hash = hash(value)

        return hash((self.who, self.status, value_hash))
|
||||
|
||||
|
||||
class UnknownPropertyConcept(Concept):
    """
    Error raised (as a concept) when, during sheerka.new(), an unknown
    property is requested.
    """

    def __init__(self, property_name=None, concept=None):
        key = BuiltinConcepts.UNKNOWN_PROPERTY
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, property_name)
        self.set_prop("concept", concept)
        self.metadata.is_evaluated = True

    def __repr__(self):
        return f"UnknownProperty(property={self.property_name}, concept={self.concept})"

    @property
    def concept(self):
        """Concept on which the unknown property was requested."""
        return self.props["concept"].value

    @property
    def property_name(self):
        """Name of the missing property (stored as the body)."""
        return self.body
|
||||
|
||||
|
||||
class ParserResultConcept(Concept):
    """Result of a parsing."""

    def __init__(self, parser=None, source=None, value=None, try_parsed=None):
        key = BuiltinConcepts.PARSER_RESULT
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, value)
        self.set_prop("parser", parser)
        self.set_prop("source", source)
        self.set_prop("try_parsed", try_parsed)  # in case of error, what was found before the error
        self.metadata.is_evaluated = True

    def __repr__(self):
        source = self.props['source'].value
        tail = f", source='{source}')" if source else f", body='{self.body}')"
        return f"ParserResult(parser={self.props['parser'].value}" + tail

    def __eq__(self, other):
        if not isinstance(other, ParserResultConcept):
            return False

        return (self.source == other.source
                and self.parser == other.parser
                and self.body == other.body
                and self.try_parsed == other.try_parsed)

    def __hash__(self):
        # NOTE(review): hashes only the metadata name while __eq__ compares
        # four fields — legal (equal objects share a name) but degenerate;
        # confirm this is intentional.
        return hash(self.metadata.name)

    @property
    def value(self):
        """Parsed value (stored as the body)."""
        return self.body

    @property
    def try_parsed(self):
        """On error: what was successfully parsed before failing."""
        return self.props["try_parsed"].value

    @property
    def source(self):
        """Original text handed to the parser."""
        return self.props["source"].value

    @property
    def parser(self):
        """Parser that produced this result."""
        return self.props["parser"].value
|
||||
|
||||
|
||||
class InvalidReturnValueConcept(Concept):
    """
    Error returned when an evaluator is not correctly coded.

    The accepted return values are: ReturnValueConcept, a list of
    ReturnValueConcept, or None.
    """

    def __init__(self, return_value=None, evaluator=None):
        key = BuiltinConcepts.INVALID_RETURN_VALUE
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, return_value)
        self.set_prop("evaluator", evaluator)
        self.metadata.is_evaluated = True
|
||||
|
||||
|
||||
class ConceptEvalError(Concept):
    """Error concept: a property or metadata of a concept cannot be evaluated."""

    def __init__(self, error=None, concept=None, property_name=None):
        key = BuiltinConcepts.CONCEPT_EVAL_ERROR
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, error)
        self.set_prop("concept", concept)
        self.set_prop("property_name", property_name)
        self.metadata.is_evaluated = True

    def __repr__(self):
        return f"ConceptEvalError(error={self.error}, concept={self.concept}, property={self.property_name})"

    @property
    def error(self):
        """The underlying error (stored as the body)."""
        return self.body

    @property
    def concept(self):
        """Concept whose evaluation failed."""
        return self.props["concept"].value

    @property
    def property_name(self):
        """Name of the property that failed to evaluate."""
        return self.props["property_name"].value
|
||||
|
||||
|
||||
class EnumerationConcept(Concept):
    """Concept wrapping an iterable (a list or a set)."""

    def __init__(self, iteration=None):
        key = BuiltinConcepts.ENUMERATION
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, iteration)
        self.metadata.is_evaluated = True

    def __iter__(self):
        return iter(self.body)
|
||||
|
||||
|
||||
class ListConcept(Concept):
    """Concept wrapping a Python list; supports the usual sequence protocol."""

    def __init__(self, items=None):
        key = BuiltinConcepts.LIST
        super().__init__(key, True, False, key)
        # falsy items (None or empty) are replaced by a fresh list
        self.set_metadata_value(ConceptParts.BODY, items or [])
        self.metadata.is_evaluated = True

    def append(self, obj):
        """Append *obj* to the underlying list."""
        self.body.append(obj)

    def __len__(self):
        return len(self.body)

    def __getitem__(self, index):
        return self.body[index]

    def __setitem__(self, index, value):
        self.body[index] = value

    def __iter__(self):
        return iter(self.body)

    def __contains__(self, item):
        return item in self.body
|
||||
|
||||
|
||||
class ConceptAlreadyInSet(Concept):
    """Error concept: the same concept was added to a set twice."""

    def __init__(self, concept=None, concept_set=None):
        key = BuiltinConcepts.CONCEPT_ALREADY_IN_SET
        super().__init__(key, True, False, key)
        self.set_metadata_value(ConceptParts.BODY, concept)
        self.set_prop("concept_set", concept_set)
        self.metadata.is_evaluated = True

    def __repr__(self):
        return f"ConceptAlreadyInSet(concept={self.concept}, concept_set={self.concept_set})"

    @property
    def concept(self):
        """The duplicated concept (stored as the body)."""
        return self.body

    @property
    def concept_set(self):
        """The set the concept already belongs to."""
        return self.props["concept_set"].value
|
||||
@@ -0,0 +1,214 @@
|
||||
import ast
|
||||
import logging
|
||||
|
||||
import core.ast.nodes
|
||||
from core.ast.nodes import CallNodeConcept, GenericNodeConcept
|
||||
from core.ast.visitors import UnreferencedNamesVisitor
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
|
||||
|
||||
|
||||
def is_same_success(sheerka, return_values):
    """
    Return True if all return values are successful and share the same value.

    :param sheerka: sheerka instance used to resolve values
    :param return_values: list of ReturnValueConcept
    :return: bool
    """
    assert isinstance(return_values, list)

    # Robustness fix: an empty list used to raise IndexError below.
    if not return_values:
        return False

    if not return_values[0].status:
        return False

    reference = sheerka.value(return_values[0].value)

    for return_value in return_values[1:]:
        if not return_value.status:
            return False

        if sheerka.value(return_value.value) != reference:
            return False

    return True
|
||||
|
||||
|
||||
def expect_one(context, return_values, logger=None):
    """
    Check that there is exactly one successful return value.

    If several successes exist with the same value, that value wins;
    otherwise an error concept is returned.

    :param context: evaluation context (provides sheerka, who and log)
    :param return_values: list of ReturnValueConcept (anything else is passed through)
    :param logger: optional logger for debug traces
    :return: a single ReturnValueConcept
    """
    if not isinstance(return_values, list):
        return return_values

    sheerka = context.sheerka

    if len(return_values) == 0:
        return sheerka.ret(
            context.who,
            False,
            sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values),
            parents=return_values)

    successful_results = [item for item in return_values if item.status]
    number_of_successful = len(successful_results)

    # a single winner: drop the errors and return it
    if number_of_successful == 1:
        return sheerka.ret(
            context.who,
            True,
            successful_results[0].body,
            parents=return_values)

    # too many winners, which one to choose ?
    if number_of_successful > 1:
        if is_same_success(sheerka, successful_results):
            return sheerka.ret(
                context.who,
                True,
                successful_results[0].value,
                parents=return_values)

        if logger and logger.isEnabledFor(logging.DEBUG):
            context.log(logger, "Too many successful results found by expect_one()", context.who)
            for s in successful_results:
                context.log(logger, f"-> {s}", context.who)
        return sheerka.ret(
            context.who,
            False,
            sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS, body=successful_results),
            parents=return_values)

    # only errors, i cannot help you
    if logger and logger.isEnabledFor(logging.DEBUG):
        context.log(logger, "Too many errors found by expect_one()", context.who)
        # Bug fix: this loop used to iterate successful_results, which is
        # always empty on this branch; log the actual failed values instead.
        for s in return_values:
            context.log(logger, f"-> {s}", context.who)

    if len(return_values) == 1:
        return sheerka.ret(
            context.who,
            False,
            return_values[0],
            parents=return_values)

    return sheerka.ret(
        context.who,
        False,
        sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values),
        parents=return_values)
|
||||
|
||||
|
||||
def get_names(sheerka, concept_node):
    """
    Find all the names referenced by *concept_node*.

    :param sheerka: sheerka instance used to resolve values
    :param concept_node: root concept node to scan
    :return: list of names (unordered)
    """
    visitor = UnreferencedNamesVisitor(sheerka)
    visitor.visit(concept_node)
    return list(visitor.names)
|
||||
|
||||
|
||||
def extract_predicates(sheerka, expression, variables_to_include, variables_to_exclude):
    """
    From a given expression and a variable (or list of variables), try to
    find all the predicates referencing the(se) variable(s), and the(se)
    variable(s) solely.

    For example with exp 'isinstance(a, int) and isinstance(b, str)' and
    variables_to_include == ['a'], returns ['isinstance(a, int)'].

    :param sheerka: sheerka instance
    :param expression: source text of the expression to analyse
    :param variables_to_include: names a predicate must reference
    :param variables_to_exclude: names a predicate must not reference
    :return: list of ast.Expression predicates
    :raises NotImplementedError: when *expression* is not a string
    """
    if len(variables_to_include) == 0:
        return []

    def _get_predicates(_nodes):
        """Convert matching concept nodes back to compile-ready ast.Expression."""
        _predicates = []
        for _node in _nodes:
            python_node = ast.Expression(body=core.ast.nodes.concept_to_python(_node))
            _predicates.append(ast.fix_missing_locations(python_node))
        return _predicates

    if isinstance(expression, str):
        node = ast.parse(expression, mode="eval")
    else:
        # Bug fix: the exception used to be *returned* instead of raised,
        # silently handing an exception instance to the caller.
        raise NotImplementedError()

    concept_node = core.ast.nodes.python_to_concept(node)
    main_op = concept_node.get_prop("body")

    return _get_predicates(_extract_predicates(sheerka, main_op, variables_to_include, variables_to_exclude))
|
||||
|
||||
|
||||
def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclude):
    """Recursive worker for extract_predicates(); returns matching concept nodes."""
    predicates = []

    def _matches(_names, to_include, to_exclude):
        # True  -> at least one included name and no excluded one
        # False -> an excluded name was seen
        # None  -> no relevant name at all (falsy, like False)
        _res = None
        for n in _names:
            if n in to_include and _res is None:
                _res = True
            if n in to_exclude:
                _res = False
        return _res

    node_type = node.node_type

    if node_type == "Compare":
        left = node.get_prop("left")
        if left.node_type == "Name":
            # simple case of one comparison
            comparison_name = sheerka.value(left)
            if comparison_name in variables_to_include and comparison_name not in variables_to_exclude:
                predicates.append(node)
        else:
            # the left part is an expression
            if _extract_predicates(sheerka, left, variables_to_include, variables_to_exclude):
                predicates.append(node)
    elif node_type == "Call":
        # simple case predicate
        call_node = node if isinstance(node, CallNodeConcept) else CallNodeConcept().update_from(node)
        if _matches(list(call_node.get_args_names(sheerka)), variables_to_include, variables_to_exclude):
            predicates.append(node)
    elif node_type == "UnaryOp" and node.get_prop("op").node_type == "Not":
        # simple case of negation
        if _extract_predicates(sheerka, node.get_prop("operand"), variables_to_include, variables_to_exclude):
            predicates.append(node)
    elif node_type == "BinOp":
        if _matches(get_names(sheerka, node), variables_to_include, variables_to_exclude):
            predicates.append(node)
    elif node_type == "BoolOp":
        # keep the whole BoolOp only if *every* operand matched
        all_op = True
        partial = []
        for operand in node.get_prop("values"):
            res = _extract_predicates(sheerka, operand, variables_to_include, variables_to_exclude)
            if res:
                partial.extend(res)
            else:
                all_op = False

        if all_op:
            predicates.append(node)
        else:
            predicates.extend(partial)

    return predicates
|
||||
|
||||
|
||||
@@ -0,0 +1,405 @@
|
||||
import hashlib
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from core.sheerka_logger import get_logger
|
||||
|
||||
import core.utils
|
||||
from core.tokenizer import Tokenizer, TokenKind
|
||||
|
||||
# Metadata attributes folded into a concept's digest.
PROPERTIES_FOR_DIGEST = (
    "name", "key",
    "definition", "definition_type",
    "is_builtin", "is_unique",
    "where", "pre", "post", "body",
    "desc", "props",
)
# Serialization also includes the (digest-independent) id.
PROPERTIES_TO_SERIALIZE = PROPERTIES_FOR_DIGEST + ("id",)
# Metadata attributes that may be supplied to sheerka.new().
PROPERTIES_FOR_NEW = ("where", "pre", "post", "body", "desc")
# Prefix marking generated variable names.
VARIABLE_PREFIX = "__var__"
|
||||
|
||||
|
||||
class ConceptParts(Enum):
    """Metadata fields of a concept that may contain some code."""
    WHERE = "where"
    PRE = "pre"
    POST = "post"
    BODY = "body"

    @staticmethod
    def get_parts():
        """Return the set of all part names."""
        return {item.value for item in ConceptParts}
|
||||
|
||||
|
||||
@dataclass
class ConceptMetadata:
    """Raw, unresolved description of a concept (field order is part of the API)."""
    name: str              # display name of the concept
    is_builtin: bool       # provided by the core, not by a user
    is_unique: bool        # at most one instance may exist
    key: str               # name of the concept with props replaced, to ease search
    body: str              # main method; can also be the value of the concept
    where: str             # condition to recognize variables in the name
    pre: str               # pre conditions checked before calling the main function
    post: str              # post conditions checked after calling the main function
    definition: str        # regex used to define the concept
    definition_type: str   # the definition can use something other than a regex
    desc: str              # optional description of the concept
    id: str                # immutable unique identifier (the key can change, the id cannot)
    props: list            # properties, with their default values
    is_evaluated: bool = False  # True once evaluated by sheerka.eval_concept()
|
||||
|
||||
|
||||
# Lightweight stand-in for a concept (tests purposes only).
simplec = namedtuple("concept", ["name", "body"])
|
||||
|
||||
|
||||
class Concept:
    """
    Default concept object

    A concept is the base object of our universe
    Everything is a concept

    State held besides ``metadata``:
      compiled -- cached ASTs for the 'where', 'pre', 'post' and 'body' parts
      values   -- resolved values of the metadata parts (keyed by ConceptParts)
      props    -- resolved Property objects of this concept
    """

    def __init__(self, name=None,
                 is_builtin=False,
                 is_unique=False,
                 key=None,
                 body=None,
                 where=None,
                 pre=None,
                 post=None,
                 definition=None,
                 definition_type=None,
                 desc=None,
                 id=None,
                 props=None):
        """Builds the ConceptMetadata from the raw arguments; name and key are coerced to str."""

        metadata = ConceptMetadata(
            str(name) if name else None,
            is_builtin,
            is_unique,
            str(key) if key else None,
            body,
            where,
            pre,
            post,
            definition,
            definition_type,
            desc,
            id,
            props or []
        )

        self.metadata = metadata
        self.compiled = {}  # cached ast for the where, pre, post and body parts
        self.values = {}  # values of metadata once resolved
        self.props = {}  # resolved properties of this concept
        self.bnf = None
        self.log = get_logger("core." + self.__class__.__name__)
        self.init_log = get_logger("init.core." + self.__class__.__name__)

    def __repr__(self):
        return f"({self.metadata.id}){self.metadata.name}"

    def __eq__(self, other):

        # a simplec only carries name/body, so only compare those
        if isinstance(other, simplec):
            return self.name == other.name and self.body == other.body

        if id(self) == id(other):
            return True

        if not isinstance(other, Concept):
            return False

        # check the metadata
        for prop in PROPERTIES_TO_SERIALIZE:
            # print(prop) # use full to know which id does not match
            my_value = getattr(self.metadata, prop)
            other_value = getattr(other.metadata, prop)
            if isinstance(my_value, Concept) and isinstance(other_value, Concept):
                # need to check if circular references
                # NOTE(review): id(self) == id(other) is always False here (the
                # identical case returned above) -- was id(my_value) == id(other_value)
                # intended? confirm.
                if id(self) == id(other):
                    continue

                # walk the chain of Concept-valued metadata to detect a cycle back to self
                sub_value = getattr(other_value.metadata, prop)
                while isinstance(sub_value, Concept):
                    if id(self) == id(sub_value):
                        return False  # circular reference
                    sub_value = getattr(sub_value.metadata, prop)

                if my_value != other_value:
                    return False

            else:
                if my_value != other_value:
                    return False

        # checks the values
        if len(self.values) != len(other.values):
            return False

        for metadata in self.values:
            if self.get_metadata_value(metadata) != other.get_metadata_value(metadata):
                return False

        if len(self.props) != len(other.props):
            return False

        for prop in self.props:
            if self.get_prop(prop) != other.get_prop(prop):
                return False

        return True

    def __hash__(self):
        # coarse hash: equal concepts (per __eq__) share the same name, so this
        # is consistent, but distinct concepts with the same name collide
        return hash(self.metadata.name)

    def __getattr__(self, item):
        # I have this complicated implementation because of the usage of Pickle
        # (vars(self) avoids re-entering __getattr__ before attributes exist)

        if 'props' in vars(self) and item in self.props:
            return self.props[item].value

        name = self.name if 'metadata' in vars(self) else 'Concept'
        raise AttributeError(f"'{name}' concept has no attribute '{item}'")

    def def_prop(self, prop_name: str, default_value=None):
        """
        Adds a property to the metadata
        :param prop_name:
        :param default_value: None or a str (it will be evaluated later)
        :return: self (fluent)
        """
        assert default_value is None or isinstance(default_value, str)  # default properties will have to be evaluated
        self.metadata.props.append((prop_name, default_value))
        self.props[prop_name] = Property(prop_name, None)  # do not set the default value

        # why not setting props to the default values ?
        # Because it may not be the real values, as metadata.props need to be evaluated
        return self

    def def_prop_by_index(self, index: int, value):
        """
        Re-assign a value to a property (mainly used by ExactConceptParser)
        :param index: position of the property in metadata.props
        :param value: None or a str (it will be evaluated later)
        :return: self (fluent)
        """
        assert value is None or isinstance(value, str)  # default properties will have to be evaluated
        prop = self.metadata.props[index]
        self.metadata.props[index] = (prop[0], value)
        return self

    @property
    def name(self):
        return self.metadata.name

    @property
    def id(self):
        return self.metadata.id

    @property
    def key(self):
        return self.metadata.key

    def init_key(self, tokens=None):
        """
        Create the key for this concept.
        Must be called only when the concept is fully initialized

        The method is not called set_key to make sure that no other class set the key by mistake
        :param tokens: pre-tokenized name; if None the name is tokenized here
        :return: self (fluent)
        """
        if self.metadata.key is not None:
            return self

        if tokens is None:
            tokens = list(Tokenizer(self.metadata.name))

        # single-token names keep prop names literally; multi-token names get variable slots
        variables = [p[0] for p in self.metadata.props] if len(core.utils.strip_tokens(tokens, True)) > 1 else []

        key = ""
        first = True
        for token in tokens:
            if token.type == TokenKind.EOF:
                break
            if token.type == TokenKind.WHITESPACE:
                continue
            if not first:
                key += " "  # spaces are normalized
            if token.value in variables:
                # prop names become positional variable markers, e.g. "__var__0"
                key += VARIABLE_PREFIX + str(variables.index(token.value))
            else:
                # string tokens are stored without their surrounding quotes
                key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
            first = False

        self.metadata.key = key
        return self

    @property
    def body(self):
        """Resolved BODY value, or None when the body was never resolved."""
        return self.values[ConceptParts.BODY] if ConceptParts.BODY in self.values else None

    def add_codes(self, codes):
        """
        Gets the ASTs for 'where', 'pre', 'post' and 'body'
        These ASTs are known when the concept is freshly parsed.
        So the values are kept in cache.

        For concepts loaded from sdp, these ASTs must be created again
        TODO : Seems to be a service method. Can be put somewhere else
        :param codes: mapping part -> AST (ignored when None)
        :return: self (fluent), or None when codes is None
        """
        if codes is None:
            return

        for key in codes:
            self.compiled[key] = codes[key]

        return self

    def get_digest(self):
        """
        Returns the digest of the concept content
        :return: hexa form of the sha256
        """
        # hashlib is assumed imported at module level (import not visible in this chunk) -- confirm
        return hashlib.sha256(f"Concept:{self.to_dict(PROPERTIES_FOR_DIGEST)}".encode("utf-8")).hexdigest()

    def to_dict(self, props_to_use=None):
        """
        Returns a dict representing 'self'
        :param props_to_use: metadata field names to export (defaults to PROPERTIES_TO_SERIALIZE)
        :return:
        """

        props_to_use = props_to_use or PROPERTIES_TO_SERIALIZE

        props_as_dict = dict((prop, getattr(self.metadata, prop)) for prop in props_to_use)
        return props_as_dict

    def from_dict(self, as_dict):
        """
        Initializes 'self' from a dict
        :param as_dict: dict produced by to_dict (unknown keys are ignored)
        :return: self (fluent)
        """
        for prop in PROPERTIES_TO_SERIALIZE:
            if prop in as_dict:
                if prop == "props":
                    # props are re-declared one by one so self.props stays in sync
                    for name, value in as_dict[prop]:
                        self.def_prop(name, value)
                else:
                    setattr(self.metadata, prop, as_dict[prop])
        return self

    def update_from(self, other):
        """
        Update self using the properties of another concept
        This method is to mimic the class to instance pattern
        'other' is the class, the template, and 'self' is a new instance

        NOTE(review): from_dict appends to metadata.props; updating twice from
        the same template could duplicate prop declarations -- confirm.
        :param other:
        :return: self (fluent)
        """
        if other is None:
            return self

        if id(other) == id(self):
            return self

        # update metadata
        self.from_dict(other.to_dict())

        # update values
        for k, v in other.values.items():
            self.values[k] = v

        # update properties
        for k, v in other.props.items():
            self.set_prop(k, v.value)

        return self

    def set_prop(self, prop_name: str, prop_value):
        """Directly sets a value to a property"""
        self.props[prop_name] = Property(prop_name, prop_value)
        return self

    def get_prop(self, prop_name: str):
        """Return the resolved value of a property (raises KeyError when undefined)."""
        return self.props[prop_name].value

    def set_metadata_value(self, metadata: ConceptParts, value):
        """
        Set the resolved value of a metadata (not the metadata itself)
        :param metadata:
        :param value:
        :return:
        """
        self.values[metadata] = value

    def get_metadata_value(self, metadata: ConceptParts):
        """
        Gets the resolved value of a metadata
        :param metadata:
        :return:
        """
        return self.values[metadata]

    def auto_init(self):
        """
        Sometimes (for tests purposes)
        You don't need the full process of evaluation to get the values of the concept
        Directly use the values of the metadata
        :return: self (fluent)
        """

        if self.metadata.is_evaluated:
            return self

        # copy the raw metadata parts straight into the resolved values
        for metadata in ConceptParts:
            value = getattr(self.metadata, metadata.value)
            if value is not None:
                self.values[metadata] = value

        # and the declared defaults straight into the resolved props
        for prop, value in self.metadata.props:
            self.set_prop(prop, value)

        self.metadata.is_evaluated = True
        return self
|
||||
|
||||
|
||||
class Property:
    """
    Defines the variables of a concept.

    Kept as a dedicated class (rather than a bare key/value pair) because,
    from experience, property management tends to grow more complex.
    """

    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __repr__(self):
        return f"{self.name}={self.value}"

    def __eq__(self, other):
        """Two properties are equal when both name and value match."""
        return isinstance(other, Property) \
            and (self.name, self.value) == (other.name, other.value)

    def __hash__(self):
        return hash((self.name, self.value))
|
||||
|
||||
|
||||
@dataclass()
class DoNotResolve:
    """
    Marker wrapper: the metadata (or the prop) of a concept wrapped in this
    class must not be evaluated thru sheerka.execute

    For example, if you want to set a value to the BODY that will not change
    when the concept will be evaluated,
    set concept.compiled[BODY] to DoNotResolve(value)
    """
    # the wrapped raw value, passed through untouched by evaluation
    value: object
|
||||
@@ -0,0 +1,203 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from sdp.sheerkaDataProvider import Event
|
||||
|
||||
DEBUG_TAB_SIZE = 4  # spaces of log indentation added per ExecutionContext.push() level
|
||||
|
||||
|
||||
class ExecutionContext:
    """
    To keep track of the execution of a request

    Contexts form a tree: push() creates a child context sharing the event,
    sheerka instance and bag. Use as a context manager to time the execution.
    """

    # per-event-digest counters giving each context a sequential id
    # NOTE(review): grows for every distinct event digest and is never cleared
    # -- confirm this is acceptable for long-running processes
    ids = {}

    @staticmethod
    def get_id(event_digest):
        """Return the next sequential id (starting at 0) for the given event digest."""
        if event_digest in ExecutionContext.ids:
            ExecutionContext.ids[event_digest] += 1
        else:
            ExecutionContext.ids[event_digest] = 0
        return ExecutionContext.ids[event_digest]

    def __init__(self,
                 who,
                 event: Event,
                 sheerka,
                 desc: str = None,
                 **kwargs):
        """
        :param who: who is asking
        :param event: what was the (original) trigger
        :param sheerka: the main sheerka controller
        :param desc: human description of what is going on
        :param kwargs: 'obj' and 'concepts' are extracted; everything else goes in the bag
        """

        self._parent = None
        self._id = ExecutionContext.get_id(event.get_digest())
        self._tab = ""  # indentation prefix used by the log_* helpers
        self._bag = {}  # other variables (exposed through __getattr__)
        self._start = 0  # time.time_ns() captured by __enter__
        self._stop = 0  # time.time_ns() captured by __exit__

        self.who = who  # who is asking
        self.event = event  # what was the (original) trigger
        self.sheerka = sheerka  # sheerka
        self.desc = desc  # human description of what is going on
        self.children = []
        self.preprocess = None

        self.inputs = {}  # what was the parameters of the execution context
        self.values = {}  # what was produced by the execution context

        self.obj = kwargs.pop("obj", None)
        self.concepts = kwargs.pop("concepts", {})
        # update the other elements
        for k, v in kwargs.items():
            self._bag[k] = v

    @property
    def elapsed(self):
        """Elapsed nanoseconds; 0 if never entered, live value while still running."""
        if self._start == 0:
            return 0

        return (self._stop if self._stop > 0 else time.time_ns()) - self._start

    @property
    def elapsed_str(self):
        """Human-readable elapsed time (milliseconds below one second, seconds above)."""
        nano_sec = self.elapsed
        dt = nano_sec / 1e6
        return f"{dt} ms" if dt < 1000 else f"{dt / 1000} s"

    @property
    def id(self):
        return self._id

    def __getattr__(self, item):
        # ad-hoc attributes fall back to the bag
        # NOTE(review): if __getattr__ runs before _bag is assigned (e.g. during
        # unpickling), accessing self._bag recurses -- confirm the pickling path
        if item in self._bag:
            return self._bag[item]

        raise AttributeError(f"'ExecutionContext' object has no attribute '{item}'")

    def __enter__(self):
        self._start = time.time_ns()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._stop = time.time_ns()

    def __repr__(self):
        msg = f"ExecutionContext(who={self.who}, id={self._id}"
        if self.desc:
            msg += f", desc='{self.desc}'"
        msg += ")"
        return msg

    def add_preprocess(self, name, **kwargs):
        """Record a named EVALUATOR_PRE_PROCESS concept on this context (fluent)."""
        preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS)
        preprocess.set_prop("name", name)
        for k, v in kwargs.items():
            preprocess.set_prop(k, v)

        # the set is created lazily on first use
        if not self.preprocess:
            self.preprocess = set()
        self.preprocess.add(preprocess)
        return self

    def add_inputs(self, **kwargs):
        """Record the parameters this context was executed with (fluent)."""
        for k, v in kwargs.items():
            self.inputs[k] = v
        return self

    def add_values(self, **kwargs):
        """Record values produced by this context (fluent)."""
        for k, v in kwargs.items():
            self.values[k] = v
        return self

    def get_concept(self, key):
        """
        Resolve a concept by key, searching in order: the bound obj (itself or
        its Concept-valued props), the local concepts map, then sheerka.
        """
        # search in obj
        if isinstance(self.obj, Concept):
            if self.obj.key == key:
                return self.obj
            for prop in self.obj.props:
                if prop == key:
                    value = self.obj.props[prop].value
                    if isinstance(value, Concept):
                        return value

        # search in concepts
        if self.concepts:
            for k, c in self.concepts.items():
                if k == key:
                    return c

        return self.sheerka.get(key)

    def new_concept(self, key, **kwargs):
        """
        Like get_concept, but instantiates a fresh concept from the matching
        template; a matching non-Concept prop value is returned as-is.
        """
        # search in obj
        if self.obj:
            if self.obj.key == key:
                return self.sheerka.new_from_template(self.obj, key, **kwargs)
            for prop in self.obj.props:
                if prop == key:
                    value = self.obj.props[prop].value
                    if isinstance(value, Concept):
                        return self.sheerka.new_from_template(value, key, **kwargs)
                    else:
                        return value

        if self.concepts:
            for k, c in self.concepts.items():
                if k == key:
                    return self.sheerka.new_from_template(c, key, **kwargs)

        return self.sheerka.new(key, **kwargs)

    def push(self, who=None, desc=None, **kwargs):
        """
        Create a child context (same event/sheerka, inherited bag/obj/concepts,
        one extra level of log indentation) and register it under children.
        """
        who = who or self.who
        # explicit kwargs win over the inherited bag, which wins over obj/concepts
        _kwargs = {"obj": self.obj, "concepts": self.concepts}
        _kwargs.update(self._bag)
        _kwargs.update(kwargs)
        new = ExecutionContext(
            who,
            self.event,
            self.sheerka,
            desc,
            **_kwargs,
        )
        new._parent = self
        new._tab = self._tab + " " * DEBUG_TAB_SIZE
        new.preprocess = self.preprocess

        self.children.append(new)
        return new

    def log_new(self, logger):
        """Debug-log the creation of this context."""
        logger.debug(f"[{self._id:2}]" + self._tab + str(self))

    def log(self, logger, message, who=None):
        """Debug-log a message with this context's id/indentation prefix."""
        logger.debug(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))

    def log_error(self, logger, message, who=None):
        # logger.exception also records the active exception's traceback
        logger.exception(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))

    def log_result(self, logger, return_values):
        """Debug-log every return value; no-op unless DEBUG is enabled."""
        if not logger.isEnabledFor(logging.DEBUG):
            return

        if len(return_values) == 0:
            logger.debug(self._tab + "No return value")

        for r in return_values:
            to_str = self.return_value_to_str(r)
            logger.debug(f"[{self._id:2}]" + self._tab + "-> " + to_str)

    def to_dict(self):
        """Serialize this context via SheerkaTransform (imported lazily to avoid a cycle)."""
        from core.sheerka_transform import SheerkaTransform
        st = SheerkaTransform(self.sheerka)
        return st.to_dict(self)

    @staticmethod
    def return_value_to_str(r):
        """Compact string form of a ReturnValue; the value is truncated to 50 chars."""
        value = str(r.value)
        if len(value) > 50:
            value = value[:47] + "..."
        to_str = f"ReturnValue(who={r.who}, status={r.status}, value={value})"
        return to_str
|
||||
@@ -0,0 +1,600 @@
|
||||
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept, BuiltinErrors, BuiltinUnique, \
|
||||
UnknownConcept
|
||||
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_NEW
|
||||
from core.sheerka.ExecutionContext import ExecutionContext
|
||||
from core.sheerka.SheerkaCreateNewConcept import SheerkaCreateNewConcept
|
||||
from core.sheerka.SheerkaDump import SheerkaDump
|
||||
from core.sheerka.SheerkaEvaluateConcept import SheerkaEvaluateConcept
|
||||
from core.sheerka.SheerkaExecute import SheerkaExecute
|
||||
from core.sheerka.SheerkaSetsManager import SheerkaSetsManager
|
||||
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event
|
||||
import core.utils
|
||||
import core.builtin_helpers
|
||||
|
||||
from core.sheerka_logger import console_handler
|
||||
|
||||
import logging
|
||||
|
||||
# CONCEPT_EVALUATION_STEPS = [
|
||||
# BuiltinConcepts.BEFORE_EVALUATION,
|
||||
# BuiltinConcepts.EVALUATION,
|
||||
# BuiltinConcepts.AFTER_EVALUATION]
|
||||
|
||||
# Fully qualified name of the parser that turns stored BNF definitions into grammars.
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
|
||||
|
||||
|
||||
class Sheerka(Concept):
    """
    Main controller for the project

    Sheerka is itself a (unique) Concept; the heavy lifting is delegated
    to the Sheerka* handler classes wired up in __init__.
    """

    # sdp entry names and sequential-key names used to persist concepts
    CONCEPTS_ENTRY = "All_Concepts"  # to store all the concepts
    CONCEPTS_BY_ID_ENTRY = "Concepts_By_ID"
    CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions"  # to store definitions (bnf) of concepts
    BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"  # sequential key for builtin concepts
    USER_CONCEPTS_KEYS = "User_Concepts"  # sequential key for user defined concepts
|
||||
|
||||
    def __init__(self, skip_builtins_in_db=False, debug=False, loggers=None):
        """
        :param skip_builtins_in_db: when True, builtins are not synced to/from the db
        :param debug: forwarded to init_logging
        :param loggers: forwarded to init_logging
        """
        # init_logging is not defined in this chunk -- presumably inherited or
        # defined further down; confirm
        self.init_logging(debug, loggers)

        super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
        self.log.debug("Starting Sheerka.")

        # cache of the most used concepts
        # Note that these are only templates
        # They are used as a footprint for instantiation
        # Except of course when the concept is supposed to be unique
        # key is the key of the concept (not the name or the id)
        self.cache_by_key = {}
        self.cache_by_id = {}

        # cache for concept definitions,
        # Primarily used for unit tests that do not have access to sdp
        self.concepts_definition_cache = {}

        # cache for concepts grammars
        # a grammar is a resolved BNF
        self.concepts_grammars = {}

        # a concept can be instantiated
        # ex: File is a concept, but File('foo.txt') is an instance
        # TODO: manage contexts
        self.instances = []

        # List of the known rules by the system
        # ex: hello => say('hello')
        self.rules = []

        self.sdp: SheerkaDataProvider = None  # set by initialize()
        self.builtin_cache = {}  # cache for builtin concepts
        self.parsers = {}  # cache for builtin parsers
        self.evaluators = []  # cache for builtin evaluators

        self.evaluators_prefix: str = None
        self.parsers_prefix: str = None

        self.skip_builtins_in_db = skip_builtins_in_db

        # sub handlers: each one owns a slice of the former monolithic Sheerka logic
        self.execute_handler = SheerkaExecute(self)
        self.create_new_concept_handler = SheerkaCreateNewConcept(self)
        self.dump_handler = SheerkaDump(self)
        self.sets_handler = SheerkaSetsManager(self)
        self.evaluate_concept_handler = SheerkaEvaluateConcept(self)
|
||||
|
||||
    def initialize(self, root_folder: str = None):
        """
        Starting Sheerka
        Loads the current configuration
        Note that when it's the first time, it also creates the needed working folders
        :param root_folder: root configuration folder
        :return: ReturnValue(Success or Error)
        """

        try:
            self.sdp = SheerkaDataProvider(root_folder)
            if self.sdp.first_time:
                # presumably reserves ids below 1000 for builtins -- confirm
                self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)

            event = Event("Initializing Sheerka.")
            self.sdp.save_event(event)
            exec_context = ExecutionContext(self.key, event, self)

            self.initialize_builtin_concepts()
            self.initialize_builtin_parsers()
            self.initialize_builtin_evaluators()
            self.initialize_concepts_definitions(exec_context)

        except IOError as e:
            return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e)

        return ReturnValueConcept(self, True, self)
|
||||
|
||||
    def initialize_builtin_concepts(self):
        """
        Initializes the builtin concepts

        For each BuiltinConcepts key: builds the concept (self for SHEERKA, a
        dedicated builtin class when one exists, a plain Concept otherwise),
        marks BuiltinUnique entries unique/evaluated, syncs with the db unless
        skip_builtins_in_db, and adds it to the cache.
        :return: None
        """
        self.init_log.debug("Initializing builtin concepts")
        builtins_classes = self.get_builtins_classes_as_dict()

        # this all initialization of the builtins seems to be little bit complicated
        # why do we need to update it from DB ?
        for key in BuiltinConcepts:
            concept = self if key == BuiltinConcepts.SHEERKA \
                else builtins_classes[str(key)]() if str(key) in builtins_classes \
                else Concept(key, True, False, key)

            if key in BuiltinUnique:
                concept.metadata.is_unique = True
                concept.metadata.is_evaluated = True

            # non-unique builtins keep their class so new_from_template can re-instantiate them
            if not concept.metadata.is_unique and str(key) in builtins_classes:
                self.builtin_cache[key] = builtins_classes[str(key)]

            if not self.skip_builtins_in_db:
                from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key)
                if from_db is None:
                    self.init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
                    self.set_id_if_needed(concept, True)
                    self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
                else:
                    self.init_log.debug(f"Found concept '{from_db}' in db. Updating.")
                    concept.update_from(from_db)

            self.add_in_cache(concept)
|
||||
|
||||
    def initialize_builtin_parsers(self):
        """
        Init the parsers

        Discovers every subclass of parsers.BaseParser.BaseParser and caches
        them by fully qualified name; anything declared in the base module
        itself is skipped.
        :return:
        """
        core.utils.init_package_import("parsers")
        base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
        for parser in core.utils.get_sub_classes("parsers", base_class):
            # skip classes declared in the base module itself
            if parser.__module__ == base_class.__module__:
                continue

            self.init_log.debug(f"Adding builtin parser '{parser.__name__}'")
            self.parsers[core.utils.get_full_qualified_name(parser)] = parser
|
||||
|
||||
def initialize_builtin_evaluators(self):
|
||||
"""
|
||||
Init the evaluators
|
||||
:return:
|
||||
"""
|
||||
core.utils.init_package_import("evaluators")
|
||||
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"):
|
||||
self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
self.evaluators.append(evaluator)
|
||||
|
||||
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"):
|
||||
self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
self.evaluators.append(evaluator)
|
||||
|
||||
    def initialize_concepts_definitions(self, execution_context):
        """
        Loads the stored BNF definitions (if any) and resolves them into
        grammars through the ConceptLexerParser.
        :param execution_context:
        :return: None (errors are logged, not raised)
        """
        self.init_log.debug("Initializing concepts definitions")
        definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)

        if definitions is None:
            self.init_log.debug("No BNF defined")
            return

        lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS]()
        ret_val = lexer_parser.initialize(execution_context, definitions)
        if not ret_val.status:
            self.init_log.error("Failed to initialize concepts definitions " + str(ret_val.body))
            return

        self.concepts_grammars = lexer_parser.concepts_grammars
|
||||
|
||||
def reset_cache(self, filter_to_use=None):
|
||||
"""
|
||||
reset the different cache that exists
|
||||
:param filter_to_use:
|
||||
:return:
|
||||
"""
|
||||
if filter_to_use is None:
|
||||
self.cache_by_key = {}
|
||||
self.cache_by_id = {}
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
return self
|
||||
|
||||
    def evaluate_user_input(self, text: str, user_name="kodjo"):
        """
        Runs the full parse + evaluation pipeline on a raw user input.

        Note to KSI: If you try to add execution context to this function,
        You may end in an infinite loop
        :param text:
        :param user_name:
        :return: whatever execute() returns for the pipeline
        """
        self.log.debug(f"Processing user input '{text}', {user_name=}.")
        event = Event(text, user_name)
        evt_digest = self.sdp.save_event(event)
        self.log.debug(f"{evt_digest=}")

        with ExecutionContext(self.key, event, self, f"Evaluating '{text}'") as execution_context:
            # seed values: the raw user input plus a request to reduce the result
            user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name))
            reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED))

            # full pipeline: parsing then evaluation, each with before/after hooks
            steps = [
                BuiltinConcepts.BEFORE_PARSING,
                BuiltinConcepts.PARSING,
                BuiltinConcepts.AFTER_PARSING,
                BuiltinConcepts.BEFORE_EVALUATION,
                BuiltinConcepts.EVALUATION,
                BuiltinConcepts.AFTER_EVALUATION
            ]

            ret = self.execute(execution_context, [user_input, reduce_requested], steps)
            execution_context.add_values(return_values=ret)

            if not self.skip_builtins_in_db:
                self.sdp.save_result(execution_context)
            return ret
|
||||
|
||||
    def execute(self, execution_context, return_values, execution_steps, logger=None):
        """
        Executes process for all initial contexts
        Delegates to the SheerkaExecute handler.
        :param execution_context:
        :param return_values: initial ReturnValue concepts to process
        :param execution_steps: ordered step concepts to run
        :param logger: logger to use (if not directly called by sheerka)
        :return:
        """
        return self.execute_handler.execute(execution_context, return_values, execution_steps, logger)
|
||||
|
||||
    def set_id_if_needed(self, obj: Concept, is_builtin: bool):
        """
        Set the id for the concept if needed (next sequential key from sdp).
        For test purpose only !!!!!
        NOTE(review): also called from initialize_builtin_concepts -- confirm
        the "test purpose only" statement above.
        :param obj:
        :param is_builtin: selects the builtin vs user sequential-key entry
        :return:
        """
        if obj.metadata.id is not None:
            return

        entry = self.BUILTIN_CONCEPTS_KEYS if is_builtin else self.USER_CONCEPTS_KEYS
        obj.metadata.id = self.sdp.get_next_key(entry)
        self.log.debug(f"Setting id '{obj.metadata.id}' to concept '{obj.metadata.name}'.")
|
||||
|
||||
    def create_new_concept(self, context, concept: Concept, logger=None):
        """
        Adds a new concept to the system
        Delegates to the SheerkaCreateNewConcept handler.
        :param context:
        :param concept: DefConceptNode
        :param logger:
        :return: digest of the new concept
        """

        return self.create_new_concept_handler.create_new_concept(context, concept, logger)

    def add_concept_to_set(self, context, concept, concept_set, logger=None):
        """
        Add an entry in sdp to tell that concept isa concept_set
        Delegates to the SheerkaSetsManager handler.
        :param context:
        :param concept:
        :param concept_set:
        :param logger:
        :return:
        """
        return self.sets_handler.add_concept_to_set(context, concept, concept_set, logger)

    def get_set_elements(self, concept):
        """
        Concept is supposed to be a set
        Returns all elements of the set
        Delegates to the SheerkaSetsManager handler.
        :param concept:
        :return:
        """

        return self.sets_handler.get_set_elements(concept)

    def evaluate_concept(self, context, concept: Concept, logger=None):
        """
        Evaluates a concept
        It means that if the where clause is True, will evaluate the body
        Delegates to the SheerkaEvaluateConcept handler.
        :param context:
        :param concept:
        :param logger:
        :return: value of the evaluation or error
        """
        return self.evaluate_concept_handler.evaluate_concept(context, concept, logger)
|
||||
|
||||
def add_in_cache(self, concept: Concept):
|
||||
"""
|
||||
Adds a concept template in cache.
|
||||
The cache is used as a proxy before looking at sdp
|
||||
:param concept:
|
||||
:return:
|
||||
"""
|
||||
|
||||
# sanity check
|
||||
if concept.key is None:
|
||||
concept.init_key()
|
||||
|
||||
if concept.key is None:
|
||||
raise KeyError()
|
||||
|
||||
self.cache_by_key[concept.key] = concept
|
||||
|
||||
if concept.id:
|
||||
self.cache_by_id[concept.id] = concept
|
||||
|
||||
return concept
|
||||
|
||||
    def get(self, concept_key, concept_id=None):
        """
        Tries to find a concept
        What is returned must be used as a template for another concept.
        You must not modify the returned concept
        :param concept_key: key of the concept (str or BuiltinConcepts member)
        :param concept_id: when multiple concepts share the same key, use the id
        :return: the concept, a list of concepts sharing the key, or an
                 "unknown" placeholder when nothing matches
        """

        if concept_key is None:
            return ErrorConcept("Concept key is undefined.")

        if isinstance(concept_key, BuiltinConcepts):
            concept_key = str(concept_key)

        # first search in cache, then fall back to sdp
        result = self.cache_by_key[concept_key] if concept_key in self.cache_by_key else \
            self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key)

        # a single match (or any match when no id was requested) wins immediately
        if result and (concept_id is None or not isinstance(result, list)):
            return result

        # several concepts share the key: disambiguate with the id when given
        if isinstance(result, list):
            if concept_id:
                for c in result:
                    if c.id == concept_id:
                        return c
            else:
                return result

        # not found: report which metadata was searched for
        metadata = [("key", concept_key), ("id", concept_id)] if concept_id else ("key", concept_key)
        return self._get_unknown(metadata)
|
||||
|
||||
def get_by_id(self, concept_id):
|
||||
if concept_id is None:
|
||||
return ErrorConcept("Concept id is undefined.")
|
||||
|
||||
# first search in cache
|
||||
result = self.cache_by_id[concept_id] if concept_id in self.cache_by_id else \
|
||||
self.sdp.get_safe(self.CONCEPTS_BY_ID_ENTRY, concept_id)
|
||||
|
||||
return result or self._get_unknown(('id', concept_id))
|
||||
|
||||
def get_concept_definition(self):
|
||||
if self.concepts_definition_cache:
|
||||
return self.concepts_definition_cache
|
||||
|
||||
self.concepts_definition_cache = self.sdp.get_safe(
|
||||
self.CONCEPTS_DEFINITIONS_ENTRY,
|
||||
load_origin=False) or {}
|
||||
return self.concepts_definition_cache
|
||||
|
||||
    def new(self, concept_key, **kwargs):
        """
        Returns an instance of a new concept
        When the concept is supposed to be unique, returns the same instance
        :param concept_key: key, BuiltinConcepts member, or (key, id) tuple
        :param kwargs: overrides forwarded to new_from_template
        :return: a concept, or a list of concepts when several share the key
        """
        # a (key, id) tuple disambiguates concepts sharing the same key
        if isinstance(concept_key, tuple):
            concept_key, concept_id = concept_key[0], concept_key[1]
        else:
            concept_id = None

        template = self.get(concept_key, concept_id)

        # manage concept not found (unless we are deliberately creating the placeholder)
        if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \
                concept_key != BuiltinConcepts.UNKNOWN_CONCEPT:
            return template

        if isinstance(template, list):
            # if template is a list, it means that there are multiple concepts under the same key
            concepts = [self.new_from_template(t, concept_key, **kwargs) for t in template]
            return concepts
        else:
            return self.new_from_template(template, concept_key, **kwargs)
|
||||
|
||||
def new_from_template(self, template, key, **kwargs):
    """Instantiate a concept from a template concept.

    Unique ("singleton") templates are returned as-is; otherwise a fresh
    instance is created (a builtin subclass when one is registered for the
    key) and populated from the template, then from **kwargs.

    :param template: the concept to copy from
    :param key: concept key, used to pick a builtin class when available
    :param kwargs: values routed to props / values / attributes (see below)
    :return: the new concept, or an UNKNOWN_PROPERTY concept when a kwarg
             matches nothing on the instance
    """
    # manage singleton
    if template.metadata.is_unique:
        return template

    # otherwise, create another instance
    concept = self.builtin_cache[key]() if key in self.builtin_cache else Concept()
    concept.update_from(template)

    if len(kwargs) == 0:
        return concept

    # update the properties, values, attributes
    # Not quite sure that this is the correct process order
    for k, v in kwargs.items():
        if k in concept.props:
            concept.set_prop(k, v)
        elif k in PROPERTIES_FOR_NEW:
            concept.values[ConceptParts(k)] = v
        elif hasattr(concept, k):
            setattr(concept, k, v)
        else:
            # unknown kwarg: surface it as an UNKNOWN_PROPERTY concept
            return self.new(BuiltinConcepts.UNKNOWN_PROPERTY, body=k, concept=concept)

    # TODO : add the concept to the list of known concepts (self.instances)
    concept.metadata.is_evaluated = True
    return concept
|
||||
|
||||
def ret(self, who: str, status: bool, value, message=None, parents=None):
    """
    Creates and returns a ReturnValue concept
    :param who: identifier of the caller producing the value
    :param status: success flag
    :param value: payload of the return value
    :param message: optional human-readable message
    :param parents: optional parent return values
    :return: a RETURN_VALUE concept instance
    """
    fields = {
        "who": who,
        "status": status,
        "value": value,
        "message": message,
        "parents": parents,
    }
    return self.new(BuiltinConcepts.RETURN_VALUE, **fields)
|
||||
|
||||
def value(self, obj, reduce_simple_list=False):
    """Recursively unwrap *obj* down to its underlying raw value.

    Unwrapping stops at None, at objects exposing get_value(), at
    non-Concept objects and at concepts with no body.

    :param obj: anything; typically a Concept whose body wraps the value
    :param reduce_simple_list: when True, a single-element list or set body
        is reduced to that element before recursing
    :return: the unwrapped value (or the concept itself when it has no body)
    """
    if obj is None:
        return None

    if hasattr(obj, "get_value"):
        return obj.get_value()

    if not isinstance(obj, Concept):
        return obj

    if obj.body is None:
        return obj

    if reduce_simple_list and isinstance(obj.body, (list, set)) and len(obj.body) == 1:
        # Bug fix: the body may be a *set*, which the original indexed with
        # obj.body[0] (TypeError). next(iter(...)) works for lists and sets.
        body_to_use = next(iter(obj.body))
    else:
        body_to_use = obj.body

    return self.value(body_to_use)
|
||||
|
||||
def get_values(self, objs):
    """Lazily yield the unwrapped value of each element of *objs*.

    A single object (not a list, LIST concept or ENUMERATION concept)
    is treated as a one-element collection. Returns a generator.
    """
    is_collection = (
        isinstance(objs, list)
        or self.isinstance(objs, BuiltinConcepts.LIST)
        or self.isinstance(objs, BuiltinConcepts.ENUMERATION))
    items = objs if is_collection else [objs]
    return (self.value(item) for item in items)
|
||||
|
||||
def is_success(self, obj):
    """Interpret *obj* as a success indicator.

    Booleans pass through, ReturnValueConcept yields its status, builtin
    error concepts are failures; anything else is returned unchanged.
    """
    if isinstance(obj, bool):  # quick win
        return obj

    if isinstance(obj, ReturnValueConcept):
        return obj.status

    is_builtin_error = (isinstance(obj, Concept)
                        and obj.metadata.is_builtin
                        and obj.key in BuiltinErrors)
    return False if is_builtin_error else obj
|
||||
|
||||
def is_known(self, obj):
    """True unless *obj* is the 'unknown concept' placeholder.

    Non-Concept objects are always considered known.
    """
    if isinstance(obj, Concept):
        return obj.key != str(BuiltinConcepts.UNKNOWN_CONCEPT)
    return True
|
||||
|
||||
def isinstance(self, a, b):
    """
    return true if the concept a is an instance of the concept b
    :param a: a Concept instance (passing a BuiltinConcepts member raises)
    :param b: a Concept or a BuiltinConcepts member to compare keys with
    :return: True when the keys match
    """
    if isinstance(a, BuiltinConcepts):  # common KSI error ;-)
        raise SyntaxError("Remember that the first parameter of isinstance MUST be a concept")

    if not isinstance(a, Concept):
        return False

    expected_key = b.key if isinstance(b, Concept) else str(b)
    return a.key == expected_key
|
||||
|
||||
def isa(self, a, b):
    """Delegate to the sets handler: True when concept a 'is a' b."""
    return self.sets_handler.isa(a, b)
|
||||
|
||||
def isagroup(self, concept):
    """Delegate to the sets handler: True when *concept* denotes a set/group."""
    return self.sets_handler.isagroup(concept)
|
||||
|
||||
def get_evaluator_name(self, name):
    """Return *name* prefixed with the evaluator prefix (resolved lazily once)."""
    if self.evaluators_prefix is None:
        # Resolve the base class by dotted path to avoid a hard import here.
        base_class = core.utils.get_class("evaluators.BaseEvaluator.BaseEvaluator")
        self.evaluators_prefix = base_class.PREFIX
    return self.evaluators_prefix + name
|
||||
|
||||
def get_parser_name(self, name):
    """Return *name* prefixed with the parser prefix (resolved lazily once)."""
    if self.parsers_prefix is None:
        # Resolve the base class by dotted path to avoid a hard import here.
        base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
        self.parsers_prefix = base_class.PREFIX
    return self.parsers_prefix + name
|
||||
|
||||
def concepts(self):
    """Return every stored concept, flattened and sorted by numeric id."""
    flattened = []
    for entry in self.sdp.list(self.CONCEPTS_ENTRY):
        # an entry may hold one concept or a list of concepts under one key
        flattened.extend(entry if isinstance(entry, list) else [entry])
    return sorted(flattened, key=lambda concept: int(concept.id))
|
||||
|
||||
def test(self):
|
||||
return f"I have access to Sheerka !"
|
||||
|
||||
def test_error(self):
|
||||
raise Exception("I can raise an error")
|
||||
|
||||
@staticmethod
def _get_unknown(metadata):
    """
    Returns the concept 'UnknownConcept' for a requested id or key
    Note that I don't call the new() method to prevent cyclic call
    :param metadata: one (name, value) tuple — e.g. ('key', 'not_found') or
        ('id', invalid_id) — or a list of such tuples when several
        attributes were given
    :return: an evaluated UnknownConcept carrying the metadata as props
    """
    unknown = UnknownConcept()
    unknown.set_metadata_value(ConceptParts.BODY, metadata)

    # normalise to a list, then copy each pair onto the concept's props
    pairs = metadata if isinstance(metadata, list) else [metadata]
    for pair in pairs:
        unknown.set_prop(pair[0], pair[1])

    unknown.metadata.is_evaluated = True
    return unknown
|
||||
|
||||
@staticmethod
def get_builtins_classes_as_dict():
    """Map each builtin concept key to the Concept subclass implementing it."""
    builtin_classes = (
        c for c in core.utils.get_classes("core.builtin_concepts")
        if issubclass(c, Concept) and c is not Concept)
    # Each class is instantiated once just to read its metadata key.
    return {klass().metadata.key: klass for klass in builtin_classes}
|
||||
|
||||
@staticmethod
def init_logging(debug, loggers):
    """Configure application-wide logging.

    :param debug: when True, use DEBUG level with timestamped messages;
        otherwise INFO level with bare messages
    :param loggers: logger categories forwarded to core.sheerka_logger.set_enabled
    """
    core.sheerka_logger.set_enabled(loggers)
    if debug:
        # log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
        log_format = "%(asctime)s [%(levelname)s] %(message)s"
        log_level = logging.DEBUG
    else:
        log_format = "%(message)s"
        log_level = logging.INFO

    # NOTE(review): console_handler is not among this module's visible
    # imports — presumably core.sheerka_logger.console_handler; confirm it
    # is actually in scope here.
    logging.basicConfig(format=log_format, level=log_level, handlers=[console_handler])
|
||||
@@ -0,0 +1,99 @@
|
||||
from core.builtin_concepts import BuiltinConcepts, ErrorConcept
|
||||
from core.concept import Concept
|
||||
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
|
||||
|
||||
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
|
||||
|
||||
|
||||
class SheerkaCreateNewConcept:
    """
    Manage the creation of a new concept
    """

    def __init__(self, sheerka):
        # back-reference to the main Sheerka facade (sdp, caches, ret/new helpers)
        self.sheerka = sheerka
        self.logger_name = self.create_new_concept.__name__

    def create_new_concept(self, context, concept: Concept, logger=None):
        """
        Adds a new concept to the system
        :param context: execution context (provides event digest and sub-contexts)
        :param concept: DefConceptNode
        :param logger: optional logger; defaults to the sheerka logger
        :return: digest of the new concept
        """

        logger = logger or self.sheerka.log

        concept.init_key()
        concepts_definitions = None
        init_ret_value = None

        # checks for duplicate concepts
        # TODO checks if it exists in cache first

        if self.sheerka.sdp.exists(self.sheerka.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
            error = SheerkaDataProviderDuplicateKeyError(self.sheerka.CONCEPTS_ENTRY + "." + concept.key, concept)
            return self.sheerka.ret(
                self.logger_name,
                False,
                self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept),
                error.args[0])

        # set id before saving in db
        self.sheerka.set_id_if_needed(concept, False)

        # add the BNF if known
        if concept.bnf:
            concepts_definitions = self.sheerka.get_concept_definition()
            # the concept object itself is the key of the definitions mapping
            concepts_definitions[concept] = concept.bnf

            # check if it's a valid BNF or whether it breaks the known rules
            concept_lexer_parser = self.sheerka.parsers[CONCEPT_LEXER_PARSER_CLASS]()
            with context.push(self.sheerka.name, desc=f"Initializing concept definition for {concept}") as sub_context:
                sub_context.concepts[concept.key] = concept  # the concept is not in the real cache yet
                sub_context.log_new(logger)
                init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
                sub_context.add_values(return_values=init_ret_value)
            if not init_ret_value.status:
                # invalid BNF: abort before anything is persisted
                return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))

        # save the new concept in sdp
        try:
            # TODO : needs to make these calls atomic (or at least one single call)
            self.sheerka.sdp.add(
                context.event.get_digest(),
                self.sheerka.CONCEPTS_ENTRY,
                concept,
                use_ref=True)
            self.sheerka.sdp.add(
                context.event.get_digest(),
                self.sheerka.CONCEPTS_BY_ID_ENTRY,
                {concept.id: concept.get_digest()},
                is_ref=True)
            if concepts_definitions is not None:
                self.sheerka.sdp.set(
                    context.event.get_digest(),
                    self.sheerka.CONCEPTS_DEFINITIONS_ENTRY,
                    concepts_definitions,
                    use_ref=True)
        except SheerkaDataProviderDuplicateKeyError as error:
            context.log_error(logger, "Failed to create a new concept.", who=self.logger_name)
            return self.sheerka.ret(
                self.logger_name,
                False,
                self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept),
                error.args[0])

        # Updates the caches
        self.sheerka.cache_by_key[concept.key] = self.sheerka.sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key)
        self.sheerka.cache_by_id[concept.id] = concept
        if init_ret_value is not None and init_ret_value.status:
            self.sheerka.concepts_grammars = init_ret_value.body

        # process the return in needed
        ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
        return ret
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
|
||||
|
||||
class SheerkaDump:
    """Debug helpers that dump Sheerka's concepts and definitions to the log.

    Bug fix: the original accessed ``self.sheerkasdp``, ``self.sheerkalog``,
    ``self.sheerkaget``, ``self.sheerkaisinstance`` and
    ``self.sheerkaCONCEPTS_ENTRY`` — the ``.`` after ``sheerka`` was missing
    (refactor artifact), so every method raised AttributeError.
    """

    def __init__(self, sheerka):
        # back-reference to the main Sheerka facade
        self.sheerka = sheerka

    def dump_concepts(self):
        """Log every stored concept, flattening multi-concept entries."""
        lst = self.sheerka.sdp.list(self.sheerka.CONCEPTS_ENTRY)
        for item in lst:
            if hasattr(item, "__iter__"):
                for i in item:
                    self.sheerka.log.info(i)
            else:
                self.sheerka.log.info(item)

    def dump_definitions(self):
        """Log the raw concept-definitions entry."""
        defs = self.sheerka.sdp.get(self.sheerka.CONCEPTS_DEFINITIONS_ENTRY)
        self.sheerka.log.info(defs)

    def dump_desc(self, *concept_names):
        """Log a human-readable description of each named concept.

        :param concept_names: Concept instances or concept names to describe
        :return: False as soon as a name is unknown; None otherwise
        """
        first = True
        for concept_name in concept_names:
            if isinstance(concept_name, Concept):
                concepts = concept_name
            else:
                concepts = self.sheerka.get(concept_name)
                if self.sheerka.isinstance(concepts, BuiltinConcepts.UNKNOWN_CONCEPT):
                    self.sheerka.log.error(f"Concept '{concept_name}' is unknown")
                    return False

            if not hasattr(concepts, "__iter__"):
                concepts = [concepts]

            for c in concepts:
                if not first:
                    # blank separator line between concepts
                    self.sheerka.log.info("")
                self.sheerka.log.info(f"name : {c.name}")
                self.sheerka.log.info(f"bnf : {c.metadata.definition}")
                self.sheerka.log.info(f"key : {c.key}")
                self.sheerka.log.info(f"body : {c.body}")
                self.sheerka.log.info(f"digest : {c.get_digest()}")
                first = False
|
||||
@@ -0,0 +1,195 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept, DoNotResolve, ConceptParts
|
||||
import core.builtin_helpers
|
||||
|
||||
CONCEPT_EVALUATION_STEPS = [
|
||||
BuiltinConcepts.BEFORE_EVALUATION,
|
||||
BuiltinConcepts.EVALUATION,
|
||||
BuiltinConcepts.AFTER_EVALUATION]
|
||||
|
||||
|
||||
class SheerkaEvaluateConcept:
    """Sub-handler that evaluates concepts: parses their parts into ASTs and
    resolves props/where/pre/post/body into concrete values."""

    def __init__(self, sheerka):
        # back-reference to the main Sheerka facade
        self.sheerka = sheerka
        self.logger_name = self.evaluate_concept.__name__

    def initialize_concept_asts(self, context, concept: Concept, logger=None):
        """
        Updates the codes of the newly created concept
        Basically, it runs the parsers on all parts
        :param concept:
        :param context:
        :param logger:
        :return:
        """
        steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
        for part_key in ConceptParts:
            if part_key in concept.compiled:
                continue  # already compiled

            source = getattr(concept.metadata, part_key.value)
            if source is None or not isinstance(source, str):
                continue

            if source.strip() == "":
                # blank source: mark as not-to-be-resolved
                concept.compiled[part_key] = DoNotResolve(source)
            else:
                with context.push(desc=f"Initializing compiled for {part_key}") as sub_context:
                    sub_context.log_new(logger)
                    sub_context.add_inputs(source=source)
                    to_parse = self.sheerka.ret(context.who, True,
                                                self.sheerka.new(BuiltinConcepts.USER_INPUT, body=source))
                    res = self.sheerka.execute(sub_context, to_parse, steps, logger)
                    concept.compiled[part_key] = res
                    sub_context.add_values(return_values=res)

        # same parsing pass for the property default values
        # NOTE(review): unpacking `prop, default_value` while iterating
        # concept.metadata.props — if props is a dict (concept.props is used
        # like one elsewhere) this needs .items(); confirm the props type.
        for prop, default_value in concept.metadata.props:
            if prop in concept.compiled:
                continue

            if default_value is None or not isinstance(default_value, str):
                continue

            if default_value.strip() == "":
                concept.compiled[prop] = DoNotResolve(default_value)
            else:
                with context.push(desc=f"Initializing AST for property {prop}") as sub_context:
                    sub_context.log_new(logger)
                    sub_context.add_inputs(source=default_value)
                    to_parse = self.sheerka.ret(context.who, True,
                                                self.sheerka.new(BuiltinConcepts.USER_INPUT, body=default_value))
                    # NOTE(review): the parallel branch above passes
                    # (sub_context, ..., logger); here `context` is used and
                    # logger is omitted — looks like an oversight, confirm.
                    res = self.sheerka.execute(context, to_parse, steps)
                    concept.compiled[prop] = res
                    sub_context.add_values(return_values=res)

        # Updates the cache of concepts when possible
        if concept.key in self.sheerka.cache_by_key:
            entry = self.sheerka.cache_by_key[concept.key]
            if isinstance(entry, list):
                # TODO : manage when there are multiple entries
                pass
            else:
                self.sheerka.cache_by_key[concept.key].compiled = concept.compiled

    def resolve(self, context, to_resolve, current_prop, current_concept, logger):
        """Resolve one compiled part/property to its value.

        Returns the resolved value, or a CONCEPT_EVAL_ERROR concept on failure.
        """
        if isinstance(to_resolve, DoNotResolve):
            # blank sources were wrapped at parse time; return them verbatim
            return to_resolve.value

        desc = f"Evaluating {current_prop} (concept={current_concept})"
        context.log(logger, desc, self.logger_name)
        with context.push(desc=desc, obj=current_concept) as sub_context:
            sub_context.log_new(logger)

            # when it's a concept, evaluate it
            if isinstance(to_resolve, Concept) and \
                    not context.sheerka.isinstance(to_resolve, BuiltinConcepts.RETURN_VALUE):
                evaluated = self.evaluate_concept(sub_context, to_resolve, logger)
                sub_context.add_values(return_values=evaluated)
                if evaluated.key == to_resolve.key:
                    return evaluated
                else:
                    error = evaluated

            # otherwise, execute all return values to find out what is the value
            else:
                r = self.sheerka.execute(sub_context, to_resolve, CONCEPT_EVALUATION_STEPS, logger)
                one_r = core.builtin_helpers.expect_one(context, r)
                sub_context.add_values(return_values=one_r)
                if one_r.status:
                    return one_r.value
                else:
                    error = one_r.value

            # both branches fall through here only on failure
            return self.sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR,
                                    body=error,
                                    concept=current_concept,
                                    property_name=current_prop)

    def resolve_list(self, context, list_to_resolve, current_prop, current_concept, logger):
        """When dealing with a list, there are two possibilities"""
        # It may be a list of ReturnValueConcept to execute (always the case for metadata)
        # or a list of single values (may be the case for properties)
        # in this latter case, all values are to be processed one by one and a list should be returned
        if len(list_to_resolve) == 0:
            return []

        if self.sheerka.isinstance(list_to_resolve[0], BuiltinConcepts.RETURN_VALUE):
            # homogeneous return-value list: resolve as a whole
            return self.resolve(context, list_to_resolve, current_prop, current_concept, logger)

        res = []
        for to_resolve in list_to_resolve:
            # sanity check
            if self.sheerka.isinstance(to_resolve, BuiltinConcepts.RETURN_VALUE):
                return self.sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR,
                                        body="Mix between real values and return values",
                                        concept=current_concept,
                                        property_name=current_prop)

            r = self.resolve(context, to_resolve, current_prop, current_concept, logger)
            if self.sheerka.isinstance(r, BuiltinConcepts.CONCEPT_EVAL_ERROR):
                # stop on the first error
                return r
            res.append(r)

        return res

    def evaluate_concept(self, context, concept: Concept, logger=None):
        """
        Evaluation a concept
        It means that if the where clause is True, will evaluate the body
        :param context:
        :param concept:
        :param logger:
        :return: value of the evaluation or error
        """

        logger = logger or self.sheerka.log

        if concept.metadata.is_evaluated:
            return concept  # already evaluated, nothing to do

        # WHERE condition should already be validated by the parser.
        # It's a mandatory condition for the concept before it can be recognized

        #
        # TODO : Validate the PRE condition
        #

        self.initialize_concept_asts(context, concept, logger)

        # to make sure of the order, it don't use ConceptParts.get_parts()
        # props must be evaluated first
        all_metadata_to_eval = ["props", "where", "pre", "post", "body"]

        for metadata_to_eval in all_metadata_to_eval:
            if metadata_to_eval == "props":
                for prop_name in (p for p in concept.props if p in concept.compiled):
                    prop_ast = concept.compiled[prop_name]

                    if isinstance(prop_ast, list):
                        # Do not send the current concept for the properties
                        resolved = self.resolve_list(context, prop_ast, prop_name, None, logger)
                    else:
                        # Do not send the current concept for the properties
                        resolved = self.resolve(context, prop_ast, prop_name, None, logger)
                    if context.sheerka.isinstance(resolved, BuiltinConcepts.CONCEPT_EVAL_ERROR):
                        resolved.set_prop("concept", concept)  # since current concept was not sent
                        return resolved
                    else:
                        concept.set_prop(prop_name, resolved)
            else:
                part_key = ConceptParts(metadata_to_eval)
                if part_key in concept.compiled and concept.compiled[part_key] is not None:
                    metadata_ast = concept.compiled[part_key]
                    resolved = self.resolve(context, metadata_ast, part_key, concept, logger)
                    if context.sheerka.isinstance(resolved, BuiltinConcepts.CONCEPT_EVAL_ERROR):
                        return resolved
                    else:
                        concept.values[part_key] = resolved

        #
        # TODO : Validate the POST condition
        #

        concept.init_key()  # only does it if needed
        concept.metadata.is_evaluated = True
        return concept
|
||||
@@ -0,0 +1,254 @@
|
||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
||||
import core.utils
|
||||
|
||||
|
||||
class SheerkaExecute:
    """
    Manage the execution of a process flow
    """

    def __init__(self, sheerka):
        # back-reference to the main Sheerka facade
        self.sheerka = sheerka

    def call_parsers(self, execution_context, return_values, logger=None):
        """Run every enabled parser (highest priority first) over the
        USER_INPUT return values; returns the resulting return values with
        the original user inputs removed."""

        # return_values must be a list
        if not isinstance(return_values, list):
            return_values = [return_values]

        # first make the distinguish between what is for the parsers and what is not
        result = []
        to_process = []
        for r in return_values:
            if not r.status or not self.sheerka.isinstance(r.body, BuiltinConcepts.USER_INPUT):
                result.append(r)
            else:
                to_process.append(r)

        if not to_process:
            return result  # nothing for the parsers

        # keep track of the originals user inputs, as they need to be removed at the end
        user_inputs = to_process[:]

        # group the parsers by priorities
        instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
        grouped_parsers = {}
        for parser in [p for p in instantiated_parsers if p.enabled]:
            if logger:
                parser.log = logger
            grouped_parsers.setdefault(parser.priority, []).append(parser)
        sorted_priorities = sorted(grouped_parsers.keys(), reverse=True)

        stop_processing = False
        for priority in sorted_priorities:
            inputs_for_this_group = to_process[:]

            for parser in grouped_parsers[priority]:

                return_value_success_found = False
                for return_value in inputs_for_this_group:

                    # unwrap raw text from USER_INPUT wrappers
                    to_parse = return_value.body.body \
                        if self.sheerka.isinstance(return_value.body, BuiltinConcepts.USER_INPUT) \
                        else return_value.body

                    # if self.sheerka.log.isEnabledFor(logging.DEBUG):
                    #     debug_text = "'" + to_parse + "'" if isinstance(to_parse, str) \
                    #         else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
                    #     execution_context.log(logger or self.sheerka.log, f"Parsing {debug_text}")

                    with execution_context.push(desc=f"Parsing using {parser.name}") as sub_context:
                        sub_context.add_inputs(to_parse=to_parse)
                        res = parser.parse(sub_context, to_parse)
                        if res is not None:
                            if hasattr(res, "__iter__"):
                                for r in res:
                                    if r is None:
                                        continue
                                    r.parents = [return_value]
                                    result.append(r)
                                    # parser output may itself be re-parsed later
                                    if self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT):
                                        to_process.append(r)
                                    if r.status:
                                        return_value_success_found = True

                            else:
                                res.parents = [return_value]
                                result.append(res)
                                if self.sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT):
                                    to_process.append(res)
                                if res.status:
                                    return_value_success_found = True
                        sub_context.add_values(return_values=res)

                    if return_value_success_found:
                        stop_processing = True
                        break  # Stop the other return_values (but not the other parsers with the same priority)

            if stop_processing:
                break  # Do not try the other priorities if a match is found

        result = core.utils.remove_list_from_list(result, user_inputs)
        return result

    def call_evaluators(self, execution_context, return_values, process_step, evaluation_context=None, logger=None):
        """Run every enabled evaluator for *process_step*, iterating to a
        fixed point (until one full pass leaves the return values unchanged)."""

        # return_values must be a list
        if not isinstance(return_values, list):
            return_values = [return_values]

        # Evaluation context are contexts that may modify the behaviour of the execution
        # For example, a concept to indicate that the value is not wanted
        # Or a concept to indicate that we want the letter form of the response
        # But first, they need to be transformed into return values
        if evaluation_context is None:
            evaluation_return_values = []
        else:
            evaluation_return_values = [self.sheerka.ret(execution_context.who, True, c) for c in evaluation_context]

        # add the current step as part as the evaluation context
        evaluation_return_values.append(self.sheerka.ret(execution_context.who, True, self.sheerka.new(process_step)))

        # the pool of return values are the mix
        return_values.extend(evaluation_return_values)

        # group the evaluators by priority and sort them
        # The first one to be applied will be the one with the highest priority
        grouped_evaluators = {}
        instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators]

        # pre-process evaluators if needed
        instantiated_evaluators = self._preprocess_evaluators(execution_context, instantiated_evaluators)

        for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]:
            if logger:
                evaluator.log = logger
            grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)

        # order the groups by priority, the higher first
        sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)

        # process
        iteration = 0
        while True:
            with execution_context.push(desc=f"iteration #{iteration}", iteration=iteration) as iteration_context:
                # snapshot used at the end of the pass to detect a fixed point
                simple_digest = return_values[:]
                iteration_context.add_inputs(return_values=simple_digest)

                for priority in sorted_priorities:

                    original_items = return_values[:]
                    evaluated_items = []
                    to_delete = []
                    for evaluator in grouped_evaluators[priority]:
                        evaluator = self._preprocess_evaluators(execution_context, evaluator.__class__())  # fresh copy

                        sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
                        with iteration_context.push(desc=sub_context_desc) as sub_context:
                            sub_context.add_inputs(return_values=original_items)

                            # process evaluators that work on one simple return value at the time
                            from evaluators.BaseEvaluator import OneReturnValueEvaluator
                            if isinstance(evaluator, OneReturnValueEvaluator):
                                debug_result = []
                                for item in original_items:
                                    if evaluator.matches(sub_context, item):
                                        result = evaluator.eval(sub_context, item)
                                        if result is None:
                                            debug_result.append({"input": item, "return_value": None})
                                            continue

                                        to_delete.append(item)
                                        if isinstance(result, list):
                                            evaluated_items.extend(result)
                                        elif isinstance(result, ReturnValueConcept):
                                            evaluated_items.append(result)
                                        else:
                                            # evaluator returned an unexpected type: wrap as error
                                            error = self.sheerka.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result,
                                                                     evaluator=evaluator)
                                            result = self.sheerka.ret("sheerka.process", False, error, parents=[item])
                                            evaluated_items.append(result)
                                        debug_result.append({"input": item, "return_value": result})
                                    else:
                                        debug_result.append({"input": item, "return_value": "** No Match **"})
                                sub_context.add_values(return_values=debug_result)

                            # process evaluators that work on all return values
                            else:
                                if evaluator.matches(sub_context, original_items):
                                    results = evaluator.eval(sub_context, original_items)
                                    if results is None:
                                        continue
                                    if not isinstance(results, list):
                                        results = [results]
                                    for result in results:
                                        evaluated_items.append(result)
                                        to_delete.extend(result.parents)
                                    sub_context.add_values(return_values=results)
                                else:
                                    sub_context.add_values(return_values="** No Match **")

                    # new pool = evaluator outputs + untouched originals
                    return_values = evaluated_items
                    return_values.extend([item for item in original_items if item not in to_delete])

                iteration_context.add_values(return_values=return_values[:])

                # have we done something ?
                to_compare = return_values[:]
                if simple_digest == to_compare:
                    break

                # inc the iteration and continue
                iteration += 1

        # remove all evaluation context that are not reduced
        return_values = core.utils.remove_list_from_list(return_values, evaluation_return_values)
        return return_values

    def execute(self, execution_context, return_values, execution_steps, logger=None):
        """
        Executes process for all initial contexts
        :param execution_context:
        :param return_values:
        :param execution_steps:
        :param logger: logger to use (if not directly called by sheerka)
        :return:
        """

        for step in execution_steps:
            copy = return_values[:] if hasattr(return_values, "__iter__") else [return_values]
            with execution_context.push(step=step, iteration=0, desc=f"{step=}", return_values=copy) as sub_context:
                sub_context.log(logger or self.sheerka.log, f"{step=}, context='{sub_context}'")

                # PARSING goes to the parsers, every other step to the evaluators
                if step == BuiltinConcepts.PARSING:
                    return_values = self.call_parsers(sub_context, return_values, logger)
                else:
                    return_values = self.call_evaluators(sub_context, return_values, step, None, logger)

                if copy != return_values:
                    # only log when the step actually changed something
                    sub_context.log_result(logger or self.sheerka.log, return_values)

                sub_context.add_values(return_values=return_values)

        return return_values

    def _preprocess_evaluators(self, context, evaluators):
        """Apply the context's preprocess concepts (matched by name) to the
        evaluators, overriding their attributes; accepts one evaluator or a
        list and returns the same shape."""
        if not context.preprocess:
            return evaluators

        if not hasattr(evaluators, "__iter__"):
            single_one = True
            evaluators = [evaluators]
        else:
            single_one = False

        for preprocess in context.preprocess:
            for e in evaluators:
                if preprocess.props["name"].value == e.name:
                    for prop, value in preprocess.props.items():
                        if prop == "name":
                            continue  # 'name' is the matching key, not a setting
                        if hasattr(e, prop):
                            setattr(e, prop, value.value)
        return evaluators[0] if single_one else evaluators
|
||||
@@ -0,0 +1,83 @@
|
||||
from core.builtin_concepts import BuiltinConcepts, ErrorConcept
|
||||
from core.concept import Concept
|
||||
|
||||
GROUP_PREFIX = 'All_'
|
||||
|
||||
|
||||
class SheerkaSetsManager:
    """Sub-handler managing concept sets/groups (the 'All_<id>' sdp entries)."""

    def __init__(self, sheerka):
        # back-reference to the main Sheerka facade
        self.sheerka = sheerka
        self.logger_name = self.add_concept_to_set.__name__

    def add_concept_to_set(self, context, concept, concept_set, logger=None):
        """
        Add an entry in sdp to tell that concept isa concept_set
        :param context:
        :param concept:
        :param concept_set:
        :param logger:
        :return: a ReturnValue concept (SUCCESS, CONCEPT_ALREADY_IN_SET or error)
        """
        logger = logger or self.sheerka.log

        context.log(logger, f"Adding concept {concept} to set {concept_set}", who=self.logger_name)

        assert concept.id
        assert concept_set.id

        try:
            ret = self.sheerka.sdp.add_unique(context.event.get_digest(), GROUP_PREFIX + concept_set.id, concept.id)
            if ret == (None, None):  # concept already in set
                return self.sheerka.ret(
                    self.logger_name,
                    False,
                    self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set))
            else:
                return self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
        except Exception as error:
            # deliberate catch-all: any storage failure is reported as a failed return value
            context.log_error(logger, "Failed to add to set.", who=self.logger_name)
            return self.sheerka.ret(self.logger_name, False, ErrorConcept(error), error.args[0])

    def get_set_elements(self, concept):
        """
        Concept is supposed to be a set
        Returns all elements if the set
        :param concept:
        :return: list of member concepts, or a NOT_A_SET concept
        """

        assert concept.id

        ids = self.sheerka.sdp.get_safe(GROUP_PREFIX + concept.id)
        if ids is None:
            return self.sheerka.new(BuiltinConcepts.NOT_A_SET, body=concept)

        elements = [self.sheerka.get_by_id(element_id) for element_id in ids]
        return elements

    def isa(self, a, b):
        """
        return true if the concept a is a b
        Will handle when the keyword isa will be implemented
        :param a:
        :param b:
        :return:
        """

        if isinstance(a, BuiltinConcepts):  # common KSI error ;-)
            # Bug fix: the original message said "isinstance" (copy-paste
            # from Sheerka.isinstance); this check is about isa.
            raise SyntaxError("Remember that the first parameter of isa MUST be a concept")

        assert isinstance(a, Concept)
        assert isinstance(b, Concept)

        # TODO, first check the 'isa' property of a

        return self.sheerka.sdp.exists(GROUP_PREFIX + b.id, a.id)

    def isagroup(self, concept):
        """True if exists All_<concept_id> in sdp"""
        if not concept.id:
            return None

        res = self.sheerka.sdp.get_safe(GROUP_PREFIX + concept.id)
        return res is not None
|
||||
@@ -0,0 +1,50 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
# Logger classes explicitly switched on at runtime (see set_enabled below).
enabled = []
# Logger classes muted by default.
disabled = ["init", "sdp", "parsers", "evaluators", "verbose"]

# Handler writing to stdout.
# NOTE(review): created here but never attached in this module — presumably
# wired up by whatever configures logging; confirm against callers.
console_handler = logging.StreamHandler(sys.stdout)

# Cache of loggers already configured by get_logger, keyed by name.
all_loggers = {}
|
||||
|
||||
|
||||
def set_enabled(to_enable):
    """
    Enable one or more logger classes.

    :param to_enable: a logger class name, or an iterable of names;
                      None is a no-op
    """
    if to_enable is None:
        return

    # A plain string is iterable too: without this explicit check the
    # ``enabled`` list was extended with the individual characters of
    # the name instead of the name itself.
    if isinstance(to_enable, str) or not hasattr(to_enable, "__iter__"):
        to_enable = [to_enable]

    enabled.extend(to_enable)
|
||||
|
||||
|
||||
def to_discard(logger_class):
    """
    Decide whether log records for *logger_class* should be dropped.

    A class is discarded only when it appears in ``disabled`` and was not
    explicitly enabled (either verbatim or with surrounding dots stripped).
    """
    if logger_class is None:
        return False

    is_enabled = logger_class in enabled or logger_class.strip(".") in enabled
    return not is_enabled and logger_class in disabled
|
||||
|
||||
|
||||
def get_logger(logger_name):
    """
    Return a cached logger, muting it when its class is disabled.

    A logger under a disabled class prefix is switched off, unless the
    name lives under an explicitly enabled ``verbose.`` namespace.
    """
    cached = all_loggers.get(logger_name)
    if cached is not None:
        return cached

    logger = logging.getLogger(logger_name)
    all_loggers[logger_name] = logger

    if any(logger_name.startswith(d + ".") and to_discard(d) for d in disabled):
        logger.disabled = True

    # an enabled verbose namespace wins over the disabled list
    if any(logger_name.startswith("verbose." + e) for e in enabled):
        logger.disabled = False

    return logger
|
||||
@@ -0,0 +1,161 @@
|
||||
import dataclasses
|
||||
from enum import Enum
|
||||
|
||||
from core.concept import Concept, PROPERTIES_TO_SERIALIZE
|
||||
from core.sheerka.Sheerka import ExecutionContext
|
||||
from core.tokenizer import Token
|
||||
from evaluators.BaseEvaluator import BaseEvaluator
|
||||
from parsers.BaseParser import BaseParser, Node
|
||||
from parsers.BnfParser import BnfParser
|
||||
from parsers.ConceptLexerParser import UnrecognizedTokensNode, ParsingExpression
|
||||
from parsers.PythonParser import PythonNode
|
||||
from sdp.sheerkaDataProvider import Event
|
||||
|
||||
# Keys used in the serialized dict representation of transformed objects.
OBJ_TYPE_KEY = "__type__"
OBJ_ID_KEY = "__id__"
OBJ_NAME_KEY = "__name__"

# Blank concept used as the diff baseline when a concept has no id
# (see SheerkaTransform.concept_to_dict).
default_concept = Concept()
|
||||
|
||||
|
||||
class SheerkaTransformType(Enum):
    """Kind tag stored under OBJ_TYPE_KEY in serialized dictionaries."""
    Concept = 1
    Reference = 2
    ExecutionContext = 3
    Event = 4
    Node = 5
    Exception = 6

    def __repr__(self):
        return f"{type(self).__name__}.{self.name}"
|
||||
|
||||
|
||||
class SheerkaTransform:
    """
    Serializes sheerka objects (concepts, execution contexts, events,
    parser nodes, exceptions...) into plain dictionaries.

    Concepts, contexts and events are tracked by identity: the second time
    one is met, a Reference entry is emitted instead, which keeps the
    output finite on shared or cyclic structures.
    """

    def __init__(self, sheerka):
        # already-serialized object -> numeric id (see exist())
        self.ids = {}
        self.sheerka = sheerka
        # last id handed out; incremented before each new tracked object
        self.id_count = -1

    def to_dict(self, obj):
        """
        Recursively transform *obj* into a JSON-friendly structure.

        :param obj: any object reachable from the serialized root
        :return: a dict/list/str/primitive representation of *obj*
        """
        if isinstance(obj, (Concept, ExecutionContext, Event)):
            exists, _id = self.exist(obj)
            if exists:
                # already serialized: emit a reference instead of recursing
                return {
                    OBJ_TYPE_KEY: SheerkaTransformType.Reference,
                    OBJ_ID_KEY: _id
                }
            else:
                # register the object before serializing it, so nested
                # occurrences become references
                self.id_count = self.id_count + 1
                self.ids[obj] = self.id_count

        if isinstance(obj, Concept):
            return self.concept_to_dict(obj)

        elif isinstance(obj, ExecutionContext):
            return self.execution_context_to_dict(obj)

        elif isinstance(obj, Event):
            return {
                OBJ_TYPE_KEY: SheerkaTransformType.Event,
                OBJ_ID_KEY: self.id_count,
                'digest': obj.get_digest()}

        elif isinstance(obj, (BaseParser, BaseEvaluator, BnfParser)):
            # parsers/evaluators are referenced by name only
            return obj.name

        elif isinstance(obj, Token):
            return obj.__dict__

        elif isinstance(obj, PythonNode):
            # PythonNode is serialized as its source plus an AST dump
            return {
                OBJ_TYPE_KEY: SheerkaTransformType.Node,
                OBJ_NAME_KEY: "PythonNode",
                'source': obj.source,
                'ast_': obj.get_dump(obj.ast_)
            }

        elif isinstance(obj, Node):
            # generic parser node: serialize every instance attribute
            to_dict = {
                OBJ_TYPE_KEY: SheerkaTransformType.Node,
                OBJ_NAME_KEY: obj.__class__.__name__,
            }
            for k, v in obj.__dict__.items():
                to_dict[k] = self.to_dict(v)

            return to_dict

        elif isinstance(obj, Exception):
            to_dict = {
                OBJ_TYPE_KEY: SheerkaTransformType.Exception,
                OBJ_NAME_KEY: obj.__class__.__name__,
            }
            for k, v in obj.__dict__.items():
                to_dict[k] = self.to_dict(v)
            return to_dict

        elif isinstance(obj, ParsingExpression):
            return obj.__repr__()

        elif isinstance(obj, dict):
            # concept keys are stringified, other keys are kept as-is
            return dict((str(k) if isinstance(k, Concept) else k, self.to_dict(v)) for k, v in obj.items())

        elif hasattr(obj, "__iter__") and not isinstance(obj, str):
            return list(self.to_dict(o) for o in obj)

        else:
            # primitives (int, float, str, None...) pass through unchanged
            return obj

    def concept_to_dict(self, obj: Concept):
        """
        Serialize a concept as a diff against its reference version,
        so only fields that differ are emitted.

        :param obj: the concept to serialize
        :return: the dict representation
        """
        to_dict = {
            OBJ_TYPE_KEY: SheerkaTransformType.Concept,
            OBJ_ID_KEY: self.id_count,
        }
        if obj.id:
            # diff against the stored version of the concept
            ref = self.sheerka.get(obj.key, obj.id)
            to_dict["id"] = obj.id
        else:
            # unsaved concept: diff against a blank default
            ref = default_concept

        # transform metadata
        for prop in PROPERTIES_TO_SERIALIZE:
            value = getattr(obj.metadata, prop)
            ref_value = getattr(ref.metadata, prop)
            if value != ref_value:
                to_dict["meta." + prop] = self.to_dict(value)

        # transform value
        for metadata, value in obj.values.items():
            ref_value = ref.values[metadata] if metadata in ref.values else None
            if value != ref_value:
                to_dict[metadata.value] = self.to_dict(value)

        # transform properties (only those missing or different in the ref)
        for prop in obj.props:
            value = obj.props[prop].value
            if prop not in ref.props or value != ref.props[prop].value:
                if "props" not in to_dict:
                    to_dict["props"] = []
                to_dict["props"].append((prop, self.to_dict(value)))

        return to_dict

    def execution_context_to_dict(self, obj: ExecutionContext):
        """
        Serialize an execution context, skipping its back-pointer to sheerka
        (which would drag the whole engine into the output).
        """
        to_dict = {
            OBJ_TYPE_KEY: SheerkaTransformType.ExecutionContext,
            OBJ_ID_KEY: self.id_count
        }
        for property_name in obj.__dict__:
            if property_name == "sheerka":
                continue
            to_dict[property_name] = self.to_dict(getattr(obj, property_name))

        return to_dict

    def exist(self, obj):
        """
        Return (True, id) when *obj* was already serialized, else (False, None).

        Matches by identity first, then by equality, so equal-but-distinct
        objects share one entry.
        """
        for k, v in self.ids.items():
            if id(k) == id(obj) or k == obj:
                return True, v

        return False, None
|
||||
@@ -0,0 +1,411 @@
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class TokenKind(Enum):
    """
    All token categories produced by the Tokenizer.

    The string values are descriptive tags and MUST stay unique: Enum
    silently turns a later member with a duplicate value into an alias of
    the earlier one. LBRACKET/LBRACE and DOLLAR/EURO used to collide that
    way, making e.g. ``TokenKind.LBRACE is TokenKind.LBRACKET`` true.
    """
    EOF = "eof"
    WHITESPACE = "whitespace"
    NEWLINE = "newline"
    KEYWORD = "keyword"
    IDENTIFIER = "identifier"
    CONCEPT = "concept"
    STRING = "string"
    NUMBER = "number"
    TRUE = "true"
    FALSE = "false"
    LPAR = "lpar"
    RPAR = "rpar"
    LBRACKET = "lbracket"  # was "lbrace", which made LBRACE an alias of LBRACKET
    RBRACKET = "rbracket"
    LBRACE = "lbrace"
    RBRACE = "rbrace"
    PLUS = "plus"
    MINUS = "minus"
    STAR = "star"
    SLASH = "slash"
    PERCENT = "percent"
    COMMA = "comma"
    SEMICOLON = "semicolon"
    COLON = "colon"
    DOT = "dot"
    QMARK = "qmark"
    VBAR = "vbar"
    AMPER = "amper"
    EQUALS = "="
    AT = "at"
    BACK_QUOTE = "bquote"  # `
    BACK_SLASH = "bslash"  # \
    CARAT = "carat"  # ^
    DOLLAR = "dollar"  # $
    EURO = "euro"  # € — was "dollar", which made EURO an alias of DOLLAR
    STERLING = "sterling"  # £ — typo "steling" fixed
    EMARK = "emark"  # !
    GREATER = "greater"  # >
    LESS = "less"  # <
    HASH = "HASH"  # #
    TILDE = "tilde"  # ~
    UNDERSCORE = "underscore"  # _
    DEGREE = "degree"  # °
|
||||
|
||||
|
||||
@dataclass()
class Token:
    """One lexical token: its kind, raw value and source position."""
    type: TokenKind
    value: object
    index: int
    line: int
    column: int

    def __repr__(self):
        # identifiers are shown as plain text; invisible tokens get a
        # readable placeholder; everything else shows its raw value
        if self.type == TokenKind.IDENTIFIER:
            shown = str(self.value)
        else:
            shown = {
                TokenKind.WHITESPACE: "<ws>",
                TokenKind.NEWLINE: r"\n",
                TokenKind.EOF: "<EOF>",
            }.get(self.type, self.value)
        return f"Token({shown})"
|
||||
|
||||
|
||||
@dataclass()
class LexerError(Exception):
    """
    Raised by the Tokenizer on malformed input.

    NOTE(review): @dataclass generates an __init__ that does not call
    Exception.__init__, so ``args`` stays empty — read the fields directly.
    """
    message: str
    text: str    # the offending source text (or partial token)
    index: int   # absolute offset in the input
    line: int    # 1-based line of the error
    column: int  # 1-based column of the error
|
||||
|
||||
|
||||
class Keywords(Enum):
    """Reserved words of the concept language; values are the source spellings."""
    DEF = "def"
    CONCEPT = "concept"
    FROM = "from"
    BNF = "bnf"
    AS = "as"
    WHERE = "where"
    PRE = "pre"
    POST = "post"
    ISA = "isa"
|
||||
|
||||
|
||||
class Tokenizer:
    """
    Iterable lexer: yields a Token for each lexeme of the given text and
    ends with an EOF token.

    The cursor (index/line/column) lives on the instance, so a Tokenizer
    is meant to be iterated once.
    """

    KEYWORDS = set(x.value for x in Keywords)

    # Characters that always lex to a single one-character token.
    # Collapses ~30 previously duplicated elif branches into one lookup;
    # none of these characters overlaps the multi-character rules below.
    SINGLE_CHAR_TOKENS = {
        "/": TokenKind.SLASH,
        "*": TokenKind.STAR,
        "{": TokenKind.LBRACE,
        "}": TokenKind.RBRACE,
        "(": TokenKind.LPAR,
        ")": TokenKind.RPAR,
        "[": TokenKind.LBRACKET,
        "]": TokenKind.RBRACKET,
        "=": TokenKind.EQUALS,
        ",": TokenKind.COMMA,
        ".": TokenKind.DOT,
        ";": TokenKind.SEMICOLON,
        ":": TokenKind.COLON,
        "?": TokenKind.QMARK,
        "|": TokenKind.VBAR,
        "&": TokenKind.AMPER,
        "<": TokenKind.LESS,
        ">": TokenKind.GREATER,
        "!": TokenKind.EMARK,
        "`": TokenKind.BACK_QUOTE,
        "\\": TokenKind.BACK_SLASH,
        "^": TokenKind.CARAT,
        "$": TokenKind.DOLLAR,
        "€": TokenKind.EURO,
        "£": TokenKind.STERLING,
        "#": TokenKind.HASH,
        "°": TokenKind.DEGREE,
        "~": TokenKind.TILDE,
    }

    def __init__(self, text):
        self.text = text
        self.text_len = len(text)
        self.column = 1  # 1-based column of the next character
        self.line = 1    # 1-based line of the next character
        self.i = 0       # absolute index of the next character

    def _advance(self, count):
        """Move *count* characters forward on the current line."""
        self.i += count
        self.column += count

    def __iter__(self):
        while self.i < self.text_len:
            c = self.text[self.i]

            if c in ("+", "-"):
                # a sign directly followed by a digit starts a signed number
                if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
                    number = self.eat_number(self.i)
                    yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
                    self._advance(len(number))
                else:
                    kind = TokenKind.PLUS if c == "+" else TokenKind.MINUS
                    yield Token(kind, c, self.i, self.line, self.column)
                    self._advance(1)
            elif c == " " or c == "\t":
                whitespace = self.eat_whitespace(self.i)
                yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
                self._advance(len(whitespace))
            elif c == "\n" or c == "\r":
                newline = self.eat_newline(self.i)
                yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
                self.i += len(newline)
                self.column = 1
                self.line += 1
            elif c == "c" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":":
                # concept literal of the form c:<name>:
                concept_name = self.eat_concept_name(self.i + 2, self.line, self.column)
                yield Token(TokenKind.CONCEPT, concept_name, self.i, self.line, self.column)
                self._advance(len(concept_name) + 3)  # "c:" + name + ":"
            elif c.isalpha() or (c == "_" and self.i + 1 < self.text_len and self.text[self.i + 1].isalpha()):
                identifier = self.eat_identifier(self.i)
                is_keyword = identifier in self.KEYWORDS
                token_type = TokenKind.KEYWORD if is_keyword else TokenKind.IDENTIFIER
                value = Keywords(identifier) if is_keyword else identifier
                yield Token(token_type, value, self.i, self.line, self.column)
                self._advance(len(identifier))
            elif c == "_":
                # lone underscore not starting an identifier
                # (replaces an unreachable duplicate branch in the old code)
                yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
                self._advance(1)
            elif c.isdigit():
                number = self.eat_number(self.i)
                yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
                self._advance(len(number))
            elif c == "'" or c == '"':
                string, newlines = self.eat_string(self.i, self.line, self.column)
                yield Token(TokenKind.STRING, string, self.i, self.line, self.column)  # quotes are kept
                self.i += len(string)
                self.column = 1 if newlines > 0 else self.column + len(string)
                self.line += newlines
            elif c in self.SINGLE_CHAR_TOKENS:
                yield Token(self.SINGLE_CHAR_TOKENS[c], c, self.i, self.line, self.column)
                self._advance(1)
            else:
                raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)

        yield Token(TokenKind.EOF, "", self.i, self.line, self.column)

    def eat_concept_name(self, start, line, column):
        """
        Read a concept name starting at *start*, up to the closing colon
        (excluded).

        :raises LexerError: on an embedded newline, a missing closing colon
                            or an empty name
        """
        result = ""
        i = start
        end_colon_found = False

        while i < self.text_len:
            c = self.text[i]

            if c == "\n":
                raise LexerError(f"New line is forbidden in concept name", result, i, line, column + 2 + len(result))

            if c == ":":
                end_colon_found = True
                break

            result += c
            i += 1

        if not end_colon_found:
            raise LexerError(f"Missing ending colon", result, i, line, column + 2 + len(result))

        if result == "":
            raise LexerError(f"Concept name not found", result, start, line, column + 2 + len(result))

        return result

    def eat_whitespace(self, start):
        """Consume a run of spaces/tabs starting at *start* and return it."""
        result = self.text[start]
        i = start + 1
        while i < self.text_len:
            c = self.text[i]
            if c == " " or c == "\t":
                result += c
                i += 1
            else:
                break

        return result

    def eat_newline(self, start):
        """Return the newline sequence at *start* ("\\n", "\\r", "\\n\\r" or "\\r\\n")."""
        if start + 1 == self.text_len:
            return self.text[start]

        current = self.text[start]
        following = self.text[start + 1]
        # a two-character pair of opposite line terminators counts as one newline
        if current == "\n" and following == "\r" or current == "\r" and following == "\n":
            return current + following

        return current

    def eat_identifier(self, start):
        """Consume an identifier (letters, digits, '_' and '-') starting at *start*."""
        result = self.text[start]
        i = start + 1
        while i < self.text_len:
            c = self.text[i]
            if c.isalpha() or c == "_" or c == "-" or c.isdigit():
                result += c
                i += 1
            else:
                break

        return result

    def eat_number(self, start):
        """Consume a number (digits and dots, optional leading sign) starting at *start*."""
        result = self.text[start]
        i = start + 1
        while i < self.text_len:
            c = self.text[i]
            if c.isdigit() or c == ".":
                result += c
                i += 1
            else:
                break

        return result

    def eat_string(self, start_index, start_line, start_column):
        """
        Consume a quoted string (quotes included) starting at *start_index*.

        :return: (string, number_of_newlines_inside)
        :raises LexerError: when the closing quote is missing
        """
        quote = self.text[start_index]
        result = self.text[start_index]
        lines_count = 0

        i = start_index + 1
        escape = False
        newline = None
        while i < self.text_len:
            c = self.text[i]
            result += c
            i += 1

            # count line breaks inside the string; a \n\r or \r\n pair is one
            if newline:
                lines_count += 1
                newline = c if c == newline else None
            else:
                if c == "\r" or c == "\n":
                    newline = c

            if c == "\\":
                escape = True
            elif c == quote and not escape:
                break
            else:
                escape = False

        # add trailing new line if needed
        if newline:
            lines_count += 1

        if result[-1] != quote:
            raise LexerError("Missing Trailing quote", result, i, start_line + lines_count,
                             1 if lines_count > 0 else start_column + len(result))

        return result, lines_count
|
||||
@@ -0,0 +1,238 @@
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
|
||||
from core.tokenizer import TokenKind
|
||||
|
||||
|
||||
def sysarg_to_string(argv):
    """
    Join a list of command line arguments into a single string.

    Arguments containing a space are re-quoted with double quotes. When the
    whole result is one quoted argument, the surrounding quotes are stripped.

    :param argv: list of argument strings (e.g. sys.argv); may be None/empty
    :return: the joined command line as one string
    """
    if argv is None or not argv:
        return ""

    parts = ['"' + s + '"' if " " in s else s for s in argv]
    result = " ".join(parts)

    # Strip the surrounding quotes only when they actually wrap the whole
    # string. The previous unconditional strip mangled multi-argument
    # results whose first argument was quoted, and raised IndexError on
    # an empty first argument.
    if len(result) >= 2 and result[0] in ('"', "'") and result[-1] == result[0]:
        result = result[1:-1]
    return result
|
||||
|
||||
|
||||
def get_class(qname):
    """
    Resolve a class (or any module attribute) from its fully qualified name.

    :param qname: dotted path, e.g. ``"package.module.ClassName"``
    :return: the resolved object
    """
    module_path, _, _ = qname.rpartition('.')
    # __import__ returns the top-level package; walk down from there
    target = __import__(module_path)
    for attribute in qname.split('.')[1:]:
        target = getattr(target, attribute)
    return target
|
||||
|
||||
|
||||
def get_module(qname):
    """
    Resolve a module object from its fully qualified name.

    :param qname: dotted module path, e.g. ``"package.module"``
    :return: the module object
    """
    # __import__ returns the top-level package; walk down to the leaf module
    module = __import__(qname)
    for part in qname.split('.')[1:]:
        module = getattr(module, part)
    return module
|
||||
|
||||
|
||||
def new_object(kls, *args, **kwargs):
    """
    Instantiate a class from its fully qualified name.

    :param kls: dotted class path
    :param args: positional constructor arguments
    :param kwargs: keyword constructor arguments
    :return: the new instance
    """
    return get_class(kls)(*args, **kwargs)
|
||||
|
||||
|
||||
def get_full_qualified_name(obj):
    """
    Return the fully qualified name of a class (including its module name).

    If *obj* is itself a class its own name is qualified, otherwise the
    name of its type is used. Builtin types come back without the module
    prefix.

    :param obj: a class or an instance
    :return: the qualified name as a string
    """
    # isinstance(obj, type) also covers instances of metaclasses, which the
    # previous ``obj.__class__ == type`` comparison missed; it also removes
    # the duplicated class/instance branches.
    cls = obj if isinstance(obj, type) else obj.__class__
    module = cls.__module__
    if module is None or module == str.__class__.__module__:
        return cls.__name__  # Avoid reporting __builtin__
    return module + '.' + cls.__name__
|
||||
|
||||
|
||||
def get_classes(module_name):
    """
    Yield every class reachable as an attribute of the given module.

    :param module_name: name of the module
    :return: generator of class objects
    """
    module = get_module(module_name)
    attributes = (getattr(module, attr_name) for attr_name in dir(module))
    yield from (attr for attr in attributes if inspect.isclass(attr))
|
||||
|
||||
|
||||
def get_classes_from_package(package_name):
    """
    Yield every class of every module of the given package.

    :param package_name: name of the package
    :return: generator of class objects
    """
    package = __import__(package_name)
    prefix = package.__name__ + "."
    for _, module_name, _ in pkgutil.iter_modules(package.__path__, prefix):
        yield from get_classes(module_name)
|
||||
|
||||
|
||||
def init_package_import(package_name):
    """Import every sub-module of *package_name* so their side effects run."""
    package = __import__(package_name)
    qualified_prefix = package.__name__ + "."
    for _, module_name, _ in pkgutil.iter_modules(package.__path__, qualified_prefix):
        importlib.import_module(module_name)
|
||||
|
||||
|
||||
def get_sub_classes(package_name, base_class):
    """
    Recursively collect the subclasses of *base_class* that belong to
    *package_name*.

    :param package_name: module-path prefix used to filter the classes
    :param base_class: a class, or its fully qualified name as a string
    :return: list of matching subclasses
    """
    if isinstance(base_class, str):
        base_class = get_class(base_class)

    found = set(base_class.__subclasses__())
    for child in base_class.__subclasses__():
        found.update(get_sub_classes(package_name, child))

    # limit to the classes of the package
    return [cls for cls in found if cls.__module__.startswith(package_name)]
|
||||
|
||||
|
||||
def remove_from_list(lst, to_remove_predicate):
    """
    Remove, in place, every element of *lst* matching the predicate.

    :param lst: the list to filter (mutated)
    :param to_remove_predicate: callable returning True for elements to drop
    :return: the same list object, filtered
    """
    # slice-assign so the original list object keeps its identity
    lst[:] = [item for item in lst if not to_remove_predicate(item)]
    return lst
|
||||
|
||||
|
||||
def remove_list_from_list(lst, to_remove):
    """
    Remove, in place, the first occurrence of each element of *to_remove*
    from *lst*; elements not present are silently ignored.

    See https://stackoverflow.com/questions/2514961 for why an in-place
    removal is preferred over a list comprehension here.

    :return: the same list object
    """
    for element in to_remove:
        if element in lst:
            lst.remove(element)
    return lst
|
||||
|
||||
|
||||
def product(a, b):
    """
    Cartesian-style product between *a* (a list of lists) and *b* (a list):
    each element of b is appended to a copy of each list of a.

    An empty or None operand yields the other operand unchanged.
    """
    if a is None or len(a) == 0:
        return b
    if b is None or len(b) == 0:
        return a

    # same ordering as the nested loops: b varies slowest
    return [prefix + [item] for item in b for prefix in a]
|
||||
|
||||
|
||||
def strip_quotes(text):
    """
    Remove a matching pair of surrounding quotes from *text*.

    Non-string inputs come back untouched, as does text that is not wrapped
    in a matching quote pair.

    :param text: any value; only strings are processed
    :return: the unquoted string, or the input unchanged
    """
    if not isinstance(text, str) or len(text) < 2:
        return text

    # Only strip when the quotes actually match: the previous version also
    # dropped the last character of strings like ``'abc`` and turned a lone
    # quote character into an empty string.
    if text[0] in ("'", '"') and text[-1] == text[0]:
        return text[1:-1]
    return text
|
||||
|
||||
|
||||
def strip_tokens(tokens, strip_eof=False):
    """
    Strip leading and trailing whitespace/newline tokens from a token list.

    :param tokens: the token list (None passes through)
    :param strip_eof: when True, a trailing EOF token is stripped too
    :return: the trimmed sub-list (a new list), [] when everything is stripped
    """
    if tokens is None:
        return None

    leading_kinds = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
    trailing_kinds = leading_kinds + (TokenKind.EOF,) if strip_eof else leading_kinds

    first = 0
    total = len(tokens)
    while first < total and tokens[first].type in leading_kinds:
        first += 1

    if first == total:
        return []

    last = total - 1
    while last > 0 and tokens[last].type in trailing_kinds:
        last -= 1

    return tokens[first: last + 1]
|
||||
|
||||
|
||||
def escape_char(text, to_escape):
    """
    Prefix every character of *text* found in *to_escape* with a backslash.

    :param text: the string to process
    :param to_escape: characters that must be escaped
    :return: the escaped string
    """
    return "".join("\\" + ch if ch in to_escape else ch for ch in text)
|
||||
|
||||
def pp(items):
    """
    Pretty-print helper: non-empty iterables are rendered one element per
    line, anything else falls back to str().
    """
    if not hasattr(items, "__iter__") or len(items) == 0:
        return str(items)

    return " \n" + " \n".join(str(item) for item in items)
|
||||
@@ -0,0 +1,131 @@
|
||||
from core.ast.nodes import python_to_concept
|
||||
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
|
||||
from core.builtin_helpers import get_names
|
||||
from core.concept import Concept
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
from parsers.BaseParser import NotInitializedNode
|
||||
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
|
||||
from parsers.DefaultParser import DefConceptNode
|
||||
|
||||
from parsers.PythonParser import PythonNode
|
||||
|
||||
|
||||
class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
    """
    Gets the concepts referenced by BNF.

    If a rule_name is given, it will also be considered as a potential
    property; the rule name takes precedence over the concept's own name.
    """

    def __init__(self):
        # unique names gathered while visiting
        self.names = set()

    def visit_ConceptExpression(self, node):
        """Record the rule alias, or else the referenced concept's name."""
        if node.rule_name:
            self.names.add(node.rule_name)
        elif isinstance(node.concept, Concept):
            self.names.add(node.concept.name)
        else:
            # node.concept may still be an unresolved name (plain string)
            self.names.add(node.concept)

    def visit_all(self, node):
        """Fallback for every other expression kind: only the rule alias matters."""
        if node.rule_name:
            self.names.add(node.rule_name)
|
||||
|
||||
|
||||
class AddConceptEvaluator(OneReturnValueEvaluator):
    """
    Used to add a new concept.

    Runs during the EVALUATION step when the parse result wraps a
    DefConceptNode (a concept definition statement).
    """
    NAME = "AddNewConcept"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)

    def matches(self, context, return_value):
        # only successful parse results wrapping a concept definition node
        return return_value.status and \
               isinstance(return_value.value, ParserResultConcept) and \
               isinstance(return_value.value.value, DefConceptNode)

    def eval(self, context, return_value):
        """
        Build a Concept from the definition node and register it.

        :param context: the current execution context
        :param return_value: the matched parser return value
        :return: a return value carrying the creation status
        """
        context.log(self.log, "Adding a new concept", self.name)
        def_concept_node = return_value.value.value
        sheerka = context.sheerka

        # validate the node
        props_found = set()

        concept = Concept(def_concept_node.name)
        for prop in ("definition", "where", "pre", "post", "body"):
            # put back the sources
            part_ret_val = getattr(def_concept_node, prop)
            if not isinstance(part_ret_val, ReturnValueConcept) or not part_ret_val.status:
                continue  # Nothing to do is not initialized

            # update the parts
            source = self.get_source(part_ret_val)
            setattr(concept.metadata, prop, source)

            # try to find what can be a property
            concept_name = [part.value for part in def_concept_node.name.tokens]
            for p in self.get_props(sheerka, part_ret_val, concept_name):
                props_found.add(p)

        # add props order by appearance when possible
        for token in def_concept_node.name.tokens:
            if token.value in props_found:
                concept.def_prop(token.value, None)

        # add the remaining properties
        for p in props_found:
            if p not in concept.props:
                concept.def_prop(p, None)

        # finish initialisation
        concept.init_key(def_concept_node.name.tokens)
        if not isinstance(def_concept_node.definition, NotInitializedNode) and \
                sheerka.is_success(def_concept_node.definition):
            concept.bnf = def_concept_node.definition.value.value

        ret = sheerka.create_new_concept(context, concept, self.verbose_log)
        if not ret.status:
            error_cause = sheerka.value(ret.body)
            context.log(self.log, f"Failed to add concept '{concept.name}'. Reason: {error_cause}", self.name)
        return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value])

    @staticmethod
    def get_source(ret_value):
        """Extract the raw source text attached to a parser return value."""
        return ret_value.value.source

    @staticmethod
    def get_props(sheerka, ret_value, concept_name):
        """
        Try to find out the variables.

        This function can only be a draft, as there may be tons of different
        situations. I guess that it can only be complete when we will have
        access to Sheerka memory.

        :param concept_name: the token values of the concept's own name,
                             used to filter candidate variables
        :return: list of property names (possibly empty)
        """

        #
        # Case of python code
        #
        if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode):
            python_node = ret_value.value.value
            as_concept_node = python_to_concept(python_node.ast_)
            variables = get_names(sheerka, as_concept_node)
            # only names that also appear in the concept's name are kept
            variables = filter(lambda x: x in concept_name, variables)
            return list(variables)

        #
        # case of concept
        #
        if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, Concept):
            return list(ret_value.value.value.props.keys())

        #
        # case of BNF
        #
        if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, ParsingExpression):
            visitor = ConceptOrRuleNameVisitor()
            visitor.visit(ret_value.value.value)
            # sorted for a deterministic property order
            return sorted(list(visitor.names))

        return []
|
||||
@@ -0,0 +1,77 @@
|
||||
import core.builtin_helpers
|
||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
from parsers.DefaultParser import IsaConceptNode
|
||||
|
||||
# Full pipeline executed when resolving user input, in order.
ALL_STEPS = [
    BuiltinConcepts.BEFORE_PARSING,
    BuiltinConcepts.PARSING,
    BuiltinConcepts.EVALUATION,
    BuiltinConcepts.AFTER_EVALUATION
]
|
||||
|
||||
|
||||
class AddConceptInSetEvaluator(OneReturnValueEvaluator):
    """
    Tells that a concept is a part of a set.

    Runs during the EVALUATION step when the parse result wraps an
    IsaConceptNode ("<concept> isa <set>").
    """
    NAME = "AddConceptInSet"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)

    def matches(self, context, return_value):
        # only successful parse results wrapping an isa node
        return return_value.status and \
               isinstance(return_value.value, ParserResultConcept) and \
               isinstance(return_value.value.value, IsaConceptNode)

    def eval(self, context, return_value):
        """
        Resolve both sides of the isa statement, then register the membership.

        :param context: the current execution context
        :param return_value: the matched parser return value
        :return: a return value carrying the operation status
        """

        def _resolve(name_node):
            # run the full pipeline on the raw tokens to recognize the concept
            ret_val = sheerka.ret(
                self.name,
                True,
                sheerka.new(BuiltinConcepts.USER_INPUT, body=name_node.tokens, user_name="N/A"))

            with context.push(desc=f"Recognizing '{name_node}'") as sub_context:
                r = sheerka.execute(sub_context, ret_val, ALL_STEPS, self.verbose_log)
                one_r = core.builtin_helpers.expect_one(context, r)
                sub_context.add_values(return_values=one_r)
                return one_r

        isa_node = return_value.value.value
        sheerka = context.sheerka
        context.log(self.log, f"Adding a concept {isa_node.concept} to set {isa_node.set}", self.name)

        # Try to recognize the concept
        res = _resolve(isa_node.concept)
        if not res.status:
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=str(isa_node.concept)),
                parents=[return_value])
        concept = res.value

        # Try to recognize the target set
        res = _resolve(isa_node.set)
        if not res.status:
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=str(isa_node.set)),
                parents=[return_value])
        concept_set = res.value

        res = sheerka.add_concept_to_set(context, concept, concept_set, self.verbose_log)
        if not res.status:
            context.log(self.log, f"Failed. Reason: {sheerka.value(res.body)}.", self.name)
        else:
            context.log(self.log, f"Concept added.", self.name)

        return sheerka.ret(
            self.name,
            res.status,
            res.body,
            parents=[return_value])
|
||||
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
from core.sheerka.Sheerka import ExecutionContext
|
||||
from core.sheerka_logger import get_logger
|
||||
|
||||
|
||||
class BaseEvaluator:
    """
    Base class to evaluate ReturnValues.

    Carries the common identity (prefixed name), the pipeline steps the
    evaluator participates in, its priority and its enabled flag, plus the
    three loggers every evaluator uses.
    """

    PREFIX = "evaluators."

    def __init__(self, name, steps, priority: int, enabled=True):
        # All three loggers share the fully qualified class name.
        qualified_class = self.PREFIX + self.__class__.__name__
        self.log = get_logger(qualified_class)
        self.init_log = get_logger("init." + qualified_class)
        self.verbose_log = get_logger("verbose." + qualified_class)

        self.name = self.PREFIX + name
        self.steps = steps
        self.priority = priority
        self.enabled = enabled

    def __repr__(self):
        return f"{self.name} ({self.priority})"
|
||||
|
||||
|
||||
class OneReturnValueEvaluator(BaseEvaluator):
    """
    Evaluate one specific return value.

    Subclasses override matches() to claim a single ReturnValue and
    eval() to process it; both default to no-ops returning None.
    """

    def matches(self, context: ExecutionContext, return_value):
        """Return True when this evaluator wants to handle *return_value*."""

    def eval(self, context: ExecutionContext, return_value):
        """Process *return_value*; overridden by concrete evaluators."""
|
||||
|
||||
|
||||
class AllReturnValuesEvaluator(BaseEvaluator):
    """
    Evaluates the groups of ReturnValues.

    Unlike OneReturnValueEvaluator, matches()/eval() receive the whole
    list of return values; 'eaten' accumulates the ones this evaluator
    consumed so they can be linked as parents of the reduced result.
    """

    def __init__(self, name, steps, priority: int, enabled=True):
        super().__init__(name, steps, priority, enabled)
        # Return values consumed during matches()/eval().
        self.eaten = []

    def matches(self, context: ExecutionContext, return_values):
        """Return True when this evaluator wants to process the group."""

    def eval(self, context: ExecutionContext, return_values):
        """Process the whole group; overridden by concrete evaluators."""
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
||||
from core.concept import Concept, ConceptParts
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
|
||||
|
||||
class ConceptEvaluator(OneReturnValueEvaluator):
    """
    The concept evaluator is the main class that knows what to do with a concept.

    It verifies the PRE conditions,
    if ok, can execute or not the BODY,
    then checks the POST conditions.
    (the actual PRE/BODY/POST work is delegated to sheerka.evaluate_concept)
    """
    NAME = "Concept"

    def __init__(self, return_body=False):
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)
        # When True, return the evaluated concept's body instead of the concept.
        self.return_body = return_body

    def matches(self, context, return_value):
        # Only successful parser results wrapping a Concept apply.
        return return_value.status and \
            isinstance(return_value.value, ParserResultConcept) and \
            isinstance(return_value.value.value, Concept)

    def eval(self, context, return_value):
        sheerka = context.sheerka
        concept = return_value.value.value
        context.log(self.verbose_log, f"Evaluating concept {concept}.", self.name)

        # If the concept that is requested is in the context (at least its name), drop the call.
        # Why?
        # If we evaluate Concept("foo", body="a").set_prop("a", "'property_a'")
        # the body should be 'property_a', and not a concept called 'a'.
        if context.obj and concept.name in context.obj.props:
            value = context.obj.props[concept.name].value
            context.log(self.verbose_log, f"{concept.name} is a property. Returning value '{value}'.", self.name)

            return sheerka.ret(self.name, True, value, parents=[return_value])

        evaluated = sheerka.evaluate_concept(context, concept, self.verbose_log)

        if evaluated.key != concept.key:
            # evaluated.key != concept.key means that we have transformed the concept
            # When you successfully evaluate an error, the status should not be false
            return sheerka.ret(
                self.name,
                False,
                evaluated,
                parents=[return_value])

        # Same key: the concept evaluated to itself; optionally unwrap the body.
        if not self.return_body or ConceptParts.BODY not in evaluated.compiled:
            return sheerka.ret(self.name, True, evaluated, parents=[return_value])
        else:
            return sheerka.ret(self.name, True, evaluated.body, parents=[return_value])
|
||||
@@ -0,0 +1,43 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
|
||||
|
||||
|
||||
class EvalEvaluator(AllReturnValuesEvaluator):
    """
    Returns the body of all successful concepts.

    Runs AFTER_EVALUATION and only when a CONCEPT_EVAL_REQUESTED marker is
    present among the return values (emitted by PrepareEvalEvaluator).
    """

    NAME = "Eval"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 80)
        # The return value carrying the CONCEPT_EVAL_REQUESTED marker,
        # remembered by matches() so eval() can link results back to it.
        self.eval_requested = None

    def matches(self, context, return_values):
        sheerka = context.sheerka
        for ret in return_values:
            if ret.status and sheerka.isinstance(ret.body, BuiltinConcepts.CONCEPT_EVAL_REQUESTED):
                self.eval_requested = ret
                return True

        return False

    def eval(self, context, return_values):
        sheerka = context.sheerka
        result = []

        # Collect the body of every successful concept that has one.
        for ret_val in return_values:
            if ret_val.status and isinstance(ret_val.body, Concept) and ret_val.body.body:
                context.log(self.verbose_log, f"Evaluating {ret_val}", who=self)
                result.append(sheerka.ret(self.name, True, ret_val.body.body, parents=[ret_val, self.eval_requested]))

        if len(result) > 0:
            return result
        else:
            # suppress the successful BuiltinConcepts.CONCEPT_EVAL_REQUESTED
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.CONCEPT_EVAL_REQUESTED),
                parents=[self.eval_requested])
|
||||
@@ -0,0 +1,102 @@
|
||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
|
||||
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
|
||||
|
||||
|
||||
class LexerNodeEvaluator(OneReturnValueEvaluator):
    """
    After a BNF is recognized, generates the concept or the list concepts.

    Accepts either a single ConceptNode/SourceCodeNode or an iterable of
    them, and dispatches to python-code handling when any node carries
    source code.
    """

    NAME = "LexerNode"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60)
        self.identifiers = {}  # cache for already created identifier (the key is id(concept))
        self.identifiers_key = {}  # number of identifiers with the same root (prefix)

    def matches(self, context, return_value):
        if not return_value.status:
            return False

        if not isinstance(return_value.value, ParserResultConcept):
            return False

        value = return_value.value.value
        # A single supported node...
        if isinstance(value, (ConceptNode, SourceCodeNode)):
            return True

        # ...or an iterable made exclusively of supported nodes.
        if hasattr(value, "__iter__"):
            for node in value:
                if not isinstance(node, (ConceptNode, SourceCodeNode)):
                    return False
            return True

        return False

    def eval(self, context, return_value):
        """
        From a concept node, creates a new concept
        and makes sure that the properties are correctly set.
        """
        nodes = return_value.value.value
        # Normalize to a list so the dispatch loop below always iterates.
        if not hasattr(nodes, "__iter__"):
            nodes = [nodes]

        context.log(self.verbose_log, f"{nodes=}", self.name)

        # for/else: python path wins as soon as one SourceCodeNode is found;
        # the else branch runs only when the loop completes without break.
        for node in nodes:
            if isinstance(node, SourceCodeNode):
                ret = self.evaluate_python_code(context, nodes)
                break
        else:
            ret = self.evaluate_concepts_only(context, nodes)

        ret.parents = [return_value]
        return ret

    def evaluate_concepts_only(self, context, nodes):
        # Gather the concepts and rebuild the matching source text.
        concepts = []
        source = ""
        sheerka = context.sheerka

        for node in nodes:
            if isinstance(node, ConceptNode):
                source += node.source if source == "" else (" " + node.source)
                concepts.append(node.concept)

        # Only a single recognized concept is handled here.
        if len(concepts) == 1:
            return sheerka.ret(
                self.name,
                True,
                context.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=concepts[0],
                    try_parsed=None))

        return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=nodes))

    def evaluate_python_code(self, context, nodes):
        sheerka = context.sheerka

        # Delegate the assembly of the nodes into a PythonNode.
        helper = LexerNodeParserHelperForPython()
        result = helper.parse(context, nodes)

        if isinstance(result, PythonNode):
            return sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=result.source,
                    body=result,
                    try_parsed=None))
        else:
            # The helper returned an error wrapper; propagate its body.
            return sheerka.ret(
                self.name,
                False,
                result.body)
|
||||
@@ -0,0 +1,83 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
import core.builtin_helpers
|
||||
from core.concept import Concept
|
||||
from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
|
||||
from evaluators.ConceptEvaluator import ConceptEvaluator
|
||||
from evaluators.PythonEvaluator import PythonEvaluator
|
||||
from parsers.BaseParser import BaseParser
|
||||
|
||||
|
||||
class MultipleSameSuccessEvaluator(AllReturnValuesEvaluator):
    """
    Used to filter the responses.
    It has a low priority to let other evaluators try to resolve the errors.

    It reduces the responses when several evaluators give the same answer.
    """

    NAME = "MultipleSameSuccess"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 50)
        # Successful evaluator results collected by matches().
        # NOTE(review): neither self.success nor self.eaten is reset between
        # calls -- this assumes a fresh instance per execution; confirm.
        self.success = []

    def matches(self, context, return_values):
        nb_successful_evaluators = 0
        only_parsers_in_error = True
        to_process = False

        for ret in return_values:

            if ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
                to_process = True
                self.eaten.append(ret)
            elif ret.who.startswith(BaseEvaluator.PREFIX):
                if ret.status:
                    nb_successful_evaluators += 1
                    self.success.append(ret)
                self.eaten.append(ret)
            elif ret.who.startswith(BaseParser.PREFIX):
                self.eaten.append(ret)
                if ret.status:
                    only_parsers_in_error = False

        return to_process and nb_successful_evaluators > 1 and only_parsers_in_error

    def eval(self, context, return_values):
        sheerka = context.sheerka
        context.log(self.verbose_log, f"{len(self.success)} successful return value(s)", who=self)
        for s in self.success:
            context.log(self.verbose_log, f"{s}", who=self)

        # Only reduce when every success carries the same value.
        if not core.builtin_helpers.is_same_success(sheerka, self.success):
            return None

        # ######################################
        # !!!!! W A R N I N G !!!!!!!!
        # I have a massive issue with how I implement this feature
        # I have forced an arbitrary order between Concept evaluator and Python evaluator
        # I gave a random order to the other
        #
        # I guess that we need a proper algorithm to elect which return value to use if they have the same result
        # My gut feeling is that it will depend on the intent of the user
        # So it depends on the context

        # try to return a concept if possible
        # give the priority to the ConceptEvaluator
        for s in self.success:
            if isinstance(s.value, Concept) and s.who == ConceptEvaluator().name:
                return sheerka.ret(self.name, True, s.value, parents=self.eaten)

        # Then the PythonEvaluator
        for s in self.success:
            if isinstance(s.value, Concept) and s.who == PythonEvaluator().name:
                return sheerka.ret(self.name, True, s.value, parents=self.eaten)

        # Then the first concept.
        # It's not predictable, so I guess that it's not a good implementation choice
        for s in self.success:
            if isinstance(s.value, Concept):
                return sheerka.ret(self.name, True, s.value, parents=self.eaten)

        # No concept at all: fall back to the first successful raw value.
        return sheerka.ret(self.name, True, self.success[0].value, parents=self.eaten)
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
|
||||
from parsers.BaseParser import BaseParser
|
||||
|
||||
|
||||
class OneErrorEvaluator(AllReturnValuesEvaluator):
    """
    Used to reduce when there is only one evaluator in error.
    The rest of the return values must be parsers in error.
    """

    NAME = "OneError"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 30)
        # The single failing evaluator result captured by matches().
        self.return_value_in_error = None

    def matches(self, context, return_values):
        nb_evaluators_in_error = 0
        to_process = False

        for ret in return_values:
            # Any successful evaluator or parser result disqualifies this reduction.
            if ret.status and (ret.who.startswith(self.PREFIX) or ret.who.startswith(BaseParser.PREFIX)):
                return False
            elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
                to_process = True
                self.eaten.append(ret)
            elif not ret.status and ret.who.startswith(self.PREFIX):
                nb_evaluators_in_error += 1
                self.return_value_in_error = ret
                self.eaten.append(ret)
            elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
                self.eaten.append(ret)

        return to_process and nb_evaluators_in_error == 1

    def eval(self, context, return_values):
        context.log(self.verbose_log, f"1 return value in error, {len(self.eaten)} item(s) eaten", who=self)
        context.log(self.verbose_log, f"{self.return_value_in_error}", who=self)

        sheerka = context.sheerka
        # Re-emit the lone error, absorbing everything that was eaten.
        return sheerka.ret(self.name, False, self.return_value_in_error.value, parents=self.eaten)
|
||||
@@ -0,0 +1,44 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
|
||||
from parsers.BaseParser import BaseParser
|
||||
|
||||
|
||||
class OneSuccessEvaluator(AllReturnValuesEvaluator):
    """
    Used to filter the responses.
    It has a low priority to let other evaluators try to resolve the errors.

    Make sure that there is only one successful answer.
    """

    NAME = "OneSuccess"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 60)  # before MultipleSameSuccess
        # The single successful evaluator result captured by matches().
        self.successful_return_value = None

    def matches(self, context, return_values):
        nb_successful_evaluators = 0
        to_process = False

        for ret in return_values:
            # A successful parser result means evaluation is not finished: bail out.
            if ret.status and ret.who.startswith(BaseParser.PREFIX):
                return False
            elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
                to_process = True
                self.eaten.append(ret)
            elif ret.status and ret.who.startswith(self.PREFIX):
                nb_successful_evaluators += 1
                self.successful_return_value = ret
                self.eaten.append(ret)
            elif not ret.status:
                self.eaten.append(ret)

        return to_process and nb_successful_evaluators == 1

    def eval(self, context, return_values):
        context.log(self.verbose_log, f"1 successful return value, {len(self.eaten)} item(s) eaten", who=self)
        context.log(self.verbose_log, f"{self.successful_return_value}", who=self)

        sheerka = context.sheerka
        # Re-emit the lone success, absorbing everything that was eaten.
        return sheerka.ret(self.name, True, self.successful_return_value.value, parents=self.eaten)
|
||||
@@ -0,0 +1,40 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
|
||||
|
||||
class PrepareEvalEvaluator(OneReturnValueEvaluator):
    """
    To parse evaluation requests.

    Detects user input of the form "eval <text>" during BEFORE_PARSING,
    strips the prefix, and emits a CONCEPT_EVAL_REQUESTED marker next to
    the remaining text.
    """

    NAME = "PrepareEval"

    def __init__(self, **kwargs):
        super().__init__(self.NAME, [BuiltinConcepts.BEFORE_PARSING], 90)
        # Stripped user input captured by matches() for use in eval().
        self.text = None

    def matches(self, context, return_value):
        # Only successful USER_INPUT return values carrying a string apply.
        if not (return_value.status and
                context.sheerka.isinstance(return_value.body, BuiltinConcepts.USER_INPUT) and
                isinstance(return_value.body.body, str)):
            return False

        text = return_value.body.body.strip()
        if not text.startswith("eval "):
            return False

        self.text = text
        return True

    def eval(self, context, return_value):
        sheerka = context.sheerka

        # self.text[5:] drops the leading "eval " (len("eval ") == 5).
        new_text_to_parse = sheerka.ret(
            self.name,
            True, sheerka.new(BuiltinConcepts.USER_INPUT, body=self.text[5:], user_name=context.event.user))

        evaluation_requested = sheerka.ret(
            self.name,
            True, sheerka.new(BuiltinConcepts.CONCEPT_EVAL_REQUESTED))

        return [new_text_to_parse, evaluation_requested]
|
||||
@@ -0,0 +1,189 @@
|
||||
import copy
|
||||
from enum import Enum
|
||||
|
||||
from core.ast.visitors import UnreferencedNamesVisitor
|
||||
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
||||
from core.concept import ConceptParts, Concept
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
from parsers.PythonParser import PythonNode
|
||||
import ast
|
||||
import core.ast.nodes
|
||||
|
||||
|
||||
class PythonEvaluator(OneReturnValueEvaluator):
    """
    Evaluate a Python node, ie, evaluate some Python code.

    The node's AST is either eval()'d (expressions) or exec()'d
    (statements), with a locals dictionary built from the current context,
    the concept properties and the resolved concept names found in the code.
    """

    NAME = "Python"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)
        # Extra names injected into every evaluation's locals.
        self.locals = {}

    def matches(self, context, return_value):
        # Only successful parser results wrapping a PythonNode apply.
        return return_value.status and \
            isinstance(return_value.value, ParserResultConcept) and \
            isinstance(return_value.value.value, PythonNode)

    def eval(self, context, return_value):
        """Evaluate the Python AST carried by *return_value* and wrap the result."""
        sheerka = context.sheerka
        node = return_value.value.value
        try:
            context.log(self.verbose_log, f"Evaluating python node {node}.", self.name)

            # Do not evaluate if the ast refers to a concept (leave it to ConceptEvaluator)
            if isinstance(node.ast_, ast.Expression) and isinstance(node.ast_.body, ast.Name):
                c = context.sheerka.get(node.ast_.body.id)
                if not context.sheerka.isinstance(c, BuiltinConcepts.UNKNOWN_CONCEPT):
                    context.log(self.verbose_log, "It's a simple concept. Not for me.", self.name)
                    not_for_me = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=node)
                    return sheerka.ret(self.name, False, not_for_me, parents=[return_value])

            my_locals = self.get_locals(context, node)
            context.log(self.verbose_log, f"locals={my_locals}", self.name)

            if isinstance(node.ast_, ast.Expression):
                context.log(self.verbose_log, "Evaluating using 'eval'.", self.name)
                compiled = compile(node.ast_, "<string>", "eval")
                evaluated = eval(compiled, {}, my_locals)
            else:
                context.log(self.verbose_log, "Evaluating using 'exec'.", self.name)
                evaluated = self.exec_with_return(node.ast_, my_locals)

            context.log(self.verbose_log, f"{evaluated=}", self.name)
            return sheerka.ret(self.name, True, evaluated, parents=[return_value])
        except Exception as error:
            # Any failure in the user code is converted into an ERROR concept.
            context.log_error(self.verbose_log, error, self.name)
            error = sheerka.new(BuiltinConcepts.ERROR, body=error)
            return sheerka.ret(self.name, False, error, parents=[return_value])

    def get_locals(self, context, node):
        """Build the locals dict used to eval/exec the node's AST."""
        my_locals = {
            "sheerka": context.sheerka,
            "desc": context.sheerka.dump_handler.dump_desc,
            "concepts": context.sheerka.dump_handler.dump_concepts,
            "definitions": context.sheerka.dump_handler.dump_definitions,
        }
        if context.obj:
            context.log(self.verbose_log,
                        f"Concept '{context.obj}' is in context. Adding its properties to locals if any.", self.name)

            for prop_name, prop_value in context.obj.props.items():
                if not isinstance(prop_value.value, Concept):
                    my_locals[prop_name] = prop_value.value
                else:
                    my_locals[prop_name] = context.sheerka.value(prop_value.value)

        # Find the names used by the code that are not yet bound anywhere.
        node_concept = core.ast.nodes.python_to_concept(node.ast_)
        unreferenced_names_visitor = UnreferencedNamesVisitor(context.sheerka)
        unreferenced_names_visitor.visit(node_concept)

        for name in unreferenced_names_visitor.names:
            context.log(self.verbose_log, f"Resolving '{name}'.", self.name)

            if name in node.concepts:
                context.log(self.verbose_log, "Using value from node.", self.name)
                concept = node.concepts[name]
                return_concept = False

            else:
                concept_key, concept_id, return_concept = self.resolve_name(context, name)
                # FIX: resolve_name used to return bare None on malformed
                # mangled names, which crashed the tuple unpack above; it now
                # returns (None, None, False) and we skip the name explicitly.
                if concept_key is None:
                    continue

                if concept_key in my_locals:
                    context.log(self.verbose_log, "Using value from property.", self.name)
                    continue

                context.log(self.verbose_log, "Instantiating new concept.", self.name)
                concept = context.sheerka.new((concept_key, concept_id))
                if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
                    context.log(self.verbose_log, f"'{concept_key}' is not a concept. Skipping.", self.name)
                    continue

            context.log(self.verbose_log, f"Evaluating '{concept}'", self.name)
            with context.push(self.name, desc=f"Evaluating '{concept}'", obj=concept) as sub_context:
                sub_context.log_new(self.verbose_log)
                evaluated = context.sheerka.evaluate_concept(sub_context, concept, self.verbose_log)
                sub_context.add_values(return_values=evaluated)

            # Same key means the concept evaluated to itself: bind its value.
            if evaluated.key == concept.key:
                my_locals[name] = evaluated if return_concept else context.sheerka.value(evaluated)

        if self.locals:
            my_locals.update(self.locals)

        return my_locals

    def resolve_name(self, context, to_resolve):
        """
        Try to match
            __C__concept_key__C__
        or
            __C__concept_key__concept_id__C__

        :param context:
        :param to_resolve: the (possibly mangled) name found in the code
        :return: a (concept_key, concept_id, use_concept) tuple;
                 concept_key is None when the mangled name is malformed
        """
        if not to_resolve.startswith("__C__"):
            return to_resolve, None, False

        context.log(self.verbose_log, f"Resolving name '{to_resolve}'.", self.name)

        # The optional USE_CONCEPT marker requests the concept itself
        # rather than its value.
        if len(to_resolve) >= 18 and to_resolve[:18] == "__C__USE_CONCEPT__":
            use_concept = True
            index = 18
        else:
            use_concept = False
            index = 5

        try:
            next_index = to_resolve.index("__", index)
            if next_index == index:
                context.log(self.verbose_log, "Error: no key between '__'.", self.name)
                # FIX: keep the tuple shape so callers can always unpack.
                return None, None, False
            concept_key = to_resolve[index: next_index]
        except ValueError:
            context.log(self.verbose_log, "Error: Missing trailing '__'.", self.name)
            return None, None, False

        if next_index == len(to_resolve) - 5:
            context.log(self.verbose_log, f"Recognized concept '{concept_key}'", self.name)
            return concept_key, None, use_concept

        index = next_index + 2
        try:
            next_index = to_resolve.index("__", index)
            if next_index == index:
                context.log(self.verbose_log, "Error: no id between '__'.", self.name)
                return None, None, False

            concept_id = to_resolve[index: next_index]
        except ValueError:
            # No id segment: it was just a key followed by the terminator.
            context.log(self.verbose_log, f"Recognized concept '{concept_key}'.", self.name)
            return concept_key, None, use_concept

        context.log(self.verbose_log, f"Recognized concept '{concept_key}' (id='{concept_id}').", self.name)
        return concept_key, concept_id, use_concept

    @staticmethod
    def expr_to_expression(expr):
        """Wrap a bare ast.Expr statement into an ast.Expression usable by eval()."""
        expr.lineno = 0
        expr.col_offset = 0
        result = ast.Expression(expr.value, lineno=0, col_offset=0)

        return result

    def exec_with_return(self, code_ast, my_locals):
        """
        exec() the module AST but return the value of its last statement
        when that statement is an expression (REPL-like behaviour).
        """
        # Everything but the last statement...
        init_ast = copy.deepcopy(code_ast)
        init_ast.body = code_ast.body[:-1]

        # ...and the last statement on its own.
        last_ast = copy.deepcopy(code_ast)
        last_ast.body = code_ast.body[-1:]

        exec(compile(init_ast, "<ast>", "exec"), {}, my_locals)
        # FIX: isinstance instead of type(...) == ast.Expr.
        if isinstance(last_ast.body[0], ast.Expr):
            return eval(compile(self.expr_to_expression(last_ast.body[0]), "<ast>", "eval"), {}, my_locals)
        else:
            exec(compile(last_ast, "<ast>", "exec"), {}, my_locals)
|
||||
@@ -0,0 +1,54 @@
|
||||
import logging
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
import core.builtin_helpers
|
||||
from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
|
||||
from parsers.BaseParser import BaseParser
|
||||
|
||||
|
||||
class TooManySuccessEvaluator(AllReturnValuesEvaluator):
    """
    Used to filter the responses.
    It has a low priority to let other evaluators try to resolve the errors.

    Raises an error when there are several successful answers with different values.
    """

    NAME = "TooManySuccess"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 60)
        # Successful evaluator results collected by matches().
        self.success = []

    def matches(self, context, return_values):
        to_process = False

        for ret in return_values:
            # A successful parser result means evaluation is not finished: bail out.
            if ret.status and ret.who.startswith(BaseParser.PREFIX):
                return False
            elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
                to_process = True
                self.eaten.append(ret)
            elif ret.status and ret.who.startswith(self.PREFIX):
                self.success.append(ret)
                self.eaten.append(ret)
            elif not ret.status:
                self.eaten.append(ret)

        return to_process and len(self.success) > 1

    def eval(self, context, return_values):
        sheerka = context.sheerka
        # Dump the competing results only when DEBUG logging is active.
        if self.verbose_log.isEnabledFor(logging.DEBUG):
            for s in self.success:
                context.log(self.verbose_log, s, self.name)
                context.log(self.verbose_log, f"value={sheerka.value(s.value)}", self.name)

        if not core.builtin_helpers.is_same_success(sheerka, self.success):
            context.log(self.verbose_log,
                        f"Values are different. Raising {BuiltinConcepts.TOO_MANY_SUCCESS}.", self.name)
            too_many_success = sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS, body=self.success)
            return sheerka.ret(self.name, False, too_many_success, parents=self.eaten)

        # Identical values: let another evaluator (MultipleSameSuccess) reduce them.
        context.log(self.verbose_log, f"Values are the same. Nothing to do.", self.name)
        return None
|
||||
@@ -0,0 +1,120 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from core.tokenizer import TokenKind, Keywords
|
||||
from core.sheerka_logger import get_logger
|
||||
import logging
|
||||
|
||||
|
||||
@dataclass()
class Node:
    """Base class for every node produced by the parsers."""


@dataclass()
class NopNode(Node):
    """A node that represents 'no operation'."""

    def __repr__(self):
        return "nop"


class NotInitializedNode(Node):
    """Placeholder node used before a real node has been produced."""

    def __repr__(self):
        return "**N/A**"


@dataclass()
class ErrorNode(Node):
    """Base class for nodes that describe a parsing error."""


@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
    """Error node emitted when the parser meets a token it did not expect."""
    message: str
    expected_tokens: list
|
||||
|
||||
|
||||
class BaseParser:
    """
    Base class for all parsers.

    Carries the parser identity (prefixed name), priority, enabled flag,
    the error sink filled during parsing, and shared logging/formatting
    helpers.
    """

    PREFIX = "parsers."

    def __init__(self, name, priority: int, enabled=True):
        # FIX: use self.PREFIX consistently (the main logger used a
        # hard-coded "parsers." while the other two used self.PREFIX).
        self.log = get_logger(self.PREFIX + self.__class__.__name__)
        self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__)
        self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__)

        self.name = self.PREFIX + name
        self.priority = priority
        self.enabled = enabled

        # Filled by subclasses while parsing; read by get_return_value_body().
        self.has_error = False
        self.error_sink = []

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return self.name == other.name

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return self.name

    def parse(self, context, text):
        """Parse *text*; overridden by concrete parsers."""

    def log_result(self, context, source, ret):
        """Log the outcome of a single parse attempt (DEBUG level only)."""
        if not self.log.isEnabledFor(logging.DEBUG):
            return

        if ret.status:
            value = context.return_value_to_str(ret)
            context.log(self.log, f"Recognized '{source}' as {value}", self.name)
        else:
            context.log(self.log, f"Failed to recognize '{source}'", self.name)

    def log_multiple_results(self, context, source, list_of_ret):
        """Log the outcome of a parse that produced several concepts (DEBUG only)."""
        if not self.log.isEnabledFor(logging.DEBUG):
            return

        context.log(self.log, f"Recognized '{source}' as multiple concepts", self.name)
        for r in list_of_ret:
            value = context.return_value_to_str(r)
            context.log(self.log, f"    Recognized '{value}'", self.name)

    def get_return_value_body(self, sheerka, source, tree, try_parse):
        """Build the PARSER_RESULT concept, or propagate a single error concept."""
        if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
            return self.error_sink[0]

        return sheerka.new(
            BuiltinConcepts.PARSER_RESULT,
            parser=self,
            source=source,
            body=self.error_sink if self.has_error else tree,
            try_parsed=try_parse)

    @staticmethod
    def get_text_from_tokens(tokens, custom_switcher=None):
        """
        Rebuild a printable string from *tokens*.

        Keywords are rendered through the Keywords enum and concept tokens
        as 'c:<value>:'; *custom_switcher* can override/extend that mapping
        (keyed by token type). Returns "" when tokens is None.
        """
        if tokens is None:
            return ""

        if not hasattr(tokens, "__iter__"):
            tokens = [tokens]

        switcher = {
            TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
            TokenKind.CONCEPT: lambda t: "c:" + t.value + ":",
        }

        if custom_switcher:
            switcher.update(custom_switcher)

        # FIX: str.join instead of quadratic '+=' accumulation.
        return "".join(
            switcher.get(token.type, lambda t: t.value)(token) for token in tokens)
|
||||
@@ -0,0 +1,270 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.sheerka.Sheerka import ExecutionContext
|
||||
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
|
||||
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
|
||||
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, StrMatch
|
||||
|
||||
|
||||
@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
    """Error emitted when the BNF parser hits EOF where a token was expected."""
    pass
|
||||
|
||||
|
||||
class BnfParser(BaseParser):
    """
    Parser used to transform a literal grammar definition into a ParsingExpression.

    Example:
        a | b, c  ->  Sequence(OrderedChoice(a, b), c)

    Operators:
        '|' (pipe) is used for OrderedChoice
        ',' (comma) is used for Sequence
        '?' (question mark) is used for Optional
        '*' (star) is used for ZeroOrMore
        '+' (plus) is used for OneOrMore
    """

    def __init__(self, **kwargs):
        super().__init__("Bnf", 50, False)

        # Lexer state: current token plus a one-token lookahead cache.
        self.lexer_iter = None
        self._current = None
        self.after_current = None
        self.nb_open_par = 0  # depth of '(' nesting; sequences stop at ')' inside parens
        self.context = None
        self.source = ""  # text of every token consumed so far
        self.sheerka = None

    def __eq__(self, other):
        # Equality is by type only: all BnfParser instances are interchangeable.
        if not isinstance(other, BnfParser):
            return False

        return True

    def reset_parser(self, context, text):
        """Re-initialize all lexer state so this instance can parse *text*."""
        self.context = context
        self.sheerka = context.sheerka

        # Accept either raw text (tokenized here) or an iterable of tokens.
        self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
        self._current = None
        self.after_current = None
        self.nb_open_par = 0

        self.next_token()
        self.eat_white_space()

    def add_error(self, error, next_token=True):
        """Record *error* in the sink, optionally advancing past the bad token.

        Returns the error so callers can write `return self.add_error(...)`.
        """
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        """Return the current token without advancing."""
        return self._current

    def next_token(self, skip_whitespace=False):
        """Advance to the next token, consuming any cached lookahead first."""
        if self._current and self._current.type == TokenKind.EOF:
            return

        try:
            self._current = self.after_current or next(self.lexer_iter)
            self.source += str(self._current.value)
            self.after_current = None

            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
                    self.source += str(self._current.value)
        except StopIteration:
            # Input exhausted: synthesize an EOF token.
            self._current = Token(TokenKind.EOF, "", -1, -1, -1)

    def next_after(self):
        """Peek one token ahead without consuming it (cached in after_current)."""
        if self.after_current is not None:
            return self.after_current

        try:
            self.after_current = next(self.lexer_iter)
            # The peeked token is appended to self.source only once consumed.
            return self.after_current
        except StopIteration:
            self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
            return self.after_current

    def eat_white_space(self):
        """Skip whitespace/newline tokens, flushing any cached lookahead first."""
        if self.after_current is not None:
            self._current = self.after_current
            self.source += str(self._current.value)
            self.after_current = None

        try:
            while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                self._current = next(self.lexer_iter)
                self.source += str(self._current.value)
        except StopIteration:
            # NOTE(review): unlike next_token(), exhaustion leaves _current as
            # None instead of an EOF token; callers test for None -- confirm intended.
            self._current = None

    def maybe_sequence(self, first, second):
        """True when the upcoming tokens are *second*, or *first* then *second*."""
        token = self.get_token()
        return token.type == second or token.type == first and self.next_after().type == second

    def parse(self, context: ExecutionContext, text):
        """Parse *text* into a ParsingExpression tree, returned as a sheerka result."""

        tree = None
        try:
            self.reset_parser(context, text)
            tree = self.parser_outer_rule_name()

            # Anything left after the outer rule is unexpected trailing input.
            token = self.get_token()
            if token and token.type != TokenKind.EOF:
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
        except LexerError as e:
            self.add_error(e, False)

        value = self.get_return_value_body(context.sheerka, self.source, tree, tree)

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            value)

        return ret

    def parser_outer_rule_name(self):
        """Entry rule: a choice, optionally followed by '= rule_name'."""
        return self.parser_rule_name(self.parse_choice)

    def parse_choice(self):
        """Parse 'sequence (| sequence)*'; wraps in OrderedChoice only when needed."""
        sequence = self.parse_sequence()

        self.eat_white_space()
        token = self.get_token()
        if token is None or token.type != TokenKind.VBAR:
            # Single alternative: no OrderedChoice wrapper.
            return sequence

        elements = [sequence]
        while True:
            # maybe eat the vertical bar
            self.eat_white_space()
            token = self.get_token()
            if token is None or token.type != TokenKind.VBAR:
                break
            self.next_token(skip_whitespace=True)

            sequence = self.parse_sequence()
            elements.append(sequence)

        return OrderedChoice(*elements)

    def parse_sequence(self):
        """Parse 'modifier (modifier)*'; wraps in Sequence only when needed."""
        expr_and_modifier = self.parse_modifier()
        token = self.get_token()
        # A sequence ends at EOF, '=', a choice separator '|', or -- when
        # inside parentheses -- a closing ')'.
        if token is None or \
                token.type == TokenKind.EOF or \
                token.type == TokenKind.EQUALS or \
                self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
            return expr_and_modifier

        elements = [expr_and_modifier]
        while True:
            token = self.get_token()
            if token is None or \
                    token.type == TokenKind.EOF or \
                    token.type == TokenKind.EQUALS or \
                    self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                    self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
                break
            self.eat_white_space()

            sequence = self.parse_modifier()
            elements.append(sequence)

        return Sequence(*elements)

    def parse_modifier(self):
        """Parse an expression with an optional trailing '?', '*' or '+'."""
        expression = self.parser_inner_rule_name()

        token = self.get_token()

        if token.type == TokenKind.QMARK:
            self.next_token()
            return Optional(expression)

        if token.type == TokenKind.STAR:
            self.next_token()
            return ZeroOrMore(expression)

        if token.type == TokenKind.PLUS:
            self.next_token()
            return OneOrMore(expression)

        return expression

    def parser_inner_rule_name(self):
        """Inner rule: an expression, optionally followed by '= rule_name'."""
        return self.parser_rule_name(self.parse_expression)

    def parse_expression(self):
        """Parse an atom: a parenthesized choice, a concept name, or a literal."""
        token = self.get_token()
        if token.type == TokenKind.EOF:
            # NOTE(review): the error is recorded but execution falls through,
            # so the EOF token is then treated as a literal -- confirm intended.
            self.add_error(UnexpectedEndOfFileError(), False)
        if token.type == TokenKind.LPAR:
            self.nb_open_par += 1
            self.next_token()
            expression = self.parse_choice()
            token = self.get_token()
            if token.type == TokenKind.RPAR:
                self.nb_open_par -= 1
                self.next_token()
                return expression
            else:
                # Unbalanced parenthesis: record and return the partial expression.
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR]))
                return expression

        if token.type == TokenKind.IDENTIFIER:
            self.next_token()

            concept_name = str(token.value)

            # we are trying to match against a concept which is still under construction !
            # (for example of recursive bnf definition)
            if self.context.obj and hasattr(self.context.obj, "name"):
                if concept_name == str(self.context.obj.name):
                    return ConceptExpression(concept_name)

            concept = self.context.get_concept(concept_name)
            if not self.sheerka.is_known(concept):
                self.add_error(concept)
                return None
            elif hasattr(concept, "__iter__"):
                # Several concepts share this name: the reference is ambiguous.
                self.add_error(
                    self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
                                     body=("key", concept_name)))
                return None
            else:
                return concept

        # Fallback: treat the token as a literal string match.
        ret = StrMatch(core.utils.strip_quotes(token.value))
        self.next_token()
        return ret

    def parser_rule_name(self, next_to_parse):
        """Run *next_to_parse*, then consume an optional '= name' suffix."""
        expression = next_to_parse()
        token = self.get_token()
        if token is None or token.type != TokenKind.EQUALS:
            return expression

        self.next_token()  # eat equals
        token = self.get_token()

        if token is None or token.type != TokenKind.IDENTIFIER:
            return self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.IDENTIFIER]))

        expression.rule_name = token.value
        self.next_token()
        return expression
|
||||
@@ -0,0 +1,994 @@
|
||||
#####################################################################################################
|
||||
# This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
|
||||
# I don't directly use the project, but it helped me figure out
|
||||
# what to do.
|
||||
# Dejanović I., Milosavljević G., Vaderna R.:
|
||||
# Arpeggio: A flexible PEG parser for Python,
|
||||
# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
|
||||
#####################################################################################################
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from collections import defaultdict
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept, ConceptParts, DoNotResolve
|
||||
from core.tokenizer import TokenKind, Tokenizer, Token
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||
import core.utils
|
||||
|
||||
|
||||
@dataclass()
class LexerNode(Node):
    """Base node produced by the concept lexer: a span of tokens in the input."""
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens covered by this node
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        # Derive the textual source from the tokens when not supplied.
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        # NOTE(review): defining __eq__ without __hash__ makes LexerNode
        # unhashable; subclasses that need hashing redefine __hash__.
        if not isinstance(other, LexerNode):
            return False

        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.tokens == other.tokens
|
||||
|
||||
|
||||
class UnrecognizedTokensNode(LexerNode):
    """Run of consecutive input tokens that no grammar rule matched."""

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)

    def add_token(self, token, pos):
        """Extend the run with one more unmatched token ending at *pos*."""
        self.tokens.append(token)
        self.end = pos

    def fix_source(self):
        """Recompute the cached source text after tokens were appended."""
        self.source = BaseParser.get_text_from_tokens(self.tokens)

    def not_whitespace(self):
        """False when the run is exactly one whitespace/newline token."""
        only_ws = len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
        return not only_ws

    def __eq__(self, other):
        # Also comparable against the utnode namedtuple shorthand.
        if isinstance(other, (utnode, UnrecognizedTokensNode)):
            return (self.start, self.end, self.source) == (other.start, other.end, other.source)
        return False

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
|
||||
|
||||
|
||||
class ConceptNode(LexerNode):
    """Node produced by the ConceptLexerParser for a recognized concept."""

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying

        # Defensive: __post_init__ normally fills source from the tokens already.
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        # Namedtuple shorthands compare by concept key (+ span) + source.
        if isinstance(other, cnode):
            return (self.concept.key, self.start, self.end, self.source) == \
                (other.concept_key, other.start, other.end, other.source)

        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if not isinstance(other, ConceptNode):
            return False

        return (self.concept, self.start, self.end, self.source, self.underlying) == \
            (other.concept, other.start, other.end, other.source, other.underlying)

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
|
||||
|
||||
|
||||
class SourceCodeNode(LexerNode):
    """Node produced when embedded source code (e.g. Python) is recognized."""

    def __init__(self, node, start, end, tokens=None, source=None):
        super().__init__(start, end, tokens, source)
        self.node = node  # the language-specific node (e.g. a PythonNode)

    def __eq__(self, other):
        # Also comparable against the scnode namedtuple shorthand.
        if isinstance(other, scnode):
            return (self.start, self.end, self.source) == (other.start, other.end, other.source)

        if not isinstance(other, SourceCodeNode):
            return False

        return (self.node, self.start, self.end, self.source) == \
            (other.node, other.start, other.end, other.source)

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
|
||||
|
||||
|
||||
# Lightweight comparison shorthands: the LexerNode subclasses accept these
# namedtuples in their __eq__ so expected nodes can be stated compactly.
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")
|
||||
|
||||
|
||||
class NonTerminalNode(LexerNode):
    """Inner parse-tree node: a parsing expression with its matched children."""

    def __init__(self, parsing_expression, start, end, tokens, children=None):
        super().__init__(start, end, tokens)
        self.parsing_expression = parsing_expression
        self.children = children

    def __repr__(self):
        name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
        if len(self.children) > 0:
            inner = "(" + ",".join(repr(child) for child in self.children) + ")"
        else:
            inner = ""
        return name + inner

    def __eq__(self, other):
        if not isinstance(other, NonTerminalNode):
            return False
        return (self.parsing_expression, self.start, self.end, self.children) == \
            (other.parsing_expression, other.start, other.end, other.children)

    def __hash__(self):
        # NOTE(review): children is a list, so hashing raises TypeError unless
        # it has been replaced by a hashable sequence -- confirm usage.
        return hash((self.parsing_expression, self.start, self.end, self.children))
|
||||
|
||||
|
||||
class TerminalNode(LexerNode):
    """Leaf parse-tree node: a single matched token value."""

    def __init__(self, parsing_expression, start, end, value):
        super().__init__(start, end, source=value)
        self.parsing_expression = parsing_expression
        self.value = value

    def __repr__(self):
        prefix = self.parsing_expression.rule_name or ""
        return prefix + f"'{self.value}'"

    def __eq__(self, other):
        if not isinstance(other, TerminalNode):
            return False
        return (self.parsing_expression, self.start, self.end, self.value) == \
            (other.parsing_expression, other.start, other.end, other.value)

    def __hash__(self):
        return hash((self.parsing_expression, self.start, self.end, self.value))
|
||||
|
||||
|
||||
@dataclass()
class GrammarErrorNode(ErrorNode):
    """Error for a grammar-definition element that cannot be translated."""
    message: str  # human-readable description of the offending element
|
||||
|
||||
|
||||
@dataclass()
class UnknownConceptNode(ErrorNode):
    """Error for a reference to a concept that cannot be resolved."""
    concept_key: str  # key of the unresolved concept
|
||||
|
||||
|
||||
@dataclass()
class TooManyConceptNode(ErrorNode):
    """Error for a concept key that resolves to more than one concept."""
    concept_key: str  # ambiguous concept key
|
||||
|
||||
|
||||
class ParsingExpression:
    """Base class for all grammar expressions (sequence, choice, repetition...)."""

    def __init__(self, *args, **kwargs):
        # Raw sub-expressions as given; 'nodes' holds the resolved versions.
        self.elements = args

        raw_nodes = kwargs.get('nodes', [])
        if not hasattr(raw_nodes, '__iter__'):
            raw_nodes = [raw_nodes]
        self.nodes = raw_nodes

        self.rule_name = kwargs.get('rule_name', '')

    def __eq__(self, other):
        return isinstance(other, ParsingExpression) and \
            self.rule_name == other.rule_name and \
            self.elements == other.elements

    def __hash__(self):
        return hash((self.rule_name, self.elements))

    def parse(self, parser):
        """Delegate to the subclass-specific _parse implementation."""
        return self._parse(parser)
|
||||
|
||||
|
||||
class ConceptExpression(ParsingExpression):
    """
    Will match a concept.
    It is used only for rule definition.

    When the grammar is created, it is replaced by the actual concept.
    """

    def __init__(self, concept, rule_name=""):
        super().__init__(rule_name=rule_name)
        self.concept = concept  # either a Concept instance or a concept name (str)

    def __repr__(self):
        return f"{self.concept}"

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, ConceptExpression):
            return False

        if isinstance(self.concept, Concept):
            # NOTE(review): assumes other.concept is also a Concept; if the
            # other side still holds a plain name (str), this raises -- confirm.
            return self.concept.name == other.concept.name

        # when it's only the name of the concept
        return self.concept == other.concept

    def __hash__(self):
        return hash((self.concept, self.rule_name))

    @staticmethod
    def get_parsing_expression_from_name(name):
        """Build a literal-match expression from a concept's textual name."""
        tokens = Tokenizer(name)
        # [:-1] drops the trailing token produced by the tokenizer (presumably EOF).
        nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
        if len(nodes) == 1:
            return nodes[0]
        else:
            # NOTE(review): the list is passed as ONE positional element and
            # .nodes is then overwritten with the real node list, so .elements
            # holds the list itself -- confirm this is intended.
            sequence = Sequence(nodes)
            sequence.nodes = nodes
            return sequence

    def _parse(self, parser):
        """Match the referenced concept, resolving (and memoizing) it by name."""
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = to_match  # Memoize

        if to_match not in parser.concepts_grammars:
            # Try to match the concept using its name
            expr = self.get_parsing_expression_from_name(to_match.name)
            node = expr.parse(parser)
        else:
            node = parser.concepts_grammars[to_match].parse(parser)

        if node is None:
            return None

        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
||||
|
||||
|
||||
class ConceptGroupExpression(ConceptExpression):
    """ConceptExpression for a concept group: matches any member of the group."""

    def _parse(self, parser):
        """Match one of the group's member concepts (first match wins)."""
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = to_match  # Memoize

        if to_match not in parser.concepts_grammars:
            # No explicit grammar: build an ordered choice over the group members.
            concepts_in_group = parser.sheerka.get_set_elements(self.concept)
            nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
            # NOTE(review): like get_parsing_expression_from_name, the list is
            # passed as one positional element and .nodes is overwritten after.
            expr = OrderedChoice(nodes)
            expr.nodes = nodes
            node = expr.parse(parser)
        else:
            node = parser.concepts_grammars[to_match].parse(parser)

        if node is None:
            return None

        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
||||
|
||||
|
||||
class Sequence(ParsingExpression):
    """Matches its sub-expressions one after another, in definition order."""

    def _parse(self, parser):
        start = parser.pos
        last_end = parser.pos
        children = []

        for expr in self.nodes:
            result = expr.parse(parser)
            if result is None:
                # One element failed: the whole sequence fails.
                return None
            if result.end != -1:  # end == -1 encodes an empty (optional) match
                children.append(result)
                last_end = result.end

        return NonTerminalNode(self, start, last_end, parser.tokens[start: last_end + 1], children)

    def __repr__(self):
        return "(" + ", ".join(repr(n) for n in self.elements) + ")"
|
||||
|
||||
|
||||
class OrderedChoice(ParsingExpression):
    """Matches the first alternative that succeeds (definition order matters)."""

    def _parse(self, parser):
        start = parser.pos

        for alternative in self.nodes:
            result = alternative.parse(parser)
            if result:
                return NonTerminalNode(self, start, result.end, parser.tokens[start: result.end + 1], [result])

        # NOTE(review): the cursor is rewound only after ALL alternatives failed;
        # a partially-consuming failed alternative is not rewound before the
        # next one is tried -- confirm this is intended.
        parser.seek(start)

        return None

    def __repr__(self):
        return "(" + "| ".join(repr(n) for n in self.elements) + ")"
|
||||
|
||||
|
||||
class Optional(ParsingExpression):
    """
    Matches its elements zero or one time; never fails.
    With several candidate elements, the longest match wins.
    If you need order, use Optional(OrderedChoice(...)).
    """

    def _parse(self, parser):
        init_pos = parser.pos
        # end == -1 encodes "nothing matched" (still a successful, empty match).
        selected_node = NonTerminalNode(self, parser.pos, -1, [], [])

        # Try every candidate from the same start position; keep the longest.
        for e in self.nodes:
            node = e.parse(parser)
            if node:
                if node.end > selected_node.end:
                    selected_node = NonTerminalNode(
                        self,
                        node.start,
                        node.end,
                        parser.tokens[node.start: node.end + 1],
                        [node])

            parser.seek(init_pos)  # backtrack before trying the next candidate

        if selected_node.end != -1:
            # Re-consume the winning match's tokens.
            parser.seek(selected_node.end)
            parser.next_token()  # eat the tokens found

        return selected_node

    def __repr__(self):
        if len(self.elements) == 1:
            return f"{self.elements[0]}?"
        else:
            to_str = ", ".join(repr(n) for n in self.elements)
            return f"({to_str})?"
|
||||
|
||||
|
||||
class Repetition(ParsingExpression):
    """
    Common base for the repetition expressions (?, *, +).

    Keyword args:
        sep: optional separator expression consumed between repetitions.
    """

    def __init__(self, *elements, **kwargs):
        super().__init__(*elements, **kwargs)
        self.sep = kwargs.get('sep', None)
|
||||
|
||||
|
||||
class ZeroOrMore(Repetition):
    """
    ZeroOrMore will try to match the sub-expression zero or more
    times. It will never fail.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = -1  # stays -1 when nothing matched at all
        children = []

        while True:
            current_pos = parser.pos

            # maybe eat the separator if needed
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break

            # consume one repetition of the sub-expression
            node = self.nodes[0].parse(parser)
            if node is None:
                parser.seek(current_pos)
                break
            else:
                # NOTE(review): an empty match (end == -1) neither advances nor
                # breaks; a sub-expression that always matches empty would loop
                # forever -- confirm inputs preclude this.
                if node.end != -1:  # because returns -1 when no match
                    children.append(node)
                    end_pos = node.end

        if len(children) == 0:
            # Zero repetitions: successful empty match (end == -1).
            return NonTerminalNode(self, init_pos, -1, [], [])

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})*"
|
||||
|
||||
|
||||
class OneOrMore(Repetition):
    """
    OneOrMore will try to match the sub-expression one or more times;
    it fails (returns None) when there is no match at all.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # maybe eat the separator if needed
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break

            # consume one repetition of the sub-expression
            node = self.nodes[0].parse(parser)
            if node is None:
                parser.seek(current_pos)
                break
            else:
                if node.end != -1:  # because returns -1 when no match
                    children.append(node)
                    end_pos = node.end

        if len(children) == 0:  # if nothing is found, it's an error
            return None

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})+"
|
||||
|
||||
|
||||
class UnorderedGroup(Repetition):
    """
    Will try to match all of the parsing expressions in any order.
    Not implemented yet.
    """

    def _parse(self, parser):
        raise NotImplementedError()
|
||||
|
||||
|
||||
class Match(ParsingExpression):
    """Common base for expressions that consume input tokens directly."""

    def __init__(self, rule_name, root=False):
        super().__init__(rule_name=rule_name, root=root)

    def parse(self, parser):
        """Direct matches have no extra bookkeeping: just run _parse."""
        return self._parse(parser)
|
||||
|
||||
|
||||
class StrMatch(Match):
    """
    Matches a literal string against the current token.

    Args:
        to_match: the literal to compare with the token value.
        ignore_case: compare case-insensitively when True (default).
    """

    def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
        # Deliberately skips Match.__init__ and initializes ParsingExpression
        # directly (same behavior as the original code).
        super(Match, self).__init__(rule_name=rule_name, root=root)
        self.to_match = to_match
        self.ignore_case = ignore_case

    def __repr__(self):
        return f"'{self.to_match}'"

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, StrMatch):
            return False

        return self.to_match == other.to_match and self.ignore_case == other.ignore_case

    def __hash__(self):
        # Bug fix: defining __eq__ implicitly set __hash__ to None, which made
        # StrMatch the only unhashable expression type and broke hashing of any
        # node whose parsing_expression is a StrMatch.
        return hash((self.rule_name, self.elements, self.to_match, self.ignore_case))

    def _parse(self, parser):
        """Consume the current token when it equals the literal; else None."""
        token = parser.get_token()
        m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
            else token.value == self.to_match

        if m:
            node = TerminalNode(self, parser.pos, parser.pos, token.value)
            parser.next_token()
            return node

        return None
|
||||
|
||||
|
||||
class ConceptLexerParser(BaseParser):
|
||||
def __init__(self, **kwargs):
    """Create the lexer-parser.

    Keyword args:
        grammars: pre-built {concept: ParsingExpression} mapping, or
        sheerka: object whose concepts_grammars mapping is shared.
    """
    super().__init__("ConceptLexer", 50)
    if 'grammars' in kwargs:
        self.concepts_grammars = kwargs.get("grammars")
    elif 'sheerka' in kwargs:
        self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
    else:
        self.concepts_grammars = {}

    self.ignore_case = True  # default case-sensitivity for StrMatch

    # Token stream state.
    self.token = None
    self.pos = -1
    self.tokens = None

    self.context = None
    self.text = None
    self.sheerka = None
|
||||
|
||||
def add_error(self, error, next_token=True):
    """Record *error*, mark the parse as failed, optionally skip the bad token.

    Returns the error so callers can write `return self.add_error(...)`.
    """
    self.error_sink.append(error)
    self.has_error = True
    if next_token:
        self.next_token()
    return error
|
||||
|
||||
def reset_parser(self, context, text):
    """Tokenize *text* and reset cursor state; returns False on lexer error."""
    self.context = context
    self.sheerka = context.sheerka
    self.text = text

    if isinstance(text, str):
        try:
            self.tokens = list(Tokenizer(text))
        except core.tokenizer.LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False

    else:
        # Already an iterable of tokens; terminate it explicitly.
        self.tokens = list(text)
        self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1))  # make sure to finish with end of file token

    self.token = None
    self.pos = -1
    self.next_token(False)
    return True
|
||||
|
||||
def get_token(self) -> Token:
    """Return the current token without advancing."""
    return self.token
|
||||
|
||||
def next_token(self, skip_whitespace=True):
    """Advance the cursor, optionally skipping whitespace/newline tokens.

    Returns False when EOF is (or already was) the current token.
    """
    current = self.token
    if current and current.type == TokenKind.EOF:
        return False

    self.pos += 1
    self.token = self.tokens[self.pos]

    if skip_whitespace:
        while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
            self.pos += 1
            self.token = self.tokens[self.pos]

    return self.token.type != TokenKind.EOF
|
||||
|
||||
def seek(self, pos):
    """Jump the token cursor to absolute index *pos* (used for backtracking)."""
    self.token = self.tokens[pos]
    self.pos = pos
    return True
|
||||
|
||||
def rewind(self, offset, skip_whitespace=True):
    """Move the cursor by *offset* (typically negative), then optionally
    back up further past whitespace/newline tokens."""
    new_pos = self.pos + offset
    self.pos = new_pos
    self.token = self.tokens[new_pos]

    if skip_whitespace:
        while self.pos > 0 and self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
            self.pos -= 1
            self.token = self.tokens[self.pos]
|
||||
|
||||
def initialize(self, context, concepts_definitions):
    """
    Adds a bunch of concepts, and how they can be recognized.

    :param context: execution context
    :param concepts_definitions: dictionary of {concept: concept_definition}
    :return: a sheerka result wrapping either the grammars or the error sink
    """

    self.context = context
    self.sheerka = context.sheerka
    concepts_to_resolve = set()

    # ## Gets the grammars
    for concept, concept_def in concepts_definitions.items():
        concept.init_key()  # make sure that the key is initialized
        grammar = self.get_model(concept_def, concepts_to_resolve)
        self.concepts_grammars[concept] = grammar

    if self.has_error:
        return self.sheerka.ret(self.name, False, self.error_sink)

    # ## Removes concepts with infinite recursions
    concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
    for concept in concepts_to_remove:
        concepts_to_resolve.remove(concept)
        del self.concepts_grammars[concept]

    if self.has_error:
        return self.sheerka.ret(self.name, False, self.error_sink)
    else:
        return self.sheerka.ret(self.name, True, self.concepts_grammars)
|
||||
|
||||
def get_concept(self, concept_name):
    """Resolve a concept by name: context-local concepts first, sheerka second."""
    local = self.context.concepts
    if concept_name in local:
        return local[concept_name]
    return self.sheerka.get(concept_name)
|
||||
|
||||
def get_model(self, concept_def, concepts_to_resolve):
    """Translate a user-supplied concept definition into a ParsingExpression tree.

    Referenced concepts are collected into *concepts_to_resolve* as a side effect.
    """

    # TODO
    # inner_get_model must not modify the initial ParsingExpression
    # A copy must be created
    def inner_get_model(expression):
        # Dispatch on the type of the definition element.
        if isinstance(expression, Concept):
            if self.sheerka.isagroup(expression):
                ret = ConceptGroupExpression(expression, rule_name=expression.name)
            else:
                ret = ConceptExpression(expression, rule_name=expression.name)
            concepts_to_resolve.add(expression)
        elif isinstance(expression, ConceptExpression):
            # Fill in a missing rule name from the concept itself.
            if expression.rule_name is None or expression.rule_name == "":
                expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
                    else expression.concept
            concepts_to_resolve.add(expression.concept)
            ret = expression
        elif isinstance(expression, str):
            ret = StrMatch(expression, ignore_case=self.ignore_case)
        elif isinstance(expression, StrMatch):
            ret = expression
            if ret.ignore_case is None:
                ret.ignore_case = self.ignore_case
        elif isinstance(expression, Sequence) or \
                isinstance(expression, OrderedChoice) or \
                isinstance(expression, ZeroOrMore) or \
                isinstance(expression, OneOrMore) or \
                isinstance(expression, Optional):
            # Composite expression: recursively translate each element.
            ret = expression
            ret.nodes = [inner_get_model(e) for e in ret.elements]
        else:
            ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)

        # Translate separator expression.
        if isinstance(expression, Repetition) and expression.sep:
            expression.sep = inner_get_model(expression.sep)

        return ret

    model = inner_get_model(concept_def)

    return model
|
||||
|
||||
def detect_infinite_recursion(self, concepts_to_resolve):
    """Return the concepts whose grammar recurses on themselves.

    A grammar is flagged when the concept can be reached again through a
    ``ConceptExpression`` referencing it, the first alternative of an
    ``OrderedChoice``, or any element of a ``Sequence``.

    :param concepts_to_resolve: concepts (or concept names) to check
    :return: list of concepts that recurse and should be removed
    """

    # infinite recursion matcher
    def _is_infinite_recursion(ref_concept, node):
        if isinstance(node, ConceptExpression):
            if node.concept == ref_concept:
                return True

            # String references must be resolved to actual concepts first;
            # unknown names cannot recurse.
            if isinstance(node.concept, str):
                to_match = self.get_concept(node.concept)
                if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
                    return False
            else:
                to_match = node.concept

            if to_match not in self.concepts_grammars:
                return False

            # Follow the referenced concept's own grammar.
            # NOTE(review): mutually recursive grammars (A -> B, B -> A) would
            # make this helper recurse without a visited set — confirm such
            # cycles cannot occur in concepts_grammars.
            return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])

        if isinstance(node, OrderedChoice):
            # Only the first alternative can start the match.
            return _is_infinite_recursion(ref_concept, node.nodes[0])

        if isinstance(node, Sequence):
            # Check every element of the sequence.
            for node in node.nodes:  # NOTE: rebinds the parameter name; safe, the original is no longer needed
                if _is_infinite_recursion(ref_concept, node):
                    return True
            return False

        return False

    removed_concepts = []
    for e in concepts_to_resolve:
        if isinstance(e, str):
            # Names must be resolved; unknown concepts are skipped.
            e = self.get_concept(e)
            if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
                continue

        if e not in self.concepts_grammars:
            continue

        to_resolve = self.concepts_grammars[e]
        if _is_infinite_recursion(e, to_resolve):
            removed_concepts.append(e)
    return removed_concepts
|
||||
|
||||
def parse(self, context, text):
    """Greedily match every known concept grammar against *text*.

    Repeatedly tries all grammars in ``self.concepts_grammars`` at the
    current token position; tokens matched by no grammar are accumulated
    into an ``UnrecognizedTokensNode``. Produces one ``ReturnValueConcept``
    per combination ("possibility") of matched concepts.

    :param context: execution context providing the sheerka instance
    :param text: raw input to tokenize and parse
    :return: a single return value, or a list when several possibilities exist

    NOTE(review): the empty-text branch uses ``context.sheerka`` while the
    reset-failure branch uses ``self.sheerka`` — presumably ``reset_parser``
    assigns ``self.sheerka = context.sheerka``; confirm against BaseParser.
    """
    if text == "":
        return context.sheerka.ret(
            self.name,
            False,
            context.sheerka.new(BuiltinConcepts.IS_EMPTY)
        )

    if not self.reset_parser(context, text):
        # Tokenization/initialization failed; report the accumulated errors.
        return self.sheerka.ret(
            self.name,
            False,
            context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

    concepts_found = [[]]
    unrecognized_tokens = None
    has_unrecognized = False

    # actually list of list
    # The first dimension is the number of possibilities found
    # The second dimension is the number of concepts found, under one possibility
    #
    # Example 1
    # concept foo : 'one' 'two'
    # concept bar : 'one' 'two'
    # input 'one two' -> will produce two possibilities (foo and bar).
    #
    # Example 2
    # concept foo : 'one'
    # concept bar : 'two'
    # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)

    while True:
        init_pos = self.pos
        res = []

        # Try every grammar from the same starting position.
        for concept, grammar in self.concepts_grammars.items():
            self.seek(init_pos)
            node = grammar.parse(self)  # a node is TerminalNode or NonTerminalNode
            if node is not None and node.end != -1:
                updated_concept = self.finalize_concept(context.sheerka, concept, node)
                concept_node = ConceptNode(
                    updated_concept,
                    node.start,
                    node.end,
                    self.tokens[node.start: node.end + 1],
                    None,
                    node)
                res.append(concept_node)

        if len(res) == 0:  # not recognized
            # Collect the token into the running unrecognized-tokens node.
            self.seek(init_pos)
            if unrecognized_tokens:
                unrecognized_tokens.add_token(self.get_token(), init_pos)
            else:
                unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])

            if not self.next_token(False):
                break

        else:  # some concepts are recognized
            # Flush any pending (non-whitespace) unrecognized run before
            # appending the recognized concepts.
            if unrecognized_tokens and unrecognized_tokens.not_whitespace():
                unrecognized_tokens.fix_source()
                concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
                has_unrecognized = True
            unrecognized_tokens = None

            res = self.get_bests(res)  # only keep the concepts that eat the more tokens
            concepts_found = core.utils.product(concepts_found, res)

            # loop
            self.seek(res[0].end)
            if not self.next_token(False):
                break

    # Fix the source for unrecognized tokens
    if unrecognized_tokens and unrecognized_tokens.not_whitespace():
        unrecognized_tokens.fix_source()
        concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
        has_unrecognized = True

    # else
    # returns as many ReturnValue than choices found
    ret = []
    for choice in concepts_found:
        ret.append(
            self.sheerka.ret(
                self.name,
                not has_unrecognized,
                self.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=text,
                    body=choice,
                    try_parsed=choice)))

    if len(ret) == 1:
        self.log_result(context, text, ret[0])
        return ret[0]
    else:
        self.log_multiple_results(context, text, ret)
        return ret
|
||||
|
||||
def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
    """
    Updates the properties of the concept
    Goes in recursion if the property is a concept

    :param sheerka: sheerka instance used to instantiate concepts
    :param template: concept template whose key/id identify the new concept
    :param underlying: parse-tree node the concept was matched from
    :param init_empty_body: when True, fill an empty concept body from *underlying*
    :return: the freshly instantiated, property-filled concept
    """

    # this cache is to make sure that we return the same concept for the same ConceptExpression
    _underlying_value_cache = {}

    def _add_prop(_concept, prop_name, value):
        """
        Adds a new entry,
        makes a list if the property already exists
        """
        if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
            # new entry
            _concept.compiled[prop_name] = value
        else:
            # make a list if there was a value
            previous_value = _concept.compiled[prop_name]
            if isinstance(previous_value, list):
                previous_value.append(value)
            else:
                new_value = [previous_value, value]
                _concept.compiled[prop_name] = new_value

    def _look_for_concept_match(_underlying):
        # Walk down single-child chains looking for a node whose expression
        # is a ConceptExpression; None when the chain branches or ends.
        if isinstance(_underlying.parsing_expression, ConceptExpression):
            return _underlying

        if not isinstance(_underlying, NonTerminalNode):
            return None

        if len(_underlying.children) != 1:
            return None

        return _look_for_concept_match(_underlying.children[0])

    def _get_underlying_value(_underlying):
        # Either recurse into a nested concept match (memoized by node id)
        # or keep the raw source wrapped so it is not resolved later.
        concept_match_node = _look_for_concept_match(_underlying)
        if concept_match_node:
            if id(concept_match_node) in _underlying_value_cache:
                result = _underlying_value_cache[id(concept_match_node)]
            else:
                ref_tpl = concept_match_node.parsing_expression.concept
                result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
                _underlying_value_cache[id(concept_match_node)] = result
        else:
            result = DoNotResolve(_underlying.source)

        return result

    def _process_rule_name(_concept, _underlying):
        # A rule name on the expression marks a named property to fill.
        if _underlying.parsing_expression.rule_name:
            value = _get_underlying_value(_underlying)
            _add_prop(_concept, _underlying.parsing_expression.rule_name, value)

        if isinstance(_underlying, NonTerminalNode):
            for child in _underlying.children:
                _process_rule_name(_concept, child)

    # Instantiate from the template key; a template id refines the key.
    key = (template.key, template.id) if template.id else template.key
    concept = sheerka.new(key)
    if init_empty_body and concept.metadata.body is None:
        value = _get_underlying_value(underlying)
        concept.compiled[ConceptParts.BODY] = value
        if underlying.parsing_expression.rule_name:
            _add_prop(concept, underlying.parsing_expression.rule_name, value)

    if isinstance(underlying, NonTerminalNode):
        for node in underlying.children:
            _process_rule_name(concept, node)

    return concept
|
||||
|
||||
@staticmethod
|
||||
def get_bests(results):
|
||||
"""
|
||||
Returns the result that is the longest
|
||||
:param results:
|
||||
:return:
|
||||
"""
|
||||
by_end_pos = defaultdict(list)
|
||||
for result in results:
|
||||
by_end_pos[result.end].append(result)
|
||||
|
||||
return by_end_pos[max(by_end_pos)]
|
||||
|
||||
|
||||
class ParsingExpressionVisitor:
    """Walks a ParsingExpression tree, dispatching on the node's class name.

    Subclasses may define ``visit_<ClassName>`` handlers; nodes without a
    handler fall back to :meth:`generic_visit`, which recurses into the
    node's ``elements``. A subclass may also define ``visit_all`` to be
    notified of every generically-visited node.
    """

    def visit(self, parsing_expression):
        """Dispatch to ``visit_<ClassName>`` or to :meth:`generic_visit`."""
        handler_name = "visit_" + type(parsing_expression).__name__
        handler = getattr(self, handler_name, self.generic_visit)
        return handler(parsing_expression)

    def generic_visit(self, parsing_expression):
        """Default handler: notify ``visit_all`` (if defined) and recurse."""
        if hasattr(self, "visit_all"):
            self.visit_all(parsing_expression)

        for child in parsing_expression.elements:
            if isinstance(child, Concept):
                # Normalize bare concepts into expressions before visiting.
                self.visit(ConceptExpression(child.key or child.name))
            elif isinstance(child, str):
                self.visit(StrMatch(child))
            else:
                self.visit(child)
|
||||
@@ -0,0 +1,110 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.tokenizer import TokenKind, Token
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
|
||||
from parsers.MultipleConceptsParser import MultipleConceptsParser
|
||||
from core.concept import VARIABLE_PREFIX
|
||||
import logging
|
||||
|
||||
multiple_concepts_parser = MultipleConceptsParser()
|
||||
|
||||
|
||||
class ConceptsWithConceptsParser(BaseParser):
    """Resolves a MultipleConceptsParser result into a single composite concept.

    The incoming nodes (already-recognized concepts plus unrecognized text)
    are turned into a concept key such as ``"__var__0 plus __var__1"``; when
    a concept exists for that key, its properties are filled from the nodes.
    """

    def __init__(self, **kwargs):
        # Name "ConceptsWithConcepts" with priority 25; extra kwargs unused.
        super().__init__("ConceptsWithConcepts", 25)

    @staticmethod
    def get_tokens(nodes):
        """Flatten *nodes* into a token list.

        ConceptNode entries collapse into a single CONCEPT token positioned
        at their first token; other nodes contribute their tokens minus
        EOF/NEWLINE/WHITESPACE.
        """
        tokens = []

        for node in nodes:
            if isinstance(node, ConceptNode):
                index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
                tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
            else:
                for token in node.tokens:
                    if token.type == TokenKind.EOF:
                        break
                    elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
                        continue
                    else:
                        tokens.append(token)

        return tokens

    @staticmethod
    def get_key(nodes):
        """Build the lookup key: raw text for unrecognized runs, numbered
        ``__var__N`` placeholders for everything else."""
        key = ""
        index = 0
        for node in nodes:
            if key:
                key += " "

            if isinstance(node, UnrecognizedTokensNode):
                key += node.source.strip()
            else:
                key += f"{VARIABLE_PREFIX}{index}"
                index += 1

        return key

    def finalize_concept(self, context, concept, nodes):
        """Fill *concept*'s compiled properties, in order, from *nodes*.

        Only ConceptNode and SourceCodeNode entries consume a property slot;
        other node kinds are skipped without advancing the index.
        """
        index = 0
        for node in nodes:

            if isinstance(node, ConceptNode):
                prop_name = list(concept.props.keys())[index]
                concept.compiled[prop_name] = node.concept
                # NOTE(review): the quoting in this message looks off
                # ('{prop_name}=' vs '{prop_name}'=) — confirm intent.
                context.log(
                    self.verbose_log,
                    f"Setting property '{prop_name}='{node.concept}'.",
                    self.name)
                index += 1
            elif isinstance(node, SourceCodeNode):
                # Source code becomes a PARSER_RESULT wrapped in a return value.
                prop_name = list(concept.props.keys())[index]
                sheerka = context.sheerka
                value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
                concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
                context.log(
                    self.verbose_log,
                    f"Setting property '{prop_name}'='Python({node.source})'.",
                    self.name)
                index += 1

        return concept

    def parse(self, context, text):
        """Accept only MultipleConceptsParser PARSER_RESULTs; resolve them.

        :return: None when the input is not for this parser; a failing
            NOT_FOR_ME return value when no concept matches the key;
            otherwise one (or a list of) successful PARSER_RESULT(s).
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        if not text.parser == multiple_concepts_parser:
            return None

        nodes = text.body

        concept_key = self.get_key(nodes)
        concept = sheerka.new(concept_key)
        if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))

        # sheerka.new may return several candidate concepts for one key.
        concepts = concept if hasattr(concept, "__iter__") else [concept]
        for concept in concepts:
            self.finalize_concept(context, concept, nodes)

        res = []
        for concept in concepts:
            res.append(sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=text.source,
                    body=concept,
                    try_parsed=None)))

        return res[0] if len(res) == 1 else res
|
||||
@@ -0,0 +1,430 @@
|
||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
|
||||
from core.concept import ConceptParts
|
||||
import core.builtin_helpers
|
||||
import core.utils
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
|
||||
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
|
||||
from dataclasses import dataclass, field
|
||||
from parsers.BnfParser import BnfParser
|
||||
from core.sheerka.Sheerka import ExecutionContext
|
||||
|
||||
|
||||
@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    # Tokens the node was built from; excluded from equality and repr.
    tokens: list = field(compare=False, repr=False)
|
||||
|
||||
|
||||
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    """Marker base for every error node produced by the default parser."""
    pass
|
||||
|
||||
|
||||
@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    """A token appeared where one of *expected_tokens* was required."""
    # Human-readable description of the failure.
    message: str
    # Token kinds/keywords that would have been accepted at this position.
    expected_tokens: list
|
||||
|
||||
|
||||
@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """
    The input is recognized, but there is a syntax error
    """
    # Human-readable description of the syntax problem.
    message: str
|
||||
|
||||
|
||||
@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    # The raw input this parser could not handle.
    text: str
|
||||
|
||||
|
||||
@dataclass()
class NameNode(DefaultParserNode):
    """A name built from tokens; compares, hashes and prints by its rendered name."""

    def get_name(self):
        """Join token values with single spaces, stopping at the EOF token.

        WHITESPACE tokens are dropped; STRING tokens lose their surrounding
        quote characters. NEWLINE tokens, if present, are kept verbatim.
        """
        parts = []
        for token in self.tokens:
            if token.type == TokenKind.EOF:
                break
            if token.type == TokenKind.WHITESPACE:
                continue
            if token.type == TokenKind.STRING:
                parts.append(token.value[1:-1])  # strip the enclosing quotes
            else:
                parts.append(token.value)
        return " ".join(parts)

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        return isinstance(other, NameNode) and self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())
|
||||
|
||||
|
||||
@dataclass()
class DefConceptNode(DefaultParserNode):
    """Parse result of a ``def concept ...`` statement, one field per part."""
    # NOTE(review): each default below is a single NotInitializedNode instance
    # shared by every DefConceptNode — harmless if it is never mutated; confirm.
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()
    definition: ReturnValueConcept = NotInitializedNode()

    def get_asts(self):
        """Return {part: value} for every part carrying a parsed AST.

        A part qualifies when its value is a ReturnValueConcept whose body is
        a ParserResultConcept whose own body exposes an ``ast_`` attribute.
        """
        asts = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body,
                                                                         ParserResultConcept) and hasattr(
                    prop_value.body.body, "ast_"):
                asts[part_key] = prop_value
                #asts[part_key] = prop_value.body.body.ast_
        return asts
|
||||
|
||||
|
||||
@dataclass()
class IsaConceptNode(DefaultParserNode):
    """Parse result of a ``<concept> isa <set>`` statement."""
    # The concept being classified.
    concept: NameNode = NotInitializedNode()
    # The set/category it belongs to.
    set: NameNode = NotInitializedNode()
|
||||
|
||||
|
||||
class DefaultParser(BaseParser):
    """
    Parse sheerka specific grammar (like def concept)

    Recognizes two statement shapes:
      * ``def concept <name> [from ...] [as ...] [where ...] [pre ...] [post ...]``
      * ``<name> isa <set>``
    Errors are accumulated via :meth:`add_error` into ``self.error_sink``
    (presumably declared on BaseParser — confirm) instead of being raised.
    """

    def __init__(self, **kwargs):
        # Name "Default" with priority 50; extra kwargs are accepted but unused.
        BaseParser.__init__(self, "Default", 50)
        self.lexer_iter = None   # iterator over the current Tokenizer
        self._current = None     # token under the cursor (None when exhausted)
        self.context: ExecutionContext = None
        self.text = None         # raw input currently being parsed
        self.sheerka = None      # shortcut to context.sheerka

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example
        def concept add one to a as:
            def func(x):
                return x+1
            func(a)
        indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error

        The common leading indent (taken from the first indented line) is
        subtracted from every subsequent line, mutating the WHITESPACE
        tokens in place. Returns the tokens after the opening
        ``:<newline><indent>`` prefix, or an error node.
        :param tokens:
        :return:
        """
        if tokens[0].type != TokenKind.COLON:
            # Single-line declaration: nothing to fix.
            return tokens

        if len(tokens) < 3:
            return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])

        if tokens[1].type != TokenKind.NEWLINE:
            return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])

        if tokens[2].type != TokenKind.WHITESPACE:
            return SyntaxErrorNode([tokens[2]], "Indentation not found.")
        indent_size = len(tokens[2].value)

        # now fix the other indentations
        i = 3
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])

                if len(tokens[i + 1].value) < indent_size:
                    return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")

                # Replace the whitespace with the outdented equivalent (spaces).
                tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
            i += 1

        return tokens[3:]

    def reset_parser(self, context, text):
        """Bind the parser to *context*/*text* and prime the first token."""
        self.context = context
        self.sheerka = context.sheerka

        self.text = text
        self.lexer_iter = iter(Tokenizer(text))
        self._current = None

        self.next_token()

    def add_error(self, error, next_token=True):
        """Record *error*, optionally advancing past the offending token.

        :return: the error node, so callers can ``return self.add_error(...)``
        """
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        # Current token; None once the lexer is exhausted.
        return self._current

    def next_token(self, skip_whitespace=True):
        """Advance the cursor, optionally skipping whitespace/newlines.

        Sets ``self._current`` to None at end of input.
        """
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None

        return

    def parse(self, context, text):
        """Parse one statement out of the string *text*.

        Non-string input is declined with a NOT_FOR_ME result.
        NOTE(review): ``has_error``/``error_sink`` do not appear to be reset
        here — presumably BaseParser/reset handles that; confirm stale errors
        cannot leak across calls.
        """
        # default parser can only manage string text
        if not isinstance(text, str):
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
            self.log_result(context, text, ret)
            return ret

        tree = None
        try:
            self.reset_parser(context, text)
            tree = self.parse_statement()
        except core.tokenizer.LexerError as e:
            # core.tokenizer is reachable through the `core` package imports
            # at the top of this file.
            self.add_error(e, False)

        # If a error is found it must be sent to error_sink
        # tree must contain what was recognized

        if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
            body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
        else:
            body = self.get_return_value_body(context.sheerka, text, tree, tree)
            # body = self.sheerka.new(
            #     BuiltinConcepts.PARSER_RESULT,
            #     parser=self,
            #     source=text,
            #     body=self.error_sink if self.has_error else tree,
            #     try_parsed=tree)

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            body)

        self.log_result(context, text, ret)
        return ret

    def parse_statement(self):
        """Dispatch on the leading keyword: ``def`` concept vs ``isa``."""
        token = self.get_token()
        if token.value == Keywords.DEF:
            self.next_token()
            self.context.log(self.verbose_log, "Keyword DEF found.", self.name)
            return self.parse_def_concept(token)
        else:
            return self.parse_isa_concept()

    def parse_def_concept(self, def_token):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]

        :param def_token: the already-consumed DEF keyword token
        :return: a DefConceptNode (possibly partially filled) or an error node
        """

        # init
        keywords_tokens = [def_token]
        concept_found = DefConceptNode(keywords_tokens)

        # the definition of a concept consists of several parts
        # Keywords.CONCEPT to get the name of the concept
        # Keywords.FROM [Keywords.REGEX] to get the definition of the concept
        # Keywords.AS to get the body
        # Keywords.WHERE to get the conditions to recognize for the variables
        # Keywords.PRE to know if the conditions to evaluate the concept
        # Keywords.POST to apply or verify once the concept is executed
        #
        # Regroup the tokens by parts
        first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)

        if first_token.type == TokenKind.EOF:
            return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))

        # get the name
        concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)

        # get the definition
        concept_found.definition = self.get_concept_definition(concept_found, tokens_found_by_parts)

        # get the ASTs for the remaining parts
        asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
        concept_found.where = asts_found_by_parts[Keywords.WHERE]
        concept_found.pre = asts_found_by_parts[Keywords.PRE]
        concept_found.post = asts_found_by_parts[Keywords.POST]
        concept_found.body = asts_found_by_parts[Keywords.AS]

        return concept_found

    def parse_isa_concept(self):
        """Parse ``<name> isa <set>``; CannotHandle when 'isa' is missing."""
        concept_name = self.parse_concept_name()
        if isinstance(concept_name, DefaultParserErrorNode):
            return concept_name

        keyword = []
        token = self.get_token()
        if token.value != Keywords.ISA:
            return self.add_error(CannotHandleErrorNode([token], ""))
        keyword.append(token)
        self.next_token()

        set_name = self.parse_concept_name()
        return IsaConceptNode(keyword, concept_name, set_name)

    def parse_concept_name(self):
        """Collect tokens until EOF or a keyword; empty names are errors."""
        tokens = []
        token = self.get_token()

        while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD):
            tokens.append(token)
            self.next_token()
            token = self.get_token()

        if len(tokens) == 0:
            return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
        else:
            return NameNode(tokens)

    def regroup_tokens_by_parts(self, keywords_tokens):
        """Bucket the remaining tokens under the part keyword they follow.

        :param keywords_tokens: list collecting every part keyword token seen
        :return: (first token seen, {keyword: tokens-or-None}) — a part keyword
            declared twice is reported and its tokens stay in the current part
        """

        def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]

        # tokens found, when trying to recognize the parts
        tokens_found_by_parts = {
            Keywords.CONCEPT: [],
            Keywords.FROM: None,
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
        }
        current_part = Keywords.CONCEPT
        token = self.get_token()
        first_token = token

        # loop thru the tokens, and put them in the correct tokens_found_by_parts entry
        while token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                keywords_tokens.append(token)  # keep track of the keywords
                keyword = token.value
                if tokens_found_by_parts[keyword]:
                    # a part is defined more than once
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                    tokens_found_by_parts[current_part].append(token)  # adds the token again
                else:
                    tokens_found_by_parts[keyword] = [token]
                    current_part = keyword
                    self.next_token()
            else:
                tokens_found_by_parts[current_part].append(token)
                self.next_token(False)

            token = self.get_token()

        return first_token, tokens_found_by_parts

    def get_concept_name(self, first_token, tokens_found_by_parts):
        """Extract the concept name tokens; report missing/invalid names.

        NOTE(review): when the name token list is empty the error is added
        but ``name_tokens[-1]`` below would still raise IndexError — confirm
        that path cannot be reached.
        """
        name_first_token_index = 1
        token = self.get_token()
        if first_token.value != Keywords.CONCEPT:
            # 'def' was not followed by 'concept'; keep all tokens as the name.
            self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
            name_first_token_index = 0

        name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
        if len(name_tokens) == name_first_token_index:
            self.add_error(SyntaxErrorNode([], "Name is mandatory"))

        if name_tokens[-1].type == TokenKind.NEWLINE:
            name_tokens = name_tokens[:-1]  # strip trailing newlines

        if TokenKind.NEWLINE in [t.type for t in name_tokens]:
            self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name."))

        name_node = NameNode(name_tokens[name_first_token_index:])  # skip the first token
        return name_node

    def get_concept_definition(self, current_concept_def, tokens_found_by_parts):
        """Parse the ``from bnf ...`` part with the BnfParser, if present.

        :return: the BNF parsing result, or NotInitializedNode when the part
            is absent, not BNF, empty, or failed to parse
        """
        if tokens_found_by_parts[Keywords.FROM] is None:
            return NotInitializedNode()

        definition_tokens = tokens_found_by_parts[Keywords.FROM]
        # index 0 is the FROM keyword itself; index 1 selects the format.
        if definition_tokens[1].value != Keywords.BNF:
            return NotInitializedNode()

        tokens = core.utils.strip_tokens(definition_tokens[2:])
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
            return NotInitializedNode()

        regex_parser = BnfParser()
        with self.context.push(self.name, obj=current_concept_def) as sub_context:
            parsing_result = regex_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)

        if not parsing_result.status:
            self.add_error(parsing_result.value)
            return NotInitializedNode()

        return parsing_result

    def get_concept_parts(self, tokens_found_by_parts):
        """Delegate each remaining part's tokens to the other parsers.

        :return: {keyword: parsing result or NotInitializedNode}
        """
        asts_found_by_parts = {
            Keywords.AS: NotInitializedNode(),
            Keywords.WHERE: NotInitializedNode(),
            Keywords.PRE: NotInitializedNode(),
            Keywords.POST: NotInitializedNode(),
        }

        for keyword in tokens_found_by_parts:
            if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
                continue  # already done

            tokens = tokens_found_by_parts[keyword]
            if tokens is None:
                continue  # nothing to do

            if len(tokens) == 1:  # check for empty declarations
                self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
                continue

            tokens = self.fix_indentation(tokens[1:])  # manage multi-lines declarations
            if isinstance(tokens, ErrorNode):
                self.add_error(tokens)
                continue

            # ask the other parsers if they recognize the tokens
            with self.context.push(self.name, desc=f"Parsing {keyword}") as sub_context:
                sub_context.log_new(self.verbose_log)
                to_parse = self.sheerka.ret(
                    sub_context.who,
                    True,
                    self.sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
                steps = [BuiltinConcepts.PARSING]
                parsed = self.sheerka.execute(sub_context, to_parse, steps, self.verbose_log)
                parsing_result = core.builtin_helpers.expect_one(sub_context, parsed, self.verbose_log)
                sub_context.add_values(return_values=parsing_result)

            if not parsing_result.status:
                self.add_error(parsing_result.value)
                continue

            asts_found_by_parts[keyword] = parsing_result

        return asts_found_by_parts
|
||||
@@ -0,0 +1,28 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from parsers.BaseParser import BaseParser
|
||||
|
||||
|
||||
class EmptyStringParser(BaseParser):
    """Recognizes "nothing": blank strings, empty lists and ``None``."""

    def __init__(self, **kwargs):
        # Name "EmptyString" with priority 90; extra kwargs are unused.
        BaseParser.__init__(self, "EmptyString", 90)

    def parse(self, context, text):
        """Return a successful NOP result for blank input, NOT_FOR_ME otherwise."""
        sheerka = context.sheerka

        is_blank = (
            (isinstance(text, str) and text.strip() == "")
            or (isinstance(text, list) and text == [])
            or text is None
        )

        if is_blank:
            nop_body = sheerka.new(BuiltinConcepts.NOP)
            parser_result = sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source="",
                body=nop_body)
            ret = sheerka.ret(self.name, True, parser_result)
        else:
            ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))

        self.log_result(context, text, ret)
        return ret
|
||||
@@ -0,0 +1,150 @@
|
||||
import logging
|
||||
|
||||
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
|
||||
from parsers.BaseParser import BaseParser
|
||||
from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
|
||||
from core.concept import VARIABLE_PREFIX
|
||||
|
||||
|
||||
class ExactConceptParser(BaseParser):
    """
    Tries to recognize a single concept

    Every combination of the input words with ``__var__N`` placeholders is
    looked up as a concept key; matched placeholders become concept
    properties.
    """

    # Inputs longer than this many words are rejected: the number of
    # placeholder combinations grows combinatorially with word count.
    MAX_WORDS_SIZE = 10

    def __init__(self, **kwargs):
        # Name "ExactConcept" with priority 80; extra kwargs are unused.
        BaseParser.__init__(self, "ExactConcept", 80)

    def parse(self, context, text):
        """
        text can be string, but text can also be an list of tokens
        :param context:
        :param text:
        :return: a list of successful ReturnValueConcept when recognized,
            otherwise a single failing return value (ERROR, CONCEPT_TOO_LONG
            or UNKNOWN_CONCEPT)
        """

        context.log(self.verbose_log, f"Parsing '{text}'", self.name)
        res = []
        sheerka = context.sheerka
        try:
            words = self.get_words(text)
        except LexerError as e:
            context.log(self.verbose_log, f"Error found in tokenizer {e}", self.name)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))

        if len(words) > self.MAX_WORDS_SIZE:
            context.log(self.verbose_log, f"Max words reached. Stopping.", self.name)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))

        recognized = False
        for combination in self.combinations(words):

            concept_key = " ".join(combination)
            result = sheerka.new(concept_key)

            if sheerka.isinstance(result, BuiltinConcepts.UNKNOWN_CONCEPT):
                continue

            # concepts = result.body if sheerka.isinstance(result, BuiltinConcepts.ENUMERATION) else [result]
            concepts = result if isinstance(result, list) else [result]

            for concept in concepts:
                context.log(self.verbose_log, f"Recognized concept {concept}.", self.name)
                # update the properties if needed
                for i, token in enumerate(combination):
                    if token.startswith(VARIABLE_PREFIX):
                        # __var__N placeholders carry their property index.
                        index = int(token[len(VARIABLE_PREFIX):])
                        concept.def_prop_by_index(index, words[i])
                        if self.verbose_log.isEnabledFor(logging.DEBUG):
                            prop_name = list(concept.props.keys())[index]
                            context.log(
                                self.verbose_log,
                                f"Added property {index}: {prop_name}='{words[i]}'.",
                                self.name)

                res.append(ReturnValueConcept(
                    self.name,
                    True,
                    context.sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=text if isinstance(text, str) else self.get_text_from_tokens(text),
                        body=concept,
                        try_parsed=concept)))
                recognized = True

        if recognized:
            if len(res) == 1:
                self.log_result(context, text, res[0])
            else:
                self.log_multiple_results(context, text, res)
                # NOTE(review): both branches end with `return res` — the
                # inner return below is redundant with the outer one.
                return res
            return res

        ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
        self.log_result(context, text, ret)
        return ret

    @staticmethod
    def get_words(text):
        """Tokenize *text* (str or token iterable) into plain word strings.

        EOF stops the scan; NEWLINE/WHITESPACE tokens are dropped; Keywords
        enum values are flattened to their string value.
        """
        tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
        res = []
        for t in tokens:
            if t.type == TokenKind.EOF:
                break
            if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE:
                continue
            res.append(t.value.value if isinstance(t.value, Keywords) else t.value)
        return res

    def combinations(self, iterable):
        # combinations('foo', 'bar', 'baz') -->
        # ('foo', 'bar', 'baz'),
        # ('__var__0', 'bar', 'baz'),
        # ('foo', '__var__0', 'baz'),
        # ('foo', 'bar', '__var__0'),
        # ('__var__0', '__var__1', 'baz'),
        # ('__var__0', 'bar', '__var__1'),
        # ('foo', '__var__0', '__var__1'),
        # ('__var__0', '__var__1', '__var__2')]
        #
        # For each subset size r, every choice of r word positions is replaced
        # with placeholders (index-stepping follows the itertools.combinations
        # recipe). Results are collected into a set, so order is unspecified.

        pool = tuple(iterable)
        n = len(pool)

        res = set()

        for r in range(0, n + 1):
            indices = list(range(r))
            res.add(self.get_tuple(pool, indices))
            while True:
                # Find the rightmost index that can still be advanced.
                for i in reversed(range(r)):
                    if indices[i] != i + n - r:
                        break
                else:
                    break  # all index combinations of size r exhausted
                indices[i] += 1
                for j in range(i + 1, r):
                    indices[j] = indices[j - 1] + 1
                res.add(self.get_tuple(pool, indices))

        return res

    @staticmethod
    def get_tuple(pool, indices):
        """Return *pool* with the words at *indices* replaced by placeholders.

        Placeholders are numbered in first-seen order. NOTE(review): the
        substitution map is keyed by word VALUE, so duplicate words in *pool*
        share one placeholder and are replaced at every occurrence — confirm
        this is the intended behavior for repeated words.
        """
        res = []
        vars = {}
        k = 0

        # init vars
        for i in indices:
            value = pool[i]
            if value not in vars:
                vars[pool[i]] = f"{VARIABLE_PREFIX}{k}"
                k += 1

        # create tuple
        for i in range(len(pool)):
            value = pool[i]
            res.append(vars[value] if value in vars else value)
        return tuple(res)
|
||||
@@ -0,0 +1,164 @@
|
||||
import ast
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.tokenizer import TokenKind
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
|
||||
import core.utils
|
||||
from parsers.PythonParser import PythonParser
|
||||
|
||||
# Module-level instance: MultipleConceptsParser.parse() only accepts parser
# results whose .parser compares equal to this ConceptLexerParser.
concept_lexer_parser = ConceptLexerParser()
|
||||
|
||||
|
||||
class MultipleConceptsParser(BaseParser):
    """
    Parser that takes the result of ConceptLexerParser and tries to resolve
    the unrecognized tokens token by token.

    It is a success when it returns a list of ConceptNode exclusively.
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "MultipleConcepts", 45)

    @staticmethod
    def finalize(nodes_found, unrecognized_tokens):
        """Flush the pending unrecognized-token accumulator into the result
        matrix.

        Returns the (possibly extended) nodes_found and None, so the caller
        can reset its accumulator in a single tuple assignment.
        """
        if not unrecognized_tokens:
            return nodes_found, unrecognized_tokens

        unrecognized_tokens.fix_source()
        # pure-whitespace runs are dropped instead of becoming result nodes
        if unrecognized_tokens.not_whitespace():
            nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])

        return nodes_found, None

    @staticmethod
    def create_or_add(unrecognized_tokens, token, index):
        """Append *token* to the accumulator, creating it on first use."""
        if unrecognized_tokens:
            unrecognized_tokens.add_token(token, index)
        else:
            unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
        return unrecognized_tokens

    def parse(self, context, text):
        """Resolve the UnrecognizedTokensNode entries of a ConceptLexerParser
        result into concept nodes or source-code nodes.

        Returns None when *text* is not a ConceptLexerParser result; otherwise
        a single parser result or a list of alternatives.
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        # only consume the output of the module-level ConceptLexerParser
        if text.parser != concept_lexer_parser:
            return None

        # FIX: removed a duplicated 'sheerka = context.sheerka' assignment.
        nodes = text.value
        nodes_found = [[]]
        concepts_only = True

        for node in nodes:
            if isinstance(node, UnrecognizedTokensNode):
                unrecognized_tokens = None
                i = 0

                while i < len(node.tokens):

                    token_index = node.start + i
                    token = node.tokens[i]

                    # 1) a single token that names one or more known concepts
                    concepts_nodes = self.get_concepts_nodes(context, token_index, token)
                    if concepts_nodes is not None:
                        nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
                        nodes_found = core.utils.product(nodes_found, concepts_nodes)
                        i += 1
                        continue

                    # 2) the longest token prefix that parses as Python
                    source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
                    if source_code_node:
                        nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
                        nodes_found = core.utils.product(nodes_found, [source_code_node])
                        i += len(source_code_node.tokens)
                        continue

                    # 3) not a concept nor some source code: accumulate
                    unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
                    concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
                    i += 1

                # finish processing if needed
                nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)

            else:
                nodes_found = core.utils.product(nodes_found, [node])

        ret = []
        for choice in nodes_found:
            ret.append(
                sheerka.ret(
                    self.name,
                    concepts_only,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=text.source,
                        body=choice,
                        try_parsed=None))
            )

        if len(ret) == 1:
            self.log_result(context, text.source, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, text.source, ret)
            return ret

    @staticmethod
    def get_concepts_nodes(context, index, token):
        """
        Tries to recognize a concept
        from the universe of all known concepts.

        Returns a list of ConceptNode, or None when *token* is not an
        identifier naming a known concept.
        """
        if token.type != TokenKind.IDENTIFIER:
            return None

        concept = context.new_concept(token.value)
        if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
            concepts = concept if hasattr(concept, "__iter__") else [concept]
            concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
            return concepts_nodes

        return None

    @staticmethod
    def get_source_code_node(context, index, tokens):
        """
        Tries to recognize source code.
        For the time being, only Python is supported.

        Tries progressively shorter prefixes of *tokens* until one parses as
        a Python expression that also evaluates cleanly.
        :param context:
        :param tokens:
        :param index:
        :return: a SourceCodeNode or None
        """
        if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
            return None

        end_index = len(tokens)
        while end_index > 0:
            parser = PythonParser()
            tokens_to_parse = tokens[:end_index]
            res = parser.parse(context, tokens_to_parse)
            if res.status:
                # only expression are accepted
                ast_ = res.value.value.ast_
                if not isinstance(ast_, ast.Expression):
                    return None
                try:
                    # SECURITY: eval() executes the candidate expression with
                    # empty globals/locals to verify it is evaluable. If the
                    # input can come from untrusted users this is arbitrary
                    # code execution — review before exposing.
                    compiled = compile(ast_, "<string>", "eval")
                    eval(compiled, {}, {})
                except Exception:
                    return None

                source = BaseParser.get_text_from_tokens(tokens_to_parse)
                return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
            end_index -= 1

        return None
|
||||
@@ -0,0 +1,214 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.tokenizer import Tokenizer, LexerError, TokenKind
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||
from dataclasses import dataclass, field
|
||||
import ast
|
||||
import logging
|
||||
|
||||
from parsers.ConceptLexerParser import ConceptNode
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass()
class PythonErrorNode(ErrorNode):
    """Node produced when a chunk of text fails to parse as Python."""

    source: str  # the text that failed to parse
    exception: Exception  # the underlying parse/lexer error
|
||||
|
||||
|
||||
class PythonNode(Node):
    """Node wrapping a parsed piece of Python: source text, its AST, and any
    concepts recognized inside the expression."""

    def __init__(self, source, ast_=None, concepts=None):
        self.source = source
        # parse from source only when no pre-built AST is supplied
        self.ast_ = ast_ if ast_ else ast.parse(source, mode="eval") if source else None
        self.concepts = concepts or {}  # when concepts are recognized in the expression

    def __repr__(self):
        ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module"
        return "PythonNode(" + ast_type + "='" + self.source + "')"

    def __eq__(self, other):
        """Two nodes are equal when both source and (normalized) AST dumps
        match."""
        if not isinstance(other, PythonNode):
            return False

        if self.source != other.source:
            return False

        self_dump = self.get_dump(self.ast_)
        other_dump = self.get_dump(other.ast_)

        return self_dump == other_dump

    def __hash__(self):
        # BUG FIX: ast objects have no 'hash' attribute, so the previous
        # hash((self.source, self.ast_.hash)) raised AttributeError whenever
        # a PythonNode was hashed. Hashing the source alone is consistent
        # with __eq__ (equal nodes always have equal sources).
        return hash(self.source)

    @staticmethod
    def get_dump(ast_):
        """Dump *ast_* with cosmetic fields stripped so that semantically
        irrelevant details do not affect equality."""
        dump = ast.dump(ast_)
        for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]:
            dump = dump.replace(to_remove, "")
        return dump
|
||||
|
||||
|
||||
class PythonParser(BaseParser):
    """
    Parse Python scripts.

    Accepts either a raw string or a token list; tries to parse an
    expression first, then a statement.
    """

    def __init__(self, **kwargs):

        BaseParser.__init__(self, "Python", 50)
        # name reported in ast.parse() pseudo-filenames
        self.source = kwargs.get("source", "<undef>")

    def parse(self, context, text):
        """Parse *text* as Python and wrap the outcome in a PARSER_RESULT.

        :param context: execution context providing the sheerka instance
        :param text: raw source string or token list
        :return: a parser return value whose body is a PythonNode on success
            or the error sink on failure
        """
        sheerka = context.sheerka
        tree = None

        # CONCEPT tokens are rewritten into synthetic identifiers so the
        # Python grammar can accept them
        python_switcher = {
            TokenKind.CONCEPT: lambda t: f"__C__USE_CONCEPT__{t.value}__C__"
        }

        try:
            # NOTE(review): "c:" presumably marks embedded concept syntax
            # that requires re-tokenization — confirm against Tokenizer.
            if isinstance(text, str) and "c:" in text:
                source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
            elif isinstance(text, str):
                source = text
            else:
                source = self.get_text_from_tokens(text, python_switcher)
            source = source.strip()

            text = text if isinstance(text, str) else source

            # first, try to parse an expression
            res, tree, error = self.try_parse_expression(source)
            if not res:
                # then try to parse a statement
                res, tree, error = self.try_parse_statement(source)
                if not res:
                    # NOTE(review): has_error / error_sink are not initialized
                    # in this class — presumably provided by BaseParser;
                    # confirm they are reset between parse() calls.
                    self.has_error = True
                    error_node = PythonErrorNode(text, error)
                    self.error_sink.append(error_node)

        except LexerError as e:
            self.has_error = True
            self.error_sink.append(e)

        ret = sheerka.ret(
            self.name,
            not self.has_error,
            sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text,
                body=self.error_sink if self.has_error else PythonNode(text, tree),
                try_parsed=None))

        self.log_result(context, text, ret)
        return ret

    def try_parse_expression(self, text):
        """Attempt eval-mode parse; returns (ok, tree, error)."""
        try:
            return True, ast.parse(text, f"<{self.source}>", 'eval'), None
        except Exception as error:
            return False, None, error

    def try_parse_statement(self, text):
        """Attempt exec-mode parse; returns (ok, tree, error)."""
        try:
            return True, ast.parse(text, f"<{self.source}>", 'exec'), None
        except Exception as error:
            return False, None, error
|
||||
|
||||
|
||||
class PythonGetNamesVisitor(ast.NodeVisitor):
    """Visitor that collects every identifier (``ast.Name``) found in an
    AST into :attr:`names`."""

    def __init__(self):
        # identifiers encountered so far
        self.names = set()

    def visit_Name(self, node):
        # record the identifier; Name nodes carry no children worth visiting
        self.names.add(node.id)
|
||||
|
||||
class LexerNodeParserHelperForPython:
    """Helper class to parse a mix of concepts and Python."""

    def __init__(self):
        self.identifiers = {}  # cache of created identifiers, keyed by id(concept)
        self.identifiers_key = {}  # number of identifiers sharing the same root (prefix)

    def _get_identifier(self, concept):
        """
        Get an identifier for a concept.

        Returns the same identifier for the same concept object, and a
        different identifier when another concept shares the same name
        (a numeric suffix is appended).
        :param concept:
        :return: a synthetic Python identifier of the form __C__<name>__C__
        """
        if id(concept) in self.identifiers:
            return self.identifiers[id(concept)]

        identifier = "__C__" + self._sanitize(concept.key or concept.name)
        if concept.id:
            identifier += "__" + concept.id

        if identifier in self.identifiers_key:
            self.identifiers_key[identifier] += 1
            identifier += f"_{self.identifiers_key[identifier]}"
        else:
            self.identifiers_key[identifier] = 0

        identifier += "__C__"

        self.identifiers[id(concept)] = identifier
        return identifier

    @staticmethod
    def _sanitize(identifier):
        """Replace every non-alphanumeric character with '0' so the result
        is a valid Python identifier fragment."""
        # FIX: single-pass join instead of quadratic += concatenation
        return "".join(c if c.isalnum() else "0" for c in identifier)

    def parse(self, context, nodes):
        """Assemble *nodes* into one Python source string (concepts become
        synthetic identifiers) and parse it.

        :return: a PythonNode on success, otherwise the parse-error body
        """
        source = ""
        to_parse = ""

        concepts = {}  # the key is the Python identifier

        for node in nodes:
            if isinstance(node, ConceptNode):
                source += node.source
                if to_parse:
                    to_parse += " "
                concept = node.concept
                python_id = self._get_identifier(concept)
                to_parse += python_id
                concepts[python_id] = concept
            else:
                source += node.source
                to_parse += node.source

        with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
            sub_context.add_inputs(to_parse=to_parse)
            python_parser = PythonParser()
            result = python_parser.parse(sub_context, to_parse)
            sub_context.add_values(return_values=result)

            if result.status:
                python_node = result.body.body
                # restore the user-facing source and attach the mapping
                python_node.source = source
                python_node.concepts = concepts
                return python_node

            return result.body  # the error
|
||||
@@ -0,0 +1,105 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.ConceptLexerParser import ConceptNode
|
||||
from parsers.MultipleConceptsParser import MultipleConceptsParser
|
||||
from parsers.PythonParser import PythonParser
|
||||
|
||||
multiple_concepts_parser = MultipleConceptsParser()
|
||||
|
||||
|
||||
class PythonWithConceptsParser(BaseParser):
    """Parser that turns a MultipleConceptsParser result (a mix of concept
    nodes and raw source nodes) into a single Python expression, mapping
    each concept to a synthetic identifier."""

    def __init__(self, **kwargs):
        super().__init__("PythonWithConcepts", 20)
        # NOTE(review): these attributes appear unused — parse() keeps its
        # own local dictionaries to stay stateless; kept for compatibility.
        self.identifiers = None
        self.identifiers_key = None

    @staticmethod
    def sanitize(identifier):
        """Replace every non-alphanumeric character with '0' so the result
        is a valid Python identifier fragment."""
        # FIX: single-pass join instead of quadratic += concatenation
        return "".join(c if c.isalnum() else "0" for c in identifier)

    def parse(self, context, text):
        """Parse a MultipleConceptsParser result as Python-with-concepts.

        Returns None when *text* does not come from multiple_concepts_parser;
        otherwise a PARSER_RESULT whose body is the PythonNode (success) or
        the error body (failure).
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        if not text.parser == multiple_concepts_parser:
            return None

        nodes = text.body
        source = ""
        to_parse = ""
        identifiers = {}  # cache for already created identifiers (key: id(concept))
        identifiers_key = {}  # number of identifiers with the same root (prefix)
        python_ids_mappings = {}  # python identifier -> concept

        def _get_identifier(c):
            """
            Get an identifier for a concept.
            Returns the same identifier for the same concept object, and a
            different one when another concept shares the same name.

            Kept as a local function so the parser stays stateless:
            identifiers, identifiers_key and python_ids_mappings never
            become instance variables.
            :param c:
            :return:
            """
            if id(c) in identifiers:
                return identifiers[id(c)]

            identifier = "__C__" + self.sanitize(c.key or c.name)
            if c.id:
                identifier += "__" + c.id

            if identifier in identifiers_key:
                identifiers_key[identifier] += 1
                identifier += f"_{identifiers_key[identifier]}"
            else:
                identifiers_key[identifier] = 0

            identifier += "__C__"

            identifiers[id(c)] = identifier
            return identifier

        for node in nodes:
            if isinstance(node, ConceptNode):
                source += node.source
                if to_parse:
                    to_parse += " "
                concept = node.concept
                python_id = _get_identifier(concept)
                to_parse += python_id
                python_ids_mappings[python_id] = concept
            else:
                source += node.source
                to_parse += node.source

        with context.push(self, "Trying Python for '" + to_parse + "'") as sub_context:
            python_parser = PythonParser()
            result = python_parser.parse(sub_context, to_parse)

            if result.status:
                python_node = result.body.body
                # restore the user-facing source and attach the mapping
                python_node.source = source
                python_node.concepts = python_ids_mappings

                return sheerka.ret(
                    self.name,
                    True,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=source,
                        body=result.body.body,
                        try_parsed=None))

            else:

                return sheerka.ret(
                    self.name,
                    False,
                    result.body)
|
||||
@@ -0,0 +1,43 @@
|
||||
# How to serialize ?
|
||||
|
||||
## General rule
|
||||
- 1 byte : type of object code
|
||||
- int : version of the encoder
|
||||
- data : can be the json representation of the object
|
||||
|
||||
### Current supported types
|
||||
- E : events
|
||||
- J : Json object (with history management)
|
||||
- P : pickle (no history)
|
||||
- S : state (history, but not managed by the serializer )
|
||||
- C : concept (with history management)
|
||||
- D : concept definitions (no history management)
|
||||
- R : executionContext ('R' stands for Result or ReturnValue, no history management)
|
||||
|
||||
## How concepts are serialized ?
|
||||
- get the id of the concept
|
||||
- get the hash of the concept -> it will be its unique key
|
||||
structure of the serialisation:
|
||||
```json
|
||||
{
|
||||
"id" : "id",
|
||||
"parent": <hash code of the previous version of the concept> or "",
|
||||
"name": <name of the concept>,
|
||||
"where": "",
|
||||
"pre": "",
|
||||
"post": "",
|
||||
"body": "",
|
||||
"desc": "",
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
## Idea to manage ObjectSerializer
|
||||
Problem:
|
||||
During serialization, there is no issue: the match() method is the only way to obtain the correct serializer.
|
||||
During deserialization, however, all object serializers have type = '0' and version = 1.
|
||||
So how to choose the correct one ?
|
||||
A possible solution will be to add the type of the object to deserialize to the saved stream
|
||||
--> SHA256 for every object. Too much data saved.
|
||||
The id exists so that the Serializer can increment the version automatically (during registration) and keep the mapping within sdp.state.
|
||||
|
||||
@@ -0,0 +1,877 @@
|
||||
from datetime import datetime, date
|
||||
import hashlib
|
||||
import json
|
||||
import zlib
|
||||
|
||||
from sdp.sheerkaDataProviderIO import SheerkaDataProviderIO
|
||||
from sdp.sheerkaSerializer import Serializer, SerializerContext
|
||||
from core.sheerka_logger import get_logger
|
||||
|
||||
|
||||
def json_default_converter(o):
    """
    Default formatter for json.

    It's used when the json serializer does not know how to serialise a type.
    :param o: object json could not serialize
    :return: ISO-8601 string for date/datetime values
    """
    if isinstance(o, (date, datetime)):
        return o.isoformat()
    # NOTE(review): any other type falls through to an implicit None, which
    # json emits as null instead of raising TypeError — confirm this silent
    # degradation is intended.
|
||||
|
||||
|
||||
class Event(object):
    """
    Class that represents something that modifies the state of the system.
    """

    def __init__(self, message="", user="", date=None):
        # BUG FIX: the previous default 'date=datetime.now()' was evaluated
        # once at import time, so every default-constructed Event shared the
        # exact same timestamp. None is now the sentinel for "now".
        self.version = 1
        self.user = user
        self.date = date if date is not None else datetime.now()
        self.message = message
        self._digest = None  # lazily computed by get_digest()

    def get_digest(self):
        """
        Returns the digest of the event
        :return: hexa form of the sha256
        """
        if self._digest:
            return self._digest

        # default-constructed events short-circuit with a constant digest
        if self.message == "" and self.user == "":
            self._digest = "xxx"  # to speed unit tests
            return self._digest

        if not isinstance(self.message, str):
            raise NotImplementedError

        self._digest = hashlib.sha256(f"Event:{self.user}{self.date}{self.message}".encode("utf-8")).hexdigest()
        return self._digest

    def to_dict(self):
        """Serializable dict view of the event."""
        return self.__dict__

    def from_dict(self, as_dict):
        """Restore user/date/message from a dict produced by to_dict()."""
        self.user = as_dict["user"]
        self.date = datetime.fromisoformat(as_dict["date"])
        self.message = as_dict["message"]
|
||||
|
||||
|
||||
class ObjToUpdate:
    """
    Internal key/value wrapper holding an object plus its key and digest
    once they are detected.
    It's created to distinguish from {key, value}.
    """

    def __init__(self, obj, key=None, digest=None):
        self.obj = obj
        # tri-state caches: None = not computed yet, False = absent, True = set
        self.has_key = None
        self.has_digest = None
        self._key = None
        self._digest = None
        if key is not None:
            self.set_key(key)
        if digest is not None:
            self.set_digest(digest)

    def get_key(self):
        """Return the object's key, computing and caching it on first call."""
        if self.has_key is not None:
            return self._key if self.has_key else None
        key = SheerkaDataProvider.get_obj_key(self.obj)
        self.has_key = key is not None
        if self.has_key:
            self._key = key
        return key

    def get_digest(self):
        """Return the object's digest, computing and caching it on first call."""
        if self.has_digest is not None:
            return self._digest if self.has_digest else None
        digest = SheerkaDataProvider.get_obj_digest(self.obj)
        self.has_digest = digest is not None
        if self.has_digest:
            self._digest = digest
        return digest

    def set_digest(self, digest):
        self.has_digest = True
        self._digest = digest

    def set_key(self, key):
        self.has_key = True
        self._key = key
|
||||
|
||||
|
||||
class State:
    """
    Class that represents the state of the system (dictionary of all known entries)
    """

    def __init__(self):
        self.version = 1
        self.date = None     # timestamp of the last mutation
        self.parents = []    # digests of previous snapshots
        self.events = []     # digests of the events that produced this state
        self.data = {}       # entry -> value | dict | list

    @staticmethod
    def check_duplicate(items, obj: "ObjToUpdate", key):
        """Raise SheerkaDataProviderDuplicateKeyError when *obj*'s digest is
        already present among *items* (scalar or iterable)."""
        digest = obj.get_digest()
        if digest is None:
            return

        if not hasattr(items, "__iter__"):
            items = [items]

        for item in items:
            item_digest = SheerkaDataProvider.get_obj_digest(item)
            if item_digest == digest:
                raise SheerkaDataProviderDuplicateKeyError(key, obj.obj)

    def update(self, entry, obj: "ObjToUpdate", append=True):
        """
        adds obj to entry
        :param entry:
        :param obj:
        :param append: if True, duplicate keys will create lists
        :return:
        """
        obj_to_use = {obj.get_key(): obj.obj} if obj.has_key else obj.obj

        if entry not in self.data:
            self.data[entry] = obj_to_use

        elif not append:
            # replace semantics (dicts merge their keys)
            if isinstance(obj_to_use, dict):
                self.data[entry].update(obj_to_use)
            else:
                self.data[entry] = obj_to_use

        elif isinstance(self.data[entry], list):
            self.check_duplicate(self.data[entry], obj, entry)
            self.data[entry].append(obj.obj)

        elif isinstance(obj_to_use, dict):
            for k in obj_to_use:
                if k not in self.data[entry]:
                    self.data[entry][k] = obj_to_use[k]
                elif isinstance(self.data[entry][k], list):
                    self.check_duplicate(self.data[entry][k], obj, entry + "." + k)
                    self.data[entry][k].append(obj_to_use[k])
                else:
                    # promote the existing scalar to a list
                    self.check_duplicate(self.data[entry][k], obj, entry + "." + k)
                    self.data[entry][k] = [self.data[entry][k], obj_to_use[k]]

        elif isinstance(self.data[entry], dict):
            raise SheerkaDataProviderError(f"Cannot found key on '{obj.obj}' while all other elements have.", obj.obj)

        else:
            self.check_duplicate(self.data[entry], obj, entry)
            self.data[entry] = [self.data[entry], obj_to_use]

    def modify(self, entry, key, obj, obj_key):
        """Replace the object stored under *key* by *obj* (stored under
        *obj_key*, which may differ)."""
        # if the key changes, make sure to remove the previous entry
        append = False
        if obj_key != key:
            self.remove(entry, lambda k, o: k == key)  # modify from one object to another
            append = True

        self.update(entry, ObjToUpdate(obj, obj_key), append=append)

    def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed):
        """Replace, inside the list self.data[entry][key], the item whose
        digest equals *obj_origin* by *obj* (re-keying when obj_key != key).

        :raises SheerkaDataProviderError: when no item matches obj_origin
        """
        found = False
        to_remove = None
        for i in range(len(self.data[entry][key])):
            item, is_ref = load_ref_if_needed(self.data[entry][key][i])
            if not hasattr(item, "get_digest"):
                continue
            if item.get_digest() == obj_origin:
                obj = save_ref_if_needed(is_ref, obj)
                if obj_key == key:
                    self.data[entry][key][i] = obj
                else:
                    to_remove = i
                    self.update(entry, ObjToUpdate(obj, obj_key), append=True)
                found = True
                break

        if not found:
            raise (SheerkaDataProviderError(f"Cannot modify '{entry}.{key}'. Item '{obj_origin}' not found.", obj))

        if to_remove is not None:
            del self.data[entry][key][to_remove]

    def remove(self, entry, filter):
        """Remove from *entry* everything matching *filter*.

        filter is None -> drop the whole entry; dict entries call
        filter(key, element); list/scalar entries call filter(element).
        """
        if filter is None:
            del (self.data[entry])

        elif isinstance(self.data[entry], dict):
            keys_to_remove = []
            for key, element in self.data[entry].items():
                if filter(key, element):
                    keys_to_remove.append(key)
            for key in keys_to_remove:
                del (self.data[entry][key])

        elif not isinstance(self.data[entry], list):
            if filter(self.data[entry]):
                del (self.data[entry])

        else:
            # BUG FIX: the previous code removed elements from the list while
            # iterating over it, which silently skipped the element following
            # each removal. Rebuilding the list removes every match.
            self.data[entry] = [element for element in self.data[entry] if not filter(element)]

    def get_digest(self):
        """SHA-256 of the JSON serialization of the whole state."""
        as_json = json.dumps(self.__dict__, default=json_default_converter)
        return hashlib.sha256(as_json.encode("utf-8")).hexdigest()

    def contains(self, entry, key):
        """
        if key is None, returns True if entry exists
        if key has a value
            returns True if entry is a dict and contains key
        :param entry:
        :param key:
        :return:
        """
        if entry not in self.data:
            return False
        if key is None:
            return entry in self.data
        if not isinstance(self.data[entry], dict):
            return False
        return key in self.data[entry]
|
||||
|
||||
|
||||
class SheerkaDataProviderError(Exception):
    """Generic data-provider failure carrying the offending object."""

    def __init__(self, message, obj):
        super().__init__(message)
        self.obj = obj
|
||||
|
||||
|
||||
class SheerkaDataProviderDuplicateKeyError(Exception):
    """Raised when an object with the same digest is already stored under
    the same key."""

    def __init__(self, key, obj):
        super().__init__("Duplicate object.")
        self.key = key
        self.obj = obj
|
||||
|
||||
|
||||
class SheerkaDataProvider:
|
||||
"""Manages the state of the system"""
|
||||
|
||||
EventFolder = "events"
|
||||
StateFolder = "state"
|
||||
ObjectsFolder = "objects"
|
||||
CacheFolder = "cache"
|
||||
HeadFile = "HEAD"
|
||||
KeysFile = "keys"
|
||||
REF_PREFIX = "##REF##:"
|
||||
|
||||
    def __init__(self, root=None):
        """Initialize the data provider, opening the store under *root*."""
        self.log = get_logger(__name__)
        self.init_log = get_logger("init." + __name__)
        self.init_log.debug("Initializing sdp.")

        # NOTE(review): first_time presumably flags a freshly created store —
        # confirm against SheerkaDataProviderIO.
        self.io = SheerkaDataProviderIO.get(root)
        self.first_time = self.io.first_time

        self.serializer = Serializer()
|
||||
|
||||
@staticmethod
|
||||
def get_obj_key(obj):
|
||||
"""
|
||||
Tries to find the key of an object
|
||||
Look for .key, .get_key()
|
||||
:param obj:
|
||||
:return: String version of that is found, None otherwise
|
||||
"""
|
||||
return str(obj.key) if hasattr(obj, "key") \
|
||||
else str(obj.get_key()) if hasattr(obj, "get_key") \
|
||||
else None
|
||||
|
||||
@staticmethod
|
||||
def get_obj_digest(obj):
|
||||
"""
|
||||
Tries to find the key of an object
|
||||
Look for .digest, .get_digest()
|
||||
:param obj:
|
||||
:return: digest, None otherwise
|
||||
"""
|
||||
if isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX):
|
||||
return obj[len(SheerkaDataProvider.REF_PREFIX):]
|
||||
|
||||
return obj.digest if hasattr(obj, "digest") \
|
||||
else obj.get_digest() if hasattr(obj, "get_digest") \
|
||||
else None
|
||||
|
||||
@staticmethod
|
||||
def get_obj_origin(obj):
|
||||
"""
|
||||
Get the digest used to save obj if set
|
||||
"""
|
||||
if isinstance(obj, dict) and Serializer.ORIGIN in obj:
|
||||
return obj[Serializer.ORIGIN]
|
||||
|
||||
if hasattr(obj, Serializer.ORIGIN):
|
||||
return getattr(obj, Serializer.ORIGIN)
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def get_stream_digest(stream):
|
||||
sha256_hash = hashlib.sha256()
|
||||
for byte_block in iter(lambda: stream.read(4096), b""):
|
||||
sha256_hash.update(byte_block)
|
||||
|
||||
stream.seek(0)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
@staticmethod
|
||||
def is_reference(obj):
|
||||
return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)
|
||||
|
||||
def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False, is_ref=False):
|
||||
"""
|
||||
Adds obj to the entry 'entry'
|
||||
:param event_digest: digest of the event that triggers the modification of the state
|
||||
:param entry: entry of the state to update
|
||||
:param obj: obj to insert or add
|
||||
:param allow_multiple: if set to true, the same key can be added several times.
|
||||
All entries will be put in a list
|
||||
:param use_ref: if True the actual object is saved under 'objects' folder,
|
||||
only a reference is saved in the state
|
||||
:return: (entry, key) to retrieve the object
|
||||
"""
|
||||
|
||||
if use_ref and is_ref:
|
||||
raise SheerkaDataProviderError("Cannot use use_ref and is_ref at the same time", None)
|
||||
|
||||
if is_ref and not isinstance(obj, dict):
|
||||
raise SheerkaDataProviderError("is_ref can only be used with dictionaries", obj)
|
||||
|
||||
snapshot = self.get_snapshot()
|
||||
state = self.load_state(snapshot)
|
||||
|
||||
self.log.debug(f"Adding obj '{obj}' in entry '{entry}' (allow_multiple={allow_multiple}, use_ref={use_ref})")
|
||||
|
||||
if not isinstance(obj, ObjToUpdate):
|
||||
obj = ObjToUpdate(obj)
|
||||
|
||||
# check uniqueness, cannot add the same key twice if allow_multiple == False
|
||||
key = obj.get_key()
|
||||
self.log.debug(f"key found : '{key}'") if key else self.log.debug("No key found")
|
||||
if not allow_multiple:
|
||||
if isinstance(obj.obj, dict):
|
||||
for k in obj.obj:
|
||||
if state.contains(entry, k):
|
||||
raise IndexError(f"{entry}.{k}")
|
||||
else:
|
||||
if state.contains(entry, key):
|
||||
raise IndexError(f"{entry}.{key}" if key else entry)
|
||||
|
||||
state.parents = [] if snapshot is None else [snapshot]
|
||||
state.events = [event_digest]
|
||||
state.date = datetime.now()
|
||||
|
||||
if use_ref:
|
||||
obj.set_digest(self.save_obj(obj.obj))
|
||||
obj.obj = self.REF_PREFIX + obj.get_digest()
|
||||
|
||||
if is_ref:
|
||||
for k, v in obj.obj.items():
|
||||
obj.obj[k] = self.REF_PREFIX + v
|
||||
|
||||
state.update(entry, obj)
|
||||
|
||||
new_snapshot = self.save_state(state)
|
||||
self.set_snapshot(new_snapshot)
|
||||
return entry, key
|
||||
|
||||
    def add_with_auto_key(self, event_digest: str, entry, obj):
        """
        Add obj to entry under an auto-generated key.
        :param event_digest: digest of the triggering event
        :param entry: entry of the state to update
        :param obj: object to add
        :return: (entry, generated key)
        """
        next_key = self.get_next_key(entry)
        # propagate the generated key onto the object when it supports it
        if hasattr(obj, "set_key"):
            obj.set_key(next_key)
        self.add(event_digest, entry, ObjToUpdate(obj, next_key))
        return entry, next_key
|
||||
|
||||
def add_unique(self, event_digest: str, entry, obj):
    """Add obj to the set stored under entry, skipping duplicates.

    Returns (entry, None) when obj was added, (None, None) when it
    already existed.
    """
    snapshot = self.get_snapshot()
    state = self.load_state(snapshot)

    # new state descends from the current snapshot (if any)
    state.parents = [snapshot] if snapshot is not None else []
    state.events = [event_digest]
    state.date = datetime.now()

    if entry not in state.data:
        state.data[entry] = {obj}
        duplicate = False
    else:
        members = state.data[entry]
        duplicate = obj in members
        if not duplicate:
            members.add(obj)

    self.set_snapshot(self.save_state(state))
    return (entry if not duplicate else None), None
|
||||
|
||||
def set(self, event_digest, entry, obj, use_ref=False, is_ref=False):
    """
    Add or replace an entry. The entry is reinitialized: if the previous
    value was a dict, all its keys are lost.
    :param event_digest: event that triggers the write
    :param entry: entry to (re)set
    :param obj: new value
    :param use_ref: do not inline obj in the State; store it under objects and keep a reference
    :param is_ref: obj is a dict whose values are already object digests
    :return: (entry, key) — key is None when obj carries no key
    """
    # the two reference modes are mutually exclusive
    if use_ref and is_ref:
        raise SheerkaDataProviderError("Cannot use use_ref and is_ref at the same time", None)

    if is_ref and not isinstance(obj, dict):
        raise SheerkaDataProviderError("is_ref can only be used with dictionaries", obj)

    snapshot = self.get_snapshot()
    state = self.load_state(snapshot)

    state.parents = [snapshot] if snapshot is not None else []
    state.events = [event_digest]
    state.date = datetime.now()

    key = self.get_obj_key(obj)
    obj = self.save_ref_if_needed(use_ref, obj)

    if is_ref:
        # turn every digest value into a reference string, in place
        for ref_key, ref_digest in obj.items():
            obj[ref_key] = self.REF_PREFIX + ref_digest

    state.data[entry] = {key: obj} if key is not None else obj

    self.set_snapshot(self.save_state(state))
    return entry, key
|
||||
|
||||
def modify(self, event_digest, entry, key, obj):
    """
    Replace an existing element.
    :param event_digest: event that triggers the update
    :param entry: entry holding the element
    :param key: key of the object to update (mandatory)
    :param obj: new data
    :return: (entry, effective key of the updated object)
    :raises SheerkaDataProviderError: when key is None, or when the slot holds
        multiple entries and obj carries no origin to disambiguate
    :raises IndexError: when entry or key does not exist
    """

    if key is None:
        raise SheerkaDataProviderError("Key is mandatory.", None)

    snapshot = self.get_snapshot()
    state = self.load_state(snapshot)

    if entry not in state.data:
        raise IndexError(entry)

    # key is guaranteed non-None here (guard above), so check it directly
    if key not in state.data[entry]:
        raise IndexError(f"{entry}.{key}")

    state.parents = [] if snapshot is None else [snapshot]
    state.events = [event_digest]
    state.date = datetime.now()

    # Gets obj original key, it will help to know if the key has changed
    obj_key = self.get_obj_key(obj) or key

    if isinstance(state.data[entry][key], list):
        # several objects share the key: obj's origin digest picks which one
        obj_origin = self.get_obj_origin(obj)
        if obj_origin is None:
            raise SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj)

        state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed)

    else:
        # keep the storage form (reference vs inline) of the value being replaced
        obj = self.save_ref_if_needed(self.is_reference(state.data[entry][key]), obj)
        state.modify(entry, key, obj, obj_key)

    new_snapshot = self.save_state(state)
    self.set_snapshot(new_snapshot)
    return entry, obj_key
|
||||
|
||||
def list(self, entry, filter=None):
    """
    Yield the elements stored under 'entry'.
    :param entry: name of the entry to list
    :param filter: optional predicate — (key, element) for keyed entries,
        (element) otherwise
    :return: generator over the (dereferenced) elements
    """
    state = self.load_state(self.get_snapshot())
    if entry not in state.data:
        return []

    elements = state.data[entry]

    if isinstance(elements, dict):
        # keyed elements: filter receives (key, element)
        accept = filter if filter is not None else (lambda k, o: True)
        for item_key, item in elements.items():
            if not accept(item_key, item):
                continue
            if isinstance(item, list):
                yield [self.load_ref_if_needed(e)[0] for e in item]
            else:
                yield self.load_ref_if_needed(item)[0]
    else:
        # unkeyed elements: normalize scalars to a one-item collection
        if not isinstance(elements, (list, set)):
            elements = [elements]

        accept = filter if filter is not None else (lambda o: True)
        for item in elements:
            if accept(item):
                yield self.load_ref_if_needed(item)[0]
|
||||
|
||||
def remove(self, event_digest, entry, filter=None):
    """
    Remove elements under the entry 'entry'.
    :param event_digest: event that triggers the deletion
    :param entry: entry to remove from
    :param filter: filter to use
    :return: new sha256 of the state
    :raises IndexError: when entry does not exist
    TODO: Remove by key
    """
    snapshot = self.get_snapshot()
    state = self.load_state(snapshot)

    if entry not in state.data:
        raise IndexError(entry)

    state.parents = [snapshot] if snapshot is not None else []
    state.events = [event_digest]
    state.date = datetime.now()
    state.remove(entry, filter)

    new_snapshot = self.save_state(state)
    self.set_snapshot(new_snapshot)
    return new_snapshot
|
||||
|
||||
def get(self, entry, key=None, load_origin=True):
    """
    Retrieve an element by its key (the whole entry when key is None).
    :param entry: entry to read from
    :param key: optional key inside the entry
    :param load_origin: attach the origin digest when dereferencing
    :return: the dereferenced element (or list of elements)
    :raises IndexError: when entry or key is missing
    """
    state = self.load_state(self.get_snapshot())

    if entry not in state.data:
        raise IndexError(entry)

    stored = state.data[entry]
    if key is not None:
        if key not in stored:
            raise IndexError(f"{entry}.{key}")
        stored = stored[key]

    if isinstance(stored, list):
        return [self.load_ref_if_needed(i, load_origin)[0] for i in stored]

    return self.load_ref_if_needed(stored, load_origin)[0]
|
||||
|
||||
def get_safe(self, entry, key=None, load_origin=True):
    """
    Retrieve an element by its key. Return None if the element does not exist.
    :param entry: entry to read from
    :param key: optional key inside the entry
    :param load_origin: attach the origin digest when dereferencing
    :return: the dereferenced element, or None when entry/key is missing
    """
    # Delegate to get(): the body was a line-for-line duplicate, with
    # the two IndexError raises replaced by None returns.
    try:
        return self.get(entry, key, load_origin)
    except IndexError:
        return None
|
||||
|
||||
def exists(self, entry, key=None, digest=None):
    """
    Returns true if the entry is defined.
    :param entry: entry to test
    :param key: optional key inside the entry
    :param digest: digest of the object, when several entries share the same key
    :return: bool
    """
    state = self.load_state(self.get_snapshot())

    if entry not in state.data:
        return False
    if key is None:
        return True

    bucket = state.data[entry]
    if key not in bucket:
        return False
    if digest is None:
        return True

    # several objects may share the key: look for one with the right digest
    candidates = bucket[key]
    if not isinstance(candidates, list):
        candidates = [candidates]

    return any(SheerkaDataProvider.get_obj_digest(c) == digest for c in candidates)
|
||||
|
||||
def save_event(self, event: Event):
    """
    Persist an event in the events store. Idempotent: an already-saved
    event is not rewritten.
    :param event: event to save
    :return: digest of the event
    """
    digest = event.get_digest()
    event_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest)

    if not self.io.exists(event_path):
        payload = self.serializer.serialize(event, None).read()
        self.io.write_binary(event_path, payload)

    return digest
|
||||
|
||||
def load_event(self, digest):
    """
    Return an event, given its digest.
    :param digest: digest of the event to load
    :return: the deserialized event
    """
    event_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest)

    with self.io.open(event_path, "rb") as stream:
        return self.serializer.deserialize(stream, None)
|
||||
|
||||
def save_result(self, execution_context):
    """
    Save the execution context associated with an event.

    For every user input there is an event (created first) and a result
    (the ExecutionContext created by sheerka.evaluate_user_input()); the
    result is stored next to the event, under a "_result" suffix.
    :param execution_context: context to persist
    :return: digest of the associated event
    """
    digest = execution_context.event.get_digest()
    self.log.debug(f"Saving execution context. digest={digest}")

    result_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + "_result"
    if not self.io.exists(result_path):
        self.io.write_binary(result_path, self.serializer.serialize(execution_context, None).read())

    return digest
|
||||
|
||||
def load_result(self, digest):
    """Load the ExecutionContext saved for the event with this digest."""
    result_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + "_result"

    with self.io.open(result_path, "rb") as stream:
        return self.serializer.deserialize(stream, None)
|
||||
|
||||
def save_state(self, state: State):
    """
    Persist a State snapshot; returns its digest. Idempotent.
    :param state: state to persist
    :return: digest of the state
    """
    digest = state.get_digest()
    self.log.debug(f"Saving new state. digest={digest}")

    state_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest)
    if not self.io.exists(state_path):
        self.io.write_binary(state_path, self.serializer.serialize(state, None).read())

    return digest
|
||||
|
||||
def load_state(self, digest):
    """
    Load a State by digest. A None digest yields a fresh, empty State.
    :param digest: digest of the state, or None
    :return: the State
    """
    if digest is None:
        return State()

    state_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest)
    with self.io.open(state_path, "rb") as stream:
        return self.serializer.deserialize(stream, None)
|
||||
|
||||
def save_obj(self, obj):
    """
    Serialize obj into the objects store; returns its digest. Idempotent.
    :param obj: object to store
    :return: digest of the stored object
    """
    self.log.debug(f"Saving '{obj}' as reference...")

    # NOTE(review): hard-coded user name — confirm whether this should come from configuration
    stream = self.serializer.serialize(obj, SerializerContext(user_name="kodjo"))
    if hasattr(obj, "get_digest"):
        digest = obj.get_digest()
    else:
        digest = self.get_stream_digest(stream)

    target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest)
    if self.io.exists(target_path):
        self.log.debug(f"...already saved. digest is {digest}")
        return digest

    self.io.write_binary(target_path, stream.read())

    self.log.debug(f"...digest={digest}.")
    return digest
|
||||
|
||||
def load_obj(self, digest, add_origin=True):
    """
    Load an object from the objects store.
    :param digest: digest of the object; None yields None
    :param add_origin: when True, attach the source digest to the object
    :return: the object, or None when digest is None or unknown
    """
    if digest is None:
        return None

    obj_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest)
    if not self.io.exists(obj_path):
        return None

    with self.io.open(obj_path, "rb") as stream:
        loaded = self.serializer.deserialize(stream, SerializerContext(origin=digest))

    # set the origin of the object (plain strings cannot carry attributes)
    if add_origin:
        if isinstance(loaded, dict):
            loaded[Serializer.ORIGIN] = digest
        elif not isinstance(loaded, str):
            setattr(loaded, Serializer.ORIGIN, digest)
    return loaded
|
||||
|
||||
def load_ref_if_needed(self, obj, load_origin=True):
    """
    Resolve obj when it is a reference string ("REF_PREFIX" + digest).
    :param obj: candidate reference
    :param load_origin: forwarded to load_obj
    :return: (resolved object, True) when a reference was resolved,
        (obj unchanged, False) otherwise
    """
    if isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX):
        resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin)
        if resolved is not None:
            return resolved, True

    return obj, False
|
||||
|
||||
def save_ref_if_needed(self, save_ref, obj):
    """
    When save_ref is truthy, persist obj and return its reference string;
    otherwise return obj unchanged.
    """
    if save_ref:
        return self.REF_PREFIX + self.save_obj(obj)

    return obj
|
||||
|
||||
def get_cache_params(self, category, key):
    """Compute the (digest, path) pair identifying a cache slot."""
    slot_id = f"{category}:{key}".encode("utf-8")
    digest = hashlib.sha3_256(slot_id).hexdigest()
    return digest, self.io.get_obj_path(SheerkaDataProvider.CacheFolder, digest)
|
||||
|
||||
def add_to_cache(self, category, key, obj, update=False):
    """
    Save obj (a str) compressed in the internal cache system.
    :param category: cache category
    :param key: cache key within the category
    :param obj: text content to cache
    :param update: overwrite an existing entry when True
    :return: digest of the cache slot
    """
    digest, cache_path = self.get_cache_params(category, key)

    # existing entries are kept unless an update is requested
    if update or not self.io.exists(cache_path):
        self.io.write_binary(cache_path, zlib.compress(obj.encode("utf-8"), 9))

    return digest
|
||||
|
||||
def load_from_cache(self, category, key):
    """
    Reload a compressed object from the cache.
    :param category: cache category
    :param key: cache key within the category
    :return: the decompressed text
    :raises IndexError: when the slot does not exist
    """
    digest, cache_path = self.get_cache_params(category, key)

    if not self.io.exists(cache_path):
        raise IndexError(f"{category}.{key}")

    with self.io.open(cache_path, "rb") as stream:
        compressed = stream.read()
    return zlib.decompress(compressed).decode("utf-8")
|
||||
|
||||
def remove_from_cache(self, category, key):
    """
    Delete the cache slot if present.
    :param category: cache category
    :param key: cache key within the category
    :return: digest of the slot (whether it existed or not)
    """
    slot_digest, slot_path = self.get_cache_params(category, key)

    if self.io.exists(slot_path):
        self.io.remove(slot_path)

    return slot_digest
|
||||
|
||||
def in_cache(self, category, key):
    """
    Returns true if the key is in cache.
    :param category: cache category
    :param key: cache key within the category
    :return: bool
    """
    _, slot_path = self.get_cache_params(category, key)
    return self.io.exists(slot_path)
|
||||
|
||||
def get_snapshot(self):
    """Return the digest stored in the HEAD file, or None when no snapshot exists yet."""
    head_file = self.io.path_join(SheerkaDataProvider.HeadFile)
    return self.io.read_text(head_file) if self.io.exists(head_file) else None
|
||||
|
||||
def set_snapshot(self, digest):
    """Point the HEAD file at the given state digest."""
    return self.io.write_text(self.io.path_join(SheerkaDataProvider.HeadFile), digest)
|
||||
|
||||
def load_keys(self):
    """Load the per-entry auto-key counters; {} when the keys file does not exist yet."""
    keys_file = self.io.path_join(SheerkaDataProvider.KeysFile)

    if not self.io.exists(keys_file):
        return {}

    with self.io.open(keys_file, "r") as stream:
        return json.load(stream)
|
||||
|
||||
def save_keys(self, keys):
    """Persist the per-entry auto-key counters as JSON."""
    with self.io.open(self.io.path_join(SheerkaDataProvider.KeysFile), "w") as stream:
        json.dump(keys, stream)
|
||||
|
||||
def get_next_key(self, entry):
    """
    Increment and persist the counter for entry; return the new value as str.
    NOTE(review): this read-modify-write is not safe under concurrent writers — confirm single-writer assumption.
    """
    counters = self.load_keys()
    counters[entry] = counters.get(entry, 0) + 1
    self.save_keys(counters)
    return str(counters[entry])
|
||||
|
||||
def set_key(self, entry, value):
    """Force the auto-key counter for entry to value; return it as str."""
    counters = self.load_keys()
    counters[entry] = value
    self.save_keys(counters)
    return str(value)
|
||||
@@ -0,0 +1,192 @@
|
||||
import io
|
||||
from os import path
|
||||
import os
|
||||
from fs.memoryfs import MemoryFS
|
||||
from core.sheerka_logger import get_logger
|
||||
|
||||
|
||||
class SheerkaDataProviderIO:
    """
    Abstract storage backend for SheerkaDataProvider.

    Concrete subclasses implement the primitive file operations; the path
    helpers (get_obj_path / path_join) are shared by all backends.
    """

    def __init__(self, root):
        # base folder of the store ("" for in-memory backends)
        self.root = root
        self.log = get_logger(__name__)
        self.init_log = get_logger("init." + __name__)

    # The primitives below used to silently `pass` (returning None), which
    # masked any backend that forgot an override; fail loudly instead.
    def exists(self, file_path):
        """Return True when file_path exists in the store."""
        raise NotImplementedError

    def open(self, file_path, mode):
        """Open file_path and return a file-like object."""
        raise NotImplementedError

    def read_text(self, file_path):
        """Return the full text content of file_path."""
        raise NotImplementedError

    def read_binary(self, file_path):
        """Return the full binary content of file_path."""
        raise NotImplementedError

    def write_text(self, file_path, content):
        """Write text content to file_path."""
        raise NotImplementedError

    def write_binary(self, file_path, content):
        """Write binary content to file_path."""
        raise NotImplementedError

    def remove(self, file_path):
        """Delete file_path from the store."""
        raise NotImplementedError

    @staticmethod
    def get(root):
        """Factory: dictionary backend for "mem://", file backend otherwise."""
        if root == "mem://":
            return SheerkaDataProviderDictionaryIO()
        else:
            return SheerkaDataProviderFileIO(root)

    def get_obj_path(self, object_type, digest):
        """Path of an object: root/<type>/<digest[:24]>/<digest> (fan-out folder)."""
        return path.join(self.root, object_type, digest[:24], digest)

    def path_join(self, *paths):
        """Join paths under the store root."""
        return path.join(self.root, *paths)
|
||||
|
||||
|
||||
class SheerkaDataProviderFileIO(SheerkaDataProviderIO):
    """Disk-backed IO. Defaults to ~/.sheerka when root is None."""

    def __init__(self, root):
        # resolve the root folder, falling back to ~/.sheerka
        root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \
            if root is None \
            else path.abspath(root)
        super().__init__(root)

        self.init_log.debug("root is set to '" + self.root + "'")

        # first_time signals a brand-new store to the caller
        if not path.exists(self.root):
            self.init_log.debug("root folder not found. Creating it.")
            os.makedirs(self.root)
            self.first_time = True
        else:
            self.first_time = False

    def open(self, file_path, mode):
        return open(file_path, mode)

    def read_text(self, file_path):
        with open(file_path) as f:
            return f.read()

    def read_binary(self, file_path):
        with open(file_path, "rb") as f:
            return f.read()

    def write_text(self, file_path, content):
        self._write(file_path, content, "w")

    def write_binary(self, file_path, content):
        self._write(file_path, content, "wb")

    def exists(self, file_path):
        return path.exists(file_path)

    def remove(self, file_path):
        os.remove(file_path)

    @staticmethod
    def _write(file_path, content, mode):
        """Write content to file_path, creating the parent folder if needed."""
        # exist_ok=True avoids the check-then-create race of the previous
        # path.exists()/makedirs() pair when two writers hit the same folder
        os.makedirs(path.dirname(file_path), exist_ok=True)

        with open(file_path, mode) as f:
            f.write(content)
|
||||
|
||||
|
||||
class SheerkaDataProviderMemoryIO(SheerkaDataProviderIO):
    """IO backend backed by an in-memory filesystem (fs.memoryfs.MemoryFS)."""

    def __init__(self):
        super().__init__("")

        self.mem_fs = MemoryFS()
        self.init_log.debug("Initializing memory file.")
        # an in-memory store always starts empty
        self.first_time = True

    def open(self, file_path, mode):
        return self.mem_fs.open(file_path, mode)

    def exists(self, file_path):
        return self.mem_fs.exists(file_path)

    def read_text(self, file_path):
        return self.mem_fs.readtext(file_path)

    def read_binary(self, file_path):
        return self.mem_fs.readbytes(file_path)

    def write_binary(self, file_path, content):
        self._ensure_parent_folder(file_path)
        self.mem_fs.writebytes(file_path, content)

    def write_text(self, file_path, content):
        self._ensure_parent_folder(file_path)
        self.mem_fs.writetext(file_path, content)

    def remove(self, file_path):
        self.mem_fs.remove(file_path)

    def _ensure_parent_folder(self, file_path):
        """Create the parent folder inside the memory FS when missing."""
        parent = path.dirname(file_path)
        if not self.mem_fs.exists(parent):
            self.mem_fs.makedirs(parent)
|
||||
|
||||
|
||||
class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO):
    """IO backend storing every file as an entry of a plain dict."""

    def __init__(self):
        super().__init__("")
        # file_path -> content (str or bytes)
        self.cache = {}
        self.init_log.debug("Initializing dictionary file.")
        self.first_time = True

    def exists(self, file_path):
        # "" stands for the root, which always exists
        return file_path == "" or file_path in self.cache

    def read_text(self, file_path):
        return self.cache[file_path]

    def read_binary(self, file_path):
        return self.cache[file_path]

    def write_binary(self, file_path, content):
        self.cache[file_path] = content

    def write_text(self, file_path, content):
        self.cache[file_path] = content

    def remove(self, file_path):
        self.cache.pop(file_path)

    def open(self, file_path, mode):
        binary = "b" in mode
        if "w" in mode:
            stream = io.BytesIO() if binary else io.StringIO()
            # intercept close() so the buffer lands in the cache
            stream.close = on_close(self, file_path, stream)(stream.close)
            return stream

        data = self.cache[file_path]
        return io.BytesIO(data) if binary else io.StringIO(data)
|
||||
|
||||
|
||||
def on_close(dictionary_io, file_path, stream):
    """
    Decorator factory that flushes `stream`'s full content into
    `dictionary_io.cache[file_path]` right before the wrapped close() runs.
    :param dictionary_io: object exposing a `cache` dict
    :param file_path: cache key to store the content under
    :param stream: the stream whose close() is being wrapped
    :return: a decorator for the stream's close() method
    """

    def decorator(func):
        def wrapper(*args, **kwargs):
            # rewind and capture the whole buffer before the stream dies
            stream.seek(0)
            dictionary_io.cache[file_path] = stream.read()
            func(*args, **kwargs)

        return wrapper

    return decorator
|
||||
@@ -0,0 +1,273 @@
|
||||
import dataclasses
|
||||
import json
|
||||
import pickle
|
||||
import datetime
|
||||
import struct
|
||||
import io
|
||||
from dataclasses import dataclass
|
||||
from core.sheerka_logger import get_logger
|
||||
from enum import Enum
|
||||
|
||||
import core.utils
|
||||
|
||||
from core.concept import Concept
|
||||
from core.tokenizer import Token
|
||||
from parsers.BaseParser import Node
|
||||
|
||||
|
||||
def json_default_converter(o):
    """
    Default formatter for json.

    It's used when the json serializer does not know how to serialize a
    type: dates/datetimes become ISO strings, enums become their name.
    :param o: unserializable object handed over by json.dumps
    :return: a JSON-compatible representation
    :raises TypeError: for any other type — the json `default` protocol
        expects TypeError (a bare Exception broke json's error reporting)
    """
    if isinstance(o, (datetime.date, datetime.datetime)):
        return o.isoformat()

    if isinstance(o, Enum):
        return o.name

    raise TypeError("Cannot serialize " + o.__class__.__name__)
|
||||
|
||||
|
||||
|
||||
@dataclass
class SerializerContext:
    """Ancillary information handed to serializers on dump/load."""
    # user committing the snapshot (may be None)
    user_name: str = None
    # digest of the object this one was loaded from (may be None)
    origin: str = None
|
||||
|
||||
class Serializer:
    """
    Dispatches (de)serialization to registered BaseSerializer instances.

    Every stream starts with a header packed as struct "cH" (standard size
    4 bytes): a one-char serializer name and an unsigned-short version.
    """

    HEADER_FORMAT = "cH"
    USERNAME = "user_name"  # key to store user that has committed the snapshot
    MODIFICATION_DATE = "modification_date"  # ISO date of the last write
    PARENTS = "parents"  # digests this object was derived from
    ORIGIN = "##origin##"  # attribute/key carrying the source digest
    HISTORY = "##history##"  # key grouping the audit metadata

    def __init__(self):
        self.log = get_logger(__name__)
        self.init_log = get_logger("init." + __name__)
        self.init_log.debug("Initializing serializers")
        self._cache = []

        # add builtin serializers
        self.register(EventSerializer())
        self.register(StateSerializer())
        self.register(ConceptSerializer())
        self.register(DictionarySerializer())
        self.register(ExecutionContextSerializer())

    def register(self, serializer):
        """
        Register serializer in the list of all known serializers
        :param serializer:
        :return:
        """
        self.init_log.debug(f"Adding serializer {serializer}")
        self._cache.append(serializer)

    def serialize(self, obj, context):
        """
        Get the stream representation of an object
        :param context: additional info forwarded to the serializer
        :param obj: object to serialize
        :return: a binary stream, header included
        :raises TypeError: when no registered serializer matches obj
        """
        serializers = [s for s in self._cache if s.matches(obj)]

        if not serializers:
            raise TypeError(f"Don't know how to serialize {type(obj)}")

        # first match wins (registration order matters)
        serializer = serializers[0]

        stream = io.BytesIO()
        header = struct.pack(Serializer.HEADER_FORMAT, bytes(serializer.name, "utf-8"), serializer.version)
        stream.write(header)

        return serializer.dump(stream, obj, context)

    def deserialize(self, stream, context):
        """
        Loads an object from its stream representation
        :param context: additional info forwarded to the serializer
        :param stream: binary stream positioned at the header
        :return: the deserialized object
        :raises TypeError: when the header names an unknown serializer/version
        """
        # struct.calcsize("cH") == 4: 1-byte name, 1 pad byte, 2-byte version
        header = struct.unpack(Serializer.HEADER_FORMAT, stream.read(4))
        serializers = [s for s in self._cache if s.name == header[0].decode("utf-8") and s.version == header[1]]

        if not serializers:
            # message was garbled ("Don't know how serializer name=...")
            raise TypeError(f"Don't know how to deserialize: name={header[0]}, version={header[1]}")

        serializer = serializers[0]
        return serializer.load(stream, context)
|
||||
|
||||
|
||||
class BaseSerializer:
    """
    Common interface for all serializers: a (name, version) identity plus
    the matches/dump/load hooks that subclasses implement.
    """

    def __init__(self, name, version):
        # one-character identifier written into the stream header
        self.name = name
        # format version, bumped on incompatible changes
        self.version = version

    def matches(self, obj):
        """
        Returns true if self can serialize obj (hook, overridden by subclasses)
        :param obj:
        :return:
        """
        pass

    def dump(self, stream, obj, context):
        """
        Write the byte representation of obj (hook, overridden by subclasses)
        :param stream: to write to
        :param obj: obj to serialize
        :param context: additional info needed to dump
        :return: stream of bytes
        """
        pass

    def load(self, stream, context):
        """
        From a stream of bytes, create the object (hook, overridden by subclasses)
        :param stream:
        :param context: additional info needed to load
        :return: object
        """
        pass

    def __repr__(self):
        return f"{self.__class__.__name__} ({self.name}, version={self.version})"
|
||||
|
||||
|
||||
class EventSerializer(BaseSerializer):
    """JSON serializer dedicated to sdp.sheerkaDataProvider.Event ("E", v1)."""

    def __init__(self):
        super().__init__("E", 1)

    def matches(self, obj):
        return core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.Event"

    def dump(self, stream, obj, context):
        payload = json.dumps(obj.to_dict(), default=json_default_converter)
        stream.write(payload.encode("utf-8"))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        as_dict = json.loads(stream.read().decode("utf-8"))
        # rebuild an empty Event by qualified name, then hydrate it
        event = core.utils.get_class("sdp.sheerkaDataProvider.Event")()
        event.from_dict(as_dict)
        return event
|
||||
|
||||
|
||||
class JsonSerializer(BaseSerializer):
    """
    Generic JSON serializer for objects exposing to_dict()/from_dict().
    A ##history## section (user, date, parents) is added on dump and
    reattached as an attribute on load.
    """

    def __init__(self, fully_qualified_name, name="J", version=1):
        super().__init__(name, version)
        self.fully_qualified_name = fully_qualified_name

    def matches(self, obj):
        return core.utils.get_full_qualified_name(obj) == self.fully_qualified_name

    def dump(self, stream, obj, context):
        # the origin digest (when present) becomes the single parent
        parents = [getattr(obj, Serializer.ORIGIN)] if hasattr(obj, Serializer.ORIGIN) else []
        payload = obj.to_dict()
        payload[Serializer.HISTORY] = {
            Serializer.USERNAME: context.user_name,
            Serializer.MODIFICATION_DATE: datetime.datetime.now().isoformat(),
            Serializer.PARENTS: parents,
        }
        stream.write(json.dumps(payload, default=json_default_converter).encode("utf-8"))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        message = json.loads(stream.read().decode("utf-8"))
        obj = core.utils.get_class(self.fully_qualified_name)()
        obj.from_dict(message)
        setattr(obj, Serializer.HISTORY, message[Serializer.HISTORY])

        return obj
|
||||
|
||||
|
||||
class PickleSerializer(BaseSerializer):
    """Serializer delegating to pickle; applicability decided by a predicate."""

    def __init__(self, predicate, name="P", version=1):
        super().__init__(name, version)
        # callable(obj) -> bool deciding whether this serializer applies
        self.predicate = predicate

    def matches(self, obj):
        return self.predicate(obj)

    def dump(self, stream, obj, context):
        stream.write(pickle.dumps(obj))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        # NOTE(review): pickle.loads must only ever see trusted, locally-written data
        return pickle.loads(stream.read())
|
||||
|
||||
|
||||
class StateSerializer(PickleSerializer):
    """Pickle serializer for sdp.sheerkaDataProvider.State objects ("S", v1)."""

    def __init__(self):
        def is_state(obj):
            return core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State"

        super().__init__(is_state, "S", 1)
|
||||
|
||||
|
||||
class ConceptSerializer(JsonSerializer):
    """JSON serializer for Concept instances ("C", v1)."""

    def __init__(self):
        super().__init__("core.concept.Concept", "C", 1)

    def matches(self, obj):
        # isinstance rather than the qualified-name check: subclasses match too
        return isinstance(obj, Concept)
|
||||
|
||||
|
||||
class DictionarySerializer(PickleSerializer):
    """Pickle serializer for plain dictionaries ("D", v1)."""

    def __init__(self):
        super().__init__(lambda obj: isinstance(obj, dict), "D", 1)
|
||||
|
||||
|
||||
class ExecutionContextSerializer(BaseSerializer):
    """JSON serializer for ExecutionContext results ("R", v1)."""

    def __init__(self):
        super().__init__("R", 1)

    def matches(self, obj):
        return core.utils.get_full_qualified_name(obj) == "core.sheerka.ExecutionContext.ExecutionContext"

    def dump(self, stream, obj, context):
        stream.write(json.dumps(obj.to_dict(), default=json_default_converter).encode("utf-8"))
        stream.seek(0)
        return stream

    def load(self, stream, context):
        message = json.loads(stream.read().decode("utf-8"))
        obj = core.utils.get_class("core.sheerka.ExecutionContext")()
        obj.from_dict(message)
        return obj
|
||||
|
||||
#
|
||||
# class SheerkaSerializer(ObjectSerializer):
|
||||
# def __init__(self):
|
||||
# ObjectSerializer.__init__(self, "core.sheerka.Sheerka", "C", 1)
|
||||
Reference in New Issue
Block a user