Added bnf when adding a new concept + Started logging filtering
This commit is contained in:
@@ -190,3 +190,4 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud
|
||||
predicates.append(res)
|
||||
|
||||
return predicates
|
||||
|
||||
|
||||
+8
-8
@@ -3,6 +3,7 @@ from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import logging
|
||||
|
||||
import core.utils
|
||||
from core.tokenizer import Tokenizer, TokenKind
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -18,8 +19,7 @@ VARIABLE_PREFIX = "__var__"
|
||||
|
||||
class ConceptParts(Enum):
|
||||
"""
|
||||
Helper class. Not quite sure that it is that useful
|
||||
I guess, I was learning nums with Python...
|
||||
Lists metadata that can contain some code
|
||||
"""
|
||||
WHERE = "where"
|
||||
PRE = "pre"
|
||||
@@ -85,6 +85,7 @@ class Concept:
|
||||
self.metadata = metadata
|
||||
self.props = {} # list of Property for this concept
|
||||
self.cached_asts = {} # cached ast for the where, pre, post and body parts
|
||||
self.bnf = None
|
||||
|
||||
def __repr__(self):
|
||||
return f"({self.metadata.id}){self.metadata.name}"
|
||||
@@ -134,9 +135,9 @@ class Concept:
|
||||
return self
|
||||
|
||||
if tokens is None:
|
||||
tokens = iter(Tokenizer(self.metadata.name))
|
||||
tokens = list(Tokenizer(self.metadata.name))
|
||||
|
||||
variables = list(self.props.keys())
|
||||
variables = list(self.props.keys()) if len(core.utils.strip_tokens(tokens, True)) > 1 else []
|
||||
|
||||
key = ""
|
||||
first = True
|
||||
@@ -171,12 +172,11 @@ class Concept:
|
||||
:param codes:
|
||||
:return:
|
||||
"""
|
||||
possibles_codes = ConceptParts.get_parts()
|
||||
if codes is None:
|
||||
return
|
||||
|
||||
for key in codes:
|
||||
if key in possibles_codes:
|
||||
self.cached_asts[ConceptParts(key)] = codes[key]
|
||||
self.cached_asts[key] = codes[key]
|
||||
|
||||
return self
|
||||
|
||||
@@ -231,7 +231,7 @@ class Concept:
|
||||
return self
|
||||
|
||||
def set_prop(self, prop_name: str, prop_value=None):
|
||||
self.props[prop_name] = Property(prop_name, prop_value)
|
||||
self.props[prop_name] = Property(prop_name, prop_value) # Python 3.x order is kept in dictionaries
|
||||
return self
|
||||
|
||||
def set_prop_by_index(self, index: int, prop_value):
|
||||
|
||||
+85
-24
@@ -1,4 +1,6 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from functools import lru_cache
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
|
||||
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
@@ -10,8 +12,10 @@ import core.builtin_helpers
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
init_log = logging.getLogger(__name__ + ".init")
|
||||
|
||||
concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
|
||||
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
|
||||
|
||||
|
||||
class Sheerka(Concept):
|
||||
@@ -19,22 +23,29 @@ class Sheerka(Concept):
|
||||
Main controller for the project
|
||||
"""
|
||||
|
||||
CONCEPTS_ENTRY = "All_Concepts"
|
||||
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
|
||||
USER_CONCEPTS_KEYS = "User_Concepts"
|
||||
CONCEPTS_ENTRY = "All_Concepts" # to store all the concepts
|
||||
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
|
||||
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
|
||||
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
|
||||
|
||||
def __init__(self, debug=False, skip_builtins_in_db=False):
|
||||
def __init__(self, debug=False, skip_builtins_in_db=False, loggers=None):
|
||||
log.debug("Starting Sheerka.")
|
||||
super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
|
||||
|
||||
# cache of the most used concepts
|
||||
# Note that these are only templates
|
||||
# They are used as a footprint for instantiation
|
||||
# Except of source when the concept is supposed to be unique
|
||||
self.concepts_cache = {}
|
||||
|
||||
# cache for builtin types.
|
||||
# It allows instantiation of a builtin class
|
||||
self.builtin_cache = {}
|
||||
#
|
||||
# Cache for all concepts BNF
|
||||
self.concepts_definitions = {}
|
||||
|
||||
#
|
||||
# cache for concepts grammars
|
||||
# a grammar can be seen as a resolved BNF
|
||||
self.concepts_grammars = {}
|
||||
|
||||
# a concept can be instantiated
|
||||
# ex: File is a concept, but File('foo.txt') is an instance
|
||||
@@ -45,14 +56,16 @@ class Sheerka(Concept):
|
||||
# ex: hello => say('hello')
|
||||
self.rules = []
|
||||
|
||||
self.sdp = None
|
||||
self.parsers = []
|
||||
self.evaluators = []
|
||||
self.sdp: SheerkaDataProvider = None # SheerkaDataProvider
|
||||
self.builtin_cache = {} # cache for builtin concepts
|
||||
self.parsers = {} # cache for builtin parsers
|
||||
self.evaluators = [] # cache for builtin evaluators
|
||||
|
||||
self.evaluators_prefix = None
|
||||
self.parsers_prefix = None
|
||||
self.evaluators_prefix: str = None
|
||||
self.parsers_prefix: str = None
|
||||
|
||||
self.debug = debug
|
||||
self.loggers = loggers or []
|
||||
self.skip_builtins_in_db = skip_builtins_in_db
|
||||
|
||||
def initialize(self, root_folder: str = None):
|
||||
@@ -85,7 +98,7 @@ class Sheerka(Concept):
|
||||
Initializes the builtin concepts
|
||||
:return: None
|
||||
"""
|
||||
log.debug("Initializing builtin concepts")
|
||||
init_log.debug("Initializing builtin concepts")
|
||||
builtins_classes = self.get_builtins_classes_as_dict()
|
||||
|
||||
# all this initialization of the builtins seems to be a little bit complicated
|
||||
@@ -101,11 +114,11 @@ class Sheerka(Concept):
|
||||
if not self.skip_builtins_in_db:
|
||||
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key)
|
||||
if from_db is None:
|
||||
log.debug(f"'{concept.name}' concept is not found in db. Adding.")
|
||||
init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
|
||||
self.set_id_if_needed(concept, True)
|
||||
self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
|
||||
else:
|
||||
log.debug(f"Found concept '{from_db}' in db. Updating.")
|
||||
init_log.debug(f"Found concept '{from_db}' in db. Updating.")
|
||||
concept.update_from(from_db)
|
||||
|
||||
self.add_in_cache(concept)
|
||||
@@ -120,8 +133,8 @@ class Sheerka(Concept):
|
||||
if parser.__module__ == base_class.__module__:
|
||||
continue
|
||||
|
||||
log.debug(f"Adding builtin parser '{parser.__name__}'")
|
||||
self.parsers.append(parser)
|
||||
init_log.debug(f"Adding builtin parser '{parser.__name__}'")
|
||||
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
|
||||
|
||||
def initialize_builtin_evaluators(self):
|
||||
"""
|
||||
@@ -129,14 +142,26 @@ class Sheerka(Concept):
|
||||
:return:
|
||||
"""
|
||||
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"):
|
||||
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
self.evaluators.append(evaluator)
|
||||
|
||||
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"):
|
||||
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
self.evaluators.append(evaluator)
|
||||
|
||||
def logger_filter(self, record: logging.LogRecord):
|
||||
if 'all' in self.loggers:
|
||||
return True
|
||||
|
||||
ret = True
|
||||
if 'init' not in self.loggers and record.name.endswith(".init"):
|
||||
ret = False
|
||||
|
||||
return ret
|
||||
|
||||
def init_logging(self):
|
||||
handler = logging.StreamHandler()
|
||||
handler.addFilter(self.logger_filter)
|
||||
if self.debug:
|
||||
log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
|
||||
log_level = logging.DEBUG
|
||||
@@ -144,7 +169,7 @@ class Sheerka(Concept):
|
||||
log_format = "%(message)s"
|
||||
log_level = logging.INFO
|
||||
|
||||
logging.basicConfig(format=log_format, level=log_level)
|
||||
logging.basicConfig(format=log_format, level=log_level, handlers=[handler])
|
||||
|
||||
def eval(self, text: str):
|
||||
"""
|
||||
@@ -153,7 +178,9 @@ class Sheerka(Concept):
|
||||
:param text:
|
||||
:return:
|
||||
"""
|
||||
log.debug(f"Evaluating '{text}'.")
|
||||
evt_digest = self.sdp.save_event(Event(text))
|
||||
log.debug(f"{evt_digest=}")
|
||||
exec_context = ExecutionContext(self.key, evt_digest, self)
|
||||
|
||||
# Before parsing
|
||||
@@ -183,7 +210,7 @@ class Sheerka(Concept):
|
||||
debug_text = "'" + text + "'" if isinstance(text, str) \
|
||||
else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
|
||||
log.debug(f"Parsing {debug_text}")
|
||||
for parser in self.parsers:
|
||||
for parser in self.parsers.values():
|
||||
p = parser()
|
||||
res = p.parse(context, text)
|
||||
if isinstance(res, list):
|
||||
@@ -193,7 +220,7 @@ class Sheerka(Concept):
|
||||
return result
|
||||
|
||||
def process(self, context, return_values, initial_concepts=None):
|
||||
log.debug(f"Processing parsing result. context concept={initial_concepts}")
|
||||
log.debug(f"{initial_concepts=}. Processing " + core.utils.pp(return_values))
|
||||
|
||||
# return_values must be a list
|
||||
if not isinstance(return_values, list):
|
||||
@@ -303,6 +330,8 @@ class Sheerka(Concept):
|
||||
"""
|
||||
|
||||
concept.init_key()
|
||||
concepts_definitions = None
|
||||
init_ret_value = None
|
||||
|
||||
# checks for duplicate concepts
|
||||
if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
|
||||
@@ -312,14 +341,33 @@ class Sheerka(Concept):
|
||||
# set id before saving in db
|
||||
self.set_id_if_needed(concept, False)
|
||||
|
||||
# add the BNF if known
|
||||
if concept.bnf:
|
||||
concepts_definitions = self.concepts_definitions.copy()
|
||||
concepts_definitions[concept] = concept.bnf
|
||||
|
||||
# check if it's a valid BNF or whether it breaks the known rules
|
||||
concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy())
|
||||
sub_context = context.push(self.name, "Initializing concept definition")
|
||||
sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet
|
||||
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
|
||||
if not init_ret_value.status:
|
||||
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(init_ret_value.value))
|
||||
|
||||
# save the new context in sdp
|
||||
try:
|
||||
self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
|
||||
if concepts_definitions is not None:
|
||||
self.sdp.set(context.event_digest, self.CONCEPTS_DEFINITIONS_ENTRY, concepts_definitions, use_ref=True)
|
||||
except SheerkaDataProviderDuplicateKeyError as error:
|
||||
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
|
||||
|
||||
# add in cache for quick further reference
|
||||
# Updates the caches
|
||||
self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
|
||||
if concepts_definitions is not None:
|
||||
self.concepts_definitions = concepts_definitions
|
||||
if init_ret_value is not None and init_ret_value.status:
|
||||
self.concepts_grammars = init_ret_value.body
|
||||
|
||||
# process the return if needed
|
||||
ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
||||
@@ -514,6 +562,18 @@ class Sheerka(Concept):
|
||||
|
||||
return (self.value(obj) for obj in objs)
|
||||
|
||||
def is_success(self, obj):
|
||||
if isinstance(obj, bool):
|
||||
return obj
|
||||
|
||||
if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
|
||||
return obj.status
|
||||
|
||||
if self.isinstance(obj, BuiltinConcepts.ERROR):
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
def isinstance(self, a, b):
|
||||
"""
|
||||
return true if the concept a is an instance of the concept b
|
||||
@@ -603,6 +663,7 @@ class ExecutionContext:
|
||||
sheerka: Sheerka # sheerka
|
||||
desc: str = None # human description of what is going on
|
||||
obj: Concept = None # what is the subject of the execution context (if known)
|
||||
concepts_cache: dict = field(default_factory=dict)
|
||||
|
||||
def push(self, who, desc=None, obj=None):
|
||||
return ExecutionContext(who, self.event_digest, self.sheerka, desc=desc, obj=obj)
|
||||
|
||||
+2
-21
@@ -80,6 +80,8 @@ class LexerError(Exception):
|
||||
class Keywords(Enum):
|
||||
DEF = "def"
|
||||
CONCEPT = "concept"
|
||||
FROM = "from"
|
||||
BNF = "bnf"
|
||||
AS = "as"
|
||||
WHERE = "where"
|
||||
PRE = "pre"
|
||||
@@ -308,24 +310,3 @@ class Tokenizer:
|
||||
1 if lines_count > 0 else start_column + len(result))
|
||||
|
||||
return result, lines_count
|
||||
|
||||
def seek(self, words):
|
||||
if self.i == self.text_len:
|
||||
return 0
|
||||
|
||||
# init
|
||||
offsets = {}
|
||||
start_index = self.i
|
||||
|
||||
buffer = ""
|
||||
while self.i < self.text_len:
|
||||
c = self.text[self.i]
|
||||
|
||||
# skip white space
|
||||
if c in (" ", "\t"):
|
||||
self.i += 1
|
||||
continue
|
||||
|
||||
for word in words:
|
||||
if c == word[offset]:
|
||||
os
|
||||
|
||||
+63
-5
@@ -3,6 +3,8 @@ import inspect
|
||||
import pkgutil
|
||||
import sys
|
||||
|
||||
from core.tokenizer import TokenKind
|
||||
|
||||
|
||||
def sysarg_to_string(argv):
|
||||
"""
|
||||
@@ -72,11 +74,18 @@ def get_full_qualified_name(obj):
|
||||
:param obj:
|
||||
:return:
|
||||
"""
|
||||
module = obj.__class__.__module__
|
||||
if module is None or module == str.__class__.__module__:
|
||||
return obj.__class__.__name__ # Avoid reporting __builtin__
|
||||
if obj.__class__ == type:
|
||||
module = obj.__module__
|
||||
if module is None or module == str.__class__.__module__:
|
||||
return obj.__name__ # Avoid reporting __builtin__
|
||||
else:
|
||||
return module + '.' + obj.__name__
|
||||
else:
|
||||
return module + '.' + obj.__class__.__name__
|
||||
module = obj.__class__.__module__
|
||||
if module is None or module == str.__class__.__module__:
|
||||
return obj.__class__.__name__ # Avoid reporting __builtin__
|
||||
else:
|
||||
return module + '.' + obj.__class__.__name__
|
||||
|
||||
|
||||
def get_classes(module_name):
|
||||
@@ -137,7 +146,7 @@ def remove_from_list(lst, to_remove_predicate):
|
||||
|
||||
def product(a, b):
|
||||
"""
|
||||
Kind of cartesian product between list a and b
|
||||
Kind of cartesian product between lists a and b
|
||||
knowing that a is also a list
|
||||
|
||||
So it's a cartesian product between a list of list and a list
|
||||
@@ -155,3 +164,52 @@ def product(a, b):
|
||||
res.append(items)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def strip_quotes(text):
|
||||
if not isinstance(text, str):
|
||||
return text
|
||||
|
||||
if text == "":
|
||||
return ""
|
||||
|
||||
if text[0] == "'" or text[0] == '"':
|
||||
return text[1:-1]
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def strip_tokens(tokens, strip_eof=False):
|
||||
"""
|
||||
Remove the starting and trailing spaces and newline
|
||||
"""
|
||||
if tokens is None:
|
||||
return None
|
||||
|
||||
start = 0
|
||||
length = len(tokens)
|
||||
while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
|
||||
start += 1
|
||||
|
||||
if start == length:
|
||||
return []
|
||||
|
||||
end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
|
||||
if strip_eof \
|
||||
else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
|
||||
|
||||
end = length - 1
|
||||
while end > 0 and tokens[end].type in end_tokens:
|
||||
end -= 1
|
||||
|
||||
return tokens[start: end + 1]
|
||||
|
||||
|
||||
def pp(items):
|
||||
if not hasattr(items, "__iter__"):
|
||||
return str(items)
|
||||
|
||||
if len(items) == 0:
|
||||
return str(items)
|
||||
|
||||
return " \n" + " \n".join(str(item) for item in items)
|
||||
|
||||
Reference in New Issue
Block a user