Added bnf when adding a new concept + Started logging filtering

This commit is contained in:
2019-12-13 20:26:11 +01:00
parent 75c8793d53
commit c668cc46d2
29 changed files with 1487 additions and 190 deletions
+1
View File
@@ -190,3 +190,4 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud
predicates.append(res)
return predicates
+8 -8
View File
@@ -3,6 +3,7 @@ from dataclasses import dataclass
from enum import Enum
import logging
import core.utils
from core.tokenizer import Tokenizer, TokenKind
log = logging.getLogger(__name__)
@@ -18,8 +19,7 @@ VARIABLE_PREFIX = "__var__"
class ConceptParts(Enum):
"""
Helper class, Note quite sure that is it that useful
I guess, I was learning nums with Python...
Lists metadata that can contain some code
"""
WHERE = "where"
PRE = "pre"
@@ -85,6 +85,7 @@ class Concept:
self.metadata = metadata
self.props = {} # list of Property for this concept
self.cached_asts = {} # cached ast for the where, pre, post and body parts
self.bnf = None
def __repr__(self):
return f"({self.metadata.id}){self.metadata.name}"
@@ -134,9 +135,9 @@ class Concept:
return self
if tokens is None:
tokens = iter(Tokenizer(self.metadata.name))
tokens = list(Tokenizer(self.metadata.name))
variables = list(self.props.keys())
variables = list(self.props.keys()) if len(core.utils.strip_tokens(tokens, True)) > 1 else []
key = ""
first = True
@@ -171,12 +172,11 @@ class Concept:
:param codes:
:return:
"""
possibles_codes = ConceptParts.get_parts()
if codes is None:
return
for key in codes:
if key in possibles_codes:
self.cached_asts[ConceptParts(key)] = codes[key]
self.cached_asts[key] = codes[key]
return self
@@ -231,7 +231,7 @@ class Concept:
return self
def set_prop(self, prop_name: str, prop_value=None):
self.props[prop_name] = Property(prop_name, prop_value)
self.props[prop_name] = Property(prop_name, prop_value) # Python 3.x order is kept in dictionaries
return self
def set_prop_by_index(self, index: int, prop_value):
+85 -24
View File
@@ -1,4 +1,6 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from functools import lru_cache
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
from evaluators.BaseEvaluator import OneReturnValueEvaluator
@@ -10,8 +12,10 @@ import core.builtin_helpers
import logging
log = logging.getLogger(__name__)
init_log = logging.getLogger(__name__ + ".init")
concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
class Sheerka(Concept):
@@ -19,22 +23,29 @@ class Sheerka(Concept):
Main controller for the project
"""
CONCEPTS_ENTRY = "All_Concepts"
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
USER_CONCEPTS_KEYS = "User_Concepts"
CONCEPTS_ENTRY = "All_Concepts" # to store all the concepts
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
def __init__(self, debug=False, skip_builtins_in_db=False):
def __init__(self, debug=False, skip_builtins_in_db=False, loggers=None):
log.debug("Starting Sheerka.")
super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
# cache of the most used concepts
# Note that these are only templates
# They are used as a footprint for instantiation
# Except for the source, when the concept is supposed to be unique
self.concepts_cache = {}
# cache for builtin types.
# It allows instantiation of a builtin class
self.builtin_cache = {}
#
# Cache for all concepts BNF
self.concepts_definitions = {}
#
# cache for concepts grammars
# a grammar can be seen as a resolved BNF
self.concepts_grammars = {}
# a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance
@@ -45,14 +56,16 @@ class Sheerka(Concept):
# ex: hello => say('hello')
self.rules = []
self.sdp = None
self.parsers = []
self.evaluators = []
self.sdp: SheerkaDataProvider = None # SheerkaDataProvider
self.builtin_cache = {} # cache for builtin concepts
self.parsers = {} # cache for builtin parsers
self.evaluators = [] # cache for builtin evaluators
self.evaluators_prefix = None
self.parsers_prefix = None
self.evaluators_prefix: str = None
self.parsers_prefix: str = None
self.debug = debug
self.loggers = loggers or []
self.skip_builtins_in_db = skip_builtins_in_db
def initialize(self, root_folder: str = None):
@@ -85,7 +98,7 @@ class Sheerka(Concept):
Initializes the builtin concepts
:return: None
"""
log.debug("Initializing builtin concepts")
init_log.debug("Initializing builtin concepts")
builtins_classes = self.get_builtins_classes_as_dict()
# this all initialization of the builtins seems to be little bit complicated
@@ -101,11 +114,11 @@ class Sheerka(Concept):
if not self.skip_builtins_in_db:
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key)
if from_db is None:
log.debug(f"'{concept.name}' concept is not found in db. Adding.")
init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
self.set_id_if_needed(concept, True)
self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
else:
log.debug(f"Found concept '{from_db}' in db. Updating.")
init_log.debug(f"Found concept '{from_db}' in db. Updating.")
concept.update_from(from_db)
self.add_in_cache(concept)
@@ -120,8 +133,8 @@ class Sheerka(Concept):
if parser.__module__ == base_class.__module__:
continue
log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers.append(parser)
init_log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
def initialize_builtin_evaluators(self):
"""
@@ -129,14 +142,26 @@ class Sheerka(Concept):
:return:
"""
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"):
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"):
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)
def logger_filter(self, record: logging.LogRecord):
    """
    Filter callback installed on the logging handler (see init_logging).

    :param record: the log record about to be emitted
    :return: True to let the record through, False to drop it

    Records coming from loggers whose name ends with ".init" (e.g. the
    module-level ``init_log``) are suppressed unless 'init' is listed in
    ``self.loggers``; the special entry 'all' lets everything through.
    """
    # 'all' short-circuits every other filtering rule
    if 'all' in self.loggers:
        return True
    ret = True
    # ".init" loggers are opt-in via the 'init' entry in self.loggers
    if 'init' not in self.loggers and record.name.endswith(".init"):
        ret = False
    return ret
def init_logging(self):
handler = logging.StreamHandler()
handler.addFilter(self.logger_filter)
if self.debug:
log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
log_level = logging.DEBUG
@@ -144,7 +169,7 @@ class Sheerka(Concept):
log_format = "%(message)s"
log_level = logging.INFO
logging.basicConfig(format=log_format, level=log_level)
logging.basicConfig(format=log_format, level=log_level, handlers=[handler])
def eval(self, text: str):
"""
@@ -153,7 +178,9 @@ class Sheerka(Concept):
:param text:
:return:
"""
log.debug(f"Evaluating '{text}'.")
evt_digest = self.sdp.save_event(Event(text))
log.debug(f"{evt_digest=}")
exec_context = ExecutionContext(self.key, evt_digest, self)
# Before parsing
@@ -183,7 +210,7 @@ class Sheerka(Concept):
debug_text = "'" + text + "'" if isinstance(text, str) \
else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
log.debug(f"Parsing {debug_text}")
for parser in self.parsers:
for parser in self.parsers.values():
p = parser()
res = p.parse(context, text)
if isinstance(res, list):
@@ -193,7 +220,7 @@ class Sheerka(Concept):
return result
def process(self, context, return_values, initial_concepts=None):
log.debug(f"Processing parsing result. context concept={initial_concepts}")
log.debug(f"{initial_concepts=}. Processing " + core.utils.pp(return_values))
# return_values must be a list
if not isinstance(return_values, list):
@@ -303,6 +330,8 @@ class Sheerka(Concept):
"""
concept.init_key()
concepts_definitions = None
init_ret_value = None
# checks for duplicate concepts
if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
@@ -312,14 +341,33 @@ class Sheerka(Concept):
# set id before saving in db
self.set_id_if_needed(concept, False)
# add the BNF if known
if concept.bnf:
concepts_definitions = self.concepts_definitions.copy()
concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules
concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy())
sub_context = context.push(self.name, "Initializing concept definition")
sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
if not init_ret_value.status:
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(init_ret_value.value))
# save the new context in sdp
try:
self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
if concepts_definitions is not None:
self.sdp.set(context.event_digest, self.CONCEPTS_DEFINITIONS_ENTRY, concepts_definitions, use_ref=True)
except SheerkaDataProviderDuplicateKeyError as error:
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
# add in cache for quick further reference
# Updates the caches
self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
if concepts_definitions is not None:
self.concepts_definitions = concepts_definitions
if init_ret_value is not None and init_ret_value.status:
self.concepts_grammars = init_ret_value.body
# process the return if needed
ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
@@ -514,6 +562,18 @@ class Sheerka(Concept):
return (self.value(obj) for obj in objs)
def is_success(self, obj):
    """
    Tell whether *obj* denotes a successful outcome.

    :param obj: a plain bool, a RETURN_VALUE concept, or anything else
    :return: the bool itself for booleans; ``obj.status`` for
             RETURN_VALUE concepts; False for everything else
             (including ERROR concepts)
    """
    if isinstance(obj, bool):
        return obj
    if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
        return obj.status
    # NOTE(review): this branch is redundant — the fallthrough below
    # already returns False for ERROR concepts; kept for readability?
    if self.isinstance(obj, BuiltinConcepts.ERROR):
        return False
    return False
def isinstance(self, a, b):
"""
return true if the concept a is an instance of the concept b
@@ -603,6 +663,7 @@ class ExecutionContext:
sheerka: Sheerka # sheerka
desc: str = None # human description of what is going on
obj: Concept = None # what is the subject of the execution context (if known)
concepts_cache: dict = field(default_factory=dict)
def push(self, who, desc=None, obj=None):
return ExecutionContext(who, self.event_digest, self.sheerka, desc=desc, obj=obj)
+2 -21
View File
@@ -80,6 +80,8 @@ class LexerError(Exception):
class Keywords(Enum):
DEF = "def"
CONCEPT = "concept"
FROM = "from"
BNF = "bnf"
AS = "as"
WHERE = "where"
PRE = "pre"
@@ -308,24 +310,3 @@ class Tokenizer:
1 if lines_count > 0 else start_column + len(result))
return result, lines_count
def seek(self, words):
if self.i == self.text_len:
return 0
# init
offsets = {}
start_index = self.i
buffer = ""
while self.i < self.text_len:
c = self.text[self.i]
# skip white space
if c in (" ", "\t"):
self.i += 1
continue
for word in words:
if c == word[offset]:
os
+63 -5
View File
@@ -3,6 +3,8 @@ import inspect
import pkgutil
import sys
from core.tokenizer import TokenKind
def sysarg_to_string(argv):
"""
@@ -72,11 +74,18 @@ def get_full_qualified_name(obj):
:param obj:
:return:
"""
module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
if obj.__class__ == type:
module = obj.__module__
if module is None or module == str.__class__.__module__:
return obj.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__name__
else:
return module + '.' + obj.__class__.__name__
module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__class__.__name__
def get_classes(module_name):
@@ -137,7 +146,7 @@ def remove_from_list(lst, to_remove_predicate):
def product(a, b):
"""
Kind of cartesian product between list a and b
Kind of cartesian product between lists a and b
knowing that a is also a list
So it's a cartesian product between a list of list and a list
@@ -155,3 +164,52 @@ def product(a, b):
res.append(items)
return res
def strip_quotes(text):
    """
    Remove one pair of matching surrounding quotes from *text*.

    :param text: the value to unquote; non-strings are returned unchanged
    :return: *text* without its surrounding quotes when it is wrapped in a
             matching pair of single or double quotes, otherwise *text*
             unchanged
    """
    if not isinstance(text, str):
        return text
    # need at least an opening AND a closing quote
    if len(text) < 2:
        return text
    # only strip when the closing quote matches the opening one; the
    # previous version sliced off the last character even when the string
    # had no closing quote (e.g. "'abc" became "ab")
    if text[0] in ("'", '"') and text[-1] == text[0]:
        return text[1:-1]
    return text
def strip_tokens(tokens, strip_eof=False):
    """
    Trim leading and trailing whitespace/newline tokens (and, optionally,
    a trailing EOF token) from a token list.

    :param tokens: list of tokens (each exposing a ``type``), or None
    :param strip_eof: when True, EOF tokens are also trimmed at the end
    :return: the trimmed sub-list, [] when nothing remains, None for None
    """
    if tokens is None:
        return None
    leading_kinds = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
    trailing_kinds = leading_kinds + (TokenKind.EOF,) if strip_eof else leading_kinds
    first = 0
    count = len(tokens)
    while first < count and tokens[first].type in leading_kinds:
        first += 1
    # every token was whitespace/newline
    if first == count:
        return []
    last = count - 1
    while last > 0 and tokens[last].type in trailing_kinds:
        last -= 1
    return tokens[first:last + 1]
def pp(items):
    """
    Pretty-print helper: renders a non-empty sized iterable one item per
    line (each line prefixed by a leading " \\n"), anything else via str().

    :param items: any value
    :return: the formatted string
    """
    # non-iterables and empty containers fall back to plain str()
    if hasattr(items, "__iter__") and len(items) > 0:
        rendered = [str(entry) for entry in items]
        return " \n" + " \n".join(rendered)
    return str(items)