Added bnf when adding a new concept + Started logging filtering
This commit is contained in:
@@ -190,3 +190,4 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud
|
||||
predicates.append(res)
|
||||
|
||||
return predicates
|
||||
|
||||
|
||||
+8
-8
@@ -3,6 +3,7 @@ from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import logging
|
||||
|
||||
import core.utils
|
||||
from core.tokenizer import Tokenizer, TokenKind
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -18,8 +19,7 @@ VARIABLE_PREFIX = "__var__"
|
||||
|
||||
class ConceptParts(Enum):
|
||||
"""
|
||||
Helper class. Not quite sure that it is that useful
|
||||
I guess, I was learning nums with Python...
|
||||
Lists metadata that can contain some code
|
||||
"""
|
||||
WHERE = "where"
|
||||
PRE = "pre"
|
||||
@@ -85,6 +85,7 @@ class Concept:
|
||||
self.metadata = metadata
|
||||
self.props = {} # list of Property for this concept
|
||||
self.cached_asts = {} # cached ast for the where, pre, post and body parts
|
||||
self.bnf = None
|
||||
|
||||
def __repr__(self):
|
||||
return f"({self.metadata.id}){self.metadata.name}"
|
||||
@@ -134,9 +135,9 @@ class Concept:
|
||||
return self
|
||||
|
||||
if tokens is None:
|
||||
tokens = iter(Tokenizer(self.metadata.name))
|
||||
tokens = list(Tokenizer(self.metadata.name))
|
||||
|
||||
variables = list(self.props.keys())
|
||||
variables = list(self.props.keys()) if len(core.utils.strip_tokens(tokens, True)) > 1 else []
|
||||
|
||||
key = ""
|
||||
first = True
|
||||
@@ -171,12 +172,11 @@ class Concept:
|
||||
:param codes:
|
||||
:return:
|
||||
"""
|
||||
possibles_codes = ConceptParts.get_parts()
|
||||
if codes is None:
|
||||
return
|
||||
|
||||
for key in codes:
|
||||
if key in possibles_codes:
|
||||
self.cached_asts[ConceptParts(key)] = codes[key]
|
||||
self.cached_asts[key] = codes[key]
|
||||
|
||||
return self
|
||||
|
||||
@@ -231,7 +231,7 @@ class Concept:
|
||||
return self
|
||||
|
||||
def set_prop(self, prop_name: str, prop_value=None):
|
||||
self.props[prop_name] = Property(prop_name, prop_value)
|
||||
self.props[prop_name] = Property(prop_name, prop_value) # Python 3.x order is kept in dictionaries
|
||||
return self
|
||||
|
||||
def set_prop_by_index(self, index: int, prop_value):
|
||||
|
||||
+85
-24
@@ -1,4 +1,6 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from functools import lru_cache
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
|
||||
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
@@ -10,8 +12,10 @@ import core.builtin_helpers
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
init_log = logging.getLogger(__name__ + ".init")
|
||||
|
||||
concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
|
||||
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
|
||||
|
||||
|
||||
class Sheerka(Concept):
|
||||
@@ -19,22 +23,29 @@ class Sheerka(Concept):
|
||||
Main controller for the project
|
||||
"""
|
||||
|
||||
CONCEPTS_ENTRY = "All_Concepts"
|
||||
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
|
||||
USER_CONCEPTS_KEYS = "User_Concepts"
|
||||
CONCEPTS_ENTRY = "All_Concepts" # to store all the concepts
|
||||
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
|
||||
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
|
||||
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
|
||||
|
||||
def __init__(self, debug=False, skip_builtins_in_db=False):
|
||||
def __init__(self, debug=False, skip_builtins_in_db=False, loggers=None):
|
||||
log.debug("Starting Sheerka.")
|
||||
super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
|
||||
|
||||
# cache of the most used concepts
|
||||
# Note that these are only templates
|
||||
# They are used as a footprint for instantiation
|
||||
# Except of source when the concept is supposed to be unique
|
||||
self.concepts_cache = {}
|
||||
|
||||
# cache for builtin types.
|
||||
# It allows instantiation of a builtin class
|
||||
self.builtin_cache = {}
|
||||
#
|
||||
# Cache for all concepts BNF
|
||||
self.concepts_definitions = {}
|
||||
|
||||
#
|
||||
# cache for concepts grammars
|
||||
# a grammar can be seen as a resolved BNF
|
||||
self.concepts_grammars = {}
|
||||
|
||||
# a concept can be instantiated
|
||||
# ex: File is a concept, but File('foo.txt') is an instance
|
||||
@@ -45,14 +56,16 @@ class Sheerka(Concept):
|
||||
# ex: hello => say('hello')
|
||||
self.rules = []
|
||||
|
||||
self.sdp = None
|
||||
self.parsers = []
|
||||
self.evaluators = []
|
||||
self.sdp: SheerkaDataProvider = None # SheerkaDataProvider
|
||||
self.builtin_cache = {} # cache for builtin concepts
|
||||
self.parsers = {} # cache for builtin parsers
|
||||
self.evaluators = [] # cache for builtin evaluators
|
||||
|
||||
self.evaluators_prefix = None
|
||||
self.parsers_prefix = None
|
||||
self.evaluators_prefix: str = None
|
||||
self.parsers_prefix: str = None
|
||||
|
||||
self.debug = debug
|
||||
self.loggers = loggers or []
|
||||
self.skip_builtins_in_db = skip_builtins_in_db
|
||||
|
||||
def initialize(self, root_folder: str = None):
|
||||
@@ -85,7 +98,7 @@ class Sheerka(Concept):
|
||||
Initializes the builtin concepts
|
||||
:return: None
|
||||
"""
|
||||
log.debug("Initializing builtin concepts")
|
||||
init_log.debug("Initializing builtin concepts")
|
||||
builtins_classes = self.get_builtins_classes_as_dict()
|
||||
|
||||
# all this initialization of the builtins seems to be a little bit complicated
|
||||
@@ -101,11 +114,11 @@ class Sheerka(Concept):
|
||||
if not self.skip_builtins_in_db:
|
||||
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key)
|
||||
if from_db is None:
|
||||
log.debug(f"'{concept.name}' concept is not found in db. Adding.")
|
||||
init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
|
||||
self.set_id_if_needed(concept, True)
|
||||
self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
|
||||
else:
|
||||
log.debug(f"Found concept '{from_db}' in db. Updating.")
|
||||
init_log.debug(f"Found concept '{from_db}' in db. Updating.")
|
||||
concept.update_from(from_db)
|
||||
|
||||
self.add_in_cache(concept)
|
||||
@@ -120,8 +133,8 @@ class Sheerka(Concept):
|
||||
if parser.__module__ == base_class.__module__:
|
||||
continue
|
||||
|
||||
log.debug(f"Adding builtin parser '{parser.__name__}'")
|
||||
self.parsers.append(parser)
|
||||
init_log.debug(f"Adding builtin parser '{parser.__name__}'")
|
||||
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
|
||||
|
||||
def initialize_builtin_evaluators(self):
|
||||
"""
|
||||
@@ -129,14 +142,26 @@ class Sheerka(Concept):
|
||||
:return:
|
||||
"""
|
||||
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"):
|
||||
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
self.evaluators.append(evaluator)
|
||||
|
||||
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"):
|
||||
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
self.evaluators.append(evaluator)
|
||||
|
||||
def logger_filter(self, record: logging.LogRecord):
|
||||
if 'all' in self.loggers:
|
||||
return True
|
||||
|
||||
ret = True
|
||||
if 'init' not in self.loggers and record.name.endswith(".init"):
|
||||
ret = False
|
||||
|
||||
return ret
|
||||
|
||||
def init_logging(self):
|
||||
handler = logging.StreamHandler()
|
||||
handler.addFilter(self.logger_filter)
|
||||
if self.debug:
|
||||
log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
|
||||
log_level = logging.DEBUG
|
||||
@@ -144,7 +169,7 @@ class Sheerka(Concept):
|
||||
log_format = "%(message)s"
|
||||
log_level = logging.INFO
|
||||
|
||||
logging.basicConfig(format=log_format, level=log_level)
|
||||
logging.basicConfig(format=log_format, level=log_level, handlers=[handler])
|
||||
|
||||
def eval(self, text: str):
|
||||
"""
|
||||
@@ -153,7 +178,9 @@ class Sheerka(Concept):
|
||||
:param text:
|
||||
:return:
|
||||
"""
|
||||
log.debug(f"Evaluating '{text}'.")
|
||||
evt_digest = self.sdp.save_event(Event(text))
|
||||
log.debug(f"{evt_digest=}")
|
||||
exec_context = ExecutionContext(self.key, evt_digest, self)
|
||||
|
||||
# Before parsing
|
||||
@@ -183,7 +210,7 @@ class Sheerka(Concept):
|
||||
debug_text = "'" + text + "'" if isinstance(text, str) \
|
||||
else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
|
||||
log.debug(f"Parsing {debug_text}")
|
||||
for parser in self.parsers:
|
||||
for parser in self.parsers.values():
|
||||
p = parser()
|
||||
res = p.parse(context, text)
|
||||
if isinstance(res, list):
|
||||
@@ -193,7 +220,7 @@ class Sheerka(Concept):
|
||||
return result
|
||||
|
||||
def process(self, context, return_values, initial_concepts=None):
|
||||
log.debug(f"Processing parsing result. context concept={initial_concepts}")
|
||||
log.debug(f"{initial_concepts=}. Processing " + core.utils.pp(return_values))
|
||||
|
||||
# return_values must be a list
|
||||
if not isinstance(return_values, list):
|
||||
@@ -303,6 +330,8 @@ class Sheerka(Concept):
|
||||
"""
|
||||
|
||||
concept.init_key()
|
||||
concepts_definitions = None
|
||||
init_ret_value = None
|
||||
|
||||
# checks for duplicate concepts
|
||||
if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
|
||||
@@ -312,14 +341,33 @@ class Sheerka(Concept):
|
||||
# set id before saving in db
|
||||
self.set_id_if_needed(concept, False)
|
||||
|
||||
# add the BNF if known
|
||||
if concept.bnf:
|
||||
concepts_definitions = self.concepts_definitions.copy()
|
||||
concepts_definitions[concept] = concept.bnf
|
||||
|
||||
# check if it's a valid BNF or whether it breaks the known rules
|
||||
concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy())
|
||||
sub_context = context.push(self.name, "Initializing concept definition")
|
||||
sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet
|
||||
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
|
||||
if not init_ret_value.status:
|
||||
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(init_ret_value.value))
|
||||
|
||||
# save the new context in sdp
|
||||
try:
|
||||
self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
|
||||
if concepts_definitions is not None:
|
||||
self.sdp.set(context.event_digest, self.CONCEPTS_DEFINITIONS_ENTRY, concepts_definitions, use_ref=True)
|
||||
except SheerkaDataProviderDuplicateKeyError as error:
|
||||
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
|
||||
|
||||
# add in cache for quick further reference
|
||||
# Updates the caches
|
||||
self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
|
||||
if concepts_definitions is not None:
|
||||
self.concepts_definitions = concepts_definitions
|
||||
if init_ret_value is not None and init_ret_value.status:
|
||||
self.concepts_grammars = init_ret_value.body
|
||||
|
||||
# process the return if needed
|
||||
ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
||||
@@ -514,6 +562,18 @@ class Sheerka(Concept):
|
||||
|
||||
return (self.value(obj) for obj in objs)
|
||||
|
||||
def is_success(self, obj):
|
||||
if isinstance(obj, bool):
|
||||
return obj
|
||||
|
||||
if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
|
||||
return obj.status
|
||||
|
||||
if self.isinstance(obj, BuiltinConcepts.ERROR):
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
def isinstance(self, a, b):
|
||||
"""
|
||||
return true if the concept a is an instance of the concept b
|
||||
@@ -603,6 +663,7 @@ class ExecutionContext:
|
||||
sheerka: Sheerka # sheerka
|
||||
desc: str = None # human description of what is going on
|
||||
obj: Concept = None # what is the subject of the execution context (if known)
|
||||
concepts_cache: dict = field(default_factory=dict)
|
||||
|
||||
def push(self, who, desc=None, obj=None):
|
||||
return ExecutionContext(who, self.event_digest, self.sheerka, desc=desc, obj=obj)
|
||||
|
||||
+2
-21
@@ -80,6 +80,8 @@ class LexerError(Exception):
|
||||
class Keywords(Enum):
|
||||
DEF = "def"
|
||||
CONCEPT = "concept"
|
||||
FROM = "from"
|
||||
BNF = "bnf"
|
||||
AS = "as"
|
||||
WHERE = "where"
|
||||
PRE = "pre"
|
||||
@@ -308,24 +310,3 @@ class Tokenizer:
|
||||
1 if lines_count > 0 else start_column + len(result))
|
||||
|
||||
return result, lines_count
|
||||
|
||||
def seek(self, words):
|
||||
if self.i == self.text_len:
|
||||
return 0
|
||||
|
||||
# init
|
||||
offsets = {}
|
||||
start_index = self.i
|
||||
|
||||
buffer = ""
|
||||
while self.i < self.text_len:
|
||||
c = self.text[self.i]
|
||||
|
||||
# skip white space
|
||||
if c in (" ", "\t"):
|
||||
self.i += 1
|
||||
continue
|
||||
|
||||
for word in words:
|
||||
if c == word[offset]:
|
||||
os
|
||||
|
||||
+63
-5
@@ -3,6 +3,8 @@ import inspect
|
||||
import pkgutil
|
||||
import sys
|
||||
|
||||
from core.tokenizer import TokenKind
|
||||
|
||||
|
||||
def sysarg_to_string(argv):
|
||||
"""
|
||||
@@ -72,11 +74,18 @@ def get_full_qualified_name(obj):
|
||||
:param obj:
|
||||
:return:
|
||||
"""
|
||||
module = obj.__class__.__module__
|
||||
if module is None or module == str.__class__.__module__:
|
||||
return obj.__class__.__name__ # Avoid reporting __builtin__
|
||||
if obj.__class__ == type:
|
||||
module = obj.__module__
|
||||
if module is None or module == str.__class__.__module__:
|
||||
return obj.__name__ # Avoid reporting __builtin__
|
||||
else:
|
||||
return module + '.' + obj.__name__
|
||||
else:
|
||||
return module + '.' + obj.__class__.__name__
|
||||
module = obj.__class__.__module__
|
||||
if module is None or module == str.__class__.__module__:
|
||||
return obj.__class__.__name__ # Avoid reporting __builtin__
|
||||
else:
|
||||
return module + '.' + obj.__class__.__name__
|
||||
|
||||
|
||||
def get_classes(module_name):
|
||||
@@ -137,7 +146,7 @@ def remove_from_list(lst, to_remove_predicate):
|
||||
|
||||
def product(a, b):
|
||||
"""
|
||||
Kind of cartesian product between list a and b
|
||||
Kind of cartesian product between lists a and b
|
||||
knowing that a is also a list
|
||||
|
||||
So it's a cartesian product between a list of list and a list
|
||||
@@ -155,3 +164,52 @@ def product(a, b):
|
||||
res.append(items)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def strip_quotes(text):
|
||||
if not isinstance(text, str):
|
||||
return text
|
||||
|
||||
if text == "":
|
||||
return ""
|
||||
|
||||
if text[0] == "'" or text[0] == '"':
|
||||
return text[1:-1]
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def strip_tokens(tokens, strip_eof=False):
|
||||
"""
|
||||
Remove the starting and trailing spaces and newline
|
||||
"""
|
||||
if tokens is None:
|
||||
return None
|
||||
|
||||
start = 0
|
||||
length = len(tokens)
|
||||
while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
|
||||
start += 1
|
||||
|
||||
if start == length:
|
||||
return []
|
||||
|
||||
end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
|
||||
if strip_eof \
|
||||
else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
|
||||
|
||||
end = length - 1
|
||||
while end > 0 and tokens[end].type in end_tokens:
|
||||
end -= 1
|
||||
|
||||
return tokens[start: end + 1]
|
||||
|
||||
|
||||
def pp(items):
|
||||
if not hasattr(items, "__iter__"):
|
||||
return str(items)
|
||||
|
||||
if len(items) == 0:
|
||||
return str(items)
|
||||
|
||||
return " \n" + " \n".join(str(item) for item in items)
|
||||
|
||||
Reference in New Issue
Block a user