Added basic implementation for where

This commit is contained in:
2020-02-05 18:47:20 +01:00
parent a5a721094b
commit afc1e22949
35 changed files with 864 additions and 320 deletions
+19
View File
@@ -50,6 +50,7 @@ class BuiltinConcepts(Enum):
CONCEPT_EVAL_REQUESTED = "concept eval requested"
REDUCE_REQUESTED = "reduce requested" # remove meaningless error when possible
NOT_A_SET = "not a set" # the concept has no entry in sets
WHERE_CLAUSE_FAILED = "where clause failed" # failed to validate where clause during evaluation
NODE = "node"
GENERIC_NODE = "generic node"
@@ -91,6 +92,7 @@ BuiltinErrors = [str(e) for e in {
BuiltinConcepts.CONCEPT_EVAL_ERROR,
BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
BuiltinConcepts.NOT_A_SET,
BuiltinConcepts.WHERE_CLAUSE_FAILED
}]
"""
@@ -385,3 +387,20 @@ class ConceptAlreadyInSet(Concept):
@property
def concept_set(self):
return self.props["concept_set"].value
class WhereClauseFailed(Concept):
    """Builtin error concept produced when a concept's WHERE clause fails during evaluation."""

    def __init__(self, concept=None):
        # Both the key and the id slots carry the WHERE_CLAUSE_FAILED builtin marker.
        super().__init__(BuiltinConcepts.WHERE_CLAUSE_FAILED,
                         True,
                         False,
                         BuiltinConcepts.WHERE_CLAUSE_FAILED)
        # The offending concept is stashed in the BODY metadata slot.
        self.set_metadata_value(ConceptParts.BODY, concept)
        # An error concept needs no further evaluation.
        self.metadata.is_evaluated = True

    @property
    def concept(self):
        # Convenience accessor for the concept stored in BODY.
        return self.body

    def __repr__(self):
        return f"WhereClauseFailed(concept={self.concept})"
+2 -1
View File
@@ -231,7 +231,8 @@ class Concept:
if token.value in variables:
key += VARIABLE_PREFIX + str(variables.index(token.value))
else:
key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += value
first = False
self.metadata.key = key
@@ -45,7 +45,7 @@ class SheerkaCreateNewConcept:
# add the BNF if known
if concept.bnf:
concepts_definitions = self.sheerka.get_concept_definition()
concepts_definitions = self.sheerka.get_concepts_definitions(context)
concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules
@@ -61,22 +61,26 @@ class SheerkaCreateNewConcept:
# save the new concept in sdp
try:
# TODO : needs to make these calls atomic (or at least one single call)
# save the new concept
self.sheerka.sdp.add(
context.event.get_digest(),
self.sheerka.CONCEPTS_ENTRY,
concept,
use_ref=True)
# save it by id
self.sheerka.sdp.add(
context.event.get_digest(),
self.sheerka.CONCEPTS_BY_ID_ENTRY,
{concept.id: concept.get_digest()},
is_ref=True)
# update the definition table
if concepts_definitions is not None:
self.sheerka.sdp.set(
context.event.get_digest(),
self.sheerka.CONCEPTS_DEFINITIONS_ENTRY,
concepts_definitions,
concept_lexer_parser.encode_grammar(init_ret_value.body),
use_ref=True)
self.sheerka.concepts_definitions_cache = None # invalidate cache
except SheerkaDataProviderDuplicateKeyError as error:
context.log_error(logger, "Failed to create a new concept.", who=self.logger_name)
return self.sheerka.ret(
@@ -94,6 +98,3 @@ class SheerkaCreateNewConcept:
# process the return in needed
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
+14
View File
@@ -1,5 +1,14 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from sdp.sheerkaDataProvider import SheerkaDataProvider
import pprint
import os
def get_pp():
    """Return a PrettyPrinter whose width matches the current terminal.

    Uses shutil.get_terminal_size(), which falls back to the COLUMNS
    environment variable and then to 80 columns when stdout is not a
    terminal — the previous ``os.popen('stty size')`` approach crashed
    outside a tty and passed the width as a *str* (PrettyPrinter
    requires an int).

    :return: a pprint.PrettyPrinter instance (compact mode)
    """
    import shutil  # local import: only needed to probe the terminal
    columns = shutil.get_terminal_size().columns  # always an int
    return pprint.PrettyPrinter(width=columns, compact=True)
class SheerkaDump:
@@ -71,3 +80,8 @@ class SheerkaDump:
break
page_count += 1
def dump_state(self):
    """Pretty-print the sdp state loaded from the head snapshot to the log."""
    sdp = self.sheerka.sdp
    head_snapshot = sdp.get_snapshot(SheerkaDataProvider.HeadFile)
    current_state = sdp.load_state(head_snapshot)
    # Format with a terminal-width pretty printer for readable log output.
    self.sheerka.log.info(get_pp().pformat(current_state.data))
@@ -147,9 +147,6 @@ class SheerkaEvaluateConcept:
if concept.metadata.is_evaluated:
return concept
# WHERE condition should already be validated by the parser.
# It's a mandatory condition for the concept before it can be recognized
#
# TODO : Validate the PRE condition
#
@@ -157,8 +154,8 @@ class SheerkaEvaluateConcept:
self.initialize_concept_asts(context, concept, logger)
# to make sure of the order, it don't use ConceptParts.get_parts()
# props must be evaluated first
all_metadata_to_eval = ["props", "where", "pre", "post", "body"]
# props must be evaluated first, body must be evaluated before where
all_metadata_to_eval = ["pre", "post", "props", "body", "where"]
for metadata_to_eval in all_metadata_to_eval:
if metadata_to_eval == "props":
@@ -186,6 +183,12 @@ class SheerkaEvaluateConcept:
else:
concept.values[part_key] = resolved
# validate where clause
if concept.metadata.where is not None:
where_value = concept.values[ConceptParts.WHERE]
if not (where_value is None or self.sheerka.value(where_value) is True):
return self.sheerka.new(BuiltinConcepts.WHERE_CLAUSE_FAILED, body=concept)
#
# TODO : Validate the POST condition
#
@@ -19,7 +19,7 @@ class History:
return msg
def __repr__(self):
    # Include the class name so History entries are unambiguous in logs
    # (the span previously contained both the old and the new return —
    # stale diff residue; only the class-named form is kept).
    return f"History(event={self.event!r}, status={self.status}, result={self.result})"
def __eq__(self, other):
if id(self) == id(other):
@@ -38,6 +38,21 @@ class SheerkaSetsManager:
context.log_error(logger, "Failed to add to set.", who=self.logger_name)
return self.sheerka.ret(self.logger_name, False, ErrorConcept(error), error.args[0])
def add_concepts_to_set(self, context, concepts, concept_set, logger=None):
    """Add several concepts to a set with a single sdp write.

    :param context: execution context (provides the event digest and logging)
    :param concepts: iterable of concepts whose ids are appended to the set
    :param concept_set: the target set concept
    :param logger: optional logger; defaults to the sheerka logger
    :return: a sheerka ret concept — SUCCESS on write, ErrorConcept on failure
    """
    logger = logger or self.sheerka.log
    context.log(logger, f"Adding concepts {concepts} to set {concept_set}", who=self.logger_name)
    set_key = GROUP_PREFIX + concept_set.id
    added_ids = [concept.id for concept in concepts]
    existing_ids = self.sheerka.sdp.get_safe(set_key)
    # Append to the current member list when the set already has entries.
    member_ids = added_ids if existing_ids is None else existing_ids + added_ids
    try:
        self.sheerka.sdp.set(context.event.get_digest(), set_key, member_ids)
        return self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
    except Exception as error:
        context.log_error(logger, "Failed to add to set.", who=self.logger_name)
        return self.sheerka.ret(self.logger_name, False, ErrorConcept(error), error.args[0])
def get_set_elements(self, concept):
"""
Concept is supposed to be a set
+26 -9
View File
@@ -22,6 +22,7 @@ import logging
# BuiltinConcepts.AFTER_EVALUATION]
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
BNF_PARSER_CLASS = "parsers.BnfParser.BnfParser"
CONCEPTS_FILE = "_concepts.txt"
@@ -52,7 +53,7 @@ class Sheerka(Concept):
# cache for concept definitions,
# Primarily used for unit test that does not have access to sdp
self.concepts_definition_cache = {}
self.concepts_definitions_cache = {}
#
# cache for concepts grammars
@@ -187,7 +188,8 @@ class Sheerka(Concept):
def initialize_concepts_definitions(self, execution_context):
self.init_log.debug("Initializing concepts definitions")
definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)
# definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)
definitions = self.get_concepts_definitions(execution_context)
if definitions is None:
self.init_log.debug("No BNF defined")
@@ -389,14 +391,26 @@ class Sheerka(Concept):
return result or self._get_unknown(('id', concept_id))
def get_concepts_definitions(self, context):
    """Return the mapping of user-defined concepts to their parsed BNF rules.

    The encoded definitions are read from sdp under
    CONCEPTS_DEFINITIONS_ENTRY, each key is decoded back into a concept
    via core.utils.unstr_concept, and the stored grammar text is parsed
    with the registered BNF parser.  The result is cached in
    self.concepts_definitions_cache until it is invalidated.

    (This span previously interleaved the deleted ``get_concept_definition``
    with this method — stale diff residue removed.)

    :param context: execution context forwarded to the BNF parser
    :return: dict mapping concept -> parsed bnf rule
    """
    # Serve from cache once it has been populated.
    if self.concepts_definitions_cache:
        return self.concepts_definitions_cache
    encoded = self.sdp.get_safe(
        self.CONCEPTS_DEFINITIONS_ENTRY,
        load_origin=False) or {}
    self.concepts_definitions_cache = {}
    bnf_parser = self.parsers[BNF_PARSER_CLASS]()
    for k, v in encoded.items():
        key, id_ = core.utils.unstr_concept(k)
        concept = self.new((key, id_))
        rule_result = bnf_parser.parse(context, v)
        if rule_result.status:
            # NOTE(review): assumes the parser double-wraps its result
            # (rule_result.value.value) — confirm against BnfParser.
            self.concepts_definitions_cache[concept] = rule_result.value.value
        else:
            self.log.error(f"Failed to load bnf rule for concept {key}")
    return self.concepts_definitions_cache
def new(self, concept_key, **kwargs):
"""
@@ -411,7 +425,7 @@ class Sheerka(Concept):
else:
concept_id = None
template = self.get(concept_key, concept_id)
template = self.get_by_id(concept_id) if not concept_key else self.get(concept_key, concept_id)
# manage concept not found
if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \
@@ -579,7 +593,10 @@ class Sheerka(Concept):
self.during_restore = True
with open(CONCEPTS_FILE, "r") as f:
for line in f.readlines():
self.log.info(line.strip())
line = line.strip()
if line == "" or line.startswith("#"):
continue
self.log.info(line)
self.evaluate_user_input(line)
self.during_restore = False
except IOError:
+27 -17
View File
@@ -266,10 +266,10 @@ class Tokenizer:
self.column = 1
self.line += 1
elif c == "c" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":":
concept_name = self.eat_concept_name(self.i + 2, self.line, self.column)
yield Token(TokenKind.CONCEPT, concept_name, self.i, self.line, self.column)
self.i += len(concept_name) + 3
self.column += len(concept_name) + 3
name, id, length = self.eat_concept(self.i + 2, self.line, self.column + 2)
yield Token(TokenKind.CONCEPT, (name, id), self.i, self.line, self.column)
self.i += length + 2
self.column += length + 2
elif c.isalpha() or c == "_":
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
@@ -297,31 +297,41 @@ class Tokenizer:
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_concept(self, start, line, column):
    """Lex a concept reference of the form ``name|id:`` starting at *start*.

    *start* points just past the ``c:`` prefix.  Either the name or the
    id part may be empty; an empty part is reported as None.  (This span
    previously interleaved the deleted ``eat_concept_name`` with this
    method — unused ``end_colon_found``, duplicated reads and dead
    returns removed.)

    :param start: index of the first character of the concept name
    :param line: current line number (for error reporting)
    :param column: current column (for error reporting)
    :return: (key, id, consumed_length) — length includes the final colon
    :raises LexerError: on newline, missing terminating colon, or when
        both key and id are empty
    """
    key, id, buffer = None, None, ""
    i = start
    processing_key = True  # before the "|" we are reading the key part
    while i < self.text_len:
        c = self.text[i]
        if c == "\n":
            raise LexerError(f"New line in concept name", self.text[start:i], i, line, column + i - start)
        if c == ":":
            # Terminator: commit the buffer to whichever part we were reading.
            if processing_key:
                key = buffer if buffer else None
            else:
                id = buffer if buffer else None
            i += 1  # eat the colon
            break
        if c == "|":
            # Separator: the key part ends here, the id part starts.
            key = buffer if buffer else None
            buffer = ""
            processing_key = False
            i += 1
            continue
        buffer += c
        i += 1
    else:
        # Reached end of input without the terminating colon.
        raise LexerError(f"Missing ending colon", self.text[start:i], i, line, column + i - start)
    if (key, id) == (None, None):
        raise LexerError(f"Concept identifiers not found", "", start, line, column)
    return key, id, i - start
def eat_whitespace(self, start):
result = self.text[start]
+108 -37
View File
@@ -1,6 +1,7 @@
import importlib
import inspect
import pkgutil
import re
from core.tokenizer import TokenKind
@@ -239,43 +240,6 @@ def pp(items):
return " \n" + " \n".join(str(item) for item in items)
def decode_concept(concept_repr):
"""
if concept_repr is like :c:key:id:
return the key and the id
:param concept_repr:
:return:
"""
if not (concept_repr and isinstance(concept_repr, str) and concept_repr.startswith(":c:")):
return None, None
i = 3
length = len(concept_repr)
key = ""
while i < length:
if concept_repr[i] == ":":
break
key += concept_repr[i]
i += 1
else:
return None, None
i += 1
if i >= length:
return key, None
id = ""
while i < length:
if concept_repr[i] == ":":
break
id += concept_repr[i]
i += 1
else:
return None, None
return key, id
def decode_enum(enum_repr: str):
"""
Tries to transform ClassName.Name into an enum
@@ -300,3 +264,110 @@ def decode_enum(enum_repr: str):
except TypeError:
return None
def str_concept(t):
    """
    Serialize a concept's (key, id) pair as ``c:key|id:``.

    Accepts either a ``(key, id)`` tuple or any object exposing ``key``
    and ``id`` attributes.  A None part is simply omitted; when both
    parts are None the result is the empty string.

    >>> assert str_concept(("key", "id")) == "c:key|id:"
    >>> assert str_concept((None, "id")) == "c:|id:"
    >>> assert str_concept(("key", None)) == "c:key:"
    >>> assert str_concept((None, None)) == ""

    :param t: tuple ``(key, id)`` or a concept-like object
    :return: the encoded string
    """
    if isinstance(t, tuple):
        key, id_ = t[0], t[1]
    else:
        key, id_ = t.key, t.id
    if key is None and id_ is None:
        return ""
    parts = ["c:"]
    if key is not None:
        parts.append(key)
    if id_:
        parts.append("|" + id_)
    parts.append(":")
    return "".join(parts)
def unstr_concept(concept_repr):
    """
    Parse a ``c:key|id:`` string (as produced by str_concept) back into
    its (key, id) parts.

    An empty part decodes to None.  Any malformed input — wrong prefix,
    non-string, or a missing terminating colon — yields ``(None, None)``.

    :param concept_repr: the encoded string
    :return: (key, id) tuple, each part possibly None
    """
    if not (concept_repr and isinstance(concept_repr, str) and concept_repr.startswith("c:")):
        return None, None
    body = concept_repr[2:]
    # The key part runs up to the first ":" (no id) or "|" (id follows).
    sep = None
    key_end = 0
    for idx, ch in enumerate(body):
        if ch in (":", "|"):
            sep, key_end = ch, idx
            break
    if sep is None:
        # No terminator at all.
        return None, None
    key = body[:key_end] or None
    if sep == ":":
        return key, None
    # The id part runs from just past the "|" up to the next ":".
    rest = body[key_end + 1:]
    id_end = rest.find(":")
    if id_end == -1:
        # Missing the terminating colon after the id.
        return None, None
    return key, rest[:id_end] or None
def encode_concept(t, use_concept=False):
    """
    Encode a (key, id) pair as a valid Python identifier that
    decode_concept can parse back.

    Non-alphanumeric characters in the key are replaced with "0";
    a None part is encoded as the "00None00" placeholder.

    >>> assert encode_concept(("key", "id")) == "__C__KEY_key__ID_id__C__"
    >>> assert encode_concept((None, "id")) == "__C__KEY_00None00__ID_id__C__"
    >>> assert encode_concept(("key", None)) == "__C__KEY_key__ID_00None00__C__"
    >>> assert encode_concept(("key", "id"), True) == "__C__USE_CONCEPT__KEY_key__ID_id__C__"

    :param t: tuple ``(key, id)`` or a concept-like object
    :param use_concept: mark the identifier with the USE_CONCEPT flag
    :return: the encoded identifier string
    """
    if isinstance(t, tuple):
        key, id_ = t[0], t[1]
    else:
        key, id_ = t.key, t.id
    if key:
        sanitized_key = "".join(c if c.isalnum() else "0" for c in key)
    else:
        sanitized_key = "00None00"
    # NOTE(review): id_ is NOT sanitized, unlike the key — this assumes
    # ids are already word characters (e.g. digests); a non-word id would
    # not match decode_concept's regex.  Confirm against id producers.
    sanitized_id = id_ or "00None00"
    prefix = "__C__USE_CONCEPT" if use_concept else "__C"
    return f"{prefix}__KEY_{sanitized_key}__ID_{sanitized_id}__C__"
decode_regex = re.compile(r"__KEY_(\w+)__ID_(\w+)__C__")


def decode_concept(text):
    """
    Decode an identifier produced by encode_concept.

    The "00None00" placeholder maps back to None.  When *text* does not
    contain an encoded concept, ``(None, None, None)`` is returned.

    :param text: the encoded identifier string
    :return: (key, id, use_concept) tuple
    """
    match = decode_regex.search(text)
    if match is None:
        return None, None, None
    placeholder = {"00None00": None}
    raw_key, raw_id = match.group(1), match.group(2)
    return (
        placeholder.get(raw_key, raw_key),
        placeholder.get(raw_id, raw_id),
        text.startswith("__C__USE_CONCEPT"),
    )