Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx

This commit is contained in:
2020-05-12 17:21:10 +02:00
parent 7d3a490bc5
commit 6e343ba996
110 changed files with 13865 additions and 7540 deletions
+195 -65
View File
@@ -2,8 +2,9 @@ from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
@@ -187,6 +188,9 @@ class SourceCodeNode(LexerNode):
self.end == other.end and \
self.source == other.source
if isinstance(other, SCN):
return other == self
if not isinstance(other, SourceCodeNode):
return False
@@ -352,6 +356,51 @@ class HelperWithPos:
return self
class SCN(HelperWithPos):
    """
    SourceCodeNode tester class.

    Matches a SourceCodeNode with fewer constraints:
    SCN == SourceCodeNode when `source` matches; `start` and `end` are only
    checked when they are not None.
    """

    def __init__(self, source, start=None, end=None):
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        # Fast path: same object.
        if id(self) == id(other):
            return True
        if isinstance(other, SourceCodeNode):
            if self.source != other.source:
                return False
            # None means "don't care" for positions.
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return True
        # BUGFIX: was `isinstance(other, CN)` (copied from the CN tester),
        # which made two distinct SCN instances always compare unequal.
        if not isinstance(other, SCN):
            return False
        return self.source == other.source and \
            self.start == other.start and \
            self.end == other.end

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"SCN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
class CN(HelperWithPos):
"""
ConceptNode tester class
@@ -390,6 +439,8 @@ class CN(HelperWithPos):
return False
if self.end is not None and self.end != other.end:
return False
if self.source is not None and self.source != other.source:
return False
return True
if not isinstance(other, CN):
@@ -425,9 +476,10 @@ class CNC(CN):
CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
"""
def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
super().__init__(concept_key, start, end, source)
self.compiled = kwargs
self.exclude_body = exclude_body
def __eq__(self, other):
if id(self) == id(other):
@@ -442,7 +494,13 @@ class CNC(CN):
return False
if self.end is not None and self.end != other.end:
return False
return self.compiled == other.concept.compiled # assert instead of return to help debugging tests
if self.source is not None and self.source != other.source:
return False
if self.exclude_body:
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
else:
to_compare = other.concept.compiled
return self.compiled == to_compare
if not isinstance(other, CNC):
return False
@@ -518,11 +576,10 @@ class BaseNodeParser(BaseParser):
super().__init__(name, priority)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.init_from_sheerka(sheerka)
self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword
else:
self.concepts_by_first_keyword = None
self.sya_definitions = None
self.token = None
self.pos = -1
@@ -532,17 +589,16 @@ class BaseNodeParser(BaseParser):
self.text = None
self.sheerka = None
def init_from_sheerka(self, sheerka):
def init_from_concepts(self, context, concepts, **kwargs):
"""
Use the definitons from Sheerka to initialize
:param sheerka:
Initialize the parser with a list of concepts
For unit tests convenience
:param context
:param concepts
:return:
"""
self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
if sheerka.sya_definitions:
self.sya_definitions = {}
for k, v in sheerka.sya_definitions.items():
self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
def reset_parser(self, context, text):
self.context = context
@@ -582,82 +638,43 @@ class BaseNodeParser(BaseParser):
return self.token.type != TokenKind.EOF
def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
"""
To quickly find a concept, we store them in an hash where the key is the first token of the concept
example :
Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
So the key to use will be "foo"
Concept("a foo").def_prop("a") -> first token is "foo"
Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
Note that under the same key, there will be multiple entry
a B-Tree may be a better implementation in the future
We also store sya_definition which a is tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
:param context:
:param concepts: list[Concept]
:param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
:param use_sheerka: first init with the definitions from Sheerka
:return:
"""
self.context = context
self.sheerka = context.sheerka
if use_sheerka:
self.init_from_sheerka(self.sheerka)
if sya_definitions:
if self.sya_definitions:
self.sya_definitions.update(sya_definitions)
else:
self.sya_definitions = sya_definitions
if self.concepts_by_first_keyword is None:
self.concepts_by_first_keyword = {}
for concept in concepts:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
break
return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
def get_concepts(self, token, to_keep, to_map=None):
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
"""
Tries to find if there are concepts that match the value of the token
:param token:
:param to_keep: predicate to tell if the concept is eligible
:param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name
:param to_map:
:param strip_quotes: Remove quotes from strings
:return:
"""
if token.type == TokenKind.WHITESPACE:
return None
if token.type == TokenKind.STRING:
name = token.value[1:-1]
name = token.value[1:-1] if strip_quotes else token.value
elif token.type == TokenKind.KEYWORD:
name = token.value.value
else:
name = token.value
custom_concepts = custom(name) if custom else []
result = []
if name in self.concepts_by_first_keyword:
for concept_id in self.concepts_by_first_keyword[name]:
for concept_id in self.concepts_by_first_keyword.get(name):
concept = self.sheerka.get_by_id(concept_id)
if not to_keep(concept):
continue
concept = to_map(concept) if to_map else concept
concept = to_map(self, concept) if to_map else concept
result.append(concept)
return result
return result + custom_concepts
return None
return custom_concepts if custom else None
@staticmethod
def get_token_value(token):
@@ -667,3 +684,116 @@ class BaseNodeParser(BaseParser):
return token.value.value
else:
return token.value
@staticmethod
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
    """
    Create the map describing the first token expected by a concept.

    Each concept contributes its possible first tokens as keys; the value is
    the list of concept ids sharing that first token.
    :param context:
    :param concepts: list of concepts to parse
    :param use_sheerka: if True, seed the map from sheerka's cached entry
    :return: sheerka return value wrapping the keyword -> [concept.id] map,
             or a failure wrapping the offending concept
    """
    sheerka = context.sheerka
    res = sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) if use_sheerka else {}
    for concept in concepts:
        keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
        if keywords is None:
            # No first token found for a concept: report failure.
            # CONSISTENCY FIX: use the same reporter name as the success
            # path below (was `sheerka.name`).
            return sheerka.ret("BaseNodeParser", False, concept)
        for keyword in keywords:
            res.setdefault(keyword, []).append(concept.id)
    return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
    """
    Expand concept-reference keys ("c:|...") of a first-keyword map into the
    plain first tokens of the referenced concepts (recursively), then
    de-duplicate every value list.

    :param context:
    :param concepts_by_first_keyword: map keyword -> list of concept ids
    :return: sheerka return value wrapping the resolved map
    """
    sheerka = context.sheerka

    def _resolve_concepts(concept_str):
        # Recursively turn one "c:|..." reference into plain first tokens.
        resolved = []
        to_resolve = []
        concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
        if sheerka.isaset(context, concept):
            concepts = sheerka.get_set_elements(context, concept)
        else:
            concepts = [concept]
        for concept in concepts:
            BaseNodeParser.ensure_bnf(context, concept)  # need to make sure that it cannot fail
            keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
            for keyword in keywords:
                (to_resolve if keyword.startswith("c:|") else resolved).append(keyword)
        for concept_to_resolve_str in to_resolve:
            # NOTE(review): no cycle detection — mutually-referencing
            # concepts would recurse forever; confirm references are acyclic.
            resolved += _resolve_concepts(concept_to_resolve_str)
        return resolved

    res = {}
    for k, v in concepts_by_first_keyword.items():
        if k.startswith("c:|"):
            for resolved in _resolve_concepts(k):
                res.setdefault(resolved, []).extend(v)
        else:
            res.setdefault(k, []).extend(v)
    # De-duplicate each list while preserving insertion order
    # (idiomatic replacement for the previous hand-rolled _make_unique).
    for k, v in res.items():
        res[k] = list(dict.fromkeys(v))
    return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def resolve_sya_associativity_and_precedence(context, sya):
    """
    Placeholder — not implemented yet.

    :param context:
    :param sya:
    :return: None
    """
    pass
@staticmethod
def get_first_tokens(sheerka, concept):
    """
    Collect the possible first token(s) of a concept.

    BNF-defined concepts delegate to BnfNodeFirstTokenVisitor; otherwise the
    first non-variable keyword of the concept key is used.
    :param sheerka:
    :param concept:
    :return: list of first tokens, or None when the key holds only variables
    """
    if concept.bnf:
        from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
        visitor = BnfNodeFirstTokenVisitor(sheerka)
        visitor.visit(concept.bnf)
        return visitor.first_tokens
    # First keyword that is not a variable placeholder, if any.
    first = next(
        (kw for kw in concept.key.split() if not kw.startswith(VARIABLE_PREFIX)),
        None,
    )
    return None if first is None else [first]
@staticmethod
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
    """
    Lazily parse a concept's BNF definition when it is declared as BNF but
    its parsed tree is missing; also refreshes the cached concept's bnf.

    :param context:
    :param concept:
    :param parser_name: name pushed onto the context for tracing
    :raises Exception: when the BNF definition cannot be parsed
    """
    # Nothing to do unless the concept is BNF-defined and still unparsed.
    if concept.metadata.definition_type != DEFINITION_TYPE_BNF or concept.bnf:
        return
    from parsers.BnfParser import BnfParser
    bnf_parser = BnfParser()
    desc = f"Resolving BNF {concept.metadata.definition}"
    with context.push(parser_name, obj=concept, desc=desc) as sub_context:
        sub_context.add_inputs(parser_input=concept.metadata.definition)
        ret_val = bnf_parser.parse(sub_context, concept.metadata.definition)
        sub_context.add_values(return_values=ret_val)
        if not ret_val.status:
            raise Exception(ret_val.value)
        concept.bnf = ret_val.body.body
        if concept.id:
            # Keep the cached copy in sync with the freshly parsed tree.
            context.sheerka.get_by_id(concept.id).bnf = concept.bnf