Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx

2020-05-12 17:21:10 +02:00
parent 7d3a490bc5
commit 6e343ba996
110 changed files with 13865 additions and 7540 deletions
@@ -1,9 +1,9 @@
 import logging

 from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
-from parsers.BaseParser import BaseParser
-from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
 from core.concept import VARIABLE_PREFIX
+from core.tokenizer import Keywords, TokenKind, LexerError
+from parsers.BaseParser import BaseParser


 class ExactConceptParser(BaseParser):
@@ -11,10 +11,11 @@ class ExactConceptParser(BaseParser):
    Tries to recognize a single concept
    """

-    MAX_WORDS_SIZE = 10
+    MAX_WORDS_SIZE = 3

-    def __init__(self, **kwargs):
+    def __init__(self, max_word_size=None, **kwargs):
        BaseParser.__init__(self, "ExactConcept", 80)
+        self.max_word_size = max_word_size

    def parse(self, context, parser_input):
        """
@@ -33,11 +34,11 @@ class ExactConceptParser(BaseParser):
            context.log(f"Error found in tokenizer {e}", self.name)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))

-        if len(words) > self.MAX_WORDS_SIZE:
+        if len(words) > (self.max_word_size or self.MAX_WORDS_SIZE):
            context.log(f"Max words reached. Stopping.", self.name)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))

-        recognized = False
+        recognized = []  # keep track of the ids of the concepts already found
        for combination in self.combinations(words):

            concept_key = " ".join(combination)
@@ -49,16 +50,23 @@ class ExactConceptParser(BaseParser):
            concepts = result if isinstance(result, list) else [result]

            for concept in concepts:
+                if concept.id in recognized:
+                    context.log(f"Recognized concept {concept} again. Skipping.", self.name)
+                    # Example:
+                    # if the input is 'foo a' and a concept is defined as 'foo a',
+                    # there will be two matches: one for 'foo a' and one for 'foo _var_0',
+                    # but both are the same concept 'foo a'.
+                    continue
+
                context.log(f"Recognized concept {concept}.", self.name)
                # update the properties if needed
-                need_validation = False
                for i, token in enumerate(combination):
                    if token.startswith(VARIABLE_PREFIX):
                        index = int(token[len(VARIABLE_PREFIX):])
-                        concept.def_prop_by_index(index, words[i])
+                        concept.def_var_by_index(index, words[i])
                        concept.metadata.need_validation = True
                        if self.verbose_log.isEnabledFor(logging.DEBUG):
-                            prop_name = list(concept.props.keys())[index]
+                            prop_name = concept.metadata.variables[index][0]
                            context.log(
                                f"Added property {index}: {prop_name}='{words[i]}'.",
                                self.name)
@@ -69,12 +77,13 @@ class ExactConceptParser(BaseParser):
                    context.sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
-                        source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
+                        source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(
+                            parser_input),
                        body=concept,
                        try_parsed=concept)))
-                recognized = True
+                recognized.append(concept.id)

-        if recognized:
+        if len(recognized) > 0:
            if len(res) == 1:
                self.log_result(context, parser_input, res[0])
            else: