Enhanced complex concepts handling

2020-01-11 08:03:35 +01:00
parent a62c1f0f13
commit 40416ac337
24 changed files with 1647 additions and 961 deletions
@@ -0,0 +1,96 @@
+from core.builtin_concepts import BuiltinConcepts
+from core.tokenizer import TokenKind
+from parsers.BaseParser import BaseParser
+from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode
+import core.utils
+
+concept_lexer_parser = ConceptLexerParser()  # reference instance: parse() compares an incoming result's parser against it
+
+
+class MultipleConceptsParser(BaseParser):
+    """
+    Parser that will take the result of ConceptLexerParser and
+    try to resolve the unrecognized tokens token by token
+
+    It is a success when it returns a list ConceptNode exclusively
+    """
+
+    def __init__(self, **kwargs):
+        BaseParser.__init__(self, "MultipleConcepts", 45)
+
+    def parse(self, context, text):
+        sheerka = context.sheerka
+        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
+            return None
+
+        if not text.parser == concept_lexer_parser:
+            return None
+
+        sheerka = context.sheerka
+        nodes = text.value
+        nodes_found = [[]]
+        source = ""
+        concepts_only = True
+
+        for node in nodes:
+            if isinstance(node, UnrecognizedTokensNode):
+                unrecognized_tokens = None
+                for i, token in enumerate(node.tokens):
+                    index = node.start + i
+
+                    if token.type == TokenKind.IDENTIFIER:
+                        # it may be a concept
+                        concept = context.new_concept(token.value)
+                        if hasattr(concept, "__iter__") or not sheerka.is_unknown(concept):
+                            # finish processing unrecognized_tokens
+                            if unrecognized_tokens:
+                                unrecognized_tokens.fix_source()
+                                source += unrecognized_tokens.source
+                                if unrecognized_tokens.not_whitespace():
+                                    nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
+                                unrecognized_tokens = None
+
+                            source += token.value
+                            concepts = concept if hasattr(concept, "__iter__") else [concept]
+                            concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
+                            nodes_found = core.utils.product(nodes_found, concepts_nodes)
+                            continue
+                    else:
+                        # it cannot be a concept
+                        concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
+
+                        if unrecognized_tokens:
+                            unrecognized_tokens.add_token(token, index)
+                        else:
+                            unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
+
+                if unrecognized_tokens:
+                    unrecognized_tokens.fix_source()
+                    source += unrecognized_tokens.source
+                    if unrecognized_tokens.not_whitespace():
+                        nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
+
+            else:
+                nodes_found = core.utils.product(nodes_found, [node])
+                source += node.source
+
+        ret = []
+        for choice in nodes_found:
+            ret.append(
+                sheerka.ret(
+                    self.name,
+                    concepts_only,
+                    sheerka.new(
+                        BuiltinConcepts.PARSER_RESULT,
+                        parser=self,
+                        source=source,
+                        body=choice,
+                        try_parsed=None))
+            )
+
+        if len(ret) == 1:
+            self.log_result(context, source, ret[0])
+            return ret[0]
+        else:
+            self.log_multiple_results(context, source, ret)
+            return ret