Sheerka-Old/src/parsers/UnrecognizedNodeParser.py

from dataclasses import dataclass

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes, update_compiled
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from parsers.BnfNodeParser import BnfNodeParser
from parsers.SyaNodeParser import SyaNodeParser

# Names of the parsers handed to parse_unrecognized() when this module tries
# to resolve an UnrecognizedTokensNode.
# NOTE(review): the list order may be significant to parse_unrecognized();
# it is kept exactly as it was - confirm before reordering.
PARSERS = [
    "EmptyString",
    "ShortTermMemory",
    SequenceNodeParser.NAME,
    BnfNodeParser.NAME,
    SyaNodeParser.NAME,
    "Python",
]


@dataclass
class CannotParseNode(ErrorNode):
    """Error node carrying an UnrecognizedTokensNode."""

    # The unrecognized-tokens node this error refers to.
    unrecognized: UnrecognizedTokensNode


class UnrecognizedNodeParser(BaseParser):
    """
    This parser comes after the other NodeParsers (Sequence, Bnf or Sya).
    It will try to resolve all UnrecognizedTokensNode.

    Every lexer node of the input is inspected:

    * ConceptNode: validated with update_compiled(); any reported error is
      fatal for the whole parse.
    * UnrecognizedTokensNode: its source is re-parsed with the parsers listed
      in PARSERS; when that yields lexer nodes they replace the node,
      otherwise the node is kept and the result is flagged as unsuccessful.
    * SourceCodeNode / SourceCodeWithConceptNode: kept as they are, and the
      result is flagged as unsuccessful so that a later parser can validate
      the code.
    """

    def __init__(self, **kwargs):
        """Register the parser under the name "UnrecognizedNode".

        :param kwargs: accepted for call compatibility; not used here.
        """
        super().__init__("UnrecognizedNode", 45)  # lower than SequenceNode, BnfNode and SyaNode

    def add_error(self, error):
        """Record one error, or every error of a collection, in the error sink.

        :param error: a single error object or an iterable of errors.
            Strings and bytes are recorded as ONE error; they are iterable,
            but spreading them would store one entry per character.
        """
        if isinstance(error, (str, bytes)) or not hasattr(error, "__iter__"):
            self.error_sink.append(error)
        else:
            self.error_sink.extend(error)

    def parse(self, context, parser_input):
        """Try to resolve the unrecognized nodes found in ``parser_input``.

        :param context: execution context; ``context.sheerka`` is used to
            build the returned values.
        :param parser_input: the input, read as lexer nodes through
            ``get_input_as_lexer_nodes``.
        :return:
            * ``None`` when the input yields no lexer node;
            * a single failed result wrapping a ``BuiltinConcepts.ERROR``
              when at least one concept node could not be validated;
            * otherwise one ``BuiltinConcepts.PARSER_RESULT`` per candidate
              sequence - the result itself when there is exactly one, a list
              of results in every other case. Their status is False as soon
              as something was left unresolved.
        :raises NotImplementedError: for a node type that is not handled.
        """
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            return None

        # Every entry is one candidate sequence of nodes. Each step combines
        # the candidates found so far with the alternatives for the current
        # node through core.utils.product.
        sequences_found = [[]]
        has_unrecognized = False
        self.error_sink.clear()

        for node in nodes:
            if isinstance(node, ConceptNode):
                res = self.validate_concept_node(context, node)
                if res.status:
                    sequences_found = core.utils.product(sequences_found, [res.body])
                else:
                    self.add_error(res.body)

            elif isinstance(node, UnrecognizedTokensNode):
                res = only_successful(context, parse_unrecognized(context, node.source, PARSERS))
                lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens) if res.status else None
                if lexer_nodes:
                    sequences_found = core.utils.product(sequences_found, lexer_nodes)
                else:
                    # Either nothing could parse the tokens, or the successful
                    # results produced no lexer node (for example, some Python
                    # results are discarded): keep the node as it is.
                    sequences_found = core.utils.product(sequences_found, [node])
                    has_unrecognized = True

            elif isinstance(node, SourceCodeNode):
                sequences_found = core.utils.product(sequences_found, [node])
                has_unrecognized = True  # to let PythonWithConceptParser validate the code

            elif isinstance(node, SourceCodeWithConceptNode):
                for index, inner in enumerate(node.nodes):
                    if not isinstance(inner, ConceptNode):
                        continue
                    res = self.validate_concept_node(context, inner)
                    if not res.status:
                        self.add_error(res.body)
                        break  # the first invalid concept is enough to fail the parse
                    node.nodes[index] = res.body
                sequences_found = core.utils.product(sequences_found, [node])
                has_unrecognized = True  # to let PythonWithConceptParser validate the code

            else:  # cannot happen as of today :-)
                raise NotImplementedError(f"Node is {type(node)}, which is not supported yet")

        # concept with UnrecognizedToken in their properties is considered as fatal error
        if self.has_error:
            # Hand out a snapshot: error_sink is cleared by the next call to
            # parse(), which would otherwise empty the body of this result.
            errors = list(self.error_sink)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=errors))

        ret = [
            sheerka.ret(
                self.name,
                not has_unrecognized,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input.source,
                    body=choice,
                    try_parsed=choice))
            for choice in sequences_found
        ]

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]

        self.log_multiple_results(context, parser_input, ret)
        return ret

    def validate_concept_node(self, context, concept_node):
        """Run update_compiled() on the concept held by ``concept_node``.

        :param context: execution context, also used to build the result.
        :param concept_node: the node whose ``concept`` is checked.
        :return: a failed result whose body is the list of errors collected
            by update_compiled(), or a successful result whose body is
            ``concept_node`` itself when no error was reported.
        """
        errors = []
        update_compiled(context, concept_node.concept, errors)

        if errors:
            return context.sheerka.ret(self.name, False, errors)
        return context.sheerka.ret(self.name, True, concept_node)