from dataclasses import dataclass

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
import core.utils

# Names of the parsers handed to parse_unrecognized() when trying to resolve
# a run of unrecognized tokens.
PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]


# Error node that carries the UnrecognizedTokensNode it refers to.
# NOTE(review): not referenced anywhere in the code visible here.
@dataclass()
class CannotParseNode(ErrorNode):
    unrecognized: UnrecognizedTokensNode


class UnrecognizedNodeParser(BaseParser):
    """
    Parser that runs after the other node parsers (Atom, Bnf or Sya).

    Its job is to try to resolve every UnrecognizedTokensNode left in the
    lexer nodes it receives.
    """

    def __init__(self, **kwargs):
        """
        Register the parser under the name "UnrecognizedNode".

        :param kwargs: accepted but not used by this constructor
        """
        # 45: lower than AtomNode, BnfNode and SyaNode
        super().__init__("UnrecognizedNode", 45)

    def add_error(self, error):
        """
        Record one error, or every error of an iterable, in the error sink.

        :param error: a single error object, or anything exposing __iter__
        :return: None
        """
        is_collection = hasattr(error, "__iter__")
        if is_collection:
            self.error_sink.extend(error)
        else:
            self.error_sink.append(error)

    def parse(self, context, parser_input):
        """
        Try to resolve the unrecognized tokens found in the parser input.

        Every input node contributes one or more alternatives; the candidate
        sequences are grown with core.utils.product() as nodes are visited.

        :param context: execution context, must expose ``sheerka``
        :param parser_input: input handed to get_input_as_lexer_nodes()
        :return: None when there is no lexer node, an ERROR return value when
                 a concept node failed validation, otherwise a single return
                 value (exactly one candidate sequence) or a list of them
        """
        sheerka = context.sheerka
        lexer_input = self.get_input_as_lexer_nodes(parser_input, None)
        if not lexer_input:
            return None

        candidates = [[]]
        unresolved = False

        for current in lexer_input:
            if isinstance(current, ConceptNode):
                checked = self.validate_concept_node(context, current)
                if checked.status:
                    candidates = core.utils.product(candidates, [checked.body])
                else:
                    self.add_error(checked.body)
                continue

            if isinstance(current, UnrecognizedTokensNode):
                attempt = only_successful(
                    context,
                    parse_unrecognized(context, current.source, PARSERS))

                replacement = None
                if attempt.status:
                    replacement = get_lexer_nodes(attempt.body.body, current.start, current.tokens)

                # make sure the replacement is not empty
                # (for example, some Python results are discarded)
                if replacement:
                    candidates = core.utils.product(candidates, replacement)
                    continue

                # nothing usable came back: keep the node as is and flag it
                candidates = core.utils.product(candidates, [current])
                unresolved = True
                continue

            if isinstance(current, SourceCodeNode):
                candidates = core.utils.product(candidates, [current])
                # never trust a source code node: it may be invalid source code
                unresolved = True
                continue

            # cannot happen as of today :-)
            raise NotImplementedError()

        # a concept with UnrecognizedToken in its properties is considered a fatal error
        if self.has_error:
            failure = sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
            return sheerka.ret(self.name, False, failure)

        results = [
            sheerka.ret(
                self.name,
                not unresolved,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    body=sequence,
                    try_parsed=sequence))
            for sequence in candidates
        ]

        if len(results) != 1:
            self.log_multiple_results(context, parser_input, results)
            return results

        single = results[0]
        self.log_result(context, parser_input, single)
        return single

    def validate_concept_node(self, context, concept_node):
        """
        Resolve the UnrecognizedTokensNode found in a concept's compiled properties.

        Resolved values are written back into ``compiled`` in place.

        :param context: execution context, must expose ``sheerka``
        :param concept_node: node whose ``concept`` is inspected
        :return: a successful return value wrapping concept_node, or a failed
                 one wrapping the list of ERROR concepts that were collected
        """
        sheerka = context.sheerka
        failures = []

        def _walk(current):
            """
            Recursively browse the compiled properties, looking for unrecognized tokens
            :param current: concept whose compiled properties are browsed
            :return: None
            """
            for key, item in current.compiled.items():
                if isinstance(item, Concept):
                    _walk(item)
                    continue

                if not isinstance(item, UnrecognizedTokensNode):
                    continue

                # only keep successful parsers
                outcome = only_successful(
                    context,
                    parse_unrecognized(context, item.source, PARSERS))

                if outcome.status:
                    current.compiled[key] = outcome.body.body
                else:
                    failures.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{item.source}'"))

        _walk(concept_node.concept)

        if failures:
            return context.sheerka.ret(self.name, False, failures)
        return context.sheerka.ret(self.name, True, concept_node)