from dataclasses import dataclass

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import get_lexer_nodes, only_successful, update_compiled
from parsers.BaseNodeParser import (
    ConceptNode,
    SourceCodeNode,
    SourceCodeWithConceptNode,
    UnrecognizedTokensNode,
)
from parsers.BaseParser import BaseParserInputParser, ParsingError
from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonParser import PythonParser
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.SyaNodeParser import SyaNodeParser

# Parsers tried (in this order) when attempting to resolve an UnrecognizedTokensNode.
PARSERS = [
    "EmptyString",
    "ShortTermMemory",
    SequenceNodeParser.NAME,
    BnfNodeParser.NAME,
    SyaNodeParser.NAME,
    PythonParser.NAME,
]


@dataclass
class CannotParseError(ParsingError):
    """Raised/carried when an UnrecognizedTokensNode could not be resolved."""

    unrecognized: UnrecognizedTokensNode


class UnrecognizedNodeParser(BaseParserInputParser):
    """
    This parser comes after the other NodeParsers (Atom, Bnf or Sya).
    It will try to resolve all UnrecognizedTokensNode.
    """

    def __init__(self, **kwargs):
        # Priority 45: lower than SequenceNode, BnfNode and SyaNode so it runs after them.
        super().__init__("UnrecognizedNode", 45)

    def add_error(self, error):
        """Append a single error, or extend with an iterable of errors.

        Strings are iterable but represent a single error message, so they are
        appended as one item rather than splatted character by character.
        """
        if hasattr(error, "__iter__") and not isinstance(error, str):
            self.error_sink.extend(error)
        else:
            self.error_sink.append(error)

    def parse(self, context, parser_input):
        """Resolve UnrecognizedTokensNode entries among the input lexer nodes.

        Builds the cartesian product of all parse alternatives found for each
        node. Returns None when there is no input, an ERROR result when any
        concept fails validation, a single PARSER_RESULT when exactly one
        sequence remains, or a list of PARSER_RESULTs otherwise.
        """
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            return None

        sequences_found = [[]]  # seed for the cartesian product of alternatives
        has_unrecognized = False
        self.error_sink.clear()

        for node in nodes:
            if isinstance(node, ConceptNode):
                res = self.validate_concept_node(context, node)
                if not res.status:
                    self.add_error(res.body)
                else:
                    sequences_found = core.utils.sheerka_product(sequences_found, [res.body])
            elif isinstance(node, UnrecognizedTokensNode):
                res = sheerka.parse_unrecognized(context, node.source, PARSERS)
                res = only_successful(context, res)
                if res.status:
                    lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
                    # make sure lexer_nodes is not empty (for example, some Python
                    # results are discarded)
                    if lexer_nodes:
                        sequences_found = core.utils.sheerka_product(sequences_found, lexer_nodes)
                    else:
                        sequences_found = core.utils.sheerka_product(sequences_found, [node])
                        has_unrecognized = True
                else:
                    sequences_found = core.utils.sheerka_product(sequences_found, [node])
                    has_unrecognized = True
            elif isinstance(node, SourceCodeNode):
                sequences_found = core.utils.sheerka_product(sequences_found, [node])
                has_unrecognized = True  # to let PythonWithConceptParser validate the code
            elif isinstance(node, SourceCodeWithConceptNode):
                # Validate (and replace in place) every embedded ConceptNode;
                # stop at the first validation failure.
                for i, n in enumerate(node.nodes):
                    if not isinstance(n, ConceptNode):
                        continue
                    res = self.validate_concept_node(context, n)
                    if not res.status:
                        self.add_error(res.body)
                        break
                    node.nodes[i] = res.body
                sequences_found = core.utils.sheerka_product(sequences_found, [node])
                has_unrecognized = True  # to let PythonWithConceptParser validate the code
            else:
                # cannot happen as of today :-)
                raise NotImplementedError(f"Node is {type(node)}, which is not supported yet")

        # concept with UnrecognizedToken in their properties is considered as fatal error
        if self.has_error:
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        ret = []
        for choice in sequences_found:
            ret.append(
                sheerka.ret(
                    self.name,
                    not has_unrecognized,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=parser_input.source,
                        body=choice,
                        try_parsed=choice)))
        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret

    def validate_concept_node(self, context, concept_node):
        """Recompile the node's concept; return a failed ret with the errors,
        or a successful ret carrying the (possibly updated) node."""
        errors = []
        update_compiled(context, concept_node.concept, errors)
        if errors:
            return context.sheerka.ret(self.name, False, errors)
        return context.sheerka.ret(self.name, True, concept_node)