from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode
import core.utils

# Shared instance used to recognize parser results produced by the concept lexer pass.
concept_lexer_parser = ConceptLexerParser()


class MultipleConceptsParser(BaseParser):
    """
    Parser that will take the result of ConceptLexerParser and try to resolve
    the unrecognized tokens token by token.

    It is a success when it returns a list of ConceptNode exclusively.
    """

    def __init__(self, **kwargs):
        # "MultipleConcepts" is the parser name; 45 is presumably a priority —
        # NOTE(review): the constant's meaning comes from BaseParser; confirm there.
        BaseParser.__init__(self, "MultipleConcepts", 45)

    @staticmethod
    def _flush_unrecognized(unrecognized_tokens, source, nodes_found):
        """Finalize a pending UnrecognizedTokensNode.

        Appends its (fixed) source text to ``source`` and, when the run
        contains anything beyond whitespace, crosses the node into every
        partial result in ``nodes_found``.

        Returns the updated ``(source, nodes_found)`` pair.
        """
        unrecognized_tokens.fix_source()
        source += unrecognized_tokens.source
        if unrecognized_tokens.not_whitespace():
            nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
        return source, nodes_found

    def parse(self, context, text):
        """Resolve the unrecognized tokens of a ConceptLexerParser result.

        Returns ``None`` when ``text`` is not a PARSER_RESULT produced by the
        concept lexer.  Otherwise returns a single result, or a list of
        results when several concept interpretations are possible.
        """
        sheerka = context.sheerka
        # Only handle parser results produced by the concept lexer pass.
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None
        if not text.parser == concept_lexer_parser:
            return None
        nodes = text.value
        # Each element of nodes_found is one possible node sequence; start with
        # a single empty choice and take the cartesian product as alternative
        # concept interpretations appear.
        nodes_found = [[]]
        source = ""
        concepts_only = True
        for node in nodes:
            if isinstance(node, UnrecognizedTokensNode):
                # Current run of still-unresolved tokens (None = no open run).
                unrecognized_tokens = None
                for i, token in enumerate(node.tokens):
                    index = node.start + i
                    if token.type == TokenKind.IDENTIFIER:
                        # it may be a concept
                        concept = context.new_concept(token.value)
                        if hasattr(concept, "__iter__") or not sheerka.is_unknown(concept):
                            # finish processing the pending unrecognized run first
                            if unrecognized_tokens:
                                source, nodes_found = self._flush_unrecognized(
                                    unrecognized_tokens, source, nodes_found)
                                unrecognized_tokens = None
                            source += token.value
                            # new_concept may return one concept or an iterable
                            # of candidate concepts; normalize to a list.
                            concepts = concept if hasattr(concept, "__iter__") else [concept]
                            concepts_nodes = [ConceptNode(c, index, index, [token], token.value)
                                              for c in concepts]
                            nodes_found = core.utils.product(nodes_found, concepts_nodes)
                            continue
                        # NOTE(review): an unknown identifier falls through into
                        # the unrecognized run below without clearing
                        # concepts_only — confirm this is intended.
                    else:
                        # it cannot be a concept; whitespace/newlines alone do
                        # not spoil a concepts-only result
                        concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
                    # Accumulate the token into the current unrecognized run.
                    if unrecognized_tokens:
                        unrecognized_tokens.add_token(token, index)
                    else:
                        unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
                # Flush a run left open at the end of this node.
                if unrecognized_tokens:
                    source, nodes_found = self._flush_unrecognized(
                        unrecognized_tokens, source, nodes_found)
            else:
                # Already-recognized node: keep it in every choice as-is.
                nodes_found = core.utils.product(nodes_found, [node])
                source += node.source
        # Wrap every choice in a fresh PARSER_RESULT attributed to this parser.
        ret = []
        for choice in nodes_found:
            ret.append(
                sheerka.ret(
                    self.name, concepts_only,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT, parser=self, source=source,
                        body=choice, try_parsed=None)))
        if len(ret) == 1:
            self.log_result(context, source, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, source, ret)
            return ret