import ast

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import (
    ConceptLexerParser,
    ConceptNode,
    SourceCodeNode,
    UnrecognizedTokensNode,
)
from parsers.PythonParser import PythonParser

# Sentinel instance: `parse` only accepts PARSER_RESULT values produced by
# this exact parser (compared by equality below).
concept_lexer_parser = ConceptLexerParser()


class MultipleConceptsParser(BaseParser):
    """
    Parser that will take the result of ConceptLexerParser and try to resolve
    the unrecognized tokens token by token.

    It is a success when it returns a list of ConceptNode exclusively.
    """

    def __init__(self, **kwargs):
        # Registered under the name "MultipleConcepts" with priority 45.
        # **kwargs is accepted (and ignored) to keep a uniform parser
        # construction interface.
        BaseParser.__init__(self, "MultipleConcepts", 45)

    @staticmethod
    def finalize(nodes_found, unrecognized_tokens):
        """Flush the pending UnrecognizedTokensNode into the choices.

        If `unrecognized_tokens` is empty/None, nothing happens. Otherwise its
        source is fixed up and, unless it contains only whitespace, it is
        appended (via the cartesian-style `core.utils.product`) to every
        choice already in `nodes_found`.

        :param nodes_found: list of choices (each a list of nodes) built so far
        :param unrecognized_tokens: pending UnrecognizedTokensNode or None
        :return: (updated nodes_found, None) — the pending node is always
                 consumed/reset
        """
        if not unrecognized_tokens:
            return nodes_found, unrecognized_tokens
        unrecognized_tokens.fix_source()
        if unrecognized_tokens.not_whitespace():
            nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
        return nodes_found, None

    @staticmethod
    def create_or_add(unrecognized_tokens, token, index):
        """Append `token` to the pending node, creating the node on first use.

        :param unrecognized_tokens: existing UnrecognizedTokensNode or None
        :param token: token that could not be resolved
        :param index: absolute index of `token` in the token stream
        :return: the (possibly newly created) UnrecognizedTokensNode
        """
        if unrecognized_tokens:
            unrecognized_tokens.add_token(token, index)
        else:
            unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
        return unrecognized_tokens

    def parse(self, context, text):
        """Resolve the unrecognized tokens of a ConceptLexerParser result.

        Walks every node of `text.value`; UnrecognizedTokensNode contents are
        re-examined token by token, trying in order: a known concept, a piece
        of Python source code, and finally falling back to collecting the
        token into a fresh UnrecognizedTokensNode. Because a token may map to
        several concepts, the result is a branching set of "choices", combined
        with `core.utils.product`.

        :param context: parsing context (provides `sheerka` and concept lookup)
        :param text: a PARSER_RESULT produced by `concept_lexer_parser`
        :return: a single result, a list of results (one per choice), or None
                 when `text` is not an acceptable input
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None
        # Only operate on results that came from the concept lexer.
        if text.parser != concept_lexer_parser:
            return None
        nodes = text.value
        # One empty choice to start with; `product` grows each choice.
        nodes_found = [[]]
        concepts_only = True
        for node in nodes:
            if isinstance(node, UnrecognizedTokensNode):
                unrecognized_tokens = None
                i = 0
                while i < len(node.tokens):
                    token_index = node.start + i
                    token = node.tokens[i]
                    # 1) Known concept(s)?
                    concepts_nodes = self.get_concepts_nodes(context, token_index, token)
                    if concepts_nodes is not None:
                        nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
                        nodes_found = core.utils.product(nodes_found, concepts_nodes)
                        i += 1
                        continue
                    # 2) A (possibly multi-token) Python expression?
                    source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
                    if source_code_node:
                        nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
                        nodes_found = core.utils.product(nodes_found, [source_code_node])
                        i += len(source_code_node.tokens)
                        continue
                    # 3) Not a concept nor some source code: keep as unrecognized.
                    unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
                    # Whitespace/newlines do not break the "concepts only" status.
                    concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
                    i += 1
                # Finish processing if needed (flush any trailing tokens).
                nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
            else:
                # Already-recognized node: extend every choice with it.
                nodes_found = core.utils.product(nodes_found, [node])
        ret = []
        for choice in nodes_found:
            ret.append(
                sheerka.ret(
                    self.name,
                    concepts_only,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=text.source,
                        body=choice,
                        try_parsed=None))
            )
        if len(ret) == 1:
            self.log_result(context, text.source, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, text.source, ret)
            return ret

    @staticmethod
    def get_concepts_nodes(context, index, token):
        """Try to recognize a concept from the universe of all known concepts.

        :param context: parsing context
        :param index: absolute index of `token` in the token stream
        :param token: candidate token (must be an IDENTIFIER)
        :return: a list of ConceptNode (one per matching concept) or None
        """
        if token.type != TokenKind.IDENTIFIER:
            return None
        concept = context.new_concept(token.value)
        # `new_concept` may return either one concept or an iterable of them.
        if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
            concepts = concept if hasattr(concept, "__iter__") else [concept]
            concepts_nodes = [ConceptNode(c, index, index, [token], token.value)
                              for c in concepts]
            return concepts_nodes
        return None

    @staticmethod
    def get_source_code_node(context, index, tokens):
        """Try to recognize source code.

        For the time being, only Python is supported. The longest prefix of
        `tokens` that parses as a Python *expression* and evaluates without
        error wins (greedy: the end index shrinks from the full length).

        :param context: parsing context
        :param index: absolute index of the first token in the token stream
        :param tokens: remaining tokens to examine
        :return: a SourceCodeNode covering the accepted tokens, or None
        """
        if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
            return None
        end_index = len(tokens)
        while end_index > 0:
            parser = PythonParser()
            tokens_to_parse = tokens[:end_index]
            res = parser.parse(context, tokens_to_parse)
            if res.status:
                # Only expressions are accepted (no statements).
                ast_ = res.value.value.ast_
                if not isinstance(ast_, ast.Expression):
                    return None
                try:
                    compiled = compile(ast_, "", "eval")
                    # SECURITY NOTE: this evaluates user-provided token text.
                    # Globals/locals are empty dicts, but builtins are still
                    # reachable — do not feed untrusted input through here.
                    eval(compiled, {}, {})
                except Exception:
                    # Parsing succeeded but evaluation failed: not usable code.
                    return None
                source = BaseParser.get_text_from_tokens(tokens_to_parse)
                return SourceCodeNode(res.value.value, index, index + end_index - 1,
                                      tokens_to_parse, source)
            end_index -= 1
        return None