164 lines
5.7 KiB
Python
164 lines
5.7 KiB
Python
# to be replaced by SyaNodeParser
|
|
import ast
|
|
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.tokenizer import TokenKind
|
|
from parsers.BaseNodeParser import SourceCodeNode
|
|
from parsers.BaseParser import BaseParser
|
|
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
|
|
import core.utils
|
|
from parsers.PythonParser import PythonParser
|
|
|
|
# Module-level BnfNodeParser instance; MultipleConceptsParser.parse passes it
# to get_input_as_lexer_nodes() together with the parser input.
concept_lexer_parser = BnfNodeParser()
|
|
|
|
|
|
class MultipleConceptsParser(BaseParser):
    """
    Post-processes the nodes produced by BnfNodeParser.

    Every UnrecognizedTokensNode is walked token by token: each token is
    turned into concept nodes or into a Python source code node when possible,
    and is gathered into a new UnrecognizedTokensNode otherwise.

    The status of the result stays True as long as the only tokens left
    unrecognized are whitespaces or newlines.
    """

    def __init__(self, **kwargs):
        # kwargs are accepted but are not forwarded to BaseParser
        BaseParser.__init__(self, "MultipleConcepts", 45)
        # the parser is created disabled
        self.enabled = False

    @staticmethod
    def finalize(nodes_found, unrecognized_tokens):
        """
        Flushes the pending unrecognized tokens (if any) into nodes_found.

        The pending node gets its source fixed, then is combined with
        nodes_found through core.utils.product, but only when
        not_whitespace() is truthy. Once handled, the pending node is reset.

        :param nodes_found: choices built so far
        :param unrecognized_tokens: pending UnrecognizedTokensNode, or a falsy value
        :return: tuple (nodes_found, pending node); the pending node is None
                 after a flush, and is returned untouched when it was falsy
        """
        if unrecognized_tokens:
            unrecognized_tokens.fix_source()
            if unrecognized_tokens.not_whitespace():
                nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
            return nodes_found, None

        # nothing pending
        return nodes_found, unrecognized_tokens

    @staticmethod
    def create_or_add(unrecognized_tokens, token, index):
        """
        Stores a token that could not be recognized.

        :param unrecognized_tokens: pending UnrecognizedTokensNode, or a falsy value
        :param token: token to store
        :param index: position of the token
        :return: the pending node once updated, or a new UnrecognizedTokensNode
                 holding only this token when nothing was pending
        """
        if not unrecognized_tokens:
            return UnrecognizedTokensNode(index, index, [token])

        unrecognized_tokens.add_token(token, index)
        return unrecognized_tokens

    def parse(self, context, parser_input):
        """
        Tries to resolve the tokens left unrecognized by the lexer parser.

        :param context: execution context (gives access to sheerka)
        :param parser_input: input to parse
        :return: None when there is no lexer node, a single result when there
                 is exactly one choice, the list of results otherwise
        """
        sheerka = context.sheerka
        lexer_nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
        if not lexer_nodes:
            return None

        choices = [[]]
        success = True

        for lexer_node in lexer_nodes:
            if not isinstance(lexer_node, UnrecognizedTokensNode):
                # already recognized by the lexer parser, keep it as it is
                choices = core.utils.product(choices, [lexer_node])
                continue

            pending = None
            tokens = lexer_node.tokens
            offset = 0

            while offset < len(tokens):
                position = lexer_node.start + offset
                current = tokens[offset]

                concept_nodes = self.get_concepts_nodes(context, position, current)
                if concept_nodes is not None:
                    choices, pending = self.finalize(choices, pending)
                    choices = core.utils.product(choices, concept_nodes)
                    consumed = 1
                else:
                    code_node = self.get_source_code_node(context, position, tokens[offset:])
                    if code_node:
                        choices, pending = self.finalize(choices, pending)
                        choices = core.utils.product(choices, [code_node])
                        # source code may span several tokens
                        consumed = len(code_node.tokens)
                    else:
                        # not a concept nor some source code
                        pending = self.create_or_add(pending, current, position)
                        if current.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                            success = False
                        consumed = 1

                offset += consumed

            # flush what is still pending at the end of the node
            choices, pending = self.finalize(choices, pending)

        results = [
            sheerka.ret(
                self.name,
                success,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input.source,
                    body=choice,
                    try_parsed=None,
                ),
            )
            for choice in choices
        ]

        if len(results) != 1:
            self.log_multiple_results(context, parser_input.source, results)
            return results

        single = results[0]
        self.log_result(context, parser_input.source, single)
        return single

    @staticmethod
    def get_concepts_nodes(context, index, token):
        """
        Tries to recognize a concept
        from the universe of all known concepts

        :param context: execution context
        :param index: position of the token
        :param token: token to recognize (only identifiers are considered)
        :return: list of ConceptNode (one per matching concept),
                 None when the token does not name a known concept
        """
        if token.type != TokenKind.IDENTIFIER:
            return None

        found = context.new_concept(token.value)
        if hasattr(found, "__iter__"):
            # several concepts were returned
            candidates = found
        elif context.sheerka.is_known(found):
            candidates = [found]
        else:
            return None

        return [ConceptNode(candidate, index, index, [token], token.value) for candidate in candidates]

    @staticmethod
    def get_source_code_node(context, index, tokens):
        """
        Tries to recognize source code.
        For the time being, only Python is supported

        The longest run of tokens is tried first, then one token is removed
        from the end at every attempt. The first run accepted by PythonParser
        decides the outcome: it must be an expression and it must evaluate
        without raising.

        :param context: execution context
        :param index: position of the first token
        :param tokens: tokens to parse
        :return: SourceCodeNode covering the recognized tokens, None otherwise
        """
        count = len(tokens)
        if count == 0:
            return None
        if count == 1 and tokens[0].type == TokenKind.EOF:
            return None

        for end_index in range(count, 0, -1):
            candidate_tokens = tokens[:end_index]
            res = PythonParser().parse(context, candidate_tokens)
            if not res.status:
                continue

            parsed = res.value.value

            # only expression are accepted
            tree = parsed.ast_
            if not isinstance(tree, ast.Expression):
                return None

            # NOTE(review): the expression is really executed here, only to check
            # that it does not raise. Confirm that the input can be trusted.
            try:
                eval(compile(tree, "<string>", "eval"), {}, {})
            except Exception:
                return None

            source = BaseParser.get_text_from_tokens(candidate_tokens)
            return SourceCodeNode(parsed, index, index + end_index - 1, candidate_tokens, source)

        return None
|