# File: Sheerka-Old/src/parsers/PythonParser.py
# (218 lines, 6.7 KiB, Python)
import ast
import logging
from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BnfNodeParser import ConceptNode
log = logging.getLogger(__name__)
@dataclass()
class PythonErrorNode(ErrorNode):
    """Error node recorded when a fragment of Python source fails to parse.

    Attributes:
        source: the text that could not be parsed.
        exception: the exception raised while parsing it.
    """

    source: str
    exception: Exception
class PythonNode(Node):
    """A parsed Python fragment: the raw source text, its AST, and any
    concepts recognized inside the expression."""

    def __init__(self, source, ast_=None, concepts=None):
        self.source = source
        # When no AST is supplied, parse the source in expression mode.
        # An empty/None source yields no AST at all.
        self.ast_ = ast_ if ast_ else ast.parse(source, mode="eval") if source else None
        self.concepts = concepts or {}  # when concepts are recognized in the expression

    def __repr__(self):
        ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module"
        return "PythonNode(" + ast_type + "='" + self.source + "')"

    def __eq__(self, other):
        # Equal when both the source text and the normalized AST dumps match.
        if not isinstance(other, PythonNode):
            return False
        if self.source != other.source:
            return False
        return self.get_dump(self.ast_) == self.get_dump(other.ast_)

    def __hash__(self):
        # BUG FIX: ast.AST objects have no `.hash` attribute, so the previous
        # `hash((self.source, self.ast_.hash))` raised AttributeError whenever
        # a PythonNode was hashed.  Hash the same normalized dump __eq__
        # compares, so equal nodes hash equally; guard the no-AST case.
        dump = self.get_dump(self.ast_) if self.ast_ is not None else None
        return hash((self.source, dump))

    @staticmethod
    def get_dump(ast_):
        """Return ast.dump() output with default-valued fields stripped, so
        structurally identical trees produce identical dumps."""
        dump = ast.dump(ast_)
        for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]:
            dump = dump.replace(to_remove, "")
        return dump
class PythonParser(BaseParser):
    """
    Parse Python scripts
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Python", 50)
        self.source = kwargs.get("source", "<undef>")

    def parse(self, context, parser_input):
        """Parse the input as Python: first in expression mode, then, if that
        fails, in statement mode.  Returns a parser result via sheerka."""
        sheerka = context.sheerka
        tree = None
        # Token-kind handlers used when flattening the input to text.
        token_handlers = {
            TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value, True)
        }
        try:
            source = self.get_input_as_text(parser_input, token_handlers).strip()
            if not isinstance(parser_input, str):
                parser_input = source
            # Expression form first; fall back to statement form.
            ok, tree, error = self.try_parse_expression(source)
            if not ok:
                ok, tree, error = self.try_parse_statement(source)
            if not ok:
                self.error_sink.append(PythonErrorNode(parser_input, error))
        except LexerError as e:
            self.error_sink.append(e)
        if self.has_error:
            ret = sheerka.ret(
                self.name,
                False,
                sheerka.new(
                    BuiltinConcepts.NOT_FOR_ME,
                    body=parser_input,
                    reason=self.error_sink))
        else:
            ret = sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    body=PythonNode(parser_input, tree),
                    try_parsed=None))
        self.log_result(context, parser_input, ret)
        return ret

    def try_parse_expression(self, text):
        """Return (success, tree, error) for *text* parsed in 'eval' mode."""
        return self._attempt(text, 'eval')

    def try_parse_statement(self, text):
        """Return (success, tree, error) for *text* parsed in 'exec' mode."""
        return self._attempt(text, 'exec')

    def _attempt(self, text, mode):
        # Shared worker for the two try_parse_* entry points.
        try:
            return True, ast.parse(text, f"<{self.source}>", mode), None
        except Exception as error:
            return False, None, error
class PythonGetNamesVisitor(ast.NodeVisitor):
    """AST visitor that collects every identifier (ast.Name) found in a tree
    into the `names` set."""

    def __init__(self):
        self.names = set()

    def visit_Name(self, node):
        # Name nodes have no nested Name children, so generic_visit is not
        # needed here.
        self.names |= {node.id}
class LexerNodeParserHelperForPython:
    """Helper class to parse mix of concepts and Python"""

    def __init__(self):
        # Cache for already created identifiers (the key is id(concept)).
        self.identifiers = {}
        # Number of identifiers sharing the same root (prefix).
        self.identifiers_key = {}

    def _get_identifier(self, concept):
        """
        Get an identifier for a concept.
        Make sure to return the same identifier if the same concept
        Make sure to return a different identifier if same name but different concept
        Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
        to be instance variables
        I would like to keep this parser as stateless as possible
        :param concept:
        :return:
        """
        if id(concept) in self.identifiers:
            return self.identifiers[id(concept)]
        identifier = "__C__" + self._sanitize(concept.key or concept.name)
        if concept.id:
            identifier += "__" + concept.id
        if identifier in self.identifiers_key:
            # Same root already used by a different concept: append a counter.
            self.identifiers_key[identifier] += 1
            identifier += f"_{self.identifiers_key[identifier]}"
        else:
            self.identifiers_key[identifier] = 0
        identifier += "__C__"
        self.identifiers[id(concept)] = identifier
        return identifier

    @staticmethod
    def _sanitize(identifier):
        """Replace every non-alphanumeric character with '0' so the result is
        usable inside a Python identifier."""
        # str.join is linear; the previous per-character `+=` loop was
        # quadratic on long names.
        return "".join(c if c.isalnum() else "0" for c in identifier)

    def parse(self, context, nodes):
        """Parse a mixed sequence of ConceptNodes and plain-text nodes: each
        concept is replaced by a generated Python identifier, then the whole
        text is handed to PythonParser.  On success returns the PythonNode
        (with `source` and `concepts` attached); on failure returns the error
        body."""
        source = ""
        to_parse = ""
        concepts = {}  # the key is the Python identifier
        for node in nodes:
            # The original source is accumulated unchanged in both branches.
            source += node.source
            if isinstance(node, ConceptNode):
                if to_parse:
                    to_parse += " "
                python_id = self._get_identifier(node.concept)
                to_parse += python_id
                concepts[python_id] = node.concept
            else:
                to_parse += node.source
        with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
            sub_context.add_inputs(to_parse=to_parse)
            python_parser = PythonParser()
            result = python_parser.parse(sub_context, to_parse)
            sub_context.add_values(return_values=result)
            if result.status:
                python_node = result.body.body
                python_node.source = source
                python_node.concepts = concepts
                return python_node
            return result.body  # the error