215 lines
6.6 KiB
Python
215 lines
6.6 KiB
Python
from core.builtin_concepts import BuiltinConcepts
|
|
from core.tokenizer import Tokenizer, LexerError, TokenKind
|
|
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
|
from dataclasses import dataclass, field
|
|
import ast
|
|
import logging
|
|
|
|
from parsers.ConceptLexerParser import ConceptNode
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass()
class PythonErrorNode(ErrorNode):
    """
    Error node produced when a Python fragment fails to parse.

    Carries the offending text alongside the exception raised by the
    parser so error sinks can report both.
    """
    # The text that failed to parse.
    source: str
    # The exception raised while parsing/lexing `source`.
    exception: Exception
|
|
|
|
|
|
class PythonNode(Node):
    """
    Node wrapping a parsed Python fragment (expression or module).
    """

    def __init__(self, source, ast_, concepts=None):
        # Original text the AST was parsed from.
        self.source = source
        # `ast.Expression` (eval mode) or `ast.Module` (exec mode) tree.
        self.ast_ = ast_
        # Concepts recognized in the expression, keyed by Python identifier.
        self.concepts = concepts or {}

    def __repr__(self):
        ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module"
        return "PythonNode(" + ast_type + "='" + self.source + "')"

    def __eq__(self, other):
        # Equal when both the source text and the normalized AST dumps match.
        if not isinstance(other, PythonNode):
            return False

        if self.source != other.source:
            return False

        return self.get_dump(self.ast_) == self.get_dump(other.ast_)

    def __hash__(self):
        # BUGFIX: `ast.AST` objects have no `hash` attribute, so the old
        # `self.ast_.hash` raised AttributeError. Hash the same normalized
        # dump that `__eq__` compares, keeping hash consistent with equality.
        return hash((self.source, self.get_dump(self.ast_)))

    @staticmethod
    def get_dump(ast_):
        """Return `ast.dump(ast_)` with noise fields stripped for comparison."""
        dump = ast.dump(ast_)
        for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]:
            dump = dump.replace(to_remove, "")
        return dump
|
|
|
|
|
|
class PythonParser(BaseParser):
    """
    Parse Python scripts
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Python", 50)
        # Label shown in the filename position of ast.parse() diagnostics.
        self.source = kwargs.get("source", "<undef>")

    def parse(self, context, text):
        """
        Parse `text` (a string or a token sequence) as Python.

        Tries expression (eval) parsing first, then statement (exec)
        parsing; on failure, records a PythonErrorNode in the error sink.
        Returns the sheerka PARSER_RESULT wrapper.
        """
        sheerka = context.sheerka
        tree = None

        # Concept tokens are rewritten into placeholder identifiers so the
        # remaining text becomes valid Python.
        python_switcher = {
            TokenKind.CONCEPT: lambda t: f"__C__USE_CONCEPT__{t.value}__C__"
        }

        try:
            if not isinstance(text, str):
                # Already tokenized input.
                source = self.get_text_from_tokens(text, python_switcher)
            elif "c:" in text:
                # Raw text containing concept markers: tokenize first.
                source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
            else:
                source = text
            source = source.strip()

            text = text if isinstance(text, str) else source

            # First try an expression, then fall back to a statement.
            res, tree, error = self.try_parse_expression(source)
            if not res:
                res, tree, error = self.try_parse_statement(source)
            if not res:
                self.has_error = True
                self.error_sink.append(PythonErrorNode(text, error))

        except LexerError as e:
            self.has_error = True
            self.error_sink.append(e)

        ret = sheerka.ret(
            self.name,
            not self.has_error,
            sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text,
                body=self.error_sink if self.has_error else PythonNode(text, tree),
                try_parsed=None))

        self.log_result(context, text, ret)
        return ret

    def try_parse_expression(self, text):
        """Attempt eval-mode parsing; returns (ok, tree_or_None, error_or_None)."""
        return self._try_parse(text, 'eval')

    def try_parse_statement(self, text):
        """Attempt exec-mode parsing; returns (ok, tree_or_None, error_or_None)."""
        return self._try_parse(text, 'exec')

    def _try_parse(self, text, mode):
        # Shared driver for both parse modes.
        try:
            return True, ast.parse(text, f"<{self.source}>", mode), None
        except Exception as error:
            return False, None, error
|
|
|
|
|
|
class PythonGetNamesVisitor(ast.NodeVisitor):
    """
    Collect every identifier (`ast.Name`) referenced in a visited tree.
    """

    def __init__(self):
        # Each distinct identifier is kept once.
        self.names = set()

    def visit_Name(self, node):
        # A Name node carries no nested Name children, so no recursion needed.
        self.names |= {node.id}
|
|
|
|
class LexerNodeParserHelperForPython:
    """Helper class to parse mix of concepts and Python"""

    def __init__(self):
        # Identifier cache keyed on id(concept): same object -> same identifier.
        self.identifiers = {}
        # Collision counters: how many identifiers share a given prefix.
        self.identifiers_key = {}

    def _get_identifier(self, concept):
        """
        Get an identifier for a concept.
        Make sure to return the same identifier if the same concept
        Make sure to return a different identifier if same name but different concept

        Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
        to be instance variables
        I would like to keep this parser as stateless as possible
        :param concept:
        :return:
        """
        cached = self.identifiers.get(id(concept))
        if cached is not None:
            return cached

        stem = "__C__" + self._sanitize(concept.key or concept.name)
        if concept.id:
            stem += "__" + concept.id

        # Distinct concepts sharing a stem get a numeric suffix (_1, _2, ...).
        count = self.identifiers_key.get(stem)
        if count is None:
            self.identifiers_key[stem] = 0
        else:
            self.identifiers_key[stem] = count + 1
            stem += f"_{count + 1}"

        stem += "__C__"

        self.identifiers[id(concept)] = stem
        return stem

    @staticmethod
    def _sanitize(identifier):
        # Map every non-alphanumeric character to "0" so the result is a
        # valid Python identifier fragment.
        return "".join(c if c.isalnum() else "0" for c in identifier)

    def parse(self, context, nodes):
        """
        Concatenate the nodes into Python source, replacing each ConceptNode
        with a generated identifier, then parse the result as Python.
        Returns a PythonNode on success, otherwise the error body.
        """
        source = ""
        to_parse = ""
        concepts = {}  # the key is the Python identifier

        for node in nodes:
            # The original text is always kept verbatim.
            source += node.source
            if isinstance(node, ConceptNode):
                if to_parse:
                    to_parse += " "
                python_id = self._get_identifier(node.concept)
                to_parse += python_id
                concepts[python_id] = node.concept
            else:
                to_parse += node.source

        with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
            sub_context.add_inputs(to_parse=to_parse)
            result = PythonParser().parse(sub_context, to_parse)
            sub_context.add_values(return_values=result)

        if result.status:
            # Re-attach the verbatim source and recognized concepts.
            python_node = result.body.body
            python_node.source = source
            python_node.concepts = concepts
            return python_node

        return result.body  # the error
|