from dataclasses import dataclass
from typing import List, Union

from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer, LexerError
from core.utils import tokens_are_matching, get_text_from_tokens
from parsers.BaseNodeParser import UnrecognizedTokensNode
from parsers.BaseParser import Node, ParsingError, BaseParser, ErrorSink, UnexpectedTokenParsingError


class ComparisonType:
    """String codes identifying the comparison operator of a ComparisonNode."""

    EQUALS = "EQ"
    NOT_EQUAlS = "NOT_EQ"  # NOTE: the odd casing is part of the public name; kept for callers
    LESS_THAN = "LT"
    LESS_THAN_OR_EQUALS = "LTE"
    GREATER_THAN = "GT"
    GREATER_THAN_OR_EQUALS = "GTE"
    IN = "IN"
    NOT_IN = "NOT_IN"


@dataclass()
class LeftPartNotFoundError(ParsingError):
    """
    When the expression starts with 'or' or 'and'
    """
    pass


@dataclass()
class ParenthesisMismatchError(ParsingError):
    """Reported when an opening or closing parenthesis has no counterpart."""

    token: Token  # the parenthesis token the error is attached to


class ExprNode(Node):
    """
    Base ExprNode
    eval() must be overridden

    A node covers the token positions ``start``..``end`` of the parser input.
    ``tokens`` may be None (ParenthesisNode is built that way by the parser).
    """

    def __init__(self, start: int, end: int, tokens: List[Token]):
        self.start = start
        self.end = end
        self.tokens = tokens
        self.source = None  # lazily computed by get_source()

    def eval(self, obj):
        """Default evaluation: always True."""
        return True

    def __eq__(self, other):
        if not isinstance(other, ExprNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        # Tokens are only compared when the other node carries some:
        # `other.tokens is None` acts as a wildcard.
        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return True

    def __hash__(self):
        return hash((self.start, self.end))

    def get_source(self):
        """Return the text of the node's tokens, computing and caching it on first use."""
        if self.source is None:
            self.source = get_text_from_tokens(self.tokens)
        return self.source


class NameExprNode(ExprNode):
    """Leaf node holding the concatenated ``str_value`` of its tokens."""

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.value = "".join([t.str_value for t in self.tokens])

    def eval(self, obj):
        """Return the node text; ``obj`` is not used."""
        return self.value

    def get_value(self):
        return self.value

    def __repr__(self):
        return f"NameExprNode(start={self.start}, end={self.end}, '{self.value}')"

    def __str__(self):
        return self.value

    def __eq__(self, other):
        if not isinstance(other, NameExprNode):
            return False

        return super().__eq__(other)

    def __hash__(self):
        return super().__hash__()

    def to_unrecognized(self):
        """
        UnrecognizedTokensNode with all tokens
        """
        return UnrecognizedTokensNode(self.start, self.end, self.tokens).fix_source()

    def to_str_unrecognized(self):
        """
        UnrecognizedTokensNode with one token, which is a string token of all the tokens
        """
        # NOTE(review): `str_value()` is not defined anywhere in this file; it is
        # presumably inherited from `Node` -- confirm. If it is not, `self.value`
        # was probably intended here.
        token = Token(TokenKind.STRING,
                      "'" + self.str_value() + "'",
                      self.tokens[0].index,
                      self.tokens[0].line,
                      self.tokens[0].column)
        return UnrecognizedTokensNode(self.start, self.end, [token]).fix_source()


class AndNode(ExprNode):
    """Conjunction of two or more sub-expressions stored in ``parts`` (a tuple)."""

    def __init__(self, start, end, tokens, *parts: ExprNode):
        super().__init__(start, end, tokens)
        self.parts = parts

    def eval(self, obj):
        """
        Combine the evaluation of all parts.

        The first two parts are combined with a short-circuit ``and``; every
        remaining part is always evaluated and folded in with ``&=``.
        NOTE(review): at least two parts are required (indexing parts[1]), and
        ``&=`` presumably expects bool-like results -- confirm with callers.
        """
        res = self.parts[0].eval(obj) and self.parts[1].eval(obj)
        for part in self.parts[2:]:
            res &= part.eval(obj)
        return res

    def __repr__(self):
        return f"AndNode(start={self.start}, end={self.end}, " + ", ".join([repr(p) for p in self.parts]) + ")"

    def __str__(self):
        return " and ".join([str(p) for p in self.parts])

    def __eq__(self, other):
        if not isinstance(other, AndNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return self.parts == other.parts

    def __hash__(self):
        return hash((self.start, self.end, self.parts))


class OrNode(ExprNode):
    """Disjunction of two or more sub-expressions stored in ``parts`` (a tuple)."""

    def __init__(self, start, end, tokens, *parts: ExprNode):
        super().__init__(start, end, tokens)
        self.parts = parts

    def eval(self, obj):
        """
        Combine the evaluation of all parts.

        The first two parts are combined with a short-circuit ``or``; every
        remaining part is always evaluated and folded in with ``|=``.
        NOTE(review): at least two parts are required (indexing parts[1]), and
        ``|=`` presumably expects bool-like results -- confirm with callers.
        """
        res = self.parts[0].eval(obj) or self.parts[1].eval(obj)
        for part in self.parts[2:]:
            res |= part.eval(obj)
        return res

    def __repr__(self):
        return f"OrNode(start={self.start}, end={self.end}, " + ", ".join([repr(p) for p in self.parts]) + ")"

    def __str__(self):
        return " or ".join([str(p) for p in self.parts])

    def __eq__(self, other):
        if not isinstance(other, OrNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return self.parts == other.parts

    def __hash__(self):
        return hash((self.start, self.end, self.parts))


class NotNode(ExprNode):
    """Logical negation of a single sub-expression."""

    def __init__(self, start, end, tokens, node: ExprNode):
        super().__init__(start, end, tokens)
        self.node = node

    def eval(self, obj):
        return not self.node.eval(obj)

    def get_value(self):
        """Delegate to the negated node's get_value()."""
        return self.node.get_value()

    def __repr__(self):
        return f"NotNode(start={self.start}, end={self.end}, {self.node!r})"

    def __str__(self):
        return f"not {self.node}"

    def __eq__(self, other):
        if not isinstance(other, NotNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return self.node == other.node

    def __hash__(self):
        return hash((self.start, self.end, self.node))


class ParenthesisNode(ExprNode):
    """
    Contains the boundaries of an expression inside parenthesis
    Need it, just to keep track of the boundaries of the parenthesis
    """

    def __init__(self, start, end, tokens, node: ExprNode):
        super().__init__(start, end, tokens)
        self.node = node

    def __eq__(self, other):
        if not isinstance(other, ParenthesisNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return self.node == other.node

    def __hash__(self):
        return hash((self.start, self.end, self.node))

    def __repr__(self):
        return f"ParenthesisNode(start={self.start}, end={self.end}, node={self.node!r})"

    def __str__(self):
        return f"({self.node})"


class VariableNode(ExprNode):
    """
    A variable name optionally followed by a chain of attributes (name.attr1.attr2).

    Equality and hashing only consider the name and the attributes, not the position.
    """

    def __init__(self, start, end, tokens, name, *attributes):
        super().__init__(start, end, tokens)
        self.name = name.strip()
        self.attributes = [attr.strip() for attr in attributes]
        if len(self.attributes) > 0:
            self.attributes_str = ".".join(self.attributes)
        else:
            self.attributes_str = None

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, VariableNode):
            return False

        return self.name == other.name and self.attributes == other.attributes

    def __hash__(self):
        # `attributes` is a list, which is unhashable: hash an equivalent tuple
        # so that nodes can be used in sets / as dict keys.
        return hash((self.name, tuple(self.attributes)))

    def __repr__(self):
        prefix = f"VariableNode(start={self.start}, end={self.end}, '{self.name}"
        if len(self.attributes) > 0:
            return prefix + "." + ".".join(self.attributes) + "')"
        else:
            return prefix + "')"

    def __str__(self):
        if self.attributes:
            return self.name + "." + ".".join(self.attributes)
        else:
            return self.name

    def unpack(self):
        """Return ``[name, attr1, attr2, ...]`` as a new list."""
        return [self.name] + self.attributes


class ComparisonNode(ExprNode):
    """
    Binary comparison ``left <comp> right`` where ``comp`` is a ComparisonType code.

    Equality and hashing only consider the operator and both operands, not the position.
    """

    def __init__(self, start, end, tokens, comp: str, left: ExprNode, right: ExprNode):
        super().__init__(start, end, tokens)
        self.comp = comp
        self.left = left
        self.right = right

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, ComparisonNode):
            return False

        return (self.comp == other.comp and
                self.left == other.left and
                self.right == other.right)

    def __hash__(self):
        return hash((self.comp, self.left, self.right))

    def __repr__(self):
        return f"ComparisonNode(start={self.start}, end={self.end}, {self.left!r} {self.comp} {self.right!r})"

    def __str__(self):
        return f"{self.left} {self.comp} {self.right}"

    @staticmethod
    def rebuild_source(left, op, right):
        """
        Build the source text of a comparison.

        :param left: left operand, inserted as is
        :param op: one of the ComparisonType codes
        :param right: right operand; a str is wrapped in single quotes
        :return: the rebuilt source, or None when ``op`` is not a known code
        """
        if isinstance(right, str):
            right = f"'{right}'"

        if op == ComparisonType.EQUALS:
            return f"{left} == {right}"
        if op == ComparisonType.NOT_EQUAlS:
            return f"{left} != {right}"
        if op == ComparisonType.LESS_THAN:
            return f"{left} < {right}"
        if op == ComparisonType.LESS_THAN_OR_EQUALS:
            return f"{left} <= {right}"
        if op == ComparisonType.GREATER_THAN:
            return f"{left} > {right}"
        if op == ComparisonType.GREATER_THAN_OR_EQUALS:
            return f"{left} >= {right}"
        if op == ComparisonType.IN:
            return f"{left} in ({right})"
        if op == ComparisonType.NOT_IN:
            return f"{left} not in ({right})"


@dataclass()
class FunctionParameter:
    """
    Class that represents the result of the parsing of one parameter
    """
    value: NameExprNode  # value parsed
    separator: NameExprNode = None  # holds the value and the position of the separator

    def add_sep(self, start, end, tokens):
        """Record the separator that follows this parameter."""
        self.separator = NameExprNode(start, end, tokens)

    def value_to_unrecognized(self):
        """Return the value as an UnrecognizedTokensNode."""
        return UnrecognizedTokensNode(self.value.start, self.value.end, self.value.tokens).fix_source()

    def separator_to_unrecognized(self):
        """Return the separator as an UnrecognizedTokensNode, or None when there is no separator."""
        if self.separator is None:
            return None
        return UnrecognizedTokensNode(self.separator.start,
                                      self.separator.end,
                                      self.separator.tokens).fix_source()


class FunctionNode(ExprNode):
    """
    A function call: ``first`` and ``last`` are the nodes surrounding the
    parameters, ``parameters`` the parsed parameters (or None).

    Equality and hashing only consider first, last and parameters, not the position.
    """

    def __init__(self, start, end, tokens, first: NameExprNode, last: NameExprNode,
                 parameters: Union[None, List[FunctionParameter]]):
        super().__init__(start, end, tokens)
        self.first = first
        self.last = last
        self.parameters = parameters

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, FunctionNode):
            return False

        return (self.first == other.first and
                self.last == other.last and
                self.parameters == other.parameters)

    def __hash__(self):
        # `parameters` is a list of FunctionParameter: neither the list nor the
        # (non frozen, eq=True) dataclass instances are hashable. Hash the
        # fields that the dataclass equality compares, which keeps
        # `a == b` => `hash(a) == hash(b)`.
        if self.parameters is None:
            parameters = None
        else:
            parameters = tuple((p.value, p.separator) for p in self.parameters)
        return hash((self.first, self.last, parameters))

    def __repr__(self):
        return f"FunctionNode(start={self.start}, end={self.end}, {self.first!r} {self.last} {self.parameters!r})"

    def __str__(self):
        return f"{self.first} {self.parameters} {self.last}"


class BaseExpressionParser(BaseParser):
    """
    Common skeleton for expression parsers.

    Subclasses must implement parse_input() and parse_tokens_stop_condition().
    parse_tokens() also reads ``self.expr_parser`` and reset_parser_input() reads
    ``self.yield_eof``; both are expected to be provided outside this class.
    """

    def reset_parser_input(self, parser_input: ParserInput, error_sink):
        """
        Clear the error sink, reset the parser input and move to the first token.

        :return: False when resetting raised a LexerError (added to the sink), True otherwise
        """
        try:
            error_sink.clear()
            parser_input.reset(self.yield_eof)
        except LexerError as e:
            error_sink.add_error(e)
            return False

        parser_input.next_token()
        return True

    def parse(self, context, parser_input: ParserInput):
        """
        Parse the whole input and wrap the outcome in a sheerka return value.

        :param context: execution context; gives access to ``sheerka`` and ``log``
        :param parser_input: input to parse
        :return: None when parser_input is not a ParserInput, otherwise the
                 value built by ``context.sheerka.ret``; its status is False when
                 the input is empty or when errors were collected
        """
        if not isinstance(parser_input, ParserInput):
            return None

        context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
        sheerka = context.sheerka

        if parser_input.is_empty():
            return context.sheerka.ret(self.name,
                                       False,
                                       sheerka.new(BuiltinConcepts.IS_EMPTY))

        error_sink = ErrorSink()
        if not self.reset_parser_input(parser_input, error_sink):
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))

        node = self.parse_input(context, parser_input, error_sink)

        # Anything left after the expression is an error; a dangling ')' gets
        # its own error type.
        token = parser_input.token
        if token and token.type != TokenKind.EOF:
            if token.type == TokenKind.RPAR:
                error_sink.add_error(ParenthesisMismatchError(token))
            else:
                error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'",
                                                                 token,
                                                                 [TokenKind.EOF]))

        # Top level parentheses are dropped, only the inner node is returned
        if isinstance(node, ParenthesisNode):
            node = node.node

        value = self.get_return_value_body(context.sheerka,
                                           parser_input.as_text(),
                                           node,
                                           node,
                                           error_sink.sink)

        ret = context.sheerka.ret(self.name, not error_sink.has_error, value)
        return ret

    def parse_input(self, context, parser_input, error_sink):
        """Entry point of the grammar; must be implemented by subclasses."""
        raise NotImplementedError

    def parse_tokens_stop_condition(self, token, parser_input):
        """Tell parse_tokens() whether ``token`` ends the current operand; must be implemented by subclasses."""
        raise NotImplementedError

    def parse_tokens(self, context, parser_input, error_sink):
        """
        Parse one operand: either a parenthesised sub-expression or a run of raw tokens.

        :return: a ParenthesisNode, the result of ``self.expr_parser.parse_input``
                 (when ``self.expr_parser`` is set), a NameExprNode, or None on
                 EOF / empty operand / parenthesis mismatch
        """

        def stop():
            # `and` binds tighter than `or`: EOF always stops; outside of any
            # parenthesis, a ')' or the subclass stop condition stops as well.
            return token.type == TokenKind.EOF or (
                    paren_count == 0 and
                    (token.type == TokenKind.RPAR or self.parse_tokens_stop_condition(token, parser_input)))

        token = parser_input.token
        if token.type == TokenKind.EOF:
            return None

        if token.type == TokenKind.LPAR:
            last_paren = token
            start = parser_input.pos
            parser_input.next_token()
            expr = self.parse_input(context, parser_input, error_sink)
            token = parser_input.token
            if token.type != TokenKind.RPAR:
                error_sink.add_error(ParenthesisMismatchError(last_paren))
                return expr
            end = parser_input.pos
            parser_input.next_token()
            return ParenthesisNode(start, end, None, expr)

        paren_count = 0
        last_paren = None
        start = parser_input.pos
        end = parser_input.pos
        last_is_whitespace = False
        while not stop():
            last_is_whitespace = token.type == TokenKind.WHITESPACE
            end += 1
            if token.type == TokenKind.LPAR:
                last_paren = token
                paren_count += 1
            if token.type == TokenKind.RPAR:
                paren_count -= 1

            parser_input.next_token(False)
            token = parser_input.token

        # Do not include the trailing whitespace in the operand
        if last_is_whitespace:
            end -= 1

        if start == end:
            # Nothing was consumed: only an error when not sitting on a ')'
            if token.type != TokenKind.RPAR:
                error_sink.add_error(LeftPartNotFoundError())
            return None

        if paren_count != 0:
            error_sink.add_error(ParenthesisMismatchError(last_paren))
            return None

        if self.expr_parser:
            # NOTE(review): `yield_oef` is spelled as in the original call;
            # presumably it matches the ParserInput signature -- confirm.
            new_parsing_input = ParserInput(
                None,
                tokens=parser_input.tokens,
                length=parser_input.length,
                start=start,
                end=end - 1,
                yield_oef=False).reset()
            new_parsing_input.next_token()
            return self.expr_parser.parse_input(context, new_parsing_input, error_sink)
        else:
            return NameExprNode(start, end - 1, parser_input.tokens[start:end])


class ExpressionVisitor:
    """
    Pythonic implementation of visitors for ExprNode
    """

    def visit(self, expr_node):
        """Dispatch to ``visit_<ClassName>`` when it exists, to generic_visit() otherwise."""
        name = expr_node.__class__.__name__
        method = 'visit_' + name
        visitor = getattr(self, method, self.generic_visit)
        return visitor(expr_node)

    def generic_visit(self, expr_node):
        """Called if no explicit visitor function exists for a node."""
        # Visits every ExprNode found directly in the node attributes, or inside
        # list / tuple attributes. Nothing is returned (implicit None).
        for field, value in expr_node.__dict__.items():
            if isinstance(value, (list, tuple)):
                for item in value:
                    if isinstance(item, ExprNode):
                        self.visit(item)
            elif isinstance(value, ExprNode):
                self.visit(value)


class TrueifyVisitor(ExpressionVisitor):
    """
    Visit an ExprNode
    replace all the nodes containing a variable to 'trueify' with True
    The node containing both variables to trueify and to skip are skipped

    Node types without a dedicated visit_* method fall back to generic_visit(),
    which returns None.
    """

    def __init__(self, to_trueify, to_skip):
        self.to_trueify = to_trueify
        self.to_skip = to_skip

    def visit_AndNode(self, expr_node):
        """Return a new AndNode built from the visited parts."""
        parts = []
        for part in expr_node.parts:
            parts.append(self.visit(part))
        return AndNode(expr_node.start, expr_node.end, expr_node.tokens, *parts)

    def visit_OrNode(self, expr_node):
        """Return a new OrNode built from the visited parts."""
        parts = []
        for part in expr_node.parts:
            parts.append(self.visit(part))
        return OrNode(expr_node.start, expr_node.end, expr_node.tokens, *parts)

    def visit_NameExprNode(self, expr_node):
        """
        Return a 'True' NameExprNode when an identifier to trueify is found,
        the node itself when an identifier to skip is found or nothing matches.
        """
        return_true = False
        for t in expr_node.tokens:
            if t.type == TokenKind.IDENTIFIER:
                if t.value in self.to_skip:
                    # A variable to skip wins over any variable to trueify
                    return expr_node
                if t.value in self.to_trueify:
                    return_true = True

        return NameExprNode(expr_node.start,
                            expr_node.end,
                            [Token(TokenKind.IDENTIFIER, "True", -1, -1, -1)]) if return_true else expr_node


# Token sequences recognised by IsAQuestionVisitor
is_question_tokens = list(Tokenizer("is_question()"))
eval_question_requested_in_context = list(Tokenizer("context.in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)"))


class IsAQuestionVisitor(ExpressionVisitor):
    """
    visit an expression and return True
    if is_question or context.in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED) is found.

    Visit results use three values: True, False and None (nothing relevant found).
    """

    def visit_NameExprNode(self, expr_node):
        """Return True when the node tokens match one of the question expressions, None otherwise."""
        if tokens_are_matching(expr_node.tokens, is_question_tokens) or \
                tokens_are_matching(expr_node.tokens, eval_question_requested_in_context):
            return True

        return None

    def visit_AndNode(self, expr_node):
        """
        AND   | True  | False | None
        ------+-------+-------+----------
        False | False | False | False
        True  | True  | False | True
        None  | True  | False | None
        """
        res = self.visit(expr_node.parts[0])
        if isinstance(res, bool) and not res:
            return res

        for part in expr_node.parts[1:]:
            visited = self.visit(part)
            if isinstance(visited, bool):
                if not visited:
                    return visited
                else:
                    res = visited

        return res

    def visit_OrNode(self, expr_node):
        """
        OR    | True  | False | None
        ------+-------+-------+----------
        True  | True  | True  | True
        False | True  | False | False
        None  | True  | False | None
        """
        res = self.visit(expr_node.parts[0])
        if isinstance(res, bool) and res:
            return res

        for part in expr_node.parts[1:]:
            visited = self.visit(part)
            if isinstance(visited, bool):
                if visited:
                    return visited
                else:
                    res = visited

        return res

    def visit_NotNode(self, expr_node):
        """
              | NOT
        ------+-------
        False | True
        True  | False
        None  | None
        """
        visited = self.visit(expr_node.node)
        return None if visited is None else not visited

    def is_a_question(self, expr_node):
        """Return True only when visiting the expression yields the boolean True."""
        res = self.visit(expr_node)
        return isinstance(res, bool) and res