"""
PyQuery - The Python Object Query System
Author: Tim Henderson
Contact: tim.tadh@hackthology.com
Copyright (c) 2010 All Rights Reserved.
Licensed under a BSD style license see the LICENSE file.

File: symbols.py
Purpose: Objects and functions representing components of a query.
"""
from __future__ import absolute_import
from __future__ import division

from builtins import object
from builtins import range
from builtins import str
from builtins import zip
from collections import deque
from itertools import product

from core.global_symbols import NotInit
from core.utils import sheerka_getattr, sheerka_hasattr

try:
    from .OrderedSet import OrderedSet
except (SystemError, ImportError, ValueError):
    # A relative import without a parent package raises a different exception
    # depending on the interpreter version (ValueError, SystemError or
    # ImportError), so all three trigger the absolute-import fallback.
    from OrderedSet import OrderedSet


class Attribute(object):
    """
    Represents an attribute. An attribute consists of a name and a "callchain."
    The callchain represents one or more function or index lookups being
    performed on the attribute.
    eg.
        x(1,2,3)[1]()
    translates to:
        self.name = 'x'
        self.callchain = [Call([1,2,3]), Call([1], True), Call([])]
    """

    def __init__(self, name, callchain=None):
        self.name = name
        self.callchain = callchain

    def __repr__(self):
        return str(self)

    def __str__(self):
        return str(self.name) + "[" + str(self.callchain) + "]"


class Call(object):
    """
    Represents a 'Call' (for context see the documentation on the Attribute
    class). A call consists of the parameters passed into the call (themselves
    functions which take the namespace (objs)) and whether or not this is an
    item lookup rather than a function call.
    """

    def __init__(self, params, lookup=False):
        self.params = params
        self.lookup = lookup

    def __repr__(self):
        return str(self)

    def __str__(self):
        if self.lookup:
            return "__getitem__" + str(tuple(self.params))
        return "__call__" + str(tuple(self.params))


class KeyValuePair(object):
    """
    Represents a key,value pair for use while iterating over a dictionary.
    """

    def __init__(self, key, value):
        self.key = key
        self.value = value

    def __repr__(self):
        return str(self)

    def __str__(self):
        # The format string must carry one placeholder per tuple element;
        # an empty format string makes the % operator raise TypeError.
        return "<KeyValuePair %s:%s>" % (self.key, self.value)


def attribute_value(attribute_list, scalar=False, context='locals'):
    """
    Transforms an AttributeValue into its actual value.
    eg.
        x.y.z().q[1].r
        attribute_list = [Attribute('x'), Attribute('y'),
                          Attribute('z', [Call([])]),
                          Attribute('q', [Call([1], True)]),
                          Attribute('r')]
    translates into the attribute lookups, function calls, and __getitem__
    calls necessary to produce a value.

    if scalar == True: it simply returns the value stored in attribute_list

    context is no longer used and should be removed.

    :param attribute_list: list of Attribute objects (or the raw value when
                           scalar is True)
    :param scalar: when True the returned function yields attribute_list as is
    :param context: unused
    :return: a function taking the namespace dict and returning the value
    """

    def expand(namespace, attr, x):
        """
        Expands the value of one attribute by applying, in order, the function
        calls and item lookups specified in the attribute's callchain to @x.
        """
        if attr.callchain:
            for call in attr.callchain:
                # A parameter is evaluated against the namespace when it is
                # either another attribute-value function (same code object as
                # `value`) or a function tagged with '__objquery__'.
                # Everything else is passed through untouched.
                p = [
                    param(namespace)
                    if isinstance(param, type(value)) and (
                        value.__code__ == param.__code__
                        or hasattr(param, '__objquery__'))
                    else param
                    for param in call.params
                ]
                if call.lookup:
                    x = x.__getitem__(p[0])
                else:
                    x = x.__call__(*p)
        return x

    def value(namespace):
        """
        The computation function returned to the user. Computes the actual
        value of the attribute expression when @namespace is passed in.

        :raises AttributeError: when an intermediate object lacks an attribute
        """
        if scalar:
            return attribute_list

        # The first name is resolved in the namespace, the following ones on
        # the object produced by the previous step.
        attr0 = attribute_list[0]
        obj = expand(namespace, attr0, namespace[attr0.name])
        for attr in attribute_list[1:]:
            if not sheerka_hasattr(obj, attr.name):
                raise AttributeError(f"'{type(obj).__name__}' object has no attribute '{attr.name}'")
            obj = expand(namespace, attr, sheerka_getattr(obj, attr.name))
        return obj

    return value


def operator(op):
    """
    Returns a function which performs comparison operations

    :param op: one of '==', '!=', '<=', '>=', '<', '>'
    :raises Exception: when @op is not a known comparison operator
    """
    if op == '==':
        return lambda x, y: x == y
    if op == '!=':
        return lambda x, y: x != y
    if op == '<=':
        return lambda x, y: x <= y
    if op == '>=':
        return lambda x, y: x >= y
    if op == '<':
        return lambda x, y: x < y
    if op == '>':
        return lambda x, y: x > y
    raise Exception("operator %s not found" % op)


def arith_operator(op):
    """
    Returns a function which performs arithmetic operations

    :param op: one of '+', '-', '*', '/', '%'
    :raises Exception: when @op is not a known arithmetic operator
    """
    if op == '+':
        return lambda x, y: x + y
    if op == '-':
        return lambda x, y: x - y
    if op == '*':
        return lambda x, y: x * y
    if op == '/':
        return lambda x, y: x / y
    if op == '%':
        return lambda x, y: x % y
    raise Exception("operator %s not found" % op)


def set_operator(op):
    """
    Returns a function which performs set operations. Both operands are
    converted to OrderedSet before the operation is applied.

    :param op: one of '|', '&', '-'
    :raises Exception: when @op is not a known set operator
    """
    if op == '|':
        return lambda x, y: OrderedSet(x) | OrderedSet(y)
    if op == '&':
        return lambda x, y: OrderedSet(x) & OrderedSet(y)
    if op == '-':
        return lambda x, y: OrderedSet(x) - OrderedSet(y)
    raise Exception("operator %s not found" % op)


def set_expression_operation1(op):
    """
    Returns a function which performs scalar in set operations

    :param op: 'in' or 'not in'
    :raises Exception: when @op is not a known membership operator
    """
    if op == 'in':
        return lambda x, y: x in y
    if op == 'not in':
        return lambda x, y: x not in y
    raise Exception("operator %s not found" % op)


def set_expression_operation2(op):
    """
    Returns a function which performs set to set comparison operations

    :param op: one of 'is', 'is not', 'subset', 'superset', 'proper subset',
               'proper superset'
    :raises Exception: when @op is not a known set comparison operator
    """
    if op == 'is':
        return lambda x, y: x == y
    if op == 'is not':
        return lambda x, y: x != y
    if op == 'subset':
        return lambda x, y: x <= y
    if op == 'superset':
        return lambda x, y: x >= y
    if op == 'proper subset':
        return lambda x, y: x < y
    if op == 'proper superset':
        return lambda x, y: x > y
    raise Exception("operator %s not found" % op)


def bool_operator(op):
    """
    Returns a function which performs basic boolean (and, or) operations.
    The returned function receives the two operand functions plus the
    namespace, so the right operand is only evaluated when needed.

    :param op: 'and' or 'or'
    :raises Exception: when @op is not a known boolean operator
    """
    if op == 'and':
        return lambda x, y, namespace: x(namespace) and y(namespace)
    if op == 'or':
        return lambda x, y, namespace: x(namespace) or y(namespace)
    raise Exception("operator %s not found" % op)


def unary_operator(op):
    """
    Returns a function which performs unary (not) operation

    :param op: 'not'
    :raises Exception: when @op is not a known unary operator
    """
    if op == 'not':
        return lambda x: not x
    raise Exception("operator %s not found" % op)


def comparison_value(value1, op, value2):
    """
    Returns a function which will calculate a where expression for a basic
    comparison operation.

    :param value1: function(namespace) producing the left operand
    :param op: binary function applied to both evaluated operands
    :param value2: function(namespace) producing the right operand
    """

    def where(namespace):
        return op(value1(namespace), value2(namespace))

    # Tag the closure so attribute_value() evaluates it when used as a parameter.
    object.__setattr__(where, '__objquery__', True)
    return where


def arith_value(value1, op, value2):
    """
    Returns a function which will calculate a where expression for a basic
    arithmetic operation.

    :param value1: function(namespace) producing the left operand
    :param op: binary function applied to both evaluated operands
    :param value2: function(namespace) producing the right operand
    """

    def computation(namespace):
        return op(value1(namespace), value2(namespace))

    object.__setattr__(computation, '__objquery__', True)
    return computation


def set_value(s1, op, s2):
    """
    Returns a Query function for the result of set operations
    (difference, union etc..)

    :param s1: function(namespace) producing the left collection
    :param op: binary function applied to both evaluated collections
    :param s2: function(namespace) producing the right collection
    """

    def query(namespace):
        return op(s1(namespace), s2(namespace))

    object.__setattr__(query, '__objquery__', True)
    return query


def set_expression_value(val, op, s):
    """
    Returns a where function which returns the result of a value in set
    operation

    :param val: function(namespace) producing the value
    :param op: binary function applied to the evaluated value and collection
    :param s: function(namespace) producing the collection
    """

    def where(namespace):
        return op(val(namespace), s(namespace))

    object.__setattr__(where, '__objquery__', True)
    return where


def boolean_expression_value(value1, op, value2):
    """
    Returns the function which computes the result of boolean (and, or)
    operation. The operand functions are handed to @op unevaluated, together
    with the namespace.
    """

    def where(namespace):
        return op(value1, value2, namespace)

    object.__setattr__(where, '__objquery__', True)
    return where


def unary_expression_value(op, val):
    """
    Returns the function which computes the result of boolean not operation
    """

    def where(namespace):
        return op(val(namespace))

    object.__setattr__(where, '__objquery__', True)
    return where


def boolean_value(val):
    """
    Returns the function which booleanizes the result of the Value function
    """

    def where(namespace):
        return bool(val(namespace))

    object.__setattr__(where, '__objquery__', True)
    return where


def where_value(val):
    """
    Returns the results of a Value function.
    """

    def where(namespace):
        return val(namespace)

    object.__setattr__(where, '__objquery__', True)
    return where


def dict_value(pairs):
    """
    Creates a dictionary from the passed pairs after evaluation.

    :param pairs: iterable of (key_function, value_function); both functions
                  take the namespace
    """

    def as_dict(namespace):
        return dict((name(namespace), value(namespace)) for name, value in pairs)

    object.__setattr__(as_dict, '__objquery__', True)
    return as_dict


def list_value(values):
    """
    Creates a list from the passed objs after evaluation.

    :param values: iterable of functions taking the namespace
    """

    def as_list(objs):
        return [value(objs) for value in values]

    object.__setattr__(as_list, '__objquery__', True)
    return as_list


def query_value(q):
    """
    Computes a path expression. The query (@q) is a list of attribute names and
    associated where expressions. The function returned computes the result when
    called.

    :param q: query List[(attr_name, filter_condition)]
    :return: function(namespace) returning the list of matching objects
             (a plain list, so duplicates are kept)
    """
    attrs = q

    def query(namespace):
        def select(namespace, attrs_path):
            """
            A generator which computes the actual results.

            :param namespace: dictionary of available objects
            :param attrs_path: List[(attr_name, filter_condition)]
            """
            # Each queue entry pairs an object with the index of the path step
            # to apply to it. Keeping the index next to the object (instead of
            # storing it as an attribute on the object) leaves visited objects
            # unmodified, works for objects that reject new attributes, and
            # keeps entries independent when one object is queued at several
            # depths.
            queue = deque()

            # The namespace dict is wrapped in a throwaway class so that its
            # names are fetched with the same attribute access as any other
            # object met along the path.
            queue.appendleft((type('base', (object,), namespace), 0))

            while queue:
                current_namespace, index = queue.pop()
                attr_name, where = attrs_path[index]
                is_last = index + 1 == len(attrs_path)

                attr_value = sheerka_getattr(current_namespace, attr_name)

                if not isinstance(attr_value, str) and hasattr(attr_value, '__iter__'):
                    # --- iterable attribute ---
                    if where is not None:
                        # First try the where clause as an indexer: it is
                        # evaluated without 'self'; any non boolean result is
                        # used as the index / key.
                        namespace_copy = dict(namespace)
                        try:
                            res = where(namespace_copy)
                        except (NameError, KeyError, TypeError):
                            res = NotInit

                        if res is not NotInit and not isinstance(res, bool):
                            item_to_use = attr_value[res]
                            if is_last:
                                yield item_to_use  # last attribute: result
                            else:
                                queue.appendleft((item_to_use, index + 1))
                            continue

                    # Otherwise the where clause (if any) filters the children.
                    for item in attr_value:
                        if isinstance(attr_value, dict):
                            item_to_use = KeyValuePair(item, attr_value[item])
                        else:
                            item_to_use = item

                        if where is not None:
                            namespace_copy = dict(namespace)
                            namespace_copy.update({'self': item_to_use})
                            try:
                                if not where(namespace_copy):
                                    continue
                            except (AttributeError, TypeError, KeyError):
                                # a child the condition cannot be evaluated on
                                # is treated as not matching
                                continue

                        if is_last:
                            yield item_to_use  # last attribute: result
                        else:
                            queue.appendleft((item_to_use, index + 1))
                else:
                    # --- non iterable attribute ---
                    if where is not None:
                        namespace_copy = dict(namespace)
                        namespace_copy.update({'self': attr_value})
                        res = where(namespace_copy)
                        if not isinstance(res, bool):
                            # a non boolean where clause is an indexer, which
                            # makes no sense on a non iterable value
                            raise TypeError(f"'{type(attr_value).__name__}' object is not subscriptable")
                        if not res:
                            continue

                    if is_last:
                        yield attr_value  # last attribute: result
                    else:
                        queue.appendleft((attr_value, index + 1))

        return list(select(namespace, attrs))

    object.__setattr__(query, '__objquery__', True)
    return query


def quantified_value(mode, name, s, satisfies):
    """
    Processes the quantified expressions (some x in <> satisfies ...) and
    returns the where function.

    :param mode: 'every' or 'some'
    :param name: name the current element is bound to in the namespace
    :param s: function(namespace) producing the collection to iterate
    :param satisfies: function(namespace) evaluated once per element
    :raises Exception: (when the where function is called) if @mode is neither
                       'every' nor 'some' and the collection is not empty
    """

    def where(namespace):
        def bind(x):
            # copy the namespace so the upper namespace is not squashed
            cobjs = dict(namespace)
            cobjs.update({name: x})
            return cobjs

        nobjs = s(namespace)  # runs the first part of the query (eg. the expression)
        if not nobjs:
            return False  # an empty collection never satisfies the quantifier
        if mode == 'every':
            # stops at the first element that does not satisfy the condition
            return all(satisfies(bind(x)) for x in nobjs)
        if mode == 'some':
            return any(satisfies(bind(x)) for x in nobjs)
        raise Exception("mode '%s' is not 'every' or 'some'" % mode)

    return where


def flwr_sequence(return_expr,
                  for_expr=None,  # list of (name, function_that_returns_a_collection)
                  let_expr=None,
                  where_expr=None,
                  order_expr=None,
                  flatten=False,
                  collecting=False):
    """
    Returns the function to calculate the results of a flwr expression

    :param return_expr: list of functions (positional return values), list of
                        (name, function) tuples (named return values) or, when
                        collecting, list of dicts with the keys 'value', 'as'
                        and 'with'
    :param for_expr: list of (name, function_that_returns_a_collection)
    :param let_expr: list of (name, function) evaluated in order for each
                     iteration
    :param where_expr: function(namespace); iterations for which it is falsy
                       are skipped
    :param order_expr: (attr, direction); direction 'DESC' reverses the order
    :param flatten: when True the single return value is flattened
    :param collecting: when True the results are reduced into dictionaries
    :return: function(namespace) computing the results
    """
    if flatten:
        assert len(return_expr) == 1 and not isinstance(return_expr[0], tuple)
        assert not collecting

    def sequence(namespace):

        def _flatten_func(items):
            # Recursively yields the leaves of nested tuples / lists / sets.
            if not isinstance(items, (tuple, list, set)):
                yield items
                return
            for item in items:
                if isinstance(item, (tuple, list, set)):
                    yield from _flatten_func(item)
                else:
                    yield item

        def _build_yield(_namespace):
            # Builds the value produced for one iteration of the for clause.

            def _build_return(obj):
                # Any exception raised while evaluating a return value is
                # returned as the value instead of being propagated.
                try:
                    if len(obj) == 1 and not isinstance(obj[0], tuple):
                        return obj[0](_namespace)  # single unnamed value
                    if isinstance(obj[0], tuple):  # it has named return values
                        return {name: f(_namespace) for name, f in obj}
                    # multiple positional return values
                    return tuple(f(_namespace) for f in obj)
                except Exception as ex:
                    return ex

            if not collecting:
                return _build_return(return_expr)

            # return a list of collecting information
            return [
                {
                    'value': _build_return(collector_def['value']),
                    'as': collector_def['as'](_namespace),
                    'with': collector_def['with'](_namespace)
                }
                for collector_def in return_expr
            ]

        def compute_sequence(_namespace):
            # Tim Henderson
            # Take the cartesian product of the for expressions.
            # Note that every collection is computed from the outer namespace,
            # so a for clause cannot iterate over a variable bound by a
            # previous one (eg. for x in <collection>, y in x).
            # :sadface: some day I will fix this.
            # However I will only do that when I implement an optimizer
            # for PyQuery, otherwise it just isn't worth it.
            if for_expr is not None:
                obs = ([(name, obj) for obj in get_collection(_namespace)]
                       for name, get_collection in for_expr)
            else:
                # Tim Henderson
                # The goal is to get the for loop to run once. This syntax does
                # it. We may not have a for_expr but we want everything else
                # to execute normally.
                obs = [[None]]

            for items in product(*obs):
                namespace_copy = dict(_namespace)
                if for_expr is not None:
                    for name, item in items:
                        namespace_copy.update({name: item})
                if let_expr:
                    # each let may use the names bound by the previous ones
                    for name, let in let_expr:
                        namespace_copy.update({name: let(namespace_copy)})
                if where_expr and not where_expr(namespace_copy):
                    continue  # skip if the where fails
                if not flatten:
                    yield _build_yield(namespace_copy)
                else:
                    yield from _flatten_func(return_expr[0](namespace_copy))

        if collecting:
            # one accumulator dictionary per collector definition
            rets = tuple(dict() for _ in range(len(return_expr)))
            for collectors in compute_sequence(namespace):
                for i, collector in enumerate(collectors):
                    _as = collector['as']
                    _rf = collector['with']
                    _value = collector['value']
                    # reduce: the previous accumulated value is None the first time
                    rets[i][_as] = _rf(rets[i].get(_as, None), _value)
            return rets[0] if len(rets) == 1 else rets

        r = list(compute_sequence(namespace))
        if r and order_expr:
            attr, direction = order_expr
            if isinstance(attr, str):
                if not isinstance(return_expr[0], tuple):
                    raise SyntaxError("Using a name in the order by clause when not using named return values.")
            else:
                if isinstance(return_expr[0], tuple):
                    raise SyntaxError(
                        "Using a number in the order by clause when not using positional return values.")

            if len(return_expr) == 1 and not isinstance(return_expr[0], tuple):
                keyfunc = lambda x: x  # single value: sort on the value itself
            else:
                keyfunc = lambda x: x[attr]
            reverse_order = direction == 'DESC'
            r = sorted(r, key=keyfunc, reverse=reverse_order)
        return tuple(r)

    object.__setattr__(sequence, '__objquery__', True)
    return sequence


def function_definition(params, query):
    """
    Returns a function which, given a namespace, builds the callable
    implementing a user defined function.

    :param params: names of the parameters
    :param query: function(namespace) computing the body of the function
    """

    def flwr_function(namespace):
        def function(*args):
            if len(args) != len(params):
                raise RuntimeError("Got wrong number of params expected %d got %d" % (len(params), len(args)))
            # the arguments are bound in a copy so the definition namespace
            # is left untouched
            namespace_copy = dict(namespace)
            namespace_copy.update(zip(params, args))
            return query(namespace_copy)

        return function

    return flwr_function


def if_expression(condition, then, otherwise):
    """
    Returns the function which evaluates @then or @otherwise depending on the
    truthiness of @condition. The three arguments are functions taking the
    namespace; only the selected branch is evaluated.
    """

    def if_expr(namespace):
        if condition(namespace):
            return then(namespace)
        return otherwise(namespace)

    return if_expr