Source code for parallel_statistics.sparse

import numpy as np

class SparseArray:
    """
    A sparse 1D array class.

    This is not complete, and is mainly designed to support the use case in
    this package.  The scipy sparse classes are all focused on matrix
    applications and did not quite fit.

    These operations are defined:

    * Setting and getting indices
    * Adding another ``SparseArray``
    * Subtracting another ``SparseArray``
    * Multiplying by another ``SparseArray`` with the same indices
    * Dividing by another ``SparseArray`` with the same indices
    * Raising the array to a scalar power
    * Comparing to a scalar, or to another ``SparseArray`` with the same
      indices

    Examples
    --------
    >>> s = SparseArray()
    >>> s[1000] = 1.0
    >>> s[2000] = 2.0
    >>> t = s + s

    Attributes
    ----------
    d : dict
        The dictionary of set indices (keys) and values
    size : int or None
        The maximum size of the array, or None for no maximum
    dtype : callable
        Scalar type that values are converted to when they are set
    """

    def __init__(self, size=None, dtype=np.float64):
        """Create a sparse array.

        Parameters
        ----------
        size: int, optional
            The maximum size of the array.  Used only on conversion to dense
            array, and when checking inputs.  It can be left as None to have
            no maximum size, in which case dense array output will just use
            whatever the maximum set index was.

        dtype: numpy data-type, optional
            Type that values are converted to when set.  Scalar types
            (e.g. ``np.float64``), ``np.dtype`` instances and dtype strings
            are all accepted.
        """
        self.d = {}
        self.size = size
        # Values are converted by *calling* self.dtype.  np.dtype instances
        # and dtype strings are not callable, so swap them for the matching
        # scalar type; anything already callable is kept exactly as given.
        if not callable(dtype):
            dtype = np.dtype(dtype).type
        self.dtype = dtype

    def count_nonzero(self):
        """The number of non-zero array elements

        This is the number of indices that have been set, whatever value
        was stored at them.

        Returns
        ----------
        int
        """
        return len(self.d)

    def __setitem__(self, index, value):
        """Set a value in the array.

        Parameters
        ----------
        index: int or numpy array
            A single index, or an array of indices.

        value: scalar or numpy array
            With an array ``index`` this may be an array, paired with the
            indices element by element, or a scalar stored at every index.

        Raises
        ------
        IndexError
            If ``size`` was given and an index is greater than or equal to it
        """
        if isinstance(index, np.ndarray):
            if index.size == 0:
                return
            # Only the largest index needs checking against the size limit
            m = index.max()
            if self.size is not None and m >= self.size:
                raise IndexError(f"Index {m} too large in sparse array size {self.size}")
            if isinstance(value, np.ndarray):
                for i, v in zip(index, value):
                    self.d[i] = self.dtype(v)
            else:
                # Convert the scalar once rather than once per index
                v = self.dtype(value)
                for i in index:
                    self.d[i] = v
        else:
            if self.size is not None and index >= self.size:
                raise IndexError("Index value too large")
            self.d[index] = self.dtype(value)

    def _set_direct(self, index, value):
        # Like __setitem__ but bypassing the checks
        # and type conversion for speed.
        self.d[index] = value

    def __getitem__(self, index):
        """Get a value in the array

        Parameters
        ----------
        index: int

        Returns
        -------
        value: dtype
            The stored value, or the float 0.0 if the index has not been set
        """
        return self.d.get(index, 0.0)

    def __mul__(self, other):
        """Multiply element-wise, over the indices set in this array."""
        x = SparseArray()
        for k, v in self.d.items():
            x[k] = v * other[k]
        return x

    def __truediv__(self, other):
        """Divide element-wise, over the indices set in this array."""
        x = SparseArray()
        for k, v in self.d.items():
            x[k] = v / other[k]
        return x

    def __add__(self, other):
        """Add element-wise, over the indices set in either array."""
        keys = set()
        keys.update(self.d.keys(), other.d.keys())
        x = SparseArray()
        for k in keys:
            x[k] = self[k] + other[k]
        return x

    def __iadd__(self, other):
        """Add another sparse array to this one in place."""
        for k, v in other.d.items():
            self[k] += v
        return self

    def __sub__(self, other):
        """Subtract element-wise, over the indices set in either array."""
        keys = set()
        keys.update(self.d.keys(), other.d.keys())
        x = SparseArray()
        for k in keys:
            x[k] = self[k] - other[k]
        return x

    def __pow__(self, y):
        """Raise every set element to the power ``y``."""
        x = SparseArray()
        for k in self.d.keys():
            x[k] = self[k]**y
        return x

    def __eq__(self, val):
        """Find the set indices whose values equal ``val``.

        Parameters
        ----------
        val: scalar or SparseArray
            A scalar is compared with every set value.  A ``SparseArray``
            must have exactly the same set indices as this one.

        Returns
        -------
        indices: array or None
            Integer array of the matching indices.  None is returned if
            ``val`` is neither a scalar nor a ``SparseArray``.

        Raises
        ------
        ValueError
            If ``val`` is a ``SparseArray`` with different set indices
        """
        if np.isscalar(val):
            inds = [k for k, v in self.d.items() if v == val]
        elif isinstance(val, SparseArray):
            if set(val.d.keys()) != set(self.d.keys()):
                raise ValueError("Cannot compare two sparse arrays with different hit indices")
            inds = [k for k, v in self.d.items() if v == val[k]]
        else:
            return None
        # An explicit integer dtype keeps the result usable as an index
        # array even when nothing matched (a bare empty list would become
        # a float64 array).
        return np.array(inds, dtype=np.int64)

    def to_dense(self):
        """
        Make a dense version of the array, just as a plain numpy array.
        Un-set values will be zero.

        If no size was given, the array is just long enough to hold the
        largest set index, and is empty if nothing has been set.

        Returns
        -------
        dense: array
            Dense version of array, with this array's dtype
        """
        if self.size is not None:
            size = self.size
        elif self.d:
            size = max(self.d.keys()) + 1
        else:
            # Nothing set and no declared size: max() would fail on an
            # empty sequence, so return an empty array instead
            size = 0
        dense = np.zeros(size, dtype=self.dtype)
        for k, v in self.d.items():
            dense[k] = v
        return dense

    @classmethod
    def from_dense(cls, dense):
        """
        Convert a standard (dense) 1D array into a sparse array,
        elements with value zero will not be set in the new array.

        Parameters
        ----------
        dense: array
            1D numpy array to convert to sparse form

        Returns
        -------
        sparse: SparseArray

        Raises
        ------
        ValueError
            If ``dense`` has more than one dimension
        """
        dense = np.atleast_1d(dense)
        if dense.ndim > 1:
            raise ValueError("Only 1D arrays can be made sparse")
        # Pass the scalar type, not the np.dtype instance: the dtype is
        # called to convert values whenever an element is set later on.
        sparse = cls(size=dense.size, dtype=dense.dtype.type)
        # Locate the non-zero entries in numpy and loop over those only.
        # _set_direct bypasses the checks in __setitem__
        hits = np.flatnonzero(dense != 0)
        for k, v in zip(hits.tolist(), dense[hits]):
            sparse._set_direct(k, v)
        return sparse

    def to_arrays(self):
        """
        Return the indices (keys) and values of elements that have been set.

        Both arrays are sorted by increasing index.

        Returns
        -------
        indices: array
            indices of elements that have been set.

        values: array
            values of elements that have been set.
        """
        indices = np.fromiter(self.d.keys(), dtype=np.int64)
        values = np.fromiter(self.d.values(), dtype=self.dtype)
        order = indices.argsort()
        indices = indices[order]
        values = values[order]
        return indices, values