Source code for parallel_statistics.sparse

import numpy as np

class SparseArray:
    """
    A sparse 1D array class.

    This is not complete, and is mainly designed to support the
    use case in this package.  The scipy sparse classes are
    all focused on matrix applications and did not quite fit.

    These operations are defined:
     * setting and getting indices

     * Adding another ``SparseArray``

     * Subtracting another ``SparseArray``

     * Multiplying by another ``SparseArray`` with the same indices

     * Dividing by another ``SparseArray`` with the same indices

     * Raising the array to a scalar power

     * Comparing to a scalar, or to another ``SparseArray`` with the
       same indices

    The arithmetic operators return a new ``SparseArray`` whose values are
    stored as ``np.float64`` (the default dtype), because division and
    powers are not closed over integer types.

    Examples
    --------
    >>> s = SparseArray()
    >>> s[1000] = 1.0
    >>> s[2000] = 2.0
    >>> t = s + s


    Attributes
    ----------
    d : dict
        The dictionary of set indices (keys) and values
    size : int or None
        The maximum size of the array, or None for no maximum
    dtype : callable
        Scalar type used to convert values when they are set

    """

    # __eq__ returns index arrays rather than booleans, so instances must
    # not be used as dict keys or set members.
    __hash__ = None

    def __init__(self, size=None, dtype=np.float64):
        """Create a sparse array.

        Parameters
        ----------
        size: int, optional
            The maximum size of the array.
            Used only on conversion to dense array, and when checking inputs.
            It can be left as None to have no maximum size, in which case dense
            array output will just use whatever the maximum set index was.
        dtype: numpy data-type, optional
            Type that values are converted to when set.  Scalar types such
            as ``np.float64`` are used as given; anything that is not
            callable (an ``np.dtype`` instance, a string like ``"f8"``) is
            replaced by the matching numpy scalar type.

        """
        self.d = {}
        self.size = size
        # Values are converted by calling self.dtype(value), and np.dtype
        # instances are not callable, so fall back to their scalar type.
        self.dtype = dtype if callable(dtype) else np.dtype(dtype).type

    def count_nonzero(self):
        """The number of stored array elements.

        Every index that has been set is counted, including any that were
        explicitly set to zero.

        Returns
        -------
        int
        """
        return len(self.d)

    def __setitem__(self, index, value):
        """Set one or more values in the array.

        Parameters
        ----------
        index: int or numpy array
            Index, or array of indices, to set
        value: scalar or numpy array
            Value(s) to store, converted with ``self.dtype``.  With an index
            array, an array of values is paired up element by element and a
            scalar is assigned to every index.

        Raises
        ------
        IndexError
            If ``size`` is set and an index is greater than or equal to it
        """
        cast = self.dtype
        if isinstance(index, np.ndarray):
            if index.size == 0:
                return
            m = index.max()
            if self.size is not None and m >= self.size:
                raise IndexError(f"Index {m} too large in sparse array size {self.size}")
            if isinstance(value, np.ndarray):
                # zip stops at the shorter of the two arrays
                for i, v in zip(index, value):
                    self.d[i] = cast(v)
            else:
                # convert once, then share the same value between all indices
                v = cast(value)
                for i in index:
                    self.d[i] = v
        else:
            if self.size is not None and index >= self.size:
                raise IndexError("Index value too large")
            self.d[index] = cast(value)

    def _set_direct(self, index, value):
        # Like __setitem__ but bypassing the checks
        # and type conversion for speed.
        self.d[index] = value

    def __getitem__(self, index):
        """Get a value in the array

        Parameters
        ----------
        index: int

        Returns
        -------
        value: dtype
            The stored value, or the float ``0.0`` if the index was never set
        """
        return self.d.get(index, 0.0)

    @staticmethod
    def _from_items(items, size=None):
        # Build the result of an arithmetic operation from (index, value)
        # pairs.  Results always use the default dtype (np.float64).
        result = SparseArray(size=size)
        cast = result.dtype
        for k, v in items:
            result.d[k] = cast(v)
        return result

    def _common_size(self, other):
        # Size for a result that can hold indices from both operands:
        # kept only when the two operands agree on it.
        if self.size == getattr(other, "size", None):
            return self.size
        return None

    def __mul__(self, other):
        """Multiply each stored element by the matching element of ``other``."""
        return self._from_items(
            ((k, v * other[k]) for k, v in self.d.items()), self.size
        )

    def __truediv__(self, other):
        """Divide each stored element by the matching element of ``other``."""
        return self._from_items(
            ((k, v / other[k]) for k, v in self.d.items()), self.size
        )

    def __add__(self, other):
        """Add another ``SparseArray``; unset elements count as zero."""
        keys = set(self.d)
        keys.update(other.d)
        return self._from_items(
            ((k, self[k] + other[k]) for k in keys), self._common_size(other)
        )

    def __iadd__(self, other):
        """Add another ``SparseArray`` to this one in place."""
        for k, v in other.d.items():
            self[k] += v
        return self

    def __sub__(self, other):
        """Subtract another ``SparseArray``; unset elements count as zero."""
        keys = set(self.d)
        keys.update(other.d)
        return self._from_items(
            ((k, self[k] - other[k]) for k in keys), self._common_size(other)
        )

    def __pow__(self, y):
        """Raise each stored element to the scalar power ``y``."""
        return self._from_items(
            ((k, v ** y) for k, v in self.d.items()), self.size
        )

    def __eq__(self, val):
        """Find the stored indices whose value equals ``val``.

        Parameters
        ----------
        val: scalar or SparseArray
            A scalar to compare every stored value with, or another
            ``SparseArray`` with exactly the same set indices.

        Returns
        -------
        indices: array
            Integer array of the matching indices (not a boolean)

        Raises
        ------
        ValueError
            If ``val`` is a ``SparseArray`` with different set indices
        """
        if np.isscalar(val):
            inds = [k for k, v in self.d.items() if v == val]
        elif isinstance(val, SparseArray):
            if val.d.keys() != self.d.keys():
                raise ValueError("Cannot compare two sparse arrays with different hit indices")
            inds = [k for k, v in self.d.items() if v == val[k]]
        else:
            # Let Python try the other operand instead of returning None
            return NotImplemented
        # Fixed integer dtype so that an empty result is still usable
        # as an index array (np.array([]) would be float64).
        return np.array(inds, dtype=np.int64)

    def to_dense(self):
        """
        Make a dense version of the array, just as a plain numpy array.
        Un-set values will be zero.

        Returns
        -------
        dense: array
            Dense version of array, with this array's dtype.  Its length is
            ``size`` if that was set, otherwise one more than the largest
            set index (zero if nothing has been set).
        """
        if self.size is not None:
            size = self.size
        elif self.d:
            size = max(self.d) + 1
        else:
            # nothing set and no maximum size: an empty dense array
            size = 0
        dense = np.zeros(size, dtype=self.dtype)
        for k, v in self.d.items():
            dense[k] = v
        return dense

    @classmethod
    def from_dense(cls, dense):
        """
        Convert a standard (dense) 1D array into a sparse array,
        elements with value zero will not be set in the new array.

        Parameters
        ----------
        dense: array
            1D numpy array to convert to sparse form

        Returns
        -------
        sparse: SparseArray
            Sparse array with ``size`` equal to the length of ``dense``
            and the same dtype

        Raises
        ------
        ValueError
            If ``dense`` has more than one dimension
        """
        dense = np.atleast_1d(dense)
        if dense.ndim > 1:
            raise ValueError("Only 1D arrays can be made sparse")

        sparse = cls(size=dense.size, dtype=dense.dtype)

        # Locate the non-zero elements in numpy rather than testing every
        # element in a Python loop.
        hits = np.flatnonzero(dense != 0)
        for k, v in zip(hits.tolist(), dense[hits]):
            # bypasses the checks in __setitem__
            sparse._set_direct(k, v)
        return sparse

    def to_arrays(self):
        """
        Return the indices (keys) and values of elements that have been set.

        Returns
        -------
        indices: array
            indices of elements that have been set, in increasing order.
        values: array
            values of elements that have been set, in the same order.
        """
        indices = np.fromiter(self.d.keys(), dtype=np.int64)
        values = np.fromiter(self.d.values(), dtype=self.dtype)
        order = indices.argsort()
        return indices[order], values[order]