Source code for pygsp2.utils

# -*- coding: utf-8 -*-
r"""
The :mod:`pygsp2.utils` module implements some utility functions used throughout
the package.
"""
import sys
import logging
import functools
import io
import logging
import pkgutil
import sys

import numpy as np
import scipy.io
from scipy import sparse



[docs]
def build_logger(name):
    r"""
    Return the logger called ``name``, configuring it on first use.

    A logger without any handler receives a stream handler that accepts
    records from the DEBUG level on and formats them with the time, the
    level, the logger and function names, and the message. The logger level
    is set to DEBUG as well. A logger that already owns at least one handler
    is returned untouched, so that repeated calls do not stack handlers.

    Parameters
    ----------
    name : string
        Name handed to :func:`logging.getLogger`.

    Returns
    -------
    logger : logging.Logger
        The logger registered under ``name``.

    """
    log = logging.getLogger(name)

    # Already configured: another handler would duplicate every record.
    if log.handlers:
        return log

    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    console.setFormatter(logging.Formatter(
        '%(asctime)s:[%(levelname)s](%(name)s.%(funcName)s): %(message)s'))

    log.setLevel(logging.DEBUG)
    log.addHandler(console)

    return log



# Module-level logger, named after this module and set up by build_logger.
logger = build_logger(__name__)



[docs]
def filterbank_handler(func):
    r"""
    Decorator that applies a function to every filter of a filterbank.

    The decorated function takes the filterbank ``f`` as first argument and
    accepts a keyword argument ``i`` selecting one filter. When the caller
    passes ``i`` explicitly, or when ``f.Nf`` is at most one, the function is
    called once and its result is returned as is. Otherwise it is called for
    each ``i`` in ``range(f.Nf)`` and the results are returned as a list.

    Parameters
    ----------
    func : callable
        Function with signature ``func(f, *args, i=..., **kwargs)``.

    Returns
    -------
    inner : callable
        Wrapped function carrying the name and documentation of ``func``.

    """

    @functools.wraps(func)  # Keep the name and documentation of func.
    def inner(f, *args, **kwargs):
        # Explicit filter index, or at most one filter: a single call.
        # (f.Nf is only looked up when no index was given.)
        if 'i' in kwargs or f.Nf <= 1:
            return func(f, *args, **kwargs)

        # One call per filter, results gathered in filter order.
        return [func(f, *args, i=idx, **kwargs) for idx in range(f.Nf)]

    return inner




[docs]
def loadmat(path):
    r"""
    Load a matlab data file shipped with the package.

    The file is read as a resource of the ``pygsp2`` package, from its
    ``data`` folder, and parsed from memory by :func:`scipy.io.loadmat`.

    Parameters
    ----------
    path : string
        Path to the mat file, relative to the data folder and without the
        ``.mat`` extension.

    Returns
    -------
    data : dict
        Dictionary mapping variable names to the loaded matrices.

    Examples
    --------
    >>> from pygsp2 import utils
    >>> data = utils.loadmat('pointclouds/bunny')
    >>> data['bunny'].shape
    (2503, 3)

    """
    resource = 'data/' + path + '.mat'
    raw = pkgutil.get_data('pygsp2', resource)
    # scipy.io.loadmat wants a file-like object: wrap the raw bytes.
    return scipy.io.loadmat(io.BytesIO(raw))




[docs]
def distanz(x, y=None):
    r"""
    Calculate the pairwise Euclidean distances between column vectors.

    Each column of ``x`` and ``y`` is one point. An input with fewer than two
    dimensions is reshaped to a single row, i.e. each of its entries becomes
    a one-dimensional point.

    Parameters
    ----------
    x : ndarray
        First set of column vectors.
    y : ndarray, optional
        Second set of column vectors. Defaults to ``x``.

    Returns
    -------
    d : ndarray
        Matrix whose entry ``(i, j)`` is the distance between column ``i`` of
        ``x`` and column ``j`` of ``y``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same number of rows.

    Examples
    --------
    >>> from pygsp2 import utils
    >>> x = np.arange(3)
    >>> utils.distanz(x, x)
    array([[0., 1., 2.],
           [1., 0., 1.],
           [2., 1., 0.]])

    """
    if len(x.shape) < 2:
        x = x.reshape(1, x.shape[0])

    if y is None:
        y = x
    elif len(y.shape) < 2:
        y = y.reshape(1, y.shape[0])

    dim_x, num_x = x.shape
    dim_y, num_y = y.shape

    # Both point sets must live in the same space.
    if dim_x != dim_y:
        raise ValueError('The sizes of x and y do not fit')

    sq_norm_x = (x * x).sum(axis=0)
    sq_norm_y = (y * y).sum(axis=0)
    cross = np.dot(x.T, y)

    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 <a, b>, laid out on a
    # (num_x, num_y) grid. The absolute value removes the tiny negative
    # values that rounding may produce.
    grid_x = np.kron(np.ones((num_y, 1)), sq_norm_x).T
    grid_y = np.kron(np.ones((num_x, 1)), sq_norm_y)
    squared = abs(grid_x + grid_y - 2 * cross)

    return np.sqrt(squared)




[docs]
def resistance_distance(G):
    r"""
    Compute the resistance distances of a graph.

    The distances are obtained from the inverse of the Laplacian ``L`` (or
    from its dense Moore-Penrose pseudo-inverse when the sparse inversion
    fails) as ``rd[i, j] = P[i, i] + P[j, j] - P[i, j] - P[j, i]``.

    Parameters
    ----------
    G : Graph or sparse matrix
        Graph structure or Laplacian matrix (L). A graph object must expose
        ``lap_type`` and a sparse ``L``.

    Returns
    -------
    rd : sparse matrix
        distance matrix

    Raises
    ------
    ValueError
        If ``G`` is a graph whose Laplacian is not combinatorial.

    References
    ----------
    :cite:`klein1993resistance`
    """
    # ``from scipy import sparse`` alone does not guarantee that the
    # ``linalg`` submodule is loaded, so import it explicitly.
    from scipy.sparse import linalg as sparse_linalg

    if sparse.issparse(G):
        L = G.tocsc()

    else:
        if G.lap_type != 'combinatorial':
            raise ValueError('Need a combinatorial Laplacian.')
        L = G.L.tocsc()

    try:
        pseudo = sparse_linalg.inv(L)
    except RuntimeError:
        # The sparse inversion failed (e.g. singular matrix): fall back to
        # the dense pseudo-inverse.
        pseudo = sparse.lil_matrix(np.linalg.pinv(L.toarray()))

    N = np.shape(L)[0]
    d = sparse.csc_matrix(pseudo.diagonal())

    # N x N matrix whose entry (i, j) is d[j]; its transpose holds d[i].
    # Built once and reused for both terms.
    diag_cols = sparse.kron(d, sparse.csc_matrix(np.ones((N, 1))))
    rd = diag_cols.T + diag_cols - pseudo - pseudo.T

    return rd




[docs]
def symmetrize(W, method='average'):
    r"""
    Symmetrize a square matrix.

    Both dense arrays and scipy sparse matrices are handled.

    Parameters
    ----------
    W : array_like
        Square matrix to be symmetrized
    method : string
        * 'average' : symmetrize by averaging with the transpose. Most useful
          when transforming a directed graph to an undirected one.
        * 'maximum' : symmetrize by taking the maximum with the transpose.
          Similar to 'fill' except that ambiguous entries are resolved by
          taking the largest value.
        * 'fill' : symmetrize by filling in the zeros in both the upper and
          lower triangular parts. Ambiguous entries are resolved by averaging
          the values.
        * 'tril' : symmetrize by considering the lower triangular part only.
        * 'triu' : symmetrize by considering the upper triangular part only.

    Returns
    -------
    W : array_like
        The symmetrized matrix.

    Raises
    ------
    ValueError
        If the matrix is not square or the method is unknown.

    Notes
    -----
    You can have the sum by multiplying the average by two. It is however not a
    good candidate for this function as it modifies an already symmetric
    matrix.

    Examples
    --------
    >>> from pygsp2 import utils
    >>> W = np.array([[0, 3, 0], [3, 1, 6], [4, 2, 3]], dtype=float)
    >>> W
    array([[0., 3., 0.],
           [3., 1., 6.],
           [4., 2., 3.]])
    >>> utils.symmetrize(W, method='average')
    array([[0., 3., 2.],
           [3., 1., 4.],
           [2., 4., 3.]])
    >>> 2 * utils.symmetrize(W, method='average')
    array([[0., 6., 4.],
           [6., 2., 8.],
           [4., 8., 6.]])
    >>> utils.symmetrize(W, method='maximum')
    array([[0., 3., 4.],
           [3., 1., 6.],
           [4., 6., 3.]])
    >>> utils.symmetrize(W, method='fill')
    array([[0., 3., 4.],
           [3., 1., 4.],
           [4., 4., 3.]])
    >>> utils.symmetrize(W, method='tril')
    array([[0., 3., 4.],
           [3., 1., 2.],
           [4., 2., 3.]])
    >>> utils.symmetrize(W, method='triu')
    array([[0., 3., 0.],
           [3., 1., 6.],
           [0., 6., 3.]])

    """
    if W.shape[0] != W.shape[1]:
        raise ValueError('Matrix must be square.')

    is_sparse = sparse.issparse(W)

    if method == 'average':
        return (W + W.T) / 2

    if method == 'maximum':
        if not is_sparse:
            return np.maximum(W, W.T)
        # Swap in the transposed entry wherever it is the larger one.
        transpose_wins = (W.T > W)
        return W - W.multiply(transpose_wins) + W.T.multiply(transpose_wins)

    if method == 'fill':
        present = (W > 0)  # Boolean type.
        if is_sparse:
            missing = (present + present.T) - present
            filled = W + missing.multiply(W.T)
        else:
            # Numpy boolean subtract is deprecated, hence the logical ops.
            either = np.logical_or(present, present.T)
            missing = np.logical_xor(either, present)
            filled = W + missing * W.T
        # Entries set on both sides with different values are averaged.
        return symmetrize(filled, method='average')

    if method in ('tril', 'triu'):
        # Same function name in both namespaces; pick the matching one.
        triangle = getattr(sparse if is_sparse else np, method)
        return symmetrize(triangle(W), method='maximum')

    raise ValueError('Unknown symmetrization method {}.'.format(method))




[docs]
def rescale_center(x):
    r"""
    Rescale and center data, e.g. embedding coordinates.

    Each row is centered by subtracting its mean, then the whole array is
    divided by the largest (signed) centered value.

    Parameters
    ----------
    x : ndarray
        Two-dimensional data to be rescaled.

    Returns
    -------
    r : ndarray
        Rescaled data.

    Examples
    --------
    >>> from pygsp2 import utils
    >>> x = np.array([[1, 6], [2, 5], [3, 4]])
    >>> utils.rescale_center(x)
    array([[-1. ,  1. ],
           [-0.6,  0.6],
           [-0.2,  0.2]])

    """
    n_cols = x.shape[1]

    # Column vector of row means, repeated across all the columns.
    row_means = np.mean(x, axis=1)[:, np.newaxis]
    centered = x - np.kron(np.ones((1, n_cols)), row_means)

    return centered / np.amax(centered)




[docs]
def compute_log_scales(lmin, lmax, Nscales, t1=1, t2=2):
    r"""
    Compute logarithm scales for wavelets.

    The scales are evenly spaced on a logarithmic axis, going from
    ``t2 / lmin`` (largest) down to ``t1 / lmax`` (smallest).

    Parameters
    ----------
    lmin : float
        Smallest non-zero eigenvalue.
    lmax : float
        Largest eigenvalue, i.e. :py:attr:`pygsp2.graphs.Graph.lmax`.
    Nscales : int
        Number of scales.
    t1 : float
        Numerator of the smallest scale ``t1 / lmax``.
    t2 : float
        Numerator of the largest scale ``t2 / lmin``.

    Returns
    -------
    scales : ndarray
        List of scales of length Nscales.

    Examples
    --------
    >>> from pygsp2 import utils
    >>> utils.compute_log_scales(1, 10, 3)
    array([2.       , 0.4472136, 0.1      ])

    """
    smallest = t1 / lmax
    largest = t2 / lmin

    # Even spacing of the logarithms, mapped back with the exponential.
    log_start = np.log(largest)
    log_stop = np.log(smallest)
    return np.exp(np.linspace(log_start, log_stop, Nscales))




[docs]
def to_sparse(i, j, v, m, n):
    r"""
    Build a sparse matrix from coordinates and values.

    Parameters
    ----------
    i : array_like
        Row index of each value.
    j : array_like
        Column index of each value, same length as ``i``.
    v : array_like
        Values to store, same length as ``i``.
    m : int
        Number of rows of the matrix.
    n : int
        Number of columns of the matrix.

    Returns
    -------
    s : sparse matrix
        Matrix of shape ``(m, n)`` in CSR format, holding the values ``v`` at
        the positions ``(i, j)`` and zeros elsewhere.

    """
    coordinates = (i, j)
    return sparse.csr_matrix((v, coordinates), shape=(m, n))




[docs]
def sum_squareform(n):
    """Return the sparse matrix that sums the squareform of a vector.

    Reference from the unlocbox toolbox function for matlab.

    For a symmetric ``n x n`` matrix ``W`` with zero diagonal and its
    condensed vector ``w = squareform(W)``, ``S`` maps ``w`` to the row sums
    of ``W``. Each entry of ``w`` is one node pair; the pairs are enumerated
    as ``(0, 1), (0, 2), ..., (0, n-1), (1, 2), ...``.

    Parameters
    ----------
    n : int
        Number of nodes, i.e. the size of the square matrix ``W``.

    Returns
    -------
    S : sparse matrix
        Matrix of shape ``(n, n*(n-1)/2)`` such that ``S*w = sum(W)`` for the
        vector ``w = squareform(W)``.
    St : sparse matrix
        The adjoint (transpose) of ``S``, in CSR format.

    Reference:
    https://epfl-lts2.github.io/gspbox-html/doc/learn_graph/gsp_learn_graph_log_degrees.html

    """
    # Number of node pairs, i.e. the length of w given the size of W.
    ncols = n * (n - 1) // 2

    # Integer indices of both nodes of every pair, in squareform order.
    first, second = np.triu_indices(n, k=1)

    # Row k of St has a one in the columns of the two nodes of pair k.
    pair_index = np.arange(ncols)
    rows = np.concatenate([pair_index, pair_index])
    cols = np.concatenate([second, first])
    ones = np.ones(2 * ncols)

    St = sparse.csr_matrix((ones, (rows, cols)), shape=(ncols, n))
    S = St.T

    return (S, St)