KiTE.diffusion_maps

Utilities to transform coordinates and distances into a diffusion space.

  1"""
  2Utilities to transform coordinates and distances into a diffusion space.
  3"""
  4
  5import numpy as np
  6from sklearn.metrics import pairwise_distances
  7from sklearn.metrics.pairwise import euclidean_distances
  8from scipy.linalg import eigh
  9
 10
 11def calculate_kernel_matrix(X, epsilon):
 12    """
 13    Calculate Kernel Matrix ... an indication of local geometry
 14    NOTE: K is symmetric (k(x,y)=k(y,x)) and positivity preserving (k(x,y) >= 0 forall x,y)
 15
 16    Parameters
 17    ----------
 18    X : numpy-array
 19        Input data
 20
 21    epsilon : float
 22        Metric for kernel width
 23
 24    Returns
 25    -------
 26    numpy-array
 27        Kernel Matrix
 28    """
 29    distance_sq = euclidean_distances(X, X, squared=True)
 30    K = np.exp(-distance_sq / (2.0 * epsilon))
 31
 32    return K
 33
 34
 35def get_connectivity_matrix(K):
 36    """
 37    Calculate connectivity matrix (as normalization of Kernel Matrix Rows).
 38    Each value in connectivity matrix is probability of stepping to another cell in 1 timestep!
 39    Multiply connectivity matrices to see how probabilities change over 2 timesteps
 40
 41    NOTES:
 42        - p is positivity preserving (p(x,y) >= 0 forall x,y) & sum of P in each row = 1
 43
 44
 45    P_i = collection of all connectivities leading to point xi
 46        = Diffusion Space of X
 47
 48    Parameters
 49    ----------
 50    K : numpy-array
 51        kernel matrix
 52
 53    Returns
 54    -------
 55    numpy-array
 56        Full Connectivity Matrix
 57    """
 58
 59    # 1. d_row = sum across K's 1st dimension
 60    dx = K.sum(axis=0)
 61    assert 0 not in dx
 62
 63    # 2. p_ij = k_ij / d_row
 64    p = np.multiply(1 / dx, K)
 65    return p
 66
 67
 68def transform_into_diffusion_space(K=None, num_timesteps=1, num_eigenvectors=10):
 69    """
 70    Given Kernel, calculates connectivity matrix (as normalization of Kernel Matrix Rows).
 71    Performs Eigendecomposition to transform kernel coordinates into a diffusion space
 72
 73    Parameters
 74    ----------
 75    K : numpy-array
 76        kernel matrix
 77    num_timesteps : int
 78        Number of timesteps for diffusion calculation
 79    num_eigenvectors : int
 80        Number of eigenvectors (used in descending order of magnitude) used to calculate diffusion coordinates
 81
 82    Returns
 83    -------
 84    numpy-array
 85        Diffusion Coordinates/Map
 86    """
 87    p = get_connectivity_matrix(K)
 88
 89    # Eigendecomposition:
 90    eigenvalues, eigenvectors = eigh(p)
 91
 92    # Build Diffy Map:
 93    n = len(eigenvalues)
 94    assert len(eigenvalues) == len(eigenvectors)
 95    diffy_map = []
 96    for i in range(num_eigenvectors):
 97        indx_from_back = n - i - 1
 98        val = (eigenvalues[indx_from_back]) ** num_timesteps
 99        vec = eigenvectors[indx_from_back]
100        diffy_map.append(val * vec)
101
102    return diffy_map
103
104
105def calculate_diffusion_distance_matrix(diffy_map):
106    """
107    New Distance based on pairwise distance between 2 points' connectivity
108
109    D^2 = sum_u [ (P^t)_iu - (P^t)_ju ]^2 ... D = dist(p_iu, p_ij)
110
111    D(xi, xj) ^ 2 = (Pi - Pj)^2
112
113    Parameters
114    ----------
115    diffy_map : numpy-array
116        Diffusion Coordinates/Map
117
118    Returns
119    -------
120    numpy-array
121        Diffusion Distance Matrix
122    """
123    diffy_distance = pairwise_distances(diffy_map, metric="euclidean")
124    return diffy_distance
def calculate_kernel_matrix(X, epsilon):
12def calculate_kernel_matrix(X, epsilon):
13    """
14    Calculate Kernel Matrix ... an indication of local geometry
15    NOTE: K is symmetric (k(x,y)=k(y,x)) and positivity preserving (k(x,y) >= 0 forall x,y)
16
17    Parameters
18    ----------
19    X : numpy-array
20        Input data
21
22    epsilon : float
23        Metric for kernel width
24
25    Returns
26    -------
27    numpy-array
28        Kernel Matrix
29    """
30    distance_sq = euclidean_distances(X, X, squared=True)
31    K = np.exp(-distance_sq / (2.0 * epsilon))
32
33    return K

Calculate Kernel Matrix ... an indication of local geometry NOTE: K is symmetric (k(x,y)=k(y,x)) and positivity preserving (k(x,y) >= 0 forall x,y)

Parameters
  • X (numpy-array): Input data
  • epsilon (float): Metric for kernel width
Returns
  • numpy-array: Kernel Matrix
def get_connectivity_matrix(K):
36def get_connectivity_matrix(K):
37    """
38    Calculate connectivity matrix (as normalization of Kernel Matrix Rows).
39    Each value in connectivity matrix is probability of stepping to another cell in 1 timestep!
40    Multiply connectivity matrices to see how probabilities change over 2 timesteps
41
42    NOTES:
43        - p is positivity preserving (p(x,y) >= 0 forall x,y) & sum of P in each row = 1
44
45
46    P_i = collection of all connectivities leading to point xi
47        = Diffusion Space of X
48
49    Parameters
50    ----------
51    K : numpy-array
52        kernel matrix
53
54    Returns
55    -------
56    numpy-array
57        Full Connectivity Matrix
58    """
59
60    # 1. d_row = sum across K's 1st dimension
61    dx = K.sum(axis=0)
62    assert 0 not in dx
63
64    # 2. p_ij = k_ij / d_row
65    p = np.multiply(1 / dx, K)
66    return p

Calculate connectivity matrix (as normalization of Kernel Matrix Rows). Each value in connectivity matrix is probability of stepping to another cell in 1 timestep! Multiply connectivity matrices to see how probabilities change over 2 timesteps

NOTES: - p is positivity preserving (p(x,y) >= 0 forall x,y) & sum of P in each row = 1

P_i = collection of all connectivities leading to point xi = Diffusion Space of X

Parameters
  • K (numpy-array): kernel matrix
Returns
  • numpy-array: Full Connectivity Matrix
def transform_into_diffusion_space(K=None, num_timesteps=1, num_eigenvectors=10):
 69def transform_into_diffusion_space(K=None, num_timesteps=1, num_eigenvectors=10):
 70    """
 71    Given Kernel, calculates connectivity matrix (as normalization of Kernel Matrix Rows).
 72    Performs Eigendecomposition to transform kernel coordinates into a diffusion space
 73
 74    Parameters
 75    ----------
 76    K : numpy-array
 77        kernel matrix
 78    num_timesteps : int
 79        Number of timesteps for diffusion calculation
 80    num_eigenvectors : int
 81        Number of eigenvectors (used in descending order of magnitude) used to calculate diffusion coordinates
 82
 83    Returns
 84    -------
 85    numpy-array
 86        Diffusion Coordinates/Map
 87    """
 88    p = get_connectivity_matrix(K)
 89
 90    # Eigendecomposition:
 91    eigenvalues, eigenvectors = eigh(p)
 92
 93    # Build Diffy Map:
 94    n = len(eigenvalues)
 95    assert len(eigenvalues) == len(eigenvectors)
 96    diffy_map = []
 97    for i in range(num_eigenvectors):
 98        indx_from_back = n - i - 1
 99        val = (eigenvalues[indx_from_back]) ** num_timesteps
100        vec = eigenvectors[indx_from_back]
101        diffy_map.append(val * vec)
102
103    return diffy_map

Given Kernel, calculates connectivity matrix (as normalization of Kernel Matrix Rows). Performs Eigendecomposition to transform kernel coordinates into a diffusion space

Parameters
  • K (numpy-array): kernel matrix
  • num_timesteps (int): Number of timesteps for diffusion calculation
  • num_eigenvectors (int): Number of eigenvectors (used in descending order of magnitude) used to calculate diffusion coordinates
Returns
  • numpy-array: Diffusion Coordinates/Map
def calculate_diffusion_distance_matrix(diffy_map):
106def calculate_diffusion_distance_matrix(diffy_map):
107    """
108    New Distance based on pairwise distance between 2 points' connectivity
109
110    D^2 = sum_u [ (P^t)_iu - (P^t)_ju ]^2 ... D = dist(p_iu, p_ij)
111
112    D(xi, xj) ^ 2 = (Pi - Pj)^2
113
114    Parameters
115    ----------
116    diffy_map : numpy-array
117        Diffusion Coordinates/Map
118
119    Returns
120    -------
121    numpy-array
122        Diffusion Distance Matrix
123    """
124    diffy_distance = pairwise_distances(diffy_map, metric="euclidean")
125    return diffy_distance

New Distance based on pairwise distance between 2 points' connectivity

D^2 = sum_u [ (P^t)_iu - (P^t)_ju ]^2 ... D = dist(p_iu, p_ij)

D(xi, xj) ^ 2 = (Pi - Pj)^2

Parameters
  • diffy_map (numpy-array): Diffusion Coordinates/Map
Returns
  • numpy-array: Diffusion Distance Matrix