KiTE.diffusion_maps
Utilities to transform coordinates and distances into a diffusion space.
1""" 2Utilities to transform coordinates and distances into a diffusion space. 3""" 4 5import numpy as np 6from sklearn.metrics import pairwise_distances 7from sklearn.metrics.pairwise import euclidean_distances 8from scipy.linalg import eigh 9 10 11def calculate_kernel_matrix(X, epsilon): 12 """ 13 Calculate Kernel Matrix ... an indication of local geometry 14 NOTE: K is symmetric (k(x,y)=k(y,x)) and positivity preserving (k(x,y) >= 0 forall x,y) 15 16 Parameters 17 ---------- 18 X : numpy-array 19 Input data 20 21 epsilon : float 22 Metric for kernel width 23 24 Returns 25 ------- 26 numpy-array 27 Kernel Matrix 28 """ 29 distance_sq = euclidean_distances(X, X, squared=True) 30 K = np.exp(-distance_sq / (2.0 * epsilon)) 31 32 return K 33 34 35def get_connectivity_matrix(K): 36 """ 37 Calculate connectivity matrix (as normalization of Kernel Matrix Rows). 38 Each value in connectivity matrix is probability of stepping to another cell in 1 timestep! 39 Multiply connectivity matrices to see how probabilities change over 2 timesteps 40 41 NOTES: 42 - p is positivity preserving (p(x,y) >= 0 forall x,y) & sum of P in each row = 1 43 44 45 P_i = collection of all connectivities leading to point xi 46 = Diffusion Space of X 47 48 Parameters 49 ---------- 50 K : numpy-array 51 kernel matrix 52 53 Returns 54 ------- 55 numpy-array 56 Full Connectivity Matrix 57 """ 58 59 # 1. d_row = sum across K's 1st dimension 60 dx = K.sum(axis=0) 61 assert 0 not in dx 62 63 # 2. p_ij = k_ij / d_row 64 p = np.multiply(1 / dx, K) 65 return p 66 67 68def transform_into_diffusion_space(K=None, num_timesteps=1, num_eigenvectors=10): 69 """ 70 Given Kernel, calculates connectivity matrix (as normalization of Kernel Matrix Rows). 71 Performs Eigendecomposition to transform kernel coordinates into a diffusion space 72 73 Parameters 74 ---------- 75 K : numpy-array 76 kernel matrix 77 num_timesteps : int 78 Number of timesteps for diffusion calculation 79 num_eigenvectors : int 80 Number of eigenvectors (used in descending order of magnitude) used to calculate diffusion coordinates 81 82 Returns 83 ------- 84 numpy-array 85 Diffusion Coordinates/Map 86 """ 87 p = get_connectivity_matrix(K) 88 89 # Eigendecomposition: 90 eigenvalues, eigenvectors = eigh(p) 91 92 # Build Diffy Map: 93 n = len(eigenvalues) 94 assert len(eigenvalues) == len(eigenvectors) 95 diffy_map = [] 96 for i in range(num_eigenvectors): 97 indx_from_back = n - i - 1 98 val = (eigenvalues[indx_from_back]) ** num_timesteps 99 vec = eigenvectors[indx_from_back] 100 diffy_map.append(val * vec) 101 102 return diffy_map 103 104 105def calculate_diffusion_distance_matrix(diffy_map): 106 """ 107 New Distance based on pairwise distance between 2 points' connectivity 108 109 D^2 = sum_u [ (P^t)_iu - (P^t)_ju ]^2 ... D = dist(p_iu, p_ij) 110 111 D(xi, xj) ^ 2 = (Pi - Pj)^2 112 113 Parameters 114 ---------- 115 diffy_map : numpy-array 116 Diffusion Coordinates/Map 117 118 Returns 119 ------- 120 numpy-array 121 Diffusion Distance Matrix 122 """ 123 diffy_distance = pairwise_distances(diffy_map, metric="euclidean") 124 return diffy_distance
12def calculate_kernel_matrix(X, epsilon): 13 """ 14 Calculate Kernel Matrix ... an indication of local geometry 15 NOTE: K is symmetric (k(x,y)=k(y,x)) and positivity preserving (k(x,y) >= 0 forall x,y) 16 17 Parameters 18 ---------- 19 X : numpy-array 20 Input data 21 22 epsilon : float 23 Metric for kernel width 24 25 Returns 26 ------- 27 numpy-array 28 Kernel Matrix 29 """ 30 distance_sq = euclidean_distances(X, X, squared=True) 31 K = np.exp(-distance_sq / (2.0 * epsilon)) 32 33 return K
Calculate Kernel Matrix ... an indication of local geometry NOTE: K is symmetric (k(x,y)=k(y,x)) and positivity preserving (k(x,y) >= 0 forall x,y)
Parameters
- X (numpy-array): Input data
- epsilon (float): Metric for kernel width
Returns
- numpy-array: Kernel Matrix
36def get_connectivity_matrix(K): 37 """ 38 Calculate connectivity matrix (as normalization of Kernel Matrix Rows). 39 Each value in connectivity matrix is probability of stepping to another cell in 1 timestep! 40 Multiply connectivity matrices to see how probabilities change over 2 timesteps 41 42 NOTES: 43 - p is positivity preserving (p(x,y) >= 0 forall x,y) & sum of P in each row = 1 44 45 46 P_i = collection of all connectivities leading to point xi 47 = Diffusion Space of X 48 49 Parameters 50 ---------- 51 K : numpy-array 52 kernel matrix 53 54 Returns 55 ------- 56 numpy-array 57 Full Connectivity Matrix 58 """ 59 60 # 1. d_row = sum across K's 1st dimension 61 dx = K.sum(axis=0) 62 assert 0 not in dx 63 64 # 2. p_ij = k_ij / d_row 65 p = np.multiply(1 / dx, K) 66 return p
Calculate connectivity matrix (as normalization of Kernel Matrix Rows). Each value in connectivity matrix is probability of stepping to another cell in 1 timestep! Multiply connectivity matrices to see how probabilities change over 2 timesteps
NOTES: - p is positivity preserving (p(x,y) >= 0 forall x,y) & sum of P in each row = 1
P_i = collection of all connectivities leading to point xi = Diffusion Space of X
Parameters
- K (numpy-array): kernel matrix
Returns
- numpy-array: Full Connectivity Matrix
69def transform_into_diffusion_space(K=None, num_timesteps=1, num_eigenvectors=10): 70 """ 71 Given Kernel, calculates connectivity matrix (as normalization of Kernel Matrix Rows). 72 Performs Eigendecomposition to transform kernel coordinates into a diffusion space 73 74 Parameters 75 ---------- 76 K : numpy-array 77 kernel matrix 78 num_timesteps : int 79 Number of timesteps for diffusion calculation 80 num_eigenvectors : int 81 Number of eigenvectors (used in descending order of magnitude) used to calculate diffusion coordinates 82 83 Returns 84 ------- 85 numpy-array 86 Diffusion Coordinates/Map 87 """ 88 p = get_connectivity_matrix(K) 89 90 # Eigendecomposition: 91 eigenvalues, eigenvectors = eigh(p) 92 93 # Build Diffy Map: 94 n = len(eigenvalues) 95 assert len(eigenvalues) == len(eigenvectors) 96 diffy_map = [] 97 for i in range(num_eigenvectors): 98 indx_from_back = n - i - 1 99 val = (eigenvalues[indx_from_back]) ** num_timesteps 100 vec = eigenvectors[indx_from_back] 101 diffy_map.append(val * vec) 102 103 return diffy_map
Given Kernel, calculates connectivity matrix (as normalization of Kernel Matrix Rows). Performs Eigendecomposition to transform kernel coordinates into a diffusion space
Parameters
- K (numpy-array): kernel matrix
- num_timesteps (int): Number of timesteps for diffusion calculation
- num_eigenvectors (int): Number of eigenvectors (used in descending order of magnitude) used to calculate diffusion coordinates
Returns
- numpy-array: Diffusion Coordinates/Map
106def calculate_diffusion_distance_matrix(diffy_map): 107 """ 108 New Distance based on pairwise distance between 2 points' connectivity 109 110 D^2 = sum_u [ (P^t)_iu - (P^t)_ju ]^2 ... D = dist(p_iu, p_ij) 111 112 D(xi, xj) ^ 2 = (Pi - Pj)^2 113 114 Parameters 115 ---------- 116 diffy_map : numpy-array 117 Diffusion Coordinates/Map 118 119 Returns 120 ------- 121 numpy-array 122 Diffusion Distance Matrix 123 """ 124 diffy_distance = pairwise_distances(diffy_map, metric="euclidean") 125 return diffy_distance
New Distance based on pairwise distance between 2 points' connectivity
D^2 = sum_u [ (P^t)_iu - (P^t)_ju ]^2 ... D = dist(p_iu, p_ij)
D(xi, xj) ^ 2 = (Pi - Pj)^2
Parameters
- diffy_map (numpy-array): Diffusion Coordinates/Map
Returns
- numpy-array: Diffusion Distance Matrix