KiTE.metrics
Metrics utilities to help test if a model is locally biased.
1""" 2Metrics utilities to help test if a model is locally biased. 3""" 4 5from joblib import Parallel, delayed 6from sklearn.gaussian_process.kernels import pairwise_kernels 7import logging 8import numpy as np 9from KiTE.validation import check_attributes 10 11 12def _calculate_err_vector(Y, p): 13 return Y - p 14 15 16def ELCE2_estimator(K_xx, err): 17 """ 18 The estimator ELCE^2 19 20 Parameters 21 ---------- 22 K_xx : numpy-array 23 evaluated kernel function 24 25 err : numpy-array 26 one-dimensional error vector 27 28 Returns 29 ------- 30 numpy-array 31 estimated ELCE^2 32 """ 33 K = (err.flatten() * K_xx.T).T * err.flatten() 34 return K.sum() - K.diagonal().sum() 35 36 37def ELCE2_normalization(K): 38 """ 39 The normalization of estimator ELCE^2 40 41 Parameters 42 ---------- 43 K : numpy-array 44 evaluated kernel function 45 46 Returns 47 ------- 48 float 49 estimated normalization of ELCE^2 50 """ 51 52 size = K.shape[0] 53 54 return (size - 1.0) * K.sum() / size 55 56 57def ELCE2_null_estimator(err, K, rng): 58 """ 59 Compute the ELCE^2_u for one bootstrap realization 60 61 Parameters 62 ---------- 63 err : numpy-array 64 one-dimensional error vector 65 66 K : numpy-array 67 evaluated kernel function 68 69 rng : type(np.random.RandomState()) 70 numpy random function 71 72 Returns 73 ------- 74 float 75 unbiased estimate of ELCE^2_u 76 77 """ 78 79 # randomize error vector so error = sample .. not looking at local neighbors 80 # checking local calibration .. 81 # randomizaiton -- quanitifies noise in estimator 82 idx = rng.permutation(len(err)) 83 84 return ELCE2_estimator(K, err[idx]) 85 86 87def compute_null_distribution( 88 p_err, K, iterations=1000, n_jobs=1, verbose=False, random_state=None 89): 90 """ 91 Compute the null-distribution of test statistics via a bootstrap procedure 92 93 Parameters 94 ---------- 95 p_err : numpy-array 96 one-dimensional probability error vector 97 98 K : numpy-array 99 evaluated kernel function 100 101 iterations : int 102 controls the number of bootstrap realizations 103 104 verbose : bool 105 controls the verbosity of the model's output 106 107 random_state : type(np.random.RandomState()) or None 108 defines the initial random state 109 110 Returns 111 ------- 112 numpy-array 113 boostrap samples of the test null distribution 114 """ 115 rng = ( 116 random_state 117 if isinstance(random_state, type(np.random.RandomState())) 118 else np.random.RandomState(random_state) 119 ) 120 iterations_list = range(iterations) 121 122 # compute the null distribution 123 # for 1 cpu run the normal code, for more cpu use the Parallel library. This maximize the speed. 124 test_null = ( 125 [ELCE2_null_estimator(p_err, K, rng) for _ in iterations_list] 126 if n_jobs == 1 127 else Parallel(n_jobs=n_jobs, prefer="threads")( 128 delayed(ELCE2_null_estimator)(p_err, K, rng) for _ in iterations_list 129 ) 130 ) 131 132 return np.array(test_null) 133 134 135def ELCE2( 136 X, 137 Y, 138 p, 139 kernel_function="rbf", 140 prob_kernel_width=0.1, 141 iterations=None, 142 use_diffusion_distance=False, 143 verbose=True, 144 random_state=None, 145 n_jobs=1, 146 **kwargs, 147): 148 """ 149 This function estimate ELCE^2_u employing a kernel trick. ELCE^2_u tests if a proposed posterior credible interval 150 is calibrated employing a randomly drawn calibration test. The null hypothesis is that the posteriors are 151 properly calibrated. This function perform a bootstrap algorithm to estimate the null distribution, 152 and corresponding p-value. 153 154 Parameters 155 ---------- 156 X : numpy-array 157 data, of size NxD [N is the number of data points, D is the features dimension] 158 159 Y : numpy-array 160 credible error vector, of size Nx1 [N is the number of data points] 161 162 p : numpy-array 163 probability vector, of size Nx1 [N is the number of data points] 164 165 kernel_function : string 166 defines the kernel function. For the list of implemented kernels, please consult with [sklearn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.kernel_metrics.html#sklearn.metrics.pairwise.kernel_metrics) 167 168 prob_kernel_width : float 169 Width of the probably kernel function 170 171 iterations : int 172 controls the number of bootstrap realizations 173 174 use_diffusion_distance : bool 175 Use diffusion instead of eucliden distances 176 Transforms X into a diffusion space (see diffusion_maps.py) 177 178 verbose : bool 179 controls the verbosity of the model's output 180 181 random_state : type(np.random.RandomState()) or None 182 defines the initial random state 183 184 n_jobs : int 185 number of jobs to run in parallel 186 187 **kwargs : **kwargs 188 extra parameters, these are passed to `pairwise_kernels()` as kernel parameters 189 E.g., if `kernel_two_sample_test(..., kernel_function='rbf', gamma=0.1)` 190 191 Returns 192 ------- 193 tuple 194 - SIZE = 1 if iterations=`None` else 3 (float, numpy-array, float) 195 - first element is the test value, 196 - second element is samples from the null distribution via a bootstraps algorithm, 197 - third element is the estimated p-value. 198 """ 199 200 def create_kernel(): 201 """ 202 RBF = https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.rbf_kernel.html?highlight=gaussian+kernel 203 204 Returns 205 ------- 206 A kernel matrix K such that K_{i, j} is the kernel between the ith and jth vectors of the given matrix X, if Y is None. 207 """ 208 # Pre-compute Kernel Function (Hyperplane/Convolution) 209 K_pp_gamma = 1.0 / (prob_kernel_width**2) 210 K_pp_metric = "rbf" 211 212 # In binary class (p vs 1-p) vs miltiple classification (p1 + ...+ pn = 1) 213 # p should be nx1 for the kernal function 214 if len(p.shape) == 2: 215 K_pp_data = p 216 elif len(p.shape) == 1: # if p.shape == 1 ... turn n, --> nx1 217 K_pp_data = p[:, np.newaxis] 218 else: 219 raise ValueError( 220 f"p has invalid dimensions of {p.shape}. The length of p's shape should be 1 or 2, not {len(p.shape)}" 221 ) 222 223 K_xx = pairwise_kernels(X, X, metric=kernel_function, **kwargs) 224 K_pp = pairwise_kernels( 225 K_pp_data, K_pp_data, metric=K_pp_metric, gamma=K_pp_gamma 226 ) 227 228 # Consider both similar features && similar probabilities 229 # K_xx includes ENTIRE feature space ... may not match input as model .. 230 K = K_pp * K_xx 231 return K 232 233 check_attributes(X, Y) 234 K = create_kernel() 235 assert len(Y) == len(p) 236 assert None not in Y 237 assert None not in p 238 p_err = _calculate_err_vector(Y, p) 239 240 # Estimate the Null "Oracle" Distribution 241 test_value = ELCE2_estimator(K, p_err) 242 norm = ELCE2_normalization(K) 243 244 if verbose: 245 logging.info(f"test value = {(test_value / norm)}") 246 logging.info("Computing the null distribution.") 247 248 if iterations is None: 249 return test_value / norm 250 251 # p-value's resolution 252 resolution = 1.0 / iterations 253 254 # compute the null distribution via a bootstrap algorithm 255 test_null = compute_null_distribution( 256 p_err, 257 K, 258 iterations=iterations, 259 verbose=verbose, 260 n_jobs=n_jobs, 261 random_state=random_state, 262 ) 263 264 # Permutation -- est noise under Ho ... Ho assume no global/local miscallibrate .. should cent around 0 265 # center it at zero (to account for global mis-calibration) 266 test_null -= np.mean(test_null) 267 268 # compute the p-value, if less then the resolution set it to the resolution 269 p_value = max(resolution, resolution * (test_null > test_value).sum()) 270 271 if verbose: 272 msg = ( 273 (f"p-value < {p_value} \t (resolution : {resolution})") 274 if p_value == resolution 275 else (f"p-value ~= {p_value} \t (resolution : {resolution})") 276 ) 277 logging.info(msg) 278 279 return test_value / norm, test_null / norm, p_value
def
ELCE2_estimator(K_xx, err):
17def ELCE2_estimator(K_xx, err): 18 """ 19 The estimator ELCE^2 20 21 Parameters 22 ---------- 23 K_xx : numpy-array 24 evaluated kernel function 25 26 err : numpy-array 27 one-dimensional error vector 28 29 Returns 30 ------- 31 numpy-array 32 estimated ELCE^2 33 """ 34 K = (err.flatten() * K_xx.T).T * err.flatten() 35 return K.sum() - K.diagonal().sum()
The estimator ELCE^2
Parameters
- K_xx (numpy-array): evaluated kernel function
- err (numpy-array): one-dimensional error vector
Returns
- numpy-array: estimated ELCE^2
def
ELCE2_normalization(K):
38def ELCE2_normalization(K): 39 """ 40 The normalization of estimator ELCE^2 41 42 Parameters 43 ---------- 44 K : numpy-array 45 evaluated kernel function 46 47 Returns 48 ------- 49 float 50 estimated normalization of ELCE^2 51 """ 52 53 size = K.shape[0] 54 55 return (size - 1.0) * K.sum() / size
The normalization of estimator ELCE^2
Parameters
- K (numpy-array): evaluated kernel function
Returns
- float: estimated normalization of ELCE^2
def
ELCE2_null_estimator(err, K, rng):
58def ELCE2_null_estimator(err, K, rng): 59 """ 60 Compute the ELCE^2_u for one bootstrap realization 61 62 Parameters 63 ---------- 64 err : numpy-array 65 one-dimensional error vector 66 67 K : numpy-array 68 evaluated kernel function 69 70 rng : type(np.random.RandomState()) 71 numpy random function 72 73 Returns 74 ------- 75 float 76 unbiased estimate of ELCE^2_u 77 78 """ 79 80 # randomize error vector so error = sample .. not looking at local neighbors 81 # checking local calibration .. 82 # randomizaiton -- quanitifies noise in estimator 83 idx = rng.permutation(len(err)) 84 85 return ELCE2_estimator(K, err[idx])
Compute the ELCE^2_u for one bootstrap realization
Parameters
- err (numpy-array): one-dimensional error vector
- K (numpy-array): evaluated kernel function
- rng (type(np.random.RandomState())): numpy random function
Returns
- float: unbiased estimate of ELCE^2_u
def
compute_null_distribution( p_err, K, iterations=1000, n_jobs=1, verbose=False, random_state=None):
88def compute_null_distribution( 89 p_err, K, iterations=1000, n_jobs=1, verbose=False, random_state=None 90): 91 """ 92 Compute the null-distribution of test statistics via a bootstrap procedure 93 94 Parameters 95 ---------- 96 p_err : numpy-array 97 one-dimensional probability error vector 98 99 K : numpy-array 100 evaluated kernel function 101 102 iterations : int 103 controls the number of bootstrap realizations 104 105 verbose : bool 106 controls the verbosity of the model's output 107 108 random_state : type(np.random.RandomState()) or None 109 defines the initial random state 110 111 Returns 112 ------- 113 numpy-array 114 boostrap samples of the test null distribution 115 """ 116 rng = ( 117 random_state 118 if isinstance(random_state, type(np.random.RandomState())) 119 else np.random.RandomState(random_state) 120 ) 121 iterations_list = range(iterations) 122 123 # compute the null distribution 124 # for 1 cpu run the normal code, for more cpu use the Parallel library. This maximize the speed. 125 test_null = ( 126 [ELCE2_null_estimator(p_err, K, rng) for _ in iterations_list] 127 if n_jobs == 1 128 else Parallel(n_jobs=n_jobs, prefer="threads")( 129 delayed(ELCE2_null_estimator)(p_err, K, rng) for _ in iterations_list 130 ) 131 ) 132 133 return np.array(test_null)
Compute the null-distribution of test statistics via a bootstrap procedure
Parameters
- p_err (numpy-array): one-dimensional probability error vector
- K (numpy-array): evaluated kernel function
- iterations (int): controls the number of bootstrap realizations
- verbose (bool): controls the verbosity of the model's output
- random_state (type(np.random.RandomState()) or None): defines the initial random state
Returns
- numpy-array: boostrap samples of the test null distribution
def
ELCE2( X, Y, p, kernel_function='rbf', prob_kernel_width=0.1, iterations=None, use_diffusion_distance=False, verbose=True, random_state=None, n_jobs=1, **kwargs):
136def ELCE2( 137 X, 138 Y, 139 p, 140 kernel_function="rbf", 141 prob_kernel_width=0.1, 142 iterations=None, 143 use_diffusion_distance=False, 144 verbose=True, 145 random_state=None, 146 n_jobs=1, 147 **kwargs, 148): 149 """ 150 This function estimate ELCE^2_u employing a kernel trick. ELCE^2_u tests if a proposed posterior credible interval 151 is calibrated employing a randomly drawn calibration test. The null hypothesis is that the posteriors are 152 properly calibrated. This function perform a bootstrap algorithm to estimate the null distribution, 153 and corresponding p-value. 154 155 Parameters 156 ---------- 157 X : numpy-array 158 data, of size NxD [N is the number of data points, D is the features dimension] 159 160 Y : numpy-array 161 credible error vector, of size Nx1 [N is the number of data points] 162 163 p : numpy-array 164 probability vector, of size Nx1 [N is the number of data points] 165 166 kernel_function : string 167 defines the kernel function. For the list of implemented kernels, please consult with [sklearn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.kernel_metrics.html#sklearn.metrics.pairwise.kernel_metrics) 168 169 prob_kernel_width : float 170 Width of the probably kernel function 171 172 iterations : int 173 controls the number of bootstrap realizations 174 175 use_diffusion_distance : bool 176 Use diffusion instead of eucliden distances 177 Transforms X into a diffusion space (see diffusion_maps.py) 178 179 verbose : bool 180 controls the verbosity of the model's output 181 182 random_state : type(np.random.RandomState()) or None 183 defines the initial random state 184 185 n_jobs : int 186 number of jobs to run in parallel 187 188 **kwargs : **kwargs 189 extra parameters, these are passed to `pairwise_kernels()` as kernel parameters 190 E.g., if `kernel_two_sample_test(..., kernel_function='rbf', gamma=0.1)` 191 192 Returns 193 ------- 194 tuple 195 - SIZE = 1 if iterations=`None` else 3 (float, numpy-array, float) 196 - first element is the test value, 197 - second element is samples from the null distribution via a bootstraps algorithm, 198 - third element is the estimated p-value. 199 """ 200 201 def create_kernel(): 202 """ 203 RBF = https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.rbf_kernel.html?highlight=gaussian+kernel 204 205 Returns 206 ------- 207 A kernel matrix K such that K_{i, j} is the kernel between the ith and jth vectors of the given matrix X, if Y is None. 208 """ 209 # Pre-compute Kernel Function (Hyperplane/Convolution) 210 K_pp_gamma = 1.0 / (prob_kernel_width**2) 211 K_pp_metric = "rbf" 212 213 # In binary class (p vs 1-p) vs miltiple classification (p1 + ...+ pn = 1) 214 # p should be nx1 for the kernal function 215 if len(p.shape) == 2: 216 K_pp_data = p 217 elif len(p.shape) == 1: # if p.shape == 1 ... turn n, --> nx1 218 K_pp_data = p[:, np.newaxis] 219 else: 220 raise ValueError( 221 f"p has invalid dimensions of {p.shape}. The length of p's shape should be 1 or 2, not {len(p.shape)}" 222 ) 223 224 K_xx = pairwise_kernels(X, X, metric=kernel_function, **kwargs) 225 K_pp = pairwise_kernels( 226 K_pp_data, K_pp_data, metric=K_pp_metric, gamma=K_pp_gamma 227 ) 228 229 # Consider both similar features && similar probabilities 230 # K_xx includes ENTIRE feature space ... may not match input as model .. 231 K = K_pp * K_xx 232 return K 233 234 check_attributes(X, Y) 235 K = create_kernel() 236 assert len(Y) == len(p) 237 assert None not in Y 238 assert None not in p 239 p_err = _calculate_err_vector(Y, p) 240 241 # Estimate the Null "Oracle" Distribution 242 test_value = ELCE2_estimator(K, p_err) 243 norm = ELCE2_normalization(K) 244 245 if verbose: 246 logging.info(f"test value = {(test_value / norm)}") 247 logging.info("Computing the null distribution.") 248 249 if iterations is None: 250 return test_value / norm 251 252 # p-value's resolution 253 resolution = 1.0 / iterations 254 255 # compute the null distribution via a bootstrap algorithm 256 test_null = compute_null_distribution( 257 p_err, 258 K, 259 iterations=iterations, 260 verbose=verbose, 261 n_jobs=n_jobs, 262 random_state=random_state, 263 ) 264 265 # Permutation -- est noise under Ho ... Ho assume no global/local miscallibrate .. should cent around 0 266 # center it at zero (to account for global mis-calibration) 267 test_null -= np.mean(test_null) 268 269 # compute the p-value, if less then the resolution set it to the resolution 270 p_value = max(resolution, resolution * (test_null > test_value).sum()) 271 272 if verbose: 273 msg = ( 274 (f"p-value < {p_value} \t (resolution : {resolution})") 275 if p_value == resolution 276 else (f"p-value ~= {p_value} \t (resolution : {resolution})") 277 ) 278 logging.info(msg) 279 280 return test_value / norm, test_null / norm, p_value
This function estimate ELCE^2_u employing a kernel trick. ELCE^2_u tests if a proposed posterior credible interval is calibrated employing a randomly drawn calibration test. The null hypothesis is that the posteriors are properly calibrated. This function perform a bootstrap algorithm to estimate the null distribution, and corresponding p-value.
Parameters
- X (numpy-array): data, of size NxD [N is the number of data points, D is the features dimension]
- Y (numpy-array): credible error vector, of size Nx1 [N is the number of data points]
- p (numpy-array): probability vector, of size Nx1 [N is the number of data points]
- kernel_function (string): defines the kernel function. For the list of implemented kernels, please consult with sklearn
- prob_kernel_width (float): Width of the probably kernel function
- iterations (int): controls the number of bootstrap realizations
- use_diffusion_distance (bool): Use diffusion instead of eucliden distances Transforms X into a diffusion space (see diffusion_maps.py)
- verbose (bool): controls the verbosity of the model's output
- random_state (type(np.random.RandomState()) or None): defines the initial random state
- n_jobs (int): number of jobs to run in parallel
- *kwargs (*kwargs):
extra parameters, these are passed to
pairwise_kernels()
as kernel parameters E.g., ifkernel_two_sample_test(..., kernel_function='rbf', gamma=0.1)
Returns
- tuple: - SIZE = 1 if iterations=
None
else 3 (float, numpy-array, float)- first element is the test value,
- second element is samples from the null distribution via a bootstraps algorithm,
- third element is the estimated p-value.