KiTE.metrics

Metrics utilities to help test if a model is locally biased.

  1"""
  2Metrics utilities to help test if a model is locally biased.
  3"""
  4
  5from joblib import Parallel, delayed
  6from sklearn.gaussian_process.kernels import pairwise_kernels
  7import logging
  8import numpy as np
  9from KiTE.validation import check_attributes
 10
 11
 12def _calculate_err_vector(Y, p):
 13    return Y - p
 14
 15
 16def ELCE2_estimator(K_xx, err):
 17    """
 18    The estimator ELCE^2
 19
 20    Parameters
 21    ----------
 22    K_xx : numpy-array
 23        evaluated kernel function
 24
 25    err : numpy-array
 26        one-dimensional error vector
 27
 28    Returns
 29    -------
 30    numpy-array
 31        estimated ELCE^2
 32    """
 33    K = (err.flatten() * K_xx.T).T * err.flatten()
 34    return K.sum() - K.diagonal().sum()
 35
 36
 37def ELCE2_normalization(K):
 38    """
 39    The normalization of estimator ELCE^2
 40
 41    Parameters
 42    ----------
 43    K : numpy-array
 44        evaluated kernel function
 45
 46    Returns
 47    -------
 48    float
 49        estimated normalization of ELCE^2
 50    """
 51
 52    size = K.shape[0]
 53
 54    return (size - 1.0) * K.sum() / size
 55
 56
 57def ELCE2_null_estimator(err, K, rng):
 58    """
 59    Compute the ELCE^2_u for one bootstrap realization
 60
 61    Parameters
 62    ----------
 63    err : numpy-array
 64        one-dimensional error vector
 65
 66    K : numpy-array
 67        evaluated kernel function
 68
 69    rng : type(np.random.RandomState())
 70        numpy random function
 71
 72    Returns
 73    -------
 74    float
 75        unbiased estimate of ELCE^2_u
 76
 77    """
 78
 79    # randomize error vector so error = sample .. not looking at local neighbors
 80    # checking local calibration ..
 81    # randomizaiton -- quanitifies noise in estimator
 82    idx = rng.permutation(len(err))
 83
 84    return ELCE2_estimator(K, err[idx])
 85
 86
 87def compute_null_distribution(
 88    p_err, K, iterations=1000, n_jobs=1, verbose=False, random_state=None
 89):
 90    """
 91    Compute the null-distribution of test statistics via a bootstrap procedure
 92
 93    Parameters
 94    ----------
 95    p_err : numpy-array
 96        one-dimensional probability error vector
 97
 98    K : numpy-array
 99        evaluated kernel function
100
101    iterations : int
102        controls the number of bootstrap realizations
103
104    verbose : bool
105        controls the verbosity of the model's output
106
107    random_state : type(np.random.RandomState()) or None
108        defines the initial random state
109
110    Returns
111    -------
112    numpy-array
113        boostrap samples of the test null distribution
114    """
115    rng = (
116        random_state
117        if isinstance(random_state, type(np.random.RandomState()))
118        else np.random.RandomState(random_state)
119    )
120    iterations_list = range(iterations)
121
122    # compute the null distribution
123    # for 1 cpu run the normal code, for more cpu use the Parallel library. This maximize the speed.
124    test_null = (
125        [ELCE2_null_estimator(p_err, K, rng) for _ in iterations_list]
126        if n_jobs == 1
127        else Parallel(n_jobs=n_jobs, prefer="threads")(
128            delayed(ELCE2_null_estimator)(p_err, K, rng) for _ in iterations_list
129        )
130    )
131
132    return np.array(test_null)
133
134
135def ELCE2(
136    X,
137    Y,
138    p,
139    kernel_function="rbf",
140    prob_kernel_width=0.1,
141    iterations=None,
142    use_diffusion_distance=False,
143    verbose=True,
144    random_state=None,
145    n_jobs=1,
146    **kwargs,
147):
148    """
149    This function estimate ELCE^2_u employing a kernel trick. ELCE^2_u tests if a proposed posterior credible interval
150    is calibrated employing a randomly drawn calibration test. The null hypothesis is that the posteriors are
151    properly calibrated. This function perform a bootstrap algorithm to estimate the null distribution,
152    and corresponding p-value.
153
154    Parameters
155    ----------
156    X : numpy-array
157        data, of size NxD [N is the number of data points, D is the features dimension]
158
159    Y : numpy-array
160        credible error vector, of size Nx1 [N is the number of data points]
161
162    p : numpy-array
163        probability vector, of size Nx1 [N is the number of data points]
164
165    kernel_function : string
166        defines the kernel function. For the list of implemented kernels, please consult with [sklearn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.kernel_metrics.html#sklearn.metrics.pairwise.kernel_metrics)
167
168    prob_kernel_width : float
169        Width of the probably kernel function
170
171    iterations : int
172        controls the number of bootstrap realizations
173
174    use_diffusion_distance : bool
175        Use diffusion instead of eucliden distances
176        Transforms X into a diffusion space (see diffusion_maps.py)
177
178    verbose : bool
179        controls the verbosity of the model's output
180
181    random_state : type(np.random.RandomState()) or None
182        defines the initial random state
183
184    n_jobs : int
185        number of jobs to run in parallel
186
187    **kwargs : **kwargs
188        extra parameters, these are passed to `pairwise_kernels()` as kernel parameters
189        E.g., if `kernel_two_sample_test(..., kernel_function='rbf', gamma=0.1)`
190
191    Returns
192    -------
193    tuple
194        - SIZE = 1 if iterations=`None` else 3 (float, numpy-array, float)
195            - first element is the test value,
196            - second element is samples from the null distribution via a bootstraps algorithm,
197            - third element is the estimated p-value.
198    """
199
200    def create_kernel():
201        """
202        RBF = https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.rbf_kernel.html?highlight=gaussian+kernel
203
204        Returns
205        -------
206        A kernel matrix K such that K_{i, j} is the kernel between the ith and jth vectors of the given matrix X, if Y is None.
207        """
208        # Pre-compute Kernel Function (Hyperplane/Convolution)
209        K_pp_gamma = 1.0 / (prob_kernel_width**2)
210        K_pp_metric = "rbf"
211
212        # In binary class (p vs 1-p) vs miltiple classification (p1 + ...+ pn = 1)
213        # p should be nx1 for the kernal function
214        if len(p.shape) == 2:
215            K_pp_data = p
216        elif len(p.shape) == 1:  # if p.shape == 1 ... turn n, --> nx1
217            K_pp_data = p[:, np.newaxis]
218        else:
219            raise ValueError(
220                f"p has invalid dimensions of {p.shape}. The length of p's shape should be 1 or 2, not {len(p.shape)}"
221            )
222
223        K_xx = pairwise_kernels(X, X, metric=kernel_function, **kwargs)
224        K_pp = pairwise_kernels(
225            K_pp_data, K_pp_data, metric=K_pp_metric, gamma=K_pp_gamma
226        )
227
228        # Consider both similar features && similar probabilities
229        # K_xx includes ENTIRE feature space ... may not match input as model ..
230        K = K_pp * K_xx
231        return K
232
233    check_attributes(X, Y)
234    K = create_kernel()
235    assert len(Y) == len(p)
236    assert None not in Y
237    assert None not in p
238    p_err = _calculate_err_vector(Y, p)
239
240    # Estimate the Null "Oracle" Distribution
241    test_value = ELCE2_estimator(K, p_err)
242    norm = ELCE2_normalization(K)
243
244    if verbose:
245        logging.info(f"test value = {(test_value / norm)}")
246        logging.info("Computing the null distribution.")
247
248    if iterations is None:
249        return test_value / norm
250
251    # p-value's resolution
252    resolution = 1.0 / iterations
253
254    # compute the null distribution via a bootstrap algorithm
255    test_null = compute_null_distribution(
256        p_err,
257        K,
258        iterations=iterations,
259        verbose=verbose,
260        n_jobs=n_jobs,
261        random_state=random_state,
262    )
263
264    # Permutation -- est noise under Ho ... Ho assume no global/local miscallibrate .. should cent around 0
265    # center it at zero (to account for global mis-calibration)
266    test_null -= np.mean(test_null)
267
268    # compute the p-value, if less then the resolution set it to the resolution
269    p_value = max(resolution, resolution * (test_null > test_value).sum())
270
271    if verbose:
272        msg = (
273            (f"p-value < {p_value} \t (resolution : {resolution})")
274            if p_value == resolution
275            else (f"p-value ~= {p_value} \t (resolution : {resolution})")
276        )
277        logging.info(msg)
278
279    return test_value / norm, test_null / norm, p_value
def ELCE2_estimator(K_xx, err):

The estimator ELCE^2

Parameters
  • K_xx (numpy-array): evaluated kernel function
  • err (numpy-array): one-dimensional error vector
Returns
  • float: estimated ELCE^2
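
In matrix form, the estimator is the off-diagonal quadratic form sum over i != j of e_i K_ij e_j. A minimal sketch checking this against the closed form; the error vector and kernel matrix here are invented for illustration:

import numpy as np
from KiTE.metrics import ELCE2_estimator

rng = np.random.RandomState(0)
err = rng.normal(size=5)          # invented error vector e
K_xx = np.exp(-rng.rand(5, 5))    # invented positive kernel matrix
K_xx = (K_xx + K_xx.T) / 2        # symmetrize, as a real kernel would be

# the estimator equals e^T K e minus the diagonal terms, i.e. sum_{i != j} e_i K_ij e_j
expected = err @ K_xx @ err - np.sum(err**2 * np.diag(K_xx))
assert np.isclose(ELCE2_estimator(K_xx, err), expected)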
def ELCE2_normalization(K):

The normalization of the estimator ELCE^2

Parameters
  • K (numpy-array): evaluated kernel function
Returns
  • float: estimated normalization of ELCE^2
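
This is the quantity ELCE2 divides the raw estimator by, so the reported statistic is comparable across sample sizes. A toy check of the (size - 1) * K.sum() / size formula on an invented kernel matrix:

import numpy as np
from KiTE.metrics import ELCE2_normalization

K = np.full((4, 4), 0.5)    # invented 4x4 kernel matrix, K.sum() = 8.0
# (size - 1) * K.sum() / size = 3 * 8.0 / 4 = 6.0
assert np.isclose(ELCE2_normalization(K), 6.0)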
def ELCE2_null_estimator(err, K, rng):

Compute the ELCE^2_u for one bootstrap realization

Parameters
  • err (numpy-array): one-dimensional error vector
  • K (numpy-array): evaluated kernel function
  • rng (np.random.RandomState): numpy random state
Returns
  • float: unbiased estimate of ELCE^2_u
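
Each null realization permutes the errors, detaching them from their neighborhoods while keeping the kernel fixed. A small sketch with synthetic inputs (the error vector and kernel are invented):

import numpy as np
from KiTE.metrics import ELCE2_null_estimator

rng = np.random.RandomState(42)
err = rng.normal(size=10)        # invented error vector
K = np.exp(-rng.rand(10, 10))    # invented kernel matrix

# one draw from the null: same kernel, errors detached from their locations
one_draw = ELCE2_null_estimator(err, K, rng)

# repeated draws trace out the null distribution of the statistic
null_draws = np.array([ELCE2_null_estimator(err, K, rng) for _ in range(200)])
print(one_draw, null_draws.mean(), null_draws.std())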
def compute_null_distribution( p_err, K, iterations=1000, n_jobs=1, verbose=False, random_state=None):

Compute the null distribution of the test statistic via a bootstrap procedure

Parameters
  • p_err (numpy-array): one-dimensional probability error vector
  • K (numpy-array): evaluated kernel function
  • iterations (int): number of bootstrap realizations
  • n_jobs (int): number of jobs to run in parallel
  • verbose (bool): controls the verbosity of the model's output
  • random_state (np.random.RandomState or None): defines the initial random state
Returns
  • numpy-array: bootstrap samples of the test statistic's null distribution
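
A sketch of how the null samples can be compared against an observed statistic, mirroring what ELCE2 does internally; p_err and K are synthetic stand-ins:

import numpy as np
from KiTE.metrics import ELCE2_estimator, compute_null_distribution

rng = np.random.RandomState(0)
p_err = rng.normal(size=50)      # invented probability error vector
K = np.exp(-rng.rand(50, 50))    # invented kernel matrix

test_value = ELCE2_estimator(K, p_err)
test_null = compute_null_distribution(p_err, K, iterations=500, random_state=0)

# center the null (as ELCE2 does) and estimate a one-sided p-value
test_null -= test_null.mean()
p_value = max(1.0 / 500, (test_null > test_value).mean())
print(p_value)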
def ELCE2( X, Y, p, kernel_function='rbf', prob_kernel_width=0.1, iterations=None, use_diffusion_distance=False, verbose=True, random_state=None, n_jobs=1, **kwargs):

This function estimates ELCE^2_u using a kernel trick. ELCE^2_u tests whether a proposed posterior credible interval is calibrated, using a randomly drawn calibration test set. The null hypothesis is that the posteriors are properly calibrated. The function performs a bootstrap algorithm to estimate the null distribution and the corresponding p-value.

Parameters
  • X (numpy-array): data, of size NxD [N is the number of data points, D is the feature dimension]
  • Y (numpy-array): credible error vector, of size Nx1 [N is the number of data points]
  • p (numpy-array): probability vector, of size Nx1 [N is the number of data points]
  • kernel_function (string): defines the kernel function. For the list of implemented kernels, see sklearn
  • prob_kernel_width (float): width of the probability kernel function
  • iterations (int): number of bootstrap realizations
  • use_diffusion_distance (bool): use diffusion distances instead of Euclidean distances, transforming X into a diffusion space (see diffusion_maps.py); not used by the current implementation
  • verbose (bool): controls the verbosity of the model's output
  • random_state (np.random.RandomState or None): defines the initial random state
  • n_jobs (int): number of jobs to run in parallel
  • **kwargs: extra parameters, passed to pairwise_kernels() as kernel parameters, e.g. ELCE2(..., kernel_function='rbf', gamma=0.1)
Returns
  • float or tuple: if iterations is None, only the test value (float) is returned; otherwise a 3-tuple (float, numpy-array, float):
    • first element is the test value,
    • second element is samples from the null distribution via a bootstrap algorithm,
    • third element is the estimated p-value.
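
A minimal end-to-end sketch, assuming a scikit-learn binary classifier whose held-out probabilities are to be audited; the dataset, model, and kernel settings are invented for illustration:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from KiTE.metrics import ELCE2

X, y = make_classification(n_samples=600, n_features=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = LogisticRegression().fit(X_train, y_train)
p = model.predict_proba(X_test)[:, 1]    # predicted probability of class 1

# test for local miscalibration; gamma is forwarded to the RBF feature kernel
test_value, test_null, p_value = ELCE2(
    X_test, y_test, p,
    kernel_function="rbf", gamma=0.5,
    prob_kernel_width=0.1, iterations=1000,
    random_state=0, verbose=False,
)

# a small p-value rejects the null that the probabilities are locally calibrated
print(test_value, p_value)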