KiTE.calibrate

Calibration utilities to help reduce the local bias of a given model.

  1"""
  2Calibration utilities to help reduce the local bias of a given model.
  3"""
  4
  5import numpy as np
  6from sklearn.gaussian_process.kernels import pairwise_kernels
  7from sklearn.kernel_ridge import KernelRidge
  8from sklearn.svm import SVR
  9from sklearn.linear_model import LogisticRegression
 10from sklearn.isotonic import IsotonicRegression
 11from netcal.scaling import TemperatureScaling, BetaCalibration
 12from netcal.binning import HistogramBinning, BBQ, ENIR
 13from sklearn.calibration import calibration_curve
 14from sklearn.metrics import brier_score_loss
 15from KiTE.calibration_models import KRR_calibration, EWF_calibration
 16from KiTE.validation import check_attributes, check_credible_vector_args
 17
 18
 19def local_bias_estimator(X, Y, p, X_grid, model="KRR", kernel_function="rbf", **kwargs):
 20    """
 21    Estimates model bias by calculating bias = Y - model predictions. Assumes model is already close to the oracle model.
 22
 23    Parameters
 24    ----------
 25    X : numpy-array
 26        data, of size NxD [N is the number of data points, D is the features dimension]
 27
 28    Y : numpy-array
 29        credible error vector, of size Nx1 [N is the number of data points]
 30
 31    p : numpy-array
 32        probability vector, of size Nx1 [N is the number of data points]
 33
 34    X_grid : numpy-array
 35        - For EWF, X_grid used to build a kernel. This kernel used to calculate bias
 36        - Else, X_grid used as "test features" to predict bias
 37
 38    model : str
 39        Indicates model type. Valid Options include:
 40            - "KRR": KernelRidge
 41            - "SVR": SVR
 42            - "EWF": EWF
 43
 44    kernel_function : str
 45        Indicates kernel function type.
 46        Refer to sklearn documentation to see which kernel functions are valid for model chosen.
 47
 48    **kwargs : **kwargs
 49        extra parameters passed to `pairwise_kernels()` as kernel parameters
 50
 51    Returns
 52    -------
 53    numpy-array
 54        estimated bias
 55    """
 56
 57    check_attributes(X, Y)
 58
 59    if model == "KRR":
 60        model = KernelRidge(kernel=kernel_function, **kwargs)
 61    elif model == "SVR":
 62        model = SVR(kernel=kernel_function, **kwargs)
 63    elif model == "EWF":
 64        K = pairwise_kernels(X, X_grid, metric=kernel_function, **kwargs)
 65        p_err = Y - p
 66        bias = np.sum(p_err.flatten() * K.T, axis=1) / np.sum(K.T, axis=1)
 67        return bias
 68    else:
 69        raise ValueError(
 70            f"Model {model} is not defined. Valid Options include: KRR, SVR, or EWF"
 71        )
 72
 73    # Use chosen model to predict bias_calibration based on features X.
 74    bias_calibration = Y - p
 75    model.fit(X, bias_calibration)
 76    bias = model.predict(X_grid)
 77    return bias
 78
 79
 80def construct_credible_error_vector(Y, Yr_up, Yr_down, alpha):
 81    """
 82    For a one dimensional output prediction Y, construct the credible error vector.
 83    Uses given lower and upper percentiles. Assumes credible level alpha is fixed.
 84
 85    Parameters
 86    ----------
 87    Y : numpy-array
 88        data, of size Nx1 [N is the number of data points]
 89
 90    Yr_up : numpy-array
 91        upper percentile vector, of size Nx1 [N is the number of data points]
 92
 93    Yr_down : numpy-array
 94        lower percentile vector, of size Nx1 [N is the number of data points]
 95
 96    alpha : float
 97        the theoretical credible level alpha
 98
 99    Returns
100    -------
101    numpy-array
102        Credible Error Vector
103    """
104    check_credible_vector_args(Y, Yr_up, Yr_down, alpha)
105
106    # indicator of Y less than posterior percentile r
107    Yind = 1.0 * ((Y < Yr_up) * (Y > Yr_down))
108
109    # percentile/probability error vector
110    e = Yind - alpha
111
112    return e
113
114
115def calibrate(
116    Xtrain, prob_train, Ytrain, Xtest=None, prob_test=None, method="platt", **kwargs
117):
118    """
119    A calibration method that takes the predicted probabilties and positive cases and recalibrate the probabilities.
120
121    Parameters
122    ----------
123    Xtrain : array, shape (n_samples_train,)
124        Features used for training
125
126    prob_train : array, shape (n_samples_train,)
127        Probabilities of positive class to train a calibration model
128
129    Ytrain : array, shape (n_samples_train,)
130        Values used for training
131
132    Xtest : array, shape (n_samples_test,)
133        Features used for testing
134
135    prob_test : array, shape (n_samples_test,)
136        Probabilities of the positive class to be calibrated (test set). If None it re-calibrate the training set.
137
138    method: string, 'platt', 'isotonic', 'temperature_scaling', 'beta', 'HB', 'BBG', 'ENIR'
139        The method to use for calibration. Can be ‘sigmoid’ which corresponds to Platt’s method
140        (i.e. a logistic regression model) or ‘isotonic’ which is a non-parametric approach.
141        It is not advised to use isotonic calibration with too few calibration samples (<<1000) since it tends to overfit.
142
143    **kwargs
144        Additional Args used to fit KRR or EWF
145
146    Returns
147    -------
148    array, shape (n_bins,)
149        The calibrated error for test set. (p_calibrated)
150
151    """
152    probs = prob_train[:, np.newaxis] if prob_test is None else prob_test[:, np.newaxis]
153    Xtest = Xtrain if Xtest is None else Xtest
154
155    if method == "platt":
156        model = LogisticRegression()
157        model.fit(prob_train[:, np.newaxis], Ytrain)  # LR needs X to be 2-dimensional
158        p_calibrated = model.predict_proba(probs)[:, 1]
159
160    elif method == "isotonic":
161        model = IsotonicRegression(out_of_bounds="clip")
162        model.fit(prob_train, Ytrain)  # LR needs X to be 2-dimensional
163        p_calibrated = model.transform(probs.flatten())
164
165    elif method == "temperature_scaling":
166        model = TemperatureScaling()
167        model.fit(prob_train, Ytrain)
168        p_calibrated = model.transform(probs)
169
170    elif method == "beta":
171        model = BetaCalibration()
172        model.fit(prob_train, Ytrain)
173        p_calibrated = model.transform(probs)
174
175    elif method == "HB":
176        model = HistogramBinning()
177        model.fit(prob_train, Ytrain)
178        p_calibrated = model.transform(probs)
179
180    elif method == "BBQ":
181        model = BBQ()
182        model.fit(prob_train, Ytrain)
183        p_calibrated = model.transform(probs)
184
185    elif method == "ENIR":
186        model = ENIR()
187        model.fit(prob_train, Ytrain)
188        p_calibrated = model.transform(probs)
189
190    elif method == "KRR":
191        model = KRR_calibration()
192        model.fit(Xtrain, prob_train, Ytrain, **kwargs)
193        p_calibrated = model.predict(Xtest, probs.flatten(), mode="prob")
194
195    elif method == "EWF":
196        model = EWF_calibration()
197        model.fit(Xtrain, prob_train, Ytrain, **kwargs)
198        p_calibrated = model.predict(Xtest, probs.flatten(), mode="prob")
199
200    else:
201        raise ValueError("Method %s is not defined." % method)
202
203    p_calibrated[np.isnan(p_calibrated)] = 0
204
205    # normalize the large numbers and small numbers to one and zero
206    # p_calibrated[p_calibrated > 1.0] = 1.0
207    # p_calibrated[p_calibrated < 0.0] = 0.0
208
209    return p_calibrated  # f-hat -- f(x) = oracle = f-hat + b(x)
210
211
212def _counts_per_bin(prob, n_bins):
213    """
214    Taken from https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/calibration.py#L513
215    """
216
217    bins = np.linspace(0.0, 1.0 + 1e-8, n_bins + 1)
218    binids = np.digitize(prob, bins) - 1
219    bin_total = np.bincount(binids, minlength=len(bins))
220
221    nonzero = bin_total != 0
222    return bin_total[nonzero]
223
224
225def calibration_error(y_true, y_prob, n_bins=10, method="ECE"):
226    """
227    Compute calibration error given true targets and predicted probabilities.
228     Calibration curves may also be referred to as reliability diagrams.
229
230    Parameters
231    ----------
232    y_true : array, shape (n_samples,)
233        True targets.
234
235    y_prob : array, shape (n_samples,)
236        Probabilities of the positive class.
237
238    method : string, default='ECE', {'ECE', 'MCE', 'BS'}
239        Which method to be used to compute calibration error.
240
241    n_bins : int
242        Number of bins. Note that a bigger number requires more data.
243
244    Returns
245    -------
246    float
247        calibration error score
248    """
249
250    # compute fraction of positive cases per y_prob in a bin.
251    # See scikit-learn documentation for details
252    fraction_of_positives, mean_predicted_value = calibration_curve(
253        y_true, y_prob, normalize=False, n_bins=n_bins, strategy="uniform"
254    )
255
256    if method == "ECE":
257        hist_count = _counts_per_bin(y_prob, n_bins)
258        return np.sum(
259            hist_count * np.abs(fraction_of_positives - mean_predicted_value)
260        ) / np.sum(hist_count)
261    elif method == "MCE":
262        return np.max(np.abs(fraction_of_positives - mean_predicted_value))
263    elif method == "BS":
264        return brier_score_loss(y_true, y_prob, pos_label=1)
265    else:
266        raise ValueError("Method %s is not defined." % method)
def local_bias_estimator(X, Y, p, X_grid, model='KRR', kernel_function='rbf', **kwargs):
20def local_bias_estimator(X, Y, p, X_grid, model="KRR", kernel_function="rbf", **kwargs):
21    """
22    Estimates model bias by calculating bias = Y - model predictions. Assumes model is already close to the oracle model.
23
24    Parameters
25    ----------
26    X : numpy-array
27        data, of size NxD [N is the number of data points, D is the features dimension]
28
29    Y : numpy-array
30        credible error vector, of size Nx1 [N is the number of data points]
31
32    p : numpy-array
33        probability vector, of size Nx1 [N is the number of data points]
34
35    X_grid : numpy-array
36        - For EWF, X_grid used to build a kernel. This kernel used to calculate bias
37        - Else, X_grid used as "test features" to predict bias
38
39    model : str
40        Indicates model type. Valid Options include:
41            - "KRR": KernelRidge
42            - "SVR": SVR
43            - "EWF": EWF
44
45    kernel_function : str
46        Indicates kernel function type.
47        Refer to sklearn documentation to see which kernel functions are valid for model chosen.
48
49    **kwargs : **kwargs
50        extra parameters passed to `pairwise_kernels()` as kernel parameters
51
52    Returns
53    -------
54    numpy-array
55        estimated bias
56    """
57
58    check_attributes(X, Y)
59
60    if model == "KRR":
61        model = KernelRidge(kernel=kernel_function, **kwargs)
62    elif model == "SVR":
63        model = SVR(kernel=kernel_function, **kwargs)
64    elif model == "EWF":
65        K = pairwise_kernels(X, X_grid, metric=kernel_function, **kwargs)
66        p_err = Y - p
67        bias = np.sum(p_err.flatten() * K.T, axis=1) / np.sum(K.T, axis=1)
68        return bias
69    else:
70        raise ValueError(
71            f"Model {model} is not defined. Valid Options include: KRR, SVR, or EWF"
72        )
73
74    # Use chosen model to predict bias_calibration based on features X.
75    bias_calibration = Y - p
76    model.fit(X, bias_calibration)
77    bias = model.predict(X_grid)
78    return bias

Estimates model bias by calculating bias = Y - model predictions. Assumes model is already close to the oracle model.

Parameters
  • X (numpy-array): data, of size NxD [N is the number of data points, D is the features dimension]
  • Y (numpy-array): credible error vector, of size Nx1 [N is the number of data points]
  • p (numpy-array): probability vector, of size Nx1 [N is the number of data points]
  • X_grid (numpy-array):
    • For EWF, X_grid used to build a kernel. This kernel used to calculate bias
    • Else, X_grid used as "test features" to predict bias
  • model (str): Indicates model type. Valid Options include: - "KRR": KernelRidge - "SVR": SVR - "EWF": EWF
  • kernel_function (str): Indicates kernel function type. Refer to sklearn documentation to see which kernel functions are valid for model chosen.
  • *kwargs (*kwargs): extra parameters passed to pairwise_kernels() as kernel parameters
Returns
  • numpy-array: estimated bias
def construct_credible_error_vector(Y, Yr_up, Yr_down, alpha):
 81def construct_credible_error_vector(Y, Yr_up, Yr_down, alpha):
 82    """
 83    For a one dimensional output prediction Y, construct the credible error vector.
 84    Uses given lower and upper percentiles. Assumes credible level alpha is fixed.
 85
 86    Parameters
 87    ----------
 88    Y : numpy-array
 89        data, of size Nx1 [N is the number of data points]
 90
 91    Yr_up : numpy-array
 92        upper percentile vector, of size Nx1 [N is the number of data points]
 93
 94    Yr_down : numpy-array
 95        lower percentile vector, of size Nx1 [N is the number of data points]
 96
 97    alpha : float
 98        the theoretical credible level alpha
 99
100    Returns
101    -------
102    numpy-array
103        Credible Error Vector
104    """
105    check_credible_vector_args(Y, Yr_up, Yr_down, alpha)
106
107    # indicator of Y less than posterior percentile r
108    Yind = 1.0 * ((Y < Yr_up) * (Y > Yr_down))
109
110    # percentile/probability error vector
111    e = Yind - alpha
112
113    return e

For a one dimensional output prediction Y, construct the credible error vector. Uses given lower and upper percentiles. Assumes credible level alpha is fixed.

Parameters
  • Y (numpy-array): data, of size Nx1 [N is the number of data points]
  • Yr_up (numpy-array): upper percentile vector, of size Nx1 [N is the number of data points]
  • Yr_down (numpy-array): lower percentile vector, of size Nx1 [N is the number of data points]
  • alpha (float): the theoretical credible level alpha
Returns
  • numpy-array: Credible Error Vector
def calibrate( Xtrain, prob_train, Ytrain, Xtest=None, prob_test=None, method='platt', **kwargs):
116def calibrate(
117    Xtrain, prob_train, Ytrain, Xtest=None, prob_test=None, method="platt", **kwargs
118):
119    """
120    A calibration method that takes the predicted probabilties and positive cases and recalibrate the probabilities.
121
122    Parameters
123    ----------
124    Xtrain : array, shape (n_samples_train,)
125        Features used for training
126
127    prob_train : array, shape (n_samples_train,)
128        Probabilities of positive class to train a calibration model
129
130    Ytrain : array, shape (n_samples_train,)
131        Values used for training
132
133    Xtest : array, shape (n_samples_test,)
134        Features used for testing
135
136    prob_test : array, shape (n_samples_test,)
137        Probabilities of the positive class to be calibrated (test set). If None it re-calibrate the training set.
138
139    method: string, 'platt', 'isotonic', 'temperature_scaling', 'beta', 'HB', 'BBG', 'ENIR'
140        The method to use for calibration. Can be ‘sigmoid’ which corresponds to Platt’s method
141        (i.e. a logistic regression model) or ‘isotonic’ which is a non-parametric approach.
142        It is not advised to use isotonic calibration with too few calibration samples (<<1000) since it tends to overfit.
143
144    **kwargs
145        Additional Args used to fit KRR or EWF
146
147    Returns
148    -------
149    array, shape (n_bins,)
150        The calibrated error for test set. (p_calibrated)
151
152    """
153    probs = prob_train[:, np.newaxis] if prob_test is None else prob_test[:, np.newaxis]
154    Xtest = Xtrain if Xtest is None else Xtest
155
156    if method == "platt":
157        model = LogisticRegression()
158        model.fit(prob_train[:, np.newaxis], Ytrain)  # LR needs X to be 2-dimensional
159        p_calibrated = model.predict_proba(probs)[:, 1]
160
161    elif method == "isotonic":
162        model = IsotonicRegression(out_of_bounds="clip")
163        model.fit(prob_train, Ytrain)  # LR needs X to be 2-dimensional
164        p_calibrated = model.transform(probs.flatten())
165
166    elif method == "temperature_scaling":
167        model = TemperatureScaling()
168        model.fit(prob_train, Ytrain)
169        p_calibrated = model.transform(probs)
170
171    elif method == "beta":
172        model = BetaCalibration()
173        model.fit(prob_train, Ytrain)
174        p_calibrated = model.transform(probs)
175
176    elif method == "HB":
177        model = HistogramBinning()
178        model.fit(prob_train, Ytrain)
179        p_calibrated = model.transform(probs)
180
181    elif method == "BBQ":
182        model = BBQ()
183        model.fit(prob_train, Ytrain)
184        p_calibrated = model.transform(probs)
185
186    elif method == "ENIR":
187        model = ENIR()
188        model.fit(prob_train, Ytrain)
189        p_calibrated = model.transform(probs)
190
191    elif method == "KRR":
192        model = KRR_calibration()
193        model.fit(Xtrain, prob_train, Ytrain, **kwargs)
194        p_calibrated = model.predict(Xtest, probs.flatten(), mode="prob")
195
196    elif method == "EWF":
197        model = EWF_calibration()
198        model.fit(Xtrain, prob_train, Ytrain, **kwargs)
199        p_calibrated = model.predict(Xtest, probs.flatten(), mode="prob")
200
201    else:
202        raise ValueError("Method %s is not defined." % method)
203
204    p_calibrated[np.isnan(p_calibrated)] = 0
205
206    # normalize the large numbers and small numbers to one and zero
207    # p_calibrated[p_calibrated > 1.0] = 1.0
208    # p_calibrated[p_calibrated < 0.0] = 0.0
209
210    return p_calibrated  # f-hat -- f(x) = oracle = f-hat + b(x)

A calibration method that takes the predicted probabilties and positive cases and recalibrate the probabilities.

Parameters
  • Xtrain (array, shape (n_samples_train,)): Features used for training
  • prob_train (array, shape (n_samples_train,)): Probabilities of positive class to train a calibration model
  • Ytrain (array, shape (n_samples_train,)): Values used for training
  • Xtest (array, shape (n_samples_test,)): Features used for testing
  • prob_test (array, shape (n_samples_test,)): Probabilities of the positive class to be calibrated (test set). If None it re-calibrate the training set.
  • method (string, 'platt', 'isotonic', 'temperature_scaling', 'beta', 'HB', 'BBG', 'ENIR'): The method to use for calibration. Can be ‘sigmoid’ which corresponds to Platt’s method (i.e. a logistic regression model) or ‘isotonic’ which is a non-parametric approach. It is not advised to use isotonic calibration with too few calibration samples (<<1000) since it tends to overfit.
  • **kwargs: Additional Args used to fit KRR or EWF
Returns
  • array, shape (n_bins,): The calibrated error for test set. (p_calibrated)
def calibration_error(y_true, y_prob, n_bins=10, method='ECE'):
226def calibration_error(y_true, y_prob, n_bins=10, method="ECE"):
227    """
228    Compute calibration error given true targets and predicted probabilities.
229     Calibration curves may also be referred to as reliability diagrams.
230
231    Parameters
232    ----------
233    y_true : array, shape (n_samples,)
234        True targets.
235
236    y_prob : array, shape (n_samples,)
237        Probabilities of the positive class.
238
239    method : string, default='ECE', {'ECE', 'MCE', 'BS'}
240        Which method to be used to compute calibration error.
241
242    n_bins : int
243        Number of bins. Note that a bigger number requires more data.
244
245    Returns
246    -------
247    float
248        calibration error score
249    """
250
251    # compute fraction of positive cases per y_prob in a bin.
252    # See scikit-learn documentation for details
253    fraction_of_positives, mean_predicted_value = calibration_curve(
254        y_true, y_prob, normalize=False, n_bins=n_bins, strategy="uniform"
255    )
256
257    if method == "ECE":
258        hist_count = _counts_per_bin(y_prob, n_bins)
259        return np.sum(
260            hist_count * np.abs(fraction_of_positives - mean_predicted_value)
261        ) / np.sum(hist_count)
262    elif method == "MCE":
263        return np.max(np.abs(fraction_of_positives - mean_predicted_value))
264    elif method == "BS":
265        return brier_score_loss(y_true, y_prob, pos_label=1)
266    else:
267        raise ValueError("Method %s is not defined." % method)

Compute calibration error given true targets and predicted probabilities. Calibration curves may also be referred to as reliability diagrams.

Parameters
  • y_true (array, shape (n_samples,)): True targets.
  • y_prob (array, shape (n_samples,)): Probabilities of the positive class.
  • method (string, default='ECE', {'ECE', 'MCE', 'BS'}): Which method to be used to compute calibration error.
  • n_bins (int): Number of bins. Note that a bigger number requires more data.
Returns
  • float: calibration error score