KiTE.calibrate
Calibration utilities to help reduce the local bias of a given model.
1""" 2Calibration utilities to help reduce the local bias of a given model. 3""" 4 5import numpy as np 6from sklearn.gaussian_process.kernels import pairwise_kernels 7from sklearn.kernel_ridge import KernelRidge 8from sklearn.svm import SVR 9from sklearn.linear_model import LogisticRegression 10from sklearn.isotonic import IsotonicRegression 11from netcal.scaling import TemperatureScaling, BetaCalibration 12from netcal.binning import HistogramBinning, BBQ, ENIR 13from sklearn.calibration import calibration_curve 14from sklearn.metrics import brier_score_loss 15from KiTE.calibration_models import KRR_calibration, EWF_calibration 16from KiTE.validation import check_attributes, check_credible_vector_args 17 18 19def local_bias_estimator(X, Y, p, X_grid, model="KRR", kernel_function="rbf", **kwargs): 20 """ 21 Estimates model bias by calculating bias = Y - model predictions. Assumes model is already close to the oracle model. 22 23 Parameters 24 ---------- 25 X : numpy-array 26 data, of size NxD [N is the number of data points, D is the features dimension] 27 28 Y : numpy-array 29 credible error vector, of size Nx1 [N is the number of data points] 30 31 p : numpy-array 32 probability vector, of size Nx1 [N is the number of data points] 33 34 X_grid : numpy-array 35 - For EWF, X_grid used to build a kernel. This kernel used to calculate bias 36 - Else, X_grid used as "test features" to predict bias 37 38 model : str 39 Indicates model type. Valid Options include: 40 - "KRR": KernelRidge 41 - "SVR": SVR 42 - "EWF": EWF 43 44 kernel_function : str 45 Indicates kernel function type. 46 Refer to sklearn documentation to see which kernel functions are valid for model chosen. 47 48 **kwargs : **kwargs 49 extra parameters passed to `pairwise_kernels()` as kernel parameters 50 51 Returns 52 ------- 53 numpy-array 54 estimated bias 55 """ 56 57 check_attributes(X, Y) 58 59 if model == "KRR": 60 model = KernelRidge(kernel=kernel_function, **kwargs) 61 elif model == "SVR": 62 model = SVR(kernel=kernel_function, **kwargs) 63 elif model == "EWF": 64 K = pairwise_kernels(X, X_grid, metric=kernel_function, **kwargs) 65 p_err = Y - p 66 bias = np.sum(p_err.flatten() * K.T, axis=1) / np.sum(K.T, axis=1) 67 return bias 68 else: 69 raise ValueError( 70 f"Model {model} is not defined. Valid Options include: KRR, SVR, or EWF" 71 ) 72 73 # Use chosen model to predict bias_calibration based on features X. 74 bias_calibration = Y - p 75 model.fit(X, bias_calibration) 76 bias = model.predict(X_grid) 77 return bias 78 79 80def construct_credible_error_vector(Y, Yr_up, Yr_down, alpha): 81 """ 82 For a one dimensional output prediction Y, construct the credible error vector. 83 Uses given lower and upper percentiles. Assumes credible level alpha is fixed. 
84 85 Parameters 86 ---------- 87 Y : numpy-array 88 data, of size Nx1 [N is the number of data points] 89 90 Yr_up : numpy-array 91 upper percentile vector, of size Nx1 [N is the number of data points] 92 93 Yr_down : numpy-array 94 lower percentile vector, of size Nx1 [N is the number of data points] 95 96 alpha : float 97 the theoretical credible level alpha 98 99 Returns 100 ------- 101 numpy-array 102 Credible Error Vector 103 """ 104 check_credible_vector_args(Y, Yr_up, Yr_down, alpha) 105 106 # indicator of Y less than posterior percentile r 107 Yind = 1.0 * ((Y < Yr_up) * (Y > Yr_down)) 108 109 # percentile/probability error vector 110 e = Yind - alpha 111 112 return e 113 114 115def calibrate( 116 Xtrain, prob_train, Ytrain, Xtest=None, prob_test=None, method="platt", **kwargs 117): 118 """ 119 A calibration method that takes the predicted probabilties and positive cases and recalibrate the probabilities. 120 121 Parameters 122 ---------- 123 Xtrain : array, shape (n_samples_train,) 124 Features used for training 125 126 prob_train : array, shape (n_samples_train,) 127 Probabilities of positive class to train a calibration model 128 129 Ytrain : array, shape (n_samples_train,) 130 Values used for training 131 132 Xtest : array, shape (n_samples_test,) 133 Features used for testing 134 135 prob_test : array, shape (n_samples_test,) 136 Probabilities of the positive class to be calibrated (test set). If None it re-calibrate the training set. 137 138 method: string, 'platt', 'isotonic', 'temperature_scaling', 'beta', 'HB', 'BBG', 'ENIR' 139 The method to use for calibration. Can be ‘sigmoid’ which corresponds to Platt’s method 140 (i.e. a logistic regression model) or ‘isotonic’ which is a non-parametric approach. 141 It is not advised to use isotonic calibration with too few calibration samples (<<1000) since it tends to overfit. 142 143 **kwargs 144 Additional Args used to fit KRR or EWF 145 146 Returns 147 ------- 148 array, shape (n_bins,) 149 The calibrated error for test set. 
(p_calibrated) 150 151 """ 152 probs = prob_train[:, np.newaxis] if prob_test is None else prob_test[:, np.newaxis] 153 Xtest = Xtrain if Xtest is None else Xtest 154 155 if method == "platt": 156 model = LogisticRegression() 157 model.fit(prob_train[:, np.newaxis], Ytrain) # LR needs X to be 2-dimensional 158 p_calibrated = model.predict_proba(probs)[:, 1] 159 160 elif method == "isotonic": 161 model = IsotonicRegression(out_of_bounds="clip") 162 model.fit(prob_train, Ytrain) # LR needs X to be 2-dimensional 163 p_calibrated = model.transform(probs.flatten()) 164 165 elif method == "temperature_scaling": 166 model = TemperatureScaling() 167 model.fit(prob_train, Ytrain) 168 p_calibrated = model.transform(probs) 169 170 elif method == "beta": 171 model = BetaCalibration() 172 model.fit(prob_train, Ytrain) 173 p_calibrated = model.transform(probs) 174 175 elif method == "HB": 176 model = HistogramBinning() 177 model.fit(prob_train, Ytrain) 178 p_calibrated = model.transform(probs) 179 180 elif method == "BBQ": 181 model = BBQ() 182 model.fit(prob_train, Ytrain) 183 p_calibrated = model.transform(probs) 184 185 elif method == "ENIR": 186 model = ENIR() 187 model.fit(prob_train, Ytrain) 188 p_calibrated = model.transform(probs) 189 190 elif method == "KRR": 191 model = KRR_calibration() 192 model.fit(Xtrain, prob_train, Ytrain, **kwargs) 193 p_calibrated = model.predict(Xtest, probs.flatten(), mode="prob") 194 195 elif method == "EWF": 196 model = EWF_calibration() 197 model.fit(Xtrain, prob_train, Ytrain, **kwargs) 198 p_calibrated = model.predict(Xtest, probs.flatten(), mode="prob") 199 200 else: 201 raise ValueError("Method %s is not defined." % method) 202 203 p_calibrated[np.isnan(p_calibrated)] = 0 204 205 # normalize the large numbers and small numbers to one and zero 206 # p_calibrated[p_calibrated > 1.0] = 1.0 207 # p_calibrated[p_calibrated < 0.0] = 0.0 208 209 return p_calibrated # f-hat -- f(x) = oracle = f-hat + b(x) 210 211 212def _counts_per_bin(prob, n_bins): 213 """ 214 Taken from https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/calibration.py#L513 215 """ 216 217 bins = np.linspace(0.0, 1.0 + 1e-8, n_bins + 1) 218 binids = np.digitize(prob, bins) - 1 219 bin_total = np.bincount(binids, minlength=len(bins)) 220 221 nonzero = bin_total != 0 222 return bin_total[nonzero] 223 224 225def calibration_error(y_true, y_prob, n_bins=10, method="ECE"): 226 """ 227 Compute calibration error given true targets and predicted probabilities. 228 Calibration curves may also be referred to as reliability diagrams. 229 230 Parameters 231 ---------- 232 y_true : array, shape (n_samples,) 233 True targets. 234 235 y_prob : array, shape (n_samples,) 236 Probabilities of the positive class. 237 238 method : string, default='ECE', {'ECE', 'MCE', 'BS'} 239 Which method to be used to compute calibration error. 240 241 n_bins : int 242 Number of bins. Note that a bigger number requires more data. 243 244 Returns 245 ------- 246 float 247 calibration error score 248 """ 249 250 # compute fraction of positive cases per y_prob in a bin. 
251 # See scikit-learn documentation for details 252 fraction_of_positives, mean_predicted_value = calibration_curve( 253 y_true, y_prob, normalize=False, n_bins=n_bins, strategy="uniform" 254 ) 255 256 if method == "ECE": 257 hist_count = _counts_per_bin(y_prob, n_bins) 258 return np.sum( 259 hist_count * np.abs(fraction_of_positives - mean_predicted_value) 260 ) / np.sum(hist_count) 261 elif method == "MCE": 262 return np.max(np.abs(fraction_of_positives - mean_predicted_value)) 263 elif method == "BS": 264 return brier_score_loss(y_true, y_prob, pos_label=1) 265 else: 266 raise ValueError("Method %s is not defined." % method)
def local_bias_estimator(X, Y, p, X_grid, model='KRR', kernel_function='rbf', **kwargs):
Estimates the model bias as bias = Y - model predictions. Assumes the model is already close to the oracle model.
Parameters
- X (numpy-array): data, of size NxD [N is the number of data points, D is the feature dimension]
- Y (numpy-array): credible error vector, of size Nx1 [N is the number of data points]
- p (numpy-array): probability vector, of size Nx1 [N is the number of data points]
- X_grid (numpy-array):
  - For EWF, X_grid is used to build a kernel, which in turn is used to calculate the bias
  - Otherwise, X_grid is used as "test features" to predict the bias
- model (str): Indicates the model type. Valid options:
  - "KRR": KernelRidge
  - "SVR": SVR
  - "EWF": EWF
- kernel_function (str): Indicates the kernel function type. Refer to the sklearn documentation to see which kernel functions are valid for the chosen model.
- **kwargs: extra parameters passed to `pairwise_kernels()` as kernel parameters
Returns
- numpy-array: estimated bias
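Example
A minimal usage sketch with synthetic data (the shapes and the `gamma` kernel parameter below are illustrative assumptions, not values prescribed by KiTE):

```python
import numpy as np
from KiTE.calibrate import local_bias_estimator

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 2))                  # N=200 points, D=2 features
p = 1.0 / (1.0 + np.exp(-X[:, 0]))             # model's predicted probabilities
Y = (rng.uniform(size=200) < p).astype(float)  # outcomes roughly consistent with p
X_grid = rng.normal(size=(50, 2))              # locations where the bias is wanted

# EWF: kernel-weighted average of (Y - p) around each grid point;
# `gamma` is forwarded to pairwise_kernels() as an rbf kernel parameter
bias = local_bias_estimator(
    X, Y, p, X_grid, model="EWF", kernel_function="rbf", gamma=0.5
)
print(bias.shape)  # (50,)
```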
def construct_credible_error_vector(Y, Yr_up, Yr_down, alpha):
For a one-dimensional output prediction Y, construct the credible error vector from the given lower and upper percentiles. Assumes the credible level alpha is fixed.
Parameters
- Y (numpy-array): data, of size Nx1 [N is the number of data points]
- Yr_up (numpy-array): upper percentile vector, of size Nx1 [N is the number of data points]
- Yr_down (numpy-array): lower percentile vector, of size Nx1 [N is the number of data points]
- alpha (float): the theoretical credible level alpha
Returns
- numpy-array: Credible Error Vector
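Example
A quick sanity check on synthetic data: for a well-calibrated 90% central credible interval, the error vector should average to roughly zero.

```python
import numpy as np
from KiTE.calibrate import construct_credible_error_vector

rng = np.random.default_rng(0)
Y = rng.normal(size=1000)        # true outcomes
Yr_down = np.full(1000, -1.645)  # 5th percentile of N(0, 1)
Yr_up = np.full(1000, 1.645)     # 95th percentile of N(0, 1)

# coverage indicator minus alpha; the mean should be near 0 here
e = construct_credible_error_vector(Y, Yr_up, Yr_down, alpha=0.9)
print(e.mean())
```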
def calibrate(Xtrain, prob_train, Ytrain, Xtest=None, prob_test=None, method='platt', **kwargs):
Takes the predicted probabilities and the positive cases and re-calibrates the probabilities.
Parameters
- Xtrain (array, shape (n_samples_train,)): Features used for training
- prob_train (array, shape (n_samples_train,)): Probabilities of positive class to train a calibration model
- Ytrain (array, shape (n_samples_train,)): Values used for training
- Xtest (array, shape (n_samples_test,)): Features used for testing. If None, the training features are reused.
- prob_test (array, shape (n_samples_test,)): Probabilities of the positive class to be calibrated (test set). If None, the training set is re-calibrated.
- method (string, {'platt', 'isotonic', 'temperature_scaling', 'beta', 'HB', 'BBQ', 'ENIR', 'KRR', 'EWF'}): The method to use for calibration. 'platt' corresponds to Platt's method (i.e. a logistic regression model) and 'isotonic' is a non-parametric approach. It is not advised to use isotonic calibration with too few calibration samples (<<1000) since it tends to overfit.
- **kwargs: Additional arguments used to fit KRR or EWF
Returns
- array, shape (n_samples_test,): The calibrated probabilities for the test set (p_calibrated)
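Example
A usage sketch on synthetic, deliberately over-confident scores (all data below is illustrative):

```python
import numpy as np
from KiTE.calibrate import calibrate

rng = np.random.default_rng(0)
Xtrain = rng.normal(size=(500, 2))
true_p = 1.0 / (1.0 + np.exp(-Xtrain[:, 0]))           # "oracle" probabilities
prob_train = 1.0 / (1.0 + np.exp(-3 * Xtrain[:, 0]))   # over-confident raw scores
Ytrain = (rng.uniform(size=500) < true_p).astype(int)  # observed positive cases

# Platt scaling uses only the raw probabilities and the labels
p_platt = calibrate(Xtrain, prob_train, Ytrain, method="platt")

# KRR and EWF also use the features; extra kwargs are forwarded to their fit()
p_krr = calibrate(Xtrain, prob_train, Ytrain, method="KRR")
```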
def calibration_error(y_true, y_prob, n_bins=10, method='ECE'):
Compute calibration error given true targets and predicted probabilities. Calibration curves may also be referred to as reliability diagrams.
Parameters
- y_true (array, shape (n_samples,)): True targets.
- y_prob (array, shape (n_samples,)): Probabilities of the positive class.
- method (string, default='ECE', {'ECE', 'MCE', 'BS'}): Which method to use to compute the calibration error.
- n_bins (int): Number of bins. Note that a bigger number requires more data.
Returns
- float: calibration error score
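Example
A small hand-made sketch comparing the three error metrics on the same predictions:

```python
import numpy as np
from KiTE.calibrate import calibration_error

y_true = np.array([0, 0, 1, 1, 0, 1, 0, 1, 1, 1])
y_prob = np.array([0.1, 0.2, 0.3, 0.9, 0.4, 0.8, 0.3, 0.7, 0.6, 0.9])

print(calibration_error(y_true, y_prob, n_bins=5, method="ECE"))  # expected calibration error
print(calibration_error(y_true, y_prob, n_bins=5, method="MCE"))  # maximum calibration error
print(calibration_error(y_true, y_prob, n_bins=5, method="BS"))   # Brier score
```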