# Author: Niels Nuyttens <niels@nannyml.com>
# Jakub Bialek <jakub@nannyml.com>
#
# License: Apache Software License 2.0
"""Module containing functions to estimate sampling error for binary classification metrics.
The implementation of the sampling error estimation is split into two functions.
The first function is called during fitting and calculates the sampling error components based on the reference data.
Most of the time these will be the standard deviation of the distribution of differences between
``y_true`` and ``y_pred`` and the fraction of positive labels in ``y_true``.
The second function will be called during calculation or estimation. It takes the predetermined error components and
combines them with the size of the (analysis) data to give an estimate for the sampling error.
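
A minimal illustrative sketch of that two-step pattern (synthetic data; the variable names
are chosen purely for demonstration):

>>> import numpy as np
>>> import pandas as pd
>>> rng = np.random.default_rng(42)
>>> y_true_ref = pd.Series(rng.integers(0, 2, size=10_000))
>>> y_pred_ref = pd.Series(np.where(rng.random(10_000) < 0.8, y_true_ref, 1 - y_true_ref))
>>> components = accuracy_sampling_error_components(y_true_ref, y_pred_ref)  # fitting step
>>> chunk = pd.DataFrame({'y_pred': y_pred_ref[:500]})  # an analysis chunk of 500 rows
>>> sampling_error = accuracy_sampling_error(components, chunk)  # estimation step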
"""
from typing import Optional, Tuple, Union
import numpy as np
import pandas as pd
from sklearn.metrics import average_precision_score
from nannyml.exceptions import InvalidArgumentsException
# How many experiments to perform when doing resampling to approximate sampling error.
N_EXPERIMENTS = 50
# Max resample size - we don't need full reference if it is too big.
MAX_RESAMPLE_SIZE = 50_000
def _universal_sampling_error(reference_std, reference_fraction, data):
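    """Combine precomputed components with the chunk size into a standard error.

    The estimate is ``reference_std / sqrt(len(data) * reference_fraction)``, i.e. the
    standard error of the mean over the subset of rows that is relevant for the metric.
    """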
return reference_std / np.sqrt(len(data) * reference_fraction)
def auroc_sampling_error_components(y_true_reference: pd.Series, y_pred_proba_reference: pd.Series) -> Tuple:
"""
Calculate sampling error components for AUROC using reference data.
    Calculation is based on the Variance Sum Law, expressing AUROC as a Mann-Whitney U statistic.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_proba_reference: pd.Series
Prediction values for the reference dataset.
Returns
-------
(std, fraction): Tuple[np.ndarray, float]
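
    Examples
    --------
    An illustrative sketch on synthetic reference data (values are made up purely for
    demonstration):

    >>> import numpy as np
    >>> import pandas as pd
    >>> rng = np.random.default_rng(0)
    >>> y_true_ref = pd.Series(rng.integers(0, 2, size=5_000))
    >>> y_score_ref = pd.Series((y_true_ref + rng.random(5_000)) / 2)  # scores correlated with labels
    >>> std, fraction = auroc_sampling_error_components(y_true_ref, y_score_ref)
    >>> # the components are combined with a chunk size later, in auroc_sampling_error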
"""
y_true = y_true_reference.copy().reset_index(drop=True)
y_pred_proba = y_pred_proba_reference.copy().reset_index(drop=True)
if np.mean(y_true) > 0.5:
y_true = abs(np.asarray(y_true) - 1)
y_pred_proba = 1 - y_pred_proba
sorted_idx = np.argsort(y_pred_proba)
y_pred_proba = y_pred_proba[sorted_idx]
y_true = y_true[sorted_idx]
rank_order = np.asarray(range(len(y_pred_proba)))
positive_ranks = y_true * rank_order
indexes = np.unique(positive_ranks)[1:]
ser = []
for i, index in enumerate(indexes):
ser.append(index - i)
n_pos = np.sum(y_true)
n_neg = len(y_true) - n_pos
ser_multi = np.true_divide(ser, n_neg)
fraction = n_pos / len(y_true)
return np.std(ser_multi), fraction
def auroc_sampling_error(sampling_error_components, data):
"""
Calculate the AUROC sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
reference_std, reference_fraction = sampling_error_components
return _universal_sampling_error(reference_std, reference_fraction, data)
def ap_sampling_error_components(
y_true_reference: pd.Series, y_pred_proba_reference: pd.Series
) -> Tuple[np.ndarray, int]:
"""
Calculate sampling error components for AP using reference data.
    The sampling error is measured on the reference data by resampling and is then
    extrapolated to other sample sizes using a 1/sqrt(n) approximation.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_proba_reference: pd.Series
Prediction values for the reference dataset.
Returns
-------
(std, sample_size): Tuple[np.ndarray, int]
    Note that the sampling error components are different from the usual ones!
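
    Examples
    --------
    An illustrative sketch on synthetic reference data (purely for demonstration):

    >>> import numpy as np
    >>> import pandas as pd
    >>> rng = np.random.default_rng(1)
    >>> y_true_ref = pd.Series(rng.integers(0, 2, size=20_000))
    >>> y_score_ref = pd.Series(rng.random(20_000))
    >>> std, sample_size = ap_sampling_error_components(y_true_ref, y_score_ref)
    >>> # std is the standard deviation of AP over bootstrap resamples of `sample_size` rows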
"""
    # we don't need all of the reference data if it's big (saves compute)
sample_size = np.minimum(y_true_reference.shape[0] // 2, MAX_RESAMPLE_SIZE)
y_true_reference = y_true_reference.to_numpy()
y_pred_proba_reference = y_pred_proba_reference.to_numpy()
ap_results = []
for _ in range(N_EXPERIMENTS):
_indexes_for_sample = np.random.choice(y_true_reference.shape[0], sample_size, replace=True)
sample_y_true_reference = y_true_reference[_indexes_for_sample]
sample_y_pred_proba_reference = y_pred_proba_reference[_indexes_for_sample]
ap_results.append(average_precision_score(sample_y_true_reference, sample_y_pred_proba_reference))
return np.std(ap_results), sample_size
def ap_sampling_error(sampling_error_components, data):
"""
    Calculate the AP (average precision) sampling error for a chunk of data.
    If the first component is NaN (due to data quality issues), the result will be NaN.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
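
    Notes
    -----
    The reference-level standard deviation is extrapolated with a ``1/sqrt(n)`` rule:
    ``sampling_error = reference_std * sqrt(reference_size / len(data))``. For example,
    with hypothetical components ``(0.02, 10_000)`` and a chunk of 2_500 rows, the
    estimate is ``0.02 * sqrt(10_000 / 2_500) = 0.04``.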
"""
reference_std, reference_size = sampling_error_components
analysis_size = data.shape[0]
return reference_std * np.sqrt(reference_size / analysis_size)
def f1_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
"""
Calculate sampling error components for F1 using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
Returns
-------
(std, fraction): Tuple[np.ndarray, float]
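
    Notes
    -----
    Observation-level values are built from the reference confusion outcomes: true
    positives contribute 1, false positives and false negatives contribute 0, and the
    correcting factor ``n / (0.5 * (FP + FN) + TP)`` rescales them so that their mean
    equals the reference F1 (since ``F1 = TP / (TP + 0.5 * (FP + FN))``). The standard
    deviation of these values and the fraction of rows that are a TP, FP or FN are the
    returned components.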
"""
TP = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 1), 1, np.nan)
FP = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 1), 0, np.nan)
FN = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 0), 0, np.nan)
TP = TP[~np.isnan(TP)]
FN = FN[~np.isnan(FN)]
FP = FP[~np.isnan(FP)]
tp_fp_fn = np.concatenate([TP, FN, FP])
correcting_factor = len(tp_fp_fn) / ((len(FN) + len(FP)) * 0.5 + len(TP))
obs_level_f1 = tp_fp_fn * correcting_factor
fraction_of_relevant = len(tp_fp_fn) / len(y_pred_reference)
return np.std(obs_level_f1), fraction_of_relevant
def f1_sampling_error(sampling_error_components, data):
"""
Calculate the F1 sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
reference_std, reference_fraction = sampling_error_components
return _universal_sampling_error(reference_std, reference_fraction, data)
def precision_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
"""
Calculate sampling error components for precision using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
Returns
-------
(std, fraction): Tuple[np.ndarray, float]
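
    Notes
    -----
    True positives contribute 1 and false positives contribute 0, so the mean of the
    observation-level values equals the reference precision. Their standard deviation,
    together with the fraction of positive predictions, feeds the
    ``std / sqrt(chunk_size * fraction)`` estimate in ``precision_sampling_error``.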
"""
TP = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 1), 1, np.nan)
FP = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 1), 0, np.nan)
TP = TP[~np.isnan(TP)]
FP = FP[~np.isnan(FP)]
obs_level_precision = np.concatenate([TP, FP])
amount_positive_pred = np.sum(y_pred_reference)
fraction_of_pos_pred = amount_positive_pred / len(y_pred_reference)
return np.std(obs_level_precision), fraction_of_pos_pred
def precision_sampling_error(sampling_error_components, data):
"""
Calculate the precision sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
reference_std, reference_fraction = sampling_error_components
return _universal_sampling_error(reference_std, reference_fraction, data)
def recall_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
"""
Calculate sampling error components for recall using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
Returns
-------
(std, fraction): Tuple[np.ndarray, float]
"""
TP = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 1), 1, np.nan)
FN = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 0), 0, np.nan)
TP = TP[~np.isnan(TP)]
FN = FN[~np.isnan(FN)]
obs_level_recall = np.concatenate([TP, FN])
fraction_of_relevant = sum(y_true_reference) / len(y_pred_reference)
return np.std(obs_level_recall), fraction_of_relevant
def recall_sampling_error(sampling_error_components, data):
"""
Calculate the recall sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
reference_std, reference_fraction = sampling_error_components
return _universal_sampling_error(reference_std, reference_fraction, data)
def specificity_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
"""
Calculate sampling error components for specificity using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
Returns
-------
(std, fraction): Tuple[np.ndarray, float]
"""
TN = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 0), 1, np.nan)
FP = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 1), 0, np.nan)
TN = TN[~np.isnan(TN)]
FP = FP[~np.isnan(FP)]
obs_level_specificity = np.concatenate([TN, FP])
fraction_of_relevant = len(obs_level_specificity) / len(y_pred_reference)
return np.std(obs_level_specificity), fraction_of_relevant
def specificity_sampling_error(sampling_error_components, data):
"""
Calculate the specificity sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
reference_std, reference_fraction = sampling_error_components
return _universal_sampling_error(reference_std, reference_fraction, data)
def accuracy_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
"""
Calculate sampling error components for accuracy using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
Returns
-------
(std,): Tuple[np.ndarray]
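
    Notes
    -----
    Accuracy is the mean of a per-row correctness indicator (1 if ``y_true == y_pred``,
    0 otherwise), so for a chunk of ``n`` rows the sampling error is estimated as the
    standard deviation of that indicator divided by ``sqrt(n)``.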
"""
y_true_reference = np.asarray(y_true_reference).astype(int)
y_pred_reference = np.asarray(y_pred_reference).astype(int)
correct_table = (y_true_reference == y_pred_reference).astype(int)
return (np.std(correct_table),)
def accuracy_sampling_error(sampling_error_components: Tuple, data) -> float:
"""
Calculate the accuracy sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
(reference_std,) = sampling_error_components
return reference_std / np.sqrt(len(data))
def true_positive_sampling_error_components(
y_true_reference: pd.Series, y_pred_reference: pd.Series, normalize_confusion_matrix: Union[str, None]
) -> Tuple:
"""
Estimate sampling error components for true positive rate using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
normalize_confusion_matrix: str
The type of normalization to apply to the confusion matrix.
Returns
-------
(std, relevant_proportion, norm_type): Tuple[float, float, str]
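
    Notes
    -----
    The returned components depend on the normalization mode:

    * ``None`` or ``'all'``: the standard deviation of a per-row true-positive indicator
      over all reference rows, with ``relevant_proportion`` equal to 1.
    * ``'true'``: the indicator is restricted to actual positives and
      ``relevant_proportion`` is the fraction of actual positives.
    * ``'pred'``: the indicator is restricted to predicted positives and
      ``relevant_proportion`` is the fraction of predicted positives.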
"""
y_true_reference = np.asarray(y_true_reference).astype(int)
y_pred_reference = np.asarray(y_pred_reference).astype(int)
obs_level_tp = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 1), 1, 0)
obs_level_fn = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 0), 1, 0)
obs_level_fp = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 1), 1, 0)
num_tp = np.sum(obs_level_tp)
num_fn = np.sum(obs_level_fn)
num_fp = np.sum(obs_level_fp)
if normalize_confusion_matrix is None:
std = np.std(obs_level_tp)
relevant_proportion = 1
elif normalize_confusion_matrix == "all":
std = np.std(obs_level_tp)
relevant_proportion = 1
elif normalize_confusion_matrix == "true":
number_of_real_positives = num_fn + num_tp
proportion_of_real_positives = number_of_real_positives / len(y_true_reference)
obs_level_tp = np.concatenate([np.ones(num_tp), np.zeros(number_of_real_positives - num_tp)])
std = np.std(obs_level_tp)
relevant_proportion = proportion_of_real_positives
elif normalize_confusion_matrix == "pred":
number_of_pred_positives = num_fp + num_tp
proportion_of_pred_positives = number_of_pred_positives / len(y_true_reference)
obs_level_tp = np.concatenate([np.ones(num_tp), np.zeros(number_of_pred_positives - num_tp)])
std = np.std(obs_level_tp)
relevant_proportion = proportion_of_pred_positives
else:
raise InvalidArgumentsException(
f"'normalize_confusion_matrix' should be None, 'true', 'pred' or 'all' "
f"but got '{normalize_confusion_matrix}"
)
return std, relevant_proportion, normalize_confusion_matrix
def true_positive_sampling_error(sampling_error_components: Tuple, data) -> float:
"""
Calculate the true positive rate sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
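
    Notes
    -----
    With hypothetical components ``(std=0.3, relevant_proportion=0.25, norm_type='true')``
    and a chunk of 1_000 rows, the estimate is ``0.3 / sqrt(1_000 * 0.25)``, approximately 0.019.
    When ``norm_type`` is None the metric is a raw count rather than a rate, so the per-row
    standard error is scaled back up by the chunk size: ``(std / sqrt(n)) * n = std * sqrt(n)``.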
"""
(reference_std, relevant_proportion, norm_type) = sampling_error_components
if norm_type is None:
tp_standard_error = (reference_std / np.sqrt(len(data))) * len(data)
elif norm_type == "all":
tp_standard_error = reference_std / np.sqrt(len(data))
elif norm_type == "true" or norm_type == "pred":
tp_standard_error = reference_std / np.sqrt(len(data) * relevant_proportion)
else:
raise InvalidArgumentsException(
f"'normalize_confusion_matrix' should be None, 'true', 'pred' or 'all' " f"but got '{norm_type}"
)
return tp_standard_error
def true_negative_sampling_error_components(
y_true_reference: pd.Series, y_pred_reference: pd.Series, normalize_confusion_matrix: Union[str, None]
) -> Tuple:
"""
Estimate sampling error components for true negative rate using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
normalize_confusion_matrix: str
The type of normalization to apply to the confusion matrix.
Returns
-------
(std, relevant_proportion, norm_type): Tuple[float, float, str]
"""
y_true_reference = np.asarray(y_true_reference).astype(int)
y_pred_reference = np.asarray(y_pred_reference).astype(int)
obs_level_tn = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 0), 1, 0)
obs_level_fn = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 0), 1, 0)
obs_level_fp = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 1), 1, 0)
num_tn = np.sum(obs_level_tn)
num_fn = np.sum(obs_level_fn)
num_fp = np.sum(obs_level_fp)
if normalize_confusion_matrix is None:
std = np.std(obs_level_tn)
relevant_proportion = 1
elif normalize_confusion_matrix == "all":
std = np.std(obs_level_tn)
relevant_proportion = 1
elif normalize_confusion_matrix == "true":
number_of_real_negatives = num_fp + num_tn
proportion_of_real_negatives = number_of_real_negatives / len(y_true_reference)
obs_level_tn = np.concatenate([np.ones(num_tn), np.zeros(number_of_real_negatives - num_tn)])
std = np.std(obs_level_tn)
relevant_proportion = proportion_of_real_negatives
elif normalize_confusion_matrix == "pred":
number_of_pred_negatives = num_fn + num_tn
proportion_of_pred_negatives = number_of_pred_negatives / len(y_true_reference)
obs_level_tn = np.concatenate([np.ones(num_tn), np.zeros(number_of_pred_negatives - num_tn)])
std = np.std(obs_level_tn)
relevant_proportion = proportion_of_pred_negatives
else:
raise InvalidArgumentsException(
f"'normalize_confusion_matrix' should be None, 'true', 'pred' or 'all' "
f"but got '{normalize_confusion_matrix}"
)
return std, relevant_proportion, normalize_confusion_matrix
def true_negative_sampling_error(sampling_error_components: Tuple, data) -> float:
"""
Calculate the true negative rate sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
(reference_std, relevant_proportion, norm_type) = sampling_error_components
if norm_type is None:
tn_standard_error = (reference_std / np.sqrt(len(data))) * len(data)
elif norm_type == "all":
tn_standard_error = reference_std / np.sqrt(len(data))
elif norm_type == "true" or norm_type == "pred":
tn_standard_error = reference_std / np.sqrt(len(data) * relevant_proportion)
else:
raise InvalidArgumentsException(
f"'normalize_confusion_matrix' should be None, 'true', 'pred' or 'all' " f"but got '{norm_type}"
)
return tn_standard_error
def false_positive_sampling_error_components(
y_true_reference: pd.Series, y_pred_reference: pd.Series, normalize_confusion_matrix: Union[str, None]
) -> Tuple:
"""
Estimate sampling error components for false positive rate using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
normalize_confusion_matrix: str
The type of normalization to apply to the confusion matrix.
Returns
-------
(std, relevant_proportion, norm_type): Tuple[float, float, str]
"""
y_true_reference = np.asarray(y_true_reference).astype(int)
y_pred_reference = np.asarray(y_pred_reference).astype(int)
obs_level_tp = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 1), 1, 0)
obs_level_tn = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 0), 1, 0)
obs_level_fp = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 1), 1, 0)
num_tp = np.sum(obs_level_tp)
num_tn = np.sum(obs_level_tn)
num_fp = np.sum(obs_level_fp)
if normalize_confusion_matrix is None:
std = np.std(obs_level_fp)
relevant_proportion = 1
elif normalize_confusion_matrix == "all":
std = np.std(obs_level_fp)
relevant_proportion = 1
elif normalize_confusion_matrix == "true":
number_of_real_negatives = num_fp + num_tn
proportion_of_real_negatives = number_of_real_negatives / len(y_true_reference)
obs_level_fp = np.concatenate([np.ones(num_fp), np.zeros(number_of_real_negatives - num_fp)])
std = np.std(obs_level_fp)
relevant_proportion = proportion_of_real_negatives
elif normalize_confusion_matrix == "pred":
number_of_pred_positives = num_fp + num_tp
proportion_of_pred_positives = number_of_pred_positives / len(y_true_reference)
obs_level_fp = np.concatenate([np.ones(num_fp), np.zeros(number_of_pred_positives - num_fp)])
std = np.std(obs_level_fp)
relevant_proportion = proportion_of_pred_positives
else:
raise InvalidArgumentsException(
f"'normalize_confusion_matrix' should be None, 'true', 'pred' or 'all' "
f"but got '{normalize_confusion_matrix}"
)
return std, relevant_proportion, normalize_confusion_matrix
def false_positive_sampling_error(sampling_error_components: Tuple, data) -> float:
"""
Calculate the false positive rate sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
(reference_std, relevant_proportion, norm_type) = sampling_error_components
if norm_type is None:
fp_standard_error = (reference_std / np.sqrt(len(data))) * len(data)
elif norm_type == "all":
fp_standard_error = reference_std / np.sqrt(len(data))
elif norm_type == "true" or norm_type == "pred":
fp_standard_error = reference_std / np.sqrt(len(data) * relevant_proportion)
else:
raise InvalidArgumentsException(
f"'normalize_confusion_matrix' should be None, 'true', 'pred' or 'all' " f"but got '{norm_type}"
)
return fp_standard_error
def false_negative_sampling_error_components(
y_true_reference: pd.Series, y_pred_reference: pd.Series, normalize_confusion_matrix: Union[str, None]
) -> Tuple:
"""
Estimate sampling error components for false negative rate using reference data.
    Calculation is based on a modified standard error of the mean formula.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
normalize_confusion_matrix: str
The type of normalization to apply to the confusion matrix.
Returns
-------
(std, relevant_proportion, norm_type): Tuple[float, float, str]
"""
y_true_reference = np.asarray(y_true_reference).astype(int)
y_pred_reference = np.asarray(y_pred_reference).astype(int)
obs_level_tp = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 1), 1, 0)
obs_level_tn = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 0), 1, 0)
obs_level_fn = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 0), 1, 0)
num_tp = np.sum(obs_level_tp)
num_tn = np.sum(obs_level_tn)
num_fn = np.sum(obs_level_fn)
if normalize_confusion_matrix is None:
std = np.std(obs_level_fn)
relevant_proportion = 1
elif normalize_confusion_matrix == "all":
std = np.std(obs_level_fn)
relevant_proportion = 1 # Could be None, None
elif normalize_confusion_matrix == "true":
number_of_real_positives = num_fn + num_tp
proportion_of_real_positives = number_of_real_positives / len(y_true_reference)
obs_level_fn = np.concatenate([np.ones(num_fn), np.zeros(number_of_real_positives - num_fn)])
std = np.std(obs_level_fn)
relevant_proportion = proportion_of_real_positives
elif normalize_confusion_matrix == "pred":
number_of_pred_negatives = num_fn + num_tn
proportion_of_pred_negatives = number_of_pred_negatives / len(y_true_reference)
obs_level_fn = np.concatenate([np.ones(num_fn), np.zeros(number_of_pred_negatives - num_fn)])
std = np.std(obs_level_fn)
relevant_proportion = proportion_of_pred_negatives
else:
raise InvalidArgumentsException(
f"'normalize_confusion_matrix' should be None, 'true', 'pred' or 'all' "
f"but got '{normalize_confusion_matrix}"
)
return std, relevant_proportion, normalize_confusion_matrix
def false_negative_sampling_error(sampling_error_components: Tuple, data) -> float:
"""
    Calculate the false negative rate sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
(reference_std, relevant_proportion, norm_type) = sampling_error_components
if norm_type is None:
fn_standard_error = (reference_std / np.sqrt(len(data))) * len(data)
elif norm_type == "all":
fn_standard_error = reference_std / np.sqrt(len(data))
elif norm_type == "true" or norm_type == "pred":
fn_standard_error = reference_std / np.sqrt(len(data) * relevant_proportion)
else:
raise InvalidArgumentsException(
f"'normalize_confusion_matrix' should be None, 'true', 'pred' or 'all' " f"but got '{norm_type}"
)
return fn_standard_error
def business_value_sampling_error_components(
y_true_reference: pd.Series,
y_pred_reference: pd.Series,
business_value_matrix: np.ndarray,
normalize_business_value: Optional[str],
) -> Tuple:
"""
    Estimate sampling error components for the business value metric using reference data.
Parameters
----------
y_true_reference: pd.Series
Target values for the reference dataset.
y_pred_reference: pd.Series
Predictions for the reference dataset.
business_value_matrix: np.ndarray
        A 2x2 matrix of values for the business problem.
    normalize_business_value: Optional[str]
        The type of normalization to apply to the business value; either None or 'per_prediction'.
Returns
-------
components: tuple
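
    Notes
    -----
    Each reference row is mapped to the business value of its confusion-matrix outcome,
    with the value matrix laid out as ``[[value_TN, value_FP], [value_FN, value_TP]]``.
    The standard deviation of these per-row values is the first component; the
    normalization mode is passed through unchanged as the second.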
"""
y_true_reference = np.asarray(y_true_reference).astype(int)
y_pred_reference = np.asarray(y_pred_reference).astype(int)
obs_level_tp = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 1), 1, 0)
obs_level_tn = np.where((y_true_reference == y_pred_reference) & (y_pred_reference == 0), 1, 0)
obs_level_fp = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 1), 1, 0)
obs_level_fn = np.where((y_true_reference != y_pred_reference) & (y_pred_reference == 0), 1, 0)
combined_and_weighted = (
obs_level_tp * business_value_matrix[1, 1]
+ obs_level_tn * business_value_matrix[0, 0]
+ obs_level_fp * business_value_matrix[0, 1]
+ obs_level_fn * business_value_matrix[1, 0]
)
std = np.std(combined_and_weighted)
return (std, normalize_business_value)
def business_value_sampling_error(sampling_error_components: Tuple, data) -> float:
"""
    Calculate the business value sampling error for a chunk of data.
Parameters
----------
sampling_error_components : a set of parameters that were derived from reference data.
data : the (analysis) data you want to calculate or estimate a metric for.
Returns
-------
sampling_error: float
"""
(reference_std, norm_type) = sampling_error_components
if norm_type is None:
analysis_std = reference_std * len(data)
    else:  # norm_type must be 'per_prediction'
analysis_std = reference_std
total_value_standard_error = analysis_std / np.sqrt(len(data))
return total_value_standard_error