Source code for nannyml.sampling_error.regression

```#  Author:   Niels Nuyttens  <niels@nannyml.com>
#

from typing import Tuple

import numpy as np
import pandas as pd

def mae_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
    """
    Derive the Mean Absolute Error (MAE) sampling error components from reference data.

    Parameters
    ----------
    y_true_reference: pd.Series
        Target values for the reference dataset.
    y_pred_reference: pd.Series
        Predictions for the reference dataset.

    Returns
    -------
    (std,): Tuple
        One-element tuple holding the standard deviation of the absolute
        errors ``|y_true - y_pred|`` observed on the reference data.
    """
    absolute_errors = np.abs(y_true_reference - y_pred_reference)
    return (np.std(absolute_errors),)

def mae_sampling_error(sampling_error_components: Tuple, data) -> float:
    """
    Calculate the Mean Absolute Error (MAE) sampling error for a chunk of data.

    The result is the reference standard deviation of the absolute errors
    divided by the square root of the number of rows in ``data``.

    Parameters
    ----------
    sampling_error_components : Tuple
        Parameters derived from reference data. Only the first element is
        used: the standard deviation of the absolute errors, as returned by
        ``mae_sampling_error_components``.
    data :
        The (analysis) data you want to calculate or estimate a metric for.
        Only its length is used.

    Returns
    -------
    sampling_error: float
    """
    reference_std = sampling_error_components[0]
    chunk_size = len(data)
    return reference_std / np.sqrt(chunk_size)

def mape_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
    """
    Calculate sampling error components for Mean Absolute Percentage Error (MAPE) using reference data.

    The per-row percentage error is ``|y_true - y_pred| / max(|y_true|, eps)``,
    where ``eps`` is the float64 machine epsilon. Dividing by the absolute
    target keeps the percentage error non-negative for negative targets, and
    the epsilon clamp avoids a division by zero for zero targets. This is the
    definition scikit-learn uses for MAPE.

    Parameters
    ----------
    y_true_reference: pd.Series
        Target values for the reference dataset.
    y_pred_reference: pd.Series
        Predictions for the reference dataset.

    Returns
    -------
    (std,): Tuple
        One-element tuple holding the standard deviation of the absolute
        percentage errors observed on the reference data.
    """
    absolute_errors = np.abs(y_true_reference - y_pred_reference)
    # Clamp the denominator so zero targets cannot produce inf/NaN ratios.
    epsilon = np.finfo(np.float64).eps
    denominators = np.maximum(np.abs(y_true_reference), epsilon)
    std = np.std(absolute_errors / denominators)
    return (std,)

def mape_sampling_error(sampling_error_components: Tuple, data) -> float:
    """
    Calculate the Mean Absolute Percentage Error (MAPE) sampling error for a chunk of data.

    The result is the reference standard deviation of the absolute percentage
    errors divided by the square root of the number of rows in ``data``.

    Parameters
    ----------
    sampling_error_components : Tuple
        Parameters derived from reference data. Only the first element is
        used: the standard deviation of the absolute percentage errors, as
        returned by ``mape_sampling_error_components``.
    data :
        The (analysis) data you want to calculate or estimate a metric for.
        Only its length is used.

    Returns
    -------
    sampling_error: float
    """
    reference_std = sampling_error_components[0]
    chunk_size = len(data)
    return reference_std / np.sqrt(chunk_size)

def mse_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
    """
    Derive the Mean Squared Error (MSE) sampling error components from reference data.

    Parameters
    ----------
    y_true_reference: pd.Series
        Target values for the reference dataset.
    y_pred_reference: pd.Series
        Predictions for the reference dataset.

    Returns
    -------
    (std,): Tuple
        One-element tuple holding the standard deviation of the squared
        errors ``(y_true - y_pred) ** 2`` observed on the reference data.
    """
    residuals = y_true_reference - y_pred_reference
    squared_errors = residuals ** 2
    return (np.std(squared_errors),)

def mse_sampling_error(sampling_error_components: Tuple, data) -> float:
    """
    Calculate the Mean Squared Error (MSE) sampling error for a chunk of data.

    The result is the reference standard deviation of the squared errors
    divided by the square root of the number of rows in ``data``.

    Parameters
    ----------
    sampling_error_components : Tuple
        Parameters derived from reference data. Only the first element is
        used: the standard deviation of the squared errors, as returned by
        ``mse_sampling_error_components``.
    data :
        The (analysis) data you want to calculate or estimate a metric for.
        Only its length is used.

    Returns
    -------
    sampling_error: float
    """
    reference_std = sampling_error_components[0]
    chunk_size = len(data)
    return reference_std / np.sqrt(chunk_size)

def msle_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
    """
    Calculate sampling error components for Mean Squared Logarithmic Error (MSLE) using reference data.

    The per-row squared logarithmic error is
    ``(log(1 + y_true) - log(1 + y_pred)) ** 2``.

    Parameters
    ----------
    y_true_reference: pd.Series
        Target values for the reference dataset. Values must be greater than
        -1 for the logarithm to be defined.
    y_pred_reference: pd.Series
        Predictions for the reference dataset. Values must be greater than
        -1 for the logarithm to be defined.

    Returns
    -------
    (std,): Tuple
        One-element tuple holding the standard deviation of the squared
        logarithmic errors observed on the reference data.
    """
    # log1p(x) evaluates log(1 + x) without the precision loss that forming
    # ``1 + x`` causes when x is close to zero.
    log_difference = np.log1p(y_true_reference) - np.log1p(y_pred_reference)
    std = np.std(log_difference ** 2)
    return (std,)

def msle_sampling_error(sampling_error_components: Tuple, data) -> float:
    """
    Calculate the Mean Squared Logarithmic Error (MSLE) sampling error for a chunk of data.

    The result is the reference standard deviation of the squared logarithmic
    errors divided by the square root of the number of rows in ``data``.

    Parameters
    ----------
    sampling_error_components : Tuple
        Parameters derived from reference data. Only the first element is
        used: the standard deviation of the squared logarithmic errors, as
        returned by ``msle_sampling_error_components``.
    data :
        The (analysis) data you want to calculate or estimate a metric for.
        Only its length is used.

    Returns
    -------
    sampling_error: float
    """
    reference_std = sampling_error_components[0]
    chunk_size = len(data)
    return reference_std / np.sqrt(chunk_size)

def rmse_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
    """
    Calculate sampling error components for Root Mean Squared Error (RMSE) using reference data.

    Parameters
    ----------
    y_true_reference: pd.Series
        Target values for the reference dataset.
    y_pred_reference: pd.Series
        Predictions for the reference dataset.

    Returns
    -------
    (squared_error_std, squared_error_mean): Tuple
        Standard deviation and mean of the squared errors
        ``(y_true - y_pred) ** 2`` observed on the reference data, in that
        order.
    """
    squared_error = (y_true_reference - y_pred_reference) ** 2
    squared_error_std = np.std(squared_error)
    squared_error_mean = np.mean(squared_error)
    return squared_error_std, squared_error_mean

def rmse_sampling_error(sampling_error_components: Tuple, data) -> float:
    """
    Calculate the Root Mean Squared Error (RMSE) sampling error for a chunk of data.

    With ``s`` the reference standard deviation of the squared errors, ``m``
    their reference mean and ``n`` the number of rows in ``data``, the result
    is ``sqrt(s**2 / (4 * n * m))``. This is the standard error of the mean
    squared error, ``s / sqrt(n)``, scaled by ``1 / (2 * sqrt(m))``: a
    first-order approximation for the square root of a mean.

    Parameters
    ----------
    sampling_error_components : Tuple
        Two-element tuple ``(squared_error_std, squared_error_mean)`` derived
        from reference data, as returned by ``rmse_sampling_error_components``.
    data :
        The (analysis) data you want to calculate or estimate a metric for.
        Only its length is used.

    Returns
    -------
    sampling_error: float
    """
    squared_error_std, squared_error_mean = sampling_error_components
    chunk_size = len(data)
    return np.sqrt((squared_error_std**2) / (4 * chunk_size * squared_error_mean))

def rmsle_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple:
    """
    Calculate sampling error components for Root Mean Squared Logarithmic Error (RMSLE) using reference data.

    The per-row squared logarithmic error is
    ``(log(1 + y_true) - log(1 + y_pred)) ** 2``.

    Parameters
    ----------
    y_true_reference: pd.Series
        Target values for the reference dataset. Values must be greater than
        -1 for the logarithm to be defined.
    y_pred_reference: pd.Series
        Predictions for the reference dataset. Values must be greater than
        -1 for the logarithm to be defined.

    Returns
    -------
    (squared_log_error_std, squared_log_error_mean): Tuple
        Standard deviation and mean of the squared logarithmic errors
        observed on the reference data, in that order.
    """
    # log1p(x) evaluates log(1 + x) without the precision loss that forming
    # ``1 + x`` causes when x is close to zero.
    squared_log_error = (np.log1p(y_true_reference) - np.log1p(y_pred_reference)) ** 2
    squared_log_error_std = np.std(squared_log_error)
    squared_log_error_mean = np.mean(squared_log_error)
    return squared_log_error_std, squared_log_error_mean

def rmsle_sampling_error(sampling_error_components: Tuple, data) -> float:
    """
    Calculate the Root Mean Squared Logarithmic Error (RMSLE) sampling error for a chunk of data.

    With ``s`` the reference standard deviation of the squared logarithmic
    errors, ``m`` their reference mean and ``n`` the number of rows in
    ``data``, the result is ``sqrt(s**2 / (4 * n * m))``. This is the standard
    error of the mean squared logarithmic error, ``s / sqrt(n)``, scaled by
    ``1 / (2 * sqrt(m))``: a first-order approximation for the square root of
    a mean.

    Parameters
    ----------
    sampling_error_components : Tuple
        Two-element tuple ``(squared_log_error_std, squared_log_error_mean)``
        derived from reference data, as returned by
        ``rmsle_sampling_error_components``.
    data :
        The (analysis) data you want to calculate or estimate a metric for.
        Only its length is used.

    Returns
    -------
    sampling_error: float
    """
    squared_log_error_std, squared_log_error_mean = sampling_error_components
    chunk_size = len(data)
    return np.sqrt((squared_log_error_std**2) / (4 * chunk_size * squared_log_error_mean))
```