Source code for nannyml.sampling_error.regression

#  Author:   Niels Nuyttens  <niels@nannyml.com>
#
#  License: Apache Software License 2.0

from typing import Tuple

import numpy as np
import pandas as pd


[docs]def mae_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple: """ Calculate sampling error components for Mean Absolute Error (MAE) using reference data. Parameters ---------- y_true_reference: pd.Series Target values for the reference dataset. y_pred_reference: pd.Series Predictions for the reference dataset. Returns ------- (std,): Tuple[np.ndarray] """ std = np.std(np.abs(y_true_reference - y_pred_reference)) return (std,)
[docs]def mae_sampling_error(sampling_error_components, data) -> float: """ Calculate Mean Absolute Error (MAE) sampling error for a chunk of data. Parameters ---------- sampling_error_components : a set of parameters that were derived from reference data. data : the (analysis) data you want to calculate or estimate a metric for. Returns ------- sampling_error: float """ return sampling_error_components[0] / np.sqrt(len(data))
[docs]def mape_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple: """ Calculate sampling error components for Mean Absolute Percentage Error (MAPE) using reference data. Parameters ---------- y_true_reference: pd.Series Target values for the reference dataset. y_pred_reference: pd.Series Predictions for the reference dataset. Returns ------- (std,): Tuple[np.ndarray] """ std = np.std(np.abs(y_true_reference - y_pred_reference) / y_true_reference) return (std,)
[docs]def mape_sampling_error(sampling_error_components, data) -> float: """ Calculate Mean Absolute Percentage Error (MAPE) sampling error for a chunk of data. Parameters ---------- sampling_error_components : a set of parameters that were derived from reference data. data : the (analysis) data you want to calculate or estimate a metric for. Returns ------- sampling_error: float """ return sampling_error_components[0] / np.sqrt(len(data))
[docs]def mse_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple: """ Calculate sampling error components for Mean Squared Error (MSE) using reference data. Parameters ---------- y_true_reference: pd.Series Target values for the reference dataset. y_pred_reference: pd.Series Predictions for the reference dataset. Returns ------- (std,): Tuple[np.ndarray] """ std = np.std((y_true_reference - y_pred_reference) ** 2) return (std,)
[docs]def mse_sampling_error(sampling_error_components, data) -> float: """ Calculate Mean Squared Error (MSE) sampling error for a chunk of data. Parameters ---------- sampling_error_components : a set of parameters that were derived from reference data. data : the (analysis) data you want to calculate or estimate a metric for. Returns ------- sampling_error: float """ return sampling_error_components[0] / np.sqrt(len(data))
[docs]def msle_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple: """ Calculate sampling error components for Mean Squared Logarithmic Error (MSLE) using reference data. Parameters ---------- y_true_reference: pd.Series Target values for the reference dataset. y_pred_reference: pd.Series Predictions for the reference dataset. Returns ------- (std,): Tuple[np.ndarray] """ std = np.std((np.log(1 + y_true_reference) - np.log(1 + y_pred_reference)) ** 2) return (std,)
[docs]def msle_sampling_error(sampling_error_components, data) -> float: """ Calculate Mean Squared Logarithmic Error (MSLE) sampling error for a chunk of data. Parameters ---------- sampling_error_components : a set of parameters that were derived from reference data. data : the (analysis) data you want to calculate or estimate a metric for. Returns ------- sampling_error: float """ return sampling_error_components[0] / np.sqrt(len(data))
[docs]def rmse_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple: """ Calculate sampling error components for Root Mean Squared Error (RMSE) using reference data. Parameters ---------- y_true_reference: pd.Series Target values for the reference dataset. y_pred_reference: pd.Series Predictions for the reference dataset. Returns ------- (std,): Tuple[np.ndarray] """ squared_error = (y_true_reference - y_pred_reference) ** 2 squared_error_std = np.std(squared_error) squared_error_mean = np.mean(squared_error) return squared_error_std, squared_error_mean
[docs]def rmse_sampling_error(sampling_error_components, data) -> float: """ Calculate Root Mean Squared Error (RMSE) sampling error for a chunk of data. Parameters ---------- sampling_error_components : a set of parameters that were derived from reference data. data : the (analysis) data you want to calculate or estimate a metric for. Returns ------- sampling_error: float """ squared_error_std, squared_error_mean = sampling_error_components return np.sqrt((squared_error_std**2) / (4 * len(data) * squared_error_mean))
[docs]def rmsle_sampling_error_components(y_true_reference: pd.Series, y_pred_reference: pd.Series) -> Tuple: """ Calculate sampling error components for Root Mean Squared Logarithmic Error (RMSLE) using reference data. Parameters ---------- y_true_reference: pd.Series Target values for the reference dataset. y_pred_reference: pd.Series Predictions for the reference dataset. Returns ------- (std,): Tuple[np.ndarray] """ squared_log_error = (np.log(1 + y_true_reference) - np.log(1 + y_pred_reference)) ** 2 squared_log_error_std = np.std(squared_log_error) squared_log_error_mean = np.mean(squared_log_error) return squared_log_error_std, squared_log_error_mean
[docs]def rmsle_sampling_error(sampling_error_components, data) -> float: """ Calculate Root Mean Squared Logarithmic Error (RMSLE) sampling error for a chunk of data. Parameters ---------- sampling_error_components : a set of parameters that were derived from reference data. data : the (analysis) data you want to calculate or estimate a metric for. Returns ------- sampling_error: float """ squared_log_error_std, squared_log_error_mean = sampling_error_components return np.sqrt((squared_log_error_std**2) / (4 * len(data) * squared_log_error_mean))