# Source code for nannyml.performance_estimation.direct_loss_estimation.result

import copy
from typing import Any, Dict, List, Optional, Union

import pandas as pd
from plotly.graph_objects import Figure

from nannyml import Chunker
from nannyml._typing import ProblemType
from nannyml.base import AbstractEstimatorResult
from nannyml.exceptions import InvalidArgumentsException
from nannyml.performance_estimation.direct_loss_estimation.metrics import Metric, MetricFactory
from nannyml.plots._step_plot import _step_plot


class Result(AbstractEstimatorResult):
    """Results of a direct loss estimation (DLE) run, with filtering and plotting helpers.

    Besides the results data handed to the parent class, the instance keeps the
    estimator settings it was created with, so that a metric given by name can
    be re-created through ``MetricFactory`` when plotting.
    """

    def __init__(
        self,
        results_data: pd.DataFrame,
        metrics: List[Metric],
        feature_column_names: List[str],
        y_pred: str,
        y_true: str,
        chunker: Chunker,
        tune_hyperparameters: bool,
        hyperparameter_tuning_config: Dict[str, Any],
        hyperparameters: Optional[Dict[str, Any]],
        timestamp_column_name: Optional[str] = None,
    ):
        """Store the results data and the estimator settings.

        Parameters
        ----------
        results_data: pd.DataFrame
            The results data, passed on to the parent class.
        metrics: List[Metric]
            The metrics present in the results.
        feature_column_names: List[str]
            Forwarded to ``MetricFactory.create`` when a metric is built from its name.
        y_pred: str
            Forwarded to ``MetricFactory.create`` as ``y_pred``.
        y_true: str
            Forwarded to ``MetricFactory.create`` as ``y_true``.
        chunker: Chunker
            Forwarded to ``MetricFactory.create`` as ``chunker``.
        tune_hyperparameters: bool
            Forwarded to ``MetricFactory.create`` as ``tune_hyperparameters``.
        hyperparameter_tuning_config: Dict[str, Any]
            Forwarded to ``MetricFactory.create`` as ``hyperparameter_tuning_config``.
        hyperparameters: Optional[Dict[str, Any]]
            Forwarded to ``MetricFactory.create`` as ``hyperparameters``.
        timestamp_column_name: Optional[str], default=None
            When set, plots use the chunk start/end dates for the x-axis.
        """
        super().__init__(results_data)

        self.metrics = metrics
        self.feature_column_names = feature_column_names
        self.y_pred = y_pred
        self.y_true = y_true
        self.timestamp_column_name = timestamp_column_name
        self.chunker = chunker
        self.tune_hyperparameters = tune_hyperparameters
        # Keep the mapping itself: a trailing comma here used to wrap it in a
        # 1-tuple, which was then handed to MetricFactory.create() in plot().
        self.hyperparameter_tuning_config = hyperparameter_tuning_config
        self.hyperparameters = hyperparameters

    def _filter(
        self, period: str, metrics: Optional[List[str]] = None, *args, **kwargs
    ) -> AbstractEstimatorResult:
        """Return a deep copy of this result restricted to the given period and metrics.

        Parameters
        ----------
        period: str
            Value of the ``('chunk', 'period')`` column to keep, or ``'all'`` to keep every row.
        metrics: Optional[List[str]], default=None
            Column names of the metrics to keep. Defaults to the column names of all
            metrics in ``self.metrics``.

        Returns
        -------
        AbstractEstimatorResult
            A copy of this result whose ``data`` only holds the ``chunk`` columns and the
            selected metric columns, with a fresh 0-based row index.
        """
        if metrics is None:
            metrics = [metric.column_name for metric in self.metrics]

        data = pd.concat([self.data.loc[:, (['chunk'])], self.data.loc[:, (metrics,)]], axis=1)
        if period != 'all':
            # Filter the metric-restricted frame, not self.data: selecting rows from
            # self.data again would bring back every column and drop the metric selection.
            data = data.loc[data.loc[:, ('chunk', 'period')] == period, :]
        data = data.reset_index(drop=True)

        res = copy.deepcopy(self)
        res.data = data
        return res

    def plot(
        self,
        kind: str = 'performance',
        metric: Optional[Union[str, Metric]] = None,
        plot_reference: bool = False,
        *args,
        **kwargs,
    ) -> Figure:
        """Render a plot of the results.

        Parameters
        ----------
        kind: str, default='performance'
            The kind of plot to render. Only ``'performance'`` is supported.
        metric: Optional[Union[str, Metric]], default=None
            The metric to plot, either as a ``Metric`` instance or as a name that
            ``MetricFactory.create`` can resolve. Required for the ``'performance'`` plot.
        plot_reference: bool, default=False
            Whether to include the reference period in the plot.

        Returns
        -------
        Figure
            The plotly figure produced for the requested metric.

        Raises
        ------
        InvalidArgumentsException
            When ``kind`` is not ``'performance'`` or when no ``metric`` was given.
        """
        if kind != 'performance':
            raise InvalidArgumentsException(f"unknown plot kind '{kind}'. " f"Please provide on of: ['performance'].")

        if metric is None:
            raise InvalidArgumentsException(
                "no value for 'metric' given. Please provide the name of a metric to display."
            )

        if isinstance(metric, str):
            metric = MetricFactory.create(
                metric,
                ProblemType.REGRESSION,
                feature_column_names=self.feature_column_names,
                y_true=self.y_true,
                y_pred=self.y_pred,
                chunker=self.chunker,
                tune_hyperparameters=self.tune_hyperparameters,
                hyperparameter_tuning_config=self.hyperparameter_tuning_config,
                hyperparameters=self.hyperparameters,
            )

        return self._plot_direct_error_estimation_performance(metric, plot_reference)

    def _plot_direct_error_estimation_performance(self, metric: Metric, plot_reference: bool) -> Figure:
        """Build the step plot for a single metric.

        Rows whose ``chunk_period`` is ``'analysis'`` are plotted with the metric's
        ``_value`` column, all other rows with its ``_realized`` column. Alerts are
        only kept for ``'analysis'`` rows.

        Parameters
        ----------
        metric: Metric
            The metric to plot; its ``column_name`` selects the columns and its
            ``display_name`` is used for labels and titles.
        plot_reference: bool
            When False, only rows of the ``'analysis'`` period are plotted. When True,
            all rows are plotted and a line separating the analysis period is drawn.

        Returns
        -------
        Figure
            The figure returned by ``_step_plot``.
        """
        estimation_results = self.to_df(multilevel=False)
        estimation_results['estimated'] = True

        if not plot_reference:
            # Explicit copy: the column assignments below must write to a frame of their
            # own instead of to a boolean-mask slice (avoids chained assignment).
            estimation_results = estimation_results[estimation_results['chunk_period'] == 'analysis'].copy()

        # TODO: hack, assembling single results column to pass to plotting, overriding alert cols
        estimation_results['plottable'] = estimation_results.apply(
            lambda r: r[f'{metric.column_name}_value']
            if r['chunk_period'] == 'analysis'
            else r[f'{metric.column_name}_realized'],
            axis=1,
        )
        estimation_results[f'{metric.column_name}_alert'] = estimation_results.apply(
            lambda r: r[f'{metric.column_name}_alert'] if r['chunk_period'] == 'analysis' else False, axis=1
        )

        # Date columns are only passed to the plot when a timestamp column was configured.
        is_time_based_x_axis = self.timestamp_column_name is not None

        # Plot estimated performance
        fig = _step_plot(
            table=estimation_results,
            metric_column_name='plottable',
            chunk_column_name='chunk_key',
            chunk_type_column_name='chunk_period',
            chunk_index_column_name='chunk_index',
            start_date_column_name='chunk_start_date' if is_time_based_x_axis else None,
            end_date_column_name='chunk_end_date' if is_time_based_x_axis else None,
            chunk_legend_labels=[
                f'Reference period (realized {metric.display_name})',
                f'Analysis period (estimated {metric.display_name})',
            ],
            drift_column_name=f'{metric.column_name}_alert',
            drift_legend_label='Degraded performance',
            hover_labels=['Chunk', f'{metric.display_name}', 'Target data'],
            hover_marker_labels=['Reference', 'No change', 'Change'],
            lower_threshold_column_name=f'{metric.column_name}_lower_threshold',
            upper_threshold_column_name=f'{metric.column_name}_upper_threshold',
            threshold_legend_label='Performance threshold',
            title=f'DLE - Estimated {metric.display_name}',
            y_axis_title=f'{metric.display_name}',
            v_line_separating_analysis_period=plot_reference,
            estimated_column_name='estimated',
            lower_confidence_column_name=f'{metric.column_name}_lower_confidence_boundary',
            upper_confidence_column_name=f'{metric.column_name}_upper_confidence_boundary',
            sampling_error_column_name=f'{metric.column_name}_sampling_error',
        )
        return fig