# Source code for nannyml.performance_estimation.confidence_based.results

#  Author:   Niels Nuyttens  <niels@nannyml.com>
#
#  License: Apache Software License 2.0

"""Module containing CBPE estimation results and plotting implementations."""

import pandas as pd
from plotly import graph_objects as go

from nannyml import InvalidArgumentsException
from nannyml.performance_estimation.base import PerformanceEstimatorResult
from nannyml.plots import CHUNK_KEY_COLUMN_NAME
from nannyml.plots._step_plot import _step_plot

# Metric names accepted by ``CBPEPerformanceEstimatorResult.plot`` for the 'performance' plot kind.
# Any other value passed as ``metric`` is rejected with an ``InvalidArgumentsException``.
SUPPORTED_METRIC_VALUES = ['roc_auc', 'f1', 'precision', 'recall', 'specificity', 'accuracy']


class CBPEPerformanceEstimatorResult(PerformanceEstimatorResult):
    """Contains results for CBPE estimation and adds plotting functionality."""

    def plot(self, kind: str = 'performance', metric: str = None, *args, **kwargs) -> go.Figure:
        """Render plots based on CBPE estimation results.

        This function will return a :class:`plotly.graph_objects.Figure` object.
        The following kinds of plots are available:

        - ``performance``: a plot rendering the estimated performance per :class:`~nannyml.chunk.Chunk` after
            applying the :meth:`~nannyml.performance_estimation.confidence_based.CBPE.estimate` method on a
            chunked dataset.

        Parameters
        ----------
        kind: str, default='performance'
            The kind of plot to render. Only the 'performance' plot is currently available.
        metric: str, default=None
            The metric to plot when rendering a plot of kind 'performance'. Although it defaults to ``None``,
            a value is required: it must be one of the names listed in ``SUPPORTED_METRIC_VALUES``.
        *args, **kwargs
            Accepted but currently ignored.

        Returns
        -------
        fig: plotly.graph_objects.Figure
            The figure built from ``self.data`` for the requested metric.

        Raises
        ------
        InvalidArgumentsException
            When ``kind`` is not ``'performance'``, when ``metric`` is ``None`` or when ``metric`` is not one
            of ``SUPPORTED_METRIC_VALUES``.

        Examples
        --------
        >>> import nannyml as nml
        >>> ref_df, ana_df, _ = nml.load_synthetic_binary_classification_dataset()
        >>> metadata = nml.extract_metadata(ref_df, model_type=nml.ModelType.CLASSIFICATION_BINARY)
        >>> estimator = nml.CBPE(model_metadata=metadata, chunk_period='W')
        >>> estimator.fit(ref_df)
        >>> estimates = estimator.estimate(ana_df)
        >>> # plot the estimated performance for a single metric
        >>> estimates.plot(kind='performance', metric='roc_auc').show()
        """
        # Validate the plot kind first so an unknown kind is reported even when no metric was given.
        if kind != 'performance':
            raise InvalidArgumentsException(f"unknown plot kind '{kind}'. " f"Please provide one of: ['performance'].")

        if metric is None:
            raise InvalidArgumentsException(
                "no value for 'metric' given. Please provide the name of a metric to display."
            )

        if metric not in SUPPORTED_METRIC_VALUES:
            raise InvalidArgumentsException(
                f"unknown 'metric' value: '{metric}'. " f"Supported values are {SUPPORTED_METRIC_VALUES}."
            )

        return _plot_cbpe_performance_estimation(self.data, metric)


def _plot_cbpe_performance_estimation(estimation_results: pd.DataFrame, metric: str) -> go.Figure:
    """Render a step plot of the CBPE performance estimation results for a single metric.

    Rows whose ``partition`` equals ``'analysis'`` are plotted using their ``estimated_{metric}`` value and keep
    their ``alert_{metric}`` flag. All other rows are plotted using their ``realized_{metric}`` value and are
    never flagged as an alert. A vertical line separating the analysis period is requested when the results
    contain more than one distinct partition.

    The input is deep-copied before any helper column is added, so the caller's DataFrame is left untouched.

    Parameters
    ----------
    estimation_results : pd.DataFrame
        Results of the CBPE performance estimation. The columns ``partition``, ``lower_threshold_{metric}``,
        ``upper_threshold_{metric}``, ``estimated_{metric}``, ``realized_{metric}`` and ``alert_{metric}`` are
        read here. The column named by ``CHUNK_KEY_COLUMN_NAME`` and ``confidence_{metric}`` are passed by name
        to the plotting helper.
    metric: str
        The name of the metric to plot. It selects the metric-specific columns listed above and is used in the
        title, axis and legend labels.

    Returns
    -------
    fig: plotly.graph_objects.Figure
        A ``Figure`` object containing the requested performance estimation plot.
        Can be saved to disk or shown rendered on screen using ``fig.show()``.
    """
    estimation_results = estimation_results.copy(deep=True)

    # Boolean mask reused for every partition-dependent column below.
    is_analysis = estimation_results['partition'] == 'analysis'

    # One (lower, upper) tuple per row, consumed through ``threshold_column_name``.
    estimation_results['thresholds'] = list(
        zip(estimation_results[f'lower_threshold_{metric}'], estimation_results[f'upper_threshold_{metric}'])
    )
    estimation_results['estimated'] = is_analysis

    plot_partition_separator = estimation_results['partition'].nunique() > 1

    # TODO: hack, assembling single results column to pass to plotting, overriding alert cols
    # Vectorized selection: estimated values for analysis rows, realized values for all other rows.
    estimation_results['plottable'] = estimation_results[f'estimated_{metric}'].where(
        is_analysis, estimation_results[f'realized_{metric}']
    )
    # Alerts only apply to analysis rows; every other row is forced to False.
    estimation_results['alert'] = estimation_results[f'alert_{metric}'].where(is_analysis, False)

    # Plot estimated performance
    fig = _step_plot(
        table=estimation_results,
        metric_column_name='plottable',
        chunk_column_name=CHUNK_KEY_COLUMN_NAME,
        chunk_legend_labels=[f'Reference period (realized {metric})', f'Analysis period (estimated {metric})'],
        drift_column_name='alert',
        drift_legend_label='Degraded performance',
        hover_labels=['Chunk', f'{metric}', 'Target data'],
        hover_marker_labels=['Reference', 'No change', 'Change'],
        threshold_column_name='thresholds',
        threshold_legend_label='Performance threshold',
        title=f'CBPE - Estimated {metric}',
        y_axis_title=f'{metric}',
        v_line_separating_analysis_period=plot_partition_separator,
        estimated_column_name='estimated',
        confidence_column_name=f'confidence_{metric}',
    )

    return fig