# Source code for nannyml.performance_calculation.result
# Author: Niels Nuyttens <niels@nannyml.com>
#
# License: Apache Software License 2.0
"""Contains the results of the realized performance calculation and provides plotting functionality."""
from typing import Optional, Union
import pandas as pd
import plotly.graph_objects as go
from nannyml.base import AbstractCalculator, AbstractCalculatorResult
from nannyml.exceptions import InvalidArgumentsException
from nannyml.performance_calculation.metrics.base import Metric, MetricFactory
from nannyml.plots import CHUNK_KEY_COLUMN_NAME
from nannyml.plots._step_plot import _step_plot


class PerformanceCalculatorResult(AbstractCalculatorResult):
"""Contains the results of the realized performance calculation and provides plotting functionality."""
def __init__(
self,
results_data: pd.DataFrame,
calculator: AbstractCalculator,
):
"""Creates a new PerformanceCalculatorResult instance."""
super().__init__(results_data)
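        # Imported locally (not at module level) to avoid a circular import between
        # the result and calculator modules.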
from .calculator import PerformanceCalculator
        if not isinstance(calculator, PerformanceCalculator):
            raise RuntimeError(f"{calculator.__class__.__name__} is not an instance of type PerformanceCalculator")
self.calculator = calculator
@property
def calculator_name(self) -> str:
return "performance_calculator"
    def plot(
self,
kind: str = 'performance',
plot_reference: bool = False,
*args,
**kwargs,
) -> Optional[go.Figure]:
"""Render realized performance metrics.
The following kinds of plots are available:
- ``performance``
a step plot showing the realized performance metric per :class:`~nannyml.chunk.Chunk` for
a given metric.
Parameters
----------
kind: str, default='performance'
The kind of plot to render. Only the 'performance' plot is currently available.
metric: Union[str, nannyml.performance_calculation.metrics.base.Metric], default=None
The name of the metric to plot. Value should be one of:
- 'roc_auc'
- 'f1'
- 'precision'
- 'recall'
- 'specificity'
- 'accuracy'
plot_reference: bool, default=False
            Indicates whether to include the reference period in the plot. Defaults to ``False``.
Returns
-------
fig: :class:`plotly.graph_objs._figure.Figure`
            A :class:`~plotly.graph_objs._figure.Figure` object containing the requested performance plot.
Can be saved to disk using the :meth:`~plotly.graph_objs._figure.Figure.write_image` method
or shown rendered on screen using the :meth:`~plotly.graph_objs._figure.Figure.show` method.
Examples
--------
>>> import nannyml as nml
>>>
>>> reference_df, analysis_df, target_df = nml.load_synthetic_binary_classification_dataset()
>>>
>>> calc = nml.PerformanceCalculator(y_true='work_home_actual', y_pred='y_pred', y_pred_proba='y_pred_proba',
>>> timestamp_column_name='timestamp', metrics=['f1', 'roc_auc'])
>>>
>>> calc.fit(reference_df)
>>>
>>> results = calc.calculate(analysis_df.merge(target_df, on='identifier'))
>>> print(results.data)
key start_index ... roc_auc_upper_threshold roc_auc_alert
0 [0:4999] 0 ... 0.97866 False
1 [5000:9999] 5000 ... 0.97866 False
2 [10000:14999] 10000 ... 0.97866 False
3 [15000:19999] 15000 ... 0.97866 False
4 [20000:24999] 20000 ... 0.97866 False
5 [25000:29999] 25000 ... 0.97866 True
6 [30000:34999] 30000 ... 0.97866 True
7 [35000:39999] 35000 ... 0.97866 True
8 [40000:44999] 40000 ... 0.97866 True
9 [45000:49999] 45000 ... 0.97866 True
>>> for metric in calc.metrics:
>>> results.plot(metric=metric, plot_reference=True).show()
"""
        if kind == 'performance':
            if 'metric' not in kwargs:
                raise InvalidArgumentsException("missing value for parameter 'metric'")
            return _plot_performance_metric(self.data, self.calculator, plot_reference, kwargs['metric'])
        else:
            raise InvalidArgumentsException(f"unknown plot kind '{kind}'. Please provide one of: ['performance'].")
# @property
# def plots(self) -> Dict[str, go.Figure]:
# return {metric: self.plot(kind='performance', metric=metric) for metric in self._metrics}
def _plot_performance_metric(
results_data: pd.DataFrame, calculator, plot_reference: bool, metric: Union[str, Metric]
) -> go.Figure:
"""Renders a line plot of a selected metric of the performance calculation results.
Chunks are set on a time-based X-axis by using the period containing their observations.
Chunks of different periods (``reference`` and ``analysis``) are represented using different colors and
a vertical separation if the drift results contain multiple periods.
Parameters
----------
results_data : pd.DataFrame
        Results of the realized performance calculation.
    calculator : PerformanceCalculator
        The fitted calculator that produced the results, used to resolve metrics and retrieve
        reference period results.
    plot_reference : bool
        Indicates whether to include the reference period in the plot.
    metric: Union[str, nannyml.performance_calculation.metrics.base.Metric]
        The name of the metric to plot. Value should be one of:
        - 'roc_auc'
        - 'f1'
        - 'precision'
        - 'recall'
        - 'specificity'
        - 'accuracy'
Returns
-------
    fig: plotly.graph_objects.Figure
        A ``Figure`` object containing the requested performance plot.
        Can be saved to disk or rendered on screen using ``fig.show()``.
"""
results_data = results_data.copy()
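    # Accept either a metric name or a Metric instance; names are resolved through the factory.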
if isinstance(metric, str):
metric = MetricFactory.create(metric, calculator.problem_type, {'calculator': calculator})
plot_period_separator = plot_reference
results_data['period'] = 'analysis'
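    # When the reference period is requested, prepend its results so both periods appear on one axis.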
if plot_reference:
reference_results = calculator.previous_reference_results.copy()
reference_results['period'] = 'reference'
results_data = pd.concat([reference_results, results_data], ignore_index=True)
# Plot metric performance
fig = _step_plot(
table=results_data,
metric_column_name=metric.column_name,
chunk_column_name=CHUNK_KEY_COLUMN_NAME,
drift_column_name=f'{metric.column_name}_alert',
drift_legend_label='Degraded performance',
hover_labels=['Chunk', metric.display_name, 'Target data'],
hover_marker_labels=['Reference', 'No change', 'Change'],
lower_threshold_column_name=f'{metric.column_name}_lower_threshold',
upper_threshold_column_name=f'{metric.column_name}_upper_threshold',
threshold_legend_label='Performance threshold',
partial_target_column_name='targets_missing_rate',
title=f'Realized performance: {metric.display_name}',
y_axis_title='Realized performance',
v_line_separating_analysis_period=plot_period_separator,
sampling_error_column_name=f'{metric.column_name}_sampling_error',
)
return fig
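

# Illustrative usage sketch (for orientation only, not part of the module):
# `_plot_performance_metric` is a private helper and is normally reached through
# `PerformanceCalculatorResult.plot`, e.g.:
#
#   calc = nml.PerformanceCalculator(..., metrics=['f1', 'roc_auc'])
#   calc.fit(reference_df)
#   results = calc.calculate(analysis_df)
#   fig = results.plot(kind='performance', metric='f1', plot_reference=True)
#   fig.show()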