Source code for nannyml.data_quality.missing.result

#  Author:   Niels Nuyttens  <niels@nannyml.com>
#  Author:   Nikolaos Perrakis  <nikos@nannyml.com>
#
#  License: Apache Software License 2.0

"""Contains the results of the missing values data quality calculation and provides plotting functionality."""
from __future__ import annotations

import warnings
from typing import List, Optional

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    import pandas as pd

import plotly.graph_objects as go

from nannyml._typing import Key
from nannyml.base import PerColumnResult
from nannyml.chunk import Chunker
from nannyml.plots.blueprints.comparisons import ResultCompareMixin
from nannyml.plots.blueprints.metrics import plot_metrics
from nannyml.usage_logging import UsageEvent, log_usage


class Result(PerColumnResult, ResultCompareMixin):
    """Result container for the Missing Values data quality calculation.

    Wraps the calculation results and exposes plotting on top of them.
    """

    def __init__(
        self,
        results_data: pd.DataFrame,
        column_names: List[str],
        data_quality_metric: str,
        timestamp_column_name: Optional[str],
        chunker: Chunker,
    ):
        """Create a new Missing Values result.

        Parameters
        ----------
        results_data: pd.DataFrame
            Calculation results; handed to the parent class initializer.
        column_names: List[str]
            Column names the results relate to; handed to the parent class initializer.
        data_quality_metric: str
            Name of the data quality metric. Used by :meth:`keys` to build display names.
        timestamp_column_name: Optional[str]
            Name of the timestamp column, or ``None``. Stored on the instance.
        chunker: Chunker
            Chunker instance. Stored on the instance.
        """
        super().__init__(results_data, column_names)

        # Plain attribute assignments; they do not depend on one another.
        self.chunker = chunker
        self.data_quality_metric = data_quality_metric
        self.timestamp_column_name = timestamp_column_name

    def keys(self) -> List[Key]:
        """Return one ``Key`` per entry in ``self.column_names``.

        Each key has the column name as its single property and carries two display names:
        the column name itself, and the metric name with underscores replaced by spaces
        and converted to title case.
        """

        def _key_for(column_name: str) -> Key:
            metric_label = self.data_quality_metric.replace('_', ' ').title()
            return Key(
                properties=(column_name,),
                display_names=(column_name, metric_label),
            )

        return [_key_for(column_name) for column_name in self.column_names]

    @log_usage(UsageEvent.DQ_CALC_MISSING_VALUES_PLOT)
    def plot(self, *args, **kwargs) -> go.Figure:
        """Plot Missing Values results.

        Positional and keyword arguments are accepted but are not forwarded to the
        underlying plotting function.

        Returns
        -------
        fig: :class:`plotly.graph_objs._figure.Figure`
            A :class:`~plotly.graph_objs._figure.Figure` object containing the requested plot.

            Can be saved to disk using the :meth:`~plotly.graph_objs._figure.Figure.write_image` method
            or shown rendered on screen using the :meth:`~plotly.graph_objs._figure.Figure.show` method.

        Examples
        --------
        >>> import nannyml as nml
        >>> reference, analysis, _ = nml.load_synthetic_car_price_dataset()
        >>> column_names = [col for col in reference.columns if col not in ['timestamp', 'y_pred', 'y_true']]
        >>> calc = nml.MissingValuesCalculator(
        ...     column_names=column_names,
        ...     timestamp_column_name='timestamp',
        ... ).fit(reference)
        >>> res = calc.calculate(analysis)
        >>> res.filter(period='analysis').plot().show()
        """
        # Format templates are filled in by plot_metrics using each key's display names.
        subplot_title = '{display_names[1]} for <b>{display_names[0]}</b>'
        y_axis_title = '{display_names[1]}'

        figure = plot_metrics(
            self,
            title='Data Quality ',
            subplot_title_format=subplot_title,
            subplot_y_axis_title_format=y_axis_title,
        )
        return figure