Source code for nannyml.io.db.entities

#  Author:   Niels Nuyttens  <niels@nannyml.com>
#
#  License: Apache Software License 2.0

""" Contains the definitions of the database entities that map directly to the underlying table definitions.

    Every ``Result`` class has a matching ``Entity`` class, which implies that each calculator or estimator will export
    its results into a specific table.
"""

from datetime import datetime
from typing import List, Optional

from sqlmodel import Field, Relationship, SQLModel


[docs]class Model(SQLModel, table=True): # type: ignore[call-arg] """Represents a ``Model``. Only created when the ``model_name`` property of the :class:`~nannyml.io.db.database_writer.DatabaseWriter` was given. The ``id`` field here will act as a foreign key in the ``run`` table and all ``metric`` tables. Stored in the ``model`` table. """ #: A technical key that is used as a foreign key in the other tables id: Optional[int] = Field(default=None, primary_key=True) #: Optional model name that might be useful in visualizations e.g. in Grafana dashboards name: str #: List of NannyML runs runs: List["Run"] = Relationship(back_populates="model")
[docs]class Run(SQLModel, table=True): # type: ignore[call-arg] """Represents a NannyML run, allowing to filter results based on what run generated them. The ``id`` field here will act as a foreign key in all ``metric`` tables. Stored in the ``run`` table. """ #: Foreign key in all ``metric`` tables id: Optional[int] = Field(default=None, primary_key=True) #: Used to link a run to a model model_id: Optional[int] = Field(default=None, foreign_key="model.id") #: The actual ``Model`` class instance that is linked to the run model: Model = Relationship(back_populates="runs") # metrics: List["Metric"] = Relationship(back_populates="run") # Could not get this to work (due to inheritance) #: Execution time of NannyML run execution_timestamp: datetime = Field(default=datetime.now())
[docs]class Metric(SQLModel): """ Base ``Metric`` definition. """ #: The technical identifier for this database row id: Optional[int] = Field(default=None, primary_key=True) #: Foreign key pointing to a record in the ``model`` table model_id: Optional[int] = Field(default=None, foreign_key="model.id") #: Foreign key pointing to a record in the ``run`` table run_id: int = Field(default=None, foreign_key="run.id") # run: Run = Relationship(back_populates="metrics") # Could not get this to work (due to inheritance) #: The start datetime of the :class:`~nannyml.chunk.Chunk` start_timestamp: datetime #: The end datetime of the :class:`~nannyml.chunk.Chunk` end_timestamp: datetime #: The ''center'' timestamp of the :class:`~nannyml.chunk.Chunk`, i.e. the mean of the start and end timestamps timestamp: datetime #: The name of the method being calculated, e.g. ``jensen_shannon`` or ``chi2`` metric_name: str #: The value returned by the method value: float #: Indicates if the method raised an alert for this :class:`~nannyml.chunk.Chunk` alert: bool
[docs]class UnivariateDriftMetric(Metric, table=True): # type: ignore[call-arg] """Represents results of the :class:`~nannyml.drift.univariate.calculator.UnivariateDriftCalculator`. Stored in the ``univariate_drift_metrics`` table. """ __tablename__ = 'univariate_drift_metrics' #: The name of the column this metric belongs to column_name: str
[docs]class DataReconstructionFeatureDriftMetric(Metric, table=True): # type: ignore[call-arg] """:class:`~nannyml.drift.multivariate.data_reconstruction.calculator.DataReconstructionDriftCalculator` results. Stored in the ``data_reconstruction_feature_drift_metrics`` table. """ __tablename__ = 'data_reconstruction_feature_drift_metrics' #: The upper alerting threshold value upper_threshold: Optional[float] #: The lower alerting threshold value lower_threshold: Optional[float]
[docs]class RealizedPerformanceMetric(Metric, table=True): # type: ignore[call-arg] """Represents results of the :class:`~nannyml.performance_calculation.calculator.PerformanceCalculator`. Stored in the ``realized_performance_metrics`` table. """ __tablename__ = 'realized_performance_metrics' #: The upper alerting threshold value upper_threshold: Optional[float] #: The lower alerting threshold value lower_threshold: Optional[float]
[docs]class CBPEPerformanceMetric(Metric, table=True): # type: ignore[call-arg] """Represents results of the :class:`~nannyml.performance_estimation.confidence_based.cbpe.CBPE` estimator. Stored in the ``cbpe_performance_metrics`` table. """ __tablename__ = "cbpe_performance_metrics" #: The upper alerting threshold value upper_threshold: Optional[float] #: The lower alerting threshold value lower_threshold: Optional[float]
[docs]class DLEPerformanceMetric(Metric, table=True): # type: ignore[call-arg] """Represents results of the :class:`~nannyml.performance_estimation.direct_loss_estimation.dle.DLE estimator`. Stored in the ``dle_performance_metrics`` table. """ __tablename__ = "dle_performance_metrics" #: The upper alerting threshold value upper_threshold: Optional[float] #: The lower alerting threshold value lower_threshold: Optional[float]
[docs]class UnseenValuesMetric(Metric, table=True): __tablename__ = "unseen_values_metrics" #: The name of the column this metric belongs to column_name: str #: The upper alerting threshold value upper_threshold: Optional[float] #: The lower alerting threshold value lower_threshold: Optional[float]
[docs]class MissingValuesMetric(Metric, table=True): __tablename__ = "missing_values_metrics" #: The name of the column this metric belongs to column_name: str #: The upper alerting threshold value upper_threshold: Optional[float] #: The lower alerting threshold value lower_threshold: Optional[float]