# Provenance: commit 47b5a5b56902aba79fe97e05c368c40ed5b8b2c0
# Author: Mika -- Date: Thu Apr 2 13:12:16 2026 +0000
# Subject: Add artifact_1_band_width_analysis/src/artifact_1_band_width_analysis/core.py
#
# Summary statistics and pairwise comparison helpers for band-width runs.
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import List, Dict, Any

import pandas as pd

logger = logging.getLogger(__name__)


class DataValidationError(Exception):
    """Raised when input data does not match what this module expects."""


class _EmptyInputError(DataValidationError, AssertionError):
    """Raised by ``calculate_statistics`` for empty input.

    Derives from ``AssertionError`` as well because the empty-input check used
    to be an ``assert`` statement; callers that catch ``AssertionError`` keep
    working, while new callers can catch ``DataValidationError``.
    """


@dataclass
class RunData:
    """Measurements recorded for a single run.

    Field types are checked in ``__post_init__``; a ``DataValidationError``
    is raised for the first field (in declaration order) that fails.
    """

    band_width: float
    aux: int
    near_expiry: float
    retry_tail_p99: float

    def __post_init__(self) -> None:
        """Validate the field types right after dataclass initialisation.

        Raises:
            DataValidationError: if a field is not of the expected type.
        """
        # NOTE: ``bool`` is a subclass of ``int`` and therefore passes every
        # check below.
        if not isinstance(self.band_width, (int, float)):
            raise DataValidationError("band_width muss numerisch sein.")
        if not isinstance(self.aux, int):
            raise DataValidationError("aux muss ein Integer sein.")
        if not isinstance(self.near_expiry, (int, float)):
            raise DataValidationError("near_expiry muss numerisch sein.")
        if not isinstance(self.retry_tail_p99, (int, float)):
            raise DataValidationError("retry_tail_p99 muss numerisch sein.")


# Public API

def calculate_statistics(data: List[RunData]) -> Dict[str, Any]:
    """Compute summary statistics for ``band_width`` and ``retry_tail_p99``.

    Args:
        data: Non-empty list of runs.

    Returns:
        A dict keyed by column name (``"band_width"``, ``"retry_tail_p99"``).
        Each value is a dict with the float entries ``median``, ``iqr``
        (75th minus 25th percentile), ``mean``, ``std``, ``min`` and ``max``.
        ``std`` is pandas' default sample standard deviation (``ddof=1``), so
        it is NaN when a column holds only one value.

    Raises:
        DataValidationError: if ``data`` is empty (the raised instance is
            also an ``AssertionError`` for backward compatibility), if the
            DataFrame cannot be built or is empty, or if a column contains
            no non-null values.
    """
    # Explicit raise instead of ``assert``: assertions are removed when Python
    # runs with -O, which would silently disable this input check.
    if not data:
        raise _EmptyInputError('Eingabedaten dürfen nicht leer sein.')

    try:
        df = pd.DataFrame([record.__dict__ for record in data])
    except Exception as e:
        logger.exception("Fehler beim Erstellen des DataFrames")
        raise DataValidationError(f"Ungültige Datenstruktur: {e}") from e

    if df.empty:
        raise DataValidationError("DataFrame aus Eingabedaten ist leer.")

    results: Dict[str, Any] = {}
    for col in ["band_width", "retry_tail_p99"]:
        # Null entries are ignored; a column with nothing left is an error.
        series = df[col].dropna()
        if series.empty:
            raise DataValidationError(f"Spalte {col} enthält keine gültigen Werte.")
        q1, q3 = series.quantile(0.25), series.quantile(0.75)
        iqr = q3 - q1
        # Cast to built-in float so the result holds no numpy scalar types.
        results[col] = {
            "median": float(series.median()),
            "iqr": float(iqr),
            "mean": float(series.mean()),
            "std": float(series.std()),
            "min": float(series.min()),
            "max": float(series.max()),
        }
    logger.debug("Berechnete Statistik-Ergebnisse: %s", results)
    return results


def compare_runs(run1: RunData, run2: RunData) -> Dict[str, Any]:
    """Return the field-wise differences between two runs (``run2 - run1``).

    Args:
        run1: Baseline run.
        run2: Run compared against the baseline.

    Returns:
        A dict with the keys ``band_width_diff``, ``aux_diff``,
        ``near_expiry_diff`` and ``retry_tail_p99_diff``.

    Raises:
        DataValidationError: if either argument is not a ``RunData`` instance.
    """
    if not all(isinstance(r, RunData) for r in [run1, run2]):
        raise DataValidationError("Beide Eingaben müssen RunData-Instanzen sein.")

    diff = {
        "band_width_diff": run2.band_width - run1.band_width,
        "aux_diff": run2.aux - run1.aux,
        "near_expiry_diff": run2.near_expiry - run1.near_expiry,
        "retry_tail_p99_diff": run2.retry_tail_p99 - run1.retry_tail_p99,
    }
    logger.debug(
        "Vergleich zwischen Runs: %s vs %s -> %s",
        run1.__dict__,
        run2.__dict__,
        diff,
    )
    return diff


# Configuration for CI validation
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    logger.info("Module core.py loaded successfully.")