Add artifact_1_band_width_analysis/src/artifact_1_band_width_analysis/core.py
This commit is contained in:
commit
47b5a5b569
1 changed file with 90 additions and 0 deletions
|
|
@@ -0,0 +1,90 @@
|
|||
from __future__ import annotations
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Any
|
||||
import pandas as pd
|
||||
|
||||
# Module-level logger, named after this module's ``__name__``.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DataValidationError(Exception):
    """Raised when input data does not meet expectations."""
|
||||
|
||||
|
||||
@dataclass
class RunData:
    """Measurements recorded for a single run.

    Field types are validated when an instance is created.

    Raises:
        DataValidationError: If ``band_width``, ``near_expiry`` or
            ``retry_tail_p99`` is not an ``int``/``float``, or if ``aux`` is
            not an ``int``. ``bool`` values are rejected for every field.
    """

    band_width: float
    aux: int
    near_expiry: float
    retry_tail_p99: float

    def __post_init__(self) -> None:
        """Validate the field types after dataclass initialization."""
        # (attribute, accepted types, error message) in the order the fields
        # are checked; the first failing field determines the error raised.
        checks = (
            ("band_width", (int, float), "band_width muss numerisch sein."),
            ("aux", int, "aux muss ein Integer sein."),
            ("near_expiry", (int, float), "near_expiry muss numerisch sein."),
            (
                "retry_tail_p99",
                (int, float),
                "retry_tail_p99 muss numerisch sein.",
            ),
        )
        for attr, accepted, message in checks:
            value = getattr(self, attr)
            # bool is a subclass of int, so a plain isinstance check would let
            # True/False through as if they were measurements.
            if isinstance(value, bool) or not isinstance(value, accepted):
                raise DataValidationError(message)
|
||||
|
||||
|
||||
# Public API
|
||||
|
||||
def calculate_statistics(data: List[RunData]) -> Dict[str, Any]:
    """Compute summary statistics for ``band_width`` and ``retry_tail_p99``.

    The statistics are computed across all given runs, separately for each
    of the two columns, after dropping missing values.

    Args:
        data: Non-empty collection of runs; each element's attribute
            dictionary becomes one row of the intermediate DataFrame.

    Returns:
        A dict keyed by column name. Each value is a dict with the float
        entries ``median``, ``iqr`` (75th minus 25th percentile), ``mean``,
        ``std``, ``min`` and ``max``. ``std`` is whatever ``Series.std()``
        returns with pandas' defaults.

    Raises:
        DataValidationError: If ``data`` is empty, the DataFrame cannot be
            built, the resulting DataFrame is empty, or a column has no
            non-missing values.
    """
    # Explicit raise instead of ``assert``: assertions are stripped under
    # ``python -O``, which would silently skip this check.
    if not data:
        raise DataValidationError("Eingabedaten dürfen nicht leer sein.")

    try:
        df = pd.DataFrame([vars(d) for d in data])
    except Exception as e:
        logger.exception("Fehler beim Erstellen des DataFrames")
        raise DataValidationError(f"Ungültige Datenstruktur: {e}") from e

    if df.empty:
        raise DataValidationError("DataFrame aus Eingabedaten ist leer.")

    results: Dict[str, Any] = {}
    for col in ("band_width", "retry_tail_p99"):
        series = df[col].dropna()
        if series.empty:
            raise DataValidationError(f"Spalte {col} enthält keine gültigen Werte.")
        q1, q3 = series.quantile(0.25), series.quantile(0.75)
        results[col] = {
            "median": float(series.median()),
            "iqr": float(q3 - q1),
            "mean": float(series.mean()),
            "std": float(series.std()),
            "min": float(series.min()),
            "max": float(series.max()),
        }
    logger.debug("Berechnete Statistik-Ergebnisse: %s", results)
    return results
|
||||
|
||||
|
||||
def compare_runs(run1: RunData, run2: RunData) -> Dict[str, Any]:
    """Return the field-wise differences between two runs.

    Each difference is computed as ``run2`` minus ``run1``.

    Args:
        run1: The baseline run.
        run2: The run compared against the baseline.

    Returns:
        A dict with the keys ``band_width_diff``, ``aux_diff``,
        ``near_expiry_diff`` and ``retry_tail_p99_diff``.

    Raises:
        DataValidationError: If either argument is not a ``RunData`` instance.
    """
    for candidate in (run1, run2):
        if not isinstance(candidate, RunData):
            raise DataValidationError("Beide Eingaben müssen RunData-Instanzen sein.")

    baseline, other = run1, run2
    diff = {}
    diff["band_width_diff"] = other.band_width - baseline.band_width
    diff["aux_diff"] = other.aux - baseline.aux
    diff["near_expiry_diff"] = other.near_expiry - baseline.near_expiry
    diff["retry_tail_p99_diff"] = other.retry_tail_p99 - baseline.retry_tail_p99

    logger.debug(
        "Vergleich zwischen Runs: %s vs %s -> %s",
        vars(baseline),
        vars(other),
        diff,
    )
    return diff
|
||||
|
||||
|
||||
# Configuration for CI validation: when the module is executed directly,
# set up INFO-level logging and emit a message confirming it loaded.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    logger.info("Module core.py loaded successfully.")
|
||||
Loading…
Reference in a new issue