Add artifact_1_band_width_analysis/src/artifact_1_band_width_analysis/core.py
This commit is contained in:
commit
47b5a5b569
1 changed files with 90 additions and 0 deletions
|
|
@@ -0,0 +1,90 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class DataValidationError(Exception):
    """Raised when input data does not meet the module's expectations."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RunData:
    """Values recorded for a single run, type-checked on construction.

    The checks run once in ``__post_init__``; they are not re-applied when an
    attribute is reassigned later.

    Attributes:
        band_width: Numeric value (``int`` or ``float``).
        aux: Integer value.
        near_expiry: Numeric value (``int`` or ``float``).
        retry_tail_p99: Numeric value (``int`` or ``float``).

    Raises:
        DataValidationError: If any field has an unexpected type. ``bool``
            values are rejected for every field.
    """

    band_width: float
    aux: int
    near_expiry: float
    retry_tail_p99: float

    def __post_init__(self) -> None:
        """Validate the field types right after dataclass initialisation."""
        # (field name, accepted types, error message), in the order in which
        # the fields are checked; the first failing field wins.
        checks = (
            ("band_width", (int, float), "band_width muss numerisch sein."),
            ("aux", int, "aux muss ein Integer sein."),
            ("near_expiry", (int, float), "near_expiry muss numerisch sein."),
            (
                "retry_tail_p99",
                (int, float),
                "retry_tail_p99 muss numerisch sein.",
            ),
        )
        for field_name, accepted, message in checks:
            value = getattr(self, field_name)
            # bool is a subclass of int, so a plain isinstance check would let
            # True/False through and have them treated as 1/0 downstream.
            if isinstance(value, bool) or not isinstance(value, accepted):
                raise DataValidationError(message)
|
||||||
|
|
||||||
|
|
||||||
|
# Public API
|
||||||
|
|
||||||
|
def calculate_statistics(data: List[RunData]) -> Dict[str, Any]:
    """Compute summary statistics for ``band_width`` and ``retry_tail_p99``.

    Each element's ``__dict__`` becomes one row of a pandas DataFrame; the two
    columns are then summarised independently after dropping NaN values.

    Args:
        data: Non-empty collection of runs.

    Returns:
        A dict keyed by column name (``"band_width"``, ``"retry_tail_p99"``).
        Each value is a dict with the float entries ``"median"``, ``"iqr"``
        (75 % quantile minus 25 % quantile), ``"mean"``, ``"std"``, ``"min"``
        and ``"max"``. ``"std"`` comes from ``Series.std()`` with pandas'
        default settings, so it is NaN when a column has a single value.

    Raises:
        AssertionError: If ``data`` is empty or otherwise falsy.
        DataValidationError: If the DataFrame cannot be built, turns out
            empty, or one of the two columns has no non-NaN values.
    """
    # Raised explicitly instead of via `assert`: assert statements are removed
    # under `python -O`, which would make the failure mode depend on
    # interpreter flags. AssertionError is kept so existing callers see the
    # same exception type as before.
    if not data:
        raise AssertionError('Eingabedaten dürfen nicht leer sein.')

    try:
        frame = pd.DataFrame([d.__dict__ for d in data])
    except Exception as e:
        logger.exception("Fehler beim Erstellen des DataFrames")
        raise DataValidationError(f"Ungültige Datenstruktur: {e}") from e

    # Still reachable: elements whose __dict__ is empty give a column-less frame.
    if frame.empty:
        raise DataValidationError("DataFrame aus Eingabedaten ist leer.")

    results: Dict[str, Any] = {}
    for col in ("band_width", "retry_tail_p99"):
        series = frame[col].dropna()
        if series.empty:
            raise DataValidationError(f"Spalte {col} enthält keine gültigen Werte.")
        results[col] = {
            "median": float(series.median()),
            "iqr": float(series.quantile(0.75) - series.quantile(0.25)),
            "mean": float(series.mean()),
            "std": float(series.std()),
            "min": float(series.min()),
            "max": float(series.max()),
        }

    logger.debug("Berechnete Statistik-Ergebnisse: %s", results)
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def compare_runs(run1: RunData, run2: RunData) -> Dict[str, Any]:
    """Return the field-wise differences between two runs.

    Every difference is computed as ``run2`` minus ``run1``.

    Args:
        run1: The run used as the baseline.
        run2: The run compared against the baseline.

    Returns:
        A dict with the keys ``"band_width_diff"``, ``"aux_diff"``,
        ``"near_expiry_diff"`` and ``"retry_tail_p99_diff"``.

    Raises:
        DataValidationError: If either argument is not a ``RunData`` instance.
    """
    for candidate in (run1, run2):
        if not isinstance(candidate, RunData):
            raise DataValidationError("Beide Eingaben müssen RunData-Instanzen sein.")

    baseline, other = run1, run2
    deltas: Dict[str, Any] = {}
    deltas["band_width_diff"] = other.band_width - baseline.band_width
    deltas["aux_diff"] = other.aux - baseline.aux
    deltas["near_expiry_diff"] = other.near_expiry - baseline.near_expiry
    deltas["retry_tail_p99_diff"] = other.retry_tail_p99 - baseline.retry_tail_p99

    logger.debug(
        "Vergleich zwischen Runs: %s vs %s -> %s",
        baseline.__dict__,
        other.__dict__,
        deltas,
    )
    return deltas
|
||||||
|
|
||||||
|
|
||||||
|
# Configuration for CI validation: when this file is executed as a script,
# set up INFO-level logging and emit a single message.
if __name__ == "__main__":
    startup_level = logging.INFO
    logging.basicConfig(level=startup_level)
    logger.info("Module core.py loaded successfully.")
|
||||||
Loading…
Reference in a new issue