Add frozen_runs_analysis/src/frozen_runs_analysis/core.py
This commit is contained in:
parent
9fab7ffa8c
commit
c67cc23016
1 changed file with 96 additions and 0 deletions
96
frozen_runs_analysis/src/frozen_runs_analysis/core.py
Normal file
@@ -0,0 +1,96 @@
from __future__ import annotations

import logging
from dataclasses import dataclass, asdict
from statistics import mean, pstdev, quantiles
from typing import Any, Dict, List

import pandas as pd

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class RunDataValidationError(Exception):
    """Raised when RunData validation fails."""


@dataclass
class RunData:
    """A single run record consumed by the frozen-runs analysis."""

    run_id: str
    status: str
    sanity_checks: Dict[str, Any]
    config_hash: str

    @classmethod
    def validate(cls, data: Dict[str, Any]) -> "RunData":
        """Build a RunData from a plain dict, checking required fields and types."""
        required_fields = {"run_id", "status", "sanity_checks", "config_hash"}
        missing = required_fields - data.keys()
        if missing:
            raise RunDataValidationError(
                f"Missing fields in RunData: {', '.join(sorted(missing))}"
            )
        if not isinstance(data["sanity_checks"], dict):
            raise RunDataValidationError("Field 'sanity_checks' must be a dict.")
        for field in ("run_id", "status", "config_hash"):
            if not isinstance(data[field], str):
                raise RunDataValidationError(f"Field '{field}' must be a str.")
        return cls(
            run_id=data["run_id"],
            status=data["status"],
            sanity_checks=data["sanity_checks"],
            config_hash=data["config_hash"],
        )
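    # Illustrative usage (field values invented):
    #     RunData.validate({"run_id": "r1", "status": "frozen",
    #                       "sanity_checks": {"loss_finite": True},
    #                       "config_hash": "abc123"})
    # returns a RunData instance; a missing key or a wrongly typed value
    # raises RunDataValidationError instead.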


def analyse_frozen_runs(data: List[RunData | Dict[str, Any]]) -> Dict[str, Any]:
    """Analyse frozen-run data, computing stability metrics and frequencies."""
    if not data:
        logger.warning("No data supplied for analysis.")
        return {}

    # Validate and normalise the input records.
    valid_runs: List[RunData] = []
    for item in data:
        if isinstance(item, dict):
            try:
                valid_runs.append(RunData.validate(item))
            except RunDataValidationError as e:
                logger.error("Ignoring invalid data entry: %s", e)
        elif isinstance(item, RunData):
            valid_runs.append(item)
        else:
            logger.error("Unknown data type in analysis: %s", type(item).__name__)

    if not valid_runs:
        logger.warning("No valid runs left after validation.")
        return {}

    df = pd.DataFrame([asdict(r) for r in valid_runs])
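    # One row per valid run; the columns run_id, status, sanity_checks, and
    # config_hash come from dataclasses.asdict.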

    # Simple sanity score: the number of failed entries in sanity_checks.
    # A value counts as failed when it is falsy but not None, True, or False;
    # 0 and 0.0 compare equal to False and are therefore skipped, so this
    # flags things like empty strings, lists, or dicts.
    df['sanity_error_count'] = df['sanity_checks'].apply(
        lambda checks: sum(
            1 for v in checks.values()
            if v not in (None, True, False) and not v
        )
    )
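    # Hypothetical example: {"loss_finite": True, "grad_norm": "", "notes": None}
    # yields sanity_error_count == 1, since only the empty string counts as a failure.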

    # Flip-flop rate: the fraction of config hashes that occur with more
    # than one distinct status across runs.
    hash_status = df.groupby('config_hash')['status'].nunique()
    flip_flop_rate = (hash_status > 1).mean()
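    # Invented illustration: hashes a, b, c with status sets {"frozen"},
    # {"frozen", "crashed"}, {"frozen"} give flip_flop_rate == 1/3, since
    # only b appears with more than one status.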

    # Distribution statistics over the per-run sanity error counts.
    sanity_counts = df['sanity_error_count'].tolist()
    avg_errors = mean(sanity_counts)
    std_errors = pstdev(sanity_counts) if len(sanity_counts) > 1 else 0.0
    # quantiles(..., n=100) returns 99 cut points, so index 94 is the 95th
    # percentile; with fewer than 20 runs, fall back to the maximum.
    p95 = (
        quantiles(sanity_counts, n=100)[94]
        if len(sanity_counts) >= 20
        else max(sanity_counts)
    )

    result = {
        "total_runs": len(df),
        "unique_hashes": df['config_hash'].nunique(),
        "flip_flop_rate": round(float(flip_flop_rate), 4),
        "avg_sanity_errors": round(avg_errors, 4),
        "std_sanity_errors": round(std_errors, 4),
        "p95_sanity_errors": round(float(p95), 4),
    }

    logger.info("Analysis finished: %s", result)
    return result
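

# Minimal usage sketch; the sample records below are invented for
# illustration and are not part of any real pipeline.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    sample = [
        {"run_id": "r1", "status": "frozen",
         "sanity_checks": {"loss_finite": True, "grad_norm": ""},
         "config_hash": "abc123"},
        {"run_id": "r2", "status": "crashed",
         "sanity_checks": {"loss_finite": True},
         "config_hash": "abc123"},
    ]
    print(analyse_frozen_runs(sample))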