Add artifact.1/src/artifact_1/core.py

This commit is contained in:
Mika 2026-01-21 17:37:35 +00:00
commit 548d94a7f0

View file

@ -0,0 +1,108 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, asdict
from typing import Any, Dict, List
import pandas as pd
from scipy import stats
# Names exported by a star-import of this module.
__all__ = [
"RunData",
"MetricsSummary",
"calculate_metrics",
"run_mann_whitney_test",
]
# Configure the root logger at INFO level when this module is imported and
# create the module-level logger used by calculate_metrics() and
# run_mann_whitney_test().
# NOTE(review): basicConfig() here is an import-time side effect; confirm that
# configuring logging from this module (rather than the application entry
# point) is intended.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class RunData:
    """Raw measurements recorded for a single run.

    The dataclass itself performs no checking on construction; call
    :meth:`validate` to verify the field types.
    """

    # Timestamp of the run, stored as text (format is not checked here).
    timestamp: str
    # Boolean flag for the run. NOTE(review): presumably "was pinned" - confirm.
    pinned_flag: bool
    # Runtime of the run (unit not specified in this module).
    runtime: float
    # Number of seqcount retries; calculate_metrics() treats 0 as "retry-free".
    seqcount_retry_count: int
    # Duration of the "Mischfenster" (unit not specified in this module).
    mischfenster_dauer: float

    def validate(self) -> None:
        """Check that every field has the expected type.

        The checks run in field order and stop at the first mismatch.

        Raises:
            AssertionError: If a field has an unexpected type. The exception
                is raised explicitly instead of via ``assert`` so that the
                validation still runs when Python is started with ``-O``
                (which strips ``assert`` statements).

        Note:
            ``bool`` is a subclass of ``int``, so ``True``/``False`` are
            accepted by the numeric checks.
        """
        checks = (
            (self.timestamp, str, "timestamp muss ein String sein"),
            (self.pinned_flag, bool, "pinned_flag muss bool sein"),
            (self.runtime, (float, int), "runtime muss float/int sein"),
            (self.seqcount_retry_count, int, "seqcount_retry_count muss int sein"),
            (
                self.mischfenster_dauer,
                (float, int),
                "mischfenster_dauer muss float/int sein",
            ),
        )
        for value, expected_type, message in checks:
            if not isinstance(value, expected_type):
                raise AssertionError(message)
@dataclass
class MetricsSummary:
    """Container for the aggregated metrics produced by calculate_metrics()."""

    # Share of runs whose seqcount retry count is zero.
    retry_free_rate: float
    # Summary figures of mischfenster_dauer, keyed by statistic name.
    mischfenster_stats: Dict[str, float]
    # Correlation coefficients, keyed by a descriptive name.
    correlations: Dict[str, float]

    def as_dict(self) -> Dict[str, Any]:
        """Return the summary as a plain dictionary.

        The conversion is delegated to ``dataclasses.asdict``, so the nested
        dictionaries in the result are copies, not the instance's own objects.
        """
        plain: Dict[str, Any] = asdict(self)
        return plain
def calculate_metrics(run_data: RunData) -> MetricsSummary:
    """Compute aggregated metrics and statistical figures from a RunData record.

    The record is validated, loaded into a one-row DataFrame and summarised.

    Args:
        run_data: The run to summarise.

    Returns:
        A MetricsSummary holding the retry-free rate, the p50/p95/max of
        ``mischfenster_dauer`` and the correlation between
        ``mischfenster_dauer`` and ``seqcount_retry_count``. The correlation
        is reported as 0.0 whenever it is undefined (zero variance, or fewer
        than two observations - which is always the case for a single record).

    Raises:
        TypeError: If ``run_data`` is not a RunData instance.
        AssertionError: Propagated from ``RunData.validate()`` when a field
            has an unexpected type.
    """
    if not isinstance(run_data, RunData):
        raise TypeError("run_data muss vom Typ RunData sein")
    run_data.validate()

    df = pd.DataFrame([{k: getattr(run_data, k) for k in run_data.__dataclass_fields__}])

    # Retry-free rate: share of runs without any seqcount retry.
    retry_free_rate = float((df['seqcount_retry_count'] == 0).mean())

    # Summary statistics of the Mischfenster duration.
    mischfenster_stats = {
        'p50': float(df['mischfenster_dauer'].quantile(0.5)),
        'p95': float(df['mischfenster_dauer'].quantile(0.95)),
        'max': float(df['mischfenster_dauer'].max()),
    }

    # Correlation between Mischfenster duration and retry count.
    retry_std = df['seqcount_retry_count'].std()
    window_std = df['mischfenster_dauer'].std()
    # The sample standard deviation is NaN for fewer than two rows, and
    # `NaN == 0` is False, so a plain equality test would let the degenerate
    # case through and yield a NaN correlation. `not (x > 0)` is True for
    # both 0 and NaN, so the 0.0 fallback applies to every undefined case.
    if not (retry_std > 0 and window_std > 0):
        corr_val = 0.0
    else:
        corr_val = float(df['mischfenster_dauer'].corr(df['seqcount_retry_count']))
    correlations = {'mischfenster_vs_retry_count': corr_val}

    summary = MetricsSummary(
        retry_free_rate=retry_free_rate,
        mischfenster_stats=mischfenster_stats,
        correlations=correlations,
    )
    logger.info("MetricsSummary berechnet: %s", summary.as_dict())
    return summary
def run_mann_whitney_test(data1: List[float], data2: List[float]) -> float:
    """Run a two-sided Mann-Whitney U test on two groups and return the p-value.

    Args:
        data1: First sample; must be a non-empty list of numbers.
        data2: Second sample; must be a non-empty list of numbers.

    Returns:
        The p-value of the two-sided test as a float.

    Raises:
        TypeError: If either argument is not a list, or any element is not an
            int or float.
        ValueError: If either list is empty.
        Exception: Any error raised by ``scipy.stats.mannwhitneyu`` is logged
            and re-raised unchanged.
    """
    # Guard clauses, checked in this order: container type, emptiness,
    # element type.
    if not isinstance(data1, list) or not isinstance(data2, list):
        raise TypeError("data1 und data2 müssen Listen von float sein")
    if len(data1) == 0 or len(data2) == 0:
        raise ValueError("Beide Datengruppen müssen Werte enthalten")
    has_non_numeric = any(
        not isinstance(value, (int, float))
        for sample in (data1, data2)
        for value in sample
    )
    if has_non_numeric:
        raise TypeError("Alle Elemente in data1 und data2 müssen numerisch sein")

    try:
        _statistic, p_value = stats.mannwhitneyu(
            data1, data2, alternative='two-sided'
        )
    except Exception as e:
        # Record the failure with traceback, then let the caller handle it.
        logger.exception("Fehler beim Mann-Whitney-U-Test: %s", e)
        raise
    else:
        logger.info("Mann-Whitney-U-Test p-Wert: %.5f", p_value)
        return float(p_value)