Add artifact.1/src/artifact_1/core.py
This commit is contained in:
commit
548d94a7f0
1 changed files with 108 additions and 0 deletions
108
artifact.1/src/artifact_1/core.py
Normal file
108
artifact.1/src/artifact_1/core.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pandas as pd
|
||||
from scipy import stats
|
||||
|
||||
# Public API of this module: the names exported by a star import.
__all__ = [
    "RunData",
    "MetricsSummary",
    "calculate_metrics",
    "run_mann_whitney_test",
]


# Set up logging (INFO level) and create the module-level logger.
# NOTE(review): logging.basicConfig() runs as a side effect of importing this
# module and configures the root logger (it does nothing if the root logger
# already has handlers). Confirm that this is intended for a library module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class RunData:
    """Measurements recorded for a single run.

    The dataclass itself does not enforce field types; call ``validate`` to
    check them. Only types are checked, not value ranges or the timestamp
    format.
    """

    # Time of the run as a string; the format is not checked.
    timestamp: str
    # Boolean flag attached to the run (presumably "run was pinned" -- confirm
    # with the data producer).
    pinned_flag: bool
    # Runtime of the run; the unit is not specified in this module.
    runtime: float
    # Number of seqcount retries counted for the run.
    seqcount_retry_count: int
    # Duration of the "Mischfenster" (mixing window); unit not specified here.
    mischfenster_dauer: float

    def validate(self) -> None:
        """Check that every field holds a value of the expected type.

        The checks run in field order and stop at the first failure.
        Note that ``bool`` is a subclass of ``int``, so boolean values pass
        the ``int`` and ``float``/``int`` checks.

        Raises:
            AssertionError: If a field has an unexpected type. The exception
                is raised explicitly rather than through an ``assert``
                statement, so the validation is still performed when Python
                runs with ``-O`` (which removes ``assert`` statements).
        """
        checks = (
            (self.timestamp, str, "timestamp muss ein String sein"),
            (self.pinned_flag, bool, "pinned_flag muss bool sein"),
            (self.runtime, (float, int), "runtime muss float/int sein"),
            (
                self.seqcount_retry_count,
                int,
                "seqcount_retry_count muss int sein",
            ),
            (
                self.mischfenster_dauer,
                (float, int),
                "mischfenster_dauer muss float/int sein",
            ),
        )
        for value, expected_type, message in checks:
            if not isinstance(value, expected_type):
                # AssertionError is kept for backward compatibility with
                # callers that catch the original assert failures.
                raise AssertionError(message)
|
||||
|
||||
|
||||
@dataclass
class MetricsSummary:
    """Container for the aggregated metrics of an evaluation."""

    # Fraction of runs whose seqcount retry count is zero.
    retry_free_rate: float
    # Statistics of the mixing-window duration, keyed by statistic name.
    mischfenster_stats: Dict[str, float]
    # Correlation coefficients, keyed by a descriptive name.
    correlations: Dict[str, float]

    def as_dict(self) -> Dict[str, Any]:
        """Return the summary as a plain dictionary.

        The conversion is done by ``dataclasses.asdict``, so the nested
        dictionaries in the result are copies, not the stored objects.
        """
        plain: Dict[str, Any] = asdict(self)
        return plain
|
||||
|
||||
|
||||
def calculate_metrics(run_data: RunData) -> MetricsSummary:
    """Compute aggregated metrics and summary statistics from a ``RunData``.

    The run is loaded into a one-row DataFrame, from which the retry-free
    rate, the p50/p95/max of the mixing-window duration and the correlation
    between mixing-window duration and retry count are derived.

    Args:
        run_data: The run to evaluate; it is validated before use.

    Returns:
        A ``MetricsSummary`` holding the computed values. The correlation is
        reported as ``0.0`` whenever it is undefined (fewer than two rows or
        a column without variance).

    Raises:
        TypeError: If ``run_data`` is not a ``RunData`` instance.
        AssertionError: If ``run_data.validate()`` rejects a field.
    """
    if not isinstance(run_data, RunData):
        raise TypeError("run_data muss vom Typ RunData sein")

    run_data.validate()

    # One row, one column per dataclass field.
    df = pd.DataFrame([{k: getattr(run_data, k) for k in run_data.__dataclass_fields__}])
    retries = df['seqcount_retry_count']
    window = df['mischfenster_dauer']

    # Retry-free rate: share of runs without any seqcount retry.
    retry_free_rate = float((retries == 0).mean())

    # Mixing-window statistics.
    mischfenster_stats = {
        'p50': float(window.quantile(0.5)),
        'p95': float(window.quantile(0.95)),
        'max': float(window.max()),
    }

    # Correlation between mixing-window duration and retry count.
    # Series.std() is the sample standard deviation (ddof=1) and is NaN for a
    # single row. A test of the form `std == 0` is False for NaN and would let
    # the undefined case through to corr(), which then yields NaN. `std > 0`
    # is False for both 0 and NaN, so every undefined case maps to 0.0.
    if retries.std() > 0 and window.std() > 0:
        corr_val = float(window.corr(retries))
    else:
        corr_val = 0.0

    correlations = {'mischfenster_vs_retry_count': corr_val}

    summary = MetricsSummary(
        retry_free_rate=retry_free_rate,
        mischfenster_stats=mischfenster_stats,
        correlations=correlations,
    )

    logger.info("MetricsSummary berechnet: %s", summary.as_dict())

    return summary
|
||||
|
||||
|
||||
def run_mann_whitney_test(data1: List[float], data2: List[float]) -> float:
    """Run a two-sided Mann-Whitney U test on two groups and return the p-value.

    Args:
        data1: First group of numeric values; must be a non-empty list.
        data2: Second group of numeric values; must be a non-empty list.

    Returns:
        The p-value of the test as a ``float``.

    Raises:
        TypeError: If an argument is not a list or contains an element that
            is neither ``int`` nor ``float``.
        ValueError: If either list is empty.
        Exception: Anything raised by ``scipy.stats.mannwhitneyu`` is logged
            and re-raised unchanged.
    """
    if not isinstance(data1, list) or not isinstance(data2, list):
        raise TypeError("data1 und data2 müssen Listen von float sein")

    if not (data1 and data2):
        raise ValueError("Beide Datengruppen müssen Werte enthalten")

    # Check the elements of both groups in order; the first offender aborts.
    for sample in data1 + data2:
        if not isinstance(sample, (int, float)):
            raise TypeError("Alle Elemente in data1 und data2 müssen numerisch sein")

    try:
        _statistic, p_value = stats.mannwhitneyu(
            data1,
            data2,
            alternative='two-sided',
        )
    except Exception as err:
        # Log with traceback, then hand the error on to the caller.
        logger.exception("Fehler beim Mann-Whitney-U-Test: %s", err)
        raise

    logger.info("Mann-Whitney-U-Test p-Wert: %.5f", p_value)

    return float(p_value)
|
||||
Loading…
Reference in a new issue