From 548d94a7f05947df62a9ba700ed4e41bcd6b0148 Mon Sep 17 00:00:00 2001 From: Mika Date: Wed, 21 Jan 2026 17:37:35 +0000 Subject: [PATCH] Add artifact.1/src/artifact_1/core.py --- artifact.1/src/artifact_1/core.py | 108 ++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 artifact.1/src/artifact_1/core.py diff --git a/artifact.1/src/artifact_1/core.py b/artifact.1/src/artifact_1/core.py new file mode 100644 index 0000000..e9fa5ec --- /dev/null +++ b/artifact.1/src/artifact_1/core.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import logging +from dataclasses import dataclass, asdict +from typing import Any, Dict, List + +import pandas as pd +from scipy import stats + +__all__ = [ + "RunData", + "MetricsSummary", + "calculate_metrics", + "run_mann_whitney_test", +] + + +# Setup logging for CI-ready consistency +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@dataclass +class RunData: + timestamp: str + pinned_flag: bool + runtime: float + seqcount_retry_count: int + mischfenster_dauer: float + + def validate(self) -> None: + assert isinstance(self.timestamp, str), "timestamp muss ein String sein" + assert isinstance(self.pinned_flag, bool), "pinned_flag muss bool sein" + assert isinstance(self.runtime, (float, int)), "runtime muss float/int sein" + assert isinstance(self.seqcount_retry_count, int), "seqcount_retry_count muss int sein" + assert isinstance(self.mischfenster_dauer, (float, int)), "mischfenster_dauer muss float/int sein" + + +@dataclass +class MetricsSummary: + retry_free_rate: float + mischfenster_stats: Dict[str, float] + correlations: Dict[str, float] + + def as_dict(self) -> Dict[str, Any]: + return asdict(self) + + +def calculate_metrics(run_data: RunData) -> MetricsSummary: + """Berechnet aggregierte Metriken und statistische Kennzahlen aus RunData.""" + + if not isinstance(run_data, RunData): + raise TypeError("run_data muss vom Typ RunData sein") + + run_data.validate() + + df = pd.DataFrame([{k: getattr(run_data, k) for k in run_data.__dataclass_fields__}]) + + # Retry-free-rate (Anteil runs ohne seqcount-Retry) + retry_free_rate = float((df['seqcount_retry_count'] == 0).mean()) + + # Mischfenster-Statistiken + mischfenster_stats = { + 'p50': float(df['mischfenster_dauer'].quantile(0.5)), + 'p95': float(df['mischfenster_dauer'].quantile(0.95)), + 'max': float(df['mischfenster_dauer'].max()), + } + + # Korrelationen zwischen Mischfenster und Retries + if df['seqcount_retry_count'].std() == 0 or df['mischfenster_dauer'].std() == 0: + corr_val = 0.0 + else: + corr_val = float(df['mischfenster_dauer'].corr(df['seqcount_retry_count'])) + + correlations = {'mischfenster_vs_retry_count': corr_val} + + summary = MetricsSummary( + retry_free_rate=retry_free_rate, + mischfenster_stats=mischfenster_stats, + correlations=correlations, + ) + + logger.info("MetricsSummary berechnet: %s", summary.as_dict()) + + return summary + + +def run_mann_whitney_test(data1: List[float], data2: List[float]) -> float: + """Führt Mann-Whitney-U-Test durch, um Signifikanz zwischen zwei Gruppen zu prüfen.""" + + if not (isinstance(data1, list) and isinstance(data2, list)): + raise TypeError("data1 und data2 müssen Listen von float sein") + + if not data1 or not data2: + raise ValueError("Beide Datengruppen müssen Werte enthalten") + + if not all(isinstance(x, (int, float)) for x in data1 + data2): + raise TypeError("Alle Elemente in data1 und data2 müssen numerisch sein") + + try: + _, p_value = stats.mannwhitneyu(data1, data2, alternative='two-sided') + except Exception as e: + logger.exception("Fehler beim Mann-Whitney-U-Test: %s", e) + raise + + logger.info("Mann-Whitney-U-Test p-Wert: %.5f", p_value) + + return float(p_value)