Add metrics_analysis_script/src/metrics_analysis_script/main.py

Mika 2026-01-24 12:03:28 +00:00
parent 0a45f1259c
commit 61ab048282


@@ -0,0 +1,107 @@
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Any, Dict, List

import numpy as np
from scipy.stats import bootstrap

logger = logging.getLogger(__name__)

@dataclass
class MetricResultsInput:
    metric_name: str
    values: List[float]
    pinned_flag: bool

    def validate(self) -> None:
        assert isinstance(self.metric_name, str) and self.metric_name != "", "metric_name must be non-empty string"
        assert isinstance(self.values, list) and all(isinstance(v, (int, float)) for v in self.values), "values must be list of numbers"
        assert isinstance(self.pinned_flag, bool), "pinned_flag must be bool"
        if len(self.values) == 0:
            raise ValueError(f"Empty values for metric {self.metric_name}")

@dataclass
class MetricResults:
    metric_name: str
    p50: float
    p95: float
    max: float
    retry_free_rate: float
    bootstrap_ci: List[float]

def _bootstrap_confidence_interval(data: np.ndarray, n_resamples: int = 1000, seed: int = 42) -> List[float]:
    """Return the 95% basic-bootstrap confidence interval for the mean of data."""
    rng = np.random.default_rng(seed)
    try:
        res = bootstrap((data,), np.mean, n_resamples=n_resamples, confidence_level=0.95, random_state=rng, method="basic")
        return [float(res.confidence_interval.low), float(res.confidence_interval.high)]
    except Exception as e:
        # Fallback: collapse the interval to the sample mean rather than failing the run.
        logger.warning(f"Bootstrap failed: {e}")
        return [float(np.mean(data)), float(np.mean(data))]

def analyze_metrics(data: List[Dict[str, Any]]) -> List[MetricResults]:
    """Analyze metrics and produce summary statistics with bootstrap intervals."""
    if not isinstance(data, list):
        raise TypeError("data must be a list of dictionaries")
    results: List[MetricResults] = []
    for entry in data:
        entry_obj = MetricResultsInput(**entry)
        entry_obj.validate()
        values_array = np.array(entry_obj.values, dtype=float)
        p50 = float(np.percentile(values_array, 50))
        p95 = float(np.percentile(values_array, 95))
        max_val = float(np.max(values_array))
        # retry_free_rate: estimate of the share of retry-free runs, here the
        # fraction of values at or below 1.05 * median
        threshold = 1.05 * p50
        retry_free_rate = float(np.mean(values_array <= threshold))
        ci = _bootstrap_confidence_interval(values_array)
        result = MetricResults(
            metric_name=entry_obj.metric_name,
            p50=p50,
            p95=p95,
            max=max_val,
            retry_free_rate=retry_free_rate,
            bootstrap_ci=ci,
        )
        logger.debug(f"Calculated MetricResults for {entry_obj.metric_name}: {result}")
        results.append(result)
    assert all(isinstance(r, MetricResults) for r in results), "Results must all be MetricResults"
    return results

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Analyze frozen runs metrics with bootstrap confidence intervals.")
    parser.add_argument("--input", required=True, help="Path to JSON input file with metrics data")
    parser.add_argument("--output", required=True, help="Path to output JSON file with analysis results")
    args = parser.parse_args()
    input_path = args.input
    output_path = args.output
    with open(input_path, "r", encoding="utf-8") as f:
        data_json = json.load(f)
    analysis = analyze_metrics(data_json)
    output_list = [r.__dict__ for r in analysis]
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_list, f, indent=2)
    logger.info(f"Analysis results written to {output_path}")