Add metrics_analysis_script/src/metrics_analysis_script/main.py

Mika 2026-01-24 12:03:28 +00:00
parent 0a45f1259c
commit 61ab048282


@@ -0,0 +1,107 @@
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Any, Dict, List

import numpy as np
from scipy.stats import bootstrap

logger = logging.getLogger(__name__)

@dataclass
class MetricResultsInput:
    metric_name: str
    values: List[float]
    pinned_flag: bool

    def validate(self) -> None:
        assert isinstance(self.metric_name, str) and self.metric_name != "", "metric_name must be non-empty string"
        assert isinstance(self.values, list) and all(isinstance(v, (int, float)) for v in self.values), "values must be list of numbers"
        assert isinstance(self.pinned_flag, bool), "pinned_flag must be bool"
        if len(self.values) == 0:
            raise ValueError(f"Empty values for metric {self.metric_name}")

@dataclass
class MetricResults:
    metric_name: str
    p50: float
    p95: float
    max: float
    retry_free_rate: float
    bootstrap_ci: List[float]

def _bootstrap_confidence_interval(data: np.ndarray, n_resamples: int = 1000, seed: int = 42) -> List[float]:
    """Return the 95% basic-bootstrap confidence interval for the mean of data."""
    rng = np.random.default_rng(seed)
    try:
        res = bootstrap((data,), np.mean, n_resamples=n_resamples, confidence_level=0.95, random_state=rng, method="basic")
        return [float(res.confidence_interval.low), float(res.confidence_interval.high)]
    except Exception as e:
        # Fallback: collapse the interval to the sample mean rather than failing the run.
        logger.warning(f"Bootstrap failed: {e}")
        return [float(np.mean(data)), float(np.mean(data))]

def analyze_metrics(data: List[Dict[str, Any]]) -> List[MetricResults]:
    """Analyze metrics and produce summary statistics with bootstrap intervals."""
    if not isinstance(data, list):
        raise TypeError("data must be a list of dictionaries")
    results: List[MetricResults] = []
    for entry in data:
        entry_obj = MetricResultsInput(**entry)
        entry_obj.validate()
        values_array = np.array(entry_obj.values, dtype=float)
        p50 = float(np.percentile(values_array, 50))
        p95 = float(np.percentile(values_array, 95))
        max_val = float(np.max(values_array))
        # retry_free_rate: estimate of the share of retry-free runs, here the
        # fraction of values at or below 1.05 * median
        threshold = 1.05 * p50
        retry_free_rate = float(np.mean(values_array <= threshold))
        ci = _bootstrap_confidence_interval(values_array)
        result = MetricResults(
            metric_name=entry_obj.metric_name,
            p50=p50,
            p95=p95,
            max=max_val,
            retry_free_rate=retry_free_rate,
            bootstrap_ci=ci,
        )
        logger.debug(f"Calculated MetricResults for {entry_obj.metric_name}: {result}")
        results.append(result)
    assert all(isinstance(r, MetricResults) for r in results), "Results must all be MetricResults"
    return results

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Analyze frozen runs metrics with bootstrap confidence intervals.")
    parser.add_argument("--input", required=True, help="Path to JSON input file with metrics data")
    parser.add_argument("--output", required=True, help="Path to output JSON file with analysis results")
    args = parser.parse_args()
    input_path = args.input
    output_path = args.output
    with open(input_path, "r", encoding="utf-8") as f:
        data_json = json.load(f)
    analysis = analyze_metrics(data_json)
    output_list = [r.__dict__ for r in analysis]
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_list, f, indent=2)
    logger.info(f"Analysis results written to {output_path}")