# metrics_analysis_script/src/metrics_analysis_script/main.py
#
# Recovered from patch 61ab04828296a1fffda324cc852a599c72bcf6bc
# ("Add metrics_analysis_script/src/metrics_analysis_script/main.py",
# Mika, Sat, 24 Jan 2026 12:03:28 +0000).
#
# Computes per-metric summary statistics (p50, p95, max, a "retry-free" rate)
# plus a bootstrap confidence interval for the mean, and provides a small CLI
# that reads the metrics from a JSON file and writes the results to another.
from __future__ import annotations

import argparse
import json
import logging
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional, Sequence

import numpy as np
import pandas as pd  # noqa: F401  (unused here; kept from the original import list)
from scipy.stats import bootstrap

logger = logging.getLogger(__name__)


@dataclass
class MetricResultsInput:
    """Raw measurements for a single metric, as supplied by the caller.

    Construction performs no checks; call :meth:`validate` before using the
    fields.
    """

    metric_name: str  # non-empty identifier of the metric
    values: List[float]  # measured values; must contain at least one number
    pinned_flag: bool  # type-checked by validate(); not read by analyze_metrics()

    def validate(self) -> None:
        """Check field types and reject empty inputs.

        Raises:
            TypeError: If ``metric_name`` is not a string, ``values`` is not a
                list of ``int``/``float``, or ``pinned_flag`` is not a bool.
            ValueError: If ``metric_name`` is empty or ``values`` has no
                entries.
        """
        # Explicit raises instead of ``assert``: assertions are stripped under
        # ``python -O``, which would silently disable this validation.
        if not isinstance(self.metric_name, str):
            raise TypeError("metric_name must be non-empty string")
        if not self.metric_name:
            raise ValueError("metric_name must be non-empty string")
        if not isinstance(self.values, list) or not all(
            isinstance(v, (int, float)) for v in self.values
        ):
            raise TypeError("values must be list of numbers")
        if not isinstance(self.pinned_flag, bool):
            raise TypeError("pinned_flag must be bool")
        if not self.values:
            raise ValueError(f"Empty values for metric {self.metric_name}")


@dataclass
class MetricResults:
    """Summary statistics computed by :func:`analyze_metrics` for one metric."""

    metric_name: str  # copied from the input entry
    p50: float  # 50th percentile (median) of the values
    p95: float  # 95th percentile of the values
    max: float  # largest value
    retry_free_rate: float  # fraction of values <= 1.05 * p50
    bootstrap_ci: List[float]  # [low, high] of the 95% bootstrap CI of the mean


def _bootstrap_confidence_interval(
    data: np.ndarray, n_resamples: int = 1000, seed: int = 42
) -> List[float]:
    """Return ``[low, high]`` of a 95% basic-bootstrap CI for the mean of *data*.

    The random generator is seeded with *seed*, so repeated calls with the same
    arguments yield the same interval.  When the interval cannot be computed
    the degenerate interval ``[mean, mean]`` is returned and a warning is
    logged.

    Args:
        data: One-dimensional sample of observations.
        n_resamples: Number of bootstrap resamples to draw.
        seed: Seed for the NumPy random generator.
    """
    sample = np.asarray(data, dtype=float)
    mean = float(np.mean(sample))

    # SciPy's bootstrap rejects samples with fewer than two observations;
    # handle that expected case directly instead of via a caught exception.
    if sample.size < 2:
        logger.warning(
            "Bootstrap skipped: need at least 2 observations, got %d", sample.size
        )
        return [mean, mean]

    rng = np.random.default_rng(seed)
    try:
        res = bootstrap(
            (sample,),
            np.mean,
            n_resamples=n_resamples,
            confidence_level=0.95,
            random_state=rng,
            method="basic",
        )
    except Exception as exc:
        # Deliberately broad: the CI is best-effort and must never abort the
        # analysis of the remaining statistics.
        logger.warning("Bootstrap failed: %s", exc)
        return [mean, mean]

    interval = res.confidence_interval
    return [float(interval.low), float(interval.high)]


def analyze_metrics(data: List[Dict[str, Any]]) -> List[MetricResults]:
    """Analyze metrics and produce summary statistics with bootstrap intervals.

    Each entry of *data* is unpacked into :class:`MetricResultsInput` (so it
    must provide exactly the keys ``metric_name``, ``values`` and
    ``pinned_flag``) and validated before any statistic is computed.

    Args:
        data: List of per-metric dictionaries.

    Returns:
        One :class:`MetricResults` per input entry, in input order.

    Raises:
        TypeError: If *data* is not a list, an entry cannot be unpacked into
            :class:`MetricResultsInput`, or a field has the wrong type.
        ValueError: If an entry has an empty name or no values.
    """
    if not isinstance(data, list):
        raise TypeError("data must be a list of dictionaries")

    results: List[MetricResults] = []

    for entry in data:
        entry_obj = MetricResultsInput(**entry)
        entry_obj.validate()

        values_array = np.array(entry_obj.values, dtype=float)

        p50 = float(np.percentile(values_array, 50))
        p95 = float(np.percentile(values_array, 95))
        max_val = float(np.max(values_array))

        # retry_free_rate: estimate of the share of error-free runs, taken
        # here as the fraction of values at or below 1.05 * median.
        threshold = 1.05 * p50
        retry_free_rate = float(np.mean(values_array <= threshold))

        result = MetricResults(
            metric_name=entry_obj.metric_name,
            p50=p50,
            p95=p95,
            max=max_val,
            retry_free_rate=retry_free_rate,
            bootstrap_ci=_bootstrap_confidence_interval(values_array),
        )
        # Lazy %-formatting: the result repr is only built when DEBUG is on.
        logger.debug(
            "Calculated MetricResults for %s: %s", entry_obj.metric_name, result
        )
        results.append(result)

    return results


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Command-line entry point: read metrics JSON, analyze, write results JSON.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description="Analyze frozen runs metrics with bootstrap confidence intervals."
    )
    parser.add_argument(
        "--input", required=True, help="Path to JSON input file with metrics data"
    )
    parser.add_argument(
        "--output",
        required=True,
        help="Path to output JSON file with analysis results",
    )
    args = parser.parse_args(argv)

    with open(args.input, "r", encoding="utf-8") as f:
        data_json = json.load(f)

    analysis = analyze_metrics(data_json)
    # asdict() is the supported dataclass-to-dict conversion; it does not
    # depend on instances having a __dict__.
    output_list = [asdict(r) for r in analysis]

    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(output_list, f, indent=2)

    logger.info("Analysis results written to %s", args.output)


if __name__ == "__main__":
    main()