# Core data model and analysis routine for the run_data_analysis package.
#
# RunData holds the measurements of a single run; analyse_run_data derives
# summary statistics and ratio-based indicators from one RunData instance.

from __future__ import annotations

import json
import statistics
from dataclasses import asdict, dataclass
from typing import Any, Dict

import pandas as pd

# Fields every RunData payload must provide, in declaration order.
_REQUIRED_FIELDS = ("band_center", "band_width", "cluster_score", "retry_tail")

# Lower bound applied to denominators so the ratio metrics never divide by zero.
_MIN_DENOMINATOR = 1e-9


class _InvalidRunDataError(TypeError, AssertionError):
    """Raised when ``analyse_run_data`` receives something other than RunData.

    Derives from ``TypeError`` because that is the idiomatic category, and from
    ``AssertionError`` so callers written against the earlier ``assert``-based
    check keep working.
    """


@dataclass
class RunData:
    """Measurements of a single run used for the band analysis."""

    band_center: float
    band_width: float
    cluster_score: float
    retry_tail: float

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> RunData:
        """Build a ``RunData`` from a mapping, coercing every field to float.

        Args:
            data: Mapping that contains at least the keys ``band_center``,
                ``band_width``, ``cluster_score`` and ``retry_tail``. Extra
                keys are ignored.

        Returns:
            A ``RunData`` whose fields are the float-converted values.

        Raises:
            ValueError: If a required key is missing, or if a value cannot be
                converted with ``float()``.
        """
        present = data.keys()
        # Report in declaration order so the message is deterministic.
        missing = [name for name in _REQUIRED_FIELDS if name not in present]
        if missing:
            raise ValueError(f"Missing fields in RunData: {', '.join(missing)}")

        try:
            return cls(**{name: float(data[name]) for name in _REQUIRED_FIELDS})
        except (TypeError, ValueError) as e:
            raise ValueError(f"Invalid data types in RunData: {e}") from e


def analyse_run_data(run_data: RunData) -> Dict[str, Any]:
    """Compute summary statistics and ratio indicators for a single run.

    Args:
        run_data: The run to analyse.

    Returns:
        A dict of built-in ``float`` values with the keys
        ``band_center_mean``, ``band_width_mean``, ``cluster_score_mean``,
        ``retry_tail_mean``, ``band_center_std``, ``band_width_std``,
        ``stability_index`` (cluster_score / band_width) and
        ``retry_tail_ratio`` (retry_tail / cluster_score). Both denominators
        are clamped to a minimum of 1e-9.

    Raises:
        TypeError: If ``run_data`` is not a ``RunData`` instance. The raised
            exception is also an ``AssertionError`` for backward
            compatibility.
    """
    # Explicit raise instead of ``assert``: assertions vanish under ``-O``.
    if not isinstance(run_data, RunData):
        raise _InvalidRunDataError("Input must be a RunData instance")

    # A one-row frame today; kept as a DataFrame so the aggregation code stays
    # the same if several runs are analysed together later.
    frame = pd.DataFrame([asdict(run_data)])
    has_spread = len(frame) > 1  # a population std needs more than one row

    def _mean(column: str) -> float:
        # pandas returns numpy.float64; normalise to the built-in type.
        return float(frame[column].mean())

    def _std(column: str) -> float:
        return float(statistics.pstdev(frame[column])) if has_spread else 0.0

    result = {
        "band_center_mean": _mean("band_center"),
        "band_width_mean": _mean("band_width"),
        "cluster_score_mean": _mean("cluster_score"),
        "retry_tail_mean": _mean("retry_tail"),
        "band_center_std": _std("band_center"),
        "band_width_std": _std("band_width"),
        "stability_index": float(
            run_data.cluster_score / max(run_data.band_width, _MIN_DENOMINATOR)
        ),
        "retry_tail_ratio": float(
            run_data.retry_tail / max(run_data.cluster_score, _MIN_DENOMINATOR)
        ),
    }

    # Internal invariant (not input validation): every metric is a float.
    for key, value in result.items():
        assert isinstance(value, float), f"Value for {key} must be float"

    return result