From 493679d565ea88bac25c7f050702f6839eb840ad Mon Sep 17 00:00:00 2001 From: Mika Date: Mon, 30 Mar 2026 16:33:37 +0000 Subject: [PATCH] Add data_analysis/src/data_analysis/core.py --- data_analysis/src/data_analysis/core.py | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 data_analysis/src/data_analysis/core.py diff --git a/data_analysis/src/data_analysis/core.py b/data_analysis/src/data_analysis/core.py new file mode 100644 index 0000000..47da5f2 --- /dev/null +++ b/data_analysis/src/data_analysis/core.py @@ -0,0 +1,60 @@ +from typing import List, Dict, Any +import pandas as pd +import statistics +import logging + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def _validate_input(run_data_list: List[Dict[str, Any]]) -> None: + if not isinstance(run_data_list, list): + raise TypeError("run_data_list must be a list of dictionaries") + for idx, item in enumerate(run_data_list): + if not isinstance(item, dict): + raise TypeError(f"Item at index {idx} must be a dict") + required_fields = {"epoch_ms", "run_id", "retry_tail_p99", "band_width"} + if not required_fields.issubset(item.keys()): + missing = required_fields - item.keys() + raise ValueError(f"Missing required fields: {missing}") + if not isinstance(item["epoch_ms"], int): + raise TypeError("epoch_ms must be int") + if not isinstance(item["run_id"], str): + raise TypeError("run_id must be str") + if not isinstance(item["retry_tail_p99"], (int, float)): + raise TypeError("retry_tail_p99 must be numeric") + if not isinstance(item["band_width"], (int, float)): + raise TypeError("band_width must be numeric") + + +def analyze_data(run_data_list: List[Dict[str, Any]]) -> Dict[str, Any]: + """Analysiert eine Liste von Laufdaten und berechnet Median, IQR und Stabilitätsindikator.""" + _validate_input(run_data_list) + + if not run_data_list: + raise ValueError("run_data_list darf nicht leer sein") + + df = pd.DataFrame(run_data_list) + + try: + value_series = df["retry_tail_p99"].astype(float) + median_val = float(value_series.median()) + q75 = value_series.quantile(0.75) + q25 = value_series.quantile(0.25) + iqr_val = float(q75 - q25) + except Exception as e: + logger.error("Fehler bei der Berechnung: %s", e) + raise + + # Einfache Stabilitätsregel: relative IQR < 10% vom Median + stability = bool((median_val != 0) and (iqr_val / abs(median_val) < 0.1)) + + result = { + "median": median_val, + "iqr": iqr_val, + "stability": stability, + } + + assert all(k in result for k in ("median", "iqr", "stability")), "Output fields missing" + return result