Add data_analysis/src/data_analysis/core.py

This commit is contained in:
Mika 2026-03-30 16:33:37 +00:00
parent ee50f090a3
commit 493679d565

View file

@@ -0,0 +1,60 @@
from typing import List, Dict, Any
import pandas as pd
import statistics
import logging
# Import-time side effect: request INFO-level logging via logging.basicConfig
# as soon as this module is imported.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used for error reporting below.
logger = logging.getLogger(__name__)
def _validate_input(run_data_list: List[Dict[str, Any]]) -> None:
    """Check that ``run_data_list`` is a list of well-formed run records.

    Every record must be a dict that contains at least these fields:

    * ``epoch_ms`` -- ``int``
    * ``run_id`` -- ``str``
    * ``retry_tail_p99`` -- ``int`` or ``float``
    * ``band_width`` -- ``int`` or ``float``

    Additional keys are ignored. An empty list passes validation.

    Args:
        run_data_list: The records to validate.

    Raises:
        TypeError: If the argument is not a list, a record is not a dict, or
            a field has the wrong type. Booleans are rejected for the
            integer/numeric fields.
        ValueError: If a record lacks one or more required fields.
    """
    if not isinstance(run_data_list, list):
        raise TypeError("run_data_list must be a list of dictionaries")

    # (field name, accepted type(s), error message), in the order checked.
    field_specs = (
        ("epoch_ms", int, "epoch_ms must be int"),
        ("run_id", str, "run_id must be str"),
        ("retry_tail_p99", (int, float), "retry_tail_p99 must be numeric"),
        ("band_width", (int, float), "band_width must be numeric"),
    )
    required_fields = {name for name, _, _ in field_specs}

    for idx, item in enumerate(run_data_list):
        if not isinstance(item, dict):
            raise TypeError(f"Item at index {idx} must be a dict")

        missing = required_fields - item.keys()
        if missing:
            raise ValueError(f"Missing required fields: {missing}")

        for name, accepted, message in field_specs:
            value = item[name]
            # bool is a subclass of int, so isinstance() alone would let
            # True/False through as a timestamp or a measurement.
            if isinstance(value, bool) or not isinstance(value, accepted):
                raise TypeError(message)
def analyze_data(
    run_data_list: List[Dict[str, Any]],
    *,
    stability_threshold: float = 0.1,
) -> Dict[str, Any]:
    """Summarise ``retry_tail_p99`` over a list of runs.

    Computes the median and the interquartile range (75th minus 25th
    percentile) of the ``retry_tail_p99`` values and derives a simple
    stability flag from them.

    Args:
        run_data_list: Run records; checked with ``_validate_input`` first.
        stability_threshold: Upper bound (exclusive) on ``IQR / |median|``
            for the data to count as stable. Defaults to ``0.1`` (10%).

    Returns:
        A dict with the keys ``"median"`` (float), ``"iqr"`` (float) and
        ``"stability"`` (bool).

    Raises:
        TypeError, ValueError: Propagated from ``_validate_input`` for
            malformed input.
        ValueError: If ``run_data_list`` is empty.
        Exception: Any error raised while computing the statistics is logged
            and re-raised unchanged.
    """
    _validate_input(run_data_list)
    if not run_data_list:
        raise ValueError("run_data_list darf nicht leer sein")

    try:
        # Only one field is analysed, so build a Series from it directly
        # rather than materialising a DataFrame of every key in every record.
        values = pd.Series(
            [item["retry_tail_p99"] for item in run_data_list]
        ).astype(float)
        median_val = float(values.median())
        iqr_val = float(values.quantile(0.75) - values.quantile(0.25))
    except Exception as e:
        logger.error("Fehler bei der Berechnung: %s", e)
        raise

    # Simple stability rule: relative IQR below the threshold. The ratio is
    # undefined for a zero median, so such data is reported as not stable.
    stability = (
        median_val != 0
        and iqr_val / abs(median_val) < stability_threshold
    )

    return {
        "median": median_val,
        "iqr": iqr_val,
        "stability": stability,
    }