Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
ee50f090a3
commit
493679d565
1 changed files with 60 additions and 0 deletions
60
data_analysis/src/data_analysis/core.py
Normal file
60
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
import pandas as pd
|
||||||
|
import statistics
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
# Runs at import time: sets up root logging at INFO level. Note that
# logging.basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_input(run_data_list: List[Dict[str, Any]]) -> None:
    """Validate the structure and field types of a list of run records.

    Every record must be a dict that contains ``epoch_ms`` (int), ``run_id``
    (str), ``retry_tail_p99`` (int or float) and ``band_width`` (int or
    float). An empty list passes validation.

    ``bool`` values are rejected for the integer and numeric fields: ``bool``
    is a subclass of ``int``, so a plain ``isinstance`` check would silently
    accept ``True``/``False`` as a timestamp or measurement.

    Args:
        run_data_list: The records to check.

    Raises:
        TypeError: If ``run_data_list`` is not a list, an item is not a dict,
            or a field has the wrong type.
        ValueError: If an item lacks one of the required fields.
    """
    if not isinstance(run_data_list, list):
        raise TypeError("run_data_list must be a list of dictionaries")

    # Built once; the set of required keys does not depend on the item.
    required_fields = {"epoch_ms", "run_id", "retry_tail_p99", "band_width"}

    for idx, item in enumerate(run_data_list):
        if not isinstance(item, dict):
            raise TypeError(f"Item at index {idx} must be a dict")

        missing = required_fields - item.keys()
        if missing:
            raise ValueError(f"Missing required fields: {missing}")

        epoch_ms = item["epoch_ms"]
        if isinstance(epoch_ms, bool) or not isinstance(epoch_ms, int):
            raise TypeError("epoch_ms must be int")

        if not isinstance(item["run_id"], str):
            raise TypeError("run_id must be str")

        for field in ("retry_tail_p99", "band_width"):
            value = item[field]
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise TypeError(f"{field} must be numeric")
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_data(
    run_data_list: List[Dict[str, Any]],
    *,
    stability_threshold: float = 0.1,
) -> Dict[str, Any]:
    """Compute median, IQR and a stability flag for ``retry_tail_p99``.

    The input is validated with ``_validate_input`` first; an empty list is
    then rejected. The median and the interquartile range (75th minus 25th
    percentile) are computed over the ``retry_tail_p99`` values of all
    records.

    Args:
        run_data_list: Run records; each must carry a numeric
            ``retry_tail_p99`` field.
        stability_threshold: Upper bound (exclusive) on the relative IQR,
            i.e. ``iqr / abs(median)``, below which the data counts as
            stable. Defaults to 0.1 (10 % of the median).

    Returns:
        A dict with the keys ``"median"`` (float), ``"iqr"`` (float) and
        ``"stability"`` (bool).

    Raises:
        TypeError: If the input fails type validation.
        ValueError: If a record lacks required fields or the list is empty.
    """
    _validate_input(run_data_list)

    if not run_data_list:
        raise ValueError("run_data_list darf nicht leer sein")

    try:
        # Only this one column is needed, so build a Series rather than a
        # DataFrame holding every field of every record.
        values = pd.Series(
            [item["retry_tail_p99"] for item in run_data_list],
            dtype="float64",
        )
        median_val = float(values.median())
        iqr_val = float(values.quantile(0.75) - values.quantile(0.25))
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception("Fehler bei der Berechnung: %s", e)
        raise

    # Simple stability rule: relative IQR below the threshold. A median of
    # zero makes the ratio undefined, so such data is reported as not stable.
    stability = (
        median_val != 0 and iqr_val / abs(median_val) < stability_threshold
    )

    return {
        "median": median_val,
        "iqr": iqr_val,
        "stability": stability,
    }
|
||||||
Loading…
Reference in a new issue