Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
ee50f090a3
commit
493679d565
1 changed files with 60 additions and 0 deletions
60
data_analysis/src/data_analysis/core.py
Normal file
60
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
from typing import List, Dict, Any
|
||||
import pandas as pd
|
||||
import statistics
|
||||
import logging
|
||||
|
||||
|
||||
# Request default logging configuration with the root level set to INFO.
# This runs as a side effect of importing the module.
# NOTE(review): libraries usually leave logging configuration to the
# application entry point - confirm this import-time call is intended.
logging.basicConfig(level=logging.INFO)
|
||||
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _validate_input(run_data_list: List[Dict[str, Any]]) -> None:
    """Check that *run_data_list* is a list of well-formed run records.

    Every record must be a dict containing at least these keys:

    * ``epoch_ms`` -- ``int``
    * ``run_id`` -- ``str``
    * ``retry_tail_p99`` -- ``int`` or ``float``
    * ``band_width`` -- ``int`` or ``float``

    Additional keys are ignored. An empty list passes validation.

    Booleans are rejected for the integer/numeric fields: ``bool`` is a
    subclass of ``int``, so a plain ``isinstance(value, int)`` check would
    silently accept ``True``/``False`` as a timestamp or measurement.

    Args:
        run_data_list: The records to validate.

    Raises:
        TypeError: If *run_data_list* is not a list, an item is not a dict,
            or a field has the wrong type.
        ValueError: If an item lacks one of the required keys.
    """
    if not isinstance(run_data_list, list):
        raise TypeError("run_data_list must be a list of dictionaries")

    # Loop-invariant: build the required-key set once, not per record.
    required_fields = {"epoch_ms", "run_id", "retry_tail_p99", "band_width"}

    for idx, item in enumerate(run_data_list):
        if not isinstance(item, dict):
            raise TypeError(f"Item at index {idx} must be a dict")

        missing = required_fields - item.keys()
        if missing:
            raise ValueError(f"Missing required fields: {missing}")

        epoch_ms = item["epoch_ms"]
        # Exclude bool explicitly; isinstance(True, int) is True.
        if isinstance(epoch_ms, bool) or not isinstance(epoch_ms, int):
            raise TypeError("epoch_ms must be int")

        if not isinstance(item["run_id"], str):
            raise TypeError("run_id must be str")

        for field in ("retry_tail_p99", "band_width"):
            value = item[field]
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise TypeError(f"{field} must be numeric")
|
||||
|
||||
|
||||
def analyze_data(
    run_data_list: List[Dict[str, Any]],
    *,
    stability_threshold: float = 0.1,
) -> Dict[str, Any]:
    """Summarise the ``retry_tail_p99`` values of a list of run records.

    The input is validated with ``_validate_input`` first; afterwards the
    median and the interquartile range (75th minus 25th percentile) of the
    ``retry_tail_p99`` field are computed with pandas.

    The result is flagged as stable when the IQR relative to the absolute
    median is strictly below *stability_threshold*. A median of exactly
    zero is never considered stable, because the relative IQR is undefined.

    Args:
        run_data_list: Non-empty list of run records.
        stability_threshold: Upper bound (exclusive) for ``iqr / |median|``
            for the data to count as stable. Defaults to ``0.1`` (10 %).

    Returns:
        A dict with the keys ``"median"`` (float), ``"iqr"`` (float) and
        ``"stability"`` (bool).

    Raises:
        TypeError: If the input or one of its fields has the wrong type.
        ValueError: If a record lacks a required key or the list is empty.
    """
    _validate_input(run_data_list)

    if not run_data_list:
        raise ValueError("run_data_list darf nicht leer sein")

    try:
        # Only one column is needed, so build a single float Series rather
        # than a DataFrame of every field.
        value_series = pd.Series(
            [item["retry_tail_p99"] for item in run_data_list],
            dtype=float,
        )
        median_val = float(value_series.median())
        q75 = value_series.quantile(0.75)
        q25 = value_series.quantile(0.25)
        iqr_val = float(q75 - q25)
    except Exception as e:
        # Log with traceback, then let the caller see the original error.
        logger.exception("Fehler bei der Berechnung: %s", e)
        raise

    # Simple stability rule: relative IQR below the threshold share of the
    # median; guard against division by zero.
    stability = bool(
        median_val != 0 and iqr_val / abs(median_val) < stability_threshold
    )

    return {
        "median": median_val,
        "iqr": iqr_val,
        "stability": stability,
    }
|
||||
Loading…
Reference in a new issue