Add data_analysis_script/src/data_analysis_script/core.py

This commit is contained in:
Mika 2025-12-15 12:57:23 +00:00
commit 61cfd4fdd7

View file

@ -0,0 +1,64 @@
from typing import List, Dict, Any
import numpy as np
from scipy import stats
def _significance_p_value(groups: List[List[float]]) -> float:
    """Return the p-value of a rank-based test comparing *groups*.

    Exactly two groups are compared with a two-sided Mann-Whitney U test,
    three or more with a Kruskal-Wallis H test. Fewer than two groups cannot
    be compared, so the neutral value 1.0 is returned.

    Degenerate input (for example, all observations identical) makes SciPy
    either raise ``ValueError`` or return NaN, depending on the version; both
    outcomes are mapped to 1.0 so the result never contains NaN.
    """
    if len(groups) < 2:
        return 1.0
    try:
        if len(groups) == 2:
            _, p_value = stats.mannwhitneyu(
                groups[0], groups[1], alternative="two-sided"
            )
        else:
            _, p_value = stats.kruskal(*groups)
    except ValueError:
        # Only the "test not computable" case is treated as best-effort;
        # any other exception is a real bug and must surface.
        return 1.0
    p_value = float(p_value)
    return p_value if np.isfinite(p_value) else 1.0


def analyze_data(data: List[Dict[str, Any]]) -> Dict[str, float]:
    """Compute median amplitude, outlier rate and significance of measurements.

    Outliers are the amplitudes lying outside the fences
    ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` computed over all records. The
    significance is the p-value of a rank-based test between the groups formed
    by ``condition_name`` (Mann-Whitney U for two conditions, Kruskal-Wallis
    for more); it is 1.0 when there is only one condition or the test cannot
    be computed.

    Args:
        data: Measurement records, each a mapping with the keys
            ``'condition_name'`` (used as a grouping key, so it must be
            hashable) and ``'amplitude'`` (anything ``float()`` accepts).

    Returns:
        Dictionary with the keys ``'median_amplitude'``, ``'outlier_rate'``
        (fraction in ``[0, 1]``) and ``'statistical_significance'`` (p-value).

    Raises:
        ValueError: If ``data`` is empty, a record lacks a required key, or an
            amplitude is not a finite number.
    """
    if not data:
        raise ValueError("data darf nicht leer sein")

    amplitudes: List[float] = []
    conditions: Dict[Any, List[float]] = {}
    for record in data:
        if "condition_name" not in record or "amplitude" not in record:
            raise ValueError("Jeder Datensatz muss 'condition_name' und 'amplitude' enthalten")
        try:
            amp = float(record["amplitude"])
        except (TypeError, ValueError, OverflowError) as err:
            raise ValueError("Amplitude muss numerisch sein") from err
        # float() happily parses "nan"/"inf"; such values would silently turn
        # the median, the percentiles and the p-value into NaN.
        if not np.isfinite(amp):
            raise ValueError("Amplitude muss numerisch sein")
        amplitudes.append(amp)
        conditions.setdefault(record["condition_name"], []).append(amp)

    # An exhausted iterator is truthy but yields no records.
    if not amplitudes:
        raise ValueError("data darf nicht leer sein")

    arr = np.array(amplitudes)
    median_amplitude = float(np.median(arr))

    q1, q3 = np.percentile(arr, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outliers = ((arr < lower_bound) | (arr > upper_bound)).sum()
    outlier_rate = float(outliers) / len(arr)

    return {
        "median_amplitude": median_amplitude,
        "outlier_rate": outlier_rate,
        "statistical_significance": _significance_p_value(list(conditions.values())),
    }