Add data_analysis_script/src/data_analysis_script/core.py
This commit is contained in:
commit
61cfd4fdd7
1 changed files with 64 additions and 0 deletions
64
data_analysis_script/src/data_analysis_script/core.py
Normal file
64
data_analysis_script/src/data_analysis_script/core.py
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_data(data: List[Dict[str, Any]]) -> Dict[str, float]:
    """Summarise measurement records.

    Computes the median amplitude over all records, the share of records
    that are outliers by the 1.5 * IQR rule, and a p-value for a difference
    between the conditions.

    Args:
        data: Non-empty list of records. Each record must contain the keys
            'condition_name' and 'amplitude'; the amplitude must be
            convertible with ``float()``.

    Returns:
        Dictionary with the keys 'median_amplitude', 'outlier_rate' and
        'statistical_significance'. The last one is the p-value of a
        two-sided Mann-Whitney U test when there are exactly two conditions,
        of a Kruskal-Wallis test when there are more than two, and 1.0 when
        there is only one condition or the test yields no usable p-value.

    Raises:
        ValueError: If ``data`` is empty, a record lacks a required key, or
            an amplitude cannot be converted to float.

    Note:
        Values such as NaN or inf pass the ``float()`` conversion and are
        not rejected here; they propagate into the median.
    """
    if not data:
        raise ValueError("data darf nicht leer sein")

    amplitudes = []
    conditions = {}
    for record in data:
        if not all(k in record for k in ("condition_name", "amplitude")):
            raise ValueError("Jeder Datensatz muss 'condition_name' und 'amplitude' enthalten")
        try:
            amp = float(record["amplitude"])
        except (TypeError, ValueError) as err:
            # Keep the original cause attached for easier debugging.
            raise ValueError("Amplitude muss numerisch sein") from err
        amplitudes.append(amp)
        conditions.setdefault(record["condition_name"], []).append(amp)

    arr = np.array(amplitudes)
    median_amplitude = float(np.median(arr))

    # Outliers lie strictly outside [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
    q1, q3 = np.percentile(arr, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    is_outlier = (arr < lower_bound) | (arr > upper_bound)
    # arr cannot be empty here because data was checked above.
    outlier_rate = float(is_outlier.sum()) / arr.size

    return {
        "median_amplitude": median_amplitude,
        "outlier_rate": outlier_rate,
        "statistical_significance": _group_p_value(list(conditions.values())),
    }


def _group_p_value(groups: List[List[float]]) -> float:
    """Return the p-value for a difference between groups, or 1.0 if none is available."""
    if len(groups) < 2:
        return 1.0
    try:
        if len(groups) == 2:
            _, p_value = stats.mannwhitneyu(groups[0], groups[1], alternative='two-sided')
        else:
            _, p_value = stats.kruskal(*groups)
    except Exception:
        # Deliberate best effort: a failing test is reported as "no evidence
        # of a difference" instead of aborting the whole analysis.
        return 1.0
    p_value = float(p_value)
    # The tests may also signal a degenerate input by returning NaN rather
    # than raising; treat that the same way as a failure.
    return p_value if np.isfinite(p_value) else 1.0
|
||||||
Loading…
Reference in a new issue