Add data_analysis_script/src/data_analysis_script/core.py
This commit is contained in:
commit
61cfd4fdd7
1 changed files with 64 additions and 0 deletions
64
data_analysis_script/src/data_analysis_script/core.py
Normal file
64
data_analysis_script/src/data_analysis_script/core.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
from typing import List, Dict, Any
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
|
||||
|
||||
def _collect_amplitudes(data):
    """Validate the records and gather their amplitudes.

    Returns a pair ``(amplitudes, by_condition)``: the flat list of all
    amplitudes in input order, and a dict mapping each condition name to the
    list of amplitudes recorded under it.
    """
    amplitudes: List[float] = []
    by_condition: Dict[Any, List[float]] = {}
    for record in data:
        if "condition_name" not in record or "amplitude" not in record:
            raise ValueError("Jeder Datensatz muss 'condition_name' und 'amplitude' enthalten")
        try:
            amplitude = float(record["amplitude"])
        except (TypeError, ValueError) as exc:
            # Keep the original conversion error attached for debugging.
            raise ValueError("Amplitude muss numerisch sein") from exc
        amplitudes.append(amplitude)
        by_condition.setdefault(record["condition_name"], []).append(amplitude)
    return amplitudes, by_condition


def _iqr_outlier_rate(values) -> float:
    """Return the fraction of *values* outside the 1.5 * IQR fences."""
    q1, q3 = np.percentile(values, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outliers = ((values < lower_bound) | (values > upper_bound)).sum()
    # The caller guarantees at least one value, so the division is safe.
    return float(outliers) / len(values)


def _group_p_value(groups) -> float:
    """Return the p-value of a rank test across *groups*, or 1.0 as fallback.

    Two groups are compared with a two-sided Mann-Whitney U test, more than
    two with a Kruskal-Wallis test. With fewer than two groups there is
    nothing to compare and 1.0 is returned.
    """
    if len(groups) < 2:
        return 1.0
    try:
        if len(groups) == 2:
            _, p_value = stats.mannwhitneyu(groups[0], groups[1], alternative='two-sided')
        else:
            _, p_value = stats.kruskal(*groups)
    except ValueError:
        # SciPy may reject degenerate samples (e.g. all values identical,
        # depending on the version); treat that as "no evidence".
        return 1.0
    p_value = float(p_value)
    # A degenerate test can also yield NaN instead of raising; report it the
    # same way as the exception path so callers always get a usable number.
    return p_value if np.isfinite(p_value) else 1.0


def analyze_data(data: List[Dict[str, Any]]) -> Dict[str, float]:
    """Analyze measurement records.

    Computes the median amplitude over all records, the share of amplitudes
    outside the 1.5 * IQR fences, and a p-value for differences between the
    conditions.

    Args:
        data: Measurement records, each with the keys 'condition_name' and
            'amplitude'. The amplitude must be convertible with ``float()``.

    Returns:
        Dictionary with the keys 'median_amplitude', 'outlier_rate' and
        'statistical_significance'. The last one is the p-value of a
        Mann-Whitney U test (exactly two conditions) or a Kruskal-Wallis test
        (more than two conditions); it is 1.0 when there is only one
        condition or the test cannot produce a finite p-value.

    Raises:
        ValueError: If ``data`` is empty, a record lacks a required key, or an
            amplitude is not numeric.
    """
    if not data:
        raise ValueError("data darf nicht leer sein")

    amplitudes, by_condition = _collect_amplitudes(data)
    arr = np.array(amplitudes)

    return {
        "median_amplitude": float(np.median(arr)),
        "outlier_rate": _iqr_outlier_rate(arr),
        "statistical_significance": _group_p_value(list(by_condition.values())),
    }
|
||||
Loading…
Reference in a new issue