from typing import List, Dict, Any
import numpy as np
from scipy import stats


def _group_p_value(groups: List[List[float]]) -> float:
    """Return the p-value of a rank test across *groups*, or 1.0 if none is usable.

    Two groups are compared with a two-sided Mann-Whitney U test, three or
    more with a Kruskal-Wallis H test. Fewer than two groups cannot be
    compared, so the neutral value 1.0 is returned.
    """
    if len(groups) < 2:
        return 1.0
    try:
        if len(groups) == 2:
            _, p_value = stats.mannwhitneyu(
                groups[0], groups[1], alternative="two-sided"
            )
        else:
            _, p_value = stats.kruskal(*groups)
    except (ValueError, ArithmeticError):
        # Best effort: degenerate input (e.g. all values identical) makes
        # some scipy versions raise; treat that as "no evidence".
        return 1.0
    p_value = float(p_value)
    # Other scipy versions (and NaN amplitudes) yield NaN instead of raising;
    # map that to the same neutral fallback so callers never receive NaN.
    return p_value if np.isfinite(p_value) else 1.0


def analyze_data(data: List[Dict[str, Any]]) -> Dict[str, float]:
    """Analyze measurement records and summarize their amplitudes.

    Computes the median amplitude over all records, the share of records
    whose amplitude lies outside the 1.5 * IQR fences, and a p-value for
    differences between conditions.

    Args:
        data: Measurement records, each providing the keys
            'condition_name' and 'amplitude'. The amplitude must be
            convertible with ``float()``.

    Returns:
        Dictionary with the keys 'median_amplitude', 'outlier_rate' and
        'statistical_significance'. The latter is the p-value of a
        two-sided Mann-Whitney U test (exactly two conditions) or a
        Kruskal-Wallis test (more than two conditions). It is 1.0 when
        there is only one condition or when no finite p-value can be
        computed.

    Raises:
        ValueError: If ``data`` is empty, a record lacks a required key,
            or an amplitude is not numeric.
    """
    if not data:
        raise ValueError("data darf nicht leer sein")

    amplitudes = []
    conditions = {}

    for record in data:
        if not all(k in record for k in ("condition_name", "amplitude")):
            raise ValueError(
                "Jeder Datensatz muss 'condition_name' und 'amplitude' enthalten"
            )
        try:
            amp = float(record["amplitude"])
        except (TypeError, ValueError) as err:
            # Keep the original cause attached for easier debugging.
            raise ValueError("Amplitude muss numerisch sein") from err
        amplitudes.append(amp)
        conditions.setdefault(record["condition_name"], []).append(amp)

    arr = np.array(amplitudes)
    median_amplitude = float(np.median(arr))

    # Tukey fences: values beyond 1.5 * IQR from the quartiles are outliers.
    q1, q3 = np.percentile(arr, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outliers = np.count_nonzero((arr < lower_bound) | (arr > upper_bound))
    outlier_rate = float(outliers) / len(arr) if len(arr) > 0 else 0.0

    # Every group holds at least one value because groups are only created
    # by appending an amplitude, so no emptiness filtering is required.
    p_value = _group_p_value(list(conditions.values()))

    return {
        "median_amplitude": median_amplitude,
        "outlier_rate": outlier_rate,
        "statistical_significance": p_value,
    }