import os
from pathlib import Path
from statistics import median

import numpy as np
import pandas as pd

# Columns every input CSV must provide.
_REQUIRED_COLUMNS = ('timestamp', 'amplitude', 'bandpower')


def _nan_to_zero(value) -> float:
    """Return *value* as a float, substituting 0.0 when it is NaN."""
    number = float(value)
    return 0.0 if np.isnan(number) else number


def _load_trace_frames(csv_files, num_trials) -> list:
    """Read up to *num_trials* CSV files and validate their columns.

    A falsy ``num_trials`` (0 or None) disables the limit.  A negative value
    stops before the first file, so an empty list is returned.
    """
    frames = []
    for index, csv_path in enumerate(csv_files):
        if num_trials and index >= num_trials:
            break
        frame = pd.read_csv(csv_path)
        for col in _REQUIRED_COLUMNS:
            if col not in frame.columns:
                raise ValueError(f"Fehlende Spalte '{col}' in {csv_path.name}")
        frames.append(frame)
    return frames


def _amplitude_bandpower_correlation(frame) -> float:
    """Return the Pearson correlation of amplitude vs. bandpower, or 0.0.

    Rows where either value is missing are ignored, mirroring how the
    ``max``/``median`` aggregations skip NaN.  0.0 is returned whenever the
    coefficient is undefined (fewer than two complete rows, a constant
    column, or non-finite input).
    """
    pairs = frame[['amplitude', 'bandpower']].dropna()
    if len(pairs) < 2:
        return 0.0

    amplitude = pairs['amplitude'].to_numpy(dtype=float)
    bandpower = pairs['bandpower'].to_numpy(dtype=float)

    # A zero-variance column makes the coefficient undefined; checking it
    # explicitly avoids both the 0/0 RuntimeWarning inside np.corrcoef and
    # spurious results caused by rounding in the mean.
    if amplitude.min() == amplitude.max() or bandpower.min() == bandpower.max():
        return 0.0

    with np.errstate(divide='ignore', invalid='ignore'):
        coefficient = float(np.corrcoef(amplitude, bandpower)[0, 1])
    return _nan_to_zero(coefficient)


def aggregate_em_data(num_trials: int) -> dict:
    """Aggregate EM trace CSV files and compute summary metrics.

    Files matching ``data/em_traces_*.csv`` (relative to the current working
    directory) are read in sorted order and concatenated.

    Args:
        num_trials: Maximum number of CSV files to process.  ``0`` (or any
            falsy value) processes every matching file.

    Returns:
        dict: Summary with the keys ``peak_amplitude`` (maximum amplitude),
        ``median_bandpower`` (median bandpower) and
        ``crosscorr_with_clockevents``.  The latter is a simplified metric:
        the Pearson correlation between the amplitude and bandpower columns.
        Every metric falls back to 0.0 when it cannot be computed.

    Raises:
        FileNotFoundError: If no input file matches the pattern.
        ValueError: If a file lacks a required column, or if no file was
            loaded (e.g. a negative ``num_trials``).
    """
    data_dir = Path('data')
    csv_files = sorted(data_dir.glob('em_traces_*.csv'))
    if not csv_files:
        raise FileNotFoundError("Keine Eingabedateien gefunden unter data/em_traces_*.csv")

    frames = _load_trace_frames(csv_files, num_trials)
    if not frames:
        raise ValueError("Keine gültigen EM-Daten gefunden.")

    combined = pd.concat(frames, ignore_index=True)

    if combined.empty:
        peak_amplitude = 0.0
        median_bandpower = 0.0
    else:
        # max()/median() skip NaN; an all-NaN column still yields NaN, which
        # is mapped to the same 0.0 fallback used for empty data.
        peak_amplitude = _nan_to_zero(combined['amplitude'].max())
        median_bandpower = _nan_to_zero(combined['bandpower'].median())

    return {
        "peak_amplitude": peak_amplitude,
        "median_bandpower": median_bandpower,
        "crosscorr_with_clockevents": _amplitude_bandpower_correlation(combined),
    }