# Core routines of the data analysis tool: load a magnetometer CSV file,
# validate its columns and compute summary statistics plus a spike count.
from __future__ import annotations

import json
import math
from pathlib import Path
from typing import Any

import pandas as pd

# Columns every input CSV must provide; kept as a tuple so that validation
# order and error messages are deterministic.
_REQUIRED_COLUMNS = ("Bx", "By", "Bz")


def _finite_or_none(value: float) -> float | None:
    """Return *value* unchanged if it is finite, otherwise ``None``."""
    return value if math.isfinite(value) else None


class AnalysisResults:
    """Store the results of the magnetic field evaluation.

    Attributes:
        mean_Bx: Mean of the ``Bx`` column.
        mean_By: Mean of the ``By`` column.
        variance_Bx: Variance of the ``Bx`` column.
        spikes: Number of ``Bx`` samples flagged as spikes.
    """

    def __init__(self, mean_Bx: float, mean_By: float, variance_Bx: float, spikes: int) -> None:
        # Coerce explicitly so that numpy scalars coming out of pandas become
        # plain Python numbers, which ``json`` can serialize.
        self.mean_Bx = float(mean_Bx)
        self.mean_By = float(mean_By)
        self.variance_Bx = float(variance_Bx)
        self.spikes = int(spikes)

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(mean_Bx={self.mean_Bx!r}, mean_By={self.mean_By!r}, "
            f"variance_Bx={self.variance_Bx!r}, spikes={self.spikes!r})"
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the results as a plain dictionary (values are not sanitized)."""
        return {
            "mean_Bx": self.mean_Bx,
            "mean_By": self.mean_By,
            "variance_Bx": self.variance_Bx,
            "spikes": self.spikes,
        }

    def to_json(self) -> str:
        """Convert the analysis results to a JSON string.

        Non-finite values (NaN, +/-inf) are written as ``null``: the bare
        ``NaN``/``Infinity`` tokens that ``json.dumps`` emits by default are
        not valid JSON and are rejected by strict parsers.

        Returns:
            The results as a JSON object, indented by two spaces.
        """
        payload = {key: _finite_or_none(value) for key, value in self.to_dict().items()}
        # allow_nan=False guards against ever emitting invalid JSON again.
        return json.dumps(payload, indent=2, allow_nan=False)


def analyze_data(data_file: str | Path, *, spike_sigma: float = 3.0) -> AnalysisResults:
    """Analyze magnetometer data from a CSV file.

    Computes the means of ``Bx`` and ``By``, the variance of ``Bx`` and the
    number of ``Bx`` spikes, i.e. samples further than ``spike_sigma``
    standard deviations away from the ``Bx`` mean.

    Args:
        data_file: Path to a CSV file with numeric columns ``Bx``, ``By``
            and ``Bz``.
        spike_sigma: Spike threshold in multiples of the ``Bx`` standard
            deviation. Defaults to 3.

    Returns:
        The computed statistics.

    Raises:
        FileNotFoundError: If ``data_file`` does not point to a regular file.
        ValueError: If the file cannot be parsed, a required column is
            missing, there are no data rows, or a required column is not
            numeric.
    """
    file_path = Path(data_file)
    # is_file() is already False for non-existent paths.
    if not file_path.is_file():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {data_file}")

    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        # Callers get one exception type for every read/parse failure; the
        # original error stays attached as __cause__ for debugging.
        raise ValueError(f"Fehler beim Lesen der CSV-Datei: {e}") from e

    missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise ValueError(
            f"CSV muss die Spalten {', '.join(_REQUIRED_COLUMNS)} enthalten "
            f"(fehlend: {', '.join(missing)})."
        )

    # A header-only file would otherwise surface as a confusing
    # "column must be numeric" error further down.
    if df.empty:
        raise ValueError("CSV enthält keine Datenzeilen.")

    for col in _REQUIRED_COLUMNS:
        if not pd.api.types.is_numeric_dtype(df[col]):
            raise ValueError(f"Spalte {col} muss numerisch sein.")

    bx = df["Bx"]
    mean_bx = bx.mean()
    std_bx = bx.std()

    # With zero or undefined (NaN, e.g. a single row) spread there is no
    # meaningful threshold, so nothing is counted as a spike.
    if std_bx > 0:
        spikes_count = int(((bx - mean_bx).abs() > spike_sigma * std_bx).sum())
    else:
        spikes_count = 0

    return AnalysisResults(
        mean_Bx=mean_bx,
        mean_By=df["By"].mean(),
        variance_Bx=bx.var(),
        spikes=spikes_count,
    )