Add data_analysis_tool/src/data_analysis_tool/core.py
This commit is contained in:
parent
3ad21f3a84
commit
02866790e1
1 changed files with 63 additions and 0 deletions
63
data_analysis_tool/src/data_analysis_tool/core.py
Normal file
63
data_analysis_tool/src/data_analysis_tool/core.py
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class AnalysisResults:
    """Container for the results of a magnetic-field analysis run.

    Stores the mean of Bx and By, the variance of Bx, and the number of
    detected spikes. All inputs are coerced to plain ``float``/``int`` on
    construction so later serialization never sees numpy scalar types.
    """

    def __init__(self, mean_Bx: float, mean_By: float, variance_Bx: float, spikes: int) -> None:
        # Eager coercion keeps the stored attributes JSON-friendly.
        self.mean_Bx = float(mean_Bx)
        self.mean_By = float(mean_By)
        self.variance_Bx = float(variance_Bx)
        self.spikes = int(spikes)

    def to_json(self) -> str:
        """Serialize the analysis results as a pretty-printed JSON string."""
        field_names = ("mean_Bx", "mean_By", "variance_Bx", "spikes")
        payload = {name: getattr(self, name) for name in field_names}
        return json.dumps(payload, indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_data(data_file: str) -> AnalysisResults:
    """Analyze magnetometer data from a CSV file.

    Computes the mean of Bx and By, the variance of Bx, and a simple spike
    count for the Bx channel (samples farther than three standard
    deviations from the Bx mean).

    Args:
        data_file: Path to a CSV file containing numeric columns Bx, By, Bz.

    Returns:
        An ``AnalysisResults`` instance with the computed statistics.

    Raises:
        FileNotFoundError: If ``data_file`` does not exist or is not a file.
        ValueError: If the CSV cannot be parsed, a required column is
            missing, or a required column is not numeric.
    """
    file_path = Path(data_file)
    if not file_path.exists() or not file_path.is_file():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {data_file}")

    # Read the CSV; re-raise parser failures as ValueError with the original
    # exception chained (`from e`) so the root cause stays visible.
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        raise ValueError(f"Fehler beim Lesen der CSV-Datei: {e}") from e

    required_cols = {"Bx", "By", "Bz"}
    if not required_cols.issubset(df.columns):
        raise ValueError(f"CSV muss die Spalten {required_cols} enthalten.")

    # Input validation: every required column must have a numeric dtype.
    for col in ("Bx", "By", "Bz"):
        if not pd.api.types.is_numeric_dtype(df[col]):
            raise ValueError(f"Spalte {col} muss numerisch sein.")

    mean_Bx = df["Bx"].mean()
    mean_By = df["By"].mean()
    variance_Bx = df["Bx"].var()

    # Simple spike detection: samples more than 3 standard deviations from
    # the mean. If std is 0 or NaN (e.g. empty or single-row input) the
    # comparison is skipped and no spikes are reported. The all-False
    # fallback is built on df.index so it stays aligned with the frame
    # (the original `pd.Series([False] * len(df))` relied on a default
    # RangeIndex).
    std_Bx = df["Bx"].std()
    if std_Bx > 0:
        spikes_series = (df["Bx"] - mean_Bx).abs() > 3 * std_Bx
    else:
        spikes_series = pd.Series(False, index=df.index)
    spikes_count = int(spikes_series.sum())

    return AnalysisResults(
        mean_Bx=mean_Bx,
        mean_By=mean_By,
        variance_Bx=variance_Bx,
        spikes=spikes_count,
    )
|
||||||
Loading…
Reference in a new issue