From b8c072029151f09c08142f84434e6e2986017961 Mon Sep 17 00:00:00 2001 From: Mika Date: Sun, 5 Jul 2026 02:07:35 +0000 Subject: [PATCH] Add data_analysis/src/data_analysis/core.py --- data_analysis/src/data_analysis/core.py | 105 ++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 data_analysis/src/data_analysis/core.py diff --git a/data_analysis/src/data_analysis/core.py b/data_analysis/src/data_analysis/core.py new file mode 100644 index 0000000..f59d884 --- /dev/null +++ b/data_analysis/src/data_analysis/core.py @@ -0,0 +1,105 @@ +from __future__ import annotations +import logging +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Optional +import pandas as pd +import matplotlib.pyplot as plt + + +# Logging setup for CI readiness +logger = logging.getLogger(__name__) +if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +@dataclass +class MeasurementRecord: + timestamp: datetime + temperature: float + humidity: float + lux: float + + @staticmethod + def validate_row(row: pd.Series) -> bool: + try: + datetime.fromisoformat(str(row['timestamp'])) + float(row['temperature']) + float(row['humidity']) + float(row['lux']) + except (ValueError, TypeError, KeyError): + return False + return True + + +def load_data(filename: str) -> pd.DataFrame: + """Lädt und bereinigt CSV-Daten mit Zeit, Temperatur, Feuchtigkeit, Licht.""" + assert isinstance(filename, str), "filename muss ein String sein" + + path = Path(filename) + if not path.exists(): + raise FileNotFoundError(f"Datei nicht gefunden: {filename}") + + logger.info("Lade CSV-Daten aus %s", filename) + df = pd.read_csv(path) + + required_columns = {'timestamp', 'temperature', 'humidity', 'lux'} + if not required_columns.issubset(df.columns): + raise ValueError(f"CSV muss Spalten {required_columns} enthalten") + + # Typkonvertierung und Validierung + df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce') + df['temperature'] = pd.to_numeric(df['temperature'], errors='coerce') + df['humidity'] = pd.to_numeric(df['humidity'], errors='coerce') + df['lux'] = pd.to_numeric(df['lux'], errors='coerce') + + # Drop unvollständige Zeilen + before_drop = len(df) + df.dropna(subset=['timestamp', 'temperature', 'humidity', 'lux'], inplace=True) + after_drop = len(df) + + logger.info("Bereinigte Daten: %d Zeilen entfernt (%d -> %d)", before_drop - after_drop, before_drop, after_drop) + + return df + + +def plot_data(data: pd.DataFrame, output_path: Optional[str] = None) -> None: + """Erstellt Plots für Temperatur, Luftfeuchtigkeit und Lichtintensität.""" + if not isinstance(data, pd.DataFrame): + raise TypeError("data muss ein pandas.DataFrame sein") + + required_columns = {'timestamp', 'temperature', 'humidity', 'lux'} + if not required_columns.issubset(data.columns): + raise ValueError(f"DataFrame muss Spalten {required_columns} enthalten") + + if data.empty: + raise ValueError("DataFrame ist leer, kein Plot möglich") + + fig, ax1 = plt.subplots(figsize=(10, 6)) + ax1.plot(data['timestamp'], data['temperature'], color='tab:red', label='Temperatur (°C)') + ax1.set_xlabel('Zeit') + ax1.set_ylabel('Temperatur (°C)', color='tab:red') + ax1.tick_params(axis='y', labelcolor='tab:red') + + ax2 = ax1.twinx() + ax2.plot(data['timestamp'], data['lux'], color='tab:blue', alpha=0.6, label='Licht (Lux)') + ax2.set_ylabel('Licht (Lux)', color='tab:blue') + ax2.tick_params(axis='y', labelcolor='tab:blue') + + plt.title('Zeitverlauf: Temperatur und Licht') + fig.autofmt_xdate() + fig.tight_layout() + + if output_path: + Path(output_path).parent.mkdir(parents=True, exist_ok=True) + plt.savefig(output_path, dpi=150) + logger.info("Plot gespeichert in %s", output_path) + else: + plt.show() + + plt.close(fig)