Add data_analysis/src/data_analysis/core.py

This commit is contained in:
Mika 2026-07-05 02:07:35 +00:00
parent 9b5946b139
commit b8c0720291

View file

@ -0,0 +1,105 @@
from __future__ import annotations
import logging
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Optional
import pandas as pd
import matplotlib.pyplot as plt
# Module-level logger for this package. A stream handler with a timestamped
# format is attached only when the logger has no handlers yet.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
if len(logger.handlers) == 0:
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
@dataclass
class MeasurementRecord:
    """One sensor measurement: a timestamp plus three numeric readings."""

    # Moment the measurement was taken.
    timestamp: datetime
    # Temperature reading.
    temperature: float
    # Humidity reading.
    humidity: float
    # Light-intensity reading.
    lux: float

    @staticmethod
    def validate_row(row: pd.Series) -> bool:
        """Return True if *row* holds a complete, parseable measurement.

        A row is valid when ``row['timestamp']`` (converted to ``str``) is
        accepted by ``datetime.fromisoformat`` and ``temperature``,
        ``humidity`` and ``lux`` all convert to real (non-NaN) floats.

        Args:
            row: Mapping-like object (e.g. a DataFrame row) indexed by
                column name.

        Returns:
            False if a column is missing, a value cannot be converted, or a
            numeric reading is NaN; True otherwise.
        """
        try:
            datetime.fromisoformat(str(row['timestamp']))
            readings = [
                float(row[column])
                for column in ('temperature', 'humidity', 'lux')
            ]
        except (ValueError, TypeError, KeyError):
            return False
        # float() happily accepts NaN, but NaN marks a missing reading and
        # load_data() drops such rows, so it must not count as valid here.
        return not any(pd.isna(value) for value in readings)
def load_data(filename: str) -> pd.DataFrame:
"""Lädt und bereinigt CSV-Daten mit Zeit, Temperatur, Feuchtigkeit, Licht."""
assert isinstance(filename, str), "filename muss ein String sein"
path = Path(filename)
if not path.exists():
raise FileNotFoundError(f"Datei nicht gefunden: {filename}")
logger.info("Lade CSV-Daten aus %s", filename)
df = pd.read_csv(path)
required_columns = {'timestamp', 'temperature', 'humidity', 'lux'}
if not required_columns.issubset(df.columns):
raise ValueError(f"CSV muss Spalten {required_columns} enthalten")
# Typkonvertierung und Validierung
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
df['temperature'] = pd.to_numeric(df['temperature'], errors='coerce')
df['humidity'] = pd.to_numeric(df['humidity'], errors='coerce')
df['lux'] = pd.to_numeric(df['lux'], errors='coerce')
# Drop unvollständige Zeilen
before_drop = len(df)
df.dropna(subset=['timestamp', 'temperature', 'humidity', 'lux'], inplace=True)
after_drop = len(df)
logger.info("Bereinigte Daten: %d Zeilen entfernt (%d -> %d)", before_drop - after_drop, before_drop, after_drop)
return df
def plot_data(data: pd.DataFrame, output_path: Optional[str] = None) -> None:
    """Plot temperature and light intensity over time.

    Temperature is drawn in red against the left y-axis and light intensity
    in blue against a twin right y-axis, both over ``data['timestamp']``.
    The ``humidity`` column is required to be present but is not drawn.

    Args:
        data: DataFrame with the columns ``timestamp``, ``temperature``,
            ``humidity`` and ``lux``; must not be empty.
        output_path: If given (and non-empty), the figure is saved there at
            150 dpi and missing parent directories are created. Otherwise
            the figure is shown with ``plt.show()``.

    Raises:
        TypeError: If *data* is not a ``pandas.DataFrame``.
        ValueError: If a required column is missing or *data* is empty.
    """
    if not isinstance(data, pd.DataFrame):
        raise TypeError("data muss ein pandas.DataFrame sein")
    required_columns = {'timestamp', 'temperature', 'humidity', 'lux'}
    if not required_columns.issubset(data.columns):
        raise ValueError(f"DataFrame muss Spalten {required_columns} enthalten")
    if data.empty:
        raise ValueError("DataFrame ist leer, kein Plot möglich")

    fig, ax1 = plt.subplots(figsize=(10, 6))
    # try/finally guarantees the figure is released even if plotting,
    # saving or showing raises; otherwise pyplot would keep it alive.
    try:
        ax1.plot(data['timestamp'], data['temperature'], color='tab:red', label='Temperatur (°C)')
        ax1.set_xlabel('Zeit')
        ax1.set_ylabel('Temperatur (°C)', color='tab:red')
        ax1.tick_params(axis='y', labelcolor='tab:red')

        # Second y-axis sharing the time axis, used for the light readings.
        ax2 = ax1.twinx()
        ax2.plot(data['timestamp'], data['lux'], color='tab:blue', alpha=0.6, label='Licht (Lux)')
        ax2.set_ylabel('Licht (Lux)', color='tab:blue')
        ax2.tick_params(axis='y', labelcolor='tab:blue')

        plt.title('Zeitverlauf: Temperatur und Licht')
        fig.autofmt_xdate()
        fig.tight_layout()

        if output_path:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            # Save through the figure object rather than pyplot's implicit
            # "current figure" so the right figure is always written.
            fig.savefig(output_path, dpi=150)
            logger.info("Plot gespeichert in %s", output_path)
        else:
            plt.show()
    finally:
        plt.close(fig)