Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
9b5946b139
commit
b8c0720291
1 changed files with 105 additions and 0 deletions
105
data_analysis/src/data_analysis/core.py
Normal file
105
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,105 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
# Module logger; a stream handler is attached only when none is present so
# that importing the module repeatedly does not duplicate log output.
# NOTE(review): the original indentation was lost in extraction; setLevel is
# assumed to belong inside the guard below - confirm against the repository.
logger = logging.getLogger(__name__)
if len(logger.handlers) == 0:
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MeasurementRecord:
    """A single sensor reading: a point in time plus three measured values."""

    # Moment at which the reading was taken.
    timestamp: datetime
    # Temperature reading.
    temperature: float
    # Humidity reading.
    humidity: float
    # Light intensity reading.
    lux: float

    @staticmethod
    def validate_row(row: pd.Series) -> bool:
        """Check whether *row* holds one complete, parseable measurement.

        A row is valid when ``timestamp`` parses as an ISO-8601 date/time and
        ``temperature``, ``humidity`` and ``lux`` each convert to a float that
        is not NaN.

        Args:
            row: Mapping-like row (typically a ``pandas.Series``) keyed by
                column name.

        Returns:
            ``True`` if all four fields are present and valid, ``False``
            otherwise. This method never raises for bad data.
        """
        try:
            datetime.fromisoformat(str(row['timestamp']))
            for column in ('temperature', 'humidity', 'lux'):
                # float(nan) succeeds, so a missing value has to be rejected
                # explicitly after the conversion.
                if pd.isna(float(row[column])):
                    return False
        except (ValueError, TypeError, KeyError):
            return False
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def load_data(filename: str) -> pd.DataFrame:
|
||||||
|
"""Lädt und bereinigt CSV-Daten mit Zeit, Temperatur, Feuchtigkeit, Licht."""
|
||||||
|
assert isinstance(filename, str), "filename muss ein String sein"
|
||||||
|
|
||||||
|
path = Path(filename)
|
||||||
|
if not path.exists():
|
||||||
|
raise FileNotFoundError(f"Datei nicht gefunden: {filename}")
|
||||||
|
|
||||||
|
logger.info("Lade CSV-Daten aus %s", filename)
|
||||||
|
df = pd.read_csv(path)
|
||||||
|
|
||||||
|
required_columns = {'timestamp', 'temperature', 'humidity', 'lux'}
|
||||||
|
if not required_columns.issubset(df.columns):
|
||||||
|
raise ValueError(f"CSV muss Spalten {required_columns} enthalten")
|
||||||
|
|
||||||
|
# Typkonvertierung und Validierung
|
||||||
|
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
|
||||||
|
df['temperature'] = pd.to_numeric(df['temperature'], errors='coerce')
|
||||||
|
df['humidity'] = pd.to_numeric(df['humidity'], errors='coerce')
|
||||||
|
df['lux'] = pd.to_numeric(df['lux'], errors='coerce')
|
||||||
|
|
||||||
|
# Drop unvollständige Zeilen
|
||||||
|
before_drop = len(df)
|
||||||
|
df.dropna(subset=['timestamp', 'temperature', 'humidity', 'lux'], inplace=True)
|
||||||
|
after_drop = len(df)
|
||||||
|
|
||||||
|
logger.info("Bereinigte Daten: %d Zeilen entfernt (%d -> %d)", before_drop - after_drop, before_drop, after_drop)
|
||||||
|
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def plot_data(data: pd.DataFrame, output_path: Optional[str] = None) -> None:
    """Plot temperature and light intensity over time on twin y-axes.

    Temperature is drawn against the left axis and light intensity against
    the right axis, both over ``timestamp``. The ``humidity`` column is
    required to be present but is not drawn.

    Args:
        data: DataFrame containing the columns ``timestamp``,
            ``temperature``, ``humidity`` and ``lux``.
        output_path: If given (and non-empty), the figure is written to this
            path and missing parent directories are created; otherwise the
            figure is shown interactively.

    Raises:
        TypeError: If *data* is not a ``pandas.DataFrame``.
        ValueError: If a required column is missing or *data* has no rows.
    """
    if not isinstance(data, pd.DataFrame):
        raise TypeError("data muss ein pandas.DataFrame sein")

    required_columns = {'timestamp', 'temperature', 'humidity', 'lux'}
    if not required_columns.issubset(data.columns):
        raise ValueError(f"DataFrame muss Spalten {required_columns} enthalten")

    if data.empty:
        raise ValueError("DataFrame ist leer, kein Plot möglich")

    fig, ax1 = plt.subplots(figsize=(10, 6))
    try:
        # Left axis: temperature.
        ax1.plot(data['timestamp'], data['temperature'], color='tab:red', label='Temperatur (°C)')
        ax1.set_xlabel('Zeit')
        ax1.set_ylabel('Temperatur (°C)', color='tab:red')
        ax1.tick_params(axis='y', labelcolor='tab:red')

        # Right axis: light intensity, sharing the time axis.
        ax2 = ax1.twinx()
        ax2.plot(data['timestamp'], data['lux'], color='tab:blue', alpha=0.6, label='Licht (Lux)')
        ax2.set_ylabel('Licht (Lux)', color='tab:blue')
        ax2.tick_params(axis='y', labelcolor='tab:blue')

        # Address the figure and axes explicitly rather than through
        # pyplot's implicit "current" figure/axes state.
        ax1.set_title('Zeitverlauf: Temperatur und Licht')
        fig.autofmt_xdate()
        fig.tight_layout()

        if output_path:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(output_path, dpi=150)
            logger.info("Plot gespeichert in %s", output_path)
        else:
            plt.show()
    finally:
        # Release the figure even when plotting or saving fails; pyplot
        # otherwise keeps it alive in its global figure registry.
        plt.close(fig)
|
||||||
Loading…
Reference in a new issue