Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
9b5946b139
commit
b8c0720291
1 changed files with 105 additions and 0 deletions
105
data_analysis/src/data_analysis/core.py
Normal file
105
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
from __future__ import annotations
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# Module-level logger. The stream handler is attached only when the logger
# has no handlers yet, so a repeated import does not duplicate log output.
logger = logging.getLogger(__name__)
if not logger.handlers:
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
@dataclass
class MeasurementRecord:
    """A single measurement: a timestamp plus temperature, humidity and lux."""

    timestamp: datetime
    temperature: float
    humidity: float
    lux: float

    @staticmethod
    def validate_row(row: pd.Series) -> bool:
        """Return True when *row* holds a usable value for every record field.

        The timestamp must be parseable by ``datetime.fromisoformat`` after
        conversion to ``str``; ``temperature``, ``humidity`` and ``lux`` must
        each convert to a ``float`` that is not NaN.

        Args:
            row: Mapping-like object (for example a ``pd.Series``) indexed by
                the field names.

        Returns:
            False if a key is missing or any value fails conversion,
            True otherwise.
        """
        try:
            datetime.fromisoformat(str(row['timestamp']))
            for column in ('temperature', 'humidity', 'lux'):
                # float() accepts NaN without raising, so missing readings
                # must be rejected explicitly.
                if pd.isna(float(row[column])):
                    return False
        except (ValueError, TypeError, KeyError):
            return False
        return True
|
||||
|
||||
|
||||
def load_data(filename: str | Path) -> pd.DataFrame:
    """Load a measurement CSV and return only its fully valid rows.

    The file must provide the columns ``timestamp``, ``temperature``,
    ``humidity`` and ``lux``. Timestamps are parsed with ``pd.to_datetime``
    and the three measurement columns with ``pd.to_numeric``; values that
    cannot be converted become missing, and every row with a missing value
    in one of the four columns is dropped. The number of removed rows is
    logged at INFO level.

    Args:
        filename: Path to the CSV file, as a string or ``pathlib.Path``.

    Returns:
        The cleaned DataFrame. Remaining rows keep their original index
        labels.

    Raises:
        TypeError: If *filename* is neither ``str`` nor ``Path``.
        FileNotFoundError: If the path does not exist.
        ValueError: If a required column is missing from the CSV.
    """
    # Explicit check instead of ``assert``: asserts are removed under
    # ``python -O`` and would silently stop validating.
    if not isinstance(filename, (str, Path)):
        raise TypeError("filename muss ein String oder Path sein")

    path = Path(filename)
    if not path.exists():
        raise FileNotFoundError(f"Datei nicht gefunden: {filename}")

    logger.info("Lade CSV-Daten aus %s", filename)
    df = pd.read_csv(path)

    required_columns = {'timestamp', 'temperature', 'humidity', 'lux'}
    if not required_columns.issubset(df.columns):
        raise ValueError(f"CSV muss Spalten {required_columns} enthalten")

    # Type conversion; unparseable values are coerced to NaT/NaN so they
    # can be removed together with genuinely missing ones below.
    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
    for column in ('temperature', 'humidity', 'lux'):
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Drop incomplete rows and report how many were removed.
    before_drop = len(df)
    df = df.dropna(subset=['timestamp', 'temperature', 'humidity', 'lux'])
    after_drop = len(df)

    logger.info(
        "Bereinigte Daten: %d Zeilen entfernt (%d -> %d)",
        before_drop - after_drop,
        before_drop,
        after_drop,
    )

    return df
|
||||
|
||||
|
||||
def plot_data(data: pd.DataFrame, output_path: Optional[str] = None) -> None:
    """Plot temperature and light intensity over time on twin y-axes.

    Temperature is drawn in red against the left axis and lux in blue
    against the right axis, both over ``data['timestamp']``. The
    ``humidity`` column is required by the validation below but is not
    drawn.

    Args:
        data: DataFrame with the columns ``timestamp``, ``temperature``,
            ``humidity`` and ``lux``.
        output_path: Target image file. When given (and non-empty) the
            parent directory is created if needed and the figure is saved
            there at 150 dpi; otherwise the figure is shown with
            ``plt.show()``.

    Raises:
        TypeError: If *data* is not a ``pandas.DataFrame``.
        ValueError: If a required column is missing or *data* is empty.
    """
    if not isinstance(data, pd.DataFrame):
        raise TypeError("data muss ein pandas.DataFrame sein")

    required_columns = {'timestamp', 'temperature', 'humidity', 'lux'}
    if not required_columns.issubset(data.columns):
        raise ValueError(f"DataFrame muss Spalten {required_columns} enthalten")

    if data.empty:
        raise ValueError("DataFrame ist leer, kein Plot möglich")

    fig, ax1 = plt.subplots(figsize=(10, 6))
    # try/finally guarantees the figure is released even when plotting,
    # saving or showing fails; otherwise pyplot keeps it alive.
    try:
        ax1.plot(data['timestamp'], data['temperature'], color='tab:red', label='Temperatur (°C)')
        ax1.set_xlabel('Zeit')
        ax1.set_ylabel('Temperatur (°C)', color='tab:red')
        ax1.tick_params(axis='y', labelcolor='tab:red')

        # Second y-axis sharing the same x-axis for the light readings.
        ax2 = ax1.twinx()
        ax2.plot(data['timestamp'], data['lux'], color='tab:blue', alpha=0.6, label='Licht (Lux)')
        ax2.set_ylabel('Licht (Lux)', color='tab:blue')
        ax2.tick_params(axis='y', labelcolor='tab:blue')

        plt.title('Zeitverlauf: Temperatur und Licht')
        fig.autofmt_xdate()
        fig.tight_layout()

        if output_path:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            # Save through the figure object rather than pyplot's implicit
            # "current figure".
            fig.savefig(output_path, dpi=150)
            logger.info("Plot gespeichert in %s", output_path)
        else:
            plt.show()
    finally:
        plt.close(fig)
|
||||
Loading…
Reference in a new issue