Add data_analysis/src/data_analysis/core.py

This commit is contained in:
Mika 2026-02-15 03:06:26 +00:00
parent ef8a8bc4b1
commit eb97474cdb

View file

@@ -0,0 +1,83 @@
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import List
import pandas as pd
import math
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class SensorData:
    """A single sensor reading.

    Attributes:
        timestamp: Time of the reading as a string. Its format is not
            checked by ``validate``.
        voltage_mv: Measured voltage (the name suggests millivolts).
        temperature_c: Temperature (the name suggests degrees Celsius).
        humidity_percent: Humidity (the name suggests percent).
    """

    timestamp: str
    voltage_mv: float
    temperature_c: float
    humidity_percent: float

    @staticmethod
    def validate(data: List["SensorData"]) -> None:
        """Check that *data* is a list of readings with numeric measurements.

        Args:
            data: The object to check; must be a ``list`` whose items are all
                ``SensorData`` instances.

        Raises:
            TypeError: If *data* is not a list, if an item is not a
                ``SensorData`` instance, or if one of the numeric fields
                (``voltage_mv``, ``temperature_c``, ``humidity_percent``) is
                not an ``int`` or ``float``. Booleans are rejected as well.
        """
        if not isinstance(data, list):
            raise TypeError("data must be a list of SensorData instances")

        # Checked in this order so the first failing field is reported.
        numeric_fields = ("voltage_mv", "temperature_c", "humidity_percent")
        for i, d in enumerate(data):
            if not isinstance(d, SensorData):
                raise TypeError(f"Item {i} is not a SensorData instance")
            for field_name in numeric_fields:
                value = getattr(d, field_name)
                # bool is a subclass of int, so a plain isinstance check
                # would let True/False through as if they were measurements.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    raise TypeError(f"Invalid {field_name} type in item {i}")
@dataclass
class SummaryReport:
    """Summary statistics produced by ``analyze_data``."""

    # Largest voltage_mv value among the analyzed readings.
    max_voltage: float
    # Smallest voltage_mv value among the analyzed readings.
    min_voltage: float
    # Arithmetic mean of the voltage_mv values.
    average_voltage: float
    # Mean of the voltage/temperature and voltage/humidity correlations;
    # 0.0 when neither correlation could be computed.
    correlation_with_weather: float
def analyze_data(data: List[SensorData]) -> SummaryReport:
    """Analyze sensor readings and compute voltage statistics and correlations.

    Args:
        data: Readings to analyze. They are checked with
            ``SensorData.validate`` before anything else happens.

    Returns:
        A ``SummaryReport`` holding the maximum, minimum and mean of
        ``voltage_mv`` plus the mean of the voltage/temperature and
        voltage/humidity correlations. Correlations that come out as NaN are
        left out of that mean; if both are NaN the value is ``0.0``.

    Raises:
        TypeError: Propagated from ``SensorData.validate`` for malformed input.
        ValueError: If *data* is an empty list.
    """
    SensorData.validate(data)
    if not data:
        raise ValueError("No sensor data provided for analysis.")

    logger.debug("Starting analysis of %d sensor data entries", len(data))

    rows = [
        {
            "timestamp": reading.timestamp,
            "voltage_mv": reading.voltage_mv,
            "temperature_c": reading.temperature_c,
            "humidity_percent": reading.humidity_percent,
        }
        for reading in data
    ]
    frame = pd.DataFrame(rows)
    voltage = frame["voltage_mv"]

    highest = float(voltage.max())
    lowest = float(voltage.min())
    mean_value = float(voltage.mean())

    # Correlate voltage with each weather parameter (temperature, humidity).
    weather_correlations = [
        voltage.corr(frame[column])
        for column in ("temperature_c", "humidity_percent")
    ]

    # Combine into one figure by averaging; NaN results are ignored.
    usable = [value for value in weather_correlations if not math.isnan(value)]
    if usable:
        combined = float(sum(usable) / len(usable))
    else:
        combined = 0.0

    report = SummaryReport(
        max_voltage=highest,
        min_voltage=lowest,
        average_voltage=mean_value,
        correlation_with_weather=combined,
    )
    logger.debug(
        "Finished analysis: max=%.3f, min=%.3f, avg=%.3f, corr=%.3f",
        report.max_voltage,
        report.min_voltage,
        report.average_voltage,
        report.correlation_with_weather,
    )
    return report