Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
ef8a8bc4b1
commit
eb97474cdb
1 changed files with 83 additions and 0 deletions
83
data_analysis/src/data_analysis/core.py
Normal file
83
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,83 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List
|
||||||
|
import pandas as pd
|
||||||
|
import math
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SensorData:
    """One sensor reading: a timestamp plus voltage, temperature and humidity.

    The dataclass stores the values exactly as given and performs no checks
    on construction. Call :meth:`validate` to type-check a list of readings.
    """

    timestamp: str
    voltage_mv: float
    temperature_c: float
    humidity_percent: float

    @staticmethod
    def validate(data: List[SensorData]) -> None:
        """Type-check a list of readings, raising on the first problem found.

        Only the container type, the item type and the three numeric fields
        are checked; ``timestamp`` is not inspected.

        Args:
            data: The readings to check. An empty list is accepted.

        Raises:
            TypeError: If ``data`` is not a ``list``, if an item is not a
                ``SensorData`` instance, or if ``voltage_mv``,
                ``temperature_c`` or ``humidity_percent`` of an item is not
                an ``int`` or ``float``. ``bool`` values are rejected.
        """
        if not isinstance(data, list):
            raise TypeError("data must be a list of SensorData instances")

        for i, d in enumerate(data):
            if not isinstance(d, SensorData):
                raise TypeError(f"Item {i} is not a SensorData instance")

            # Fields are checked in declaration order so the first offending
            # field of the first offending item is the one reported.
            for field_name in ("voltage_mv", "temperature_c", "humidity_percent"):
                value = getattr(d, field_name)
                # bool is a subclass of int, so a plain isinstance check would
                # let True/False through as a measurement; exclude it.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    raise TypeError(f"Invalid {field_name} type in item {i}")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SummaryReport:
    """Aggregate figures describing a set of sensor readings.

    Instances are plain value holders; the dataclass performs no validation.
    """

    # Largest voltage value among the analysed readings.
    max_voltage: float
    # Smallest voltage value among the analysed readings.
    min_voltage: float
    # Arithmetic mean of the voltage values.
    average_voltage: float
    # Single figure summarising how voltage correlates with the weather
    # fields (temperature and humidity) of the readings.
    correlation_with_weather: float
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_data(data: List[SensorData]) -> SummaryReport:
    """Analyse sensor readings and compute voltage statistics and correlations.

    Args:
        data: Readings to analyse. They are type-checked with
            ``SensorData.validate`` before anything else happens.

    Returns:
        A ``SummaryReport`` holding the maximum, minimum and mean of
        ``voltage_mv`` plus ``correlation_with_weather``: the average of the
        voltage/temperature and voltage/humidity correlation coefficients as
        returned by ``Series.corr``. Coefficients that come back as NaN are
        left out of the average; if none remain the value is ``0.0``.

    Raises:
        TypeError: Propagated from ``SensorData.validate``.
        ValueError: If ``data`` is an empty list.
    """
    SensorData.validate(data)
    if not data:
        raise ValueError("No sensor data provided for analysis.")

    logger.debug("Starting analysis of %d sensor data entries", len(data))

    # One record per reading; keys become the DataFrame columns.
    records = [
        {
            "timestamp": reading.timestamp,
            "voltage_mv": reading.voltage_mv,
            "temperature_c": reading.temperature_c,
            "humidity_percent": reading.humidity_percent,
        }
        for reading in data
    ]
    frame = pd.DataFrame(records)
    voltage = frame["voltage_mv"]

    # Correlation between voltage and each weather parameter
    # (temperature first, then humidity).
    weather_columns = ("temperature_c", "humidity_percent")
    coefficients = [voltage.corr(frame[column]) for column in weather_columns]

    # Collapse into one overall figure by averaging; NaN results are ignored.
    usable = [value for value in coefficients if not math.isnan(value)]
    if usable:
        correlation_with_weather = float(sum(usable) / len(usable))
    else:
        correlation_with_weather = 0.0

    report = SummaryReport(
        max_voltage=float(voltage.max()),
        min_voltage=float(voltage.min()),
        average_voltage=float(voltage.mean()),
        correlation_with_weather=correlation_with_weather,
    )

    logger.debug(
        "Finished analysis: max=%.3f, min=%.3f, avg=%.3f, corr=%.3f",
        report.max_voltage,
        report.min_voltage,
        report.average_voltage,
        report.correlation_with_weather,
    )
    return report
|
||||||
Loading…
Reference in a new issue