Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
ef8a8bc4b1
commit
eb97474cdb
1 changed files with 83 additions and 0 deletions
83
data_analysis/src/data_analysis/core.py
Normal file
83
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
import pandas as pd
|
||||
import math
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class SensorData:
    """A single sensor reading: a timestamp string plus three measurements.

    Dataclasses do not enforce their annotations at runtime, so use
    ``validate`` to check a list of readings before processing it.
    """

    timestamp: str
    voltage_mv: float
    temperature_c: float
    humidity_percent: float

    @staticmethod
    def validate(data: List["SensorData"]) -> None:
        """Check that *data* is a list of well-typed ``SensorData`` items.

        Args:
            data: The readings to check. An empty list passes validation.

        Raises:
            TypeError: If *data* is not a list, if an item is not a
                ``SensorData`` instance, or if one of the numeric fields is
                not an ``int`` or ``float``. ``bool`` values are rejected.
        """
        if not isinstance(data, list):
            raise TypeError("data must be a list of SensorData instances")

        numeric_fields = ("voltage_mv", "temperature_c", "humidity_percent")
        for i, d in enumerate(data):
            if not isinstance(d, SensorData):
                raise TypeError(f"Item {i} is not a SensorData instance")
            for name in numeric_fields:
                value = getattr(d, name)
                # bool is a subclass of int, so a plain isinstance check
                # would let True/False through as a measurement.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    raise TypeError(f"Invalid {name} type in item {i}")
|
||||
|
||||
|
||||
@dataclass
class SummaryReport:
    """Result record produced by ``analyze_data``."""

    # Maximum, minimum and mean of the analyzed voltage values.
    max_voltage: float
    min_voltage: float
    average_voltage: float

    # Single combined figure for the voltage/weather correlation.
    correlation_with_weather: float
|
||||
|
||||
|
||||
def analyze_data(data: List[SensorData]) -> SummaryReport:
    """Analyze sensor readings and compute voltage statistics and correlations.

    Args:
        data: Readings to analyze. They are checked with
            ``SensorData.validate`` before anything else happens.

    Returns:
        A ``SummaryReport`` with the maximum, minimum and mean of
        ``voltage_mv``, plus the mean of the voltage/temperature and
        voltage/humidity correlations. Correlations that evaluate to NaN are
        excluded from that mean; if none remain, the value is ``0.0``.

    Raises:
        TypeError: Propagated from ``SensorData.validate``.
        ValueError: If ``data`` is an empty list.
    """
    SensorData.validate(data)
    if not data:
        raise ValueError("No sensor data provided for analysis.")

    logger.debug("Starting analysis of %d sensor data entries", len(data))

    records = [
        {
            "timestamp": reading.timestamp,
            "voltage_mv": reading.voltage_mv,
            "temperature_c": reading.temperature_c,
            "humidity_percent": reading.humidity_percent,
        }
        for reading in data
    ]
    frame = pd.DataFrame(records)
    voltage = frame["voltage_mv"]

    highest = float(voltage.max())
    lowest = float(voltage.min())
    mean_value = float(voltage.mean())

    # Correlate voltage with each weather parameter (temperature and
    # humidity), then keep only the coefficients that are not NaN.
    usable = [
        coefficient
        for coefficient in (
            voltage.corr(frame["temperature_c"]),
            voltage.corr(frame["humidity_percent"]),
        )
        if not math.isnan(coefficient)
    ]

    # The overall correlation is the plain average of what is left.
    if usable:
        weather_correlation = float(sum(usable) / len(usable))
    else:
        weather_correlation = 0.0

    report = SummaryReport(
        max_voltage=highest,
        min_voltage=lowest,
        average_voltage=mean_value,
        correlation_with_weather=weather_correlation,
    )

    logger.debug(
        "Finished analysis: max=%.3f, min=%.3f, avg=%.3f, corr=%.3f",
        report.max_voltage,
        report.min_voltage,
        report.average_voltage,
        report.correlation_with_weather,
    )

    return report
|
||||
Loading…
Reference in a new issue