Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
c729b8f0a1
commit
df84494fc8
1 changed files with 69 additions and 0 deletions
69
data_analysis/src/data_analysis/core.py
Normal file
69
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from typing import Dict, Any
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
# Module-wide logger, named after this module's import path.
# NOTE(review): the level is pinned to DEBUG at import time, so this logger's
# effective level no longer inherits from the root logger -- confirm that this
# is intended for library code.
logger = logging.getLogger(name=__name__)
logger.setLevel(level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
class DataValidationError(Exception):
    """Signal that the supplied input data did not pass validation."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AnalysisResult:
    """Represent the analysis results: temperature differences and patterns.

    Attributes:
        temperature_differences: Mapping from a difference label to a float
            value; every instance gets its own empty dict by default.
        pattern_recognition: Text describing the recognised pattern; defaults
            to ``"Unbekannt"``.
    """

    temperature_differences: Dict[str, float] = field(default_factory=dict)
    pattern_recognition: str = "Unbekannt"

    def to_json(self) -> str:
        """Serialise the result's fields to an indented JSON string.

        Non-ASCII characters are written as-is instead of being escaped.
        """
        logger.debug("Serialisiere Analyseergebnisse zu JSON.")
        payload = asdict(self)
        return json.dumps(payload, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_input_data(data: pd.DataFrame) -> None:
    """Ensure that *data* is a DataFrame containing every required column.

    Args:
        data: Candidate input. It must be a ``pandas.DataFrame`` that has the
            columns ``surface``, ``temperature_c``, ``emissivity`` and
            ``timestamp``.

    Raises:
        DataValidationError: If *data* is not a DataFrame, or if one or more
            of the required columns are absent.
    """
    required_columns = {"surface", "temperature_c", "emissivity", "timestamp"}

    if not isinstance(data, pd.DataFrame):
        logger.error("Eingabedaten sind kein DataFrame.")
        raise DataValidationError("Input data must be a pandas DataFrame.")

    # Sort the missing names: iteration order of a set of strings can differ
    # between interpreter runs, which would make the log line and the
    # exception message unstable.
    missing = sorted(required_columns - set(data.columns))
    if missing:
        missing_names = ", ".join(missing)
        logger.error("Fehlende Spalten in Eingabedaten: %s", missing_names)
        raise DataValidationError(f"Missing required columns: {missing_names}")
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_temperature_data(data: pd.DataFrame) -> AnalysisResult:
    """Analyse temperature measurements per surface.

    The mean of ``temperature_c`` is computed for each ``surface`` group. For
    every unordered pair of surfaces, the difference of their means (first
    minus second, rounded to three decimals) is stored under the key
    ``"<first>-<second>"``. The surface with the highest mean is named in the
    pattern text.

    Args:
        data: Measurement table; checked with ``_validate_input_data`` before
            any computation takes place.

    Returns:
        An ``AnalysisResult`` holding the pairwise mean differences and the
        pattern description. If the data yields no surface groups at all, a
        default ``AnalysisResult()`` is returned instead.

    Raises:
        DataValidationError: Propagated from ``_validate_input_data``.
    """
    logger.debug("Starte Analyse der Temperaturdaten...")
    _validate_input_data(data)

    # Mean temperature per surface.
    means = data.groupby("surface")["temperature_c"].mean().to_dict()
    logger.debug("Berechnete Mittelwerte: %s", means)

    if not means:
        # Without any group, max() below would raise a bare ValueError
        # ("max() arg is an empty sequence"); return the defaults instead.
        logger.warning(
            "Keine Oberflächen in den Eingabedaten gefunden; "
            "gebe Standardergebnis zurück."
        )
        return AnalysisResult()

    # Pairwise differences of the means; each unordered pair appears once.
    surfaces = list(means)
    temp_diffs: Dict[str, float] = {}
    for index, first in enumerate(surfaces):
        for second in surfaces[index + 1:]:
            temp_diffs[f"{first}-{second}"] = round(means[first] - means[second], 3)
    logger.debug("Berechnete Temperaturdifferenzen: %s", temp_diffs)

    # Pattern recognition (simple example): find the surface that is warmest
    # on average.
    warmest_surface = max(means, key=means.get)
    pattern = f"Die Oberfläche '{warmest_surface}' weist die höchste mittlere Temperatur auf."
    logger.info("Mustererkennung abgeschlossen: %s", pattern)

    result = AnalysisResult(temperature_differences=temp_diffs, pattern_recognition=pattern)
    logger.debug("Analyse abgeschlossen. Ergebnis: %s", result)

    return result
|
||||||
Loading…
Reference in a new issue