Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
c729b8f0a1
commit
df84494fc8
1 changed files with 69 additions and 0 deletions
69
data_analysis/src/data_analysis/core.py
Normal file
69
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Dict, Any
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Module-level logger named after this module. Its own level is set to DEBUG
# here, so records of every severity pass this logger's level check.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
class DataValidationError(Exception):
    """Signal that the input data did not pass validation."""
|
||||
|
||||
|
||||
@dataclass
class AnalysisResult:
    """Represent analysis results: temperature differences and the recognized pattern."""

    # Maps a difference label to its value; each instance starts with its own empty dict.
    temperature_differences: Dict[str, float] = field(default_factory=dict)
    # Text describing the recognized pattern; "Unbekannt" until one is supplied.
    pattern_recognition: str = "Unbekannt"

    def to_json(self) -> str:
        """Serialize this result to a JSON string.

        The output is indented by two spaces and non-ASCII characters are
        written as-is rather than escaped.
        """
        logger.debug("Serialisiere Analyseergebnisse zu JSON.")
        payload = asdict(self)
        return json.dumps(payload, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def _validate_input_data(data: pd.DataFrame) -> None:
    """Ensure that *data* is a DataFrame containing all required columns.

    Args:
        data: The object to validate.

    Raises:
        DataValidationError: If *data* is not a ``pandas.DataFrame``, or if
            any of the columns ``surface``, ``temperature_c``,
            ``emissivity`` or ``timestamp`` is missing.
    """
    required_columns = {"surface", "temperature_c", "emissivity", "timestamp"}
    if not isinstance(data, pd.DataFrame):
        logger.error("Eingabedaten sind kein DataFrame.")
        raise DataValidationError("Input data must be a pandas DataFrame.")

    # Sort the missing names: set iteration order is not stable between runs,
    # and the log line and exception text should be reproducible.
    missing = sorted(required_columns.difference(data.columns))
    if missing:
        missing_list = ", ".join(missing)
        logger.error("Fehlende Spalten in Eingabedaten: %s", missing_list)
        raise DataValidationError(f"Missing required columns: {missing_list}")
|
||||
|
||||
|
||||
def analyze_temperature_data(data: pd.DataFrame) -> AnalysisResult:
    """Analyze temperature measurements grouped by surface.

    Computes the mean of ``temperature_c`` for each ``surface``, the
    difference of those means for every pair of surfaces (rounded to three
    decimals and keyed ``"<first>-<second>"``), and a sentence naming the
    surface with the highest mean temperature.

    Args:
        data: The measurements; checked with ``_validate_input_data`` first.

    Returns:
        An ``AnalysisResult`` holding the pairwise differences and the
        pattern sentence. If the data yields no surface groups (for example
        a DataFrame without rows), a default ``AnalysisResult`` is returned.

    Raises:
        DataValidationError: Propagated from ``_validate_input_data``.
    """
    logger.debug("Starte Analyse der Temperaturdaten...")
    _validate_input_data(data)

    # Mean temperature per surface.
    means = data.groupby("surface")["temperature_c"].mean().to_dict()
    logger.debug("Berechnete Mittelwerte: %s", means)

    if not means:
        # Without any group, max() below would raise a bare ValueError;
        # return a default result instead of crashing.
        logger.warning("Keine Messwerte vorhanden; Analyse liefert ein leeres Ergebnis.")
        return AnalysisResult()

    # Difference of means for every unordered pair, in grouping order.
    surfaces = list(means)
    temp_diffs: Dict[str, float] = {
        f"{first}-{second}": round(means[first] - means[second], 3)
        for idx, first in enumerate(surfaces)
        for second in surfaces[idx + 1:]
    }
    logger.debug("Berechnete Temperaturdifferenzen: %s", temp_diffs)

    # Pattern recognition (simple example): find the surface that is warmest on average.
    warmest_surface = max(means, key=means.get)
    pattern = f"Die Oberfläche '{warmest_surface}' weist die höchste mittlere Temperatur auf."
    logger.info("Mustererkennung abgeschlossen: %s", pattern)

    result = AnalysisResult(temperature_differences=temp_diffs, pattern_recognition=pattern)
    logger.debug("Analyse abgeschlossen. Ergebnis: %s", result)

    return result
|
||||
Loading…
Reference in a new issue