Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
e95a204761
commit
6b0119f042
1 changed files with 98 additions and 0 deletions
98
data_analysis/src/data_analysis/core.py
Normal file
98
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import logging
|
||||||
|
from typing import List, Any
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from statistics import mean, stdev
|
||||||
|
|
||||||
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||||||
|
# NOTE(review): this pins the logger's own level to INFO at import time, so it
# no longer inherits the level configured on its parent loggers — confirm this
# is intended for a library module.
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AnalysisResult:
    """Container for the results of the sensor data analysis.

    Both fields default to a fresh empty list, so ``AnalysisResult()`` is a
    valid "nothing found" result. The constructor is generated by
    ``@dataclass``; it accepts the two lists positionally or by keyword, in
    the order they are declared below.

    Attributes:
        significant_patterns: Pattern records found by the analysis.
        anomaly_events: Anomaly records found by the analysis.
    """

    # default_factory gives every instance its own list (no shared mutable
    # default between instances).
    significant_patterns: list[Any] = field(default_factory=list)
    anomaly_events: list[Any] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class DataValidationError(Exception):
    """Raised when the supplied input data is invalid."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LogEntry:
    """A single log record: a timestamp plus four numeric readings.

    The annotations are not enforced at runtime by ``@dataclass``; any type
    checking has to be done by the code that consumes the entries.
    """

    timestamp: str
    luminosity: int
    sound_level: float
    temperature: float
    inference: float
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_log_entries(log_entries: List[LogEntry]) -> None:
|
||||||
|
if not isinstance(log_entries, list):
|
||||||
|
raise DataValidationError("log_entries muss eine Liste sein.")
|
||||||
|
for entry in log_entries:
|
||||||
|
if not isinstance(entry, LogEntry):
|
||||||
|
raise DataValidationError("Eintrag ist kein LogEntry-Objekt.")
|
||||||
|
if not isinstance(entry.luminosity, int):
|
||||||
|
raise DataValidationError("Luminosity muss int sein.")
|
||||||
|
if not isinstance(entry.sound_level, (int, float)):
|
||||||
|
raise DataValidationError("Sound-Level muss numerisch sein.")
|
||||||
|
if not isinstance(entry.temperature, (int, float)):
|
||||||
|
raise DataValidationError("Temperature muss numerisch sein.")
|
||||||
|
if not isinstance(entry.inference, (int, float)):
|
||||||
|
raise DataValidationError("Inference muss numerisch sein.")
|
||||||
|
|
||||||
|
|
||||||
|
def _correlation_patterns(frame: pd.DataFrame, threshold: float) -> list[dict[str, Any]]:
    """Return a pattern record for every column pair whose |correlation| exceeds *threshold*.

    Each qualifying pair is reported in both orders (A/B and B/A), because the
    correlation matrix is walked in full apart from its diagonal.
    """
    corr = frame.corr(numeric_only=True)
    patterns: list[dict[str, Any]] = []
    for col1 in corr.columns:
        for col2 in corr.columns:
            if col1 == col2:
                continue
            coefficient = corr.loc[col1, col2]
            # NaN (e.g. from a constant column) compares False and is skipped.
            if abs(coefficient) > threshold:
                patterns.append(
                    {
                        "relationship": f"High correlation between {col1} and {col2}",
                        "correlation": coefficient,
                    }
                )
    return patterns


def _sigma_anomalies(frame: pd.DataFrame, sigma_factor: float) -> list[dict[str, Any]]:
    """Return an event record for every value outside mean ± sigma_factor * stdev of its column."""
    events: list[dict[str, Any]] = []
    # The sample standard deviation needs at least two observations; all
    # columns share the same length, so one check covers every column.
    if len(frame) < 2:
        return events

    for column in ("luminosity", "sound", "temperature", "inference"):
        series = frame[column]
        mean_value = mean(series)
        std_value = stdev(series)
        lower_bound = mean_value - sigma_factor * std_value
        upper_bound = mean_value + sigma_factor * std_value
        outliers = series[(series < lower_bound) | (series > upper_bound)]
        # Iterate the column itself rather than DataFrame.iterrows(): rows are
        # upcast to one common dtype, which would turn integer readings into
        # floats in the reported value.
        for idx, value in outliers.items():
            events.append({"index": int(idx), "parameter": column, "value": value})
    return events


def analyze_data(
    log_entries: List[LogEntry],
    *,
    correlation_threshold: float = 0.8,
    sigma_factor: float = 3,
) -> AnalysisResult:
    """Analyse a list of log entries for correlation patterns and anomalies.

    Args:
        log_entries: Entries to analyse; validated with
            ``_validate_log_entries`` before anything else happens.
        correlation_threshold: A column pair is reported as a pattern when
            the absolute value of its correlation coefficient is strictly
            greater than this value.
        sigma_factor: A value is reported as an anomaly when it lies strictly
            outside ``mean ± sigma_factor * stdev`` of its column (sample
            standard deviation).

    Returns:
        An ``AnalysisResult`` with the pattern records
        (``relationship``/``correlation`` keys) and the anomaly records
        (``index``/``parameter``/``value`` keys). An empty input yields an
        empty result and logs a warning.

    Raises:
        DataValidationError: Propagated from ``_validate_log_entries`` when
            the input is malformed.
    """
    _validate_log_entries(log_entries)
    if not log_entries:
        logger.warning("Leere Eingabeliste übergeben.")
        return AnalysisResult([], [])

    df = pd.DataFrame(
        {
            "luminosity": [entry.luminosity for entry in log_entries],
            "sound": [entry.sound_level for entry in log_entries],
            "temperature": [entry.temperature for entry in log_entries],
            "inference": [entry.inference for entry in log_entries],
        }
    )

    significant_patterns = _correlation_patterns(df, correlation_threshold)
    anomaly_events = _sigma_anomalies(df, sigma_factor)

    logger.info("Analyse abgeschlossen: %d Muster, %d Anomalien", len(significant_patterns), len(anomaly_events))

    return AnalysisResult(significant_patterns, anomaly_events)
|
||||||
Loading…
Reference in a new issue