Add stability_analysis/src/stability_analysis/core.py
This commit is contained in:
parent
f159fdfb02
commit
7435c2e3e3
1 changed files with 93 additions and 0 deletions
93
stability_analysis/src/stability_analysis/core.py
Normal file
93
stability_analysis/src/stability_analysis/core.py
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from statistics import mean, stdev
|
||||||
|
|
||||||
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# NOTE(review): this configures the root logger as a side effect of importing
# the module. Kept unchanged so existing callers see the same log output;
# consider moving it to the application entry point.
logging.basicConfig(
    format='[%(levelname)s] %(message)s',
    level=logging.INFO,
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AnalysisResults:
    """Structured results of the stability analysis."""

    # Names of the columns classified as stable (filled in by analyze_data).
    stable_cluster: List[str]
    # Outlier count per analysed column.
    outlier_counts: Dict[str, int]
    # One human-readable classification line per analysed column.
    patterns: List[str]

    def to_dict(self) -> Dict[str, Any]:
        """Return the three result fields as a plain dictionary.

        The dictionary values are the very objects stored on the instance;
        no copies are made.
        """
        return dict(
            stable_cluster=self.stable_cluster,
            outlier_counts=self.outlier_counts,
            patterns=self.patterns,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InputValidationError(Exception):
    """Raised when the input data is malformed or incomplete."""
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_data_list(data_list: List[Dict[str, Any]]) -> None:
|
||||||
|
if not isinstance(data_list, list):
|
||||||
|
raise InputValidationError("data_list muss eine Liste von Dicts sein.")
|
||||||
|
if not data_list:
|
||||||
|
raise InputValidationError("data_list darf nicht leer sein.")
|
||||||
|
for i, item in enumerate(data_list):
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
raise InputValidationError(f"Eintrag {i} ist kein Dict.")
|
||||||
|
if len(item) == 0:
|
||||||
|
raise InputValidationError(f"Eintrag {i} ist leer.")
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_data(data_list: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Analyse runtime data to detect stable clusters, outliers and patterns.

    The records are loaded into a DataFrame and every numeric column with at
    least two non-NaN values is examined:

    * Outliers are values further than two standard deviations (``np.std``
      with its default settings) from the column mean.
    * The relative standard deviation ``std / |mean|`` classifies the column
      as very stable (< 0.05), moderately stable (< 0.15) or variable.

    Args:
        data_list: Non-empty list of non-empty dicts, one dict per record.

    Returns:
        A dict with the keys ``stable_cluster`` (names of very stable
        columns), ``outlier_counts`` (outlier count per analysed column) and
        ``patterns`` (one description string per analysed column). Columns
        with fewer than two non-NaN values are skipped and appear in none of
        the three.

    Raises:
        InputValidationError: If ``data_list`` fails validation, cannot be
            turned into a DataFrame, or contains no numeric column.
    """
    # Validate first: calling len() on a non-sized input would otherwise
    # raise TypeError before the validator can report a proper error.
    _validate_data_list(data_list)
    logger.info("Starte Datenanalyse für %d Einträge.", len(data_list))

    try:
        df = pd.DataFrame(data_list)
    except Exception as e:
        # Boundary translation: any construction failure is reported to the
        # caller as an input problem, with the original error chained.
        logger.error("Fehler beim Erstellen des DataFrame: %s", e)
        raise InputValidationError("Ungültige Datenstruktur.") from e

    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    if not numeric_cols:
        raise InputValidationError("Keine numerischen Spalten für Analyse gefunden.")

    stable_clusters: List[str] = []
    outlier_counts: Dict[str, int] = {}
    patterns: List[str] = []

    for col in numeric_cols:
        values = df[col].dropna().to_numpy()
        if len(values) < 2:
            # Not enough data points to compute a meaningful spread.
            continue

        col_mean = np.mean(values)
        col_std = np.std(values)

        # Two-sigma band around the mean; anything outside is an outlier.
        lower, upper = col_mean - 2 * col_std, col_mean + 2 * col_std
        outlier_mask = (values < lower) | (values > upper)
        outlier_counts[col] = int(np.sum(outlier_mask))

        if col_std == 0:
            # A constant column (including all zeros) is perfectly stable.
            rel_std = 0.0
        elif col_mean == 0:
            # Spread around a zero mean has no finite relative measure.
            rel_std = float('inf')
        else:
            # Use |mean| so a negative mean cannot yield a negative ratio
            # that would be misread as "very stable".
            rel_std = col_std / abs(col_mean)

        if rel_std < 0.05:
            stable_clusters.append(col)
            patterns.append(f"{col}: sehr stabil (rel_std={rel_std:.3f})")
        elif rel_std < 0.15:
            patterns.append(f"{col}: moderat stabil (rel_std={rel_std:.3f})")
        else:
            patterns.append(f"{col}: variabel (rel_std={rel_std:.3f})")

    result = AnalysisResults(
        stable_cluster=stable_clusters,
        outlier_counts=outlier_counts,
        patterns=patterns,
    )

    logger.info("Analyse abgeschlossen. %d stabile Cluster gefunden.", len(stable_clusters))
    return result.to_dict()
|
||||||
Loading…
Reference in a new issue