Add artifact1/src/artifact1/core.py

This commit is contained in:
Mika 2026-03-15 12:31:22 +00:00
commit 4b8fe96731

View file

@ -0,0 +1,101 @@
from __future__ import annotations
import pandas as pd
import logging
from dataclasses import dataclass
from typing import Any, Dict
from statistics import mean, stdev
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Set this logger's own threshold to INFO when the module is imported.
logger.setLevel(logging.INFO)
class MetricsValidationError(Exception):
    """Raised when performance-metrics input data fails validation."""
@dataclass
class PerformanceData:
    """One metrics entry holding four numeric fields.

    Instances are normally created through :meth:`from_dict`, which checks
    that every field is present and converts it to its declared type.
    """

    max_only_alerts: int
    outlier_frequency: float
    expires_at_dist_hours: float
    retry_total_overhead: float

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PerformanceData":
        """Build a ``PerformanceData`` from a mapping of raw values.

        Every required field is converted with its declared type (``int``
        or ``float``) and the *converted* value is stored, so a numeric
        string such as ``"5"`` ends up as a real number on the instance.
        Conversion follows the built-in constructors, e.g. ``int(2.9)``
        yields ``2``.

        Args:
            data: Mapping containing all four field names. Extra keys are
                ignored.

        Returns:
            A new instance populated with the converted values.

        Raises:
            MetricsValidationError: If ``data`` is not a mapping, a
                required field is missing, or a value cannot be converted
                to the expected type.
        """
        # Function-scope import keeps the block self-contained.
        from collections.abc import Mapping

        # Reject non-mappings up front; otherwise ``name not in data`` would
        # raise a bare TypeError for inputs such as ``None`` or ``3``.
        if not isinstance(data, Mapping):
            raise MetricsValidationError(
                f"Metric entry must be a mapping, got {type(data).__name__}"
            )

        required_fields = {
            'max_only_alerts': int,
            'outlier_frequency': float,
            'expires_at_dist_hours': float,
            'retry_total_overhead': float,
        }

        converted: Dict[str, Any] = {}
        for name, caster in required_fields.items():
            if name not in data:
                raise MetricsValidationError(f"Missing required field: {name}")
            raw = data[name]
            try:
                converted[name] = caster(raw)
            except (ValueError, TypeError, OverflowError) as err:
                # OverflowError covers int(float('inf')).
                raise MetricsValidationError(
                    f"Invalid type for field {name}: expected {caster.__name__}, got {type(raw).__name__}"
                ) from err
        return cls(**converted)
def analyze_metrics(data: Dict[str, Any]) -> Dict[str, Any]:
    """Validate performance-metric entries and compute aggregate statistics.

    Entries are read from ``data['runs']`` or, failing that, from
    ``data['metrics']``. Each entry is validated with
    ``PerformanceData.from_dict``. For every field the mean, population
    standard deviation (``ddof=0``), minimum and maximum are computed, and
    fields with values outside their tolerance band are reported as
    anomalies. The band is ``mean +/- 3 * std``; when the standard deviation
    is zero it is ``[0.7 * mean, 1.3 * mean]``, ordered so that the lower
    bound never exceeds the upper bound.

    Args:
        data: Dictionary holding a list of metric entries under ``'runs'``
            or ``'metrics'``.

    Returns:
        Dict with the keys ``'status'`` (``'ok'`` or ``'empty'``),
        ``'aggregates'`` (field name -> ``mean``/``std``/``min``/``max``)
        and ``'anomalies'`` (list of dicts with ``metric``, ``count`` and
        ``thresholds``).

    Raises:
        MetricsValidationError: If ``data`` is not a dict, no list of
            entries is found, or an entry fails validation.
    """
    if not isinstance(data, dict):
        raise MetricsValidationError("Input data must be a dictionary.")

    candidates = [data.get(key) for key in ('runs', 'metrics')]
    # Prefer the first non-empty value ('runs' before 'metrics'). If neither
    # is non-empty, accept an explicitly supplied empty list so that "no
    # entries" produces the 'empty' result instead of a validation error.
    metrics = next((value for value in candidates if value), None)
    if metrics is None:
        metrics = next(
            (value for value in candidates if isinstance(value, list)), None
        )
    if not isinstance(metrics, list):
        raise MetricsValidationError("Expected a list of metric entries under 'runs' or 'metrics'.")

    try:
        validated_records = [
            vars(PerformanceData.from_dict(entry)) for entry in metrics
        ]
    except MetricsValidationError as e:
        logger.error("Failed to validate input data: %s", e)
        raise

    if not validated_records:
        return {'status': 'empty', 'aggregates': {}, 'anomalies': []}

    df = pd.DataFrame(validated_records)
    aggregates: Dict[str, Any] = {}
    anomalies = []
    for column in df.columns:
        col_data = df[column].dropna()
        if col_data.empty:
            continue

        mu = float(col_data.mean())
        sigma = float(col_data.std(ddof=0))
        aggregates[column] = {
            'mean': mu,
            'std': sigma,
            'min': float(col_data.min()),
            'max': float(col_data.max()),
        }

        if sigma > 0:
            lower_limit, upper_limit = mu - 3 * sigma, mu + 3 * sigma
        else:
            # For a negative mean, mu * 1.3 is *below* mu * 0.7; sorting keeps
            # lower <= upper so a constant column is never flagged wholesale.
            lower_limit, upper_limit = sorted((mu * 0.7, mu * 1.3))

        # NaN compares False on both sides, so missing values are not counted.
        outside = (df[column] > upper_limit) | (df[column] < lower_limit)
        count = int(outside.sum())
        if count:
            anomalies.append({
                'metric': column,
                'count': count,
                'thresholds': {
                    'lower': lower_limit,
                    'upper': upper_limit,
                },
            })

    return {'status': 'ok', 'aggregates': aggregates, 'anomalies': anomalies}