commit 4b8fe96731a60c2b6a8d699043644aedf8dbfb22 Author: Mika Date: Sun Mar 15 12:31:22 2026 +0000 Add artifact1/src/artifact1/core.py diff --git a/artifact1/src/artifact1/core.py b/artifact1/src/artifact1/core.py new file mode 100644 index 0000000..03697ab --- /dev/null +++ b/artifact1/src/artifact1/core.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import pandas as pd +import logging +from dataclasses import dataclass +from typing import Any, Dict +from statistics import mean, stdev + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class MetricsValidationError(Exception): + """Custom Exception for invalid performance metrics input data.""" + pass + + +@dataclass +class PerformanceData: + max_only_alerts: int + outlier_frequency: float + expires_at_dist_hours: float + retry_total_overhead: float + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> PerformanceData: + required_fields = { + 'max_only_alerts': int, + 'outlier_frequency': float, + 'expires_at_dist_hours': float, + 'retry_total_overhead': float, + } + for field, t in required_fields.items(): + if field not in data: + raise MetricsValidationError(f"Missing required field: {field}") + try: + _ = t(data[field]) + except (ValueError, TypeError): + raise MetricsValidationError(f"Invalid type for field {field}: expected {t.__name__}, got {type(data[field]).__name__}") + return cls(**{f: data[f] for f in required_fields}) + + +def analyze_metrics(data: Dict[str, Any]) -> Dict[str, Any]: + """Analysiert Performance-Daten und erstellt aggregierte Analyse-Ergebnisse. + + Args: + data: Eingehende JSON-Daten, die Performance-Metriken enthalten. + Returns: + Dict mit durchschnittlichen und statistischen Kennzahlen. + """ + if not isinstance(data, dict): + raise MetricsValidationError("Input data must be a dictionary.") + + metrics = data.get('runs') or data.get('metrics') or data + + if not isinstance(metrics, list): + raise MetricsValidationError("Expected a list of metric entries under 'runs' or 'metrics'.") + + try: + validated_records = [PerformanceData.from_dict(entry).__dict__ for entry in metrics] + except MetricsValidationError as e: + logger.error(f"Failed to validate input data: {e}") + raise + + if not validated_records: + return {'status': 'empty', 'aggregates': {}, 'anomalies': []} + + df = pd.DataFrame(validated_records) + + result: Dict[str, Any] = {'status': 'ok', 'aggregates': {}, 'anomalies': []} + + for column in df.columns: + col_data = df[column].dropna() + if not col_data.empty: + result['aggregates'][column] = { + 'mean': float(col_data.mean()), + 'std': float(col_data.std(ddof=0)), + 'min': float(col_data.min()), + 'max': float(col_data.max()), + } + + anomalies = [] + for col, stats in result['aggregates'].items(): + mu, sigma = stats['mean'], stats['std'] + upper_limit = mu + 3 * sigma if sigma > 0 else mu * 1.3 + lower_limit = mu - 3 * sigma if sigma > 0 else mu * 0.7 + outliers = df[(df[col] > upper_limit) | (df[col] < lower_limit)] + if not outliers.empty: + anomalies.append({ + 'metric': col, + 'count': int(len(outliers)), + 'thresholds': { + 'lower': lower_limit, + 'upper': upper_limit + } + }) + + result['anomalies'] = anomalies + + assert 'aggregates' in result and isinstance(result['aggregates'], dict), "Aggregates missing in result" + return result