"""Validation and aggregation of performance metric samples.

Provides ``MetricData``, a record of three float measurements that can be
built from a plain mapping with validation, and ``analyse_metrics``, which
reduces a list of such records to summary statistics.
"""
# Module path per the originating patch: artifact.1/src/artifact_1/core.py

from __future__ import annotations

import json
import logging
from dataclasses import dataclass
from statistics import mean, stdev
from typing import Any, Dict, List

import pandas as pd

logger = logging.getLogger(__name__)
# Attach a no-op handler so importing this module never emits log output
# unless the application configures logging itself.
logger.addHandler(logging.NullHandler())

# Field names shared by MetricData.from_dict and analyse_metrics; the order
# is the order in which from_dict converts (and therefore reports errors for)
# the fields.
_METRIC_FIELDS = ("band_width", "retry_tail_p99", "band_center")


class MetricDataValidationError(ValueError):
    """Raised when MetricData validation fails."""


@dataclass
class MetricData:
    """One metric sample consisting of three float measurements."""

    band_width: float
    retry_tail_p99: float
    band_center: float

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> MetricData:
        """Build a MetricData from a mapping, coercing every field to float.

        Args:
            data: Mapping that must contain the keys ``band_width``,
                ``retry_tail_p99`` and ``band_center``. Extra keys are
                ignored.

        Returns:
            A MetricData whose three fields are built-in floats.

        Raises:
            MetricDataValidationError: If a required key is missing, or a
                value cannot be converted with ``float()``.
        """
        missing = [name for name in _METRIC_FIELDS if name not in data]
        if missing:
            raise MetricDataValidationError(f"Missing fields in MetricData: {missing}")

        try:
            values = {name: float(data[name]) for name in _METRIC_FIELDS}
        except (TypeError, ValueError) as e:
            raise MetricDataValidationError(f"Field type invalid: {e}") from e

        return cls(**values)


def analyse_metrics(data: List[MetricData]) -> Dict[str, Any]:
    """Analyse performance measurements with respect to concurrency effects.

    Args:
        data: Non-empty list of MetricData instances.

    Returns:
        Dictionary of aggregated figures on band width and stability:

        * ``count``: number of entries (int).
        * ``band_width_avg``: mean of ``band_width``.
        * ``band_width_std``: population standard deviation (ddof=0) of
          ``band_width``.
        * ``retry_tail_p99_avg``: mean of ``retry_tail_p99``.
        * ``retry_tail_p99_max``: maximum of ``retry_tail_p99``.
        * ``band_center_avg``: mean of ``band_center``.
        * ``band_center_stability``: sample standard deviation (n - 1) of
          ``band_center``; ``0.0`` when there is only one entry.

        Every statistic is returned as a built-in ``float``.

    Raises:
        ValueError: If ``data`` is empty.
        MetricDataValidationError: If any field contains a null/NaN value.
    """
    if not data:
        raise ValueError("'data' darf nicht leer sein.")

    logger.debug("Starting analysis of %d MetricData entries", len(data))

    # Read the fields explicitly rather than via ``__dict__`` so the frame
    # always has exactly the expected columns.
    df = pd.DataFrame(
        [{name: getattr(sample, name) for name in _METRIC_FIELDS} for sample in data]
    )

    # Null/NaN values would silently be skipped by the pandas reductions
    # below, so reject them up front.
    for col in _METRIC_FIELDS:
        if df[col].isnull().any():
            raise MetricDataValidationError(f"Column {col} contains NaN values.")

    band_width = df["band_width"]
    retry_tail = df["retry_tail_p99"]
    band_center = df["band_center"]

    # statistics.stdev needs at least two data points.
    stability = stdev(band_center.tolist()) if len(df) > 1 else 0.0

    # Convert to built-in numbers so the result logs and serialises without
    # NumPy scalar wrappers.
    result = {
        "count": len(df),
        "band_width_avg": float(band_width.mean()),
        "band_width_std": float(band_width.std(ddof=0)),
        "retry_tail_p99_avg": float(retry_tail.mean()),
        "retry_tail_p99_max": float(retry_tail.max()),
        "band_center_avg": float(band_center.mean()),
        "band_center_stability": float(stability),
    }

    logger.debug("Analysis complete with results: %s", result)
    return result