# Provenance (from the commit that introduced this file):
#   commit 43cfda9bfb005ac0f61debc5d36b69ea71fd50d8
#   Author: Mika
#   Date:   Wed Mar 4 15:16:37 2026 +0000
#   Add artifact.1/src/artifact_1/core.py
#
# Aggregation of A/B-test measurements per (group, pinned_status) stratum.
from __future__ import annotations

import logging
import statistics
from dataclasses import dataclass
from typing import Any, Dict, List


# Configure basic logging for CI readiness
# NOTE: this runs at import time and is kept as-is so importing the module
# behaves exactly as before.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Numeric fields that are averaged per stratum, in output order.
_METRIC_FIELDS = ('warn_rate', 'unknown_rate', 'delta_t_rate')
# Keys every input dict must provide for TestResult.from_dict.
_REQUIRED_FIELDS = ('group', 'pinned_status') + _METRIC_FIELDS


class DataValidationError(Exception):
    """Raised when input data does not conform to expected structure."""


@dataclass
class TestResult:
    """A single measurement row of an A/B test.

    Attributes:
        group: Name of the test group the row belongs to.
        pinned_status: Stratum label within the group.
        warn_rate: Numeric metric, averaged by ``analyze_ab_data``.
        unknown_rate: Numeric metric, averaged by ``analyze_ab_data``.
        delta_t_rate: Numeric metric, averaged by ``analyze_ab_data``.
    """

    # The class name starts with "Test", so pytest would try to collect it
    # whenever it is imported into a test module; opt out explicitly.
    # (Not annotated, so the dataclass machinery does not treat it as a field.)
    __test__ = False

    group: str
    pinned_status: str
    warn_rate: float
    unknown_rate: float
    delta_t_rate: float

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'TestResult':
        """Build an instance from a plain dict, coercing field types.

        ``group`` and ``pinned_status`` are converted with ``str()``; the
        three rate fields are converted with ``float()``. Extra keys in ``d``
        are ignored.

        Args:
            d: Mapping that must contain all of ``group``, ``pinned_status``,
                ``warn_rate``, ``unknown_rate`` and ``delta_t_rate``.

        Returns:
            The constructed instance.

        Raises:
            DataValidationError: If ``d`` is not a dict, a required key is
                missing, or a rate value cannot be converted to ``float``.
        """
        if not isinstance(d, dict):
            raise DataValidationError(f"Each entry must be a dict, got: {type(d)}")

        # A list in declaration order keeps the error message deterministic
        # (a set's repr order varies between interpreter runs).
        missing = [name for name in _REQUIRED_FIELDS if name not in d]
        if missing:
            raise DataValidationError(f"Missing required fields: {missing}")

        try:
            return cls(
                group=str(d['group']),
                pinned_status=str(d['pinned_status']),
                warn_rate=float(d['warn_rate']),
                unknown_rate=float(d['unknown_rate']),
                delta_t_rate=float(d['delta_t_rate']),
            )
        except (TypeError, ValueError) as e:
            raise DataValidationError(f"Invalid field type: {e}") from e


def analyze_ab_data(data: List[TestResult]) -> Dict[str, Any]:
    """Analyze A/B test data and compute aggregated metrics per group and stratum.

    Args:
        data: List of TestResult objects.

    Returns:
        Nested dictionary ``{group: {pinned_status: summary}}`` where each
        summary holds ``mean_warn_rate``, ``mean_unknown_rate``,
        ``mean_delta_t_rate`` (arithmetic means) and ``count`` (number of
        rows in that stratum). An empty input list yields an empty dict.

    Raises:
        DataValidationError: If ``data`` is not a list or contains an item
            that is not a TestResult instance.
    """
    if not isinstance(data, list):
        raise DataValidationError("Input data must be a list of TestResult objects.")

    # Validate list contents before aggregating anything.
    for idx, item in enumerate(data):
        if not isinstance(item, TestResult):
            raise DataValidationError(f"Item at index {idx} is not a TestResult instance.")

    # Collect the raw metric values per (group, pinned_status) stratum.
    grouped: Dict[tuple[str, str], Dict[str, list[float]]] = {}
    for tr in data:
        key = (tr.group, tr.pinned_status)
        bucket = grouped.get(key)
        if bucket is None:
            bucket = grouped[key] = {name: [] for name in _METRIC_FIELDS}
        for name in _METRIC_FIELDS:
            bucket[name].append(getattr(tr, name))

    # Every bucket holds at least one value per metric (it is created together
    # with its first append), so fmean cannot hit its empty-data error here.
    result: Dict[str, Any] = {}
    for (group, pin_status), metrics in grouped.items():
        summary: Dict[str, Any] = {
            f'mean_{name}': statistics.fmean(values)
            for name, values in metrics.items()
        }
        summary['count'] = len(metrics['warn_rate'])
        result.setdefault(group, {})[pin_status] = summary

    # Report the number of (group, pinned_status) pairs, not the number of
    # top-level groups in the nested result.
    logger.info("Aggregated summary computed for %d group-pinned combinations.", len(grouped))

    return result