Add artifact.1/src/artifact_1/core.py

This commit is contained in:
Mika 2026-03-04 15:16:37 +00:00
commit 43cfda9bfb

View file

@@ -0,0 +1,100 @@
from __future__ import annotations
import statistics
from dataclasses import dataclass
from typing import List, Dict, Any
import logging
# Module-level logging setup, executed once when this module is imported.
# basicConfig() is asked to configure the root logger at INFO level
# (stated intent of the original author: "CI readiness").
# NOTE(review): configuring the root logger from inside a library module also
# affects whatever application imports it -- confirm this is intended.
logging.basicConfig(level=logging.INFO)
# Logger named after this module; analyze_ab_data() reports through it.
logger = logging.getLogger(__name__)
class DataValidationError(Exception):
    """Signal that input data does not have the structure this module expects."""
@dataclass
class TestResult:
    """One A/B-test record, identified by its group and pinned-status stratum."""

    # The class name starts with "Test", so pytest would try to collect it as
    # a test class; this marker tells pytest to skip it.
    __test__ = False

    group: str  # A/B group label (coerced with str() by from_dict)
    pinned_status: str  # stratum label (coerced with str() by from_dict)
    warn_rate: float  # coerced with float() by from_dict
    unknown_rate: float  # coerced with float() by from_dict
    delta_t_rate: float  # coerced with float() by from_dict

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'TestResult':
        """Build an instance from a plain dictionary.

        Extra keys in ``d`` are ignored. ``group`` and ``pinned_status`` are
        converted with ``str()``, the three rate fields with ``float()``.
        Being a classmethod, it returns an instance of the class it is called
        on, so subclasses get their own type back.

        Args:
            d: Mapping that must contain the keys ``group``,
                ``pinned_status``, ``warn_rate``, ``unknown_rate`` and
                ``delta_t_rate``.

        Returns:
            The populated instance.

        Raises:
            DataValidationError: If ``d`` is not a dict, a required key is
                missing, or a value cannot be converted.
        """
        required_fields = {'group', 'pinned_status', 'warn_rate', 'unknown_rate', 'delta_t_rate'}
        if not isinstance(d, dict):
            raise DataValidationError(f"Each entry must be a dict, got: {type(d)}")

        # Sorted so the message is deterministic; the braces keep the
        # set-literal look the message has always had.
        missing = sorted(required_fields - d.keys())
        if missing:
            listing = ", ".join(repr(name) for name in missing)
            raise DataValidationError(f"Missing required fields: {{{listing}}}")

        try:
            return cls(
                group=str(d['group']),
                pinned_status=str(d['pinned_status']),
                warn_rate=float(d['warn_rate']),
                unknown_rate=float(d['unknown_rate']),
                delta_t_rate=float(d['delta_t_rate']),
            )
        except (TypeError, ValueError) as e:
            raise DataValidationError(f"Invalid field type: {e}") from e
def analyze_ab_data(data: List[TestResult]) -> Dict[str, Any]:
    """Aggregate A/B test data into mean metrics per group and stratum.

    Args:
        data: List of TestResult objects.

    Returns:
        Nested dictionary ``{group: {pinned_status: summary}}``. Each summary
        holds ``mean_warn_rate``, ``mean_unknown_rate``,
        ``mean_delta_t_rate`` and ``count`` (number of records in that
        bucket). An empty input list yields an empty dictionary.

    Raises:
        DataValidationError: If ``data`` is not a list or contains an item
            that is not a TestResult instance.
    """
    if not isinstance(data, list):
        raise DataValidationError("Input data must be a list of TestResult objects.")

    # Validate and bucket in a single pass, keyed by (group, pinned_status).
    # Nothing is returned or logged before the whole list has been checked.
    grouped: Dict[tuple[str, str], Dict[str, list[float]]] = {}
    for idx, item in enumerate(data):
        if not isinstance(item, TestResult):
            raise DataValidationError(f"Item at index {idx} is not a TestResult instance.")
        key = (item.group, item.pinned_status)
        bucket = grouped.get(key)
        if bucket is None:
            bucket = grouped[key] = {
                'warn_rate': [],
                'unknown_rate': [],
                'delta_t_rate': [],
            }
        bucket['warn_rate'].append(item.warn_rate)
        bucket['unknown_rate'].append(item.unknown_rate)
        bucket['delta_t_rate'].append(item.delta_t_rate)

    # A bucket only exists once a record has been appended to it, so every
    # list is non-empty and statistics.fmean() cannot raise StatisticsError
    # for empty data here; no fallback branch is needed.
    result: Dict[str, Any] = {}
    for (group, pin_status), metrics in grouped.items():
        result.setdefault(group, {})[pin_status] = {
            'mean_warn_rate': statistics.fmean(metrics['warn_rate']),
            'mean_unknown_rate': statistics.fmean(metrics['unknown_rate']),
            'mean_delta_t_rate': statistics.fmean(metrics['delta_t_rate']),
            'count': len(metrics['warn_rate']),
        }

    # len(grouped) is the number of (group, pinned_status) pairs; len(result)
    # would only count the distinct groups.
    logger.info("Aggregated summary computed for %d group-pinned combinations.", len(grouped))
    return result