Add retry_tail_analysis/src/retry_tail_analysis/core.py

This commit is contained in:
Mika 2026-03-24 11:10:14 +00:00
parent 1187fb661a
commit 2242d66b8c

View file

@@ -0,0 +1,65 @@
from __future__ import annotations
import logging
from dataclasses import dataclass
from statistics import mean
from typing import List
logger = logging.getLogger(__name__)
class DataValidationError(ValueError):
    """Raised when an ExperimentData field fails validation.

    Subclasses ValueError, so handlers written for ValueError also catch it.
    """
@dataclass
class ExperimentData:
    """One experimental run entry, validated on construction.

    Attributes:
        run_id: Identifier of the run; must be a ``str``.
        retry_tailp99: Numeric retry-tail p99 value; must be a non-negative,
            non-NaN ``int`` or ``float`` (``bool`` is rejected).
        threshold: Numeric threshold for the run; same constraints as
            ``retry_tailp99``.

    Raises:
        DataValidationError: If a field has the wrong type, is NaN, or is
            negative.
    """

    run_id: str
    retry_tailp99: float
    threshold: float

    def __post_init__(self) -> None:
        """Check field types and value ranges right after ``__init__``."""
        if not isinstance(self.run_id, str):
            raise DataValidationError(f"run_id must be str, got {type(self.run_id).__name__}")

        for field_name in ("retry_tailp99", "threshold"):
            value = getattr(self, field_name)
            # bool is a subclass of int, so it must be excluded explicitly;
            # otherwise True/False would silently be treated as 1/0.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise DataValidationError(
                    f"{field_name} must be numeric, got {type(value).__name__}"
                )
            # NaN is the only numeric value unequal to itself. It compares
            # False against everything, so it would slip past the sign check
            # below and poison any later aggregation.
            if value != value:
                raise DataValidationError(f"{field_name} must not be NaN.")

        if self.retry_tailp99 < 0 or self.threshold < 0:
            raise DataValidationError("Values for retry_tailp99 and threshold must be non-negative.")
def analyze_retry_tail(data: List[ExperimentData]) -> float:
    """Return the arithmetic mean of ``retry_tailp99`` over all runs.

    As a side effect, logs the aggregate at DEBUG level and, at INFO level,
    the number of runs whose ``retry_tailp99`` is greater than or equal to
    their own ``threshold``.

    Args:
        data: Non-empty list of ExperimentData instances.

    Returns:
        The mean ``retry_tailp99`` value as a float.

    Raises:
        TypeError: If ``data`` is not a list, or if any item is not an
            ExperimentData instance.
        ValueError: If ``data`` is empty.
    """
    # Raise explicitly rather than ``assert``: assertions are stripped under
    # ``python -O``, which would silently disable this input check.
    if not isinstance(data, list):
        raise TypeError("Input data must be a list of ExperimentData instances")
    if not data:
        raise ValueError("Data list must not be empty.")
    if not all(isinstance(item, ExperimentData) for item in data):
        raise TypeError("All items must be ExperimentData instances.")

    agg_value = float(mean([item.retry_tailp99 for item in data]))

    # The comparison is inclusive: a run sitting exactly on its threshold is
    # counted as an exceedance.
    exceed_count = sum(1 for item in data if item.retry_tailp99 >= item.threshold)

    logger.debug("Aggregated retry_tailp99: %.4f", agg_value)
    logger.info("%d of %d runs exceed threshold.", exceed_count, len(data))
    return agg_value