# Reconstructed from a whitespace-collapsed git patch:
#   commit:  2242d66b8c287f4074e216011f0b5eb6c0b35b9b
#   author:  Mika, Tue, 24 Mar 2026 11:10:14 +0000
#   subject: [PATCH] Add retry_tail_analysis/src/retry_tail_analysis/core.py
from __future__ import annotations

import logging
from dataclasses import dataclass
from statistics import mean
from typing import List


logger = logging.getLogger(__name__)


class DataValidationError(ValueError):
    """Raised when an ExperimentData field has a wrong type or invalid value."""


@dataclass
class ExperimentData:
    """Represents one experimental run data entry.

    Fields are validated once, at construction time, in ``__post_init__``.

    Attributes:
        run_id: Identifier of the run; must be a ``str``.
        retry_tailp99: Numeric measurement for the run; must be >= 0.
            (Presumably a p99 retry-tail metric; units are not shown here.)
        threshold: Numeric limit that ``retry_tailp99`` is compared against;
            must be >= 0.

    Raises:
        DataValidationError: If a field has the wrong type, or if
            ``retry_tailp99`` / ``threshold`` is negative or NaN.
    """

    run_id: str
    retry_tailp99: float
    threshold: float

    def __post_init__(self) -> None:
        if not isinstance(self.run_id, str):
            raise DataValidationError(f"run_id must be str, got {type(self.run_id).__name__}")
        if not isinstance(self.retry_tailp99, (int, float)):
            raise DataValidationError(
                f"retry_tailp99 must be numeric, got {type(self.retry_tailp99).__name__}"
            )
        if not isinstance(self.threshold, (int, float)):
            raise DataValidationError(f"threshold must be numeric, got {type(self.threshold).__name__}")
        # Written as ``not (x >= 0)`` rather than ``x < 0`` so that NaN, for
        # which every comparison is False, is rejected instead of slipping
        # through and turning the aggregated mean into NaN.
        if not (self.retry_tailp99 >= 0 and self.threshold >= 0):
            raise DataValidationError("Values for retry_tailp99 and threshold must be non-negative.")


def analyze_retry_tail(data: List[ExperimentData]) -> float:
    """Calculates aggregated retry_tailp99 from ExperimentData.

    The aggregate is the arithmetic mean of ``retry_tailp99`` over all
    entries. As a side effect, the mean is logged at DEBUG level and the
    number of runs whose value is at or above their own threshold is logged
    at INFO level.

    Args:
        data: Non-empty list of ExperimentData instances.

    Returns:
        Aggregated average retry_tailp99 value (float).

    Raises:
        TypeError: If ``data`` is not a list, or any item is not an
            ExperimentData instance.
        ValueError: If ``data`` is empty.
    """
    # Explicit raise instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would silently disable this check.
    if not isinstance(data, list):
        raise TypeError("Input data must be a list of ExperimentData instances")
    if not data:
        raise ValueError("Data list must not be empty.")

    for item in data:
        if not isinstance(item, ExperimentData):
            raise TypeError("All items must be ExperimentData instances.")

    agg_value = float(mean([item.retry_tailp99 for item in data]))

    # A run is counted when its value is at or above its threshold (inclusive).
    exceed_count = sum(1 for item in data if item.retry_tailp99 >= item.threshold)

    logger.debug("Aggregated retry_tailp99: %.4f", agg_value)
    logger.info("%d of %d runs exceed threshold.", exceed_count, len(data))

    return agg_value