# artifact_3_retry_tail_analysis/src/artifact_3_retry_tail_analysis/core.py
from __future__ import annotations

import json
from pathlib import Path
from statistics import quantiles
from typing import Any, Dict

import pandas as pd


class InputValidationError(ValueError):
    """Raised when input data does not meet the expected structure.

    Derives from ``ValueError`` so that callers which caught the raw pandas
    conversion error for malformed ``retries`` samples keep working now that
    those failures are reported with this type.
    """


def _validate_input(data: Dict[str, Any]) -> None:
    """Check that *data* carries a usable retry-tail source.

    A valid input is a dict holding either a numeric ``'retry_tail_p99'`` or a
    list/tuple under ``'retries'``. When both keys are present only
    ``'retry_tail_p99'`` is checked, because that is the value used later.

    Args:
        data: Candidate input mapping.

    Raises:
        InputValidationError: If *data* is not a dict, has neither key, or the
            present key holds a value of the wrong type.
    """
    if not isinstance(data, dict):
        raise InputValidationError("Expected a dict input.")

    if 'retry_tail_p99' in data:
        value = data['retry_tail_p99']
        # bool is a subclass of int; True/False are never meaningful latencies.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise InputValidationError("'retry_tail_p99' must be numeric.")
        return

    if 'retries' in data:
        if not isinstance(data['retries'], (list, tuple)):
            raise InputValidationError("'retries' must be list or tuple of numbers.")
        return

    raise InputValidationError("Input missing 'retry_tail_p99' or 'retries'.")


def _extract_tail_p99(data: Dict[str, Any]) -> float:
    """Return the retry-tail p99 from *data*, computing it when necessary.

    A precomputed ``'retry_tail_p99'`` takes precedence. Otherwise the 0.99
    quantile of ``'retries'`` is computed with pandas; missing entries
    (``None``/NaN) are ignored.

    Args:
        data: Input mapping, expected to have passed ``_validate_input``.

    Returns:
        The p99 value as a float.

    Raises:
        InputValidationError: If ``'retries'`` is empty, contains values that
            cannot be converted to float, or contains only missing values.
    """
    if 'retry_tail_p99' in data:
        return float(data['retry_tail_p99'])

    retries = data.get('retries', [])
    if not retries:
        raise InputValidationError("Cannot compute retry_tail_p99 from empty retries list.")

    try:
        series = pd.Series(retries, dtype='float64')
    except (TypeError, ValueError) as err:
        # Surface conversion failures with the module's own error type.
        raise InputValidationError("'retries' must contain only numeric values.") from err

    # quantile() already skips NaN; dropping them explicitly lets us detect the
    # "no usable samples" case instead of silently returning NaN.
    samples = series.dropna()
    if samples.empty:
        raise InputValidationError(
            "Cannot compute retry_tail_p99: 'retries' contains no valid samples."
        )
    return float(samples.quantile(0.99))


def analyze_retry_tails(
    data_enforced: Dict[str, Any],
    data_randomized: Dict[str, Any],
) -> Dict[str, float]:
    """Compare retry-tail p99 values between the two affinity modes.

    Args:
        data_enforced: Retry data for the "affinity enforced" (pinned) mode.
        data_randomized: Retry data for the randomized ("affinity off") mode.

    Returns:
        A dict with ``retry_tail_p99_enforced``, ``retry_tail_p99_randomized``
        and ``delta_retry_tail_p99`` (enforced minus randomized), all floats.

    Raises:
        InputValidationError: If either input is structurally invalid or its
            p99 cannot be computed.
    """
    _validate_input(data_enforced)
    _validate_input(data_randomized)

    # _extract_tail_p99 always returns a float, so no further type checks
    # are needed on the result values.
    enforced_p99 = _extract_tail_p99(data_enforced)
    randomized_p99 = _extract_tail_p99(data_randomized)

    return {
        'retry_tail_p99_enforced': enforced_p99,
        'retry_tail_p99_randomized': randomized_p99,
        'delta_retry_tail_p99': enforced_p99 - randomized_p99,
    }