From fa99a0b2bfa4ffabb436444b9e171cf18c77206a Mon Sep 17 00:00:00 2001
From: Mika
Date: Sat, 31 Jan 2026 13:07:41 +0000
Subject: [PATCH] Add rerun_evaluator/src/rerun_evaluator/core.py

---
 rerun_evaluator/src/rerun_evaluator/core.py | 102 ++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 rerun_evaluator/src/rerun_evaluator/core.py

diff --git a/rerun_evaluator/src/rerun_evaluator/core.py b/rerun_evaluator/src/rerun_evaluator/core.py
new file mode 100644
index 0000000..8d5fcd5
--- /dev/null
+++ b/rerun_evaluator/src/rerun_evaluator/core.py
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import List, Dict, Any
+import statistics
+import logging
+
+
+# Configure basic logging for CI-ready environments
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class InvalidRunDataError(ValueError):
+    """Raised when a RunData instance is invalid or has missing/incorrect fields."""
+    pass
+
+
+@dataclass
+class RunData:
+    """Represents a single CI run entry with metadata for rerun evaluation."""
+
+    run_id: str
+    label_triggered: bool
+    flappy: bool
+    pinned: bool
+    unknown_rate: float
+
+    def __post_init__(self) -> None:
+        if not isinstance(self.run_id, str) or not self.run_id:
+            raise InvalidRunDataError("run_id must be a non-empty string")
+        if not isinstance(self.label_triggered, bool):
+            raise InvalidRunDataError("label_triggered must be a bool")
+        if not isinstance(self.flappy, bool):
+            raise InvalidRunDataError("flappy must be a bool")
+        if not isinstance(self.pinned, bool):
+            raise InvalidRunDataError("pinned must be a bool")
+        if not isinstance(self.unknown_rate, (float, int)) or not (0.0 <= self.unknown_rate <= 1.0):
+            raise InvalidRunDataError("unknown_rate must be a float between 0.0 and 1.0")
+
+
+def evaluate_rerun_needs(runs_data: List[RunData]) -> Dict[str, Any]:
+    """Evaluate whether reruns are needed based on CI run historical data.
+
+    Args:
+        runs_data: List of RunData entries.
+
+    Returns:
+        dict: Evaluation metrics for rerun necessity and stability.
+    """
+    assert isinstance(runs_data, list), "runs_data must be a list of RunData instances"
+    if not runs_data:
+        return {
+            "total_runs": 0,
+            "rerun_recommended": False,
+            "metrics": {
+                "label_trigger_rate": 0.0,
+                "flappy_rate": 0.0,
+                "unknown_rate_avg": 0.0,
+                "pinned_rate": 0.0,
+            },
+        }
+
+    # Validate all elements
+    for item in runs_data:
+        if not isinstance(item, RunData):
+            raise InvalidRunDataError("All elements in runs_data must be RunData instances")
+
+    # Compute basic statistics
+    label_triggered_values = [r.label_triggered for r in runs_data]
+    flappy_values = [r.flappy for r in runs_data]
+    pinned_values = [r.pinned for r in runs_data]
+    unknown_rates = [r.unknown_rate for r in runs_data]
+
+    metrics = {
+        "label_trigger_rate": sum(label_triggered_values) / len(runs_data),
+        "flappy_rate": sum(flappy_values) / len(runs_data),
+        "unknown_rate_avg": statistics.fmean(unknown_rates) if unknown_rates else 0.0,
+        "pinned_rate": sum(pinned_values) / len(runs_data),
+    }
+
+    # Simple heuristic for rerun recommendation
+    rerun_recommended = (
+        metrics["flappy_rate"] > 0.2 or
+        metrics["unknown_rate_avg"] > 0.15 or
+        metrics["label_trigger_rate"] < 0.3
+    )
+
+    result = {
+        "total_runs": len(runs_data),
+        "rerun_recommended": bool(rerun_recommended),
+        "metrics": metrics,
+    }
+
+    logger.info(
+        "Rerun evaluation completed: total_runs=%d, rerun_recommended=%s",
+        result["total_runs"],
+        result["rerun_recommended"],
+    )
+
+    return result