Add rerun_evaluator/src/rerun_evaluator/core.py
This commit is contained in:
parent
f7af088970
commit
fa99a0b2bf
1 changed files with 102 additions and 0 deletions
102
rerun_evaluator/src/rerun_evaluator/core.py
Normal file
102
rerun_evaluator/src/rerun_evaluator/core.py
Normal file
|
|
@ -0,0 +1,102 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
import statistics
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
# Configure basic logging for CI-ready environments.
# NOTE(review): this call sits at module top level, so it presumably runs on
# import and sets up root logging at INFO for the importing process — confirm
# this is intended for library use rather than only for the CLI/CI entry point.
logging.basicConfig(level=logging.INFO)
# Logger named after this module; used by evaluate_rerun_needs to report results.
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidRunDataError(ValueError):
    """Signal that RunData input is invalid.

    Raised when a RunData instance has missing or incorrect field values.
    It subclasses ValueError, so callers that catch ValueError also catch it.
    """
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RunData:
    """Represents a single CI run entry with metadata for rerun evaluation.

    All fields are validated in ``__post_init__``; construction fails with
    InvalidRunDataError as soon as the first invalid field is found.

    Raises:
        InvalidRunDataError: If ``run_id`` is not a non-empty string, if any
            of ``label_triggered``, ``flappy`` or ``pinned`` is not a bool,
            or if ``unknown_rate`` is not a float/int (bools are rejected)
            within the closed interval [0.0, 1.0].
    """

    run_id: str  # must be a non-empty string
    label_triggered: bool
    flappy: bool
    pinned: bool
    unknown_rate: float  # must lie within [0.0, 1.0]; ints 0 and 1 are accepted

    def __post_init__(self) -> None:
        """Validate field types and ranges after dataclass initialization."""
        if not isinstance(self.run_id, str) or not self.run_id:
            raise InvalidRunDataError("run_id must be a non-empty string")

        # The three flag fields share the same rule and message shape.
        for flag_name in ("label_triggered", "flappy", "pinned"):
            if not isinstance(getattr(self, flag_name), bool):
                raise InvalidRunDataError(f"{flag_name} must be a bool")

        rate = self.unknown_rate
        # bool is a subclass of int, so True/False would otherwise slip
        # through the numeric check and be treated as 1/0.
        is_number = isinstance(rate, (float, int)) and not isinstance(rate, bool)
        # The chained comparison is False for NaN, so NaN is rejected as well.
        if not is_number or not (0.0 <= rate <= 1.0):
            raise InvalidRunDataError("unknown_rate must be a float between 0.0 and 1.0")
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_rerun_needs(
    runs_data: List[RunData],
    *,
    flappy_threshold: float = 0.2,
    unknown_rate_threshold: float = 0.15,
    label_trigger_threshold: float = 0.3,
) -> Dict[str, Any]:
    """Evaluate whether reruns are needed based on CI run historical data.

    Args:
        runs_data: List of RunData entries.
        flappy_threshold: A rerun is recommended when the share of flappy
            runs is strictly greater than this value.
        unknown_rate_threshold: A rerun is recommended when the mean
            ``unknown_rate`` is strictly greater than this value.
        label_trigger_threshold: A rerun is recommended when the share of
            label-triggered runs is strictly less than this value.

    Returns:
        dict: ``{"total_runs": int, "rerun_recommended": bool, "metrics": {...}}``
        where ``metrics`` holds ``label_trigger_rate``, ``flappy_rate``,
        ``unknown_rate_avg`` and ``pinned_rate``. For an empty list all
        metrics are 0.0 and no rerun is recommended.

    Raises:
        InvalidRunDataError: If ``runs_data`` is not a list or contains an
            element that is not a RunData instance.
    """
    # Raise explicitly instead of using `assert`, which is removed under
    # `python -O` and would let non-list input through unchecked.
    if not isinstance(runs_data, list):
        raise InvalidRunDataError("runs_data must be a list of RunData instances")

    total_runs = len(runs_data)
    if total_runs == 0:
        return {
            "total_runs": 0,
            "rerun_recommended": False,
            "metrics": {
                "label_trigger_rate": 0.0,
                "flappy_rate": 0.0,
                "unknown_rate_avg": 0.0,
                "pinned_rate": 0.0,
            },
        }

    # Validate all elements before touching their attributes.
    if not all(isinstance(item, RunData) for item in runs_data):
        raise InvalidRunDataError("All elements in runs_data must be RunData instances")

    # Booleans sum as 0/1, so sum(...) / total_runs is the fraction of True values.
    metrics = {
        "label_trigger_rate": sum(r.label_triggered for r in runs_data) / total_runs,
        "flappy_rate": sum(r.flappy for r in runs_data) / total_runs,
        "unknown_rate_avg": statistics.fmean(r.unknown_rate for r in runs_data),
        "pinned_rate": sum(r.pinned for r in runs_data) / total_runs,
    }

    # Simple heuristic: any single signal crossing its threshold is enough.
    rerun_recommended = (
        metrics["flappy_rate"] > flappy_threshold
        or metrics["unknown_rate_avg"] > unknown_rate_threshold
        or metrics["label_trigger_rate"] < label_trigger_threshold
    )

    result = {
        "total_runs": total_runs,
        "rerun_recommended": bool(rerun_recommended),
        "metrics": metrics,
    }

    logger.info(
        "Rerun evaluation completed: total_runs=%d, rerun_recommended=%s",
        result["total_runs"],
        result["rerun_recommended"],
    )

    return result
|
||||||
Loading…
Reference in a new issue