Add rerun_evaluator/src/rerun_evaluator/core.py
This commit is contained in:
parent
f7af088970
commit
fa99a0b2bf
1 changed files with 102 additions and 0 deletions
102
rerun_evaluator/src/rerun_evaluator/core.py
Normal file
102
rerun_evaluator/src/rerun_evaluator/core.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Any
|
||||
import statistics
|
||||
import logging
|
||||
|
||||
|
||||
# Configure basic logging for CI-ready environments
# NOTE(review): calling basicConfig at import time configures the root logger
# as a module side effect; applications embedding this module may prefer to
# own logging configuration themselves — confirm this is intended.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InvalidRunDataError(ValueError):
    """Signals that a RunData record failed validation (missing or ill-typed field).

    Subclasses ValueError so callers may catch either the specific or the
    generic exception type.
    """
|
||||
|
||||
|
||||
@dataclass
class RunData:
    """Represents a single CI run entry with metadata for rerun evaluation.

    Attributes:
        run_id: Non-empty unique identifier for the CI run.
        label_triggered: Whether the run was label-triggered (per field name;
            exact trigger semantics defined by the caller).
        flappy: Whether the run was marked as flaky/intermittent.
        pinned: Whether the run was marked as pinned.
        unknown_rate: Fraction of unknown results, in [0.0, 1.0].
    """

    run_id: str
    label_triggered: bool
    flappy: bool
    pinned: bool
    unknown_rate: float

    def __post_init__(self) -> None:
        """Validate field types/ranges; raise InvalidRunDataError on failure."""
        if not isinstance(self.run_id, str) or not self.run_id:
            raise InvalidRunDataError("run_id must be a non-empty string")
        if not isinstance(self.label_triggered, bool):
            raise InvalidRunDataError("label_triggered must be a bool")
        if not isinstance(self.flappy, bool):
            raise InvalidRunDataError("flappy must be a bool")
        if not isinstance(self.pinned, bool):
            raise InvalidRunDataError("pinned must be a bool")
        # bool is a subclass of int, so without the explicit bool check
        # True/False would slip through as a "rate" of 1/0.
        if isinstance(self.unknown_rate, bool) or not isinstance(self.unknown_rate, (float, int)):
            raise InvalidRunDataError("unknown_rate must be a float between 0.0 and 1.0")
        if not (0.0 <= self.unknown_rate <= 1.0):
            raise InvalidRunDataError("unknown_rate must be a float between 0.0 and 1.0")
|
||||
|
||||
|
||||
def evaluate_rerun_needs(runs_data: List[RunData]) -> Dict[str, Any]:
|
||||
"""Evaluate whether reruns are needed based on CI run historical data.
|
||||
|
||||
Args:
|
||||
runs_data: List of RunData entries.
|
||||
|
||||
Returns:
|
||||
dict: Evaluation metrics for rerun necessity and stability.
|
||||
"""
|
||||
assert isinstance(runs_data, list), "runs_data must be a list of RunData instances"
|
||||
if not runs_data:
|
||||
return {
|
||||
"total_runs": 0,
|
||||
"rerun_recommended": False,
|
||||
"metrics": {
|
||||
"label_trigger_rate": 0.0,
|
||||
"flappy_rate": 0.0,
|
||||
"unknown_rate_avg": 0.0,
|
||||
"pinned_rate": 0.0,
|
||||
},
|
||||
}
|
||||
|
||||
# Validate all elements
|
||||
for item in runs_data:
|
||||
if not isinstance(item, RunData):
|
||||
raise InvalidRunDataError("All elements in runs_data must be RunData instances")
|
||||
|
||||
# Compute basic statistics
|
||||
label_triggered_values = [r.label_triggered for r in runs_data]
|
||||
flappy_values = [r.flappy for r in runs_data]
|
||||
pinned_values = [r.pinned for r in runs_data]
|
||||
unknown_rates = [r.unknown_rate for r in runs_data]
|
||||
|
||||
metrics = {
|
||||
"label_trigger_rate": sum(label_triggered_values) / len(runs_data),
|
||||
"flappy_rate": sum(flappy_values) / len(runs_data),
|
||||
"unknown_rate_avg": statistics.fmean(unknown_rates) if unknown_rates else 0.0,
|
||||
"pinned_rate": sum(pinned_values) / len(runs_data),
|
||||
}
|
||||
|
||||
# Simple heuristic for rerun recommendation
|
||||
rerun_recommended = (
|
||||
metrics["flappy_rate"] > 0.2 or
|
||||
metrics["unknown_rate_avg"] > 0.15 or
|
||||
metrics["label_trigger_rate"] < 0.3
|
||||
)
|
||||
|
||||
result = {
|
||||
"total_runs": len(runs_data),
|
||||
"rerun_recommended": bool(rerun_recommended),
|
||||
"metrics": metrics,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Rerun evaluation completed: total_runs=%d, rerun_recommended=%s",
|
||||
result["total_runs"],
|
||||
result["rerun_recommended"],
|
||||
)
|
||||
|
||||
return result
|
||||
Loading…
Reference in a new issue