Add rollout_report_generator/src/rollout_report_generator/core.py

This commit is contained in:
Mika 2026-02-21 15:27:03 +00:00
commit 20205707ee

View file

@ -0,0 +1,128 @@
from __future__ import annotations

import argparse
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import pandas as pd
# Logging setup
# NOTE(review): basicConfig is called at import time, so importing this module
# may configure the root logger of the host process (basicConfig only takes
# effect when the root logger has no handlers yet).
_LOG_FORMAT = "[%(asctime)s] %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class RolloutData:
    """Typed record holding one row of rollout data.

    The field names match the columns required by ``_validate_dataframe``.
    NOTE(review): nothing in the visible part of this module instantiates
    this class; the per-field notes below are assumptions to confirm.
    """

    # presumably a fraction/rate of "unknown" outcomes -- confirm units
    unknown_rate: float
    # presumably a fraction/rate of "warn" outcomes -- confirm units
    warn_rate: float
    # presumably an identifier of the policy version in effect -- confirm
    policy_hash: str
    # presumably the number of runs the rates were computed over -- confirm
    runs: int
@dataclass
class WhitelistEntry:
    """Typed record describing a single whitelist entry.

    All five fields are required positional/keyword arguments; ``expires_at``
    has no default but accepts ``None``.
    NOTE(review): nothing in the visible part of this module uses this class;
    the per-field notes below are assumptions to confirm.
    """

    # presumably the identifier being whitelisted -- confirm
    key: str
    # presumably where the entry applies -- confirm allowed values
    scope: str
    # free-text justification for the entry
    rationale: str
    # creation timestamp; NOTE(review): timezone convention not shown here
    added_at: datetime
    # presumably None means the entry never expires -- confirm
    expires_at: Optional[datetime]
class ReportGenerationError(Exception):
    """Raised when the rollout report cannot be produced.

    Used for invalid input data (missing or mistyped columns), a missing
    input file, and as the wrapper for unexpected errors in
    ``generate_report``.
    """
def _validate_dataframe(df: pd.DataFrame) -> None:
required_columns = {"unknown_rate", "warn_rate", "policy_hash", "runs"}
missing = required_columns - set(df.columns)
if missing:
raise ReportGenerationError(f"Missing required columns: {', '.join(missing)}")
for col in ["unknown_rate", "warn_rate"]:
if not pd.api.types.is_numeric_dtype(df[col]):
raise ReportGenerationError(f"Column {col} must be numeric.")
if not pd.api.types.is_integer_dtype(df["runs"]):
raise ReportGenerationError("Column 'runs' must be integer.")
def _calculate_metrics(df: pd.DataFrame, col: str) -> dict[str, float]:
series = df[col].dropna()
stats = {
"min": float(series.min()),
"median": float(series.median()),
"p95": float(series.quantile(0.95)),
"max": float(series.max()),
}
return stats
def _generate_markdown_report(unknown_metrics: dict[str, float], warn_metrics: dict[str, float]) -> str:
lines = [
"# Rollout Data Analysis Report", "",
f"Generated at: {datetime.utcnow().isoformat()} UTC", "",
"## Metrics Summary", "",
"| Metric | unknown_rate | warn_rate |",
"|---------|--------------|-----------|",
f"| Min | {unknown_metrics['min']:.4f} | {warn_metrics['min']:.4f} |",
f"| Median | {unknown_metrics['median']:.4f} | {warn_metrics['median']:.4f} |",
f"| P95 | {unknown_metrics['p95']:.4f} | {warn_metrics['p95']:.4f} |",
f"| Max | {unknown_metrics['max']:.4f} | {warn_metrics['max']:.4f} |",
"",
"## Threshold Recommendations", "",
"- **unknown_rate PASS threshold** ≤ median",
"- **unknown_rate WARN threshold** between median and p95",
"- **warn_rate PASS threshold** ≤ median",
"- **warn_rate WARN threshold** between median and p95",
"",
"## Notes", "",
"This report summarizes historical rollout behavior to assist in setting validation thresholds.",
]
return "\n".join(lines)
def generate_report(csv_file_path: str, md_file_path: str) -> None:
    """Read rollout CSV data, calculate metrics, and write a Markdown report.

    Args:
        csv_file_path: Path to the input CSV file. It must contain the
            columns checked by ``_validate_dataframe``.
        md_file_path: Path of the Markdown file to write. Missing parent
            directories are created; the file is written as UTF-8.

    Raises:
        ReportGenerationError: If the input file does not exist, fails
            validation, or any other error occurs while reading, computing
            or writing. Unexpected errors are wrapped, with the original
            exception attached as ``__cause__``.
    """
    logger.info("Starting report generation.")
    try:
        csv_path = Path(csv_file_path)
        md_path = Path(md_file_path)
        if not csv_path.exists():
            raise ReportGenerationError(f"Input file not found: {csv_file_path}")
        df = pd.read_csv(csv_path)
        _validate_dataframe(df)
        unknown_metrics = _calculate_metrics(df, "unknown_rate")
        warn_metrics = _calculate_metrics(df, "warn_rate")
        report_content = _generate_markdown_report(unknown_metrics, warn_metrics)
        md_path.parent.mkdir(parents=True, exist_ok=True)
        md_path.write_text(report_content, encoding="utf-8")
    except ReportGenerationError:
        # Already a domain error with a precise message: log it and let it
        # propagate unchanged instead of wrapping it in a second
        # ReportGenerationError.
        logger.exception("Failed to generate rollout report.")
        raise
    except Exception as exc:
        # Boundary handler: convert anything unexpected (I/O, parsing, ...)
        # into the module's own exception type, keeping the cause chained.
        logger.exception("Failed to generate rollout report.")
        raise ReportGenerationError(str(exc)) from exc
    else:
        logger.info("Markdown report successfully written to %s.", md_path)
# CLI entrypoint
def _build_cli_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Generate rollout metrics report.")
parser.add_argument("--csv", required=True, help="Path to rollout CSV file.")
parser.add_argument("--out", required=True, help="Path to output markdown file.")
return parser
def main(argv: Optional[list[str]] = None) -> None:
    """Command-line entry point: parse arguments and generate the report.

    Args:
        argv: Argument list to parse. When ``None`` (the default), argparse
            falls back to ``sys.argv[1:]``, so existing no-argument calls
            behave as before. Passing a list allows programmatic use.

    Raises:
        ReportGenerationError: Propagated from ``generate_report``.
        SystemExit: Raised by argparse on invalid arguments or ``--help``.
    """
    parser = _build_cli_parser()
    args = parser.parse_args(argv)
    generate_report(args.csv, args.out)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()