# rollout_report_generator/src/rollout_report_generator/core.py
#
# Reads a rollout CSV, computes summary statistics (min / median / p95 / max)
# for the ``unknown_rate`` and ``warn_rate`` columns and writes them out as a
# Markdown report.
#
# Provenance: added by commit 20205707eed46e6ba4556949c3c2a217baf8007f
# ("Add rollout_report_generator/src/rollout_report_generator/core.py",
# Mika, 2026-02-21).

from __future__ import annotations

import argparse
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import pandas as pd

# Logging setup
# NOTE(review): configuring the root logger at import time affects every
# importer of this module; kept as-is because callers may rely on it.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Columns every input CSV must provide, and the subset that metrics are
# computed for.
_REQUIRED_COLUMNS = ("unknown_rate", "warn_rate", "policy_hash", "runs")
_RATE_COLUMNS = ("unknown_rate", "warn_rate")


@dataclass
class RolloutData:
    """One rollout record; field names mirror the required CSV columns.

    Not referenced by the functions in this module.
    """

    unknown_rate: float
    warn_rate: float
    policy_hash: str
    runs: int


@dataclass
class WhitelistEntry:
    """A whitelist record with an optional expiry.

    Not referenced by the functions in this module.
    """

    key: str
    scope: str
    rationale: str
    added_at: datetime
    expires_at: Optional[datetime]


class ReportGenerationError(Exception):
    """Custom exception for errors during report generation."""


def _validate_dataframe(df: pd.DataFrame) -> None:
    """Check that *df* has the required columns with usable dtypes.

    Raises:
        ReportGenerationError: if a required column is missing, a rate column
            is not numeric, or ``runs`` is not an integer column.
    """
    # Sorted so the error message is deterministic (set order is not).
    missing = sorted(set(_REQUIRED_COLUMNS) - set(df.columns))
    if missing:
        raise ReportGenerationError(f"Missing required columns: {', '.join(missing)}")
    for col in _RATE_COLUMNS:
        if not pd.api.types.is_numeric_dtype(df[col]):
            raise ReportGenerationError(f"Column {col} must be numeric.")
    if not pd.api.types.is_integer_dtype(df["runs"]):
        raise ReportGenerationError("Column 'runs' must be integer.")


def _calculate_metrics(df: pd.DataFrame, col: str) -> dict[str, float]:
    """Return min, median, 95th percentile and max of ``df[col]``.

    Null values are dropped before the statistics are computed.

    Raises:
        ReportGenerationError: if the column has no non-null values, since
            every statistic would otherwise be NaN.
    """
    series = df[col].dropna()
    if series.empty:
        raise ReportGenerationError(f"Column {col} has no non-null values.")
    return {
        "min": float(series.min()),
        "median": float(series.median()),
        "p95": float(series.quantile(0.95)),
        "max": float(series.max()),
    }


def _generate_markdown_report(unknown_metrics: dict[str, float], warn_metrics: dict[str, float]) -> str:
    """Render the two metric dicts as a Markdown document.

    Both dicts must provide the keys ``min``, ``median``, ``p95`` and ``max``.
    """
    # datetime.utcnow() is deprecated; take an aware UTC "now" and drop the
    # tzinfo so the rendered text keeps its "<iso timestamp> UTC" shape
    # instead of gaining a "+00:00" suffix.
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    lines = [
        "# Rollout Data Analysis Report", "",
        f"Generated at: {generated_at} UTC", "",
        "## Metrics Summary", "",
        "| Metric | unknown_rate | warn_rate |",
        "|---------|--------------|-----------|",
        f"| Min | {unknown_metrics['min']:.4f} | {warn_metrics['min']:.4f} |",
        f"| Median | {unknown_metrics['median']:.4f} | {warn_metrics['median']:.4f} |",
        f"| P95 | {unknown_metrics['p95']:.4f} | {warn_metrics['p95']:.4f} |",
        f"| Max | {unknown_metrics['max']:.4f} | {warn_metrics['max']:.4f} |",
        "",
        "## Threshold Recommendations", "",
        "- **unknown_rate PASS threshold** ≤ median",
        "- **unknown_rate WARN threshold** between median and p95",
        "- **warn_rate PASS threshold** ≤ median",
        "- **warn_rate WARN threshold** between median and p95",
        "",
        "## Notes", "",
        "This report summarizes historical rollout behavior to assist in setting validation thresholds.",
    ]
    return "\n".join(lines)


def generate_report(csv_file_path: str, md_file_path: str) -> None:
    """Read rollout CSV data, calculate metrics, and write a Markdown report.

    Args:
        csv_file_path: Path of the input CSV; it must contain the columns
            ``unknown_rate``, ``warn_rate``, ``policy_hash`` and ``runs``.
        md_file_path: Path of the Markdown file to write. Missing parent
            directories are created.

    Raises:
        ReportGenerationError: if the input is missing or invalid, or if any
            other error occurs while reading, computing or writing. Errors
            that did not originate here are attached as ``__cause__``.
    """
    logger.info("Starting report generation.")
    try:
        csv_path = Path(csv_file_path)
        md_path = Path(md_file_path)

        if not csv_path.exists():
            raise ReportGenerationError(f"Input file not found: {csv_file_path}")

        df = pd.read_csv(csv_path)
        _validate_dataframe(df)

        unknown_metrics = _calculate_metrics(df, "unknown_rate")
        warn_metrics = _calculate_metrics(df, "warn_rate")

        report_content = _generate_markdown_report(unknown_metrics, warn_metrics)

        md_path.parent.mkdir(parents=True, exist_ok=True)
        md_path.write_text(report_content, encoding="utf-8")
    except ReportGenerationError:
        # Already a domain error: log it, but do not wrap it in a second
        # ReportGenerationError.
        logger.exception("Failed to generate rollout report.")
        raise
    except Exception as exc:
        logger.exception("Failed to generate rollout report.")
        raise ReportGenerationError(str(exc)) from exc
    else:
        logger.info("Markdown report successfully written to %s.", md_path)


# CLI entrypoint
def _build_cli_parser() -> argparse.ArgumentParser:
    """Build the argument parser with the required ``--csv`` and ``--out`` options."""
    parser = argparse.ArgumentParser(description="Generate rollout metrics report.")
    parser.add_argument("--csv", required=True, help="Path to rollout CSV file.")
    parser.add_argument("--out", required=True, help="Path to output markdown file.")
    return parser


def main() -> None:
    """Parse command-line arguments and generate the report."""
    parser = _build_cli_parser()
    args = parser.parse_args()
    generate_report(args.csv, args.out)


if __name__ == "__main__":
    main()