Add rollout_report_generator/src/rollout_report_generator/core.py
This commit is contained in:
commit
20205707ee
1 changed file with 128 additions and 0 deletions
128
rollout_report_generator/src/rollout_report_generator/core.py
Normal file
128
rollout_report_generator/src/rollout_report_generator/core.py
Normal file
|
|
@ -0,0 +1,128 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Logging setup: configure the root logger at import time and create the
# module-level logger used by the functions in this file.
logging.basicConfig(
    format='[%(asctime)s] %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RolloutData:
    """A single rollout record.

    The field names match the columns that ``_validate_dataframe`` requires
    in the input data.
    """

    # NOTE(review): the two rates are presumably fractions - confirm the
    # expected range against the producer of the CSV.
    unknown_rate: float
    warn_rate: float
    policy_hash: str
    runs: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class WhitelistEntry:
    """A whitelist entry with its scope, rationale and validity window.

    ``expires_at`` is optional and defaults to ``None``, so callers do not
    have to pass it explicitly.
    NOTE(review): ``None`` presumably means "never expires" - confirm.
    """

    key: str
    scope: str
    rationale: str
    added_at: datetime
    # Last field, so giving it a default keeps positional construction with
    # all five arguments working exactly as before.
    expires_at: Optional[datetime] = None
|
||||||
|
|
||||||
|
|
||||||
|
class ReportGenerationError(Exception):
    """Signals that input validation or report generation failed."""
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_dataframe(df: pd.DataFrame) -> None:
|
||||||
|
required_columns = {"unknown_rate", "warn_rate", "policy_hash", "runs"}
|
||||||
|
missing = required_columns - set(df.columns)
|
||||||
|
if missing:
|
||||||
|
raise ReportGenerationError(f"Missing required columns: {', '.join(missing)}")
|
||||||
|
for col in ["unknown_rate", "warn_rate"]:
|
||||||
|
if not pd.api.types.is_numeric_dtype(df[col]):
|
||||||
|
raise ReportGenerationError(f"Column {col} must be numeric.")
|
||||||
|
if not pd.api.types.is_integer_dtype(df["runs"]):
|
||||||
|
raise ReportGenerationError("Column 'runs' must be integer.")
|
||||||
|
|
||||||
|
|
||||||
|
def _calculate_metrics(df: pd.DataFrame, col: str) -> dict[str, float]:
|
||||||
|
series = df[col].dropna()
|
||||||
|
stats = {
|
||||||
|
"min": float(series.min()),
|
||||||
|
"median": float(series.median()),
|
||||||
|
"p95": float(series.quantile(0.95)),
|
||||||
|
"max": float(series.max()),
|
||||||
|
}
|
||||||
|
return stats
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_markdown_report(unknown_metrics: dict[str, float], warn_metrics: dict[str, float]) -> str:
|
||||||
|
lines = [
|
||||||
|
"# Rollout Data Analysis Report", "",
|
||||||
|
f"Generated at: {datetime.utcnow().isoformat()} UTC", "",
|
||||||
|
"## Metrics Summary", "",
|
||||||
|
"| Metric | unknown_rate | warn_rate |",
|
||||||
|
"|---------|--------------|-----------|",
|
||||||
|
f"| Min | {unknown_metrics['min']:.4f} | {warn_metrics['min']:.4f} |",
|
||||||
|
f"| Median | {unknown_metrics['median']:.4f} | {warn_metrics['median']:.4f} |",
|
||||||
|
f"| P95 | {unknown_metrics['p95']:.4f} | {warn_metrics['p95']:.4f} |",
|
||||||
|
f"| Max | {unknown_metrics['max']:.4f} | {warn_metrics['max']:.4f} |",
|
||||||
|
"",
|
||||||
|
"## Threshold Recommendations", "",
|
||||||
|
"- **unknown_rate PASS threshold** ≤ median",
|
||||||
|
"- **unknown_rate WARN threshold** between median and p95",
|
||||||
|
"- **warn_rate PASS threshold** ≤ median",
|
||||||
|
"- **warn_rate WARN threshold** between median and p95",
|
||||||
|
"",
|
||||||
|
"## Notes", "",
|
||||||
|
"This report summarizes historical rollout behavior to assist in setting validation thresholds.",
|
||||||
|
]
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_report(csv_file_path: str, md_file_path: str) -> None:
    """Read rollout CSV data, calculate metrics, and write a Markdown report.

    Args:
        csv_file_path: Path of the input CSV file.
        md_file_path: Path of the Markdown file to write. Missing parent
            directories are created.

    Raises:
        ReportGenerationError: If the input file does not exist, if
            validation fails, or if any other error occurs while reading,
            computing or writing. Unexpected errors are chained as the
            ``__cause__`` of the raised exception.
    """
    logger.info("Starting report generation.")
    try:
        csv_path = Path(csv_file_path)
        md_path = Path(md_file_path)

        if not csv_path.exists():
            raise ReportGenerationError(f"Input file not found: {csv_file_path}")

        df = pd.read_csv(csv_path)
        _validate_dataframe(df)

        unknown_metrics = _calculate_metrics(df, "unknown_rate")
        warn_metrics = _calculate_metrics(df, "warn_rate")

        report_content = _generate_markdown_report(unknown_metrics, warn_metrics)

        md_path.parent.mkdir(parents=True, exist_ok=True)
        md_path.write_text(report_content, encoding="utf-8")

        logger.info("Markdown report successfully written to %s.", md_path)
    except ReportGenerationError:
        # Already the public error type: log it and re-raise unchanged
        # instead of wrapping it in a second ReportGenerationError.
        logger.exception("Failed to generate rollout report.")
        raise
    except Exception as exc:
        # Boundary for everything else (I/O, parsing, ...): convert to the
        # module's error type while keeping the original as the cause.
        logger.exception("Failed to generate rollout report.")
        raise ReportGenerationError(str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
|
# CLI entrypoint
|
||||||
|
def _build_cli_parser() -> argparse.ArgumentParser:
|
||||||
|
parser = argparse.ArgumentParser(description="Generate rollout metrics report.")
|
||||||
|
parser.add_argument("--csv", required=True, help="Path to rollout CSV file.")
|
||||||
|
parser.add_argument("--out", required=True, help="Path to output markdown file.")
|
||||||
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Parse the command-line arguments and generate the report."""
    options = _build_cli_parser().parse_args()
    generate_report(csv_file_path=options.csv, md_file_path=options.out)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in a new issue