Add rollout_report_generator/src/rollout_report_generator/core.py
This commit is contained in:
commit
20205707ee
1 changed file with 128 additions and 0 deletions
128
rollout_report_generator/src/rollout_report_generator/core.py
Normal file
128
rollout_report_generator/src/rollout_report_generator/core.py
Normal file
|
|
@@ -0,0 +1,128 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# Module-wide logging configuration: INFO level, with every record rendered
# as "[timestamp] LEVEL - message". Runs once, when the module is imported.
logging.basicConfig(
    format='[%(asctime)s] %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class RolloutData:
    """Plain record with two rate values, a policy hash and a run count."""

    # Declaration order is also the positional order of the generated
    # __init__, so it must not be rearranged.
    unknown_rate: float
    warn_rate: float
    policy_hash: str
    runs: int
|
||||
|
||||
|
||||
@dataclass
class WhitelistEntry:
    """Plain record describing one whitelist entry."""

    # Declaration order is also the positional order of the generated
    # __init__, so it must not be rearranged.
    key: str
    scope: str
    rationale: str
    added_at: datetime
    # No default is declared: callers must pass a value explicitly, and
    # None is an accepted one.
    expires_at: Optional[datetime]
|
||||
|
||||
|
||||
class ReportGenerationError(Exception):
    """Signals that the rollout report could not be generated."""
|
||||
|
||||
|
||||
def _validate_dataframe(df: pd.DataFrame) -> None:
    """Check that *df* has the columns and dtypes the report relies on.

    Args:
        df: Frame to validate; it is only inspected, never modified.

    Raises:
        ReportGenerationError: If any of ``unknown_rate``, ``warn_rate``,
            ``policy_hash`` or ``runs`` is absent, if ``unknown_rate`` or
            ``warn_rate`` does not have a numeric dtype, or if ``runs`` does
            not have an integer dtype.
    """
    required_columns = {"unknown_rate", "warn_rate", "policy_hash", "runs"}
    missing = required_columns - set(df.columns)
    if missing:
        # Set iteration order is arbitrary; sort so the message is identical
        # from one run to the next.
        raise ReportGenerationError(
            f"Missing required columns: {', '.join(sorted(missing))}"
        )

    for col in ("unknown_rate", "warn_rate"):
        if not pd.api.types.is_numeric_dtype(df[col]):
            raise ReportGenerationError(f"Column {col} must be numeric.")

    if not pd.api.types.is_integer_dtype(df["runs"]):
        raise ReportGenerationError("Column 'runs' must be integer.")
|
||||
|
||||
|
||||
def _calculate_metrics(df: pd.DataFrame, col: str) -> dict[str, float]:
    """Summarise one column as min, median, 95th percentile and max.

    Missing values are dropped before any statistic is computed.

    Args:
        df: Frame containing the column.
        col: Name of the column to summarise.

    Returns:
        A dict with the keys ``"min"``, ``"median"``, ``"p95"`` and
        ``"max"``, each mapped to a plain ``float``.

    Raises:
        ValueError: If the column holds no non-missing values. Without this
            check every statistic would silently come back as NaN.
    """
    series = df[col].dropna()
    if series.empty:
        raise ValueError(f"Column {col} contains no values to summarize.")

    return {
        "min": float(series.min()),
        "median": float(series.median()),
        "p95": float(series.quantile(0.95)),
        "max": float(series.max()),
    }
|
||||
|
||||
|
||||
def _generate_markdown_report(unknown_metrics: dict[str, float], warn_metrics: dict[str, float]) -> str:
    """Render the Markdown report for the two sets of metrics.

    Args:
        unknown_metrics: Statistics for ``unknown_rate``; must provide the
            keys ``"min"``, ``"median"``, ``"p95"`` and ``"max"``.
        warn_metrics: Statistics for ``warn_rate``, with the same keys.

    Returns:
        The report as one newline-joined string: a title, a generation
        timestamp, a metrics table with values formatted to four decimals,
        static threshold recommendations and a closing note.

    Raises:
        KeyError: If either dict lacks one of the four keys.
    """
    # Local import: only this function needs ``timezone``.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and strip tzinfo so the ISO string keeps its offset-free form;
    # the zone is spelled out by the literal " UTC" suffix instead.
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    table_rows = [
        f"| {label} | {unknown_metrics[key]:.4f} | {warn_metrics[key]:.4f} |"
        for label, key in (
            ("Min", "min"),
            ("Median", "median"),
            ("P95", "p95"),
            ("Max", "max"),
        )
    ]

    lines = [
        "# Rollout Data Analysis Report", "",
        f"Generated at: {generated_at} UTC", "",
        "## Metrics Summary", "",
        "| Metric | unknown_rate | warn_rate |",
        "|---------|--------------|-----------|",
        *table_rows,
        "",
        "## Threshold Recommendations", "",
        "- **unknown_rate PASS threshold** ≤ median",
        "- **unknown_rate WARN threshold** between median and p95",
        "- **warn_rate PASS threshold** ≤ median",
        "- **warn_rate WARN threshold** between median and p95",
        "",
        "## Notes", "",
        "This report summarizes historical rollout behavior to assist in setting validation thresholds.",
    ]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_report(csv_file_path: str, md_file_path: str) -> None:
    """Read rollout CSV data, calculate metrics, and write a Markdown report.

    Args:
        csv_file_path: Path of the CSV file to read.
        md_file_path: Path of the Markdown file to write. Missing parent
            directories are created.

    Raises:
        ReportGenerationError: If the input file does not exist, fails
            validation, or any other step fails. Unexpected exceptions are
            wrapped with the original attached as ``__cause__``.
    """
    logger.info("Starting report generation.")
    try:
        csv_path = Path(csv_file_path)
        md_path = Path(md_file_path)

        if not csv_path.exists():
            raise ReportGenerationError(f"Input file not found: {csv_file_path}")

        df = pd.read_csv(csv_path)
        _validate_dataframe(df)

        unknown_metrics = _calculate_metrics(df, "unknown_rate")
        warn_metrics = _calculate_metrics(df, "warn_rate")

        report_content = _generate_markdown_report(unknown_metrics, warn_metrics)

        md_path.parent.mkdir(parents=True, exist_ok=True)
        md_path.write_text(report_content, encoding="utf-8")

        logger.info("Markdown report successfully written to %s.", md_path)
    except ReportGenerationError:
        # Already the domain error: log it and propagate it unchanged rather
        # than wrapping it in a second ReportGenerationError.
        logger.exception("Failed to generate rollout report.")
        raise
    except Exception as exc:
        # Boundary handler: surface any unexpected failure as the single
        # exception type callers catch, keeping the cause chained.
        logger.exception("Failed to generate rollout report.")
        raise ReportGenerationError(str(exc)) from exc
|
||||
|
||||
|
||||
# CLI entrypoint
|
||||
def _build_cli_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with its two required options.

    Returns:
        A parser that accepts ``--csv`` and ``--out``, both mandatory.
    """
    cli = argparse.ArgumentParser(description="Generate rollout metrics report.")
    option_help = (
        ("--csv", "Path to rollout CSV file."),
        ("--out", "Path to output markdown file."),
    )
    for flag, help_text in option_help:
        cli.add_argument(flag, required=True, help=help_text)
    return cli
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command-line arguments and generate the report from them."""
    cli_args = _build_cli_parser().parse_args()
    generate_report(cli_args.csv, cli_args.out)


# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in a new issue