Add rollup_rollout/src/rollup_rollout/core.py

This commit is contained in:
Mika 2026-02-20 11:16:30 +00:00
commit 39a8082bb3

View file

@ -0,0 +1,82 @@
from __future__ import annotations
import json
import csv
from pathlib import Path
from dataclasses import dataclass
from typing import Any, List, Dict
@dataclass
class RolloutData:
    """Structured CI run data for a single Gate-V1 run.

    Instances are normally built through :meth:`from_dict`, which validates
    the presence and type of every field before construction.
    """

    policy_hash: str
    outcome: str
    unknown_rate: float
    top_reasons: str

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RolloutData":
        """Validate a dictionary and build an instance from it.

        Args:
            data: Mapping that must contain the keys ``policy_hash``,
                ``outcome``, ``unknown_rate`` and ``top_reasons``. Extra
                keys are ignored.

        Returns:
            A new ``RolloutData`` whose ``unknown_rate`` is always a float.

        Raises:
            ValueError: If ``data`` is not a dict or a required key is missing.
            TypeError: If a field has the wrong type. ``bool`` is rejected for
                ``unknown_rate`` even though it is a subclass of ``int``.
        """
        if not isinstance(data, dict):
            raise ValueError("Input must be a dictionary.")

        # Presence is checked for every field before any type check so that a
        # missing key is always reported in preference to a wrong type.
        for field in ("policy_hash", "outcome", "unknown_rate", "top_reasons"):
            if field not in data:
                raise ValueError(f"Missing required field: {field}")

        if not isinstance(data["policy_hash"], str):
            raise TypeError("policy_hash must be a string")
        if not isinstance(data["outcome"], str):
            raise TypeError("outcome must be a string")

        rate = data["unknown_rate"]
        # bool is a subclass of int; without the explicit exclusion a JSON
        # true/false would silently be coerced to 1.0/0.0.
        if isinstance(rate, bool) or not isinstance(rate, (float, int)):
            raise TypeError("unknown_rate must be a float or int")

        if not isinstance(data["top_reasons"], str):
            raise TypeError("top_reasons must be a string")

        return cls(
            policy_hash=data["policy_hash"],
            outcome=data["outcome"],
            unknown_rate=float(rate),
            top_reasons=data["top_reasons"],
        )
def generate_rollout_series(input_file: str, output_file: str) -> None:
    """Convert a JSON list of CI result objects into a sorted CSV file.

    The input file must contain a JSON array. Each element is validated via
    ``RolloutData.from_dict``; the resulting rows are sorted by
    ``(policy_hash, outcome)`` and written below a header row. Missing parent
    directories of the output file are created.

    Args:
        input_file: Path to the JSON input file with CI results.
        output_file: Path of the CSV file to write (e.g. rollout_series.csv).

    Raises:
        FileNotFoundError: If ``input_file`` does not exist.
        ValueError: If the file is not valid JSON, its root is not a list, or
            an entry fails validation.
        TypeError: If an entry contains a field of the wrong type.
        AssertionError: If the output file is missing or empty after writing.
    """
    input_path = Path(input_file)
    output_path = Path(output_file)

    if not input_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    with input_path.open("r", encoding="utf-8") as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError as e:
            # Chain explicitly so the decoder error is preserved as the cause.
            raise ValueError(f"Invalid JSON format: {e}") from e

    if not isinstance(data, list):
        raise ValueError("JSON root must be a list of CI result objects.")

    # All entries are validated before anything is written, so a bad entry
    # never leaves a partially written CSV behind.
    rollout_entries: List[RolloutData] = sorted(
        (RolloutData.from_dict(entry) for entry in data),
        key=lambda x: (x.policy_hash, x.outcome),
    )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with output_path.open("w", encoding="utf-8", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["policy_hash", "outcome", "unknown_rate", "top_reasons"])
        writer.writerows(
            [
                entry.policy_hash,
                entry.outcome,
                f"{entry.unknown_rate:.4f}",
                entry.top_reasons,
            ]
            for entry in rollout_entries
        )

    # Explicit raise instead of an assert statement: asserts are removed when
    # Python runs with -O, which would silently disable this post-condition.
    if not (output_path.exists() and output_path.stat().st_size > 0):
        raise AssertionError("Output CSV was not created or is empty.")