Add report_generation/src/report_generation/core.py

This commit is contained in:
Mika 2026-03-30 16:33:39 +00:00
parent ed20485957
commit ed76bfee9f

View file

@@ -0,0 +1,154 @@
from __future__ import annotations
import json
import os
from pathlib import Path
from datetime import datetime
from typing import Any, Dict, Union
import pandas as pd
class InputDataError(Exception):
    """Signal that data handed to the report generator failed validation."""
class AnalysisResult:
    """Data representation of a single analysis result.

    The constructor stores its arguments unchanged and performs no
    validation; use :meth:`validate_dict` to check a raw record first.
    """

    # Field name -> accepted Python type(s) for a raw record. The two metric
    # fields accept ints as well as floats.
    _REQUIRED_FIELDS = {
        "run_id": str,
        "step_id": int,
        "epoch_ms": int,
        "monotonic_ns": int,
        "tz_offset_minutes": int,
        "retry_tail_p99": (float, int),
        "band_width": (float, int),
    }

    def __init__(
        self,
        run_id: str,
        step_id: int,
        epoch_ms: int,
        monotonic_ns: int,
        tz_offset_minutes: int,
        retry_tail_p99: float,
        band_width: float,
    ) -> None:
        self.run_id = run_id
        self.step_id = step_id
        self.epoch_ms = epoch_ms
        self.monotonic_ns = monotonic_ns
        self.tz_offset_minutes = tz_offset_minutes
        self.retry_tail_p99 = retry_tail_p99
        self.band_width = band_width

    @staticmethod
    def validate_dict(data: Dict[str, Any]) -> None:
        """Check that ``data`` holds every required field with an accepted type.

        Args:
            data: Raw record to validate.

        Raises:
            InputDataError: If ``data`` is not a dict, a required field is
                missing, or a field value has an unexpected type. ``bool``
                values are rejected for every field.
        """
        if not isinstance(data, dict):
            raise InputDataError(f"Record must be a dict, got {type(data)}")

        for field, expected_type in AnalysisResult._REQUIRED_FIELDS.items():
            if field not in data:
                raise InputDataError(f"Missing required field: {field}")
            value = data[field]
            # bool is a subclass of int, so a plain isinstance check would let
            # True/False through as step ids or metric values.
            if isinstance(value, bool) or not isinstance(value, expected_type):
                raise InputDataError(
                    f"Invalid type for field '{field}': expected {expected_type}, got {type(value)}"
                )
class StabilityReport:
    """Bundle the parts of a generated stability report.

    Holds a configuration summary, a findings text, a metric comparison
    mapping and the report timestamp.
    """

    def __init__(
        self,
        config_summary: str,
        findings: str,
        metric_comparison: Dict[str, Any],
        timestamp: datetime,
    ) -> None:
        self.config_summary, self.findings = config_summary, findings
        self.metric_comparison = metric_comparison
        self.timestamp = timestamp

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a plain dict with an ISO-formatted timestamp."""
        passthrough = ("config_summary", "findings", "metric_comparison")
        payload: Dict[str, Any] = {name: getattr(self, name) for name in passthrough}
        payload["timestamp"] = self.timestamp.isoformat()
        return payload
def _frame_from_input(analysis_results: Any) -> pd.DataFrame:
    """Normalize the supported input shapes into a validated, non-empty DataFrame."""
    required_columns = [
        "run_id",
        "step_id",
        "epoch_ms",
        "monotonic_ns",
        "tz_offset_minutes",
        "retry_tail_p99",
        "band_width",
    ]

    if isinstance(analysis_results, pd.DataFrame):
        # Work on a copy so the caller's frame is never modified.
        df = analysis_results.copy()
    elif isinstance(analysis_results, dict):
        if "data" not in analysis_results and isinstance(analysis_results.get("run_id"), list):
            # Column-oriented dict ({"run_id": [...], ...}): it cannot pass
            # per-record validation, so build the frame directly and rely on
            # the column checks below.
            try:
                df = pd.DataFrame(analysis_results)
            except ValueError as err:
                raise InputDataError(
                    f"Column-oriented analysis_results could not be converted: {err}"
                ) from err
        else:
            # Either {"data": [record, ...]} or a single record dict.
            records = analysis_results["data"] if "data" in analysis_results else [analysis_results]
            if not isinstance(records, list):
                raise InputDataError("Expected a list of records in 'analysis_results'.")
            if not records:
                raise InputDataError("analysis_results contains no records.")
            for rec in records:
                if not isinstance(rec, dict):
                    raise InputDataError(f"Each record must be a dict, got {type(rec)}")
                AnalysisResult.validate_dict(rec)
            df = pd.DataFrame(records)
    else:
        raise InputDataError("analysis_results must be a dict or pandas.DataFrame.")

    missing = [c for c in required_columns if c not in df.columns]
    if missing:
        raise InputDataError(f"Missing columns in DataFrame: {missing}")
    if df.empty:
        raise InputDataError("analysis_results contains no rows.")

    # The aggregation needs numeric metric columns; fail with the module's own
    # error type instead of an opaque pandas error further down.
    for column in ("retry_tail_p99", "band_width"):
        try:
            df[column] = pd.to_numeric(df[column])
        except (ValueError, TypeError) as err:
            raise InputDataError(f"Column '{column}' must be numeric: {err}") from err
    return df


def _json_safe(value: Any) -> Any:
    """Return ``value`` as a float, or None for NaN/infinite values that strict JSON cannot encode."""
    if pd.isna(value):
        return None
    number = float(value)
    if number in (float("inf"), float("-inf")):
        return None
    return number


def generate_report(
    analysis_results: Union[Dict[str, Any], pd.DataFrame],
    *,
    output_path: Union[str, Path, None] = None,
    stability_ratio: float = 0.1,
) -> str:
    """Create a report from the analysis results and save it as a JSON file.

    Rows are grouped by ``run_id``; for each run the median and standard
    deviation of ``retry_tail_p99`` and ``band_width`` are computed. A run
    counts as stable when its ``retry_tail_p99`` standard deviation is below
    ``stability_ratio`` times the mean of all per-run medians.

    Args:
        analysis_results: One of
            * a ``pandas.DataFrame`` containing the required columns,
            * a dict with a ``"data"`` key holding a list of record dicts,
            * a single record dict, or
            * a column-oriented dict whose ``"run_id"`` value is a list.
        output_path: Where to write the report. Defaults to
            ``output/stability_report.json`` relative to the current working
            directory. Missing parent directories are created.
        stability_ratio: Fraction of the mean per-run median below which a
            run's standard deviation is considered stable.

    Returns:
        Absolute path of the generated JSON report file.

    Raises:
        InputDataError: If the input has an unsupported type, is empty, lacks
            required fields/columns, or has non-numeric metric columns.
    """
    # Imported locally because the module only imports `datetime` itself.
    from datetime import timezone

    df = _frame_from_input(analysis_results)

    # Aggregation and comparison
    grouped = df.groupby("run_id").agg({
        "retry_tail_p99": ["median", "std"],
        "band_width": ["median", "std"],
    })
    # Flatten the (column, statistic) MultiIndex into e.g. "band_width_std".
    grouped.columns = ["_".join(col).strip() for col in grouped.columns.values]
    grouped = grouped.reset_index()

    # Runs with a single row have an undefined (NaN) standard deviation; store
    # it as None so the file stays valid JSON. Keys are stringified, which is
    # what the JSON encoder would do with them anyway.
    per_run = grouped.set_index("run_id").to_dict(orient="index")
    metric_comparison = {
        str(run_id): {name: _json_safe(value) for name, value in metrics.items()}
        for run_id, metrics in per_run.items()
    }

    # Simple findings summary. A NaN standard deviation compares False, so
    # single-sample runs are never reported as stable.
    ref_metric = grouped["retry_tail_p99_median"].mean()
    is_stable = grouped["retry_tail_p99_std"] < (stability_ratio * ref_metric)
    stable_runs = grouped.loc[is_stable, "run_id"].tolist()
    findings = (
        f"Identified {len(stable_runs)} stable configurations: {', '.join(map(str, stable_runs))}. "
        f"Average retry_tail_p99 median: {ref_metric:.2f}."
    )
    config_summary = f"Runs analyzed: {', '.join(grouped['run_id'].astype(str))}"

    report = StabilityReport(
        config_summary=config_summary,
        findings=findings,
        metric_comparison=metric_comparison,
        timestamp=datetime.now(timezone.utc),
    )

    target = Path("output/stability_report.json") if output_path is None else Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as f:
        json.dump(report.to_dict(), f, indent=2)
    return str(target.resolve())