Add report_generation/src/report_generation/core.py
This commit is contained in:
parent
ed20485957
commit
ed76bfee9f
1 changed files with 154 additions and 0 deletions
154
report_generation/src/report_generation/core.py
Normal file
154
report_generation/src/report_generation/core.py
Normal file
|
|
@ -0,0 +1,154 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, Union
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
class InputDataError(Exception):
    """Signal that caller-supplied analysis data failed validation.

    Raised when a required field or column is missing, or when a field
    holds a value of an unexpected type.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class AnalysisResult:
    """Data representation of a single analysis result.

    Each instance stores the seven fields of one record exactly as they
    were passed to the constructor; no conversion or validation happens
    in ``__init__``. Use :meth:`validate_dict` to check a raw record first.
    """

    def __init__(
        self,
        run_id: str,
        step_id: int,
        epoch_ms: int,
        monotonic_ns: int,
        tz_offset_minutes: int,
        retry_tail_p99: float,
        band_width: float,
    ) -> None:
        self.run_id = run_id
        self.step_id = step_id
        self.epoch_ms = epoch_ms
        self.monotonic_ns = monotonic_ns
        self.tz_offset_minutes = tz_offset_minutes
        self.retry_tail_p99 = retry_tail_p99
        self.band_width = band_width

    @staticmethod
    def validate_dict(data: Dict[str, Any]) -> None:
        """Check that ``data`` has every required field with an accepted type.

        Args:
            data: One raw record keyed by field name.

        Raises:
            InputDataError: If ``data`` is not a mapping, a required field
                is missing, or a field value has an unexpected type.
                ``bool`` values are rejected for every numeric field.
        """
        from collections.abc import Mapping

        # A non-mapping record (e.g. an int inside the record list) would
        # otherwise fail with a raw TypeError on the membership test below.
        if not isinstance(data, Mapping):
            raise InputDataError(
                f"Expected a mapping record, got {type(data)}"
            )

        required_fields = {
            "run_id": str,
            "step_id": int,
            "epoch_ms": int,
            "monotonic_ns": int,
            "tz_offset_minutes": int,
            "retry_tail_p99": (float, int),
            "band_width": (float, int),
        }
        for field, expected_type in required_fields.items():
            if field not in data:
                raise InputDataError(f"Missing required field: {field}")
            value = data[field]
            # bool is a subclass of int, so a plain isinstance check would
            # let True/False through as a step id, timestamp or metric.
            if isinstance(value, bool) or not isinstance(value, expected_type):
                raise InputDataError(
                    f"Invalid type for field '{field}': expected {expected_type}, got {type(value)}"
                )
|
||||||
|
|
||||||
|
|
||||||
|
class StabilityReport:
    """Hold the pieces of a generated stability report.

    The four constructor arguments are stored unchanged as attributes of
    the same name.
    """

    def __init__(
        self,
        config_summary: str,
        findings: str,
        metric_comparison: Dict[str, Any],
        timestamp: datetime,
    ) -> None:
        self.timestamp = timestamp
        self.metric_comparison = metric_comparison
        self.findings = findings
        self.config_summary = config_summary

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a plain dict.

        The timestamp is rendered with ``datetime.isoformat()``; the other
        three attributes are passed through as-is (``metric_comparison``
        is not copied).
        """
        payload: Dict[str, Any] = dict(
            config_summary=self.config_summary,
            findings=self.findings,
            metric_comparison=self.metric_comparison,
        )
        payload["timestamp"] = self.timestamp.isoformat()
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
_REQUIRED_COLUMNS = (
    "run_id",
    "step_id",
    "epoch_ms",
    "monotonic_ns",
    "tz_offset_minutes",
    "retry_tail_p99",
    "band_width",
)


def _columns_to_records(columns: Dict[str, Any]) -> list[Dict[str, Any]]:
    """Transpose a column-oriented dict (field -> list) into row dicts."""
    values = list(columns.values())
    if not all(isinstance(column, list) for column in values):
        raise InputDataError(
            "Column-oriented input must map every field to a list."
        )
    # zip() would silently truncate to the shortest column, so reject
    # ragged input explicitly.
    if len({len(column) for column in values}) > 1:
        raise InputDataError(
            "Column-oriented input has columns of unequal length."
        )
    names = list(columns)
    return [dict(zip(names, row)) for row in zip(*values)]


def _to_frame(
    analysis_results: Union[Dict[str, Any], pd.DataFrame],
) -> pd.DataFrame:
    """Validate the input and return it as a non-empty DataFrame."""
    if isinstance(analysis_results, pd.DataFrame):
        missing = [
            column
            for column in _REQUIRED_COLUMNS
            if column not in analysis_results.columns
        ]
        if missing:
            raise InputDataError(f"Missing columns in DataFrame: {missing}")
        # The frame is only read from below, so no defensive copy is needed.
        frame = analysis_results
    elif isinstance(analysis_results, dict):
        if "data" in analysis_results:
            records = analysis_results["data"]
        elif isinstance(analysis_results.get("run_id"), list):
            # Column-oriented payload: turn it into one record per row so
            # the per-record validation below applies to it as well.
            records = _columns_to_records(analysis_results)
        else:
            # A single flat record.
            records = [analysis_results]

        if not isinstance(records, list):
            raise InputDataError(
                "Expected a list of records in 'analysis_results'."
            )
        for record in records:
            if not isinstance(record, dict):
                raise InputDataError(
                    f"Expected each record to be a dict, got {type(record)}"
                )
            AnalysisResult.validate_dict(record)
        frame = pd.DataFrame(records)
    else:
        raise InputDataError(
            "analysis_results must be a dict or pandas.DataFrame."
        )

    if frame.empty:
        # Without this guard an empty record list fails later with a
        # KeyError from groupby("run_id").
        raise InputDataError("analysis_results contains no records.")
    return frame


def generate_report(
    analysis_results: Union[Dict[str, Any], pd.DataFrame],
    *,
    output_path: Union[str, Path] = "output/stability_report.json",
) -> str:
    """Build a stability report from analysis results and save it as JSON.

    Accepted inputs:

    * a ``pandas.DataFrame`` containing the required columns;
    * a dict with a ``"data"`` key holding a list of record dicts;
    * a column-oriented dict whose ``"run_id"`` value is a list;
    * a single flat record dict.

    Records are grouped by ``run_id``; the median and standard deviation
    of ``retry_tail_p99`` and ``band_width`` are computed per run. A run
    is listed as stable when its ``retry_tail_p99`` standard deviation is
    below 10 % of the mean of the per-run medians. Runs with a single
    observation have no standard deviation: they are never listed as
    stable and their ``*_std`` entries are written as ``null``.

    Args:
        analysis_results: Input data in one of the shapes listed above.
        output_path: Destination file. Parent directories are created if
            needed. Defaults to ``output/stability_report.json`` relative
            to the current working directory.

    Returns:
        Absolute path of the written JSON report.

    Raises:
        InputDataError: If the input has an unsupported type, is empty,
            lacks required fields/columns, or contains invalid records.
    """
    from datetime import timezone

    df = _to_frame(analysis_results)

    # Aggregation: one row per run with flattened "<metric>_<stat>" columns.
    grouped = df.groupby("run_id").agg(
        {
            "retry_tail_p99": ["median", "std"],
            "band_width": ["median", "std"],
        }
    )
    grouped.columns = ["_".join(col) for col in grouped.columns]
    grouped = grouped.reset_index()

    # Metric comparison. NaN (e.g. std of a single sample) is mapped to
    # None because a bare NaN token is not valid JSON; run ids become
    # strings so any id dtype serialises the same way.
    stat_columns = [name for name in grouped.columns if name != "run_id"]
    metric_comparison: Dict[str, Any] = {}
    for row in grouped.to_dict(orient="records"):
        metric_comparison[str(row["run_id"])] = {
            name: (None if pd.isna(row[name]) else float(row[name]))
            for name in stat_columns
        }

    # Findings summary. Comparisons against NaN are False, so runs without
    # a defined standard deviation are not counted as stable.
    run_ids = grouped["run_id"].astype(str)
    ref_metric = grouped["retry_tail_p99_median"].mean()
    is_stable = grouped["retry_tail_p99_std"] < (0.1 * ref_metric)
    stable_runs = run_ids[is_stable].tolist()
    findings = (
        f"Identified {len(stable_runs)} stable configurations: {', '.join(stable_runs)}. "
        f"Average retry_tail_p99 median: {ref_metric:.2f}."
    )
    config_summary = f"Runs analyzed: {', '.join(run_ids)}"

    report = StabilityReport(
        config_summary=config_summary,
        findings=findings,
        metric_comparison=metric_comparison,
        # Timezone-aware UTC; datetime.utcnow() is deprecated and naive.
        timestamp=datetime.now(timezone.utc),
    )

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", encoding="utf-8") as handle:
        json.dump(report.to_dict(), handle, indent=2)

    return str(destination.resolve())
|
||||||
Loading…
Reference in a new issue