Add report_generation/src/report_generation/core.py
This commit is contained in:
parent
ed20485957
commit
ed76bfee9f
1 changed files with 154 additions and 0 deletions
154
report_generation/src/report_generation/core.py
Normal file
154
report_generation/src/report_generation/core.py
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
from __future__ import annotations
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Union
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class InputDataError(Exception):
    """Signal that the supplied analysis input did not pass validation."""
|
||||
|
||||
|
||||
class AnalysisResult:
    """Data representation of a single analysis result.

    The constructor stores its arguments unchanged as same-named attributes
    and performs no validation. Use :meth:`validate_dict` to check a raw
    record before building a result from it.
    """

    # Field name -> accepted type (or tuple of types) for a raw record.
    _REQUIRED_FIELDS = {
        "run_id": str,
        "step_id": int,
        "epoch_ms": int,
        "monotonic_ns": int,
        "tz_offset_minutes": int,
        "retry_tail_p99": (float, int),
        "band_width": (float, int),
    }

    def __init__(
        self,
        run_id: str,
        step_id: int,
        epoch_ms: int,
        monotonic_ns: int,
        tz_offset_minutes: int,
        retry_tail_p99: float,
        band_width: float,
    ) -> None:
        """Store every argument on the instance under the same name."""
        self.run_id = run_id
        self.step_id = step_id
        self.epoch_ms = epoch_ms
        self.monotonic_ns = monotonic_ns
        self.tz_offset_minutes = tz_offset_minutes
        self.retry_tail_p99 = retry_tail_p99
        self.band_width = band_width

    @staticmethod
    def validate_dict(data: Dict[str, Any]) -> None:
        """Check that *data* holds every required field with an accepted type.

        Args:
            data: Raw record to validate. Keys beyond the required ones are
                ignored.

        Raises:
            InputDataError: If *data* is not a dict, a required field is
                missing, or a field value has the wrong type. Booleans are
                rejected for all numeric fields.
        """
        if not isinstance(data, dict):
            raise InputDataError(f"Record must be a dict, got {type(data)}")

        for field, expected_type in AnalysisResult._REQUIRED_FIELDS.items():
            if field not in data:
                raise InputDataError(f"Missing required field: {field}")
            value = data[field]
            # bool is a subclass of int, so a plain isinstance check would let
            # True/False through as a valid number.
            if isinstance(value, bool) or not isinstance(value, expected_type):
                raise InputDataError(
                    f"Invalid type for field '{field}': expected {expected_type}, got {type(value)}"
                )
|
||||
|
||||
|
||||
class StabilityReport:
    """Hold the parts of a generated stability report.

    The instance keeps the configuration summary, the findings text, the
    per-run metric comparison mapping and the creation timestamp exactly as
    they were passed in.
    """

    def __init__(
        self,
        config_summary: str,
        findings: str,
        metric_comparison: Dict[str, Any],
        timestamp: datetime,
    ) -> None:
        """Keep all four report parts on the instance without copying."""
        self.config_summary, self.findings = config_summary, findings
        self.metric_comparison, self.timestamp = metric_comparison, timestamp

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a plain dict.

        The timestamp is rendered with ``datetime.isoformat()``; the other
        three values are passed through as stored (the metric mapping is the
        same object, not a copy).
        """
        payload: Dict[str, Any] = {
            name: getattr(self, name)
            for name in ("config_summary", "findings", "metric_comparison")
        }
        payload["timestamp"] = self.timestamp.isoformat()
        return payload
|
||||
|
||||
|
||||
# Columns a DataFrame input must provide.
_REPORT_COLUMNS = (
    "run_id",
    "step_id",
    "epoch_ms",
    "monotonic_ns",
    "tz_offset_minutes",
    "retry_tail_p99",
    "band_width",
)

# A run counts as stable when the std of its retry_tail_p99 is below this
# fraction of the mean of all per-run retry_tail_p99 medians.
_STABILITY_STD_RATIO = 0.1


def _records_from_dict(payload: Dict[str, Any]) -> list:
    """Turn a dict input into a list of row records (not yet validated)."""
    if "data" in payload:
        records = payload["data"]
    elif isinstance(payload.get("run_id"), list):
        # Column-oriented dict: every value is a column; transpose to rows.
        lengths = set()
        for name, column in payload.items():
            if not isinstance(column, list):
                raise InputDataError(
                    f"Column '{name}' must be a list in a column-oriented dict."
                )
            lengths.add(len(column))
        if len(lengths) > 1:
            raise InputDataError(
                "All columns in a column-oriented dict must have the same length."
            )
        names = list(payload)
        records = [dict(zip(names, row)) for row in zip(*payload.values())]
    else:
        # A single flat record.
        records = [payload]

    if not isinstance(records, list):
        raise InputDataError("Expected a list of records in 'analysis_results'.")
    return records


def _json_number(value: Any) -> Any:
    """Return *value* as a float, or None when it is NaN or infinite."""
    number = float(value)
    # NaN is the only float that differs from itself; neither NaN nor the
    # infinities have a valid JSON representation.
    if number != number or number in (float("inf"), float("-inf")):
        return None
    return number


def generate_report(
    analysis_results: Union[Dict[str, Any], pd.DataFrame],
    output_path: Union[str, Path] = "output/stability_report.json",
) -> str:
    """Create a report from the analysis results and save it as a JSON file.

    Rows are grouped by ``run_id``; for each run the median and standard
    deviation of ``retry_tail_p99`` and ``band_width`` are computed. A run is
    listed as stable when its ``retry_tail_p99`` std is below 10% of the mean
    of all per-run ``retry_tail_p99`` medians.

    Args:
        analysis_results: Either a pandas DataFrame containing the required
            columns, or a dict in one of three shapes: ``{"data": [record,
            ...]}``, a column-oriented dict whose values are equally long
            lists, or a single flat record.
        output_path: Destination of the JSON report. Parent directories are
            created when missing. Defaults to ``output/stability_report.json``
            relative to the current working directory.

    Returns:
        Absolute path of the generated report file, as a string.

    Raises:
        InputDataError: If the input has an unsupported type, is empty, lacks
            required fields/columns, or holds values of the wrong type.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    # Input validation
    if isinstance(analysis_results, dict):
        records = _records_from_dict(analysis_results)
        for rec in records:
            AnalysisResult.validate_dict(rec)
        df = pd.DataFrame(records)
    elif isinstance(analysis_results, pd.DataFrame):
        df = analysis_results.copy()
        missing = [c for c in _REPORT_COLUMNS if c not in df.columns]
        if missing:
            raise InputDataError(f"Missing columns in DataFrame: {missing}")
        # Dict records are type-checked above; DataFrame columns are not, so
        # make sure the aggregated metrics really are numeric.
        for column in ("retry_tail_p99", "band_width"):
            try:
                df[column] = pd.to_numeric(df[column])
            except (ValueError, TypeError) as exc:
                raise InputDataError(f"Column '{column}' must be numeric.") from exc
    else:
        raise InputDataError("analysis_results must be a dict or pandas.DataFrame.")

    if df.empty:
        # Without rows there is nothing to group or summarise.
        raise InputDataError("analysis_results contains no records.")

    # Aggregation and comparison
    grouped = (
        df.groupby("run_id")
        .agg(
            retry_tail_p99_median=("retry_tail_p99", "median"),
            retry_tail_p99_std=("retry_tail_p99", "std"),
            band_width_median=("band_width", "median"),
            band_width_std=("band_width", "std"),
        )
        .reset_index()
    )

    # Metric comparison dictionary. Keys are stringified and non-finite values
    # (e.g. the std of a single-sample run) become None so the result is
    # always serialisable as strict JSON.
    metric_comparison = {
        str(run_id): {name: _json_number(value) for name, value in stats.items()}
        for run_id, stats in grouped.set_index("run_id").to_dict(orient="index").items()
    }

    # Simple findings summary
    ref_metric = grouped["retry_tail_p99_median"].mean()
    is_stable = grouped["retry_tail_p99_std"] < (_STABILITY_STD_RATIO * ref_metric)
    # astype(str) keeps the join safe for non-string run ids.
    stable_runs = grouped.loc[is_stable, "run_id"].astype(str).tolist()
    findings = (
        f"Identified {len(stable_runs)} stable configurations: {', '.join(stable_runs)}. "
        f"Average retry_tail_p99 median: {ref_metric:.2f}."
    )

    config_summary = f"Runs analyzed: {', '.join(grouped['run_id'].astype(str))}"

    report = StabilityReport(
        config_summary=config_summary,
        findings=findings,
        metric_comparison=metric_comparison,
        # Timezone-aware UTC; the serialised timestamp carries "+00:00".
        timestamp=datetime.now(timezone.utc),
    )

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # A failed write raises OSError here, so no separate existence check is
    # needed afterwards.
    with destination.open("w", encoding="utf-8") as f:
        json.dump(report.to_dict(), f, indent=2)

    return str(destination.resolve())
|
||||
Loading…
Reference in a new issue