from __future__ import annotations

# Core module of the report_generation package
# (report_generation/src/report_generation/core.py).
#
# Provides validation of analysis records, the StabilityReport container and
# generate_report(), which aggregates per-run metrics and writes them to a
# JSON file.

import json
import math
import os
from pathlib import Path
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Union

import pandas as pd


# Field name -> accepted Python type(s) for a single analysis record.
_REQUIRED_FIELDS: Dict[str, Any] = {
    "run_id": str,
    "step_id": int,
    "epoch_ms": int,
    "monotonic_ns": int,
    "tz_offset_minutes": int,
    "retry_tail_p99": (float, int),
    "band_width": (float, int),
}

# Location used when generate_report() is called without an explicit path.
_DEFAULT_OUTPUT_PATH = Path("output/stability_report.json")


class InputDataError(Exception):
    """Custom exception raised when input validation fails."""


class AnalysisResult:
    """Data representation of a single analysis result."""

    def __init__(
        self,
        run_id: str,
        step_id: int,
        epoch_ms: int,
        monotonic_ns: int,
        tz_offset_minutes: int,
        retry_tail_p99: float,
        band_width: float,
    ) -> None:
        self.run_id = run_id
        self.step_id = step_id
        self.epoch_ms = epoch_ms
        self.monotonic_ns = monotonic_ns
        self.tz_offset_minutes = tz_offset_minutes
        self.retry_tail_p99 = retry_tail_p99
        self.band_width = band_width

    @staticmethod
    def validate_dict(data: Dict[str, Any]) -> None:
        """Check that *data* has every required field with an accepted type.

        Args:
            data: A single record mapping field names to values.

        Raises:
            InputDataError: If *data* is not a dict, a required field is
                missing, or a field value has an unexpected type.
        """
        if not isinstance(data, dict):
            raise InputDataError(
                f"Each record must be a dict, got {type(data)}"
            )
        for field, expected_type in _REQUIRED_FIELDS.items():
            if field not in data:
                raise InputDataError(f"Missing required field: {field}")
            value = data[field]
            # bool is a subclass of int, so isinstance() alone would let
            # True/False through for every numeric field.
            if isinstance(value, bool) or not isinstance(value, expected_type):
                raise InputDataError(
                    f"Invalid type for field '{field}': expected {expected_type}, got {type(value)}"
                )


class StabilityReport:
    """Structured container for the generated stability report."""

    def __init__(
        self,
        config_summary: str,
        findings: str,
        metric_comparison: Dict[str, Any],
        timestamp: datetime,
    ) -> None:
        self.config_summary = config_summary
        self.findings = findings
        self.metric_comparison = metric_comparison
        self.timestamp = timestamp

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a dict, with the timestamp in ISO 8601 form."""
        return {
            "config_summary": self.config_summary,
            "findings": self.findings,
            "metric_comparison": self.metric_comparison,
            "timestamp": self.timestamp.isoformat(),
        }


def _rows_from_columns(columns: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Transpose a column-oriented dict (field -> list) into row records."""
    for name in _REQUIRED_FIELDS:
        if name not in columns:
            raise InputDataError(f"Missing required field: {name}")
    expected_len = len(columns["run_id"])
    for name in _REQUIRED_FIELDS:
        values = columns[name]
        if not isinstance(values, list) or len(values) != expected_len:
            raise InputDataError(
                f"Column '{name}' must be a list with {expected_len} entries."
            )
    names = list(_REQUIRED_FIELDS)
    return [
        dict(zip(names, row))
        for row in zip(*(columns[name] for name in names))
    ]


def _records_from_dict(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Extract the list of row records from the supported dict layouts."""
    if "data" in payload:
        records = payload["data"]
        if not isinstance(records, list):
            raise InputDataError("Expected a list of records in 'analysis_results'.")
        return records
    if isinstance(payload.get("run_id"), list):
        return _rows_from_columns(payload)
    # Otherwise the dict itself is treated as one record.
    return [payload]


def _finite_or_none(value: Any) -> Optional[float]:
    """Return *value* as a float, or None when it is NaN or infinite."""
    number = float(value)
    return number if math.isfinite(number) else None


def generate_report(
    analysis_results: Union[Dict[str, Any], pd.DataFrame],
    output_path: Union[str, "os.PathLike[str]", None] = None,
) -> str:
    """Create a report from the analysis results and save it as a JSON file.

    Accepted inputs:
      * a dict with a ``"data"`` key holding a list of record dicts,
      * a column-oriented dict whose required fields are equally long lists,
      * a dict that is itself a single record,
      * a pandas DataFrame containing the required columns.

    Args:
        analysis_results: The analysis data in one of the layouts above.
        output_path: Where to write the report. Defaults to
            ``output/stability_report.json`` relative to the current working
            directory. Missing parent directories are created.

    Returns:
        Resolved path of the generated report file, as a string.

    Raises:
        InputDataError: If the input has an unsupported type, is empty, or
            fails record/column validation.
    """
    if isinstance(analysis_results, dict):
        records = _records_from_dict(analysis_results)
        for record in records:
            AnalysisResult.validate_dict(record)
        df = pd.DataFrame(records)
    elif isinstance(analysis_results, pd.DataFrame):
        df = analysis_results
        missing = [name for name in _REQUIRED_FIELDS if name not in df.columns]
        if missing:
            raise InputDataError(f"Missing columns in DataFrame: {missing}")
    else:
        raise InputDataError("analysis_results must be a dict or pandas.DataFrame.")

    if df.empty:
        raise InputDataError("analysis_results contains no records.")

    # Per-run median and standard deviation of both metrics; the two-level
    # column index is flattened to names such as "retry_tail_p99_median".
    grouped = df.groupby("run_id").agg({
        "retry_tail_p99": ["median", "std"],
        "band_width": ["median", "std"],
    })
    grouped.columns = ["_".join(col).strip() for col in grouped.columns.values]
    grouped = grouped.reset_index()

    run_ids = grouped["run_id"].tolist()
    metric_columns = [name for name in grouped.columns if name != "run_id"]

    # NaN/inf (e.g. the std of a run with a single sample) is written as
    # null, because json.dump would otherwise emit the non-standard token NaN.
    metric_comparison = {
        run_id: {
            name: _finite_or_none(value)
            for name, value in zip(metric_columns, row)
        }
        for run_id, row in zip(
            run_ids,
            grouped[metric_columns].itertuples(index=False, name=None),
        )
    }

    # A run counts as stable when its retry_tail_p99 std is below 10% of the
    # mean of the per-run medians. A NaN std never satisfies the comparison.
    ref_metric = grouped["retry_tail_p99_median"].mean()
    stable_mask = grouped["retry_tail_p99_std"] < (0.1 * ref_metric)
    stable_runs = grouped[stable_mask]["run_id"].tolist()
    findings = (
        f"Identified {len(stable_runs)} stable configurations: {', '.join(map(str, stable_runs))}. "
        f"Average retry_tail_p99 median: {ref_metric:.2f}."
    )

    config_summary = f"Runs analyzed: {', '.join(map(str, run_ids))}"

    report = StabilityReport(
        config_summary=config_summary,
        findings=findings,
        metric_comparison=metric_comparison,
        timestamp=datetime.now(timezone.utc),
    )

    target = _DEFAULT_OUTPUT_PATH if output_path is None else Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    with target.open("w", encoding="utf-8") as f:
        json.dump(report.to_dict(), f, indent=2)

    return str(target.resolve())