# Provenance: audit_data_processing/src/audit_data_processing/cli.py
# Added by commit 9c87dd9de4cec726949e1d7e96ab55327180205c
# (Mika, Thu, 5 Feb 2026 13:42:01 +0000).
"""Command-line tool that summarises audit runs as percentiles.

Reads a CSV file with the columns ``run_id``, ``warn_rate``,
``unknown_rate`` and ``pinned``, splits the rows into pinned and unpinned
runs, and writes the requested percentiles of both rate columns to a JSON
file.
"""
import argparse
import json
import os
from pathlib import Path
from typing import Dict, List, Optional, Sequence

import pandas as pd

# Textual spellings accepted in the ``pinned`` column (compared after
# stripping whitespace and lower-casing).
_PINNED_TOKENS = {
    "true": True,
    "t": True,
    "yes": True,
    "y": True,
    "1": True,
    "false": False,
    "f": False,
    "no": False,
    "n": False,
    "0": False,
}


def _validate_input_file(path: Path) -> None:
    """Ensure *path* points to an existing regular file.

    Raises:
        FileNotFoundError: If nothing exists at *path*.
        ValueError: If *path* exists but is not a file.
    """
    if not path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {path}")
    if not path.is_file():
        raise ValueError(f"Pfad ist keine Datei: {path}")


def _validate_percentiles(levels: List[float]) -> None:
    """Ensure *levels* is non-empty and every level lies strictly in (0, 100).

    Raises:
        ValueError: If *levels* is empty or contains an out-of-range value
            (NaN fails the range comparison and is rejected as well).
    """
    if not levels:
        raise ValueError("Es muss mindestens ein Perzentilwert angegeben werden.")
    for p in levels:
        if not (0 < p < 100):
            raise ValueError(f"Ungültiger Perzentilwert: {p}")


def _coerce_pinned(column: pd.Series) -> pd.Series:
    """Convert the ``pinned`` column to a boolean Series.

    ``astype(bool)`` alone would turn every non-empty string (including
    "no" or "false") and every NaN into ``True``, so text columns are
    parsed token by token instead.

    Raises:
        ValueError: If the column contains missing or unrecognised values.
    """
    if pd.api.types.is_bool_dtype(column):
        return column.astype(bool)
    if pd.api.types.is_numeric_dtype(column):
        # NaN is truthy under astype(bool); refuse it rather than guess.
        if column.isna().any():
            raise ValueError("Fehlende Werte in Spalte 'pinned'.")
        # Same rule astype(bool) applies to numbers: non-zero means True.
        return column != 0

    tokens = [str(value).strip().lower() for value in column]
    unknown = sorted({token for token in tokens if token not in _PINNED_TOKENS})
    if unknown:
        raise ValueError(f"Ungültige Werte in Spalte 'pinned': {unknown}")
    return pd.Series(
        [_PINNED_TOKENS[token] for token in tokens],
        index=column.index,
        dtype=bool,
    )


def _load_data(input_path: Path) -> pd.DataFrame:
    """Read the audit CSV and normalise the column types.

    Returns:
        The DataFrame with numeric ``warn_rate`` / ``unknown_rate`` columns
        and a boolean ``pinned`` column.

    Raises:
        ValueError: If a required column is missing, a rate cannot be parsed
            as a number, or ``pinned`` holds an unrecognised value.
    """
    df = pd.read_csv(input_path)
    expected_cols = {"run_id", "warn_rate", "unknown_rate", "pinned"}
    missing = expected_cols - set(df.columns)
    if missing:
        raise ValueError(f"Fehlende Spalten in Eingabedatei: {missing}")

    # Type checking and conversion.
    df["warn_rate"] = pd.to_numeric(df["warn_rate"], errors="raise")
    df["unknown_rate"] = pd.to_numeric(df["unknown_rate"], errors="raise")
    df["pinned"] = _coerce_pinned(df["pinned"])
    return df


def _percentile_key(level: float) -> str:
    """Return the JSON key for *level*, e.g. ``p50`` or ``p99.5``.

    Whole-number levels keep the short ``p50`` form; fractional levels keep
    their fraction so that 99.5 and 99.9 do not both collapse to ``p99``.
    """
    as_float = float(level)
    if as_float.is_integer():
        return f"p{int(as_float)}"
    return f"p{as_float}"


def _calculate_percentiles(
    df: pd.DataFrame, percentiles: List[float]
) -> Dict[str, Dict[str, Optional[float]]]:
    """Compute the requested percentiles of both rate columns of *df*.

    Returns:
        A mapping ``{metric: {percentile_key: value}}``. A value is ``None``
        when the metric has no non-missing observations, because NaN would
        be written as the non-standard ``NaN`` token by ``json.dump``.
    """
    summary: Dict[str, Dict[str, Optional[float]]] = {}
    for metric in ["warn_rate", "unknown_rate"]:
        observed = df[metric].dropna()
        per_level: Dict[str, Optional[float]] = {}
        for p in percentiles:
            if observed.empty:
                per_level[_percentile_key(p)] = None
            else:
                per_level[_percentile_key(p)] = float(observed.quantile(p / 100.0))
        summary[metric] = per_level
    return summary


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse the command line, compute the percentiles and write the JSON file.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        FileNotFoundError: If the input file does not exist.
        ValueError: If the input path, the percentiles or the data are invalid.
    """
    parser = argparse.ArgumentParser(description="Audit-Daten analysieren und Perzentile berechnen.")
    parser.add_argument("--input", required=True, help="Pfad zur audit.csv-Datei.")
    parser.add_argument("--output", required=True, help="Pfad zur Ausgabe-JSON-Datei.")
    parser.add_argument(
        "--percentiles", nargs="*", type=float, default=[50, 75, 90, 95], help="Liste der Perzentile."
    )
    args = parser.parse_args(argv)

    input_path = Path(args.input)
    output_path = Path(args.output)

    _validate_input_file(input_path)
    _validate_percentiles(args.percentiles)

    df = _load_data(input_path)
    result = {
        "pinned": _calculate_percentiles(df[df["pinned"]], args.percentiles),
        "unpinned": _calculate_percentiles(df[~df["pinned"]], args.percentiles),
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    print(f"Perzentile erfolgreich berechnet und gespeichert unter: {output_path}")


if __name__ == "__main__":
    main()