Add audit_data_processing/src/audit_data_processing/cli.py
This commit is contained in:
parent
cbf9a22a8d
commit
9c87dd9de4
1 changed files with 77 additions and 0 deletions
77
audit_data_processing/src/audit_data_processing/cli.py
Normal file
77
audit_data_processing/src/audit_data_processing/cli.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def _validate_input_file(path: Path) -> None:
    """Ensure *path* refers to an existing regular file.

    Args:
        path: Location of the input file to check.

    Raises:
        FileNotFoundError: If nothing exists at *path*.
        ValueError: If *path* exists but is not a regular file.
    """
    # Happy path first: a regular file needs no further checks.
    if path.is_file():
        return
    # Something is there, but it is not a file (e.g. a directory).
    if path.exists():
        raise ValueError(f"Pfad ist keine Datei: {path}")
    raise FileNotFoundError(f"Eingabedatei nicht gefunden: {path}")
|
||||
|
||||
|
||||
def _validate_percentiles(levels: List[float]) -> None:
    """Check that at least one percentile is given and all lie in (0, 100).

    Args:
        levels: Percentile values to validate; both bounds are exclusive.

    Raises:
        ValueError: If *levels* is empty, or if any value is not strictly
            between 0 and 100.
    """
    if levels:
        for value in levels:
            # Keep the positive range test: a negated form such as
            # ``value <= 0 or value >= 100`` would let NaN slip through.
            if 0 < value < 100:
                continue
            raise ValueError(f"Ungültiger Perzentilwert: {value}")
        return
    raise ValueError("Es muss mindestens ein Perzentilwert angegeben werden.")
|
||||
|
||||
|
||||
def _coerce_pinned_column(column: pd.Series) -> pd.Series:
    """Convert the raw ``pinned`` column into a real boolean Series.

    A plain ``astype(bool)`` treats every non-empty string (including
    "no" or "false") and every NaN as ``True``; this helper parses the
    values explicitly instead.

    Raises:
        ValueError: If the column has missing values or text that is not a
            recognised boolean token.
    """
    if pd.api.types.is_bool_dtype(column):
        return column.astype(bool)
    if column.isna().any():
        raise ValueError("Spalte 'pinned' enthält fehlende Werte.")
    if pd.api.types.is_numeric_dtype(column):
        # Numeric flags: zero is False, anything else is True.
        return column.astype(bool)

    truthy = {"true", "t", "yes", "y", "1"}
    falsy = {"false", "f", "no", "n", "0"}
    normalized = column.astype(str).str.strip().str.lower()
    is_true = normalized.isin(truthy)
    unknown = ~(is_true | normalized.isin(falsy))
    if unknown.any():
        bad_values = sorted(set(normalized[unknown]))
        raise ValueError(f"Ungültige Werte in Spalte 'pinned': {bad_values}")
    return is_true.astype(bool)


def _load_data(input_path: Path) -> pd.DataFrame:
    """Read the audit CSV and return it with validated, typed columns.

    Args:
        input_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame with ``warn_rate`` and ``unknown_rate``
        converted to numeric values and ``pinned`` converted to ``bool``.

    Raises:
        ValueError: If a required column is missing, a rate column contains
            non-numeric data, or ``pinned`` contains missing or
            unrecognised values.
    """
    df = pd.read_csv(input_path)
    expected_cols = {"run_id", "warn_rate", "unknown_rate", "pinned"}
    missing = expected_cols - set(df.columns)
    if missing:
        raise ValueError(f"Fehlende Spalten in Eingabedatei: {missing}")

    # Type checking and conversion
    df["warn_rate"] = pd.to_numeric(df["warn_rate"], errors="raise")
    df["unknown_rate"] = pd.to_numeric(df["unknown_rate"], errors="raise")
    df["pinned"] = _coerce_pinned_column(df["pinned"])
    return df
|
||||
|
||||
|
||||
def _calculate_percentiles(df: pd.DataFrame, percentiles: List[float]) -> dict:
    """Compute the requested percentiles of ``warn_rate`` and ``unknown_rate``.

    Args:
        df: Frame containing numeric ``warn_rate`` and ``unknown_rate``
            columns.
        percentiles: Percentile levels on a 0-100 scale.

    Returns:
        A mapping ``{metric: {label: value}}``. Whole-number levels are
        labelled ``p50``, ``p95`` and so on; fractional levels keep their
        fraction (``p99.5``) so they cannot collide with each other. A value
        is ``None`` when the quantile is undefined, e.g. for an empty frame.
    """
    summary = {}
    for metric in ("warn_rate", "unknown_rate"):
        column = df[metric]
        per_metric = {}
        for p in percentiles:
            # Truncating with int() would map 99.5 and 99.9 to the same
            # "p99" key, so only whole numbers get the short form.
            label = f"p{int(p)}" if float(p).is_integer() else f"p{p}"
            value = column.quantile(p / 100.0)
            # NaN would be serialised as the non-standard JSON token NaN.
            per_metric[label] = None if pd.isna(value) else float(value)
        summary[metric] = per_metric
    return summary
|
||||
|
||||
|
||||
def main() -> None:
    """Parse CLI arguments, compute percentiles per group, write them as JSON.

    Reads ``--input`` (CSV), splits the rows on the ``pinned`` column,
    computes the ``--percentiles`` for each group, and writes the result to
    ``--output``, creating parent directories as needed. Prints a
    confirmation message on success.
    """
    cli = argparse.ArgumentParser(
        description="Audit-Daten analysieren und Perzentile berechnen."
    )
    cli.add_argument("--input", required=True, help="Pfad zur audit.csv-Datei.")
    cli.add_argument("--output", required=True, help="Pfad zur Ausgabe-JSON-Datei.")
    cli.add_argument(
        "--percentiles",
        nargs="*",
        type=float,
        default=[50, 75, 90, 95],
        help="Liste der Perzentile.",
    )
    options = cli.parse_args()

    source = Path(options.input)
    target = Path(options.output)
    levels = options.percentiles

    # Validate everything before touching the data.
    _validate_input_file(source)
    _validate_percentiles(levels)

    frame = _load_data(source)
    is_pinned = frame["pinned"]
    report = {
        group: _calculate_percentiles(frame[mask], levels)
        for group, mask in (("pinned", is_pinned), ("unpinned", ~is_pinned))
    }

    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        json.dump(report, handle, indent=2, ensure_ascii=False)

    print(f"Perzentile erfolgreich berechnet und gespeichert unter: {target}")
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in a new issue