# Source: statistical_analysis/src/statistical_analysis/cli.py
# Added in commit 64effbcbf6750983ac11f47833d94326a182fd3f (Mika, 2026-02-16).
"""Command-line entry point that runs the outlier analysis on log data.

The input file (JSON Lines, JSON or CSV) is loaded into a list of dicts,
handed to ``analyze_outliers`` and the result is emitted as JSON, either to
the file given with ``--output`` or to standard output.
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence

import pandas as pd

from statistical_analysis.core import analyze_outliers


def _load_input_data(input_path: Path) -> List[Dict[str, Any]]:
    """Read the records stored in *input_path*.

    The format is chosen from the file suffix (case-insensitive):

    * ``.csv``   -- read with pandas, one record per row.
    * ``.jsonl`` -- one JSON value per non-blank line.
    * ``.json``  -- a whole JSON document (a top-level array yields its
      elements, any other value yields a one-element list); if the content
      is not a single valid document it is parsed line by line like
      ``.jsonl``.

    Args:
        input_path: Location of the input file.

    Returns:
        The parsed records. The element types are not checked here; the
        caller validates them.

    Raises:
        FileNotFoundError: If *input_path* does not exist.
        ValueError: If the suffix is not supported, or a line is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    suffix = input_path.suffix.lower()
    if suffix == ".csv":
        return pd.read_csv(input_path).to_dict(orient="records")
    if suffix not in (".jsonl", ".json"):
        raise ValueError(f"Unsupported input format: {suffix}")

    text = input_path.read_text(encoding="utf-8")
    if suffix == ".json":
        try:
            document = json.loads(text)
        except json.JSONDecodeError:
            # Not a single JSON document (e.g. JSON Lines content saved with
            # a .json suffix, or an empty file): parse line by line below.
            pass
        else:
            return document if isinstance(document, list) else [document]

    # Split on "\n" only: str.splitlines() would also break on characters
    # such as U+2028 that may legally appear inside a JSON string.
    return [json.loads(line) for line in text.split("\n") if line.strip()]


def _validate_records(records: Any) -> None:
    """Raise ``TypeError`` unless *records* is a list containing only dicts.

    Explicit exceptions are used instead of ``assert`` because assertions
    are removed when Python runs with ``-O``.
    """
    if not isinstance(records, list):
        raise TypeError("Input-Daten müssen eine Liste von Dicts sein.")
    if not all(isinstance(item, dict) for item in records):
        raise TypeError("Jedes Element muss ein Dict sein.")


def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser with the ``--input``/``--output`` options."""
    parser = argparse.ArgumentParser(
        description="CLI zur Ausführung der Ausreißeranalyse von Logdaten."
    )
    parser.add_argument(
        "--input",
        required=True,
        help="Pfad zu den Logdaten (JSONL, JSON oder CSV).",
    )
    parser.add_argument(
        "--output",
        required=False,
        help="Pfad zur JSON-Ausgabedatei der Ausreißeranalyse.",
    )
    return parser


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run the outlier analysis from the command line.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behaviour of a plain
            ``main()`` call.

    Exits with status 1 after writing a message to standard error when the
    input cannot be loaded or is not a list of dicts, when the analysis
    raises, or when the result cannot be serialized to JSON.
    """
    args = _build_parser().parse_args(argv)
    input_path = Path(args.input)

    # Top-level CLI boundary: report any load/validation problem and exit.
    try:
        log_data = _load_input_data(input_path)
        _validate_records(log_data)
    except Exception as exc:
        sys.stderr.write(f"Fehler beim Einlesen der Daten: {exc}\n")
        sys.exit(1)

    try:
        result = analyze_outliers(log_data)
    except Exception as exc:
        sys.stderr.write(f"Analysefehler: {exc}\n")
        sys.exit(1)

    # Serialize before touching the output file so that a failure cannot
    # leave a truncated file behind.
    results = result if isinstance(result, list) else [result]
    try:
        output_data = [vars(item) for item in results]
        payload = json.dumps(output_data, indent=2, ensure_ascii=False)
    except (TypeError, ValueError) as exc:
        sys.stderr.write(f"Fehler beim Serialisieren der Ergebnisse: {exc}\n")
        sys.exit(1)

    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(payload, encoding="utf-8")
    else:
        sys.stdout.write(payload)
        sys.stdout.write("\n")


if __name__ == "__main__":
    main()