"""Command-line entry point for the audit analysis.

File path (from the originating diff): audit_analysis/src/audit_analysis/cli.py
"""
import argparse
import json
import logging
from pathlib import Path
from typing import Any, Optional, Sequence

# NOTE(review): pandas is not referenced anywhere in this module; the import is
# kept as-is in case something relies on it being loaded -- confirm and remove.
import pandas as pd  # type: ignore

from audit_analysis import core


logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s'
)

# Report location used when --output is not given on the command line.
DEFAULT_OUTPUT_PATH = Path('output/classification_report.json')


def _validate_input_file(file_path: Path) -> None:
    """Check that *file_path* is an existing regular file with a .csv suffix.

    Args:
        file_path: Path of the audit input file to check.

    Raises:
        FileNotFoundError: If the path does not exist or is not a regular file.
        ValueError: If the suffix is not ``.csv`` (compared case-insensitively).
    """
    # Path.is_file() is already False for non-existent paths, so a separate
    # exists() check is not needed.
    if not file_path.is_file():
        raise FileNotFoundError(f'Eingabedatei nicht gefunden: {file_path}')
    if file_path.suffix.lower() != '.csv':
        raise ValueError(f'Ungültiges Dateiformat: {file_path.suffix}, erwartet .csv')


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run the audit analysis CLI.

    Parses ``--input`` (required) and ``--output`` (optional), validates the
    input file, passes its path to ``core.analyze_audit`` and writes the
    returned result as indented JSON to the output path.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behavior.

    Raises:
        SystemExit: With exit code 1 if input validation fails or if any
            exception occurs during analysis or while writing the report.
    """
    parser = argparse.ArgumentParser(
        description='Analyse von CI-Auditdaten zur Klassifizierung von Unknowns.'
    )
    parser.add_argument(
        '--input',
        required=True,
        help='Pfad zur Input-Audit-Datei (CSV)'
    )
    parser.add_argument(
        '--output',
        required=False,
        help='Pfad zur Ausgabe-Datei (JSON)'
    )

    args = parser.parse_args(argv)
    input_path = Path(args.input)
    output_path = Path(args.output) if args.output else DEFAULT_OUTPUT_PATH

    logging.info('Starte Analyse der Audit-Datei: %s', input_path)
    try:
        _validate_input_file(input_path)
    except (FileNotFoundError, ValueError) as e:
        logging.error('Validierung fehlgeschlagen: %s', e)
        raise SystemExit(1) from e

    # Top-level boundary: any failure is logged with its traceback and turned
    # into a non-zero exit code.
    try:
        result: dict[str, Any] = core.analyze_audit(str(input_path))
        # Serialize fully in memory before touching the filesystem, so that a
        # non-serializable value in the result cannot leave a truncated,
        # invalid JSON file (or an empty output directory) behind.
        payload = json.dumps(result, indent=2, ensure_ascii=False)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(payload, encoding='utf-8')
        logging.info('Bericht gespeichert unter: %s', output_path)
    except Exception as exc:
        logging.exception('Fehler während der Analyse: %s', exc)
        raise SystemExit(1) from exc


if __name__ == '__main__':
    main()