"""Command-line entry point for ``dataset_exporter``.

Loads a JSON file whose top-level value is a list of records and passes it,
together with the requested output format and output path, to
``dataset_exporter.core.export_dataset``.
"""

import argparse
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence

from dataset_exporter.core import ExportOptions, export_dataset

# Single source of truth for the formats accepted by the CLI; used both for
# the argparse ``choices`` and for the post-construction sanity check.
_SUPPORTED_FORMATS = ("jsonl", "csv")


def _load_json_dataset(file_path: str) -> List[Dict[str, Any]]:
    """Read *file_path* as UTF-8 JSON and return its top-level list.

    Only the top-level type is checked; the individual records are returned
    as parsed, without further validation.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
        ValueError: If the parsed JSON document is not a list.
        json.JSONDecodeError: If the file is not valid JSON (propagated from
            ``json.load``).
    """
    # EAFP: open directly instead of checking existence first, which avoids
    # a race between the check and the open call.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Input file not found: {file_path}") from err
    if not isinstance(data, list):
        raise ValueError("Input JSON must contain a list of records.")
    return data


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse command-line arguments and export the input dataset.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so existing ``main()`` callers are
            unaffected.

    Raises:
        FileNotFoundError: If the input file does not exist.
        ValueError: If the input JSON is not a list, or the output format is
            not one of the supported formats.
        RuntimeError: If no file exists at the output path after the export
            call returned.
    """
    parser = argparse.ArgumentParser(
        description="Exportiert ein gegebenes In-Memory-Dataset als JSONL- oder CSV-Datei."
    )
    parser.add_argument(
        "--input", required=True, help="Pfad zur Eingabedatei (z. B. drift_report.json)."
    )
    parser.add_argument(
        "--format",
        required=True,
        choices=_SUPPORTED_FORMATS,
        help="Ausgabeformat ('jsonl' oder 'csv').",
    )
    parser.add_argument(
        "--output", required=True, help="Pfad zur Ausgabedatei."
    )

    args = parser.parse_args(argv)

    dataset = _load_json_dataset(args.input)

    options = ExportOptions(output_format=args.format, output_path=args.output)

    # argparse already restricts --format; this re-check guards the value
    # actually stored on the options object.
    if options.output_format not in _SUPPORTED_FORMATS:
        raise ValueError(f"Unsupported output format: {options.output_format}")

    # Ensure output directory exists
    Path(options.output_path).parent.mkdir(parents=True, exist_ok=True)

    export_dataset(dataset, options.output_format, options.output_path)

    # Explicit raise instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently skip this check.
    if not os.path.isfile(options.output_path):
        raise RuntimeError("Exported file was not created.")


if __name__ == "__main__":
    main()