Add dataset_exporter/src/dataset_exporter/cli.py

This commit is contained in:
Mika 2026-01-31 13:07:40 +00:00
parent 9449d4a70e
commit a7a8aa5294

View file

@ -0,0 +1,53 @@
import argparse
import json
import os
from pathlib import Path
from typing import Any, List, Dict
from dataset_exporter.core import export_dataset, ExportOptions
def _load_json_dataset(file_path: str) -> List[Dict[str, Any]]:
if not os.path.exists(file_path):
raise FileNotFoundError(f"Input file not found: {file_path}")
with open(file_path, "r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, list):
raise ValueError("Input JSON must contain a list of records.")
return data
def main() -> None:
    """Command-line entry point: load a JSON dataset and export it.

    Parses ``--input``, ``--format`` and ``--output`` from the command line,
    loads the input file with ``_load_json_dataset``, builds an
    ``ExportOptions`` from the arguments, creates the output file's parent
    directory if it is missing, and calls ``export_dataset``.

    Raises:
        SystemExit: Raised by argparse when arguments are missing or invalid.
        FileNotFoundError: If the input file does not exist.
        ValueError: If the input JSON is not a list, or the output format is
            neither ``"jsonl"`` nor ``"csv"``.
        RuntimeError: If no file exists at the output path after the export.
    """
    parser = argparse.ArgumentParser(
        description="Exportiert ein gegebenes In-Memory-Dataset als JSONL- oder CSV-Datei."
    )
    parser.add_argument(
        "--input", required=True, help="Pfad zur Eingabedatei (z. B. drift_report.json)."
    )
    parser.add_argument(
        "--format", required=True, choices=["jsonl", "csv"], help="Ausgabeformat ('jsonl' oder 'csv')."
    )
    parser.add_argument(
        "--output", required=True, help="Pfad zur Ausgabedatei."
    )
    args = parser.parse_args()

    dataset = _load_json_dataset(args.input)
    options = ExportOptions(output_format=args.format, output_path=args.output)

    # argparse's ``choices`` already restricts --format; this re-checks the
    # value as stored on the options object before anything is written.
    if options.output_format not in {"jsonl", "csv"}:
        raise ValueError(f"Unsupported output format: {options.output_format}")

    # Ensure the output directory exists so the export can create the file.
    Path(options.output_path).parent.mkdir(parents=True, exist_ok=True)

    export_dataset(dataset, options.output_format, options.output_path)

    # Explicit check instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let a failed export pass silently.
    if not os.path.isfile(options.output_path):
        raise RuntimeError("Exported file was not created.")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()