Add dataset_exporter/src/dataset_exporter/cli.py
This commit is contained in:
parent
9449d4a70e
commit
a7a8aa5294
1 changed files with 53 additions and 0 deletions
53
dataset_exporter/src/dataset_exporter/cli.py
Normal file
53
dataset_exporter/src/dataset_exporter/cli.py
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, List, Dict
|
||||||
|
|
||||||
|
from dataset_exporter.core import export_dataset, ExportOptions
|
||||||
|
|
||||||
|
|
||||||
|
def _load_json_dataset(file_path: str) -> List[Dict[str, Any]]:
    """Load the input dataset from a JSON file.

    Args:
        file_path: Path to a JSON file whose top-level value is a list.

    Returns:
        The parsed top-level list. The individual elements are returned
        exactly as parsed; they are not validated here.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the top-level JSON value is not a list. Malformed
            JSON surfaces as ``json.JSONDecodeError``, which is a
            ``ValueError`` subclass.
    """
    try:
        # Open directly instead of checking os.path.exists() first: the
        # file could disappear between the check and the open.
        # "utf-8-sig" reads plain UTF-8 unchanged but also strips a leading
        # byte-order mark, which json.load() would otherwise reject.
        handle = open(file_path, "r", encoding="utf-8-sig")
    except FileNotFoundError:
        raise FileNotFoundError(f"Input file not found: {file_path}") from None

    with handle:
        data = json.load(handle)

    if not isinstance(data, list):
        raise ValueError("Input JSON must contain a list of records.")
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: load a JSON dataset and export it.

    Parses ``--input``, ``--format`` and ``--output`` from the command line,
    loads the input file with ``_load_json_dataset``, creates the parent
    directory of the output path if it is missing, and calls
    ``export_dataset`` with the dataset, format and output path.

    Raises:
        ValueError: If the format held by the export options is neither
            "jsonl" nor "csv".
        RuntimeError: If no file exists at the output path after the export.

    Exceptions raised while loading the input file propagate unchanged.
    """
    parser = argparse.ArgumentParser(
        description="Exportiert ein gegebenes In-Memory-Dataset als JSONL- oder CSV-Datei."
    )
    parser.add_argument(
        "--input", required=True, help="Pfad zur Eingabedatei (z. B. drift_report.json)."
    )
    parser.add_argument(
        "--format", required=True, choices=["jsonl", "csv"], help="Ausgabeformat ('jsonl' oder 'csv')."
    )
    parser.add_argument(
        "--output", required=True, help="Pfad zur Ausgabedatei."
    )

    args = parser.parse_args()

    dataset = _load_json_dataset(args.input)

    options = ExportOptions(output_format=args.format, output_path=args.output)

    # argparse already restricts --format via `choices`; this re-checks the
    # value as stored on the options object before anything is written.
    if options.output_format not in {"jsonl", "csv"}:
        raise ValueError(f"Unsupported output format: {options.output_format}")

    # Ensure the output directory exists.
    Path(options.output_path).parent.mkdir(parents=True, exist_ok=True)

    export_dataset(dataset, options.output_format, options.output_path)

    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would let a failed export go unnoticed.
    if not os.path.isfile(options.output_path):
        raise RuntimeError("Exported file was not created.")
|
||||||
|
|
||||||
|
|
||||||
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in a new issue