Add dataset_exporter/src/dataset_exporter/core.py

This commit is contained in:
Mika 2026-01-31 13:07:40 +00:00
parent 1de3654dab
commit 9449d4a70e

View file

@@ -0,0 +1,93 @@
from __future__ import annotations
import json
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict
from datetime import datetime
class ExportError(Exception):
    """Signal that a dataset export could not be completed.

    Raised by the export helpers in this module, for example when the
    output file cannot be written or no writer exists for a format.
    """
@dataclass
class ExportOptions:
    """Validated configuration for a single dataset export.

    Attributes:
        output_format: Target file format; ``"jsonl"`` or ``"csv"``.
        output_path: Destination file path as a non-empty string.

    Raises:
        ValueError: If ``output_format`` is not a supported format, or if
            ``output_path`` is not a non-empty string.
    """

    output_format: str
    output_path: str

    def __post_init__(self) -> None:
        """Validate both fields immediately after initialisation."""
        # A tuple instead of a set: the order shown in the error message is
        # stable across runs (set order depends on string hash
        # randomization), and an unhashable value such as a list fails the
        # membership test with ValueError rather than an unrelated TypeError.
        valid_formats = ("csv", "jsonl")
        if self.output_format not in valid_formats:
            raise ValueError(
                f"Invalid format: {self.output_format}. "
                f"Must be one of {list(valid_formats)}."
            )
        if not isinstance(self.output_path, str) or not self.output_path:
            raise ValueError("output_path must be a non-empty string.")
@dataclass
class DriftRunRecord:
    """One record describing a single CI analysis run.

    This is a plain data holder: the class declares typed fields only and
    performs no validation of its own.
    """

    # NOTE(review): the field meanings below are inferred from the names
    # only; confirm against whatever produces these records.
    timestamp: datetime  # presumably when the run took place
    decision: str  # presumably the overall outcome of the run
    warn_rate: float  # presumably the share of warnings in the run
    fail_count: int  # presumably the number of failed checks
    unknown_count: int  # presumably the number of unclassified checks
    pinned: bool  # presumably whether the run used a pinned configuration
def _validate_dataset(dataset: List[Dict]) -> None:
    """Check that ``dataset`` is a list whose items are all dictionaries.

    Args:
        dataset: The object to validate.

    Raises:
        ValueError: If ``dataset`` is not a list, or if any of its items
            is not a dictionary.
    """
    if not isinstance(dataset, list):
        raise ValueError("dataset must be a list of dictionaries.")
    if any(not isinstance(item, dict) for item in dataset):
        raise ValueError("Each item in dataset must be a dictionary.")
def export_dataset(dataset: List[Dict], output_format: str, output_path: str) -> None:
    """Write an in-memory dataset to a structured file (JSONL or CSV).

    Args:
        dataset: List of dictionaries, one per record.
        output_format: Either ``"jsonl"`` or ``"csv"``.
        output_path: Destination file path; missing parent directories
            are created before writing.

    Raises:
        ValueError: If the dataset, the format or the path fails validation.
        ExportError: If no writer exists for the format, or if a writer
            reports that the file could not be written.
    """
    _validate_dataset(dataset)
    options = ExportOptions(output_format=output_format, output_path=output_path)

    target = Path(options.output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Map each supported format to the helper that writes it.
    writers = {
        "jsonl": _export_to_jsonl,
        "csv": _export_to_csv,
    }
    write = writers.get(options.output_format)
    if write is None:
        raise ExportError(f"Unsupported output format: {options.output_format}")
    write(dataset, target)
def _export_to_jsonl(dataset: List[Dict], output_file: Path) -> None:
    """Write ``dataset`` to ``output_file`` as JSON Lines.

    Each record becomes one JSON document followed by a newline. Values
    that JSON cannot encode natively are converted with ``str``.

    Args:
        dataset: Records to serialise, one per output line.
        output_file: Destination path; an existing file is overwritten.

    Raises:
        ExportError: If opening or writing the file raises ``OSError``.
    """
    try:
        with open(output_file, mode="w", encoding="utf-8") as sink:
            for row in dataset:
                json.dump(row, sink, default=str)
                sink.write("\n")
    except OSError as err:
        raise ExportError(f"Failed to write JSONL file: {err}") from err
def _export_to_csv(dataset: List[Dict], output_file: Path) -> None:
    """Write ``dataset`` to ``output_file`` as CSV with a header row.

    The columns are the union of the keys of all records, in the order
    they are first seen. A record that lacks a column gets an empty cell
    there. An empty dataset produces an empty file without a header.

    Args:
        dataset: Records to write, one per CSV row.
        output_file: Destination path; an existing file is overwritten.

    Raises:
        ExportError: If opening or writing the file raises ``OSError``.
    """
    # Collect columns from every record, not only the first one: DictWriter
    # raises ValueError on a key missing from fieldnames, which previously
    # aborted the export midway for datasets with differing keys.
    fieldnames = list(dict.fromkeys(key for record in dataset for key in record))
    try:
        with output_file.open('w', newline='', encoding='utf-8') as f:
            if not dataset:
                # Nothing to derive columns from; leave the file empty.
                return
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(dataset)
    except OSError as e:
        raise ExportError(f"Failed to write CSV file: {e}") from e
# Public API of this module. ExportError is listed so that callers relying
# on a star-import can catch the exception that export_dataset raises.
__all__ = ["DriftRunRecord", "ExportError", "ExportOptions", "export_dataset"]