Add dataset_exporter/src/dataset_exporter/core.py
This commit is contained in:
parent
1de3654dab
commit
9449d4a70e
1 changed files with 93 additions and 0 deletions
93
dataset_exporter/src/dataset_exporter/core.py
Normal file
93
dataset_exporter/src/dataset_exporter/core.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class ExportError(Exception):
    """Signal that a dataset export operation failed."""
|
||||
|
||||
|
||||
@dataclass
class ExportOptions:
    """Configuration options for a dataset export.

    Attributes:
        output_format: Target file format; must be ``"jsonl"`` or ``"csv"``.
        output_path: Destination file path, given as a non-empty string.

    Raises:
        ValueError: If ``output_format`` is not a supported format, or if
            ``output_path`` is not a non-empty string.
    """

    output_format: str
    output_path: str

    def __post_init__(self) -> None:
        """Validate the field values right after dataclass initialization."""
        # An ordered tuple (rather than a set) keeps the error message
        # deterministic across interpreter runs, and tuple membership does
        # not hash the candidate, so an unhashable value is rejected with
        # ValueError instead of leaking a TypeError.
        valid_formats = ("csv", "jsonl")
        if self.output_format not in valid_formats:
            allowed = ", ".join(repr(fmt) for fmt in valid_formats)
            raise ValueError(
                f"Invalid format: {self.output_format}. Must be one of {{{allowed}}}."
            )
        if not isinstance(self.output_path, str) or not self.output_path:
            raise ValueError("output_path must be a non-empty string.")
|
||||
|
||||
|
||||
@dataclass
class DriftRunRecord:
    """One record describing a single CI analysis run.

    The fields are plain data; this class performs no validation.
    """

    # When the run happened (NOTE(review): timezone handling not shown here — confirm).
    timestamp: datetime
    # Outcome label of the run (allowed values are not defined in this file).
    decision: str
    # Presumably the fraction of warnings in the run — TODO confirm range/units.
    warn_rate: float
    # Presumably the number of failed checks — TODO confirm.
    fail_count: int
    # Presumably the number of checks with unknown status — TODO confirm.
    unknown_count: int
    # Boolean "pinned" flag; its exact meaning is not visible in this file.
    pinned: bool
|
||||
|
||||
|
||||
def _validate_dataset(dataset: List[Dict]) -> None:
|
||||
if not isinstance(dataset, list):
|
||||
raise ValueError("dataset must be a list of dictionaries.")
|
||||
for entry in dataset:
|
||||
if not isinstance(entry, dict):
|
||||
raise ValueError("Each item in dataset must be a dictionary.")
|
||||
|
||||
|
||||
def export_dataset(dataset: List[Dict], output_format: str, output_path: str) -> None:
    """Export an in-memory dataset into a structured file (JSONL or CSV).

    Args:
        dataset: List of dictionaries, one per record.
        output_format: Either ``"jsonl"`` or ``"csv"``.
        output_path: Destination file path; missing parent directories are
            created before writing.

    Raises:
        ValueError: If the dataset, the format or the path fails validation.
        ExportError: If the output directory cannot be created or the file
            cannot be written.
    """
    _validate_dataset(dataset)

    # ExportOptions validates the format and the path in __post_init__.
    options = ExportOptions(output_format=output_format, output_path=output_path)
    output_file = Path(options.output_path)

    # Wrap directory-creation failures the same way the writers wrap file
    # I/O failures, so callers only need to handle ExportError for I/O.
    try:
        output_file.parent.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        raise ExportError(f"Failed to create output directory: {e}") from e

    if options.output_format == "jsonl":
        _export_to_jsonl(dataset, output_file)
    elif options.output_format == "csv":
        _export_to_csv(dataset, output_file)
    else:
        # Defensive: ExportOptions already rejects other formats.
        raise ExportError(f"Unsupported output format: {options.output_format}")
|
||||
|
||||
|
||||
def _export_to_jsonl(dataset: List[Dict], output_file: Path) -> None:
|
||||
try:
|
||||
with output_file.open('w', encoding='utf-8') as f:
|
||||
for record in dataset:
|
||||
json.dump(record, f, default=str)
|
||||
f.write('\n')
|
||||
except OSError as e:
|
||||
raise ExportError(f"Failed to write JSONL file: {e}") from e
|
||||
|
||||
|
||||
def _export_to_csv(dataset: List[Dict], output_file: Path) -> None:
|
||||
if not dataset:
|
||||
with output_file.open('w', newline='', encoding='utf-8') as f:
|
||||
f.write("")
|
||||
return
|
||||
|
||||
try:
|
||||
with output_file.open('w', newline='', encoding='utf-8') as f:
|
||||
fieldnames = list(dataset[0].keys())
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
for record in dataset:
|
||||
writer.writerow(record)
|
||||
except OSError as e:
|
||||
raise ExportError(f"Failed to write CSV file: {e}") from e
|
||||
|
||||
|
||||
# ExportError is exported because export_dataset raises it; callers need the
# name in order to catch it.
__all__ = ["ExportError", "ExportOptions", "export_dataset"]
|
||||
Loading…
Reference in a new issue