Add dataset_exporter/src/dataset_exporter/core.py
This commit is contained in:
parent
1de3654dab
commit
9449d4a70e
1 changed files with 93 additions and 0 deletions
93
dataset_exporter/src/dataset_exporter/core.py
Normal file
93
dataset_exporter/src/dataset_exporter/core.py
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import csv
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Dict
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
class ExportError(Exception):
    """Signals that a dataset export could not be completed."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExportOptions:
    """Validated settings for a dataset export.

    Attributes:
        output_format: Target file format; must be ``"jsonl"`` or ``"csv"``.
        output_path: Destination file path, as a non-empty string.

    Raises:
        ValueError: On construction, if ``output_format`` is not one of the
            supported formats or ``output_path`` is not a non-empty string.
    """

    output_format: str
    output_path: str

    def __post_init__(self) -> None:
        """Validate both fields right after the generated ``__init__`` runs."""
        valid_formats = {"jsonl", "csv"}

        # The isinstance guard keeps unhashable values (e.g. a list) from
        # escaping as TypeError out of the set membership test; every bad
        # format is reported uniformly as ValueError.
        if not isinstance(self.output_format, str) or self.output_format not in valid_formats:
            # Render the choices in sorted order: the repr of a set of strings
            # changes between interpreter runs (hash randomization), which
            # made the message non-deterministic.
            allowed = "{" + ", ".join(repr(name) for name in sorted(valid_formats)) + "}"
            raise ValueError(f"Invalid format: {self.output_format}. Must be one of {allowed}.")

        if not isinstance(self.output_path, str) or not self.output_path:
            raise ValueError("output_path must be a non-empty string.")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DriftRunRecord:
    """Data holder for the outcome of one CI analysis run.

    A plain dataclass with no validation or behaviour of its own. The notes
    on the fields below are inferred from their names only and should be
    confirmed against whatever produces these records.
    """

    timestamp: datetime  # presumably when the run happened; timezone handling not shown here
    decision: str  # presumably the overall verdict label; allowed values not shown here
    warn_rate: float  # presumably a fraction of warnings; range/units to be confirmed
    fail_count: int
    unknown_count: int
    pinned: bool
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_dataset(dataset: List[Dict]) -> None:
|
||||||
|
if not isinstance(dataset, list):
|
||||||
|
raise ValueError("dataset must be a list of dictionaries.")
|
||||||
|
for entry in dataset:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
raise ValueError("Each item in dataset must be a dictionary.")
|
||||||
|
|
||||||
|
|
||||||
|
def export_dataset(dataset: List[Dict], output_format: str, output_path: str) -> None:
    """Export an in-memory dataset to a JSONL or CSV file.

    The dataset and the options are validated first; then the parent
    directory of ``output_path`` is created if needed and the records are
    handed to the writer for the chosen format.

    Args:
        dataset: List of dictionaries, one per record.
        output_format: ``"jsonl"`` or ``"csv"``.
        output_path: Destination file path (non-empty string).

    Raises:
        ValueError: If ``dataset`` is not a list of dicts, or the format or
            path is rejected by ``ExportOptions``.
        ExportError: If the output directory cannot be created, or the
            format is not handled here.
    """
    _validate_dataset(dataset)

    options = ExportOptions(output_format=output_format, output_path=output_path)
    output_file = Path(options.output_path)

    # Report directory-creation failures the same way the writers report
    # file-write failures, so callers have one exception type to handle for
    # filesystem problems.
    try:
        output_file.parent.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        raise ExportError(f"Failed to create output directory: {e}") from e

    if options.output_format == "jsonl":
        _export_to_jsonl(dataset, output_file)
    elif options.output_format == "csv":
        _export_to_csv(dataset, output_file)
    else:
        # Defensive: ExportOptions already rejects other formats, so this only
        # triggers if the two lists of supported formats drift apart.
        raise ExportError(f"Unsupported output format: {options.output_format}")
|
||||||
|
|
||||||
|
|
||||||
|
def _export_to_jsonl(dataset: List[Dict], output_file: Path) -> None:
|
||||||
|
try:
|
||||||
|
with output_file.open('w', encoding='utf-8') as f:
|
||||||
|
for record in dataset:
|
||||||
|
json.dump(record, f, default=str)
|
||||||
|
f.write('\n')
|
||||||
|
except OSError as e:
|
||||||
|
raise ExportError(f"Failed to write JSONL file: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
|
def _export_to_csv(dataset: List[Dict], output_file: Path) -> None:
|
||||||
|
if not dataset:
|
||||||
|
with output_file.open('w', newline='', encoding='utf-8') as f:
|
||||||
|
f.write("")
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
with output_file.open('w', newline='', encoding='utf-8') as f:
|
||||||
|
fieldnames = list(dataset[0].keys())
|
||||||
|
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||||
|
writer.writeheader()
|
||||||
|
for record in dataset:
|
||||||
|
writer.writerow(record)
|
||||||
|
except OSError as e:
|
||||||
|
raise ExportError(f"Failed to write CSV file: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
|
# Public API of this module. ExportError is included because export_dataset
# raises it and callers need the name to catch it; DriftRunRecord is a public
# dataclass defined in this module.
__all__ = ["DriftRunRecord", "ExportError", "ExportOptions", "export_dataset"]
|
||||||
Loading…
Reference in a new issue