Add drift_report_parser/src/drift_report_parser/core.py
This commit is contained in:
commit
bcb578b3e2
1 changed files with 91 additions and 0 deletions
91
drift_report_parser/src/drift_report_parser/core.py
Normal file
91
drift_report_parser/src/drift_report_parser/core.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from __future__ import annotations
|
||||
import json
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import List, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@dataclass
|
||||
class DriftReportData:
|
||||
"""Strukturierte Repräsentation eines einzelnen Drift-Report-Eintrags."""
|
||||
timestamp: datetime
|
||||
pinned: bool
|
||||
unpinned: bool
|
||||
decision: str
|
||||
rolling_warn_rate: float
|
||||
counts: str
|
||||
label: str
|
||||
comment: str | None = None
|
||||
|
||||
|
||||
class DriftReportParseError(Exception):
    """Raised when a drift report is malformed or cannot be read."""
|
||||
|
||||
|
||||
def _validate_file_path(file_path: str | Path) -> Path:
|
||||
path = Path(file_path)
|
||||
if not path.exists():
|
||||
raise DriftReportParseError(f"Drift report file not found: {path}")
|
||||
if not path.is_file():
|
||||
raise DriftReportParseError(f"Path is not a file: {path}")
|
||||
return path
|
||||
|
||||
|
||||
def _safe_get(data: Dict[str, Any], key: str, default: Any = None) -> Any:
    """Look up ``key`` in ``data`` and fall back to ``default`` when it is absent.

    A key that is present with the value ``None`` yields ``None``, not
    ``default``.
    """
    if key in data:
        return data[key]
    return default
|
||||
|
||||
|
||||
def _parse_timestamp(value: Any) -> datetime:
    """Convert a raw timestamp value into a ``datetime``.

    Args:
        value: Either a ``datetime`` (returned unchanged) or an ISO 8601
            string. Surrounding whitespace is ignored and a trailing ``Z``
            UTC designator is accepted.

    Returns:
        The parsed ``datetime``. ``datetime.min`` is returned as a sentinel
        when the value is missing, has an unsupported type, or is not a valid
        ISO 8601 string.
    """
    if isinstance(value, datetime):
        return value
    if not isinstance(value, str):
        return datetime.min

    text = value.strip()
    # datetime.fromisoformat() only accepts the "Z" suffix from Python 3.11
    # onwards; rewrite it as an explicit offset so older interpreters parse
    # the same timestamps instead of silently falling back to datetime.min.
    if text.endswith(("Z", "z")):
        text = f"{text[:-1]}+00:00"

    try:
        return datetime.fromisoformat(text)
    except ValueError:
        # Deliberate best effort: an unparsable timestamp must not abort the
        # whole report, so the sentinel is returned instead of raising.
        return datetime.min
|
||||
|
||||
|
||||
def parse_drift_report(file_path: str | Path) -> List[Dict[str, Any]]:
    """Read a JSON drift report and extract the drift-signal fields of each entry.

    Args:
        file_path: Path to the input file (e.g. ``drift_report.json``).

    Returns:
        A list with one ``DriftReportData`` dictionary (as produced by
        ``dataclasses.asdict``) per JSON object in the report. Top-level
        items that are not JSON objects are skipped.

    Raises:
        DriftReportParseError: If the path does not name an existing file,
            the file cannot be read or decoded, the content is not valid
            JSON, the top-level value is not a list, or an entry's
            ``rolling_warn_rate`` cannot be converted to a float.
    """
    path = _validate_file_path(file_path)

    try:
        with path.open('r', encoding='utf-8') as f:
            raw_data = json.load(f)
    except json.JSONDecodeError as e:
        raise DriftReportParseError(f"JSON decoding failed: {e}") from e
    except (OSError, UnicodeDecodeError) as e:
        # Unreadable or non-UTF-8 files are reported through the module's own
        # error type, like a missing file, instead of leaking raw I/O errors.
        raise DriftReportParseError(f"Could not read drift report {path}: {e}") from e

    if not isinstance(raw_data, list):
        raise DriftReportParseError("Expected a list of drift report entries.")

    parsed_entries: List[Dict[str, Any]] = []

    for index, entry in enumerate(raw_data):
        # Best effort: anything that is not a JSON object is ignored.
        if not isinstance(entry, dict):
            continue

        raw_rate = _safe_get(entry, 'rolling_warn_rate', 0.0)
        try:
            rolling_warn_rate = float(raw_rate)
        except (TypeError, ValueError) as e:
            # Covers JSON null and non-numeric values for the rate.
            raise DriftReportParseError(
                f"Entry {index}: invalid rolling_warn_rate {raw_rate!r}"
            ) from e

        data = DriftReportData(
            timestamp=_parse_timestamp(_safe_get(entry, 'timestamp')),
            pinned=bool(_safe_get(entry, 'pinned', False)),
            unpinned=bool(_safe_get(entry, 'unpinned', False)),
            decision=str(_safe_get(entry, 'decision', 'UNKNOWN')),
            rolling_warn_rate=rolling_warn_rate,
            counts=str(_safe_get(entry, 'counts', '')),
            label=str(_safe_get(entry, 'label', '')),
            comment=_safe_get(entry, 'comment'),
        )
        parsed_entries.append(asdict(data))

    return parsed_entries
|
||||
Loading…
Reference in a new issue