Add drift_detector/src/drift_detector/core.py
This commit is contained in:
parent
096dc6daba
commit
bae262abb7
1 changed files with 76 additions and 0 deletions
76
drift_detector/src/drift_detector/core.py
Normal file
76
drift_detector/src/drift_detector/core.py
Normal file
|
|
@ -0,0 +1,76 @@
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
|
||||||
|
class DriftSignature:
    """Hold one path together with its normalized form and a drift flag.

    Attributes:
        normalized_path: The normalized variant of the path.
        original_path: The path as it was originally supplied.
        is_drift: Drift flag supplied by the caller.
    """

    def __init__(self, normalized_path: str, original_path: str, is_drift: bool) -> None:
        """Validate and store the three signature fields.

        Raises:
            TypeError: If a path is not a ``str`` or ``is_drift`` is not a
                ``bool``.
        """
        # Raise explicitly instead of using ``assert``: assertions are removed
        # when Python runs with ``-O``, which would silently skip validation.
        if not isinstance(normalized_path, str):
            raise TypeError("normalized_path muss ein String sein")
        if not isinstance(original_path, str):
            raise TypeError("original_path muss ein String sein")
        if not isinstance(is_drift, bool):
            raise TypeError("is_drift muss ein bool sein")
        self.normalized_path = normalized_path
        self.original_path = original_path
        self.is_drift = is_drift

    def __repr__(self) -> str:
        """Return a debug representation listing all three fields."""
        return (
            f"{type(self).__name__}("
            f"normalized_path={self.normalized_path!r}, "
            f"original_path={self.original_path!r}, "
            f"is_drift={self.is_drift!r})"
        )

    def to_dict(self) -> Dict[str, Any]:
        """Return the signature as a plain dictionary."""
        return {
            "normalized_path": self.normalized_path,
            "original_path": self.original_path,
            "is_drift": self.is_drift,
        }
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_path(path_str: str) -> str:
|
||||||
|
"""Normalisiert Pfade durch Entfernen/Ersetzen typischer Driftmuster."""
|
||||||
|
path_str = path_str.strip()
|
||||||
|
path_str = re.sub(r"/+", "/", path_str) # Doppelslashes normalisieren
|
||||||
|
path_str = re.sub(r"(?i)(version_|v)[0-9]+", "version", path_str) # Versionsnummern neutralisieren
|
||||||
|
path_str = re.sub(r"(?i)_?temp_?[0-9]*", "temp", path_str) # temporäre Pattern
|
||||||
|
path_str = re.sub(r"(?i)date=[0-9]{8}", "date=YYYYMMDD", path_str) # Datumsnormalisierung
|
||||||
|
return path_str
|
||||||
|
|
||||||
|
|
||||||
|
def detect_drift(path: str) -> bool:
    """Detect path drift in a JSON Lines log file and write a report.

    Every non-blank line is parsed as a JSON object. Its ``original_path``
    value is converted to ``str`` and normalized; the entry counts as drifted
    when normalization changes the value. A report with one dictionary per
    entry is written to ``output/drift_report.json`` (relative to the current
    working directory); the ``output`` directory is created if it is missing.

    Note: only JSON Lines input is parsed. CSV content is not supported and
    is rejected as invalid JSON.

    Args:
        path: Path to the log file containing the entries.

    Returns:
        True if at least one entry drifted, otherwise False.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
        ValueError: If a line is not valid JSON or is not a JSON object.
        KeyError: If an entry has no ``original_path`` key.
    """
    input_path = Path(path)
    # is_file() is already False for a missing path, so it covers both checks.
    if not input_path.is_file():
        raise FileNotFoundError(f"Datei nicht gefunden: {path}")

    signatures: List[DriftSignature] = []

    with input_path.open("r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Ungültiger JSON-Eintrag: {e}") from e

            # A bare number, string or list is valid JSON but not a usable
            # entry; reject it explicitly instead of failing on the key lookup.
            if not isinstance(entry, dict):
                raise ValueError(
                    "Ungültiger JSON-Eintrag: Objekt erwartet, "
                    f"aber {type(entry).__name__} erhalten"
                )

            if "original_path" not in entry:
                raise KeyError("Fehlender Schlüssel 'original_path' im JSON-Eintrag")

            original_path = str(entry["original_path"])
            normalized_path = _normalize_path(original_path)
            is_drift = normalized_path != original_path
            signatures.append(DriftSignature(normalized_path, original_path, is_drift))

    # Aggregation: the whole file counts as drifted if any entry drifted.
    any_drift = any(sig.is_drift for sig in signatures)

    report = [sig.to_dict() for sig in signatures]
    report_path = Path("output/drift_report.json")
    # Create the target directory first; opening the file for writing would
    # otherwise fail when "output/" does not exist yet.
    report_path.parent.mkdir(parents=True, exist_ok=True)
    with report_path.open("w", encoding="utf-8") as out_f:
        json.dump(report, out_f, indent=2, ensure_ascii=False)

    return any_drift
|
||||||
Loading…
Reference in a new issue