Add drift_detector/tests/test_core.py
This commit is contained in:
parent
03498ae476
commit
3153653bf3
1 changed files with 75 additions and 0 deletions
75
drift_detector/tests/test_core.py
Normal file
75
drift_detector/tests/test_core.py
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
import json
|
||||||
|
import io
|
||||||
|
import tempfile
|
||||||
|
import pytest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import drift_detector.core as core
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_jsonl_data():
    """Yield the path of a temporary three-record JSONL file.

    Two records point under ``run_001`` and one under ``run_002``. Records
    are separated by newlines with no trailing newline. The file is removed
    once the requesting test has finished.
    """
    entries = [
        {"original_path": "data/run_001/file_A.txt"},
        {"original_path": "data/run_001/file_B.txt"},
        {"original_path": "data/run_002/file_A.txt"},  # drifted run id
    ]
    content = "\n".join(json.dumps(e) for e in entries)
    # delete=False keeps the file on disk after the handle is closed, so the
    # test can reopen it by name without this fixture still holding it open.
    with tempfile.NamedTemporaryFile(
        delete=False, mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(content)
    path = Path(tmp.name)
    try:
        yield path
    finally:
        # Runs on normal teardown and also if the generator is closed early.
        path.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_drift_signature_to_dict():
    """Check the keys and selected values of ``DriftSignature.to_dict()``."""
    expected_keys = {"normalized_path", "original_path", "is_drift"}
    signature = core.DriftSignature(
        normalized_path="data/file.txt",
        original_path="data/run_001/file.txt",
        is_drift=True,
    )

    as_dict = signature.to_dict()

    # Exactly the three fields passed to the constructor, nothing more.
    assert set(as_dict.keys()) == expected_keys
    assert as_dict["is_drift"] is True
    assert "data/" in as_dict["normalized_path"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_drift_returns_bool(sample_jsonl_data):
    """``detect_drift`` returns a ``bool`` for the sample JSONL fixture."""
    jsonl_path = str(sample_jsonl_data)
    outcome = core.detect_drift(jsonl_path)
    assert isinstance(outcome, bool)
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_drift_detects_real_drift(tmp_path):
    """``detect_drift`` is True when records differ only in their run id."""
    records = [
        {"original_path": "logs/run_01/out.json"},
        {"original_path": "logs/run_02/out.json"},
    ]
    jsonl_file = tmp_path / "data.jsonl"
    # One JSON document per line, each line newline-terminated.
    payload = "".join(json.dumps(record) + "\n" for record in records)
    jsonl_file.write_text(payload, encoding="utf-8")

    drifted = core.detect_drift(str(jsonl_file))

    assert drifted is True  # run_01 vs run_02 induces drift
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_drift_no_drift(tmp_path):
    """``detect_drift`` is False when every record shares ``data_001``."""
    records = [
        {"original_path": "project/data_001/fileA.log"},
        {"original_path": "project/data_001/fileB.log"},
    ]
    target = tmp_path / "ok.jsonl"
    with open(target, "w", encoding="utf-8") as handle:
        # One JSON document per line, each line newline-terminated.
        handle.writelines(json.dumps(record) + "\n" for record in records)

    outcome = core.detect_drift(str(target))

    assert outcome is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_drift_invalid_file(tmp_path):
    """``detect_drift`` raises when a line of the file is not valid JSON."""
    malformed = tmp_path / "bad.jsonl"
    malformed.write_text("{not valid json}\n", encoding="utf-8")

    # json.JSONDecodeError is a ValueError subclass; both are listed to make
    # the accepted failure modes explicit.
    accepted_errors = (ValueError, json.JSONDecodeError)
    with pytest.raises(accepted_errors):
        core.detect_drift(str(malformed))
|
||||||
Loading…
Reference in a new issue