Add drift_detector/tests/test_core.py
This commit is contained in:
parent
03498ae476
commit
3153653bf3
1 changed files with 75 additions and 0 deletions
75
drift_detector/tests/test_core.py
Normal file
75
drift_detector/tests/test_core.py
Normal file
|
|
@@ -0,0 +1,75 @@
|
|||
import json
|
||||
import io
|
||||
import tempfile
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
|
||||
import drift_detector.core as core
|
||||
|
||||
|
||||
@pytest.fixture
def sample_jsonl_data():
    """Yield the path of a temporary JSONL file holding three records.

    Each record is a JSON object with a single ``original_path`` key. Two
    records live under ``run_001`` and one under ``run_002``. The file is
    fully written and closed before its path is handed to the test, and it
    is removed again once the test has finished.

    Yields:
        Path: location of the temporary ``.jsonl`` file.
    """
    entries = [
        {"original_path": "data/run_001/file_A.txt"},
        {"original_path": "data/run_001/file_B.txt"},
        {"original_path": "data/run_002/file_A.txt"},  # drifted run id
    ]
    content = "\n".join(json.dumps(e) for e in entries)

    # delete=False keeps the file on disk after the ``with`` block closes the
    # handle, so the code under test can reopen it by name; the explicit
    # encoding avoids depending on the platform default.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".jsonl", encoding="utf-8", delete=False
    ) as tmp:
        path = Path(tmp.name)
        try:
            tmp.write(content)
        except BaseException:
            # Setup failed before the test could run: do not leave the file behind.
            tmp.close()
            path.unlink(missing_ok=True)
            raise

    try:
        yield path
    finally:
        path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def test_drift_signature_to_dict():
    """Check the keys and selected values of ``DriftSignature.to_dict()``."""
    signature = core.DriftSignature(
        normalized_path="data/file.txt",
        original_path="data/run_001/file.txt",
        is_drift=True,
    )

    as_dict = signature.to_dict()

    expected_keys = {"normalized_path", "original_path", "is_drift"}
    assert set(as_dict.keys()) == expected_keys
    assert as_dict["is_drift"] is True
    assert "data/" in as_dict["normalized_path"]
|
||||
|
||||
|
||||
def test_detect_drift_returns_bool(sample_jsonl_data):
    """``detect_drift`` must hand back a ``bool`` for the sample JSONL file."""
    jsonl_path = str(sample_jsonl_data)
    outcome = core.detect_drift(jsonl_path)
    assert isinstance(outcome, bool)
|
||||
|
||||
|
||||
def test_detect_drift_detects_real_drift(tmp_path):
    """Expect ``detect_drift`` to return ``True`` for run_01 vs run_02 paths."""
    records = (
        {"original_path": "logs/run_01/out.json"},
        {"original_path": "logs/run_02/out.json"},
    )
    jsonl_file = tmp_path / "data.jsonl"
    # One JSON document per line, each terminated by a newline.
    jsonl_file.write_text(
        "".join(json.dumps(record) + "\n" for record in records),
        encoding="utf-8",
    )

    # run_01 vs run_02 induces drift
    assert core.detect_drift(str(jsonl_file)) is True
|
||||
|
||||
|
||||
def test_detect_drift_no_drift(tmp_path):
    """Expect ``detect_drift`` to return ``False`` when both paths share ``data_001``."""
    rows = (
        {"original_path": "project/data_001/fileA.log"},
        {"original_path": "project/data_001/fileB.log"},
    )
    target = tmp_path / "ok.jsonl"
    with target.open("w", encoding="utf-8") as handle:
        # Newline-terminated JSON objects, one per record.
        handle.writelines(json.dumps(entry) + "\n" for entry in rows)

    drifted = core.detect_drift(str(target))
    assert drifted is False
|
||||
|
||||
|
||||
def test_detect_drift_invalid_file(tmp_path):
    """Expect ``detect_drift`` to raise on a file whose only line is not valid JSON."""
    malformed = tmp_path / "bad.jsonl"
    malformed.write_text("{not valid json}\n", encoding="utf-8")

    expected_errors = (ValueError, json.JSONDecodeError)
    with pytest.raises(expected_errors):
        core.detect_drift(str(malformed))
|
||||
Loading…
Reference in a new issue