From 3153653bf3493c7ff604beae13e008dbec1287cf Mon Sep 17 00:00:00 2001
From: Mika
Date: Sat, 14 Feb 2026 15:32:02 +0000
Subject: [PATCH] Add drift_detector/tests/test_core.py

---
 drift_detector/tests/test_core.py | 92 +++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 drift_detector/tests/test_core.py

diff --git a/drift_detector/tests/test_core.py b/drift_detector/tests/test_core.py
new file mode 100644
index 0000000..949ca72
--- /dev/null
+++ b/drift_detector/tests/test_core.py
@@ -0,0 +1,92 @@
+import json
+import io
+import tempfile
+import pytest
+from pathlib import Path
+
+import drift_detector.core as core
+
+
+def _write_jsonl(path, rows):
+    """Write ``rows`` to ``path`` as JSON Lines and return ``path``."""
+    path.write_text(
+        "".join(json.dumps(row) + "\n" for row in rows),
+        encoding="utf-8",
+    )
+    return path
+
+
+@pytest.fixture
+def sample_jsonl_data(tmp_path):
+    """Provide the path of a small JSONL file mixing two run ids.
+
+    The file is written under pytest's ``tmp_path`` and closed before the
+    test reads it; pytest manages cleanup of the directory.
+    """
+    entries = [
+        {"original_path": "data/run_001/file_A.txt"},
+        {"original_path": "data/run_001/file_B.txt"},
+        {"original_path": "data/run_002/file_A.txt"},  # drifted run id
+    ]
+    file_path = tmp_path / "sample.jsonl"
+    # Records are joined without a trailing newline after the last one.
+    file_path.write_text(
+        "\n".join(json.dumps(entry) for entry in entries),
+        encoding="utf-8",
+    )
+    return file_path
+
+
+def test_drift_signature_to_dict():
+    """``to_dict`` returns exactly the three expected keys."""
+    sig = core.DriftSignature(
+        normalized_path="data/file.txt",
+        original_path="data/run_001/file.txt",
+        is_drift=True,
+    )
+    d = sig.to_dict()
+    assert set(d.keys()) == {"normalized_path", "original_path", "is_drift"}
+    assert d["is_drift"] is True
+    assert "data/" in d["normalized_path"]
+
+
+def test_detect_drift_returns_bool(sample_jsonl_data):
+    """``detect_drift`` returns a ``bool`` for a well-formed JSONL file."""
+    result = core.detect_drift(str(sample_jsonl_data))
+    assert isinstance(result, bool)
+
+
+def test_detect_drift_detects_real_drift(tmp_path):
+    """Paths that differ only in their run id are reported as drift."""
+    file_path = _write_jsonl(
+        tmp_path / "data.jsonl",
+        [
+            {"original_path": "logs/run_01/out.json"},
+            {"original_path": "logs/run_02/out.json"},
+        ],
+    )
+    res = core.detect_drift(str(file_path))
+    assert res is True  # run_01 vs run_02 induces drift
+
+
+def test_detect_drift_no_drift(tmp_path):
+    """Paths under the same numbered directory are not reported as drift."""
+    file_path = _write_jsonl(
+        tmp_path / "ok.jsonl",
+        [
+            {"original_path": "project/data_001/fileA.log"},
+            {"original_path": "project/data_001/fileB.log"},
+        ],
+    )
+    res = core.detect_drift(str(file_path))
+    assert res is False
+
+
+def test_detect_drift_invalid_file(tmp_path):
+    """Malformed JSON input makes ``detect_drift`` raise ``ValueError``."""
+    bad_file = tmp_path / "bad.jsonl"
+    bad_file.write_text("{not valid json}\n", encoding="utf-8")
+    # json.JSONDecodeError subclasses ValueError, so this single check also
+    # accepts a raw decode error propagated from the parser.
+    with pytest.raises(ValueError):
+        core.detect_drift(str(bad_file))