Add drift_detector/tests/test_core.py

2026-02-14 15:32:02 +00:00 · 2026-02-14 15:32:02 +00:00 · 3153653bf3
commit 3153653bf3
parent 03498ae476
1 changed files with 75 additions and 0 deletions
--- a/drift_detector/tests/test_core.py
+++ b/drift_detector/tests/test_core.py
@@ -0,0 +1,75 @@
 import json
 import io
 import tempfile
 import pytest
 from pathlib import Path
 import drift_detector.core as core
@pytest.fixture
def sample_jsonl_data():
    """Yield the path of a temporary JSONL file holding three path records.

    Two records live under ``data/run_001`` and the third under
    ``data/run_002``, so the file mixes matching and differing run ids.
    The file is fully written and closed before the path is handed to the
    test, and it is removed again once the test has finished.

    Yields:
        pathlib.Path: Location of the temporary JSONL file.
    """
    entries = [
        {"original_path": "data/run_001/file_A.txt"},
        {"original_path": "data/run_001/file_B.txt"},
        {"original_path": "data/run_002/file_A.txt"},  # drifted run id
    ]
    content = "\n".join(json.dumps(e) for e in entries)

    # Close the handle before yielding so the data is on disk and the code
    # under test reads it through its own handle; explicit UTF-8 keeps this
    # fixture consistent with the files written by the other tests.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", delete=False
    ) as tmp:
        tmp.write(content)

    path = Path(tmp.name)
    try:
        yield path
    finally:
        # delete=False means nothing else removes the file for us.
        path.unlink(missing_ok=True)
 def test_drift_signature_to_dict():
    """DriftSignature.to_dict() exposes exactly the three signature fields."""
    expected_keys = {"is_drift", "original_path", "normalized_path"}
    signature = core.DriftSignature(
        normalized_path="data/file.txt",
        original_path="data/run_001/file.txt",
        is_drift=True,
    )

    payload = signature.to_dict()

    # No extra and no missing keys in the serialised form.
    assert set(payload.keys()) == expected_keys
    # The flag must survive as the real boolean, not merely a truthy value.
    assert payload["is_drift"] is True
    assert "data/" in payload["normalized_path"]
 def test_detect_drift_returns_bool(sample_jsonl_data):
    """detect_drift() returns a plain bool for the sample JSONL file."""
    jsonl_path = str(sample_jsonl_data)
    outcome = core.detect_drift(jsonl_path)
    assert isinstance(outcome, bool)
 def test_detect_drift_detects_real_drift(tmp_path):
    """detect_drift() is True when two records differ only in run directory."""
    records = [
        {"original_path": "logs/run_01/out.json"},
        {"original_path": "logs/run_02/out.json"},
    ]
    jsonl_file = tmp_path / "data.jsonl"
    # One JSON document per line, each terminated by a newline.
    jsonl_file.write_text(
        "".join(json.dumps(record) + "\n" for record in records),
        encoding="utf-8",
    )

    # run_01 vs run_02 induces drift
    assert core.detect_drift(str(jsonl_file)) is True
 def test_detect_drift_no_drift(tmp_path):
    """detect_drift() is False when every record shares the same directory."""
    target = tmp_path / "ok.jsonl"
    serialised = [
        json.dumps({"original_path": path}) + "\n"
        for path in (
            "project/data_001/fileA.log",
            "project/data_001/fileB.log",
        )
    ]
    with target.open("w", encoding="utf-8") as handle:
        handle.writelines(serialised)

    drifted = core.detect_drift(str(target))
    assert drifted is False
 def test_detect_drift_invalid_file(tmp_path):
    """detect_drift() raises when a line of the input is not valid JSON."""
    malformed = tmp_path / "bad.jsonl"
    malformed.write_text("{not valid json}\n", encoding="utf-8")

    # json.JSONDecodeError subclasses ValueError; both are listed so the
    # accepted error types are spelled out explicitly.
    accepted_errors = (ValueError, json.JSONDecodeError)
    with pytest.raises(accepted_errors):
        core.detect_drift(str(malformed))