Add outlier_analysis/tests/test_core.py

2026-03-13 16:23:01 +00:00 · 2026-03-13 16:23:01 +00:00 · e6a47c9e7c
commit e6a47c9e7c
parent 8da7d82862
1 changed files with 70 additions and 0 deletions
--- a/outlier_analysis/tests/test_core.py
+++ b/outlier_analysis/tests/test_core.py
@@ -0,0 +1,70 @@
 import json
 import pytest
 from pathlib import Path
 from outlier_analysis.core import analyze_outliers
 def load_sample_data():
    """Return the log entries the tests run against.

    Loads ``data/outlier_sample.json`` from the directory of this module when
    that file exists; otherwise builds ten synthetic entries in memory.
    """
    fixture_file = Path(__file__).parent / 'data' / 'outlier_sample.json'
    if fixture_file.exists():
        with open(fixture_file, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    # No sample file on disk: fall back to a small deterministic data set.
    synthetic = []
    for idx in range(10):
        # First five entries belong to stratum 'A', the remaining five to 'B'.
        stratum = 'A' if idx < 5 else 'B'
        synthetic.append({
            'corr_id': f'c{idx}',
            'stratum': stratum,
            'job_parallelism': 2,
            'expires_at_dist_hours': 10.0 + idx,
            't_gate_read': 1.0,
            't_index_visible': 2.0,
            'retry_total_overhead_ms': float(idx * 10),
            'policy_hash': 'abc',
            'setup_fingerprint': 'fp1',
        })
    return synthetic
@pytest.fixture
 def log_entries():
    data = load_sample_data()
    assert isinstance(data, list)
    assert all(isinstance(x, dict) for x in data)
    return data
 def test_analyze_outliers_basic_stats(log_entries):
    """The result is a dict holding every summary statistic and a cluster list."""
    expected_keys = ('mean', 'median', 'p90', 'p95', 'p99', 'max', 'clusters')
    summary = analyze_outliers(log_entries)
    assert isinstance(summary, dict)
    # Name the absent key in the failure message so the gap is obvious.
    for key in expected_keys:
        assert key in summary, f"Missing key: {key}"
    assert isinstance(summary['clusters'], list)
 def test_mean_and_p95_values(log_entries):
    """The mean is non-negative and p95 is at least as large as the mean."""
    stats = analyze_outliers(log_entries)
    # Read both values up front so a missing key surfaces before any comparison.
    mean_value, p95_value = stats['mean'], stats['p95']
    assert mean_value >= 0.0
    assert p95_value >= mean_value, "p95 should not be smaller than mean"
 def test_cluster_detection_consistency(log_entries):
    """Appending copies of the first two entries yields at least one cluster."""
    # Shallow copies of the first two entries act as the repeated pattern.
    first_copy = dict(log_entries[0])
    second_copy = dict(log_entries[1])
    augmented = log_entries + [first_copy, second_copy]

    found = analyze_outliers(augmented)['clusters']
    assert isinstance(found, list)
    # With duplicated entries present, at least one cluster must be reported.
    assert len(found) >= 1
    # Every cluster is a dict carrying a 'pattern' or a 'members' key.
    for cluster in found:
        assert isinstance(cluster, dict)
        assert any(field in cluster for field in ('pattern', 'members'))
 def test_invalid_input_raises():
    """Passing a plain string instead of a list of entries raises an error."""
    acceptable_errors = (TypeError, KeyError, ValueError)
    with pytest.raises(acceptable_errors):
        analyze_outliers('not_a_list')