Add outlier_analysis/tests/test_core.py

This commit is contained in:
Mika 2026-03-13 16:23:01 +00:00
parent 8da7d82862
commit e6a47c9e7c

View file

@@ -0,0 +1,70 @@
import json
import pytest
from pathlib import Path
from outlier_analysis.core import analyze_outliers
def load_sample_data():
    """Return the log entries used as input by the tests in this module.

    Looks for ``data/outlier_sample.json`` in the directory containing this
    file and returns its parsed JSON content. When that file does not exist,
    a list of ten synthetic entries is returned instead, so the tests can
    still run without the sample file.

    Returns:
        The parsed JSON document, or the synthetic fallback list of dicts.
    """
    sample_path = Path(__file__).parent / 'data' / 'outlier_sample.json'
    if not sample_path.exists():
        # Fallback synthetic data: entries 0-4 are stratum 'A', 5-9 are 'B';
        # expiry distance and retry overhead grow linearly with the index.
        return [
            {
                'corr_id': f'c{i}',
                'stratum': 'A' if i < 5 else 'B',
                'job_parallelism': 2,
                'expires_at_dist_hours': 10.0 + i,
                't_gate_read': 1.0,
                't_index_visible': 2.0,
                'retry_total_overhead_ms': float(i * 10),
                'policy_hash': 'abc',
                'setup_fingerprint': 'fp1',
            }
            for i in range(10)
        ]
    with open(sample_path, 'r', encoding='utf-8') as f:
        return json.load(f)
@pytest.fixture
def log_entries():
    """Provide the sample log entries to a test.

    Loads the data through ``load_sample_data`` and checks that it is a list
    whose items are all dicts before handing it to the test, so a malformed
    sample file fails here with a clear message rather than inside the code
    under test.
    """
    data = load_sample_data()
    assert isinstance(data, list), "sample data must be a list"
    assert all(isinstance(x, dict) for x in data), (
        "every sample entry must be a dict"
    )
    return data
def test_analyze_outliers_basic_stats(log_entries):
    """Check that the analysis result is a dict exposing every expected key."""
    result = analyze_outliers(log_entries)
    assert isinstance(result, dict)
    # Validate presence of expected keys
    for key in ('mean', 'median', 'p90', 'p95', 'p99', 'max', 'clusters'):
        assert key in result, f"Missing key: {key}"
    assert isinstance(result['clusters'], list)
def test_mean_and_p95_values(log_entries):
    """Check that the mean is non-negative and does not exceed p95."""
    result = analyze_outliers(log_entries)
    mean_value = result['mean']
    p95_value = result['p95']
    assert mean_value >= 0.0
    # NOTE(review): p95 >= mean is not a general statistical invariant (a few
    # extreme values can lift the mean above p95); this presumably relies on
    # the shape of the sample data - confirm if the sample file changes.
    assert p95_value >= mean_value, "p95 should not be smaller than mean"
def test_cluster_detection_consistency(log_entries):
    """Check that duplicated entries yield at least one well-formed cluster.

    Requires ``log_entries`` to contain at least two entries, because the
    first two are copied and appended to the input.
    """
    # Duplicate certain patterns to enforce a detectable cluster; dict(...)
    # makes shallow copies so the duplicates are distinct objects.
    repeated = log_entries + [dict(log_entries[0]), dict(log_entries[1])]
    result = analyze_outliers(repeated)
    clusters = result['clusters']
    assert isinstance(clusters, list)
    # Some clustering should be reported when duplicates exist
    assert len(clusters) >= 1
    # Each cluster entry should have consistent keys
    for c in clusters:
        assert isinstance(c, dict)
        assert 'pattern' in c or 'members' in c
def test_invalid_input_raises():
    """Check that a plain string input is rejected with an exception.

    Any of TypeError, KeyError or ValueError is accepted.
    """
    with pytest.raises((TypeError, KeyError, ValueError)):
        analyze_outliers('not_a_list')