diff --git a/metrics_analysis_script/tests/test_main.py b/metrics_analysis_script/tests/test_main.py
new file mode 100644
index 0000000..60239f8
--- /dev/null
+++ b/metrics_analysis_script/tests/test_main.py
@@ -0,0 +1,64 @@
+import pytest
+import math
+import numpy as np
+
+from metrics_analysis_script import main
+
+
+@pytest.fixture
+def sample_data():
+    return [
+        {
+            'metric_name': 'mix_window_p95',
+            'values': [10.0, 12.0, 11.0, 9.0, 13.0, 10.5],
+            'pinned_flag': True
+        },
+        {
+            'metric_name': 'mix_window_p95',
+            'values': [20.0, 21.0, 19.0, 22.0, 23.0],
+            'pinned_flag': False
+        }
+    ]
+
+
+def test_analyze_metrics_structure(sample_data):
+    results = main.analyze_metrics(sample_data)
+    assert isinstance(results, list)
+    assert all(isinstance(r, dict) for r in results)
+    required_fields = {'metric_name', 'p50', 'p95', 'max', 'retry_free_rate', 'bootstrap_ci'}
+    for r in results:
+        assert required_fields.issubset(r.keys())
+        assert isinstance(r['bootstrap_ci'], list)
+        assert len(r['bootstrap_ci']) == 2
+
+
+def test_statistical_consistency(sample_data):
+    results = main.analyze_metrics(sample_data)
+    for r in results:
+        p50 = r['p50']
+        p95 = r['p95']
+        maximum = r['max']
+        # Ensure ordering
+        assert p50 <= p95 <= maximum
+        # Bootstrap CI bounds make sense
+        low, high = r['bootstrap_ci']
+        assert low <= p50 <= high
+
+
+def test_invalid_input_rejected():
+    invalid_data = [
+        {'metric_name': 'bad_metric', 'values': 'not_a_list', 'pinned_flag': True}
+    ]
+    with pytest.raises((AssertionError, ValueError, TypeError)):
+        main.analyze_metrics(invalid_data)
+
+
+def test_bootstrap_variability(sample_data):
+    results1 = main.analyze_metrics(sample_data)
+    results2 = main.analyze_metrics(sample_data)
+    # Same input should yield statistically consistent but not identical bootstrap for CI boundaries
+    # So we check for approximate stability
+    for r1, r2 in zip(results1, results2):
+        assert math.isclose(r1['p50'], r2['p50'], rel_tol=0.05)
+        ci_diff = np.abs(np.array(r1['bootstrap_ci']) - np.array(r2['bootstrap_ci']))
+        assert ci_diff.mean() < max(0.1 * np.mean(r1['bootstrap_ci']), 1.0)