"""Tests for ``core.analyze_max_outliers``.

Module path: max_outlier_analysis_script/tests/test_core.py
"""
import math
from pathlib import Path

import pandas as pd
import pytest

from src.max_outlier_analysis_script import core

SAMPLE_CSV_PATH = Path(__file__).parent / 'data' / 'test_outlier_sample.csv'


@pytest.fixture
def sample_data():
    """Provide the test records as a list of dicts.

    The records are read from ``SAMPLE_CSV_PATH`` when that file exists;
    otherwise a small synthetic data set is returned instead.
    """
    if SAMPLE_CSV_PATH.exists():
        frame = pd.read_csv(SAMPLE_CSV_PATH)
        return frame.to_dict(orient='records')

    # Fallback: synthetic test data in case the CSV file is missing.
    return [
        {
            'corr_id': 'c1',
            'stratum': 'near-expiry-unpinned',
            'job_parallelism': 5,
            'expires_at_dist_hours': 0.2,
            'retry_total_overhead_ms': 12.5,
            'latency_max': 350.0,
        },
        {
            'corr_id': 'c2',
            'stratum': 'normal',
            'job_parallelism': 4,
            'expires_at_dist_hours': 3.1,
            'retry_total_overhead_ms': 6.2,
            'latency_max': 180.0,
        },
        {
            'corr_id': 'c3',
            'stratum': 'near-expiry-unpinned',
            'job_parallelism': 6,
            'expires_at_dist_hours': 0.1,
            'retry_total_overhead_ms': 15.8,
            'latency_max': 420.0,
        },
    ]


def test_analyze_returns_expected_keys(sample_data):
    """Verify that every expected metric is present in the result."""
    outcome = core.analyze_max_outliers(sample_data)

    assert isinstance(outcome, dict)
    required = {
        'max_above_p99_count',
        'near_expiry_cluster_percentage',
        'retry_overhead_variance',
    }
    assert required.issubset(outcome.keys())


def test_result_value_types(sample_data):
    """Ensure that each returned metric has the expected type."""
    outcome = core.analyze_max_outliers(sample_data)

    # Checked in the same order as listed; the first mismatch fails the test.
    expected_types = (
        ('max_above_p99_count', int),
        ('near_expiry_cluster_percentage', float),
        ('retry_overhead_variance', float),
    )
    for metric, metric_type in expected_types:
        assert isinstance(outcome[metric], metric_type)


def test_statistical_consistency(sample_data):
    """Check basic statistical plausibility of the computed values."""
    outcome = core.analyze_max_outliers(sample_data)

    # The count of maxima above p99 cannot be negative.
    assert outcome['max_above_p99_count'] >= 0

    # A percentage has to lie within 0-100.
    cluster_pct = outcome['near_expiry_cluster_percentage']
    assert 0.0 <= cluster_pct <= 100.0

    # A variance is never negative.
    assert outcome['retry_overhead_variance'] >= 0.0


def test_error_handling_on_empty_input():
    """Empty input should yield a controlled error or an empty output.

    NOTE(review): as written, only the raising case passes -- the call must
    raise one of the exception types listed below; returning an empty result
    would fail this test.
    """
    accepted_errors = (ValueError, AssertionError, ZeroDivisionError, KeyError)
    with pytest.raises(accepted_errors):
        core.analyze_max_outliers([])


def test_invariance_same_input():
    """Identical input must give deterministic results (idempotent)."""
    records = [
        {
            'corr_id': 'x1',
            'stratum': 's1',
            'job_parallelism': 2,
            'expires_at_dist_hours': 1.0,
            'retry_total_overhead_ms': 5.0,
            'latency_max': 120.0,
        },
        {
            'corr_id': 'x2',
            'stratum': 's1',
            'job_parallelism': 3,
            'expires_at_dist_hours': 2.0,
            'retry_total_overhead_ms': 8.0,
            'latency_max': 150.0,
        },
    ]
    first_run = core.analyze_max_outliers(records)
    second_run = core.analyze_max_outliers(records)
    assert first_run == second_run