From 0a95e8ec03901d47af92c9f47f70ce39a077c1dc Mon Sep 17 00:00:00 2001
From: Mika
Date: Mon, 16 Feb 2026 15:27:12 +0000
Subject: [PATCH] Add statistical_analysis/tests/test_core.py

---
Note: the test module below was revised during review (private RNG in the
fixture, parametrized invalid-input test, docstrings, trailing newline). The
blob id on the `index` line still refers to the original 66-line version.

 statistical_analysis/tests/test_core.py | 80 +++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 statistical_analysis/tests/test_core.py

diff --git a/statistical_analysis/tests/test_core.py b/statistical_analysis/tests/test_core.py
new file mode 100644
index 0000000..c3a6992
--- /dev/null
+++ b/statistical_analysis/tests/test_core.py
@@ -0,0 +1,80 @@
+"""Tests for ``core.analyze_outliers`` in ``statistical_analysis``."""
+
+import json
+from typing import Dict, List
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import src.statistical_analysis.core as core
+
+
+@pytest.fixture
+def sample_log_data() -> List[Dict]:
+    """Return 53 synthetic log entries: 50 typical latencies plus 3 outliers.
+
+    A private ``RandomState(42)`` yields the same draws as seeding the global
+    generator with 42, but leaves NumPy's global random state untouched so the
+    fixture cannot influence other tests.
+    """
+    rng = np.random.RandomState(42)
+    latencies = rng.normal(100, 10, 50).tolist() + [300, 320, 290]  # outliers
+    return [
+        {
+            "t_index_visible": latency,
+            "drift_signature": "none" if latency < 200 else "spike",
+            "timeout_counts": 0 if latency < 250 else 2,
+        }
+        for latency in latencies
+    ]
+
+
+def test_analyze_outliers_returns_model(sample_log_data):
+    """The result is a dict carrying the expected keys with the expected types."""
+    result = core.analyze_outliers(sample_log_data)
+    # Validate structure
+    assert isinstance(result, dict), "Result should be a dict-like JSON-compatible object"
+    expected_keys = {"column_name", "outlier_value", "drift_signature", "timeout_counts"}
+    missing_keys = expected_keys - set(result)
+    assert not missing_keys, f"Missing keys in result: {missing_keys}"
+    # Type validation
+    assert isinstance(result["column_name"], str)
+    assert isinstance(result["outlier_value"], (float, int))
+    assert isinstance(result["drift_signature"], str)
+    assert isinstance(result["timeout_counts"], int)
+
+
+def test_outlier_detection_reasonable(sample_log_data):
+    """The reported outlier is at least 90% of the p99 latency of the input."""
+    result = core.analyze_outliers(sample_log_data)
+    outlier_value = result["outlier_value"]
+    latencies = [entry["t_index_visible"] for entry in sample_log_data]
+    p99_estimate = np.percentile(latencies, 99)
+    # Outlier should approximately match p99 upper tail
+    assert outlier_value >= p99_estimate * 0.9, "Outlier should be near or above p99 value"
+
+
+@pytest.mark.parametrize(
+    "bad_input",
+    [None, [{"invalid": 1}]],
+    ids=["none", "unknown-keys"],
+)
+def test_invalid_input_raises(bad_input):
+    """Malformed input raises ``TypeError`` or ``ValueError``."""
+    with pytest.raises((TypeError, ValueError)):
+        core.analyze_outliers(bad_input)
+
+
+def test_empty_input_behavior():
+    """An empty list of entries is rejected with ``ValueError``."""
+    with pytest.raises(ValueError):
+        core.analyze_outliers([])
+
+
+def test_json_compatibility(sample_log_data):
+    """The result survives a JSON round trip."""
+    result = core.analyze_outliers(sample_log_data)
+    parsed = json.loads(json.dumps(result))
+    assert isinstance(parsed, dict)
+    assert parsed["column_name"] == result["column_name"]