Add statistical_analysis/tests/test_core.py
parent 64effbcbf6
commit 0a95e8ec03
1 changed file with 66 additions and 0 deletions
statistical_analysis/tests/test_core.py (new file, +66 lines)
@@ -0,0 +1,66 @@
import pytest
import pandas as pd
import numpy as np
from typing import List, Dict

import src.statistical_analysis.core as core


@pytest.fixture
def sample_log_data() -> List[Dict]:
    # Generate realistic log entries with mixed latencies, plus a few high values as outliers
    data = []
    np.random.seed(42)
    latencies = np.random.normal(100, 10, 50).tolist() + [300, 320, 290]  # outliers
    for latency in latencies:
        data.append({
            "t_index_visible": latency,
            "drift_signature": "none" if latency < 200 else "spike",
            "timeout_counts": 0 if latency < 250 else 2,
        })
    return data


def test_analyze_outliers_returns_model(sample_log_data):
    result = core.analyze_outliers(sample_log_data)
    # Validate structure
    assert isinstance(result, dict), "Result should be a dict-like JSON-compatible object"
    expected_keys = {"column_name", "outlier_value", "drift_signature", "timeout_counts"}
    assert expected_keys.issubset(result.keys()), f"Missing keys in result: {expected_keys - set(result.keys())}"
    # Type validation
    assert isinstance(result["column_name"], str)
    assert isinstance(result["outlier_value"], (float, int))
    assert isinstance(result["drift_signature"], str)
    assert isinstance(result["timeout_counts"], int)


def test_outlier_detection_reasonable(sample_log_data):
    # Run analysis and compare known p99 behavior
    result = core.analyze_outliers(sample_log_data)
    outlier_value = result["outlier_value"]
    p99_estimate = np.percentile([r["t_index_visible"] for r in sample_log_data], 99)
    # Outlier should approximately match p99 upper tail
    assert outlier_value >= p99_estimate * 0.9, "Outlier should be near or above p99 value"


def test_invalid_input_raises():
    with pytest.raises((TypeError, ValueError)):
        core.analyze_outliers(None)
    with pytest.raises((TypeError, ValueError)):
        core.analyze_outliers([{"invalid": 1}])


def test_empty_input_behavior():
    # Empty input has no defined fallback, so it is expected to raise
    with pytest.raises(ValueError):
        core.analyze_outliers([])


def test_json_compatibility(sample_log_data):
    import json

    result = core.analyze_outliers(sample_log_data)
    # Check JSON serialization compatibility
    json_str = json.dumps(result)
    parsed = json.loads(json_str)
    assert isinstance(parsed, dict)
    assert parsed["column_name"] == result["column_name"]
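Note for reviewers: these tests pin down a contract for core.analyze_outliers without showing its implementation, which lives in src/statistical_analysis/core.py and is not part of this commit. A minimal sketch of a function that would satisfy the assertions, assuming the p99-style tail selection they imply (all names, checks, and thresholds below are illustrative, not the actual implementation):

    from typing import Dict, List

    import numpy as np


    def analyze_outliers(records: List[Dict]) -> Dict:
        # Hypothetical sketch only: reject the inputs the tests expect to fail fast on.
        if not isinstance(records, list):
            raise TypeError("records must be a list of log entries")
        if not records:
            raise ValueError("records must not be empty")
        if any("t_index_visible" not in r for r in records):
            raise ValueError("each record needs a 't_index_visible' field")

        latencies = [r["t_index_visible"] for r in records]
        p99 = float(np.percentile(latencies, 99))
        # Report the single worst entry; the returned value is at or above the p99 estimate.
        worst = max(records, key=lambda r: r["t_index_visible"])
        return {
            "column_name": "t_index_visible",
            "outlier_value": float(max(p99, worst["t_index_visible"])),
            "drift_signature": str(worst.get("drift_signature", "none")),
            "timeout_counts": int(worst.get("timeout_counts", 0)),
        }

The tests themselves should run with plain pytest from the repository root; the "import src.statistical_analysis.core as core" line suggests the root directory (or a src/ layout already on PYTHONPATH) is assumed.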