"""Tests for ``data_analysis.core.analyze_data`` and ``AnalysisResult``.

Target path: data_analysis/tests/test_core.py
"""
import pytest
import pandas as pd
import numpy as np
from data_analysis.core import analyze_data, AnalysisResult
from data_analysis.io_utils import LogEntry


def _make_log_entries():
    """Build a small fixture of four ``LogEntry`` samples, one minute apart.

    Each row is passed positionally to ``LogEntry`` in the order
    (t, Lx, dB, Temp, Inference). The third row carries a much higher
    dB and Inference value than the others and is the intended anomaly.
    """
    rows = [
        # (t, Lx, dB, Temp, Inference)
        ("2024-03-01T00:00:00Z", 100, 40.5, 20.0, 0.2),
        ("2024-03-01T00:01:00Z", 102, 42.0, 20.1, 0.25),
        ("2024-03-01T00:02:00Z", 105, 80.0, 20.2, 0.9),  # anomaly
        ("2024-03-01T00:03:00Z", 99, 41.0, 20.0, 0.3),
    ]
    return [LogEntry(*row) for row in rows]


def test_analyze_data_returns_analysis_result():
    """``analyze_data`` returns an ``AnalysisResult`` exposing two list fields."""
    logs = _make_log_entries()
    result = analyze_data(logs)
    assert isinstance(result, AnalysisResult)
    assert hasattr(result, 'significant_patterns')
    assert hasattr(result, 'anomaly_events')
    assert isinstance(result.significant_patterns, list)
    assert isinstance(result.anomaly_events, list)


def test_analyze_data_detects_anomalies():
    """At least one reported event refers to the anomalous fixture sample.

    The event type is not pinned down here, so the check is a loose text
    match on the event's string form: either the word "anomaly" or the
    anomalous dB value (80) must appear.
    """
    logs = _make_log_entries()
    result = analyze_data(logs)
    # At least one anomaly expected
    assert any(
        'anomaly' in str(ev).lower() or '80' in str(ev)
        for ev in result.anomaly_events
    )


def test_analyze_data_pattern_length_consistency():
    """The number of significant patterns does not depend on input order."""
    logs = _make_log_entries()
    result1 = analyze_data(logs)

    # Shuffle a copy with a fixed seed: the permutation is identical on every
    # run (so a failure is reproducible) and the list handed to the first
    # call is left untouched.
    shuffled = list(logs)
    np.random.default_rng(0).shuffle(shuffled)
    result2 = analyze_data(shuffled)

    assert len(result1.significant_patterns) == len(result2.significant_patterns)


def test_analyze_data_empty_input():
    """An empty log list yields an ``AnalysisResult`` with two empty lists."""
    result = analyze_data([])
    assert isinstance(result, AnalysisResult)
    assert result.significant_patterns == []
    assert result.anomaly_events == []


def test_invalid_input_type_raises():
    """Entries that are not ``LogEntry`` objects are rejected with an error."""
    with pytest.raises((TypeError, ValueError)):
        analyze_data([{'wrong': 'structure'}])


def test_analysis_result_repr_str():
    """``str(AnalysisResult)`` mentions its contents rather than being opaque."""
    result = AnalysisResult(['pattern'], ['event'])
    s = str(result)
    assert 'pattern' in s or 'event' in s