"""Tests for ``audit_analysis.core.analyze_audit``."""

import pytest
import pandas as pd
from pathlib import Path
from audit_analysis import core


def _create_sample_csv(tmp_path: Path) -> str:
    """Write a five-row sample audit CSV under *tmp_path* and return its path.

    The file has the columns ``id``, ``status`` and ``reason``. Four rows have
    status ``unknown`` (reasons: ``artefact_missing`` twice, ``io_failure``
    once, ``contract_error`` once) and one row has status ``passed`` with an
    empty reason.
    """
    rows = [
        {"id": 1, "status": "unknown", "reason": "artefact_missing"},
        {"id": 2, "status": "unknown", "reason": "io_failure"},
        {"id": 3, "status": "passed", "reason": ""},
        {"id": 4, "status": "unknown", "reason": "contract_error"},
        {"id": 5, "status": "unknown", "reason": "artefact_missing"},
    ]
    file_path = tmp_path / "audit.csv"
    pd.DataFrame(rows).to_csv(file_path, index=False)
    return str(file_path)


def test_analyze_audit_nominal(tmp_path):
    """The sample CSV yields a dict whose entries cover the expected classes."""
    csv_path = _create_sample_csv(tmp_path)
    result = core.analyze_audit(csv_path)
    assert isinstance(result, dict), "Result should be a dictionary"

    entries = result.get("entries", [])

    # Expect three classes: artefact_missing, io_failure, contract_error.
    classes = {entry["class"] for entry in entries}
    expected_classes = {"artefact_missing", "io_failure", "contract_error"}
    assert expected_classes.issubset(classes)

    # Percentages add up to approximately 100 (tolerance is 1% of 100).
    total_percentage = sum(entry["percentage"] for entry in entries)
    assert total_percentage == pytest.approx(100.0, rel=0.01)

    # Each entry has exactly the required keys, with the expected types.
    for entry in entries:
        assert set(entry.keys()) == {"class", "percentage", "action"}
        assert isinstance(entry["class"], str)
        assert isinstance(entry["percentage"], (float, int))
        assert isinstance(entry["action"], str)


def test_analyze_audit_invalid_path():
    """A non-existent path raises ``FileNotFoundError`` or ``ValueError``."""
    with pytest.raises((FileNotFoundError, ValueError)):
        core.analyze_audit("/nonexistent/file.csv")


def test_analyze_audit_empty_file(tmp_path):
    """A zero-byte CSV raises ``ValueError`` or ``pandas.errors.EmptyDataError``."""
    file_path = tmp_path / "empty.csv"
    file_path.write_text("")
    with pytest.raises((ValueError, pd.errors.EmptyDataError)):
        core.analyze_audit(str(file_path))


def test_analyze_audit_all_unknown_same_class(tmp_path):
    """Five ``unknown`` rows sharing one reason collapse into a single entry."""
    df = pd.DataFrame(
        [{"id": i, "status": "unknown", "reason": "io_failure"} for i in range(5)]
    )
    csv_path = tmp_path / "audit_uniform.csv"
    df.to_csv(csv_path, index=False)

    result = core.analyze_audit(str(csv_path))
    entries = result.get("entries", [])

    assert len(entries) == 1
    only_entry = entries[0]
    assert only_entry["class"] == "io_failure"
    assert only_entry["percentage"] == pytest.approx(100.0, rel=0.01)
    assert isinstance(only_entry["action"], str)


def test_analyze_audit_action_field_content(tmp_path):
    """Every entry produced for the sample CSV carries a string ``action``."""
    csv_path = _create_sample_csv(tmp_path)
    result = core.analyze_audit(csv_path)
    actions = {entry["action"] for entry in result.get("entries", [])}

    # Guard against the loop below passing vacuously on an empty result.
    assert actions, "Expected at least one entry with an action"

    # NOTE(review): the previous check was
    #   act in {"WARN", "FAIL", "RETRY"} or isinstance(act, str)
    # which accepts any string, so only the type was ever verified. The
    # permitted action vocabulary is not visible from this file; tighten this
    # to a membership check once the contract is confirmed.
    for act in actions:
        assert isinstance(act, str)