Add audit_analysis/src/audit_analysis/core.py
This commit is contained in:
commit
31873ea2ff
1 changed files with 94 additions and 0 deletions
94
audit_analysis/src/audit_analysis/core.py
Normal file
94
audit_analysis/src/audit_analysis/core.py
Normal file
|
|
@@ -0,0 +1,94 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Dict, List
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Logging is configured at import time: INFO threshold and a
# "[timestamp] LEVEL - message" line layout.
_LOG_FORMAT = "[%(asctime)s] %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ClassificationReportEntry:
    """One row of the classification summary for unknown audit entries.

    Attributes:
        class_: Name of the class; the trailing underscore is needed
            because ``class`` is a reserved word in Python.
        percentage: Percentage value reported for this class.
        action: Action label associated with this class.
    """

    class_: str
    percentage: float
    action: str

    def to_dict(self) -> Dict[str, object]:
        """Return the entry as a plain dict, exposing ``class_`` under the key ``"class"``."""
        keys = ("class", "percentage", "action")
        values = (self.class_, self.percentage, self.action)
        return dict(zip(keys, values))
|
||||||
|
|
||||||
|
|
||||||
|
class AuditFileError(Exception):
    """Custom exception for file-reading or data-validation errors."""
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_input(file_path: str) -> Path:
|
||||||
|
path = Path(file_path)
|
||||||
|
if not path.exists() or not path.is_file():
|
||||||
|
raise AuditFileError(f"Audit-Datei nicht gefunden: {file_path}")
|
||||||
|
if path.suffix.lower() != '.csv':
|
||||||
|
raise AuditFileError(f"Ungültiges Format: {file_path} ist keine CSV-Datei")
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
# Recommended action per known cause; any other cause gets 'INVESTIGATE'.
_ACTION_BY_CAUSE = {
    'artefact_missing': 'WARN',
    'contract_error': 'FAIL',
    'io_failure': 'RETRY',
    'timeout': 'RETRY',
}


def _classify_unknowns(df: pd.DataFrame) -> List[ClassificationReportEntry]:
    """Group the rows whose status is "unknown" by cause and attach an action.

    The cause is read from the ``cause`` column when present, otherwise
    from the ``error`` column. Rows whose cause is missing are reported
    under the class ``'unspecified'`` rather than being dropped, so the
    percentages always refer to all unknown rows.

    Args:
        df: Audit data; must contain a ``status`` column.

    Returns:
        One entry per distinct cause, ordered by cause name, holding the
        cause's share of all unknown rows in percent (rounded to two
        decimals) and its action. An empty list when there are no unknown
        rows.

    Raises:
        AuditFileError: If unknown rows exist but the data has neither a
            ``cause`` nor an ``error`` column.
    """
    # astype(str) keeps the .str accessor usable when pandas inferred a
    # non-string dtype for the column (e.g. a column with no values at all).
    is_unknown = df['status'].astype(str).str.lower() == 'unknown'
    unknowns = df[is_unknown]
    if unknowns.empty:
        logger.warning("Keine Unknown-Einträge gefunden.")
        return []

    cause_col = next(
        (name for name in ('cause', 'error') if name in unknowns.columns),
        None,
    )
    if cause_col is None:
        # Without this guard the lookup below fails with a bare KeyError.
        raise AuditFileError(
            "Fehlende Ursachenspalte: 'cause' oder 'error' erforderlich"
        )

    raw_causes = unknowns[cause_col]
    # Counting would silently skip missing causes, which makes the
    # percentages disagree with the number of unknown rows; give those rows
    # an explicit label instead.
    causes = (
        raw_causes.astype(object)
        .where(raw_causes.notna(), 'unspecified')
        .astype(str)
    )
    counts = causes.value_counts().sort_index()
    # unknowns is non-empty and no row was dropped, so total is > 0.
    total = int(counts.sum())

    return [
        ClassificationReportEntry(
            class_=cause,
            percentage=round(int(count) / total * 100, 2),
            action=_ACTION_BY_CAUSE.get(cause, 'INVESTIGATE'),
        )
        for cause, count in counts.items()
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_audit(file_path: str) -> Dict[str, object]:
    """Analyse an audit CSV file and return a structured classification report.

    Args:
        file_path: Path to the audit CSV file with CI run data.

    Returns:
        A JSON-like dict with two keys: ``"summary"``, a list with one dict
        per class (class name, percentage and recommended action), and
        ``"total_unknowns"``, the number of rows whose ``status`` equals
        "unknown" (compared case-insensitively).

    Raises:
        TypeError: If ``file_path`` is not a ``str``.
        AuditFileError: If the path fails validation, the CSV cannot be
            read, or the required ``status`` column is missing.
    """
    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would disable the validation.
    if not isinstance(file_path, str):
        raise TypeError("file_path muss ein String sein")

    path = _validate_input(file_path)
    logger.info("Lese Audit-Datei: %s", path)
    try:
        df = pd.read_csv(path)
    except Exception as e:
        # Boundary handler: every read/parse failure is reported through
        # the module's own exception type, with the cause chained.
        raise AuditFileError(f"Fehler beim Lesen der CSV-Datei: {e}") from e

    required_columns = {'status'}
    missing_columns = required_columns - set(df.columns)
    if missing_columns:
        raise AuditFileError(f"Fehlende Pflichtspalten: {missing_columns}")

    report_entries = _classify_unknowns(df)

    # astype(str) keeps the .str accessor usable when pandas inferred a
    # non-string dtype for the column (e.g. a column with no values at all).
    unknown_mask = df['status'].astype(str).str.lower() == 'unknown'
    classification_report = {
        "summary": [entry.to_dict() for entry in report_entries],
        "total_unknowns": int(unknown_mask.sum()),
    }

    logger.info(
        "Klassifikationsbericht erstellt mit %d Klassen.", len(report_entries)
    )
    return classification_report
|
||||||
Loading…
Reference in a new issue