Add max_outlier_analysis_script/src/max_outlier_analysis_script/cli.py
This commit is contained in:
parent
bd3d545b1b
commit
8f351f9310
1 changed files with 93 additions and 0 deletions
|
|
@ -0,0 +1,93 @@
|
|||
import argparse
import json
import logging
import numbers
from pathlib import Path
from typing import Any, Dict, List

import pandas as pd

from max_outlier_analysis_script.core import analyze_max_outliers
|
||||
|
||||
|
||||
# Configure root logging once at import time so all module loggers share the
# same timestamped format; module-level logger for this CLI.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s:%(name)s: %(message)s'
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _read_csv_input(path: Path) -> List[Dict[str, Any]]:
|
||||
"""Liest CSV-Eingabedatei und validiert die Felder."""
|
||||
required_columns = {
|
||||
'corr_id', 'stratum', 'job_parallelism',
|
||||
'expires_at_dist_hours', 'retry_total_overhead_ms', 'latency_max'
|
||||
}
|
||||
df = pd.read_csv(path)
|
||||
|
||||
missing = required_columns - set(df.columns)
|
||||
if missing:
|
||||
raise ValueError(f"CSV fehlt Spalten: {missing}")
|
||||
|
||||
data = df.to_dict(orient='records')
|
||||
|
||||
# Typvalidierung einfach aber strikt
|
||||
for i, record in enumerate(data):
|
||||
assert isinstance(record['corr_id'], str), f"Zeile {i}: corr_id muss str sein"
|
||||
assert isinstance(record['stratum'], str), f"Zeile {i}: stratum muss str sein"
|
||||
assert isinstance(record['job_parallelism'], (int, float)), f"Zeile {i}: job_parallelism muss numerisch sein"
|
||||
assert isinstance(record['expires_at_dist_hours'], (int, float)), f"Zeile {i}: expires_at_dist_hours muss numerisch sein"
|
||||
assert isinstance(record['retry_total_overhead_ms'], (int, float)), f"Zeile {i}: retry_total_overhead_ms muss numerisch sein"
|
||||
assert isinstance(record['latency_max'], (int, float)), f"Zeile {i}: latency_max muss numerisch sein"
|
||||
|
||||
logger.debug("CSV-Daten erfolgreich eingelesen und validiert (%d Records)", len(data))
|
||||
return data
|
||||
|
||||
|
||||
def _write_json_output(output_path: Path, results: Dict[str, Any]) -> None:
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with output_path.open('w', encoding='utf-8') as f:
|
||||
json.dump(results, f, indent=2)
|
||||
logger.info("Analyseergebnisse geschrieben nach %s", output_path)
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: read CSV input, run the analysis, write JSON output.

    Raises:
        FileNotFoundError: If the input file does not exist.
        TypeError: If the analysis does not return a dict.
        ValueError: If the result is missing expected fields.
    """
    parser = argparse.ArgumentParser(
        description="Analyse von Max-Outlier-Daten aus CI-Lasttests"
    )
    parser.add_argument('--input', required=True, help='Pfad zur CSV-Eingabedatei')
    parser.add_argument('--output', required=False, default='output/analysis_summary.json',
                        help='Pfad zur Ausgabedatei für Analyseergebnisse (JSON)')

    args = parser.parse_args()

    input_path = Path(args.input)
    output_path = Path(args.output)

    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    try:
        logger.info("Lese CSV-Datei: %s", input_path)
        data = _read_csv_input(input_path)
        logger.info("Starte Analyse über %d Datensätze", len(data))
        results = analyze_max_outliers(data)

        if not isinstance(results, dict):
            # Message fixed: the function is analyze_max_outliers, not
            # "analyse_max_outliers".
            raise TypeError("analyze_max_outliers muss ein dict zurückgeben")

        # Minimal validation of the result object (CI-ready).
        # Raise explicitly instead of `assert`, which is stripped under
        # `python -O` and would silently skip this check.
        expected_keys = {'max_above_p99_count', 'near_expiry_cluster_percentage', 'retry_overhead_variance'}
        if not expected_keys.issubset(results.keys()):
            raise ValueError(
                f"Ergebnis enthält nicht alle erwarteten Felder: {expected_keys - set(results.keys())}"
            )

        _write_json_output(output_path, results)
        logger.info("Analyse abgeschlossen.")

    except Exception as e:
        # Top-level boundary: log full traceback, then re-raise for CI to see
        # a non-zero exit.
        logger.exception("Fehler bei der Ausführung: %s", e)
        raise
||||
|
||||
|
||||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == '__main__':
    main()
|
||||
Loading…
Reference in a new issue