Add artifact.1/src/artifact_1/core.py
This commit is contained in:
commit
401970b448
1 changed files with 112 additions and 0 deletions
112
artifact.1/src/artifact_1/core.py
Normal file
112
artifact.1/src/artifact_1/core.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
import argparse
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Configure root logging once at import time: INFO level with a
# "[timestamp] LEVEL - message" line format used by every log call below.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s - %(message)s')
|
||||
|
||||
|
||||
@dataclass
class LogData:
    """One parsed log entry for a single aux-worker measurement.

    The attributes mirror the required fields of each JSON log record.
    """

    # Wall-clock time of the measurement (parsed from an ISO-8601 string).
    timestamp: datetime
    # Identifier of the aux-worker configuration the sample belongs to.
    aux_worker: int
    # Observed p99 tail latency for this sample (units not specified here).
    p99_tail: float
    # Observed bandwidth for this sample (units not specified here).
    band_width: float

    @staticmethod
    def validate(entry: Dict[str, Any]) -> bool:
        """Validate one raw log record (a plain dict).

        Args:
            entry: Mapping that must contain the keys ``timestamp``,
                ``aux_worker``, ``p99_tail`` and ``band_width``.

        Returns:
            True when the entry is well-formed.

        Raises:
            ValueError: If a required field is missing or the timestamp is
                not a parseable ISO-8601 string.
            TypeError: If a field has the wrong type.
        """
        required_fields = ["timestamp", "aux_worker", "p99_tail", "band_width"]
        for field in required_fields:
            if field not in entry:
                raise ValueError(f"Missing required field: {field}")
        if not isinstance(entry["aux_worker"], int):
            raise TypeError("Field 'aux_worker' must be int")
        if not isinstance(entry["p99_tail"], (int, float)):
            raise TypeError("Field 'p99_tail' must be numeric")
        if not isinstance(entry["band_width"], (int, float)):
            raise TypeError("Field 'band_width' must be numeric")
        # Bug fix: the original called .replace() on the raw value, so a
        # non-string timestamp raised an opaque AttributeError/TypeError
        # instead of a clear validation error.
        if not isinstance(entry["timestamp"], str):
            raise TypeError("Field 'timestamp' must be an ISO-8601 string")
        try:
            # fromisoformat() (pre-3.11) does not accept a trailing 'Z',
            # so normalize it to an explicit UTC offset first.
            datetime.fromisoformat(entry["timestamp"].replace('Z', '+00:00'))
        except ValueError as e:
            raise ValueError(
                f"Invalid timestamp format: {entry['timestamp']} ({e})"
            ) from e
        return True
|
||||
|
||||
|
||||
def analyze_logs(log_file_path: str) -> Dict[str, Any]:
    """Analyze log files of different aux-worker configurations.

    Args:
        log_file_path: Path to the JSON log file.

    Returns:
        Aggregated results per ``aux_worker``: for each of the metrics
        ``p99_tail`` and ``band_width``, the median and the interquartile
        range (IQR).

    Raises:
        FileNotFoundError: If the log file does not exist.
        ValueError: If the file is not valid JSON, expected columns are
            missing, or a row fails validation.
        TypeError: If a row field has the wrong type (from LogData.validate).
    """
    log_path = Path(log_file_path)
    if not log_path.exists():
        raise FileNotFoundError(f"Log file not found: {log_file_path}")

    logging.info(f"Reading log file from {log_file_path}")
    try:
        # Bug fix: with the default convert_dates=True pandas coerces a
        # column named 'timestamp' to Timestamp objects, which breaks the
        # string-based check in LogData.validate. Keep raw strings instead.
        df = pd.read_json(log_path, convert_dates=False)
    except ValueError as e:
        raise ValueError(f"Invalid JSON format in {log_file_path}: {e}") from e

    expected_cols = {"timestamp", "aux_worker", "p99_tail", "band_width"}
    if not expected_cols.issubset(df.columns):
        missing = expected_cols - set(df.columns)
        raise ValueError(f"Missing columns in log data: {missing}")

    # Validate each row explicitly. Bug fix: iterrows() yields numpy
    # scalars (e.g. int64), which fail the strict isinstance(..., int)
    # check in LogData.validate; to_dict("records") returns native
    # Python scalars.
    for record in df.to_dict(orient="records"):
        LogData.validate(record)

    # Compute aggregates per aux-worker configuration.
    logging.info("Computing median and IQR per aux_worker")
    result = {}
    for aux, group in df.groupby("aux_worker"):
        summary = {}
        for metric in ["p99_tail", "band_width"]:
            median_val = float(group[metric].median())
            q75, q25 = group[metric].quantile([0.75, 0.25])
            iqr_val = float(q75 - q25)
            summary[metric] = {"median": median_val, "iqr": iqr_val}
        # int(aux) converts the numpy group key back to a plain int so the
        # result is JSON-friendly downstream.
        result[int(aux)] = summary

    logging.info("Analysis complete.")
    return result
|
||||
|
||||
|
||||
def _save_output(results: Dict[str, Any], output_path: Path) -> None:
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(results, f, indent=2)
|
||||
logging.info(f"Analysis summary written to {output_path}")
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: parse arguments, run the analysis, persist results."""
    parser = argparse.ArgumentParser(description="Analyse von Aux-Worker-Logdaten.")
    parser.add_argument(
        "--log-file",
        required=True,
        help="Pfad zur Log-Datei (JSON-Format)",
    )
    parser.add_argument(
        "--output",
        required=False,
        default="output/analysis_summary.json",
        help="Pfad zur Ausgabedatei (JSON)",
    )
    args = parser.parse_args()

    # Analyze first; only write the summary file if analysis succeeded.
    summary = analyze_logs(args.log_file)
    _save_output(summary, Path(args.output))


if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in a new issue