Add data_analysis/src/data_analysis/cli.py

This commit is contained in:
Mika 2026-02-24 13:33:05 +00:00
parent 5cd3549a76
commit be443a0586

View file

@@ -0,0 +1,72 @@
import argparse
import json
from pathlib import Path
from typing import Any
from data_analysis import core
def _load_run_data(path: Path) -> list[dict[str, Any]]:
    """Load a JSON file of run records and validate its basic structure.

    The file must hold a JSON array whose elements are objects, each
    carrying the keys ``policy_hash``, ``warn_rate``, ``unknown_rate`` and
    ``delta_time``. Only the presence of those keys is checked; the types
    of their values are not inspected here.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded list of run entries, exactly as parsed.

    Raises:
        FileNotFoundError: If ``path`` is not an existing regular file.
        ValueError: If the content is not valid JSON (the original
            ``json.JSONDecodeError`` is attached as ``__cause__``), if the
            top-level value is not a list, if an entry is not a dict, or
            if an entry lacks a required field.
    """
    # is_file() is already False for a missing path, so a separate
    # exists() check adds nothing.
    if not path.is_file():
        raise FileNotFoundError(f"Input file not found: {path}")

    with path.open('r', encoding='utf-8') as handle:
        try:
            data = json.load(handle)
        except json.JSONDecodeError as e:
            # Chain explicitly so the parser's position details stay
            # reachable through __cause__.
            raise ValueError(f"Invalid JSON file: {path}. Error: {e}") from e

    if not isinstance(data, list):
        raise ValueError("Input JSON must contain a list of run data objects.")

    required_fields = {"policy_hash", "warn_rate", "unknown_rate", "delta_time"}
    for i, entry in enumerate(data):
        if not isinstance(entry, dict):
            raise ValueError(f"Run entry at index {i} is not a dict.")
        missing = required_fields.difference(entry)
        if missing:
            # Sort the names so the message is reproducible; formatting a
            # raw set would vary with string hash randomisation.
            listed = ", ".join(repr(name) for name in sorted(missing))
            raise ValueError(f"Missing required fields {{{listed}}} in entry {i}.")
    return data
def _run_analysis(input_path: Path, output_path: Path) -> None:
    """Analyse the runs stored at ``input_path`` and save a JSON summary.

    The input is read through ``_load_run_data``. The summary has two keys:
    ``avg_warn_rate`` holds the result of ``core.calculate_warn_rate`` and
    ``delta_time_distribution`` holds the result of
    ``core.delta_time_distribution``. Missing parent directories of
    ``output_path`` are created before the file is written.

    Args:
        input_path: JSON file containing the run entries.
        output_path: File that receives the summary as indented JSON with
            sorted keys.
    """
    runs = _load_run_data(input_path)
    report = {
        "avg_warn_rate": core.calculate_warn_rate(runs),
        "delta_time_distribution": core.delta_time_distribution(runs),
    }

    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with output_path.open('w', encoding='utf-8') as sink:
        json.dump(report, sink, indent=2, sort_keys=True)
def main(argv: "list[str] | None" = None) -> None:
    """Parse command-line options and run the analysis.

    Two options are required: ``--input`` (the JSON file with run data)
    and ``--output`` (where the JSON results are written). Both are
    converted to ``Path`` objects and handed to ``_run_analysis``.

    Args:
        argv: Argument strings to parse. The default ``None`` makes
            argparse read ``sys.argv[1:]``, so calling ``main()`` with no
            arguments behaves as before; passing a list allows the CLI to
            be driven programmatically.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Analyze run timing data and compute summary statistics."
    )
    parser.add_argument(
        "--input",
        required=True,
        type=Path,
        help="Path to input JSON file containing run data.",
    )
    parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Destination path for JSON file with results.",
    )
    args = parser.parse_args(argv)
    _run_analysis(args.input, args.output)
# Run the CLI only when this file is executed as a script; importing the
# module must not trigger argument parsing.
if __name__ == "__main__":
    main()