"""Command-line interface for analyzing run data stored as JSON."""

import argparse
import json
from collections.abc import Sequence
from pathlib import Path
from typing import Any, Optional

from data_analysis import core

# Keys every run entry must carry before it is handed to the analysis code.
_REQUIRED_FIELDS = frozenset(
    {"policy_hash", "warn_rate", "unknown_rate", "delta_time"}
)


def _load_run_data(path: Path) -> list[dict[str, Any]]:
    """Load a run data JSON file and validate its top-level structure.

    Only the shape is checked (a list of dicts, each containing every key in
    ``_REQUIRED_FIELDS``); the values themselves are not inspected.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded list of run entries, unchanged.

    Raises:
        FileNotFoundError: If ``path`` does not refer to an existing file.
        ValueError: If the file is not valid JSON, the top-level value is not
            a list, an entry is not a dict, or an entry lacks required fields.
    """
    # is_file() is already False for a missing path, so a single check covers
    # both "does not exist" and "exists but is not a regular file".
    if not path.is_file():
        raise FileNotFoundError(f"Input file not found: {path}")

    with path.open("r", encoding="utf-8") as handle:
        try:
            data = json.load(handle)
        except json.JSONDecodeError as e:
            # Chain explicitly so the decoder error stays attached as the cause.
            raise ValueError(f"Invalid JSON file: {path}. Error: {e}") from e

    if not isinstance(data, list):
        raise ValueError("Input JSON must contain a list of run data objects.")

    for i, entry in enumerate(data):
        if not isinstance(entry, dict):
            raise ValueError(f"Run entry at index {i} is not a dict.")
        missing = _REQUIRED_FIELDS.difference(entry)
        if missing:
            # Sort the names so the message is stable across runs; formatting
            # the raw set would vary with string hash randomization.
            listed = ", ".join(repr(name) for name in sorted(missing))
            raise ValueError(
                f"Missing required fields {{{listed}}} in entry {i}."
            )

    return data


def _run_analysis(input_path: Path, output_path: Path) -> None:
    """Analyze the run data at ``input_path`` and write a JSON summary.

    The summary holds the results of ``core.calculate_warn_rate`` and
    ``core.delta_time_distribution`` under the keys ``"avg_warn_rate"`` and
    ``"delta_time_distribution"``.

    Args:
        input_path: JSON file containing the run data.
        output_path: Destination file; missing parent directories are created.

    Raises:
        FileNotFoundError: If ``input_path`` is not an existing file.
        ValueError: If the input fails validation in ``_load_run_data``.
    """
    data = _load_run_data(input_path)
    warn_rate = core.calculate_warn_rate(data)
    delta_stats = core.delta_time_distribution(data)

    summary = {
        "avg_warn_rate": warn_rate,
        "delta_time_distribution": delta_stats,
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2, sort_keys=True)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Entry point for the command-line interface.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Analyze run timing data and compute summary statistics."
    )
    parser.add_argument(
        "--input",
        required=True,
        type=Path,
        help="Path to input JSON file containing run data.",
    )
    parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Destination path for JSON file with results.",
    )

    args = parser.parse_args(argv)
    _run_analysis(args.input, args.output)


if __name__ == "__main__":
    main()