Add frozen_runs_analysis/src/frozen_runs_analysis/cli.py

2026-01-26 12:23:44 +00:00 · 2026-01-26 12:23:44 +00:00 · 0bb836f087
commit 0bb836f087
parent c67cc23016
1 changed files with 76 additions and 0 deletions
--- a/frozen_runs_analysis/src/frozen_runs_analysis/cli.py
+++ b/frozen_runs_analysis/src/frozen_runs_analysis/cli.py
@@ -0,0 +1,76 @@
+import argparse
+import json
+import logging
+from pathlib import Path
+from typing import Any, List, Optional, Sequence
+
+import pandas as pd
+
+from frozen_runs_analysis import core
+
+def _setup_logging() -> None:
+    """Initializes logging with INFO level for CLI execution."""
+    logging.basicConfig(
+        level=logging.INFO,
+        format='[%(asctime)s] %(levelname)s - %(message)s',
+    )
+
+
+def _load_json_data(file_path: Path) -> List[dict]:
+    """Loads and validates JSON data file expected to contain a list of RunData objects."""
+    if not file_path.exists() or not file_path.is_file():
+        raise FileNotFoundError(f"Input file not found: {file_path}")
+
+    with file_path.open('r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    if not isinstance(data, list):
+        raise ValueError("Expected top-level JSON array for frozen run data.")
+
+    for idx, item in enumerate(data):
+        if not isinstance(item, dict):
+            raise ValueError(f"Item #{idx} in data is not a JSON object.")
+        for field in ('run_id', 'status', 'sanity_checks', 'config_hash'):
+            if field not in item:
+                raise ValueError(f"Missing required field '{field}' in item #{idx}: {item}")
+
+    return data
+
+
+def _write_json_output(data: dict, output_path: Path) -> None:
+    """Writes output JSON report to the given file path."""
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with output_path.open('w', encoding='utf-8') as f:
+        json.dump(data, f, indent=2, ensure_ascii=False)
+    logging.info(f"Analysis report written to {output_path}")
+
+
+def main() -> None:
+    """CLI entrypoint for frozen runs analysis."""
+    parser = argparse.ArgumentParser(description='Analyse frozen run datasets from Gate v0.')
+    parser.add_argument('--input', required=True, help='Path to the input JSON data file containing frozen runs.')
+    parser.add_argument('--output', required=False, help='Path to output JSON file where results will be saved.')
+
+    args = parser.parse_args()
+
+    _setup_logging()
+
+    input_path = Path(args.input).expanduser().resolve()
+    output_path = Path(args.output).expanduser().resolve() if args.output else Path('output/frozen_runs_analysis.json').resolve()
+
+    logging.info(f"Loading data from {input_path}")
+    data = _load_json_data(input_path)
+
+    logging.info("Running analysis on frozen runs data ...")
+    try:
+        result: dict[str, Any] = core.analyse_frozen_runs(data)
+    except Exception as e:
+        logging.exception("Error running analysis: %s", e)
+        raise
+
+    logging.info("Analysis complete. Saving results ...")
+    _write_json_output(result, output_path)
+
+
+if __name__ == '__main__':
+    main()