"""Command-line interface for the frozen runs analysis.

Reads a JSON file holding a list of run records, hands the records to
``core.analyse_frozen_runs`` and writes the returned report as JSON.
"""
import argparse
import json
import logging
from pathlib import Path
from typing import Any, List, Optional, Sequence

import pandas as pd  # noqa: F401  (not referenced in this module; import kept as-is)

from frozen_runs_analysis import core

# Keys that every run record in the input file must contain.
_REQUIRED_FIELDS = ('run_id', 'status', 'sanity_checks', 'config_hash')

# Report location used when --output is omitted; resolved against the
# current working directory at call time.
_DEFAULT_OUTPUT = 'output/frozen_runs_analysis.json'


def _setup_logging() -> None:
    """Configure root logging at INFO level with a timestamped format."""
    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s] %(levelname)s - %(message)s',
    )


def _load_json_data(file_path: Path) -> List[dict]:
    """Load the input file and check that it is a list of run records.

    Args:
        file_path: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The parsed top-level list; each element is a dict containing at
        least the keys in ``_REQUIRED_FIELDS``.

    Raises:
        FileNotFoundError: If ``file_path`` is missing or not a regular file.
        ValueError: If the top level is not a list, an element is not a
            dict, or an element lacks a required key. Malformed JSON also
            surfaces as ``json.JSONDecodeError``, a ``ValueError`` subclass.
    """
    # is_file() is already False for a path that does not exist, so a
    # separate exists() check is unnecessary.
    if not file_path.is_file():
        raise FileNotFoundError(f"Input file not found: {file_path}")

    with file_path.open('r', encoding='utf-8') as f:
        data = json.load(f)

    if not isinstance(data, list):
        raise ValueError("Expected top-level JSON array for frozen run data.")

    for idx, item in enumerate(data):
        if not isinstance(item, dict):
            raise ValueError(f"Item #{idx} in data is not a JSON object.")
        for field in _REQUIRED_FIELDS:
            if field not in item:
                raise ValueError(f"Missing required field '{field}' in item #{idx}: {item}")

    return data


def _write_json_output(data: dict, output_path: Path) -> None:
    """Write ``data`` as indented UTF-8 JSON to ``output_path``.

    Missing parent directories are created. The payload is serialised
    completely before the file is opened, so a serialisation failure
    neither truncates an existing report nor leaves a partial file behind.

    Args:
        data: JSON-serialisable report.
        output_path: Destination file path.

    Raises:
        TypeError: If ``data`` contains values ``json`` cannot serialise.
        OSError: If the directory or file cannot be created or written.
    """
    # Serialise first: opening with 'w' truncates immediately, so doing it
    # before a failing dump would destroy any previous report.
    payload = json.dumps(data, indent=2, ensure_ascii=False)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open('w', encoding='utf-8') as f:
        f.write(payload)
    logging.info("Analysis report written to %s", output_path)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """CLI entrypoint for frozen runs analysis.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which matches the previous zero-argument
            behaviour.

    Raises:
        FileNotFoundError, ValueError: Propagated from input loading.
        Exception: Anything raised by ``core.analyse_frozen_runs`` is logged
            with its traceback and then re-raised unchanged.
    """
    parser = argparse.ArgumentParser(description='Analyse frozen run datasets from Gate v0.')
    parser.add_argument(
        '--input',
        required=True,
        help='Path to the input JSON data file containing frozen runs.',
    )
    parser.add_argument(
        '--output',
        required=False,
        help='Path to output JSON file where results will be saved.',
    )

    args = parser.parse_args(argv)

    _setup_logging()

    input_path = Path(args.input).expanduser().resolve()
    if args.output:
        output_path = Path(args.output).expanduser().resolve()
    else:
        output_path = Path(_DEFAULT_OUTPUT).resolve()

    logging.info("Loading data from %s", input_path)
    data = _load_json_data(input_path)

    logging.info("Running analysis on frozen runs data ...")
    try:
        result: dict[str, Any] = core.analyse_frozen_runs(data)
    except Exception as e:
        # Top-level boundary: record the traceback, then let the error
        # propagate so the process still exits with a failure.
        logging.exception("Error running analysis: %s", e)
        raise

    logging.info("Analysis complete. Saving results ...")
    _write_json_output(result, output_path)


if __name__ == '__main__':
    main()