Add policy_eval.py/src/policy_eval/core.py

This commit is contained in:
Mika 2026-02-09 16:01:10 +00:00
commit 626a8abff8

View file

@ -0,0 +1,118 @@
from __future__ import annotations

import csv
import hashlib
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict

import pandas as pd
# Module-level logger keyed to this module's import path.
logger = logging.getLogger(__name__)
# NOTE(review): basicConfig at import time configures the root logger as a
# side effect of importing this module; libraries normally leave logging
# configuration to the application entry point — confirm this is intentional.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s')
class PolicyChangeError(Exception):
    """Raised when supplied policy constants fail structural validation."""
def _validate_policy_constants(policy_constants_json: Dict[str, Any]) -> None:
required_keys = {"version", "constant_value"}
if not isinstance(policy_constants_json, dict):
raise PolicyChangeError("policy_constants_json must be a dict")
missing = required_keys - policy_constants_json.keys()
if missing:
raise PolicyChangeError(f"Missing required keys in policy_constants_json: {missing}")
if not isinstance(policy_constants_json["version"], str):
raise PolicyChangeError("version must be a string")
if not isinstance(policy_constants_json["constant_value"], (int, float)):
raise PolicyChangeError("constant_value must be numeric")
def _compute_policy_hash(policy_constants_json: Dict[str, Any]) -> str:
serialized = json.dumps(policy_constants_json, sort_keys=True).encode('utf-8')
return hashlib.sha256(serialized).hexdigest()
def check_policy_changes(
    policy_constants_json: Dict[str, Any],
    hash_record_path: Path = Path("output/.last_policy_hash"),
) -> bool:
    """Validate the policy constants and detect changes via content hashing.

    Compares the SHA-256 hash of *policy_constants_json* against the hash
    persisted by the previous run, then records the current hash for the
    next comparison.

    Args:
        policy_constants_json: Parsed policy constants to validate and hash.
        hash_record_path: File where the last seen hash is stored. Defaults
            to ``output/.last_policy_hash`` for backward compatibility.

    Returns:
        True if the constants changed since the last recorded run (or no
        record exists yet), False otherwise.

    Raises:
        PolicyChangeError: If the constants fail validation.
    """
    _validate_policy_constants(policy_constants_json)
    current_hash = _compute_policy_hash(policy_constants_json)
    if hash_record_path.exists():
        last_hash = hash_record_path.read_text(encoding="utf-8").strip()
        has_changed = last_hash != current_hash
    else:
        # First run: no baseline exists, so treat the policy as changed.
        has_changed = True
    hash_record_path.parent.mkdir(parents=True, exist_ok=True)
    hash_record_path.write_text(current_hash, encoding="utf-8")
    logger.info("Policy change detected: %s", has_changed)
    return has_changed
def _resolve_policy_hash(constants_path: Path) -> str:
    """Best-effort: load, validate, and hash the policy constants file.

    Returns 'unknown' when the file is missing or invalid; failures are
    logged as warnings but never abort the backtest.
    """
    if not constants_path.exists():
        return 'unknown'
    try:
        with open(constants_path, 'r', encoding='utf-8') as f:
            constants_data = json.load(f)
        _validate_policy_constants(constants_data)
        return _compute_policy_hash(constants_data)
    except Exception as exc:
        # Deliberate best-effort: a broken constants file should not stop
        # the backtest, only degrade the hash to 'unknown'.
        logger.warning("Could not compute policy hash: %s", exc)
        return 'unknown'


def run_backtest(
    audit_set: str,
    output_dir: str = 'output',
    constants_path: str = 'config/policy_constants.json',
) -> Dict[str, str]:
    """Run a backtest against the fixed audit set and emit delta artifacts.

    Reads every ``*.csv`` file in *audit_set*, tags each row with a
    (currently simulated) old/new decision delta plus the active policy
    hash, and writes ``delta_cases.csv`` and ``delta_summary.json``.

    Args:
        audit_set: Directory containing the audit CSV files.
        output_dir: Directory for generated artifacts (default 'output',
            matching the previous hard-coded path).
        constants_path: Policy constants JSON file (default
            'config/policy_constants.json', matching the previous path).

    Returns:
        Mapping with keys 'delta_summary_path' and 'delta_cases_path'.

    Raises:
        FileNotFoundError: If the audit directory or its CSV files are missing.
        RuntimeError: If the expected output artifacts were not written.
    """
    audit_path = Path(audit_set)
    if not audit_path.exists():
        raise FileNotFoundError(f"Audit set path not found: {audit_set}")
    all_csvs = list(audit_path.glob('*.csv'))
    if not all_csvs:
        raise FileNotFoundError(f"No CSV audit files found in {audit_set}")
    df_list = []
    for csv_file in all_csvs:
        df = pd.read_csv(csv_file)
        # Keep provenance so delta rows can be traced back to their file.
        df['source_file'] = csv_file.name
        df_list.append(df)
    full_df = pd.concat(df_list, ignore_index=True)
    # Simulated delta generation: old vs. new policy are identical for now;
    # the first column stands in for the decision value.
    full_df['reason'] = 'Policy-Update'
    full_df['old_decision'] = full_df.iloc[:, 0].astype(str)
    full_df['new_decision'] = full_df.iloc[:, 0].astype(str)
    policy_hash = _resolve_policy_hash(Path(constants_path))
    full_df['policy_hash'] = policy_hash
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    delta_cases_path = out_dir / 'delta_cases.csv'
    full_df[['reason', 'old_decision', 'new_decision', 'policy_hash']].to_csv(delta_cases_path, index=False)
    # Summary artifact for downstream CI consumption.
    delta_summary = {
        # Timezone-aware clock; replace('+00:00', 'Z') preserves the
        # original trailing-'Z' format while avoiding the deprecated,
        # naive datetime.utcnow().
        'timestamp': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
        'total_cases': len(full_df),
        'unique_policy_hashes': [policy_hash],
        'reason_counts': {'Policy-Update': len(full_df)},
    }
    delta_summary_path = out_dir / 'delta_summary.json'
    with open(delta_summary_path, 'w', encoding='utf-8') as f:
        json.dump(delta_summary, f, indent=2)
    # Explicit check instead of `assert`: asserts are stripped under -O,
    # which would silently disable this CI guard.
    if not (delta_cases_path.exists() and delta_summary_path.exists()):
        raise RuntimeError("Backtest output files missing.")
    logger.info("Backtest completed: %s cases written.", len(full_df))
    return {
        'delta_summary_path': str(delta_summary_path),
        'delta_cases_path': str(delta_cases_path),
    }