Add artifact_2_worker_binding_analysis/src/artifact_2_worker_binding_analysis/core.py

This commit is contained in:
Mika 2026-03-19 13:57:24 +00:00
parent 0c1eda9609
commit 7f8c8b7f57

View file

@@ -0,0 +1,81 @@
from typing import List, Dict, Any
from collections import defaultdict
from statistics import mean
class WorkerBindingResult:
    """Data model for the binding of one worker to a population.

    Attributes:
        worker_id: Identifier of the worker.
        population_percentage: Share of the population bound to the worker,
            stored as a float in the closed range [0, 100].
    """

    def __init__(self, worker_id: str, population_percentage: float) -> None:
        """Validate and store the worker id and its population share.

        Args:
            worker_id: Identifier of the worker; must be a ``str``.
            population_percentage: Percentage in the closed range [0, 100];
                must be an ``int`` or ``float`` (``bool`` is rejected).

        Raises:
            TypeError: If ``worker_id`` is not a ``str`` or
                ``population_percentage`` is not a real number.
            ValueError: If ``population_percentage`` lies outside [0, 100].
                NaN also ends up here because it fails both comparisons.
        """
        if not isinstance(worker_id, str):
            raise TypeError("worker_id muss eine Zeichenkette sein.")
        # bool is a subclass of int; without the explicit guard True/False
        # would be silently accepted as 1.0 % / 0.0 %.
        if isinstance(population_percentage, bool) or not isinstance(
            population_percentage, (int, float)
        ):
            raise TypeError("population_percentage muss numerisch sein.")
        if not (0.0 <= population_percentage <= 100.0):
            raise ValueError("population_percentage muss zwischen 0 und 100 liegen.")
        self.worker_id = worker_id
        # Normalise ints to float so the attribute always has one type.
        self.population_percentage = float(population_percentage)

    def __repr__(self) -> str:
        """Return a constructor-like representation with two decimals."""
        return (
            f"WorkerBindingResult(worker_id={self.worker_id!r}, "
            f"population_percentage={self.population_percentage:.2f})"
        )
def _validate_input(data: List[Dict[str, Any]]) -> None:
    """Check that ``data`` is a list of well-formed worker-binding records.

    Each record must be a dict holding a ``str`` under ``'worker_id'`` and a
    real number in the closed range [0, 100] under
    ``'population_percentage'``. The range check mirrors the one that
    ``WorkerBindingResult`` applies to the same field, so values that the
    data model would reject cannot leak into the statistics.

    Args:
        data: The records to validate. An empty list is valid.

    Raises:
        TypeError: If ``data`` is not a list, an element is not a dict,
            ``'worker_id'`` is not a ``str``, or ``'population_percentage'``
            is not an ``int``/``float`` (``bool`` is rejected).
        ValueError: If a required key is missing or the percentage lies
            outside [0, 100] (this also covers NaN and infinities).
    """
    if not isinstance(data, list):
        raise TypeError("Eingabedaten müssen eine Liste von Dictionaries sein.")
    for entry in data:
        if not isinstance(entry, dict):
            raise TypeError("Jedes Element der Eingabeliste muss ein Dictionary sein.")
        if 'worker_id' not in entry or 'population_percentage' not in entry:
            raise ValueError("Jedes Datenelement muss 'worker_id' und 'population_percentage' enthalten.")
        if not isinstance(entry['worker_id'], str):
            raise TypeError("'worker_id' muss vom Typ str sein.")
        percentage = entry['population_percentage']
        # bool is a subclass of int; reject it explicitly so True/False are
        # not averaged as 1 % / 0 %.
        if isinstance(percentage, bool) or not isinstance(percentage, (int, float)):
            raise TypeError("'population_percentage' muss numerisch sein.")
        # NaN fails both comparisons, so it is rejected here as well.
        if not (0.0 <= percentage <= 100.0):
            raise ValueError("'population_percentage' muss zwischen 0 und 100 liegen.")
def _aggregate_by_worker(data: List[Dict[str, Any]]) -> Dict[str, float]:
    """Average the population percentages recorded for each worker.

    Args:
        data: Records carrying ``'worker_id'`` and ``'population_percentage'``.

    Returns:
        A dict mapping every worker id found in ``data`` to the arithmetic
        mean of its percentages, keyed in order of first appearance.
    """
    samples_by_worker = defaultdict(list)
    for record in data:
        worker_id = record['worker_id']
        samples_by_worker[worker_id].append(record['population_percentage'])

    averages = {}
    for worker_id, samples in samples_by_worker.items():
        averages[worker_id] = mean(samples)
    return averages
def analyze_worker_binding(data_enforced: List[Dict[str, Any]], data_randomized: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Compare worker bindings between the enforced and randomized modes.

    Both inputs are validated, averaged per worker, and then compared. A
    worker that appears in only one of the two inputs is treated as having
    a mean of 0.0 in the other one.

    Args:
        data_enforced: Worker-binding records from the affinity (enforced) mode.
        data_randomized: Worker-binding records from the randomized mode.

    Returns:
        A dict with the keys:
            'mean_concentration_enforced': mean of the per-worker means of
                ``data_enforced`` (0.0 when it is empty).
            'mean_concentration_randomized': the same for ``data_randomized``.
            'diff_per_worker': dict mapping each worker id to
                ``enforced mean - randomized mean``, keyed in sorted order.
            'avg_diff_overall': mean of the values of 'diff_per_worker'
                (0.0 when there are no workers).

    Raises:
        TypeError, ValueError: Propagated from ``_validate_input`` when
            either input is malformed.
    """
    _validate_input(data_enforced)
    _validate_input(data_randomized)

    enforced_agg = _aggregate_by_worker(data_enforced)
    randomized_agg = _aggregate_by_worker(data_randomized)

    # statistics.mean raises on empty data, hence the explicit 0.0 fallback.
    mean_conc_enforced = mean(enforced_agg.values()) if enforced_agg else 0.0
    mean_conc_randomized = mean(randomized_agg.values()) if randomized_agg else 0.0

    # Sort the union so 'diff_per_worker' has a reproducible key order;
    # iterating the raw set would vary between runs with string hash
    # randomization.
    all_workers = sorted(set(enforced_agg) | set(randomized_agg))
    diff_per_worker = {
        worker: enforced_agg.get(worker, 0.0) - randomized_agg.get(worker, 0.0)
        for worker in all_workers
    }
    avg_diff_overall = mean(diff_per_worker.values()) if diff_per_worker else 0.0

    return {
        'mean_concentration_enforced': mean_conc_enforced,
        'mean_concentration_randomized': mean_conc_randomized,
        'diff_per_worker': diff_per_worker,
        'avg_diff_overall': avg_diff_overall,
    }