"""Worker binding analysis: compare enforced and randomized binding runs."""

# Reconstructed from the patch "Add artifact_2_worker_binding_analysis/src/
# artifact_2_worker_binding_analysis/core.py"
# (commit 7f8c8b7f5735bb9773f8b4bdc8cefd2b3acd034d).

from typing import List, Dict, Any
from collections import defaultdict
from statistics import mean


def _is_real_number(value: Any) -> bool:
    """Return True for int/float values, excluding bool."""
    # bool is a subclass of int, so a plain isinstance check would let
    # True/False through and count them as 1% / 0%.
    return isinstance(value, (int, float)) and not isinstance(value, bool)


class WorkerBindingResult:
    """Data model for the binding of a single worker to a population."""

    def __init__(self, worker_id: str, population_percentage: float) -> None:
        """Validate and store one worker binding.

        Args:
            worker_id: Identifier of the worker.
            population_percentage: Percentage value in the closed range
                0..100; stored as a float.

        Raises:
            TypeError: If worker_id is not a str, or population_percentage is
                not an int/float (bool is rejected).
            ValueError: If population_percentage is outside 0..100 (this also
                rejects NaN, because every comparison with NaN is false).
        """
        if not isinstance(worker_id, str):
            raise TypeError("worker_id muss eine Zeichenkette sein.")
        if not _is_real_number(population_percentage):
            raise TypeError("population_percentage muss numerisch sein.")
        if not (0.0 <= population_percentage <= 100.0):
            raise ValueError("population_percentage muss zwischen 0 und 100 liegen.")
        self.worker_id = worker_id
        self.population_percentage = float(population_percentage)

    def __repr__(self) -> str:
        return f"WorkerBindingResult(worker_id={self.worker_id!r}, population_percentage={self.population_percentage:.2f})"


def _validate_input(data: List[Dict[str, Any]]) -> None:
    """Check that data is a list of well-formed binding records.

    Each record must be a dict with a str 'worker_id' and a numeric
    'population_percentage' within 0..100, mirroring the constraints that
    WorkerBindingResult enforces.

    Raises:
        TypeError: If data is not a list, an entry is not a dict, or a field
            has the wrong type (bool is not accepted as a number).
        ValueError: If a required key is missing or the percentage is outside
            0..100 (including NaN and infinities).
    """
    if not isinstance(data, list):
        raise TypeError("Eingabedaten müssen eine Liste von Dictionaries sein.")
    for entry in data:
        if not isinstance(entry, dict):
            raise TypeError("Jedes Element der Eingabeliste muss ein Dictionary sein.")
        if 'worker_id' not in entry or 'population_percentage' not in entry:
            raise ValueError("Jedes Datenelement muss 'worker_id' und 'population_percentage' enthalten.")
        if not isinstance(entry['worker_id'], str):
            raise TypeError("'worker_id' muss vom Typ str sein.")
        percentage = entry['population_percentage']
        if not _is_real_number(percentage):
            raise TypeError("'population_percentage' muss numerisch sein.")
        # Without this check NaN or out-of-range values would silently
        # poison every mean computed downstream.
        if not (0.0 <= percentage <= 100.0):
            raise ValueError("'population_percentage' muss zwischen 0 und 100 liegen.")


def _aggregate_by_worker(data: List[Dict[str, Any]]) -> Dict[str, float]:
    """Return the mean population_percentage per worker_id.

    Expects records that already passed _validate_input. Workers appear in
    the result in order of first occurrence in data.
    """
    grouped = defaultdict(list)
    for entry in data:
        grouped[entry['worker_id']].append(entry['population_percentage'])
    # statistics.mean returns an int for integral all-int input; normalise so
    # the declared Dict[str, float] return type always holds.
    return {worker: float(mean(values)) for worker, values in grouped.items()}


def analyze_worker_binding(data_enforced: List[Dict[str, Any]], data_randomized: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Compare worker bindings between the enforced and randomized modes.

    Records are first averaged per worker within each mode; all further
    figures are computed from those per-worker means.

    Args:
        data_enforced: Binding records from the enforced (affinity) mode.
        data_randomized: Binding records from the randomized mode.

    Returns:
        dict with the keys:
            'mean_concentration_enforced': mean of the per-worker means in
                the enforced data (0.0 if that list is empty).
            'mean_concentration_randomized': same for the randomized data.
            'diff_per_worker': enforced minus randomized per-worker mean for
                every worker seen in either mode, keyed by worker_id in
                sorted order. A worker missing from one mode contributes 0.0
                for that mode.
            'avg_diff_overall': mean of the 'diff_per_worker' values (0.0 if
                there are no workers).

    Raises:
        TypeError: If either input is malformed (see _validate_input).
        ValueError: If a record lacks a required key or carries a percentage
            outside 0..100.
    """
    _validate_input(data_enforced)
    _validate_input(data_randomized)

    enforced_agg = _aggregate_by_worker(data_enforced)
    randomized_agg = _aggregate_by_worker(data_randomized)

    mean_conc_enforced = mean(enforced_agg.values()) if enforced_agg else 0.0
    mean_conc_randomized = mean(randomized_agg.values()) if randomized_agg else 0.0

    # Sorting makes the key order reproducible; plain set iteration over str
    # keys varies between interpreter runs because of hash randomisation.
    all_workers = sorted(enforced_agg.keys() | randomized_agg.keys())
    diff_per_worker = {
        worker: enforced_agg.get(worker, 0.0) - randomized_agg.get(worker, 0.0)
        for worker in all_workers
    }

    avg_diff_overall = mean(diff_per_worker.values()) if diff_per_worker else 0.0

    return {
        'mean_concentration_enforced': mean_conc_enforced,
        'mean_concentration_randomized': mean_conc_randomized,
        'diff_per_worker': diff_per_worker,
        'avg_diff_overall': avg_diff_overall,
    }