#include <errno.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * bootstrap_analysis_tool
 * ------------------------
 * Small Linux CLI program that bootstraps the outlier rate of a sample.
 *
 * Input (stdin): ASCII, whitespace-separated floating-point numbers,
 * typically one value per line.
 *
 * A value counts as an outlier when it lies outside
 * [Q1 - 1.5*IQR, Q3 + 1.5*IQR]. The outlier rate is estimated from the
 * resulting 0/1 indicator variable.
 *
 * Steps:
 *   1. Read the data from stdin.
 *   2. Flag outliers with the quartile/IQR rule.
 *   3. Draw B bootstrap resamples (with replacement) of the indicators.
 *   4. Compute the outlier rate of every resample.
 *   5. Derive the 95% confidence interval with the percentile method.
 *   6. Print the result as a JSON object on stdout, compatible with
 *      BootstrapResult.
 *
 * CLI:
 *   ./bootstrap_tool <n_resamples> <seed> < input_data
 *
 *   - n_resamples : number of bootstrap resamples (e.g. 10000)
 *   - seed        : random seed (e.g. 42) for reproducibility; values <= 0
 *                   are replaced by 1
 *
 * Output (JSON object, printed over several lines):
 *   {
 *     "mean":     <float>,
 *     "ci_lower": <float>,
 *     "ci_upper": <float>,
 *     "outliers": <int>
 *   }
 *
 *   - mean     : mean outlier rate over all resamples
 *   - ci_lower : 2.5th percentile of the resampled outlier rates
 *   - ci_upper : 97.5th percentile of the resampled outlier rates
 *   - outliers : number of outliers in the original sample
 */

enum { INITIAL_CAPACITY = 1024 };

/*
 * Read whitespace-separated doubles from stdin into a growing heap buffer.
 *
 * On success returns the buffer (owned by the caller, release with free())
 * and stores the number of values in *n_out. Returns NULL with *n_out == 0
 * when no value could be read or when memory allocation fails.
 *
 * Reading stops at end of input or at the first token that is not a number;
 * in the latter case a warning is written to stderr and the values read so
 * far are kept.
 */
static double *read_data(size_t *n_out)
{
    size_t cap = INITIAL_CAPACITY;
    size_t n = 0;

    *n_out = 0;

    double *data = malloc(cap * sizeof *data);
    if (!data) {
        fprintf(stderr, "Memory allocation failed\n");
        return NULL;
    }

    for (;;) {
        double v;
        int r = scanf("%lf", &v);

        if (r == EOF) {
            break;
        }
        if (r != 1) {
            /* Do not swallow malformed input silently. */
            fprintf(stderr,
                    "Warning: non-numeric input after %zu value(s); "
                    "remaining input ignored\n", n);
            break;
        }

        if (n == cap) {
            /* Guard the doubling and the byte count against wrap-around. */
            if (cap > SIZE_MAX / 2 / sizeof *data) {
                fprintf(stderr, "Memory reallocation failed\n");
                free(data);
                return NULL;
            }
            cap *= 2;

            double *tmp = realloc(data, cap * sizeof *data);
            if (!tmp) {
                fprintf(stderr, "Memory reallocation failed\n");
                free(data);
                return NULL;
            }
            data = tmp;
        }
        data[n++] = v;
    }

    if (n == 0) {
        free(data);
        return NULL;
    }

    *n_out = n;
    return data;
}

/* qsort comparison function for doubles (ascending order). */
static int cmp_double(const void *a, const void *b)
{
    double da = *(const double *)a;
    double db = *(const double *)b;

    if (da < db) {
        return -1;
    }
    if (da > db) {
        return 1;
    }
    return 0;
}

/* Median of an already sorted array; n must be greater than zero. */
static double sorted_median(const double *x, size_t n)
{
    if (n % 2 == 0) {
        return 0.5 * (x[n / 2 - 1] + x[n / 2]);
    }
    return x[n / 2];
}

/*
 * Compute median, Q1 and Q3 of x[0..n-1] from a sorted copy.
 *
 * Q1 and Q3 are the medians of the lower and upper half of the sorted data;
 * for odd n the middle element belongs to neither half. With a single
 * value both quartiles equal the median. For n == 0 all three results are
 * set to NAN.
 *
 * The input array is not modified. Terminates the process if the temporary
 * copy cannot be allocated.
 */
static void compute_quartiles(const double *x, size_t n,
                              double *median, double *q1, double *q3)
{
    if (n == 0) {
        *median = NAN;
        *q1 = NAN;
        *q3 = NAN;
        return;
    }

    double *tmp = malloc(n * sizeof *tmp);
    if (!tmp) {
        fprintf(stderr, "Memory allocation failed in compute_quartiles\n");
        exit(EXIT_FAILURE);
    }
    memcpy(tmp, x, n * sizeof *tmp);
    qsort(tmp, n, sizeof *tmp, cmp_double);

    *median = sorted_median(tmp, n);

    /* Both halves hold n/2 elements; the upper one skips the middle
     * element when n is odd. */
    size_t half = n / 2;
    const double *low = tmp;
    const double *high = tmp + half + (n % 2);

    if (half == 0) {
        *q1 = *median;
        *q3 = *median;
    } else {
        *q1 = sorted_median(low, half);
        *q3 = sorted_median(high, half);
    }

    free(tmp);
}

/*
 * Build the 0/1 outlier indicator array for x[0..n-1] using the IQR rule.
 *
 * A value is flagged when it is below Q1 - 1.5*IQR or above Q3 + 1.5*IQR.
 * The optional outputs n_outliers, lower and upper (each may be NULL)
 * receive the number of flagged values and the two fences.
 *
 * Returns a heap array of n ints owned by the caller (release with free()).
 * Terminates the process if memory cannot be allocated.
 */
static int *compute_outlier_flags(const double *x, size_t n, int *n_outliers,
                                  double *lower, double *upper)
{
    double median, q1, q3;
    compute_quartiles(x, n, &median, &q1, &q3);

    double iqr = q3 - q1;
    double lo = q1 - 1.5 * iqr;
    double hi = q3 + 1.5 * iqr;

    if (lower) {
        *lower = lo;
    }
    if (upper) {
        *upper = hi;
    }

    /* Request at least one element so that n == 0 is not reported as an
     * allocation failure on platforms where malloc(0) returns NULL. */
    int *flags = malloc((n ? n : 1) * sizeof *flags);
    if (!flags) {
        fprintf(stderr, "Memory allocation failed for flags\n");
        exit(EXIT_FAILURE);
    }

    int cnt = 0;
    for (size_t i = 0; i < n; ++i) {
        int is_outlier = (x[i] < lo || x[i] > hi);
        flags[i] = is_outlier;
        cnt += is_outlier;
    }

    if (n_outliers) {
        *n_outliers = cnt;
    }
    return flags;
}

/*
 * Random index in [0, n-1] drawn from rand(); n must be greater than zero.
 *
 * The granularity is limited by RAND_MAX, so for n > RAND_MAX + 1 not every
 * index can be produced.
 */
static inline size_t rand_index(size_t n)
{
    double u = (double)rand() / ((double)RAND_MAX + 1.0); /* in [0, 1) */
    size_t idx = (size_t)(u * (double)n);

    /* Clamp in case floating-point rounding pushes the product up to n. */
    return idx < n ? idx : n - 1;
}

/*
 * Percentile p (as a fraction in 0..1) of a sorted array, using linear
 * interpolation between the two neighbouring elements at position p*(n-1).
 * Returns NAN for an empty array; p outside [0, 1] is clamped to the first
 * or last element.
 */
static double percentile(const double *x_sorted, size_t n, double p)
{
    if (n == 0) {
        return NAN;
    }
    if (p <= 0.0) {
        return x_sorted[0];
    }
    if (p >= 1.0) {
        return x_sorted[n - 1];
    }

    double idx = p * (n - 1);
    size_t i = (size_t)floor(idx);
    double frac = idx - (double)i;

    if (i + 1 >= n) {
        return x_sorted[n - 1];
    }
    return x_sorted[i] * (1.0 - frac) + x_sorted[i + 1] * frac;
}

/*
 * Parse a complete base-10 integer argument.
 * Returns 0 and stores the value in *out on success; returns -1 when the
 * string is empty, has trailing characters, or is out of range for long.
 */
static int parse_long(const char *s, long *out)
{
    char *end = NULL;

    errno = 0;
    long v = strtol(s, &end, 10);
    if (end == s || *end != '\0' || errno == ERANGE) {
        return -1;
    }

    *out = v;
    return 0;
}

/*
 * Entry point: bootstrap_tool <n_resamples> <seed> < input_data
 *
 * Reads the sample from stdin, flags outliers, bootstraps the outlier rate
 * and prints mean, 95% percentile interval and the outlier count as JSON.
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE on invalid arguments,
 * empty input, allocation failure or a failed write to stdout.
 */
int main(int argc, char **argv)
{
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <n_resamples> <seed> < input_data\n",
                argc > 0 ? argv[0] : "bootstrap_tool");
        return EXIT_FAILURE;
    }

    long n_resamples = 0;
    if (parse_long(argv[1], &n_resamples) != 0) {
        fprintf(stderr, "Invalid n_resamples: '%s'\n", argv[1]);
        return EXIT_FAILURE;
    }
    if (n_resamples <= 0) {
        fprintf(stderr, "n_resamples must be positive\n");
        return EXIT_FAILURE;
    }
    /* The byte count of the result buffer must fit into size_t; otherwise
     * the multiplication below would wrap and under-allocate. */
    if ((unsigned long)n_resamples > SIZE_MAX / sizeof(double)) {
        fprintf(stderr, "n_resamples is too large\n");
        return EXIT_FAILURE;
    }

    long seed = 0;
    if (parse_long(argv[2], &seed) != 0) {
        fprintf(stderr, "Invalid seed: '%s'\n", argv[2]);
        return EXIT_FAILURE;
    }
    if (seed <= 0) {
        seed = 1; /* non-positive seeds fall back to 1 */
    }
    srand((unsigned int)seed);

    int rc = EXIT_FAILURE;
    double *data = NULL;
    int *flags = NULL;
    double *boot_props = NULL;

    size_t n = 0;
    data = read_data(&n);
    if (!data || n == 0) {
        fprintf(stderr, "No input data read from stdin\n");
        goto cleanup;
    }

    int n_outliers = 0;
    flags = compute_outlier_flags(data, n, &n_outliers, NULL, NULL);

    size_t n_boot = (size_t)n_resamples;
    boot_props = malloc(n_boot * sizeof *boot_props);
    if (!boot_props) {
        fprintf(stderr, "Memory allocation failed for bootstrap results\n");
        goto cleanup;
    }

    /* Bootstrap over the 0/1 outlier indicators. */
    double mean = 0.0;
    for (size_t b = 0; b < n_boot; ++b) {
        size_t sum = 0; /* size_t: cannot overflow for any valid n */
        for (size_t i = 0; i < n; ++i) {
            sum += (size_t)flags[rand_index(n)];
        }
        boot_props[b] = (double)sum / (double)n;
        mean += boot_props[b];
    }
    mean /= (double)n_boot;

    /* Percentile interval from the sorted resample proportions. */
    qsort(boot_props, n_boot, sizeof *boot_props, cmp_double);
    double ci_lower = percentile(boot_props, n_boot, 0.025);
    double ci_upper = percentile(boot_props, n_boot, 0.975);

    /* JSON output matching BootstrapResult. */
    printf("{\n");
    printf(" \"mean\": %.10f,\n", mean);
    printf(" \"ci_lower\": %.10f,\n", ci_lower);
    printf(" \"ci_upper\": %.10f,\n", ci_upper);
    printf(" \"outliers\": %d\n", n_outliers);
    printf("}\n");

    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "Failed to write result to stdout\n");
        goto cleanup;
    }

    rc = EXIT_SUCCESS;

cleanup:
    free(boot_props);
    free(flags);
    free(data);
    return rc;
}