From 986b0c9edef25ecbcf3652441e472730b552402b Mon Sep 17 00:00:00 2001 From: Mika Date: Sun, 14 Jun 2026 02:06:39 +0000 Subject: [PATCH] Add data_analysis/src/data_analysis/core.py --- data_analysis/src/data_analysis/core.py | 91 +++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 data_analysis/src/data_analysis/core.py diff --git a/data_analysis/src/data_analysis/core.py b/data_analysis/src/data_analysis/core.py new file mode 100644 index 0000000..326fa25 --- /dev/null +++ b/data_analysis/src/data_analysis/core.py @@ -0,0 +1,91 @@ +from __future__ import annotations +import json +import math +import logging +from dataclasses import dataclass, asdict +from typing import List, Union +import numpy as np +from scipy.signal import find_peaks +import librosa + + +# Logging setup for CI readiness +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter('[%(asctime)s] %(levelname)s in %(name)s: %(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + + +class PatternAnalysisError(Exception): + """Custom exception for errors during audio pattern analysis.""" + pass + + +@dataclass +class PatternReport: + """Repräsentiert das Ergebnis einer Mustererkennung in den Audiodaten.""" + pattern_name: str + frequency: float + correlation: float + comments: str + + def to_json(self) -> str: + return json.dumps(asdict(self), ensure_ascii=False, indent=2) + + +# Helper validation +def _validate_audio_data(audio_data: Union[np.ndarray, list[float]]) -> np.ndarray: + if isinstance(audio_data, list): + audio_data = np.array(audio_data, dtype=float) + if not isinstance(audio_data, np.ndarray): + raise TypeError("audio_data must be numpy.ndarray or list[float]") + if audio_data.size == 0: + raise ValueError("audio_data cannot be empty") + if not np.isfinite(audio_data).all(): + raise ValueError("audio_data contains non-finite values") + return audio_data + + +def analyze_patterns(audio_data: Union[np.ndarray, list[float]]) -> List[PatternReport]: + """Analysiert Audiodaten, extrahiert Frequenzmuster und berechnet Korrelationen zwischen erkannten Mustern.""" + logger.info("Starting audio pattern analysis.") + try: + data = _validate_audio_data(audio_data) + sr = 22050 # sampling rate assumption + + # Compute FFT magnitude spectrum + fft_mag = np.abs(np.fft.rfft(data)) + freqs = np.fft.rfftfreq(len(data), 1 / sr) + + # Find peaks in frequency domain + peaks, _ = find_peaks(fft_mag, height=np.mean(fft_mag) * 1.5) + if len(peaks) == 0: + logger.warning("No prominent peaks detected in audio data.") + + pattern_reports: List[PatternReport] = [] + for i, peak_idx in enumerate(peaks[:10]): # limit to top 10 patterns + freq = freqs[peak_idx] + + # Simulate a simple correlation heuristic: relative intensity + correlation = float(min(1.0, fft_mag[peak_idx] / (np.max(fft_mag) + 1e-9))) + pattern_name = f"Pattern_{i+1}" + comments = f"Detected frequency peak at {freq:.1f} Hz." + + report = PatternReport( + pattern_name=pattern_name, + frequency=float(freq), + correlation=correlation, + comments=comments, + ) + pattern_reports.append(report) + + logger.info(f"Analysis completed: {len(pattern_reports)} patterns detected.") + assert all(isinstance(r, PatternReport) for r in pattern_reports), "Return integrity check failed." + return pattern_reports + + except Exception as exc: + logger.error(f"Error during pattern analysis: {exc}") + raise PatternAnalysisError(str(exc)) from exc