Add data_analysis/src/data_analysis/core.py

This commit is contained in:
Mika 2026-06-14 02:06:39 +00:00
parent 101ec5ec25
commit 986b0c9ede

View file

@@ -0,0 +1,91 @@
from __future__ import annotations
import json
import math
import logging
from dataclasses import dataclass, asdict
from typing import List, Union
import numpy as np
from scipy.signal import find_peaks
import librosa
# Module-level logger for CI output: INFO level, and a stream handler is
# attached only when this logger has no handler of its own yet (so a
# re-import does not duplicate output lines).
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
if not logger.handlers:
    formatter = logging.Formatter(
        '[%(asctime)s] %(levelname)s in %(name)s: %(message)s'
    )
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
class PatternAnalysisError(Exception):
    """Signal that audio pattern analysis could not be completed."""
@dataclass
class PatternReport:
    """Result record for one pattern recognised in the audio data.

    Attributes:
        pattern_name: Label identifying the pattern.
        frequency: Frequency associated with the pattern.
        correlation: Correlation score assigned to the pattern.
        comments: Free-text remark about the pattern.
    """

    pattern_name: str
    frequency: float
    correlation: float
    comments: str

    def to_json(self) -> str:
        """Return the report's fields as a 2-space-indented JSON object.

        Non-ASCII characters are written as-is rather than escaped.
        """
        field_values = asdict(self)
        return json.dumps(field_values, indent=2, ensure_ascii=False)
# Helper validation
def _validate_audio_data(audio_data: Union[np.ndarray, list[float]]) -> np.ndarray:
    """Check raw audio samples and return them as a NumPy array.

    Args:
        audio_data: Samples as a ``numpy.ndarray`` or a list of floats.
            A list is converted to a float array; an array is returned
            unchanged (same object) when it passes every check.

    Returns:
        A non-empty, one-dimensional array containing only finite values.

    Raises:
        TypeError: If ``audio_data`` is neither a list nor an ndarray.
        ValueError: If the data is empty, is not one-dimensional, or
            contains NaN/inf. A list that cannot be converted to floats
            also surfaces as the ``ValueError`` raised by ``np.array``.
    """
    if isinstance(audio_data, list):
        audio_data = np.array(audio_data, dtype=float)
    if not isinstance(audio_data, np.ndarray):
        raise TypeError("audio_data must be numpy.ndarray or list[float]")
    if audio_data.size == 0:
        raise ValueError("audio_data cannot be empty")
    # Reject 0-d and multi-dimensional input here with a clear message;
    # otherwise it only fails later, inside the FFT / peak search, with an
    # error that does not mention the input shape.
    if audio_data.ndim != 1:
        raise ValueError(
            f"audio_data must be one-dimensional, got {audio_data.ndim} dimension(s)"
        )
    if not np.isfinite(audio_data).all():
        raise ValueError("audio_data contains non-finite values")
    return audio_data
def analyze_patterns(
    audio_data: Union[np.ndarray, list[float]],
    *,
    sr: float = 22050,
    max_patterns: int = 10,
) -> List[PatternReport]:
    """Detect prominent frequency peaks in audio samples.

    The magnitude spectrum of the real FFT is searched for local maxima
    whose height is at least 1.5 times the mean magnitude. Each retained
    peak becomes one ``PatternReport``.

    Args:
        audio_data: Audio samples as a NumPy array or a list of floats;
            checked by ``_validate_audio_data``.
        sr: Sampling rate used to map FFT bins to frequencies. Defaults
            to 22050, the value this function previously hard-coded.
        max_patterns: Maximum number of reports to return. When more
            peaks are found, the ones with the largest magnitude are kept.

    Returns:
        Reports ordered by ascending frequency and named ``Pattern_1``,
        ``Pattern_2``, ... in that order. ``correlation`` is the peak's
        magnitude relative to the largest spectrum magnitude (capped at
        1.0), not a statistical correlation. The list is empty when no
        peak qualifies.

    Raises:
        PatternAnalysisError: On any failure (invalid input, invalid
            ``sr``/``max_patterns``, numerical errors); the original
            exception is chained as ``__cause__``.
    """
    logger.info("Starting audio pattern analysis.")
    try:
        if sr <= 0:
            raise ValueError("sr must be positive")
        if max_patterns < 0:
            raise ValueError("max_patterns must be non-negative")

        data = _validate_audio_data(audio_data)

        # Magnitude spectrum and the frequency of each bin.
        fft_mag = np.abs(np.fft.rfft(data))
        freqs = np.fft.rfftfreq(len(data), 1 / sr)

        # Local maxima that stand out from the average spectral level.
        peaks, _ = find_peaks(fft_mag, height=np.mean(fft_mag) * 1.5)
        if len(peaks) == 0:
            logger.warning("No prominent peaks detected in audio data.")

        # Keep the strongest peaks rather than the lowest-frequency ones,
        # then restore ascending-frequency order so results for inputs with
        # at most max_patterns peaks are unchanged. The stable sort on the
        # negated magnitudes resolves ties in favour of the lower frequency.
        if len(peaks) > max_patterns:
            strongest = np.argsort(-fft_mag[peaks], kind="stable")[:max_patterns]
            peaks = np.sort(peaks[strongest])

        # Loop-invariant normaliser; the epsilon avoids division by zero
        # for an all-zero spectrum.
        normaliser = np.max(fft_mag) + 1e-9

        pattern_reports: List[PatternReport] = []
        for number, peak_idx in enumerate(peaks, start=1):
            freq = freqs[peak_idx]
            # Simple heuristic: intensity relative to the strongest bin.
            correlation = float(min(1.0, fft_mag[peak_idx] / normaliser))
            pattern_reports.append(
                PatternReport(
                    pattern_name=f"Pattern_{number}",
                    frequency=float(freq),
                    correlation=correlation,
                    comments=f"Detected frequency peak at {freq:.1f} Hz.",
                )
            )

        logger.info(
            "Analysis completed: %d patterns detected.", len(pattern_reports)
        )
        return pattern_reports
    except Exception as exc:
        logger.error("Error during pattern analysis: %s", exc)
        raise PatternAnalysisError(str(exc)) from exc