Add data_analysis/src/data_analysis/core.py
This commit is contained in:
parent
f436a8d19f
commit
c98bd3bdda
1 changed files with 102 additions and 0 deletions
102
data_analysis/src/data_analysis/core.py
Normal file
102
data_analysis/src/data_analysis/core.py
Normal file
|
|
@ -0,0 +1,102 @@
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
|
||||||
|
class DataValidationError(Exception):
|
||||||
|
"""Custom exception for invalid input data."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_data_points(data: List[Dict[str, Any]], required_fields=("intensity", "background_noise")) -> None:
|
||||||
|
if not isinstance(data, list):
|
||||||
|
raise DataValidationError("Input data must be a list of dictionaries.")
|
||||||
|
for i, item in enumerate(data):
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
raise DataValidationError(f"Data item at index {i} is not a dictionary.")
|
||||||
|
for field in required_fields:
|
||||||
|
if field not in item:
|
||||||
|
raise DataValidationError(f"Missing field '{field}' in data item at index {i}.")
|
||||||
|
if not isinstance(item[field], (int, float)):
|
||||||
|
raise DataValidationError(f"Field '{field}' in item {i} must be numeric.")
|
||||||
|
|
||||||
|
|
||||||
|
def correct_flourescence(raw_data: List[Dict[str, float]]) -> List[Dict[str, float]]:
    """Correct raw fluorescence data via smoothing and baseline subtraction.

    Args:
        raw_data: Data points, each with numeric ``intensity`` and
            ``background_noise`` fields.

    Returns:
        A new list of dicts with the smoothed, baseline-corrected
        ``intensity`` and the original ``background_noise``.

    Raises:
        DataValidationError: If *raw_data* fails validation.
    """
    _validate_data_points(raw_data)
    # An empty DataFrame has no 'intensity' column, so the rolling/median
    # accesses below would raise KeyError — short-circuit instead.
    if not raw_data:
        return []
    df = pd.DataFrame(raw_data)
    # Smoothing: centered 3-point moving average; min_periods=1 keeps the
    # edge samples instead of producing NaN there.
    df['intensity'] = df['intensity'].rolling(window=3, min_periods=1, center=True).mean()
    # Baseline subtraction using the median background noise; clip at zero
    # so corrected intensities are never negative.
    baseline = df['background_noise'].median()
    df['corrected_intensity'] = np.maximum(df['intensity'] - baseline, 0)
    return [
        {
            'intensity': float(row['corrected_intensity']),
            'background_noise': float(row['background_noise']),
        }
        for _, row in df.iterrows()
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def substract_reference(data: List[Dict[str, float]], reference: List[Dict[str, float]]) -> List[Dict[str, float]]:
    """Subtract reference intensities (e.g. ambient light) from *data*.

    Args:
        data: Corrected data points with ``intensity`` and ``background_noise``.
        reference: Reference data points of the same shape.

    Returns:
        Records truncated to the shorter of the two inputs, with
        ``intensity`` replaced by ``max(data - reference, 0)``.

    Raises:
        DataValidationError: If either input fails validation.
    """
    _validate_data_points(data)
    _validate_data_points(reference)
    # If lengths differ, operate on the common prefix only.
    min_len = min(len(data), len(reference))
    # Nothing to subtract — and an empty DataFrame would lack the
    # 'intensity' column, raising KeyError below — so return [] explicitly.
    if min_len == 0:
        return []
    df_data = pd.DataFrame(data).iloc[:min_len].reset_index(drop=True)
    df_ref = pd.DataFrame(reference).iloc[:min_len].reset_index(drop=True)
    df_result = df_data.copy()
    # Clip at zero so the subtraction never yields negative intensities.
    df_result['intensity'] = np.maximum(df_data['intensity'] - df_ref['intensity'], 0)
    return df_result.to_dict(orient='records')
|
||||||
|
|
||||||
|
|
||||||
|
def _load_json(path: Path) -> List[Dict[str, Any]]:
|
||||||
|
with path.open('r', encoding='utf-8') as f:
|
||||||
|
return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
def _save_json(data: List[Dict[str, Any]], path: Path) -> None:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with path.open('w', encoding='utf-8') as f:
|
||||||
|
json.dump(data, f, indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: load raw and reference JSON data, apply
    fluorescence correction and reference subtraction, and write the result.
    """
    arg_parser = argparse.ArgumentParser(description="Fluoreszenzdaten-Korrektur")
    arg_parser.add_argument('--input', required=True, help='Pfad zur Eingabedatei mit Rohdaten (JSON).')
    arg_parser.add_argument('--reference', required=True, help='Pfad zur Referenzdatei (JSON).')
    arg_parser.add_argument('--output', required=True, help='Pfad zur Ausgabedatei (JSON).')
    args = arg_parser.parse_args()

    logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
    log = logging.getLogger(__name__)

    in_path = Path(args.input)
    ref_path = Path(args.reference)
    out_path = Path(args.output)

    # Load both inputs before any processing so failures surface early.
    log.info(f"Lade Rohdaten aus {in_path}...")
    raw = _load_json(in_path)
    log.info(f"Lade Referenzdaten aus {ref_path}...")
    reference = _load_json(ref_path)

    # Pipeline: smoothing + baseline correction, then reference subtraction.
    log.info("Korrigiere Fluoreszenzdaten...")
    corrected = correct_flourescence(raw)
    log.info("Subtrahiere Referenzsignal...")
    final = substract_reference(corrected, reference)

    _save_json(final, out_path)
    log.info(f"Korrigierte Daten wurden nach {out_path} geschrieben.")


if __name__ == '__main__':
    main()
|
||||||
Loading…
Reference in a new issue