Add artifact-1/src/artifact_1/main.py
This commit is contained in:
commit
0ce3032a87
1 changed files with 92 additions and 0 deletions
92
artifact-1/src/artifact_1/main.py
Normal file
92
artifact-1/src/artifact_1/main.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
import pandas as pd
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class TimingData:
    """Data model for a single timing log entry."""

    # Hours remaining until the entry expires.
    expires_at_dist_hours: float
    # Observed time delta (Δt) for the entry; may be negative (anomaly).
    delta_t: float
    # Whether the entry was pinned.
    pinned: bool
    # Whether the entry was unpinned.
    unpinned: bool

    @classmethod
    def from_dict(cls, item: Dict[str, Any]) -> 'TimingData':
        """Build a TimingData from a raw dict, coercing field types.

        Args:
            item: Raw record; must contain all four required fields.

        Returns:
            A new TimingData with numeric fields cast to float and
            flag fields cast to bool.

        Raises:
            ValueError: If any required field is missing.
        """
        required_fields = ['expires_at_dist_hours', 'delta_t', 'pinned', 'unpinned']
        for f in required_fields:
            if f not in item:
                raise ValueError(f"Missing required field: {f}")
        return cls(
            expires_at_dist_hours=float(item['expires_at_dist_hours']),
            delta_t=float(item['delta_t']),
            pinned=bool(item['pinned']),
            unpinned=bool(item['unpinned']),
        )


def analyze_timing_data(data: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Analyse timing log data, calculate Δt stats, identify anomalies and near-expiry cases.

    Args:
        data: List of raw timing-log dicts; each must carry the fields
            required by ``TimingData.from_dict``.

    Returns:
        A JSON-serializable summary dict (native Python scalars only)
        with negative-Δt counts, per pinned/unpinned group statistics,
        a near-expiry threshold recommendation, and overall Δt stats.

    Raises:
        TypeError: If ``data`` is not a list.
        ValueError: If a record is missing a required field or a parsed
            column is not numeric.
    """
    if not isinstance(data, list):
        raise TypeError("Input data must be a list of dictionaries.")

    # Graceful empty-input handling: pd.DataFrame([]) has no columns, so
    # the pipeline below would otherwise raise KeyError on df['delta_t'].
    if not data:
        return {
            'negative_dt_count': 0,
            'negative_dt_percentage': 0.0,
            'group_stats': [],
            'recommended_near_expiry_threshold_hours': 48,
            'total_records': 0,
            'mean_delta_t': None,
            'std_delta_t': None,
        }

    parsed = [TimingData.from_dict(d) for d in data]
    df = pd.DataFrame([d.__dict__ for d in parsed])

    # Validation: ensure numeric columns.
    for col in ['expires_at_dist_hours', 'delta_t']:
        if not pd.api.types.is_numeric_dtype(df[col]):
            raise ValueError(f"Column {col} must be numeric.")

    summary = {}

    # Negative Δt detection (a negative delta indicates an anomaly).
    negative_dt = df[df['delta_t'] < 0]
    summary['negative_dt_count'] = int(len(negative_dt))
    summary['negative_dt_percentage'] = float(len(negative_dt)) / len(df) * 100

    # Aggregations by pinned/unpinned.
    group_stats = df.groupby(['pinned', 'unpinned'])['delta_t'].agg(['mean', 'std', 'count']).reset_index()
    # Convert numpy scalars (np.bool_, np.int64, np.float64) to native
    # Python types so json.dump on the result does not fail downstream.
    summary['group_stats'] = [
        {k: (v.item() if hasattr(v, 'item') else v) for k, v in rec.items()}
        for rec in group_stats.to_dict(orient='records')
    ]

    # Near-expiry threshold recommendation (heuristic): prefer the
    # tighter 24 h threshold when most near-expiry records already sit
    # below 24 h.
    near_expiry_df = df[df['expires_at_dist_hours'] < 48]
    share_below_24 = (near_expiry_df['expires_at_dist_hours'] < 24).mean() if len(near_expiry_df) > 0 else 0.0
    threshold = 24 if share_below_24 > 0.5 else 48
    summary['recommended_near_expiry_threshold_hours'] = threshold

    # Overall stats — cast to native float for JSON serializability.
    summary['total_records'] = int(len(df))
    summary['mean_delta_t'] = float(df['delta_t'].mean())
    summary['std_delta_t'] = float(df['delta_t'].std())

    return summary
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: read timing JSON, analyse it, write the summary JSON."""
    arg_parser = argparse.ArgumentParser(
        description='Analyze timing JSON data and produce summary.'
    )
    arg_parser.add_argument(
        '--input', required=True,
        help='Pfad zur JSON-Eingabedatei mit Timing-Daten'
    )
    arg_parser.add_argument(
        '--output', required=True,
        help='Pfad zur JSON-Datei mit Analyseergebnissen'
    )
    ns = arg_parser.parse_args()

    src = Path(ns.input)
    dst = Path(ns.output)

    # Fail fast with an explicit message rather than letting open() raise.
    if not src.exists():
        raise FileNotFoundError(f"Input file not found: {src}")

    with src.open('r', encoding='utf-8') as fh:
        records = json.load(fh)

    summary = analyze_timing_data(records)

    # Create the output directory on demand so the write cannot fail on
    # a missing parent.
    dst.parent.mkdir(parents=True, exist_ok=True)
    with dst.open('w', encoding='utf-8') as fh:
        json.dump(summary, fh, indent=2, ensure_ascii=False)


if __name__ == '__main__':
    main()
|
||||
Loading…
Reference in a new issue