"""Precompute per-activity time-in-zone and cache it in `activity_time_in_zone`. For each activity, prefer the linked FIT file (per-second HR) over splits (lap-averaged HR). Skips activities already in the cache unless --force. Usage: uv run compute_time_in_zone.py # incremental uv run compute_time_in_zone.py --force # recompute all uv run compute_time_in_zone.py --type running # restrict by activity type The split (lap-averaged) fallback is intentionally noisy compared to FIT — re-running with new FITs is the fix, not retuning the lap method. """ from __future__ import annotations import argparse import sys import time from pathlib import Path import pandas as pd from analysis import ( HR_ZONES_USER, load_fit_records, time_in_zone_from_fit, time_in_zone_from_splits, ) from db import connect _ZONE_LABELS = tuple(z[0] for z in HR_ZONES_USER) def _upsert(conn, activity_id: int, by_zone: dict[str, float], source: str) -> None: z = {lab: by_zone.get(lab, 0.0) for lab in _ZONE_LABELS} total = sum(z.values()) conn.execute( """INSERT OR REPLACE INTO activity_time_in_zone (activity_id, z1_s, z2_s, z3_s, z4_s, z5_s, total_s, source, computed_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))""", (activity_id, z["Z1"], z["Z2"], z["Z3"], z["Z4"], z["Z5"], total, source), ) def main() -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--force", action="store_true", help="Recompute even if cached") parser.add_argument("--type", default=None, help="Restrict to one activity_type (e.g. running)") parser.add_argument("--limit", type=int, default=None, help="Stop after N activities (debugging)") args = parser.parse_args() conn = connect() # Pick targets: all activities, optionally filtered by type, minus already-cached sql = "SELECT a.activity_id, a.activity_type FROM activities a" params: list = [] where: list[str] = [] if args.type: where.append("a.activity_type = ?") params.append(args.type) if not args.force: where.append( "a.activity_id NOT IN (SELECT activity_id FROM activity_time_in_zone)" ) if where: sql += " WHERE " + " AND ".join(where) sql += " ORDER BY a.start_time_local DESC" if args.limit: sql += f" LIMIT {args.limit}" targets = list(conn.execute(sql, params)) print(f"{len(targets):,} activities to compute " f"({'force' if args.force else 'incremental'}" + (f", type={args.type}" if args.type else "") + ")") if not targets: return # Pre-load all splits in one query for the lap-fallback path splits_df = pd.read_sql( "SELECT activity_id, avg_hr, duration_s FROM activity_splits", conn, ) splits_by_id = {aid: g for aid, g in splits_df.groupby("activity_id")} fit_count = lap_count = empty = errors = 0 t0 = time.time() for i, row in enumerate(targets, 1): aid = row["activity_id"] has_fit = conn.execute( "SELECT 1 FROM activity_fit_files WHERE activity_id = ? LIMIT 1", (aid,) ).fetchone() is not None try: if has_fit: rec = load_fit_records(conn, aid) tiz = time_in_zone_from_fit(rec) if tiz: _upsert(conn, aid, tiz, "fit") fit_count += 1 if i % 25 == 0: rate = i / max(time.time() - t0, 1e-6) eta = (len(targets) - i) / rate print(f" {i}/{len(targets)} fit={fit_count} lap={lap_count} empty={empty} " f"rate={rate:.1f}/s eta={eta:.0f}s") continue # Fallback: lap-averaged s = splits_by_id.get(aid) if s is not None and not s.empty: tiz = time_in_zone_from_splits(s) if tiz: _upsert(conn, aid, tiz, "lap") lap_count += 1 continue empty += 1 except Exception as exc: # noqa: BLE001 errors += 1 print(f" ! aid={aid}: {type(exc).__name__}: {exc}", file=sys.stderr) if i % 50 == 0: conn.commit() conn.commit() print() print("=== summary ===") print(f" from FIT : {fit_count:,}") print(f" from laps : {lap_count:,}") print(f" empty : {empty:,} (no FIT, no useful splits)") print(f" errors : {errors:,}") print(f" elapsed : {time.time() - t0:.1f} s") if __name__ == "__main__": main()