# openrun/compute_time_in_zone.py

"""Precompute per-activity time-in-zone and cache it in `activity_time_in_zone`.

For each activity, prefer the linked FIT file (per-second HR) over splits
(lap-averaged HR). Skips activities already in the cache unless --force.

Usage:
    uv run compute_time_in_zone.py                # incremental
    uv run compute_time_in_zone.py --force        # recompute all
    uv run compute_time_in_zone.py --type running # restrict by activity type

The split (lap-averaged) fallback is intentionally noisy compared to FIT —
re-running with --force once new FITs are linked is the fix (incremental runs
skip activities that are already cached), not retuning the lap method.
"""

from __future__ import annotations

import argparse
import sys
import time
from pathlib import Path

import pandas as pd

from analysis import (
    HR_ZONES_USER,
    load_fit_records,
    time_in_zone_from_fit,
    time_in_zone_from_splits,
)
from db import connect


# Zone labels, taken from the first element of each HR_ZONES_USER entry and
# kept in that order. `_upsert` looks up "Z1".."Z5" among these labels.
_ZONE_LABELS = tuple(zone[0] for zone in HR_ZONES_USER)


def _upsert(conn, activity_id: int, by_zone: dict[str, float], source: str) -> None:
    """Write one activity's time-in-zone row into `activity_time_in_zone`.

    Args:
        conn: Database connection exposing ``execute``.
        activity_id: Activity the row belongs to.
        by_zone: Seconds per zone label; labels in ``_ZONE_LABELS`` that are
            missing from the mapping are stored as ``0.0``.
        source: Tag stored in the ``source`` column (callers pass ``"fit"``
            or ``"lap"``).

    ``total_s`` is the sum of the per-zone values and ``computed_at`` is set
    by SQLite's ``datetime('now')``. The statement is ``INSERT OR REPLACE``,
    so an existing row is presumably overwritten (assumes ``activity_id`` is
    the table's unique key — confirm against the schema). Nothing is
    committed here; the caller owns the transaction.
    """
    seconds: dict[str, float] = {}
    for label in _ZONE_LABELS:
        seconds[label] = by_zone.get(label, 0.0)

    statement = """INSERT OR REPLACE INTO activity_time_in_zone
           (activity_id, z1_s, z2_s, z3_s, z4_s, z5_s, total_s, source, computed_at)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))"""
    values = (
        activity_id,
        seconds["Z1"],
        seconds["Z2"],
        seconds["Z3"],
        seconds["Z4"],
        seconds["Z5"],
        sum(seconds.values()),
        source,
    )
    conn.execute(statement, values)


def _select_targets(conn, activity_type: str | None, force: bool, limit: int | None) -> list:
    """Return the activity rows that need a time-in-zone computation.

    Args:
        conn: Open database connection.
        activity_type: When set, keep only activities with this ``activity_type``.
        force: When false, exclude activities that already have a row in
            ``activity_time_in_zone``.
        limit: When truthy, cap the number of rows returned; ``None`` or ``0``
            means no cap.

    Returns:
        Rows of ``(activity_id, activity_type)`` ordered by
        ``start_time_local`` descending.
    """
    sql = "SELECT a.activity_id, a.activity_type FROM activities a"
    params: list = []
    where: list[str] = []
    if activity_type:
        where.append("a.activity_type = ?")
        params.append(activity_type)
    if not force:
        where.append(
            "a.activity_id NOT IN (SELECT activity_id FROM activity_time_in_zone)"
        )
    if where:
        sql += " WHERE " + " AND ".join(where)
    sql += " ORDER BY a.start_time_local DESC"
    if limit:
        # Bind the limit as a parameter instead of formatting it into the SQL.
        sql += " LIMIT ?"
        params.append(limit)
    return list(conn.execute(sql, params))


def _process_targets(conn, targets: list) -> tuple[dict[str, int], float]:
    """Compute and cache time-in-zone for every target row.

    For each activity the FIT path is tried first (when a row exists in
    ``activity_fit_files``); if it yields nothing, the lap-averaged splits are
    used. Per-activity exceptions are reported on stderr and counted, not
    raised. Progress is printed every 25 activities and the transaction is
    committed every 50, whatever the outcome of the activity at that index.

    Returns:
        ``(counts, elapsed_s)`` where ``counts`` has the keys ``"fit"``,
        ``"lap"``, ``"empty"`` and ``"errors"``.
    """
    # Pre-load all splits in one query for the lap-fallback path
    splits_df = pd.read_sql(
        "SELECT activity_id, avg_hr, duration_s FROM activity_splits",
        conn,
    )
    splits_by_id = {aid: g for aid, g in splits_df.groupby("activity_id")}

    counts = {"fit": 0, "lap": 0, "empty": 0, "errors": 0}
    total = len(targets)
    # Monotonic clock: rate/ETA/elapsed are unaffected by wall-clock changes.
    t0 = time.monotonic()
    for i, row in enumerate(targets, 1):
        aid = row["activity_id"]
        has_fit = conn.execute(
            "SELECT 1 FROM activity_fit_files WHERE activity_id = ? LIMIT 1", (aid,)
        ).fetchone() is not None
        try:
            tiz = None
            source = None
            if has_fit:
                tiz = time_in_zone_from_fit(load_fit_records(conn, aid))
                if tiz:
                    source = "fit"
            if source is None:
                # Fallback: lap-averaged
                s = splits_by_id.get(aid)
                if s is not None and not s.empty:
                    tiz = time_in_zone_from_splits(s)
                    if tiz:
                        source = "lap"
            if source is None:
                counts["empty"] += 1
            else:
                _upsert(conn, aid, tiz, source)
                counts[source] += 1
        except Exception as exc:  # noqa: BLE001
            counts["errors"] += 1
            print(f"  ! aid={aid}: {type(exc).__name__}: {exc}", file=sys.stderr)

        # Progress and the periodic commit sit after the try block with no
        # `continue` above them, so they run for every activity and not only
        # for the ones that ended up empty or failed.
        if i % 25 == 0:
            rate = i / max(time.monotonic() - t0, 1e-6)
            eta = (total - i) / rate
            print(f"  {i}/{total}  fit={counts['fit']} lap={counts['lap']} empty={counts['empty']}  "
                  f"rate={rate:.1f}/s  eta={eta:.0f}s")
        if i % 50 == 0:
            conn.commit()

    conn.commit()
    return counts, time.monotonic() - t0


def main() -> None:
    """Parse CLI arguments, compute time-in-zone for the selected activities,
    and print a summary.

    Flags: ``--force`` recomputes activities that are already cached,
    ``--type`` restricts to one ``activity_type``, ``--limit`` caps the number
    of activities processed. The database connection is closed on every exit
    path, including the early return when there is nothing to compute.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--force", action="store_true", help="Recompute even if cached")
    parser.add_argument("--type", default=None, help="Restrict to one activity_type (e.g. running)")
    parser.add_argument("--limit", type=int, default=None, help="Stop after N activities (debugging)")
    args = parser.parse_args()

    conn = connect()
    try:
        # Pick targets: all activities, optionally filtered by type, minus already-cached
        targets = _select_targets(conn, args.type, args.force, args.limit)
        print(f"{len(targets):,} activities to compute "
              f"({'force' if args.force else 'incremental'}"
              + (f", type={args.type}" if args.type else "")
              + ")")
        if not targets:
            return
        counts, elapsed = _process_targets(conn, targets)
    finally:
        conn.close()

    print()
    print("=== summary ===")
    print(f"  from FIT  : {counts['fit']:,}")
    print(f"  from laps : {counts['lap']:,}")
    print(f"  empty     : {counts['empty']:,}  (no FIT, no useful splits)")
    print(f"  errors    : {counts['errors']:,}")
    print(f"  elapsed   : {elapsed:.1f} s")


if __name__ == "__main__":
    main()