Files
openrun/compute_time_in_zone.py
2026-05-18 12:53:24 -04:00

135 lines
4.6 KiB
Python

"""Precompute per-activity time-in-zone and cache it in `activity_time_in_zone`.
For each activity, prefer the linked FIT file (per-second HR) over splits
(lap-averaged HR). Skips activities already in the cache unless --force.
Usage:
uv run compute_time_in_zone.py # incremental
uv run compute_time_in_zone.py --force # recompute all
uv run compute_time_in_zone.py --type running # restrict by activity type
The split (lap-averaged) fallback is intentionally noisy compared to FIT —
re-running with new FITs is the fix, not retuning the lap method.
"""
from __future__ import annotations
import argparse
import sys
import time
from pathlib import Path
import pandas as pd
from analysis import (
HR_ZONES_USER,
load_fit_records,
time_in_zone_from_fit,
time_in_zone_from_splits,
)
from db import connect
# First element of every HR_ZONES_USER entry, in declaration order; these are
# the keys looked up in the per-activity zone dicts when writing cache rows.
_ZONE_LABELS = tuple(zone[0] for zone in HR_ZONES_USER)
def _upsert(conn, activity_id: int, by_zone: dict[str, float], source: str) -> None:
    """Insert or replace the cached time-in-zone row for one activity.

    Args:
        conn: Open database connection; the statement is executed on it but
            not committed here.
        activity_id: Key of the activity whose row is written.
        by_zone: Seconds spent per zone label. Labels from ``_ZONE_LABELS``
            that are absent are stored as 0.0.
        source: Tag stored in the ``source`` column (the callers in this file
            pass "fit" or "lap").
    """
    # Normalise to the known zone labels so a missing zone becomes 0.0.
    seconds: dict[str, float] = {}
    for label in _ZONE_LABELS:
        seconds[label] = by_zone.get(label, 0.0)

    row = (
        activity_id,
        seconds["Z1"],
        seconds["Z2"],
        seconds["Z3"],
        seconds["Z4"],
        seconds["Z5"],
        sum(seconds.values()),  # total_s covers only the known labels
        source,
    )
    conn.execute(
        """INSERT OR REPLACE INTO activity_time_in_zone
(activity_id, z1_s, z2_s, z3_s, z4_s, z5_s, total_s, source, computed_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))""",
        row,
    )
def _select_targets(conn, activity_type, force, limit) -> list:
    """Return the activity rows that need a time-in-zone computation.

    Args:
        conn: Open database connection.
        activity_type: If truthy, only activities with this ``activity_type``.
        force: If false, activities already present in
            ``activity_time_in_zone`` are excluded.
        limit: If truthy, cap the number of rows returned; ``None`` or 0
            means no cap.

    Returns:
        Rows with ``activity_id`` and ``activity_type``, newest
        ``start_time_local`` first.
    """
    sql = "SELECT a.activity_id, a.activity_type FROM activities a"
    params: list = []
    where: list[str] = []
    if activity_type:
        where.append("a.activity_type = ?")
        params.append(activity_type)
    if not force:
        where.append(
            "a.activity_id NOT IN (SELECT activity_id FROM activity_time_in_zone)"
        )
    if where:
        sql += " WHERE " + " AND ".join(where)
    sql += " ORDER BY a.start_time_local DESC"
    if limit:
        # Bind the limit like every other value instead of splicing it in.
        sql += " LIMIT ?"
        params.append(limit)
    return list(conn.execute(sql, params))


def _time_in_zone_for(conn, aid, has_fit, splits_by_id) -> tuple[dict[str, float], str] | None:
    """Compute time-in-zone for one activity, preferring FIT data over laps.

    Returns:
        ``(by_zone, source)`` where ``source`` is "fit" or "lap", or ``None``
        when neither the FIT records nor the splits produce a result.
    """
    if has_fit:
        tiz = time_in_zone_from_fit(load_fit_records(conn, aid))
        if tiz:
            return tiz, "fit"
    # Fallback: lap-averaged. Also used when a FIT file exists but gave nothing.
    splits = splits_by_id.get(aid)
    if splits is not None and not splits.empty:
        tiz = time_in_zone_from_splits(splits)
        if tiz:
            return tiz, "lap"
    return None


def _run(conn, args) -> None:
    """Select targets, compute and cache their time-in-zone, print a summary."""
    targets = _select_targets(conn, args.type, args.force, args.limit)
    mode = "force" if args.force else "incremental"
    type_note = f", type={args.type}" if args.type else ""
    print(f"{len(targets):,} activities to compute ({mode}{type_note})")
    if not targets:
        return

    # Pre-load all splits in one query for the lap-fallback path.
    splits_df = pd.read_sql(
        "SELECT activity_id, avg_hr, duration_s FROM activity_splits",
        conn,
    )
    splits_by_id = {key: group for key, group in splits_df.groupby("activity_id")}

    fit_count = lap_count = empty = errors = 0
    t0 = time.time()
    for i, row in enumerate(targets, 1):
        aid = row["activity_id"]
        has_fit = conn.execute(
            "SELECT 1 FROM activity_fit_files WHERE activity_id = ? LIMIT 1", (aid,)
        ).fetchone() is not None
        try:
            result = _time_in_zone_for(conn, aid, has_fit, splits_by_id)
            if result is None:
                empty += 1
            else:
                by_zone, source = result
                _upsert(conn, aid, by_zone, source)
                if source == "fit":
                    fit_count += 1
                else:
                    lap_count += 1
        except Exception as exc:  # noqa: BLE001
            # Best-effort batch job: report the failure and keep going.
            errors += 1
            print(f" ! aid={aid}: {type(exc).__name__}: {exc}", file=sys.stderr)

        # Progress and periodic commits run for every activity, whatever the
        # outcome, so an interrupted run keeps all but the last few rows.
        if i % 25 == 0:
            rate = i / max(time.time() - t0, 1e-6)
            eta = (len(targets) - i) / rate
            print(f" {i}/{len(targets)} fit={fit_count} lap={lap_count} empty={empty} "
                  f"rate={rate:.1f}/s eta={eta:.0f}s")
        if i % 50 == 0:
            conn.commit()
    conn.commit()

    print()
    print("=== summary ===")
    print(f" from FIT : {fit_count:,}")
    print(f" from laps : {lap_count:,}")
    print(f" empty : {empty:,} (no FIT, no useful splits)")
    print(f" errors : {errors:,}")
    print(f" elapsed : {time.time() - t0:.1f} s")


def main() -> None:
    """CLI entry point: parse arguments and fill the time-in-zone cache.

    Flags:
        --force: recompute activities that are already cached.
        --type: restrict to a single ``activity_type``.
        --limit: stop after N activities (debugging).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--force", action="store_true", help="Recompute even if cached")
    parser.add_argument("--type", default=None, help="Restrict to one activity_type (e.g. running)")
    parser.add_argument("--limit", type=int, default=None, help="Stop after N activities (debugging)")
    args = parser.parse_args()

    conn = connect()
    try:
        _run(conn, args)
    finally:
        # Release the connection on every exit path, including early return
        # and errors. Assumes connect() returns a DB-API connection.
        conn.close()
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()