"""Ingest a Garmin Connect data export (zip or unzipped directory) into SQLite. Usage: uv run ingest_export.py path/to/export.zip uv run ingest_export.py path/to/unzipped_export_dir/ Garmin's export contains a tree like: DI_CONNECT/ DI-Connect-Fitness/ # activity JSONs + .fit files DI-Connect-Wellness/ # daily wellness JSONs DI-Connect-Aggregator/ # rolled-up summaries DI-Connect-User/ # profile ... File names vary by account / export date. We dispatch on filename substrings and log anything unrecognized so you can tell us about new shapes. """ from __future__ import annotations import argparse import json import sqlite3 import sys import tempfile import zipfile from collections import defaultdict from datetime import datetime from pathlib import Path from typing import Any, Callable, Iterable from db import connect, set_state # --------------------------------------------------------------------------- # helpers # --------------------------------------------------------------------------- def _load_json(path: Path) -> Any: try: with path.open() as fh: return json.load(fh) except (OSError, json.JSONDecodeError) as exc: print(f" ! 
failed to read {path.name}: {exc}", file=sys.stderr) return None def _as_list(payload: Any) -> list[dict]: """Garmin JSON files are sometimes a list, sometimes a {"key": [...]} envelope.""" if isinstance(payload, list): return [x for x in payload if isinstance(x, dict)] if isinstance(payload, dict): for v in payload.values(): if isinstance(v, list): return [x for x in v if isinstance(x, dict)] return [payload] return [] def _dump(obj: Any) -> str: return json.dumps(obj, separators=(",", ":"), default=str) def _first(d: dict, *keys: str) -> Any: """Pull the first non-null value among candidate keys (Garmin renames fields between exports).""" for k in keys: if k in d and d[k] is not None: return d[k] return None def _date_key(d: dict) -> str | None: """Find the calendar date in a daily-stats record, normalized to ISO.""" raw = _first(d, "calendarDate", "date", "summaryDate", "statisticsStartDate") if not raw: return None if isinstance(raw, str): return raw[:10] return str(raw)[:10] # --------------------------------------------------------------------------- # handlers — one per data category # --------------------------------------------------------------------------- def handle_activities(conn: sqlite3.Connection, path: Path) -> int: payload = _load_json(path) if payload is None: return 0 # summarizedActivities format: [{"summarizedActivitiesExport": [{...}, {...}]}] if isinstance(payload, list) and payload and isinstance(payload[0], dict): if "summarizedActivitiesExport" in payload[0]: items = payload[0]["summarizedActivitiesExport"] else: items = payload elif isinstance(payload, dict): items = _as_list(payload) else: items = [] # The Garmin Takeout `summarizedActivities` export uses scaled integer units: # distance cm → m (÷100) # duration ms → s (÷1000) # elevation cm → m (÷100) # speed m/s ÷ 10 → m/s (×10) # The live API (`sync.py`) returns these in SI directly, so we only convert # when the source is this export and the raw values are present in the scaled 
form. def _scale(v, factor): return None if v is None else v * factor n = 0 for raw in items: aid = _first(raw, "activityId", "activityIdLocal") if aid is None: continue atype = raw.get("activityType") if isinstance(atype, dict): type_key = atype.get("typeKey") else: type_key = atype # sometimes a plain string in exports conn.execute( """ INSERT INTO activities ( activity_id, start_time_local, start_time_gmt, activity_type, activity_name, distance_m, duration_s, moving_duration_s, avg_speed_mps, max_speed_mps, avg_hr, max_hr, calories, elevation_gain_m, elevation_loss_m, training_load, aerobic_te, anaerobic_te, vo2_max, raw, fetched_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now')) ON CONFLICT(activity_id) DO UPDATE SET activity_name=excluded.activity_name, raw=excluded.raw, fetched_at=excluded.fetched_at """, ( aid, _first(raw, "startTimeLocal", "beginTimestamp"), _first(raw, "startTimeGmt", "startTimeGMT"), type_key, _first(raw, "activityName", "name"), _scale(_first(raw, "distance"), 0.01), _scale(_first(raw, "duration"), 0.001), _scale(_first(raw, "movingDuration"), 0.001), _scale(_first(raw, "averageSpeed", "avgSpeed"), 10.0), _scale(_first(raw, "maxSpeed"), 10.0), _first(raw, "averageHR", "avgHr"), _first(raw, "maxHR", "maxHr"), _first(raw, "calories"), _scale(_first(raw, "elevationGain"), 0.01), _scale(_first(raw, "elevationLoss"), 0.01), _first(raw, "activityTrainingLoad"), _first(raw, "aerobicTrainingEffect"), _first(raw, "anaerobicTrainingEffect"), _first(raw, "vO2MaxValue"), _dump(raw), ), ) n += 1 conn.commit() return n def handle_sleep(conn: sqlite3.Connection, path: Path) -> int: payload = _load_json(path) if payload is None: return 0 n = 0 for item in _as_list(payload): date_key = _date_key(item) if not date_key: continue conn.execute( """INSERT OR REPLACE INTO daily_sleep (calendar_date, sleep_start_gmt, sleep_end_gmt, deep_s, light_s, rem_s, awake_s, sleep_score, raw, fetched_at) VALUES (?, ?, ?, ?, ?, ?, ?, 
?, ?, datetime('now'))""", ( date_key, _first(item, "sleepStartTimestampGMT", "sleepStartTimeGmt"), _first(item, "sleepEndTimestampGMT", "sleepEndTimeGmt"), _first(item, "deepSleepSeconds"), _first(item, "lightSleepSeconds"), _first(item, "remSleepSeconds"), _first(item, "awakeSleepSeconds", "awakeSeconds"), _first(item, "sleepScore", "overallSleepScore", "value"), _dump(item), ), ) n += 1 conn.commit() return n def handle_steps(conn: sqlite3.Connection, path: Path) -> int: payload = _load_json(path) if payload is None: return 0 n = 0 for item in _as_list(payload): date_key = _date_key(item) if not date_key: continue conn.execute( """INSERT OR REPLACE INTO daily_steps (calendar_date, total_steps, step_goal, distance_m, raw, fetched_at) VALUES (?, ?, ?, ?, ?, datetime('now'))""", ( date_key, _first(item, "totalSteps", "steps"), _first(item, "stepGoal", "dailyStepGoal"), _first(item, "totalDistance", "distance"), _dump(item), ), ) n += 1 conn.commit() return n def handle_stress(conn: sqlite3.Connection, path: Path) -> int: payload = _load_json(path) if payload is None: return 0 n = 0 for item in _as_list(payload): date_key = _date_key(item) if not date_key: continue conn.execute( """INSERT OR REPLACE INTO daily_stress (calendar_date, avg_stress, max_stress, raw, fetched_at) VALUES (?, ?, ?, ?, datetime('now'))""", ( date_key, _first(item, "overallStressLevel", "averageStressLevel", "avgStress"), _first(item, "maxStressLevel", "maxStress"), _dump(item), ), ) n += 1 conn.commit() return n def handle_hrv(conn: sqlite3.Connection, path: Path) -> int: payload = _load_json(path) if payload is None: return 0 n = 0 for item in _as_list(payload): date_key = _date_key(item) if not date_key: continue conn.execute( """INSERT OR REPLACE INTO daily_hrv (calendar_date, weekly_avg, last_night_avg, last_night_5min, status, raw, fetched_at) VALUES (?, ?, ?, ?, ?, ?, datetime('now'))""", ( date_key, _first(item, "weeklyAvg"), _first(item, "lastNightAvg"), _first(item, 
"lastNight5MinHigh"), _first(item, "status"), _dump(item), ), ) n += 1 conn.commit() return n def handle_resting_hr(conn: sqlite3.Connection, path: Path) -> int: payload = _load_json(path) if payload is None: return 0 n = 0 for item in _as_list(payload): date_key = _date_key(item) if not date_key: continue rhr = _first(item, "restingHeartRate", "value") if rhr is None: continue conn.execute( """INSERT OR REPLACE INTO daily_resting_hr (calendar_date, resting_hr, raw, fetched_at) VALUES (?, ?, ?, datetime('now'))""", (date_key, rhr, _dump(item)), ) n += 1 conn.commit() return n def handle_intensity_minutes(conn: sqlite3.Connection, path: Path) -> int: payload = _load_json(path) if payload is None: return 0 n = 0 for item in _as_list(payload): date_key = _date_key(item) if not date_key: continue conn.execute( """INSERT OR REPLACE INTO daily_intensity_minutes (calendar_date, moderate_minutes, vigorous_minutes, raw, fetched_at) VALUES (?, ?, ?, ?, datetime('now'))""", ( date_key, _first(item, "moderateIntensityMinutes", "moderateValue"), _first(item, "vigorousIntensityMinutes", "vigorousValue"), _dump(item), ), ) n += 1 conn.commit() return n def handle_body_battery(conn: sqlite3.Connection, path: Path) -> int: payload = _load_json(path) if payload is None: return 0 n = 0 for item in _as_list(payload): date_key = _date_key(item) if not date_key: continue conn.execute( """INSERT OR REPLACE INTO daily_body_battery (calendar_date, charged, drained, highest, lowest, raw, fetched_at) VALUES (?, ?, ?, ?, ?, ?, datetime('now'))""", ( date_key, _first(item, "charged", "bodyBatteryChargedValue"), _first(item, "drained", "bodyBatteryDrainedValue"), _first(item, "highest", "highestBatteryLevel", "bodyBatteryHighestValue"), _first(item, "lowest", "lowestBatteryLevel", "bodyBatteryLowestValue"), _dump(item), ), ) n += 1 conn.commit() return n def handle_fit(conn: sqlite3.Connection, path: Path, export_root: Path) -> int: """Index .fit file location by activity ID. 
Don't parse the binary here. Garmin uses a few filename formats — handle both the classic `_.fit` and the Takeout variant `_.fit` by picking the first 8+ digit chunk. """ aid = None for chunk in path.stem.split("_"): if chunk.isdigit() and len(chunk) >= 8: aid = int(chunk) break if aid is None: # filename has no recognisable activity-id chunk return 0 # Verify the parsed number is actually an activity_id we know about. # The Garmin Takeout dump uses *upload IDs* in FIT filenames (different ID space # from activity IDs), so naive insert would create thousands of orphaned rows. # If the id isn't in `activities`, skip — link_fit_files.py handles the # by-content matching for takeout-format exports. row = conn.execute( "SELECT 1 FROM activities WHERE activity_id = ? LIMIT 1", (aid,) ).fetchone() if row is None: return 0 rel = path.relative_to(export_root) conn.execute( """INSERT OR REPLACE INTO activity_fit_files (activity_id, fit_path, indexed_at) VALUES (?, ?, datetime('now'))""", (aid, str(rel)), ) conn.commit() return 1 # --------------------------------------------------------------------------- # dispatch — pattern → handler # --------------------------------------------------------------------------- Handler = Callable[[sqlite3.Connection, Path], int] # Order matters — first match wins. Use lowercased filename substrings. 
DISPATCH: list[tuple[str, str, Handler]] = [
    ("summarizedActivities", "activities", handle_activities),
    ("sleepData", "sleep", handle_sleep),
    ("sleep", "sleep", handle_sleep),
    ("UDSFile", "steps", handle_steps),  # daily steps file naming varies
    ("step", "steps", handle_steps),
    ("stressLevel", "stress", handle_stress),
    ("stress", "stress", handle_stress),
    ("hrvStatus", "hrv", handle_hrv),
    ("hrv", "hrv", handle_hrv),
    ("restingHeart", "resting_hr", handle_resting_hr),
    ("RHR", "resting_hr", handle_resting_hr),
    ("intensityMinute", "intensity", handle_intensity_minutes),
    ("bodyBattery", "body_batt", handle_body_battery),
    ("BBS", "body_batt", handle_body_battery),
]


def classify(name: str) -> tuple[str, Handler] | None:
    """Return ``(label, handler)`` for the first DISPATCH needle found in *name*.

    Both the needle and the filename are lowercased before comparison, so the
    match is case-insensitive. Returns ``None`` when nothing matches.
    """
    lower = name.lower()
    for needle, label, fn in DISPATCH:
        if needle.lower() in lower:
            return label, fn
    return None


# ---------------------------------------------------------------------------
# entry
# ---------------------------------------------------------------------------

def iter_files(root: Path) -> Iterable[Path]:
    """Yield every regular file under *root*, recursively, in sorted path order.

    Sorting makes the ingest order reproducible, which matters because later
    files replace earlier rows for the same key.
    """
    for p in sorted(root.rglob("*")):
        if p.is_file():
            yield p


def main() -> None:
    """Command-line entry point: ingest an export zip or directory.

    JSON files are dispatched to their handlers first; ``.fit`` files are
    indexed afterwards because ``handle_fit`` only links ids that already exist
    in ``activities``. With ``--dry-run`` no handler is called and the summary
    shows file counts instead of row counts. The database connection and any
    temporary extraction directory are released even when the run aborts.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("source", help="Path to export.zip or unzipped export directory")
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be ingested without writing to the DB",
    )
    args = parser.parse_args()

    src = Path(args.source).expanduser().resolve()
    if not src.exists():
        sys.exit(f"path does not exist: {src}")

    cleanup_dir: tempfile.TemporaryDirectory | None = None
    conn = None
    try:
        if src.is_file() and src.suffix.lower() == ".zip":
            cleanup_dir = tempfile.TemporaryDirectory(prefix="garmin_export_")
            export_root = Path(cleanup_dir.name)
            print(f"unzipping {src.name} → {export_root}")
            try:
                with zipfile.ZipFile(src) as zf:
                    zf.extractall(export_root)
            except zipfile.BadZipFile as exc:
                sys.exit(f"not a valid zip archive: {src} ({exc})")
        elif src.is_dir():
            export_root = src
        else:
            sys.exit(f"unsupported source: {src}")

        conn = connect()
        counts: dict[str, int] = defaultdict(int)
        unknown: list[str] = []
        fit_paths: list[Path] = []

        # Pass 1: JSON. FIT files are only collected here so that the
        # activities table is populated before they are looked up.
        for path in iter_files(export_root):
            suffix = path.suffix.lower()
            if suffix == ".fit":
                fit_paths.append(path)
                continue
            if suffix != ".json":
                continue
            matched = classify(path.name)
            if not matched:
                unknown.append(str(path.relative_to(export_root)))
                continue
            label, fn = matched
            if args.dry_run:
                counts[label] += 1
                continue
            try:
                counts[label] += fn(conn, path)
            except Exception as exc:  # noqa: BLE001
                # Drop this file's uncommitted rows so the next handler's
                # commit does not persist a half-ingested file.
                conn.rollback()
                print(f" ! error in {label} handler for {path.name}: {exc}", file=sys.stderr)

        # Pass 2: FIT index.
        if args.dry_run:
            fit_count = len(fit_paths)
        else:
            fit_count = sum(handle_fit(conn, p, export_root) for p in fit_paths)

        print("\n=== ingest summary ===")
        for label, n in sorted(counts.items()):
            print(f" {label:20s} {n:>6} rows" + (" (file count, dry run)" if args.dry_run else ""))
        print(f" fit_files {fit_count:>6}")
        if unknown:
            print(f"\n unrecognized JSON files ({len(unknown)}):")
            for name in unknown[:25]:
                print(f" {name}")
            if len(unknown) > 25:
                print(f" ... and {len(unknown) - 25} more")

        if not args.dry_run:
            # Same naive-UTC text that datetime.utcnow() produced, without the
            # deprecated call.
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            set_state(conn, "last_ingest_utc", now_utc.isoformat(timespec="seconds"))
            set_state(conn, "last_ingest_source", str(src))
            conn.commit()
    finally:
        if conn is not None:
            conn.close()
        if cleanup_dir is not None:
            cleanup_dir.cleanup()

    print("\n✓ done")


if __name__ == "__main__":
    main()