openrun/examples/notebooks/exploration.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Garmin data exploration\n",
    "\n",
    "Run `uv run auth.py` and `uv run sync.py --full` once before using this notebook.\n",
    "\n",
    "In VSCode, pick the `.venv` Python interpreter at the top right of this notebook as the kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from openrun import open_conn\n",
    "\n",
    "conn = open_conn()\n",
    "\n",
    "# What tables do we have, and how many rows in each?\n",
    "tables = pd.read_sql(\"SELECT name FROM sqlite_master WHERE type='table' ORDER BY name\", conn)\n",
    "for t in tables['name']:\n",
    "    n = conn.execute(f'SELECT COUNT(*) FROM {t}').fetchone()[0]\n",
    "    print(f'{t:30s} {n:>6}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Activities \u2014 load into pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "activities = pd.read_sql(\n",
    "    \"SELECT activity_id, start_time_local, activity_type, activity_name, \"\n",
    "    \"distance_m, duration_s, avg_hr, max_hr, calories, elevation_gain_m, \"\n",
    "    \"training_load, aerobic_te, anaerobic_te \"\n",
    "    \"FROM activities ORDER BY start_time_local DESC\",\n",
    "    conn,\n",
    "    parse_dates=['start_time_local'],\n",
    ")\n",
    "activities['distance_km'] = activities['distance_m'] / 1000\n",
    "activities['duration_min'] = activities['duration_s'] / 60\n",
    "activities['pace_min_per_km'] = activities['duration_min'] / activities['distance_km']\n",
    "activities['week'] = activities['start_time_local'].dt.to_period('W').dt.start_time\n",
    "activities.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "activities['activity_type'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Weekly running mileage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "runs = activities[activities['activity_type'].str.contains('running', case=False, na=False)]\n",
    "weekly = runs.groupby('week')['distance_km'].sum()\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(12, 4))\n",
    "weekly.plot(kind='bar', ax=ax)\n",
    "ax.set_ylabel('km')\n",
    "ax.set_title('Weekly running mileage')\n",
    "ax.set_xlabel('')\n",
    "plt.xticks(rotation=45, ha='right')\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sleep, stress, HRV \u2014 daily timeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wellness = pd.read_sql(\n",
    "    \"\"\"\n",
    "    SELECT s.calendar_date,\n",
    "           s.total_steps,\n",
    "           sl.sleep_score,\n",
    "           st.avg_stress,\n",
    "           h.last_night_avg AS hrv,\n",
    "           rh.resting_hr\n",
    "    FROM daily_steps s\n",
    "    LEFT JOIN daily_sleep sl    ON sl.calendar_date = s.calendar_date\n",
    "    LEFT JOIN daily_stress st   ON st.calendar_date = s.calendar_date\n",
    "    LEFT JOIN daily_hrv h       ON h.calendar_date  = s.calendar_date\n",
    "    LEFT JOIN daily_resting_hr rh ON rh.calendar_date = s.calendar_date\n",
    "    ORDER BY s.calendar_date\n",
    "    \"\"\",\n",
    "    conn,\n",
    "    parse_dates=['calendar_date'],\n",
    ").set_index('calendar_date')\n",
    "wellness.tail(14)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(4, 1, figsize=(12, 8), sharex=True)\n",
    "wellness['sleep_score'].plot(ax=axes[0], title='Sleep score')\n",
    "wellness['resting_hr'].plot(ax=axes[1], title='Resting HR')\n",
    "wellness['hrv'].plot(ax=axes[2], title='HRV (last night avg)')\n",
    "wellness['avg_stress'].plot(ax=axes[3], title='Avg stress')\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Querying the raw JSON\n",
    "\n",
    "Every table has a `raw` column with the full Garmin response. Use SQLite's JSON1 functions, or load and `pd.json_normalize`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "raw_rows = pd.read_sql('SELECT activity_id, raw FROM activities LIMIT 5', conn)\n",
    "expanded = pd.json_normalize([json.loads(r) for r in raw_rows['raw']])\n",
    "print(f'{len(expanded.columns)} columns in the raw activity payload')\n",
    "expanded.columns.tolist()[:30]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}