Files
openrun/examples/notebooks/exploration.ipynb
2026-05-19 08:34:22 -04:00

178 lines
5.0 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Garmin data exploration\n",
"\n",
"Run `uv run auth.py` and `uv run sync.py --full` once before using this notebook.\n",
"\n",
"In VSCode, pick the `.venv` Python interpreter at the top right of this notebook as the kernel."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from openrun import open_conn\n",
"\n",
"conn = open_conn()\n",
"\n",
"# What tables do we have, and how many rows in each?\n",
"tables = pd.read_sql(\"SELECT name FROM sqlite_master WHERE type='table' ORDER BY name\", conn)\n",
"for t in tables['name']:\n",
" n = conn.execute(f'SELECT COUNT(*) FROM {t}').fetchone()[0]\n",
" print(f'{t:30s} {n:>6}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Activities \u2014 load into pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"activities = pd.read_sql(\n",
" \"SELECT activity_id, start_time_local, activity_type, activity_name, \"\n",
" \"distance_m, duration_s, avg_hr, max_hr, calories, elevation_gain_m, \"\n",
" \"training_load, aerobic_te, anaerobic_te \"\n",
" \"FROM activities ORDER BY start_time_local DESC\",\n",
" conn,\n",
" parse_dates=['start_time_local'],\n",
")\n",
"activities['distance_km'] = activities['distance_m'] / 1000\n",
"activities['duration_min'] = activities['duration_s'] / 60\n",
"activities['pace_min_per_km'] = activities['duration_min'] / activities['distance_km']\n",
"activities['week'] = activities['start_time_local'].dt.to_period('W').dt.start_time\n",
"activities.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"activities['activity_type'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Weekly running mileage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"runs = activities[activities['activity_type'].str.contains('running', case=False, na=False)]\n",
"weekly = runs.groupby('week')['distance_km'].sum()\n",
"\n",
"fig, ax = plt.subplots(figsize=(12, 4))\n",
"weekly.plot(kind='bar', ax=ax)\n",
"ax.set_ylabel('km')\n",
"ax.set_title('Weekly running mileage')\n",
"ax.set_xlabel('')\n",
"plt.xticks(rotation=45, ha='right')\n",
"plt.tight_layout()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sleep, stress, HRV \u2014 daily timeline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"wellness = pd.read_sql(\n",
" \"\"\"\n",
" SELECT s.calendar_date,\n",
" s.total_steps,\n",
" sl.sleep_score,\n",
" st.avg_stress,\n",
" h.last_night_avg AS hrv,\n",
" rh.resting_hr\n",
" FROM daily_steps s\n",
" LEFT JOIN daily_sleep sl ON sl.calendar_date = s.calendar_date\n",
" LEFT JOIN daily_stress st ON st.calendar_date = s.calendar_date\n",
" LEFT JOIN daily_hrv h ON h.calendar_date = s.calendar_date\n",
" LEFT JOIN daily_resting_hr rh ON rh.calendar_date = s.calendar_date\n",
" ORDER BY s.calendar_date\n",
" \"\"\",\n",
" conn,\n",
" parse_dates=['calendar_date'],\n",
").set_index('calendar_date')\n",
"wellness.tail(14)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(4, 1, figsize=(12, 8), sharex=True)\n",
"wellness['sleep_score'].plot(ax=axes[0], title='Sleep score')\n",
"wellness['resting_hr'].plot(ax=axes[1], title='Resting HR')\n",
"wellness['hrv'].plot(ax=axes[2], title='HRV (last night avg)')\n",
"wellness['avg_stress'].plot(ax=axes[3], title='Avg stress')\n",
"plt.tight_layout()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying the raw JSON\n",
"\n",
"Every table has a `raw` column with the full Garmin response. Use SQLite's JSON1 functions, or load and `pd.json_normalize`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"raw_rows = pd.read_sql('SELECT activity_id, raw FROM activities LIMIT 5', conn)\n",
"expanded = pd.json_normalize([json.loads(r) for r in raw_rows['raw']])\n",
"print(f'{len(expanded.columns)} columns in the raw activity payload')\n",
"expanded.columns.tolist()[:30]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}