Continuing P0-1
This commit is contained in:
631
examples/notebooks/01_overview.ipynb
Normal file
631
examples/notebooks/01_overview.ipynb
Normal file
File diff suppressed because one or more lines are too long
324
examples/notebooks/02_running.ipynb
Normal file
324
examples/notebooks/02_running.ipynb
Normal file
File diff suppressed because one or more lines are too long
193
examples/notebooks/03_recovery.ipynb
Normal file
193
examples/notebooks/03_recovery.ipynb
Normal file
@@ -0,0 +1,193 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 03 — Recovery & wellness\n",
|
||||
"\n",
|
||||
"Sleep, HRV, RHR, body battery, and how they relate to training load."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, '..')\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from analysis import open_conn, load_wellness, joined\n",
|
||||
"\n",
|
||||
"conn = open_conn()\n",
|
||||
"w = load_wellness(conn)\n",
|
||||
"j = joined(conn)\n",
|
||||
"print(f'{len(w)} days, {w.index.min().date()} → {w.index.max().date()}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Recent 30 days — at a glance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"recent = w.tail(30)\n",
|
||||
"fig, axes = plt.subplots(5, 1, figsize=(13, 10), sharex=True)\n",
|
||||
"recent['sleep_score'].plot(ax=axes[0], marker='o', color='C0')\n",
|
||||
"axes[0].set_title('Sleep score'); axes[0].grid(alpha=0.3)\n",
|
||||
"recent['resting_hr'].plot(ax=axes[1], marker='o', color='C1')\n",
|
||||
"axes[1].set_title('Resting HR (bpm)'); axes[1].grid(alpha=0.3)\n",
|
||||
"recent['hrv_last_night'].plot(ax=axes[2], marker='o', color='C2')\n",
|
||||
"axes[2].set_title('HRV (last night avg, ms)'); axes[2].grid(alpha=0.3)\n",
|
||||
"recent['avg_stress'].plot(ax=axes[3], marker='o', color='C3')\n",
|
||||
"axes[3].set_title('Avg stress'); axes[3].grid(alpha=0.3)\n",
|
||||
"recent[['bb_highest','bb_lowest']].plot(ax=axes[4], marker='o')\n",
|
||||
"axes[4].set_title('Body Battery (high/low)'); axes[4].grid(alpha=0.3)\n",
|
||||
"plt.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Sleep composition over time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"stages = w[['deep_s','light_s','rem_s','awake_s']].fillna(0) / 3600 # hours\n",
|
||||
"stages.columns = ['Deep','Light','REM','Awake']\n",
|
||||
"fig, ax = plt.subplots(figsize=(13, 4))\n",
|
||||
"stages.tail(60).plot.area(ax=ax, alpha=0.7, color=['#1f3a93','#6dd5fa','#9b59b6','#e74c3c'])\n",
|
||||
"ax.set_ylabel('Hours')\n",
|
||||
"ax.set_title('Sleep stages — last 60 nights')\n",
|
||||
"ax.grid(alpha=0.3)\n",
|
||||
"plt.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Does yesterday's training affect today's HRV / RHR?\n",
|
||||
"\n",
|
||||
"Compare days *after* a run vs days *after* rest, restricted to days where you actually have HRV/RHR readings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"by_prev = j.copy()\n",
|
||||
"by_prev['ran_yesterday'] = by_prev['distance_km_prev'].gt(0)\n",
|
||||
"by_prev['load_bucket_prev'] = pd.cut(by_prev['training_load_prev'].fillna(0),\n",
|
||||
" bins=[-0.1, 0, 50, 100, 200, 1e6],\n",
|
||||
" labels=['rest','light','moderate','hard','very hard'])\n",
|
||||
"summary = (by_prev.groupby('load_bucket_prev', observed=True)\n",
|
||||
" .agg(n_days=('resting_hr','count'),\n",
|
||||
" rhr_mean=('resting_hr','mean'),\n",
|
||||
" hrv_mean=('hrv_last_night','mean'),\n",
|
||||
" sleep_score_mean=('sleep_score','mean'),\n",
|
||||
" avg_stress=('avg_stress','mean')))\n",
|
||||
"summary.round(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(13, 4), sharex=True)\n",
|
||||
"for ax, col, title in zip(axes,\n",
|
||||
" ['rhr_mean','hrv_mean','sleep_score_mean'],\n",
|
||||
" ['Resting HR (next day)','HRV (next night)','Sleep score']):\n",
|
||||
" summary[col].plot(kind='bar', ax=ax, color='C0')\n",
|
||||
" ax.set_title(title)\n",
|
||||
" ax.set_xlabel('Yesterday training load')\n",
|
||||
" ax.grid(alpha=0.3, axis='y')\n",
|
||||
"plt.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Correlations\n",
|
||||
"Pairwise correlations between training and recovery signals (Spearman, robust to outliers)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cols = ['training_load','distance_km','training_load_prev','distance_km_prev',\n",
|
||||
" 'sleep_score','sleep_hours','deep_pct','rem_pct',\n",
|
||||
" 'resting_hr','hrv_last_night','avg_stress','bb_highest']\n",
|
||||
"corr = j[cols].corr(method='spearman')\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(10, 8))\n",
|
||||
"im = ax.imshow(corr, vmin=-1, vmax=1, cmap='RdBu_r')\n",
|
||||
"ax.set_xticks(range(len(cols))); ax.set_xticklabels(cols, rotation=45, ha='right')\n",
|
||||
"ax.set_yticks(range(len(cols))); ax.set_yticklabels(cols)\n",
|
||||
"for i in range(len(cols)):\n",
|
||||
" for k in range(len(cols)):\n",
|
||||
" v = corr.iloc[i, k]\n",
|
||||
" if pd.notna(v):\n",
|
||||
" ax.text(k, i, f'{v:.2f}', ha='center', va='center',\n",
|
||||
" color='white' if abs(v) > 0.5 else 'black', fontsize=8)\n",
|
||||
"plt.colorbar(im, ax=ax)\n",
|
||||
"plt.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Weekly summary: km vs. recovery indicators"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"weekly = j.resample('W-MON').agg({\n",
|
||||
" 'distance_km': 'sum',\n",
|
||||
" 'training_load': 'sum',\n",
|
||||
" 'sleep_score': 'mean',\n",
|
||||
" 'sleep_hours': 'mean',\n",
|
||||
" 'resting_hr': 'mean',\n",
|
||||
" 'hrv_last_night': 'mean',\n",
|
||||
" 'avg_stress': 'mean',\n",
|
||||
"})\n",
|
||||
"weekly.tail(12).round(1)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {"display_name": ".venv", "language": "python", "name": "python3"},
|
||||
"language_info": {"name": "python"}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
774
examples/notebooks/04_efficiency.ipynb
Normal file
774
examples/notebooks/04_efficiency.ipynb
Normal file
File diff suppressed because one or more lines are too long
843
examples/notebooks/05_intra_run.ipynb
Normal file
843
examples/notebooks/05_intra_run.ipynb
Normal file
@@ -0,0 +1,843 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 05 \u2014 Intra-run dynamics\n",
|
||||
"\n",
|
||||
"Within-run signals from lap-level splits: cardiac drift, cadence/stride, route-controlled pace, HR-zone distribution.\n",
|
||||
"\n",
|
||||
"**Data note.** The Garmin live-API sync (`sync.py`) does not populate `activity_fit_files`, so the lap-level sections run on `activity_splits` (per-mile laps, ~2 000 rows). The exception is \u00a71b, which reads per-second FIT records and therefore only works once FIT files have been ingested from the official zip export via `ingest_export.py`. The lap-level analyses can move to per-second resolution the same way \u2014 only the loader needs to change."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, '..')\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from analysis import (\n",
|
||||
" open_conn, load_splits, decoupling,\n",
|
||||
" assign_hr_zone, cluster_routes, haversine_km,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"conn = open_conn()\n",
|
||||
"splits = load_splits(conn) # running only by default\n",
|
||||
"print(f'{len(splits):,} splits across {splits.activity_id.nunique():,} runs, {splits.start_time_local.min().date()} \u2192 {splits.start_time_local.max().date()}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Cardiac drift (Pa:Hr decoupling)\n",
|
||||
"\n",
|
||||
"Within a single run, divide the laps into first half and second half. For each half compute the duration-weighted ratio `speed / HR` \u2014 essentially \"speed per heartbeat,\" the gold-standard aerobic-fitness index. Decoupling = how much that ratio falls between halves.\n",
|
||||
"\n",
|
||||
"$$\\text{decoupling}\\;\\% = \\left(\\frac{(\\text{speed}/\\text{HR})_{1st}}{(\\text{speed}/\\text{HR})_{2nd}} - 1\\right) \\times 100$$\n",
|
||||
"\n",
|
||||
"Friel's rule of thumb for steady aerobic runs:\n",
|
||||
"- **< 5 %** \u2014 aerobically developed\n",
|
||||
"- **5\u201310 %** \u2014 moderate drift; sustainable\n",
|
||||
"- **> 10 %** \u2014 significant drift; pace was unsustainable or it was a hot/hard day\n",
|
||||
"\n",
|
||||
"Negative values mean you ran *more efficiently* in the second half (a negative split or conservative opener)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dec = decoupling(splits, min_splits=6)\n",
|
||||
"print(f'{len(dec)} runs with \u2265 6 splits')\n",
|
||||
"dec['decoupling_pct'].describe().round(2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(13, 4.5))\n",
|
||||
"\n",
|
||||
"ax = axes[0]\n",
|
||||
"ax.hist(dec['decoupling_pct'], bins=30, edgecolor='white')\n",
|
||||
"for x, lab, color in [(5, 'good <5%', '#2a9d8f'), (10, 'caution 10%', '#e76f51')]:\n",
|
||||
" ax.axvline(x, color=color, ls='--', lw=1.2, label=lab)\n",
|
||||
"ax.axvline(0, color='gray', lw=0.8)\n",
|
||||
"ax.set_xlabel('decoupling (%)'); ax.set_ylabel('runs')\n",
|
||||
"ax.set_title(f'Pa:Hr decoupling distribution (n={len(dec)})')\n",
|
||||
"ax.legend()\n",
|
||||
"\n",
|
||||
"ax = axes[1]\n",
|
||||
"sc = ax.scatter(dec['start_time_local'], dec['decoupling_pct'],\n",
|
||||
" c=dec['distance_km'], cmap='viridis', alpha=0.75, s=28)\n",
|
||||
"ax.axhline(5, color='#2a9d8f', ls='--', lw=1)\n",
|
||||
"ax.axhline(10, color='#e76f51', ls='--', lw=1)\n",
|
||||
"ax.axhline(0, color='gray', lw=0.6)\n",
|
||||
"ax.set_ylabel('decoupling (%)'); ax.set_title('decoupling over time (color = distance km)')\n",
|
||||
"plt.colorbar(sc, ax=ax, label='distance (km)')\n",
|
||||
"fig.autofmt_xdate()\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Aerobic runs only \u2014 the clean view\n",
|
||||
"\n",
|
||||
"Decoupling is only interpretable on **steady aerobic** efforts. Filter to runs \u2265 8 km with avg HR < 165 (well below threshold for a sub-3:30 marathoner) and look at the trend. Less drift over time = better aerobic conditioning."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aerobic = dec[(dec['distance_km'] >= 8) & (dec['avg_hr'] < 165)].copy()\n",
|
||||
"aerobic['quarter'] = aerobic['start_time_local'].dt.to_period('Q').dt.to_timestamp()\n",
|
||||
"print(f'{len(aerobic)} aerobic runs')\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(11, 4.5))\n",
|
||||
"ax.scatter(aerobic['start_time_local'], aerobic['decoupling_pct'],\n",
|
||||
" c=aerobic['avg_hr'], cmap='magma_r', s=40, alpha=0.85)\n",
|
||||
"\n",
|
||||
"# rolling median (smooth trend)\n",
|
||||
"aerobic_sorted = aerobic.sort_values('start_time_local')\n",
|
||||
"rolling = aerobic_sorted.set_index('start_time_local')['decoupling_pct'].rolling('120D', min_periods=5).median()\n",
|
||||
"ax.plot(rolling.index, rolling.values, color='black', lw=2, label='120-day rolling median')\n",
|
||||
"\n",
|
||||
"ax.axhline(5, color='#2a9d8f', ls='--', lw=1)\n",
|
||||
"ax.axhline(10, color='#e76f51', ls='--', lw=1)\n",
|
||||
"ax.set_ylabel('decoupling (%)')\n",
|
||||
"ax.set_title('Cardiac drift on aerobic runs (\u22658 km, avg HR < 165)')\n",
|
||||
"ax.legend(loc='upper right')\n",
|
||||
"fig.autofmt_xdate()\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Year-over-year aerobic decoupling summary\n",
|
||||
"aerobic.groupby('year').agg(\n",
|
||||
" n=('decoupling_pct', 'size'),\n",
|
||||
" median_drift=('decoupling_pct', 'median'),\n",
|
||||
" mean_drift=('decoupling_pct', 'mean'),\n",
|
||||
" pct_under_5=('decoupling_pct', lambda s: (s < 5).mean() * 100),\n",
|
||||
").round(2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1b. Per-second decoupling \u2014 race-day deep dive\n",
|
||||
"\n",
|
||||
"Lap-level decoupling (above) is coarse. With FIT files linked (since the takeout-export ingest), we can read the per-second `heart_rate` and `enhanced_speed` directly and compute Friel's decoupling without the noise from aid-station stops and lap rounding.\n",
|
||||
"\n",
|
||||
"**Method:**\n",
|
||||
"1. Drop the first 5 min (warmup) and last 2 min (cooldown / finish sprint).\n",
|
||||
"2. Drop records with speed < 0.5 m/s \u2014 aid-station pauses don't drag the mean.\n",
|
||||
"3. Slice the moving time into equal-time chunks (halves or quartiles).\n",
|
||||
"4. For each chunk: `efficiency = mean(speed) / mean(HR)`.\n",
|
||||
"5. `decoupling % = (eff_first / eff_chunk \u2212 1) \u00d7 100` \u2014 positive = drift.\n",
|
||||
"\n",
|
||||
"Friel's rule: < 5% on a steady aerobic run = aerobically developed; > 10% = unsustainable pacing or fueling deficit. Race-day numbers are expected to be higher than training (you push the back half), but *how much* higher matters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from analysis import load_fit_records, fit_decoupling, fit_rolling_efficiency\n",
|
||||
"\n",
|
||||
"race_meta = pd.read_sql('''\n",
|
||||
" SELECT a.activity_id, a.start_time_local, a.distance_m/1000 AS km, a.avg_hr\n",
|
||||
" FROM activities a JOIN activity_fit_files f USING(activity_id)\n",
|
||||
" WHERE a.distance_m >= 45000 AND a.distance_m <= 60000\n",
|
||||
" AND a.activity_type='running'\n",
|
||||
" ORDER BY a.start_time_local\n",
|
||||
"''', conn, parse_dates=['start_time_local'])\n",
|
||||
"print(f'{len(race_meta)} prior 50K-class races with FIT linked:')\n",
|
||||
"race_meta"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load each race's FIT records once and cache the frames by activity_id\n",
|
||||
"race_records = {}\n",
|
||||
"for _, r in race_meta.iterrows():\n",
|
||||
" aid = int(r['activity_id'])\n",
|
||||
" race_records[aid] = load_fit_records(conn, aid)\n",
|
||||
" print(f\" {r['start_time_local'].date()} aid={aid} records={len(race_records[aid]):,}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Halves and quartiles \u2014 when does the drift start?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"halves = []\n",
|
||||
"quarts = []\n",
|
||||
"for _, r in race_meta.iterrows():\n",
|
||||
" aid = int(r['activity_id'])\n",
|
||||
" h = fit_decoupling(race_records[aid], segments=2)\n",
|
||||
" h.insert(0, 'race', r['start_time_local'].date())\n",
|
||||
" halves.append(h)\n",
|
||||
" q = fit_decoupling(race_records[aid], segments=4)\n",
|
||||
" q.insert(0, 'race', r['start_time_local'].date())\n",
|
||||
" quarts.append(q)\n",
|
||||
"halves_df = pd.concat(halves, ignore_index=True)\n",
|
||||
"quarts_df = pd.concat(quarts, ignore_index=True)\n",
|
||||
"\n",
|
||||
"print('Per-race half-by-half decoupling:')\n",
|
||||
"(halves_df.pivot(index='race', columns='segment', values='decoupling_pct')\n",
|
||||
" .round(1).rename(columns={1:'Q1+Q2', 2:'Q3+Q4'}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('Quartile decoupling \u2014 where the drift actually starts:')\n",
|
||||
"(quarts_df.pivot(index='race', columns='segment', values='decoupling_pct')\n",
|
||||
" .round(1).rename(columns={1:'Q1',2:'Q2',3:'Q3',4:'Q4'}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Visualise quartile decoupling \u2014 bars per race, grouped by quartile\n",
|
||||
"fig, ax = plt.subplots(figsize=(11, 4.5))\n",
|
||||
"races = sorted(quarts_df['race'].unique())\n",
|
||||
"x = np.arange(len(races))\n",
|
||||
"w = 0.2\n",
|
||||
"colors = ['#2a9d8f', '#e9c46a', '#f4a261', '#e76f51'] # cool \u2192 hot\n",
|
||||
"for i in range(1, 5):\n",
|
||||
" vals = [quarts_df[(quarts_df.race == r) & (quarts_df.segment == i)]['decoupling_pct'].iloc[0]\n",
|
||||
" for r in races]\n",
|
||||
" ax.bar(x + (i - 2.5) * w, vals, w, color=colors[i-1], label=f'Q{i}')\n",
|
||||
"ax.axhline(0, color='black', lw=0.5)\n",
|
||||
"ax.axhline(10, color='gray', ls='--', lw=1, label='Friel \"unsustainable\" threshold')\n",
|
||||
"ax.set_xticks(x)\n",
|
||||
"ax.set_xticklabels([str(r) for r in races])\n",
|
||||
"ax.set_ylabel('decoupling (%)')\n",
|
||||
"ax.set_title('Per-second decoupling by race quartile \u2014 the wall lands in Q3 every time')\n",
|
||||
"ax.legend(loc='upper left', ncol=5)\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Rolling efficiency curves \u2014 when does the wheels-come-off moment hit?\n",
|
||||
"\n",
|
||||
"5-minute rolling speed/HR over elapsed time. Flat = pacing matches HR. Falling curve = decoupling in progress. The y-axis is the same physical quantity Friel's method aggregates, just plotted continuously."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, axes = plt.subplots(len(race_meta), 1, figsize=(13, 2.5 * len(race_meta)),\n",
|
||||
" sharex=True, squeeze=False)\n",
|
||||
"axes = axes.flatten()\n",
|
||||
"for ax, (_, r) in zip(axes, race_meta.iterrows()):\n",
|
||||
" aid = int(r['activity_id'])\n",
|
||||
" rolled = fit_rolling_efficiency(race_records[aid], window_s=300)\n",
|
||||
" valid = rolled.dropna(subset=['rolling_efficiency'])\n",
|
||||
" ax.plot(valid['elapsed_min'], valid['rolling_efficiency'], color='#264653', lw=1.5)\n",
|
||||
" # Normalise against the first 30 minutes' mean to show % drop\n",
|
||||
" base = valid.loc[valid['elapsed_min'] < 30, 'rolling_efficiency'].mean()\n",
|
||||
" if base and base > 0:\n",
|
||||
" ax2 = ax.twinx()\n",
|
||||
" ax2.plot(valid['elapsed_min'], (valid['rolling_efficiency'] / base - 1) * 100,\n",
|
||||
" color='#e76f51', lw=1, alpha=0.6)\n",
|
||||
" ax2.set_ylabel('% vs first 30 min', color='#e76f51', fontsize=9)\n",
|
||||
" ax2.axhline(0, color='#e76f51', ls=':', lw=0.6, alpha=0.5)\n",
|
||||
" ax.set_ylabel('speed / HR')\n",
|
||||
" ax.set_title(f\"{r['start_time_local'].date()} \u2014 {r['km']:.1f} km, avg HR {r['avg_hr']:.0f}\",\n",
|
||||
" fontsize=10)\n",
|
||||
"axes[-1].set_xlabel('elapsed minutes')\n",
|
||||
"fig.suptitle('Rolling efficiency through each race (5-min window)', y=1.01)\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### HR and pace traces, side by side\n",
|
||||
"\n",
|
||||
"Same data, separated: HR (left axis, red) and pace (right axis, dark, inverted so faster is up). The interesting moments are where the curves *diverge* \u2014 HR climbing while pace stays flat (drift) or HR steady while pace falls (just tired legs)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, axes = plt.subplots(len(race_meta), 1, figsize=(13, 2.8 * len(race_meta)),\n",
|
||||
" sharex=True, squeeze=False)\n",
|
||||
"axes = axes.flatten()\n",
|
||||
"for ax, (_, r) in zip(axes, race_meta.iterrows()):\n",
|
||||
" aid = int(r['activity_id'])\n",
|
||||
" rec = race_records[aid].dropna(subset=['heart_rate','speed_mps','elapsed_s'])\n",
|
||||
" rec = rec[rec['speed_mps'] > 0.5]\n",
|
||||
" em = rec['elapsed_s'] / 60\n",
|
||||
" rolled_hr = rec['heart_rate'].rolling(300, min_periods=30).mean()\n",
|
||||
" rolled_pace = (1 / rec['speed_mps']) * 1000 / 60\n",
|
||||
" rolled_pace = rolled_pace.rolling(300, min_periods=30).mean()\n",
|
||||
" ax.plot(em, rolled_hr, color='#9b2226', lw=1.4, label='HR (5-min avg)')\n",
|
||||
" ax.set_ylabel('HR (bpm)', color='#9b2226')\n",
|
||||
" ax.tick_params(axis='y', labelcolor='#9b2226')\n",
|
||||
" ax2 = ax.twinx()\n",
|
||||
" ax2.plot(em, rolled_pace, color='#264653', lw=1.4, label='pace (5-min avg)')\n",
|
||||
" ax2.set_ylabel('pace (min/km)', color='#264653')\n",
|
||||
" ax2.tick_params(axis='y', labelcolor='#264653')\n",
|
||||
" ax2.invert_yaxis() # faster = up\n",
|
||||
" ax.set_title(f\"{r['start_time_local'].date()} \u2014 {r['km']:.1f} km\", fontsize=10)\n",
|
||||
"axes[-1].set_xlabel('elapsed minutes')\n",
|
||||
"fig.suptitle('HR (red) and pace (dark) \u2014 divergence = decoupling', y=1.01)\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Per-second vs per-mile decoupling \u2014 sanity check\n",
|
||||
"\n",
|
||||
"How does the FIT-derived number compare to the lap-level decoupling we computed in \u00a71? The per-second method excludes stopped time and lap-rounding noise, so it should come out **lower** than the per-mile number for the same race \u2014 but the qualitative ranking should agree."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Pull the per-mile (\u00a71) value for each race and compare to per-second\n",
|
||||
"lap_dec = dec.set_index('activity_id')['decoupling_pct']\n",
|
||||
"rows = []\n",
|
||||
"for _, r in race_meta.iterrows():\n",
|
||||
" aid = int(r['activity_id'])\n",
|
||||
" ps = halves_df[(halves_df.race == r['start_time_local'].date()) & (halves_df.segment == 2)]['decoupling_pct'].iloc[0]\n",
|
||||
" lap = lap_dec.get(aid, float('nan'))\n",
|
||||
" rows.append({'race': r['start_time_local'].date(), 'km': r['km'],\n",
|
||||
" 'per_mile_decoupling_pct': round(lap, 1),\n",
|
||||
" 'per_second_decoupling_pct': round(ps, 1),\n",
|
||||
" 'delta': round(lap - ps, 1) if not pd.isna(lap) else None})\n",
|
||||
"pd.DataFrame(rows)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### What this means for the 50-mile\n",
|
||||
"\n",
|
||||
"The per-second view localises the drift: in every prior race the wheels come off around the **4-hour mark** (between Q2 and Q3). For the 50-mile that's roughly halfway through the race \u2014 exactly when fueling errors stop being recoverable.\n",
|
||||
"\n",
|
||||
"Three concrete implications:\n",
|
||||
"\n",
|
||||
"1. **Front-load fueling.** The textbook glycogen-depletion curve gives roughly 90 min of running on stored glycogen before performance falls off without external carbs. The first half (Q1\u2013Q2, the easy part) shouldn't be a fueling holiday \u2014 fuel at every aid station, every hour, from the start.\n",
|
||||
"2. **Recalibrate pace by HR, not by feel.** The rolling-efficiency plots show HR rising while pace falls. Setting an HR ceiling (e.g. Z2 top = 143 bpm for the long run, slightly higher for race) and *enforcing it* would flatten the Q3 collapse.\n",
|
||||
"3. **What success looks like on Sept 12.** A 50-mile race executed cleanly should look like the *first half* of these 50K curves repeated twice. If the Q3 wall reappears around hour 4\u20135, treat it as a planned aid-station break to top up calories before continuing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Cadence and stride length\n",
|
||||
"\n",
|
||||
"At a given pace, faster runners tend to have **higher cadence and shorter stride**. Watching cadence-vs-pace and stride-vs-pace by year shows whether form is shifting independently of fitness.\n",
|
||||
"\n",
|
||||
"Garmin's `averageRunCadence` per split is already **both-legs** steps-per-minute (typical running range 150\u2013185). `strideLength` is in cm."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"form = splits.dropna(subset=['averageRunCadence', 'strideLength', 'pace_min_per_km']).copy()\n",
|
||||
"form = form[form['averageRunCadence'] > 0].copy() # zeros = walking/standing intervals\n",
|
||||
"form['cadence_spm'] = form['averageRunCadence'] # already both-legs SPM\n",
|
||||
"form['stride_m'] = form['strideLength'] / 100\n",
|
||||
"form = form[(form['cadence_spm'] >= 140) & (form['cadence_spm'] <= 200)] # drop walks/junk\n",
|
||||
"print(f'{len(form):,} clean form splits, {form.activity_id.nunique()} runs')\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(13, 5), sharex=True)\n",
|
||||
"years = sorted(form['year'].unique())\n",
|
||||
"cmap = plt.cm.viridis(np.linspace(0, 0.9, len(years)))\n",
|
||||
"\n",
|
||||
"for c, y in zip(cmap, years):\n",
|
||||
" d = form[form['year'] == y]\n",
|
||||
" axes[0].scatter(d['pace_min_per_km'], d['cadence_spm'], s=8, alpha=0.35, color=c, label=str(y))\n",
|
||||
" axes[1].scatter(d['pace_min_per_km'], d['stride_m'], s=8, alpha=0.35, color=c, label=str(y))\n",
|
||||
"\n",
|
||||
"axes[0].set_ylabel('cadence (steps/min, both legs)')\n",
|
||||
"axes[1].set_ylabel('stride length (m)')\n",
|
||||
"for ax in axes:\n",
|
||||
" ax.set_xlabel('pace (min/km)')\n",
|
||||
" ax.invert_xaxis() # faster runs to the right\n",
|
||||
"axes[0].legend(title='year', loc='lower left', fontsize=8)\n",
|
||||
"axes[0].set_title('Cadence vs pace')\n",
|
||||
"axes[1].set_title('Stride length vs pace')\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Form at a controlled pace\n",
|
||||
"\n",
|
||||
"Bin splits into a narrow easy-pace band (5:30\u20136:30 min/km) and look at cadence / stride / vertical metrics year-over-year. Holding pace constant strips out the obvious \"faster = higher cadence\" effect and isolates technique drift."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"band = form[(form['pace_min_per_km'] >= 5.5) & (form['pace_min_per_km'] <= 6.5)].copy()\n",
|
||||
"vert = splits.dropna(subset=['verticalOscillation', 'verticalRatio', 'groundContactTime'])\n",
|
||||
"vert = vert[(vert['pace_min_per_km'] >= 5.5) & (vert['pace_min_per_km'] <= 6.5)]\n",
|
||||
"\n",
|
||||
"summary = band.groupby('year').agg(\n",
|
||||
" n_splits=('cadence_spm', 'size'),\n",
|
||||
" cadence_med=('cadence_spm', 'median'),\n",
|
||||
" stride_med=('stride_m', 'median'),\n",
|
||||
")\n",
|
||||
"vsum = vert.groupby('year').agg(\n",
|
||||
" vert_osc_cm=('verticalOscillation', 'median'),\n",
|
||||
" vert_ratio=('verticalRatio', 'median'),\n",
|
||||
" gct_ms=('groundContactTime', 'median'),\n",
|
||||
")\n",
|
||||
"summary = summary.join(vsum).round(2)\n",
|
||||
"summary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(14, 4))\n",
|
||||
"summary['cadence_med'].plot(kind='bar', ax=axes[0], color='#264653')\n",
|
||||
"axes[0].set_ylabel('cadence (spm)'); axes[0].set_title('Cadence at easy pace')\n",
|
||||
"axes[0].set_ylim(summary['cadence_med'].min() - 3, summary['cadence_med'].max() + 3)\n",
|
||||
"\n",
|
||||
"summary['stride_med'].plot(kind='bar', ax=axes[1], color='#2a9d8f')\n",
|
||||
"axes[1].set_ylabel('stride length (m)'); axes[1].set_title('Stride at easy pace')\n",
|
||||
"axes[1].set_ylim(summary['stride_med'].min() - 0.05, summary['stride_med'].max() + 0.05)\n",
|
||||
"\n",
|
||||
"if summary['vert_osc_cm'].notna().any():\n",
|
||||
" summary['vert_osc_cm'].plot(kind='bar', ax=axes[2], color='#e76f51')\n",
|
||||
" axes[2].set_ylabel('vertical oscillation (cm)'); axes[2].set_title('Vertical bounce at easy pace')\n",
|
||||
" axes[2].set_ylim(summary['vert_osc_cm'].min() - 0.5, summary['vert_osc_cm'].max() + 0.5)\n",
|
||||
"else:\n",
|
||||
" axes[2].text(0.5, 0.5, 'no vertical-osc data', ha='center', va='center', transform=axes[2].transAxes)\n",
|
||||
"\n",
|
||||
"for ax in axes:\n",
|
||||
" ax.set_xlabel('year')\n",
|
||||
"fig.suptitle('Form metrics in the 5:30\u20136:30 min/km band, year over year')\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Route clustering \u2014 pace controlled for terrain\n",
|
||||
"\n",
|
||||
"Raw pace year-over-year mixes terrain, weather, intent. Cluster runs by their **start coordinates** (greedy haversine, 250 m radius) and you get \"my usual routes.\" Within a cluster the route is roughly the same, so pace differences are mostly fitness, not geography."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"starts = (splits.dropna(subset=['startLatitude', 'startLongitude'])\n",
|
||||
" .groupby('activity_id')\n",
|
||||
" .agg(lat=('startLatitude', 'first'),\n",
|
||||
" lon=('startLongitude', 'first'),\n",
|
||||
" start_time=('start_time_local', 'first'),\n",
|
||||
" distance_km=('distance_m', lambda s: s.sum() / 1000),\n",
|
||||
" avg_hr=('avg_hr', 'mean'),\n",
|
||||
" avg_pace=('pace_min_per_km', 'mean')))\n",
|
||||
"starts['cluster'] = cluster_routes(starts['lat'].values, starts['lon'].values, radius_km=0.25)\n",
|
||||
"print(f'{len(starts)} runs with start coords; {(starts.cluster >= 0).sum()} clustered, {(starts.cluster == -1).sum()} singletons')\n",
|
||||
"\n",
|
||||
"top = (starts[starts.cluster >= 0]\n",
|
||||
" .groupby('cluster')\n",
|
||||
" .agg(n=('cluster', 'size'),\n",
|
||||
" lat=('lat', 'median'),\n",
|
||||
" lon=('lon', 'median'),\n",
|
||||
" first=('start_time', 'min'),\n",
|
||||
" last=('start_time', 'max'),\n",
|
||||
" med_dist=('distance_km', 'median'))\n",
|
||||
" .sort_values('n', ascending=False))\n",
|
||||
"top.head(10)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, ax = plt.subplots(figsize=(9, 7))\n",
|
||||
"\n",
|
||||
"# unclustered points faint\n",
|
||||
"unc = starts[starts.cluster == -1]\n",
|
||||
"ax.scatter(unc['lon'], unc['lat'], color='lightgray', s=12, alpha=0.6, label='singletons')\n",
|
||||
"\n",
|
||||
"# top-10 clusters colored\n",
|
||||
"top10 = top.head(10).index.tolist()\n",
|
||||
"cmap = plt.cm.tab10(np.linspace(0, 1, len(top10)))\n",
|
||||
"for c, cl in zip(cmap, top10):\n",
|
||||
" pts = starts[starts.cluster == cl]\n",
|
||||
" ax.scatter(pts['lon'], pts['lat'], color=c, s=40, alpha=0.8,\n",
|
||||
" label=f'route #{cl} (n={len(pts)})')\n",
|
||||
"\n",
|
||||
"ax.set_xlabel('longitude'); ax.set_ylabel('latitude')\n",
|
||||
"ax.set_title('Run start points \u2014 top 10 recurring routes')\n",
|
||||
"ax.legend(loc='best', fontsize=8)\n",
|
||||
"ax.set_aspect('equal', adjustable='datalim')\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pace progression within each frequent route\n",
|
||||
"\n",
|
||||
"Now restrict to clusters with \u22655 runs and plot pace over time per route. Slopes here are much cleaner than the global pace trend because terrain is held constant."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"frequent = top[top['n'] >= 5].index.tolist()\n",
|
||||
"print(f'{len(frequent)} routes with \u22655 runs')\n",
|
||||
"\n",
|
||||
"if frequent:\n",
|
||||
" n_cols = min(3, len(frequent))\n",
|
||||
" n_rows = (len(frequent) + n_cols - 1) // n_cols\n",
|
||||
" fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 3.2 * n_rows),\n",
|
||||
" squeeze=False, sharey=True)\n",
|
||||
" for ax, cl in zip(axes.flat, frequent):\n",
|
||||
" d = (starts[starts.cluster == cl]\n",
|
||||
" .dropna(subset=['avg_pace'])\n",
|
||||
" .sort_values('start_time'))\n",
|
||||
" ax.scatter(d['start_time'], d['avg_pace'], s=30, alpha=0.75, color='#264653')\n",
|
||||
" if len(d) >= 3:\n",
|
||||
" # rolling median by date\n",
|
||||
" rd = d.set_index('start_time')['avg_pace'].rolling('120D', min_periods=2).median()\n",
|
||||
" ax.plot(rd.index, rd.values, color='#e76f51', lw=1.5)\n",
|
||||
" ax.yaxis.set_inverted(True) # faster up; idempotent (invert_yaxis() toggles the shared y-axis on each call)\n",
|
||||
" ax.set_title(f'route #{cl} \u2014 n={len(d)}, ~{top.loc[cl, \"med_dist\"]:.1f} km')\n",
|
||||
" ax.set_ylabel('pace (min/km)')\n",
|
||||
" # blank unused axes\n",
|
||||
" for ax in axes.flat[len(frequent):]:\n",
|
||||
" ax.set_visible(False)\n",
|
||||
" fig.suptitle('Per-route pace over time (terrain held roughly constant)')\n",
|
||||
" fig.tight_layout()\n",
|
||||
"else:\n",
|
||||
" print('No clusters with \u22655 runs.')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. HR-zone time-in-zone (Garmin-configured zones, per-second when possible)\n",
|
||||
"\n",
|
||||
"**Zones come from Garmin's `heartRateZones.json` (training method: HR_MAX),** not estimated from observed HR. Lactate-threshold HR sits at 182 inside Z4.\n",
|
||||
"\n",
|
||||
"| Zone | range (bpm) | feel | role |\n",
|
||||
"|------|-------------|------|------|\n",
|
||||
"| Z1 | 102\u2013122 | walk / recovery | active rest |\n",
|
||||
"| Z2 | 123\u2013143 | conversational | **long-run target** |\n",
|
||||
"| Z3 | 144\u2013164 | tempo | the \"junk-miles middle\" |\n",
|
||||
"| Z4 | 165\u2013185 | threshold (LTHR=182) | hard sustained |\n",
|
||||
"| Z5 | 186\u2013209 | VO\u2082 max | intervals |\n",
|
||||
"\n",
|
||||
"For each activity, time-in-zone comes from `activity_time_in_zone` (precomputed by `compute_time_in_zone.py`):\n",
|
||||
"- **`source='fit'`** \u2014 per-second HR from the FIT file. Each record's `dt` (typically 1 s) goes into whichever zone its HR falls in. Accurate even when laps span zone boundaries.\n",
|
||||
"- **`source='lap'`** \u2014 fallback for activities without a linked FIT. The whole lap's duration is assigned to whichever zone the *lap's average* HR sits in. Smears across boundaries, biases toward middle zones.\n",
|
||||
"\n",
|
||||
"**Polarized-training rule (Seiler):** elites accumulate ~80% of weekly time in Z1+Z2 and ~20% in Z4+Z5, with little Z3."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from analysis import HR_ZONES_USER\n",
|
||||
"\n",
|
||||
"tiz = pd.read_sql('''\n",
|
||||
" SELECT t.activity_id, t.z1_s, t.z2_s, t.z3_s, t.z4_s, t.z5_s, t.total_s, t.source,\n",
|
||||
" a.start_time_local, a.activity_type, a.distance_m, a.duration_s\n",
|
||||
" FROM activity_time_in_zone t\n",
|
||||
" JOIN activities a USING(activity_id)\n",
|
||||
" WHERE a.activity_type IN ('running','trail_running')\n",
|
||||
"''', conn, parse_dates=['start_time_local'])\n",
|
||||
"tiz['week'] = tiz['start_time_local'].dt.to_period('W-SUN').dt.start_time # 'W-SUN' periods end Sunday, so start_time is the Monday\n",
|
||||
"tiz['year'] = tiz['start_time_local'].dt.year\n",
|
||||
"\n",
|
||||
"print(f'{len(tiz)} activities with cached time-in-zone')\n",
|
||||
"print(' source breakdown:')\n",
|
||||
"print(tiz['source'].value_counts().to_string())\n",
|
||||
"print(f' fit coverage: {(tiz.source==\"fit\").mean()*100:.0f}% of running activities')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Sanity check: FIT vs lap method on the same race\n",
|
||||
"\n",
|
||||
"On the same activity, how different are the two estimates? Take the 2025-09-20 race (8 hours, 28k FIT records) and compute both, then compare. The lap method should over-weight whichever zone the typical lap average falls in (here, Z3) and under-count time spent in adjacent zones because boundary-crossing laps get rounded to one zone."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from analysis import load_fit_records, time_in_zone_from_fit, time_in_zone_from_splits\n",
|
||||
"\n",
|
||||
"race_aid = race_meta.iloc[-1]['activity_id'] # most recent 50K (2025-09-20)\n",
|
||||
"race_date = race_meta.iloc[-1]['start_time_local'].date()\n",
|
||||
"\n",
|
||||
"fit_tiz = time_in_zone_from_fit(race_records[int(race_aid)])\n",
|
||||
"race_splits = pd.read_sql(\n",
|
||||
" 'SELECT avg_hr, duration_s FROM activity_splits WHERE activity_id = ?',\n",
|
||||
" conn, params=[int(race_aid)]\n",
|
||||
")\n",
|
||||
"lap_tiz = time_in_zone_from_splits(race_splits)\n",
|
||||
"\n",
|
||||
"compare = pd.DataFrame({\n",
|
||||
" 'FIT (per-sec) min': {k: round(v / 60, 1) for k, v in fit_tiz.items()},\n",
|
||||
" 'lap (avg-HR) min': {k: round(v / 60, 1) for k, v in lap_tiz.items()},\n",
|
||||
"}).reindex(['Z1','Z2','Z3','Z4','Z5']).fillna(0)\n",
|
||||
"compare.loc['total'] = compare.sum()\n",
|
||||
"compare['delta (min)'] = (compare['FIT (per-sec) min'] - compare['lap (avg-HR) min']).round(1)\n",
|
||||
"print(f'Race {race_date} \u2014 FIT vs lap time-in-zone:')\n",
|
||||
"compare"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Weekly time-in-zone (hours) using whichever method was available per activity\n",
|
||||
"wk_cols = ['z1_s','z2_s','z3_s','z4_s','z5_s']\n",
|
||||
"weekly = tiz.groupby('week')[wk_cols].sum() / 3600\n",
|
||||
"weekly.columns = ['Z1','Z2','Z3','Z4','Z5']\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(14, 4.8))\n",
|
||||
"colors = ['#264653', '#2a9d8f', '#e9c46a', '#f4a261', '#e76f51']\n",
|
||||
"ax.stackplot(weekly.index, weekly.T.values, labels=weekly.columns, colors=colors, alpha=0.92)\n",
|
||||
"ax.set_ylabel('hours / week')\n",
|
||||
"ax.set_title('Weekly running time by HR zone (Garmin-configured zones)')\n",
|
||||
"ax.legend(loc='upper left', ncol=5, fontsize=9)\n",
|
||||
"fig.autofmt_xdate()\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Polarized index over time\n",
|
||||
"\n",
|
||||
"Collapse to three buckets:\n",
|
||||
"- **easy** = Z1 + Z2 (HR \u2264 143)\n",
|
||||
"- **moderate \"junk\"** = Z3 (144\u2013164)\n",
|
||||
"- **hard** = Z4 + Z5 (HR \u2265 165, threshold and up)\n",
|
||||
"\n",
|
||||
"Polarized: high easy, low moderate, modest hard. Pyramidal: high easy, modest moderate, low hard. Threshold-heavy: lots of moderate."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"buckets = pd.DataFrame({\n",
|
||||
" 'easy': weekly[['Z1','Z2']].sum(axis=1),\n",
|
||||
" 'moderate': weekly['Z3'],\n",
|
||||
" 'hard': weekly[['Z4','Z5']].sum(axis=1),\n",
|
||||
"})\n",
|
||||
"totals = buckets.sum(axis=1)\n",
|
||||
"pct = buckets.div(totals.replace(0, np.nan), axis=0) * 100\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(2, 1, figsize=(14, 7), sharex=True)\n",
|
||||
"\n",
|
||||
"ax = axes[0]\n",
|
||||
"ax.stackplot(pct.index, pct[['easy','moderate','hard']].T.values,\n",
|
||||
" labels=['easy (Z1+Z2)', 'moderate (Z3)', 'hard (Z4+Z5)'],\n",
|
||||
" colors=['#2a9d8f', '#e9c46a', '#e76f51'], alpha=0.9)\n",
|
||||
"ax.axhline(80, color='black', ls='--', lw=0.8, label='80% easy target')\n",
|
||||
"ax.set_ylabel('% of weekly time'); ax.set_ylim(0, 100)\n",
|
||||
"ax.set_title('Polarized-training split (weekly)')\n",
|
||||
"ax.legend(loc='lower left', ncol=4, fontsize=9)\n",
|
||||
"\n",
|
||||
"ax = axes[1]\n",
|
||||
"rolling_pct = pct.rolling(4, min_periods=2).mean()\n",
|
||||
"for col, c in [('easy','#2a9d8f'), ('moderate','#e9c46a'), ('hard','#e76f51')]:\n",
|
||||
" ax.plot(rolling_pct.index, rolling_pct[col], color=c, lw=2, label=col)\n",
|
||||
"ax.axhline(80, color='#2a9d8f', ls='--', lw=0.8)\n",
|
||||
"ax.axhline(20, color='#e76f51', ls='--', lw=0.8)\n",
|
||||
"ax.set_ylabel('% (4-week rolling mean)')\n",
|
||||
"ax.legend(loc='best', fontsize=9)\n",
|
||||
"fig.autofmt_xdate()\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Yearly summary + FIT-method coverage\n",
|
||||
"\n",
|
||||
"`fit_coverage_%` shows what fraction of each year's activities had a linked FIT (and therefore got per-second zones). 2026's lower coverage reflects activities that synced via the live API but aren't in the takeout dump."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"yearly_hours = (tiz.groupby('year')[wk_cols].sum() / 3600).round(1)\n",
|
||||
"yearly_hours.columns = ['Z1','Z2','Z3','Z4','Z5']\n",
|
||||
"yearly_pct = yearly_hours.div(yearly_hours.sum(axis=1), axis=0) * 100\n",
|
||||
"\n",
|
||||
"out = pd.concat({'hours': yearly_hours, '%': yearly_pct.round(1)}, axis=1)\n",
|
||||
"out['easy_pct'] = (yearly_pct[['Z1','Z2']].sum(axis=1)).round(1)\n",
|
||||
"out['hard_pct'] = (yearly_pct[['Z4','Z5']].sum(axis=1)).round(1)\n",
|
||||
"fit_coverage = tiz.groupby('year').apply(\n",
|
||||
" lambda g: (g['source']=='fit').mean() * 100, include_groups=False\n",
|
||||
").round(0)\n",
|
||||
"out['fit_coverage_%'] = fit_coverage\n",
|
||||
"out"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Race-build vs base-period zone distribution\n",
|
||||
"\n",
|
||||
"Compare what training looked like in the 12 weeks before each prior 50K race vs all other running outside those windows (`base`). A serious build should shift time into Z2 (long aerobic) and Z4 (threshold) and away from Z3 (tempo, the \"junk-miles middle\")."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"race_dates = race_meta['start_time_local']\n",
|
||||
"tiz['phase'] = 'base'\n",
|
||||
"for rd in race_dates:\n",
|
||||
" build_start = rd - pd.Timedelta(weeks=12)\n",
|
||||
" mask = (tiz['start_time_local'] >= build_start) & (tiz['start_time_local'] < rd)\n",
|
||||
" tiz.loc[mask, 'phase'] = f'build {rd.year}'\n",
|
||||
"\n",
|
||||
"phase_hours = (tiz.groupby('phase')[wk_cols].sum() / 3600).round(1)\n",
|
||||
"phase_hours.columns = ['Z1','Z2','Z3','Z4','Z5']\n",
|
||||
"phase_pct = (phase_hours.div(phase_hours.sum(axis=1), axis=0) * 100).round(1)\n",
|
||||
"phase_pct['easy_Z1+Z2'] = (phase_pct['Z1'] + phase_pct['Z2']).round(1)\n",
|
||||
"phase_pct['junk_Z3'] = phase_pct['Z3']\n",
|
||||
"phase_pct['hard_Z4+Z5'] = (phase_pct['Z4'] + phase_pct['Z5']).round(1)\n",
|
||||
"phase_pct[['easy_Z1+Z2','junk_Z3','hard_Z4+Z5']].sort_index()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## What's left now that FIT files are ingested\n",
|
||||
"\n",
|
||||
"All four sections now use per-second FIT data where it's linked (349 of 378 activities, 92%). Remaining lap-only activities are mostly old multi-sport / triathlon legs that no FIT was uploaded for. Useful follow-ups:\n",
|
||||
"\n",
|
||||
"- **Cadence stability** \u2014 plot cadence over elapsed time within a long run; quantify the drop in the final 15 %.\n",
|
||||
"- **GPS polylines for route clustering** \u2014 current \u00a73 uses start coordinates only; with full FIT GPS tracks, match routes by Hausdorff distance (more accurate than start-only).\n",
|
||||
"- **Decoupling vs fueling protocol** \u2014 once the user logs even informal fueling notes for a few long runs, regress decoupling against carb intake."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
589
examples/notebooks/06_race_plan.ipynb
Normal file
589
examples/notebooks/06_race_plan.ipynb
Normal file
@@ -0,0 +1,589 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 06 \u2014 Race plan & tracker\n",
|
||||
"\n",
|
||||
"**Three-race progression in 17 weeks:**\n",
|
||||
"\n",
|
||||
"| race | date | week | role |\n",
|
||||
"|---------------|--------------|------|---------------------------------------------|\n",
|
||||
"| **30K** | Sat 2026-06-13 | wk 4 | hard long run; race-day fuel/kit rehearsal |\n",
|
||||
"| **50K** | Sat 2026-07-25 | wk 10 | peak-fitness tune-up; race-pace calibration |\n",
|
||||
"| **50 mile** | Sat 2026-09-12 | wk 17 | A-race |\n",
|
||||
"\n",
|
||||
"Plan philosophy: the two earlier races *are* the tune-ups \u2014 no separate dress rehearsals needed. The 50K does double-duty as the biggest pre-race effort, leaving 7 weeks for one final back-to-back peak, taper, and race.\n",
|
||||
"\n",
|
||||
"Built on Ethan's proven 2023\u20132025 50K formula (~22 km/wk mean, ~29 km longest training run) scaled up for the 50-mile. Recorded Garmin volume **does not** include hikes, strength, or unrecorded efforts \u2014 adherence numbers below are a floor, not a ceiling."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys; sys.path.insert(0, '..')\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from analysis import open_conn, load_activities\n",
|
||||
"\n",
|
||||
"PLAN_START = pd.Timestamp('2026-05-18')\n",
|
||||
"RACES = {\n",
|
||||
" 'wk 4 \u2014 30K': pd.Timestamp('2026-06-13'),\n",
|
||||
" 'wk 10 \u2014 50K': pd.Timestamp('2026-07-25'),\n",
|
||||
" 'wk 17 \u2014 50 MILE': pd.Timestamp('2026-09-12'),\n",
|
||||
"}\n",
|
||||
"RACE_DATE = RACES['wk 17 \u2014 50 MILE']\n",
|
||||
"TODAY = pd.Timestamp.today().normalize()\n",
|
||||
"\n",
|
||||
"_rows = [\n",
|
||||
" # phase, kind, long_km, week_km, notes\n",
|
||||
" ('P1: base', 'build', 22, 30, 'rebuild frequency; long-run HR \u2264 145'),\n",
|
||||
" ('P1: base', 'build', 26, 38, 'add 1 trail/vert run; practice fueling'),\n",
|
||||
" ('P1: base', 'pre-race ease', 22, 35, 'short shakeouts late-week, legs fresh for Sat'),\n",
|
||||
" ('P1: 30K', '30K RACE', 30, 40, 'aerobic effort, race-day fuel + kit rehearsal'),\n",
|
||||
" ('P2: build', 'recovery', 18, 28, 'one week easy; trail/strength fine, no hard runs'),\n",
|
||||
" ('P2: build', 'build', 28, 45, ''),\n",
|
||||
" ('P2: build', 'build', 32, 55, 'fueling: 60\u201390 g carb/hr non-negotiable on long'),\n",
|
||||
" ('P2: build', 'peak before 50K', 35, 60, 'optional B2B: 28 Sat + 15 Sun'),\n",
|
||||
" ('P2: build', 'pre-race taper', 22, 42, 'cut volume ~30%, keep frequency'),\n",
|
||||
" ('P3: 50K', '50K RACE', 52, 60, 'controlled effort; this is the calibration run'),\n",
|
||||
" ('P4: recover', 'deep recovery', 15, 30, 'no hard efforts; walk-jog week'),\n",
|
||||
" ('P4: recover', 'easy rebuild', 25, 50, 'all runs by feel, HR < 150'),\n",
|
||||
" ('P5: peak', 'build', 35, 70, ''),\n",
|
||||
" ('P5: peak', 'B2B peak', 40, 80, 'BACK-TO-BACK: 40 km Sat + 22 km Sun, full race kit. The single most important week.'),\n",
|
||||
" ('P6: taper', 'taper', 28, 55, ''),\n",
|
||||
" ('P6: taper', 'deep taper', 18, 35, 'last meaningful long run'),\n",
|
||||
" ('P6: 50 MILE', '50 MILE RACE', 80, 90, 'shakeouts 5\u20136 km early-week; RACE Sat'),\n",
|
||||
"]\n",
|
||||
"PLAN = pd.DataFrame(_rows, columns=['phase','kind','long_run_km','weekly_km','notes'])\n",
|
||||
"PLAN.index = pd.date_range(PLAN_START, periods=len(PLAN), freq='W-MON')\n",
|
||||
"PLAN.index.name = 'week_start'\n",
|
||||
"PLAN['week_num'] = range(1, len(PLAN) + 1)\n",
|
||||
"PLAN['date_range'] = [f\"{d.strftime('%b %d')}\u2013{(d + pd.Timedelta(days=6)).strftime('%b %d')}\" for d in PLAN.index]\n",
|
||||
"PLAN[['week_num','date_range','phase','kind','long_run_km','weekly_km','notes']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Plan vs actual (tracker)\n",
|
||||
"\n",
|
||||
"Coloured `status` column scores each week that has started (including the current, still-in-progress one) against planned km:\n",
|
||||
"\n",
|
||||
"- **on track** \u2014 85\u2013115% of plan\n",
|
||||
"- **over** \u2014 > 115% *(usually fine; watch fatigue if two in a row)*\n",
|
||||
"- **under** \u2014 50\u201385% *(recoverable)*\n",
|
||||
"- **missed** \u2014 < 50% *(adjust the plan; don't try to make it up next week)*\n",
|
||||
"- **\u2014** \u2014 future week, not scored\n",
|
||||
"\n",
|
||||
"Race weeks (4 / 10 / 17) are bolded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conn = open_conn()\n",
|
||||
"acts = load_activities(conn)\n",
|
||||
"runs = acts[acts['activity_type'].isin(['running','trail_running'])].copy()\n",
|
||||
"runs['week_start'] = runs['start_time_local'].dt.to_period('W-SUN').dt.start_time # Monday-start weeks, so keys match PLAN's Monday index\n",
|
||||
"\n",
|
||||
"weekly_actual = runs.groupby('week_start').agg(\n",
|
||||
" actual_km=('distance_km', 'sum'),\n",
|
||||
" longest_run_km=('distance_km', 'max'),\n",
|
||||
" n_runs=('activity_id', 'size'),\n",
|
||||
").round(1)\n",
|
||||
"\n",
|
||||
"tracker = PLAN.join(weekly_actual, how='left')\n",
|
||||
"tracker['actual_km'] = tracker['actual_km'].fillna(0)\n",
|
||||
"tracker['longest_run_km'] = tracker['longest_run_km'].fillna(0)\n",
|
||||
"tracker['n_runs'] = tracker['n_runs'].fillna(0).astype(int)\n",
|
||||
"tracker['weekly_delta_km'] = (tracker['actual_km'] - tracker['weekly_km']).round(1)\n",
|
||||
"tracker['long_run_delta_km'] = (tracker['longest_run_km'] - tracker['long_run_km']).round(1)\n",
|
||||
"\n",
|
||||
"elapsed_mask = tracker.index <= TODAY\n",
|
||||
"ratio = tracker['actual_km'] / tracker['weekly_km']\n",
|
||||
"status = pd.Series('\u2014', index=tracker.index)\n",
|
||||
"status[elapsed_mask & (ratio >= 0.85) & (ratio < 1.15)] = 'on track'\n",
|
||||
"status[elapsed_mask & (ratio >= 1.15)] = 'over'\n",
|
||||
"status[elapsed_mask & (ratio >= 0.50) & (ratio < 0.85)] = 'under'\n",
|
||||
"status[elapsed_mask & (ratio < 0.50)] = 'missed'\n",
|
||||
"tracker['status'] = status\n",
|
||||
"tracker['is_race'] = tracker['kind'].str.contains('RACE', na=False)\n",
|
||||
"\n",
|
||||
"view = tracker[['week_num', 'date_range', 'phase', 'kind',\n",
|
||||
" 'long_run_km', 'longest_run_km', 'long_run_delta_km',\n",
|
||||
" 'weekly_km', 'actual_km', 'weekly_delta_km',\n",
|
||||
" 'n_runs', 'status', 'notes']]\n",
|
||||
"\n",
|
||||
"_status_colors = {\n",
|
||||
" 'on track': 'background-color:#a8dadc',\n",
|
||||
" 'over': 'background-color:#fff3b0',\n",
|
||||
" 'under': 'background-color:#f4a261',\n",
|
||||
" 'missed': 'background-color:#e76f51;color:white',\n",
|
||||
" '\u2014': 'color:#bbb',\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"def _color_status(v):\n",
|
||||
" return _status_colors.get(v, '')\n",
|
||||
"\n",
|
||||
"def _bold_race(row):\n",
|
||||
" return ['font-weight:bold' if tracker.loc[row.name, 'is_race'] else ''] * len(row)\n",
|
||||
"\n",
|
||||
"(view.style\n",
|
||||
" .map(_color_status, subset=['status'])\n",
|
||||
" .apply(_bold_race, axis=1))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Where are we?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"elapsed = tracker[tracker.index <= TODAY]\n",
|
||||
"weeks_done = len(elapsed)\n",
|
||||
"weeks_left = len(tracker) - weeks_done\n",
|
||||
"\n",
|
||||
"print(f'Today: {TODAY.date()} Race day: {RACE_DATE.date()} ({(RACE_DATE - TODAY).days} days / ~{(RACE_DATE - TODAY).days // 7} weeks to A-race)')\n",
|
||||
"print(f'Plan progress: {weeks_done}/{len(tracker)} weeks elapsed, {weeks_left} remaining')\n",
|
||||
"print()\n",
|
||||
"print('Upcoming races:')\n",
|
||||
"for label, d in RACES.items():\n",
|
||||
" days = (d - TODAY).days\n",
|
||||
" marker = '\u2713 done' if days < 0 else f'in {days} d'\n",
|
||||
" print(f' {label:25s} {d.date()} ({marker})')\n",
|
||||
"\n",
|
||||
"print()\n",
|
||||
"if weeks_done == 0:\n",
|
||||
" nxt = tracker.iloc[0]\n",
|
||||
" print(f'Plan starts {tracker.index[0].date()} \u2014 first week:')\n",
|
||||
" print(f' wk 1 ({nxt.date_range}): {nxt.phase} \u2014 {nxt.kind}')\n",
|
||||
" print(f' target {nxt.weekly_km} km, long run {nxt.long_run_km} km')\n",
|
||||
" if nxt.notes: print(f' note: {nxt.notes}')\n",
|
||||
"else:\n",
|
||||
" cur = elapsed.iloc[-1]\n",
|
||||
" print(f'Current week (wk {int(cur.week_num)}, {cur.date_range}):')\n",
|
||||
" print(f' phase: {cur.phase} \u2014 {cur.kind}')\n",
|
||||
" print(f' target: {cur.weekly_km} km, long run {cur.long_run_km} km')\n",
|
||||
" print(f' actual: {cur.actual_km} km, longest {cur.longest_run_km} km ({cur.n_runs} runs)')\n",
|
||||
" print(f' status: {cur.status}')\n",
|
||||
" if cur.notes: print(f' note: {cur.notes}')\n",
|
||||
" if weeks_left > 0:\n",
|
||||
" nxt = tracker.iloc[weeks_done]\n",
|
||||
" print()\n",
|
||||
" print(f'Next up (wk {int(nxt.week_num)}, {nxt.date_range}):')\n",
|
||||
" print(f' {nxt.phase} \u2014 {nxt.kind}: {nxt.weekly_km} km, long {nxt.long_run_km} km')\n",
|
||||
" if nxt.notes: print(f' note: {nxt.notes}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Weekly volume \u2014 planned vs actual"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, ax = plt.subplots(figsize=(15, 5.5))\n",
|
||||
"x = np.arange(len(tracker))\n",
|
||||
"w = 0.4\n",
|
||||
"ax.bar(x - w/2, tracker['weekly_km'], width=w, color='#264653', label='planned', alpha=0.9)\n",
|
||||
"\n",
|
||||
"_bar_colors = {\n",
|
||||
" 'on track': '#2a9d8f',\n",
|
||||
" 'over': '#e9c46a',\n",
|
||||
" 'under': '#f4a261',\n",
|
||||
" 'missed': '#9b2226',\n",
|
||||
" '\u2014': '#dcdcdc',\n",
|
||||
"}\n",
|
||||
"actual_colors = [_bar_colors.get(s, '#dcdcdc') for s in tracker['status']]\n",
|
||||
"ax.bar(x + w/2, tracker['actual_km'], width=w, color=actual_colors, label='actual', alpha=0.95)\n",
|
||||
"\n",
|
||||
"max_y = max(tracker['weekly_km'].max(), tracker['actual_km'].max()) * 1.22\n",
|
||||
"ax.set_ylim(0, max_y)\n",
|
||||
"\n",
|
||||
"# Phase shading + labels\n",
|
||||
"phase_groups = tracker.reset_index().groupby('phase', sort=False)\n",
|
||||
"for p, g in phase_groups:\n",
|
||||
" i0, i1 = int(g.index.min()), int(g.index.max())\n",
|
||||
" ax.axvspan(i0 - 0.5, i1 + 0.5, color='gray', alpha=0.05)\n",
|
||||
" ax.text((i0 + i1) / 2, max_y * 0.96, p, ha='center', fontsize=8.5, color='#444')\n",
|
||||
"\n",
|
||||
"# Race markers \u2014 star above the actual bar\n",
|
||||
"race_x = np.where(tracker['is_race'].to_numpy())[0]\n",
|
||||
"for rx in race_x:\n",
|
||||
" ax.scatter([rx], [max_y * 0.88], marker='*', s=220, color='#d62828', zorder=10)\n",
|
||||
" ax.text(rx, max_y * 0.82, tracker['kind'].iloc[rx].replace(' RACE',''),\n",
|
||||
" ha='center', fontsize=8.5, color='#d62828', fontweight='bold')\n",
|
||||
"\n",
|
||||
"today_x = sum(tracker.index <= TODAY) - 0.5\n",
|
||||
"if PLAN_START <= TODAY <= tracker.index.max() + pd.Timedelta(days=7):\n",
|
||||
" ax.axvline(today_x, color='red', ls=':', lw=1.5, label='today')\n",
|
||||
"\n",
|
||||
"ax.set_xticks(x)\n",
|
||||
"ax.set_xticklabels([f'wk{n}' for n in tracker['week_num']], fontsize=8)\n",
|
||||
"ax.set_ylabel('km / week')\n",
|
||||
"ax.set_title('Weekly volume')\n",
|
||||
"ax.legend(loc='upper left')\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Long-run progression\n",
|
||||
"\n",
|
||||
"Race weeks (red stars) replace the planned long run with the race itself. Note the staircase: 22 \u2192 26 \u2192 22 (pre-race ease) \u2192 **30K** \u2192 recover \u2192 28 \u2192 32 \u2192 35 \u2192 22 (pre-race ease) \u2192 **50K** \u2192 recover \u2192 25 \u2192 35 \u2192 **40 (B2B peak)** \u2192 28 \u2192 18 \u2192 **50 mile**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, ax = plt.subplots(figsize=(15, 4.5))\n",
|
||||
"x = np.arange(len(tracker))\n",
|
||||
"ax.plot(x, tracker['long_run_km'], 'o-', color='#264653', lw=2, label='planned long run')\n",
|
||||
"\n",
|
||||
"el = np.asarray(tracker.index <= TODAY)\n",
|
||||
"if el.any():\n",
|
||||
" ax.scatter(x[el], tracker.loc[el, 'longest_run_km'].to_numpy(),\n",
|
||||
" s=80, color='#e76f51', zorder=5, label='actual longest of week')\n",
|
||||
"\n",
|
||||
"race_x = np.where(tracker['is_race'].to_numpy())[0]\n",
|
||||
"ax.scatter(race_x, tracker['long_run_km'].iloc[race_x].to_numpy(),\n",
|
||||
" marker='*', s=280, color='#d62828', zorder=10, label='race')\n",
|
||||
"\n",
|
||||
"today_x = sum(tracker.index <= TODAY) - 0.5\n",
|
||||
"if PLAN_START <= TODAY <= tracker.index.max() + pd.Timedelta(days=7):\n",
|
||||
" ax.axvline(today_x, color='red', ls=':', lw=1.5)\n",
|
||||
"\n",
|
||||
"ax.set_xticks(x)\n",
|
||||
"ax.set_xticklabels([f'wk{n}' for n in tracker['week_num']], fontsize=8)\n",
|
||||
"ax.set_ylabel('km')\n",
|
||||
"ax.set_title('Long-run progression \u2014 planned, actual, races')\n",
|
||||
"ax.legend(loc='upper left')\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cumulative volume\n",
|
||||
"\n",
|
||||
"The forgiving lens \u2014 a missed week is recoverable as long as the cumulative line is within ~10% of plan. Chronic divergence for 3+ weeks is the signal to adjust the plan, not push harder."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tracker['cum_planned'] = tracker['weekly_km'].cumsum()\n",
|
||||
"actual_for_cum = tracker['actual_km'].where(tracker.index <= TODAY, other=np.nan)\n",
|
||||
"tracker['cum_actual'] = actual_for_cum.cumsum()\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(15, 4.5))\n",
|
||||
"ax.plot(tracker.index, tracker['cum_planned'], color='#264653', lw=2, label='planned cumulative')\n",
|
||||
"ax.plot(tracker.index, tracker['cum_actual'], color='#2a9d8f', lw=2, label='actual cumulative')\n",
|
||||
"\n",
|
||||
"for label, d in RACES.items():\n",
|
||||
" if tracker.index.min() <= d <= tracker.index.max() + pd.Timedelta(days=7):\n",
|
||||
" ax.axvline(d, color='#d62828', alpha=0.5, lw=1)\n",
|
||||
" ax.text(d, ax.get_ylim()[1] * 0.02, label.split('\u2014')[1].strip(),\n",
|
||||
" rotation=90, ha='right', va='bottom', fontsize=8, color='#d62828')\n",
|
||||
"\n",
|
||||
"if PLAN_START <= TODAY <= tracker.index.max() + pd.Timedelta(days=7):\n",
|
||||
" ax.axvline(TODAY, color='red', ls=':', lw=1.5, label='today')\n",
|
||||
"\n",
|
||||
"ax.set_ylabel('cumulative km')\n",
|
||||
"ax.set_title('Cumulative volume')\n",
|
||||
"ax.legend(loc='upper left')\n",
|
||||
"fig.autofmt_xdate()\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Adherence summary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"elapsed_only = tracker[tracker.index <= TODAY]\n",
|
||||
"if len(elapsed_only) == 0:\n",
|
||||
" print('Plan has not started yet \u2014 no completed weeks to score.')\n",
|
||||
"else:\n",
|
||||
" counts = (elapsed_only['status']\n",
|
||||
" .value_counts()\n",
|
||||
" .reindex(['on track', 'over', 'under', 'missed'])\n",
|
||||
" .fillna(0).astype(int))\n",
|
||||
" print('Completed weeks by status:')\n",
|
||||
" print(counts.to_string())\n",
|
||||
" total_planned = elapsed_only['weekly_km'].sum()\n",
|
||||
" total_actual = elapsed_only['actual_km'].sum()\n",
|
||||
" print()\n",
|
||||
" print(f'Planned km through today: {total_planned:.0f}')\n",
|
||||
" print(f'Actual km through today: {total_actual:.0f}')\n",
|
||||
" print(f'Recorded adherence: {total_actual / total_planned * 100:.0f}% of plan')\n",
|
||||
" print()\n",
|
||||
" print('Off-watch training (hikes, strength, unrecorded runs) is not in this number.')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Projected fitness / fatigue / form (Banister PMC)\n",
|
||||
"\n",
|
||||
"Combine historical training-load (from `activities.training_load`) with a forecast built from the plan above to project **CTL / ATL / TSB** through race day. The shape matters more than the absolute values \u2014 a clean taper should land TSB in **+10 to +25** on Sept 12.\n",
|
||||
"\n",
|
||||
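"Forecasting assumption: each plan week's km are converted to training load using the historical median TL/km from recent running, then spread across the week in a long-run-weighted pattern (long run on Saturday \u2248 40%, two mid-week runs \u2248 20% each, the remainder on the other days). Race weeks are handled separately: the race distance lands on race day at a lower race-day TL/km. The Banister EWMAs (\u03c4=42 for CTL, \u03c4=7 for ATL) smooth out most of the day-of-week pattern anyway."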
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from analysis import banister, daily_training_load_series\n",
|
||||
"\n",
|
||||
"# Historical daily load (running + trail) up to today\n",
|
||||
"hist = daily_training_load_series(conn)\n",
|
||||
"\n",
|
||||
"# TL/km conversion factor \u2014 median across the last 12 months of running\n",
|
||||
"recent_acts = pd.read_sql('''\n",
|
||||
" SELECT distance_m, training_load\n",
|
||||
" FROM activities\n",
|
||||
" WHERE activity_type IN ('running','trail_running')\n",
|
||||
" AND training_load IS NOT NULL\n",
|
||||
" AND date(start_time_local) >= date('now','-12 months')\n",
|
||||
" AND distance_m >= 2000\n",
|
||||
"''', conn)\n",
|
||||
"recent_acts['tl_per_km'] = recent_acts['training_load'] / (recent_acts['distance_m'] / 1000)\n",
|
||||
"tl_per_km = recent_acts['tl_per_km'].median()\n",
|
||||
"print(f'historical TL/km (last 12 mo, median): {tl_per_km:.1f}')\n",
|
||||
"\n",
|
||||
"# Build forecast: distribute weekly load across days.\n",
|
||||
"# For race weeks: race day gets most of the km, lead-in days are tapered shakeouts.\n",
|
||||
"# For training weeks: long run Sat gets ~40%, two mid-week runs ~20% each, rest distributed.\n",
|
||||
"# Race-day TL/km is empirically lower (~7) than training (~11) \u2014 long ultras spread out load.\n",
|
||||
"RACE_DAY_TL_PER_KM = 7.0 # observed from prior 50K races (mean ~7.5)\n",
|
||||
"\n",
|
||||
"race_day_set = set(RACES.values())\n",
|
||||
"forecast_rows = []\n",
|
||||
"for week_start, row in tracker.iterrows():\n",
|
||||
" week_days = [week_start + pd.Timedelta(days=i) for i in range(7)]\n",
|
||||
" is_race_week = any(d in race_day_set for d in week_days)\n",
|
||||
" if is_race_week:\n",
|
||||
" race_d = next(d for d in week_days if d in race_day_set)\n",
|
||||
" # Race itself: long_run_km on race day at race TL/km\n",
|
||||
" race_load = row['long_run_km'] * RACE_DAY_TL_PER_KM\n",
|
||||
" # Remaining km this week (shakeouts) at training TL/km, spread across 3 days\n",
|
||||
" rem_km = max(row['weekly_km'] - row['long_run_km'], 0)\n",
|
||||
" rem_load = rem_km * tl_per_km\n",
|
||||
" for d in week_days:\n",
|
||||
" if d == race_d:\n",
|
||||
" forecast_rows.append((d, race_load))\n",
|
||||
" elif d.weekday() in (0, 2, 4): # Mon/Wed/Fri shakeouts\n",
|
||||
" forecast_rows.append((d, rem_load / 3))\n",
|
||||
" else:\n",
|
||||
" forecast_rows.append((d, 0.0))\n",
|
||||
" else:\n",
|
||||
" # Training week: long run on Sat (40%), Tue+Thu mid-runs (20% each), Mon/Wed easy (10% each)\n",
|
||||
" weights = {0:0.10, 1:0.20, 2:0.10, 3:0.20, 4:0.00, 5:0.40, 6:0.00}\n",
|
||||
" total_load = row['weekly_km'] * tl_per_km\n",
|
||||
" for d in week_days:\n",
|
||||
" forecast_rows.append((d, total_load * weights.get(d.weekday(), 0)))\n",
|
||||
"forecast = pd.Series(dict(forecast_rows))\n",
|
||||
"forecast.index.name = 'd'\n",
|
||||
"\n",
|
||||
"# Splice: historical actual until today, forecast from tomorrow onward\n",
|
||||
"combined = pd.concat([\n",
|
||||
" hist[hist.index <= TODAY],\n",
|
||||
" forecast[forecast.index > TODAY],\n",
|
||||
"]).sort_index()\n",
|
||||
"combined = combined[~combined.index.duplicated(keep='first')]\n",
|
||||
"\n",
|
||||
"pmc = banister(combined)\n",
|
||||
"print(f'PMC range: {pmc.index.min().date()} \u2192 {pmc.index.max().date()}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 7.5), sharex=True)\n",
|
||||
"\n",
|
||||
"# Fitness + fatigue\n",
|
||||
"ax1.plot(pmc.index, pmc['CTL'], color='#2a9d8f', lw=2.2, label='CTL (fitness, 42d)')\n",
|
||||
"ax1.plot(pmc.index, pmc['ATL'], color='#e76f51', lw=1.2, alpha=0.85, label='ATL (fatigue, 7d)')\n",
|
||||
"ax1.axvline(TODAY, color='red', ls=':', lw=1.5)\n",
|
||||
"ax1.text(TODAY, ax1.get_ylim()[1]*0.95, ' today ', color='red', fontsize=8, va='top')\n",
|
||||
"ax1.set_ylabel('training load')\n",
|
||||
"ax1.legend(loc='upper left')\n",
|
||||
"ax1.grid(alpha=0.3)\n",
|
||||
"ax1.set_title('Projected Performance Management Chart \u2014 historical + plan-based forecast')\n",
|
||||
"\n",
|
||||
"# Form (TSB)\n",
|
||||
"ax2.plot(pmc.index, pmc['TSB'], color='#264653', lw=1.5)\n",
|
||||
"ax2.axhspan(10, 25, color='#2a9d8f', alpha=0.12, label='race-ready (+10 to +25)')\n",
|
||||
"ax2.axhspan(-30, -10, color='#e9c46a', alpha=0.12, label='productive overload (\u221230 to \u221210)')\n",
|
||||
"ax2.axhline(0, color='gray', lw=0.6)\n",
|
||||
"ax2.axhline(-30, color='#e76f51', ls='--', lw=0.8)\n",
|
||||
"ax2.axvline(TODAY, color='red', ls=':', lw=1.5)\n",
|
||||
"\n",
|
||||
"# Annotate planned races\n",
|
||||
"for label, d in RACES.items():\n",
|
||||
" if d in pmc.index:\n",
|
||||
" tsb = pmc.loc[d, 'TSB']\n",
|
||||
" ax2.axvline(d, color='#d62828', alpha=0.55, lw=1)\n",
|
||||
" ax2.scatter([d], [tsb], color='#d62828', s=70, zorder=5)\n",
|
||||
" ax2.annotate(f\"{label.split('\u2014')[1].strip()}\\nTSB={tsb:+.0f}\",\n",
|
||||
" xy=(d, tsb), xytext=(8, 12),\n",
|
||||
" textcoords='offset points', fontsize=8.5, color='#d62828',\n",
|
||||
" fontweight='bold')\n",
|
||||
"\n",
|
||||
"ax2.set_ylabel('TSB (form)')\n",
|
||||
"ax2.legend(loc='lower left', fontsize=9)\n",
|
||||
"ax2.grid(alpha=0.3)\n",
|
||||
"fig.autofmt_xdate()\n",
|
||||
"fig.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Race-day TSB check\n",
|
||||
"print('Projected fitness/fatigue/form on each race day:')\n",
|
||||
"print()\n",
|
||||
"for label, d in RACES.items():\n",
|
||||
" if d not in pmc.index:\n",
|
||||
" print(f' {label}: outside PMC range'); continue\n",
|
||||
" row = pmc.loc[d]\n",
|
||||
" tsb = row['TSB']\n",
|
||||
" if tsb < -30: tag = 'severely fatigued \u2014 UNSAFE'\n",
|
||||
" elif tsb < -10: tag = 'productive overload \u2014 not a race-day state'\n",
|
||||
" elif tsb < 0: tag = 'balanced \u2014 slightly underrested'\n",
|
||||
" elif tsb < 10: tag = 'sharpening \u2014 taper short'\n",
|
||||
" elif tsb < 25: tag = 'fresh / peaked \u2014 IDEAL race-day window'\n",
|
||||
" else: tag = 'detrained \u2014 taper too long'\n",
|
||||
" print(f' {label} {d.date()}')\n",
|
||||
" print(f' CTL={row[\"CTL\"]:>5.1f} ATL={row[\"ATL\"]:>5.1f} TSB={tsb:>+5.1f} \u2192 {tag}')\n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
"# Historical comparison \u2014 what TSB did prior 50K races land at?\n",
|
||||
"print('Historical comparison \u2014 TSB on prior 50K races:')\n",
|
||||
"race_history = pd.to_datetime(['2023-09-23','2024-09-21','2025-09-06','2025-09-20'])\n",
|
||||
"for rd in race_history:\n",
|
||||
" if rd in pmc.index:\n",
|
||||
" print(f' {rd.date()} CTL={pmc.loc[rd,\"CTL\"]:>5.1f} TSB={pmc.loc[rd,\"TSB\"]:+5.1f}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Reading the chart**\n",
|
||||
"\n",
|
||||
"- **CTL slope after today** is what the plan *promises*. A flat or declining CTL means the plan isn't building fitness \u2014 usually because volume isn't ramping fast enough (or you've already peaked).\n",
|
||||
"- **ATL spikes** before each race week are expected \u2014 that's the race effort hitting the 7-day window. The taper then lets ATL bleed off faster than CTL.\n",
|
||||
"- **TSB on race day** is the actionable number. If a race lands below +10, the plan's taper into that race is too short; if above +25, too long.\n",
|
||||
"- The two earlier races (30K, 50K) are mid-build B-races; **TSB at those races doesn't need to be in the +10\u201325 sweet spot** \u2014 slightly negative is fine, since they're training stimuli, not peak performances.\n",
|
||||
"- The **50-mile (wk 17)** is the A-race; TSB here is the one that matters. Adjust the wk 14\u201317 plan rows if it's outside +10 to +25."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Race-specific notes\n",
|
||||
"\n",
|
||||
"### Wk 4 \u2014 30K (June 13)\n",
|
||||
"\n",
|
||||
"- **Role:** longest pre-race long run + first dress rehearsal. Don't taper hard; this is training.\n",
|
||||
"- **Effort:** controlled aerobic, target avg HR < 155. Negative split is the win.\n",
|
||||
"- **Rehearse:** pack, shoes, nutrition cadence, salt. Whatever fails here gets fixed before the 50K.\n",
|
||||
"- **Recovery:** one easy week (wk 5) is enough.\n",
|
||||
"\n",
|
||||
"### Wk 10 \u2014 50K (July 25)\n",
|
||||
"\n",
|
||||
"- **Role:** real race, but also the calibration run for the 50-mile. Pace it at projected 50-mile effort + 5\u201310 bpm.\n",
|
||||
"- **Effort:** controlled. Goal is to finish strong, not PR.\n",
|
||||
"- **Calibration:** the average HR you can hold for this distance comfortably \u2248 your 50-mile ceiling. Note the number. Use it Sep 12.\n",
|
||||
"- **Recovery:** 2 weeks before resuming hard training (wk 11 deep recovery, wk 12 easy rebuild).\n",
|
||||
"\n",
|
||||
"### Wk 17 \u2014 50 MILE (September 12)\n",
|
||||
"\n",
|
||||
"- **Pacing rule:** the average HR from the 50K is the *ceiling*, not the target. Start 5\u201310 bpm below it.\n",
|
||||
"- **Fueling:** 60\u201390 g carb/hr from minute 30. Don't skip aid stations.\n",
|
||||
"- **Walking strategy:** walk every climb from the start. The race is won in the final 20 km by people who walked the first 20 climbs.\n",
|
||||
"- **Drop-bag essentials:** chafe-prevention, headlamp/spare batteries if late finish, dry socks at midway."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Key sessions to nail\n",
|
||||
"\n",
|
||||
"If everything else slips, these workouts move finish probability most:\n",
|
||||
"\n",
|
||||
"- **Wk 4 \u2014 30K race.** Validates fueling and kit. A messy 30K is a 50K-fix-it list.\n",
|
||||
"- **Wk 8 \u2014 peak long before 50K** (35 km, optionally B2B 28+15). Last big training stimulus before the calibration race.\n",
|
||||
"- **Wk 10 \u2014 50K race.** The calibration. Pace data here drives 50-mile pacing.\n",
|
||||
"- **Wk 14 \u2014 B2B peak (40 + 22).** The single most important week. Running on tired legs is the 50-mile race in miniature.\n",
|
||||
"- **Wk 17 \u2014 race week.** Don't add miles. Stay healthy. Show up."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
699
examples/notebooks/_build_05.py
Normal file
699
examples/notebooks/_build_05.py
Normal file
@@ -0,0 +1,699 @@
|
||||
"""One-shot builder for notebooks/05_intra_run.ipynb.
|
||||
|
||||
Run with: uv run python notebooks/_build_05.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def md(*lines: str) -> dict:
    """Build a Jupyter markdown cell dict.

    Each positional argument is one line of markdown; the lines are handed to
    ``_join`` to produce the cell's ``source`` list.
    """
    source = _join(lines)
    return {"cell_type": "markdown", "metadata": {}, "source": source}
|
||||
|
||||
|
||||
def code(*lines: str) -> dict:
    """Build a Jupyter code cell dict with no outputs and no execution count.

    Each positional argument is one line of cell source; the lines are handed
    to ``_join`` to produce the cell's ``source`` list.
    """
    # Keys are inserted in the same order as before so the serialised JSON
    # keeps the same key order.
    cell = dict(cell_type="code", execution_count=None, metadata={}, outputs=[])
    cell["source"] = _join(lines)
    return cell
|
||||
|
||||
|
||||
def _join(lines):
|
||||
text = "\n".join(lines)
|
||||
# Jupyter expects a list where each entry ends with \n except possibly the last.
|
||||
parts = text.split("\n")
|
||||
return [p + ("\n" if i < len(parts) - 1 else "") for i, p in enumerate(parts)]
|
||||
|
||||
|
||||
cells: list[dict] = []
|
||||
|
||||
# Title cell. The data note must stay in sync with the sections below: the
# lap-level analyses read `activity_splits`, while section 1b reads per-second
# FIT records through `activity_fit_files`.
cells.append(md(
    "# 05 — Intra-run dynamics",
    "",
    "Within-run signals from lap-level splits: cardiac drift, cadence/stride, route-controlled pace, HR-zone distribution.",
    "",
    "**Data note.** The Garmin live-API sync (`sync.py`) only provides lap-level data, so most of this notebook runs on `activity_splits` (per-mile laps, ~2 000 rows). Per-second FIT records exist only for activities linked in `activity_fit_files` by `ingest_export.py`; §1b uses them for the race-day deep dive. The remaining lap-level analyses can move to per-second resolution the same way — only the loader needs to change.",
))
|
||||
|
||||
cells.append(code(
|
||||
"import sys",
|
||||
"sys.path.insert(0, '..')",
|
||||
"import numpy as np",
|
||||
"import pandas as pd",
|
||||
"import matplotlib.pyplot as plt",
|
||||
"from analysis import (",
|
||||
" open_conn, load_splits, decoupling,",
|
||||
" assign_hr_zone, cluster_routes, haversine_km,",
|
||||
")",
|
||||
"",
|
||||
"conn = open_conn()",
|
||||
"splits = load_splits(conn) # running only by default",
|
||||
"print(f'{len(splits):,} splits across {splits.activity_id.nunique():,} runs, "
|
||||
"{splits.start_time_local.min().date()} → {splits.start_time_local.max().date()}')",
|
||||
))
|
||||
|
||||
# ----- Section 1: cardiac drift -----
|
||||
cells.append(md(
|
||||
"## 1. Cardiac drift (Pa:Hr decoupling)",
|
||||
"",
|
||||
"Within a single run, divide the laps into first half and second half. For each half compute the duration-weighted ratio `speed / HR` — essentially \"pace per heartbeat,\" the gold-standard aerobic-fitness index. Decoupling = how much that ratio falls between halves.",
|
||||
"",
|
||||
"$$\\text{decoupling}\\;\\% = \\left(\\frac{(\\text{speed}/\\text{HR})_{1st}}{(\\text{speed}/\\text{HR})_{2nd}} - 1\\right) \\times 100$$",
|
||||
"",
|
||||
"Friel's rule of thumb for steady aerobic runs:",
|
||||
"- **< 5 %** — aerobically developed",
|
||||
"- **5–10 %** — moderate drift; sustainable",
|
||||
"- **> 10 %** — significant drift; pace was unsustainable or it was a hot/hard day",
|
||||
"",
|
||||
"Negative values mean you ran *more efficiently* in the second half (a negative split or conservative opener).",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"dec = decoupling(splits, min_splits=6)",
|
||||
"print(f'{len(dec)} runs with ≥ 6 splits')",
|
||||
"dec['decoupling_pct'].describe().round(2)",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(13, 4.5))",
|
||||
"",
|
||||
"ax = axes[0]",
|
||||
"ax.hist(dec['decoupling_pct'], bins=30, edgecolor='white')",
|
||||
"for x, lab, color in [(5, 'good <5%', '#2a9d8f'), (10, 'caution 10%', '#e76f51')]:",
|
||||
" ax.axvline(x, color=color, ls='--', lw=1.2, label=lab)",
|
||||
"ax.axvline(0, color='gray', lw=0.8)",
|
||||
"ax.set_xlabel('decoupling (%)'); ax.set_ylabel('runs')",
|
||||
"ax.set_title(f'Pa:Hr decoupling distribution (n={len(dec)})')",
|
||||
"ax.legend()",
|
||||
"",
|
||||
"ax = axes[1]",
|
||||
"sc = ax.scatter(dec['start_time_local'], dec['decoupling_pct'],",
|
||||
" c=dec['distance_km'], cmap='viridis', alpha=0.75, s=28)",
|
||||
"ax.axhline(5, color='#2a9d8f', ls='--', lw=1)",
|
||||
"ax.axhline(10, color='#e76f51', ls='--', lw=1)",
|
||||
"ax.axhline(0, color='gray', lw=0.6)",
|
||||
"ax.set_ylabel('decoupling (%)'); ax.set_title('decoupling over time (color = distance km)')",
|
||||
"plt.colorbar(sc, ax=ax, label='distance (km)')",
|
||||
"fig.autofmt_xdate()",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### Aerobic runs only — the clean view",
|
||||
"",
|
||||
"Decoupling is only interpretable on **steady aerobic** efforts. Filter to runs ≥ 8 km with avg HR < 165 (well below threshold for a sub-3:30 marathoner) and look at the trend. Less drift over time = better aerobic conditioning.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"aerobic = dec[(dec['distance_km'] >= 8) & (dec['avg_hr'] < 165)].copy()",
|
||||
"aerobic['quarter'] = aerobic['start_time_local'].dt.to_period('Q').dt.to_timestamp()",
|
||||
"print(f'{len(aerobic)} aerobic runs')",
|
||||
"",
|
||||
"fig, ax = plt.subplots(figsize=(11, 4.5))",
|
||||
"ax.scatter(aerobic['start_time_local'], aerobic['decoupling_pct'],",
|
||||
" c=aerobic['avg_hr'], cmap='magma_r', s=40, alpha=0.85)",
|
||||
"",
|
||||
"# rolling median (smooth trend)",
|
||||
"aerobic_sorted = aerobic.sort_values('start_time_local')",
|
||||
"rolling = aerobic_sorted.set_index('start_time_local')['decoupling_pct'].rolling('120D', min_periods=5).median()",
|
||||
"ax.plot(rolling.index, rolling.values, color='black', lw=2, label='120-day rolling median')",
|
||||
"",
|
||||
"ax.axhline(5, color='#2a9d8f', ls='--', lw=1)",
|
||||
"ax.axhline(10, color='#e76f51', ls='--', lw=1)",
|
||||
"ax.set_ylabel('decoupling (%)')",
|
||||
"ax.set_title('Cardiac drift on aerobic runs (≥8 km, avg HR < 165)')",
|
||||
"ax.legend(loc='upper right')",
|
||||
"fig.autofmt_xdate()",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"# Year-over-year aerobic decoupling summary",
|
||||
"aerobic.groupby('year').agg(",
|
||||
" n=('decoupling_pct', 'size'),",
|
||||
" median_drift=('decoupling_pct', 'median'),",
|
||||
" mean_drift=('decoupling_pct', 'mean'),",
|
||||
" pct_under_5=('decoupling_pct', lambda s: (s < 5).mean() * 100),",
|
||||
").round(2)",
|
||||
))
|
||||
|
||||
# ----- Section 1b: per-second race-day decoupling from FIT files -----
|
||||
cells.append(md(
|
||||
"## 1b. Per-second decoupling — race-day deep dive",
|
||||
"",
|
||||
"Lap-level decoupling (above) is coarse. With FIT files linked (since the takeout-export ingest), we can read the per-second `heart_rate` and `enhanced_speed` directly and compute Friel's decoupling without the noise from aid-station stops and lap rounding.",
|
||||
"",
|
||||
"**Method:**",
|
||||
"1. Drop the first 5 min (warmup) and last 2 min (cooldown / finish sprint).",
|
||||
"2. Drop records with speed < 0.5 m/s — aid-station pauses don't drag the mean.",
|
||||
"3. Slice the moving time into equal-time chunks (halves or quartiles).",
|
||||
"4. For each chunk: `efficiency = mean(speed) / mean(HR)`.",
|
||||
"5. `decoupling % = (eff_first / eff_chunk − 1) × 100` — positive = drift.",
|
||||
"",
|
||||
"Friel's rule: < 5% on a steady aerobic run = aerobically developed; > 10% = unsustainable pacing or fueling deficit. Race-day numbers are expected to be higher than training (you push the back half), but *how much* higher matters.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"from analysis import load_fit_records, fit_decoupling, fit_rolling_efficiency",
|
||||
"",
|
||||
"race_meta = pd.read_sql('''",
|
||||
" SELECT a.activity_id, a.start_time_local, a.distance_m/1000 AS km, a.avg_hr",
|
||||
" FROM activities a JOIN activity_fit_files f USING(activity_id)",
|
||||
" WHERE a.distance_m >= 45000 AND a.distance_m <= 60000",
|
||||
" AND a.activity_type='running'",
|
||||
" ORDER BY a.start_time_local",
|
||||
"''', conn, parse_dates=['start_time_local'])",
|
||||
"print(f'{len(race_meta)} prior 50K-class races with FIT linked:')",
|
||||
"race_meta",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"# Load all four FITs once, cache the records frames",
|
||||
"race_records = {}",
|
||||
"for _, r in race_meta.iterrows():",
|
||||
" aid = int(r['activity_id'])",
|
||||
" race_records[aid] = load_fit_records(conn, aid)",
|
||||
" print(f\" {r['start_time_local'].date()} aid={aid} records={len(race_records[aid]):,}\")",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### Halves and quartiles — when does the drift start?",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"halves = []",
|
||||
"quarts = []",
|
||||
"for _, r in race_meta.iterrows():",
|
||||
" aid = int(r['activity_id'])",
|
||||
" h = fit_decoupling(race_records[aid], segments=2)",
|
||||
" h.insert(0, 'race', r['start_time_local'].date())",
|
||||
" halves.append(h)",
|
||||
" q = fit_decoupling(race_records[aid], segments=4)",
|
||||
" q.insert(0, 'race', r['start_time_local'].date())",
|
||||
" quarts.append(q)",
|
||||
"halves_df = pd.concat(halves, ignore_index=True)",
|
||||
"quarts_df = pd.concat(quarts, ignore_index=True)",
|
||||
"",
|
||||
"print('Per-race half-by-half decoupling:')",
|
||||
"(halves_df.pivot(index='race', columns='segment', values='decoupling_pct')",
|
||||
" .round(1).rename(columns={1:'Q1+Q2', 2:'Q3+Q4'}))",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"print('Quartile decoupling — where the drift actually starts:')",
|
||||
"(quarts_df.pivot(index='race', columns='segment', values='decoupling_pct')",
|
||||
" .round(1).rename(columns={1:'Q1',2:'Q2',3:'Q3',4:'Q4'}))",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"# Visualise quartile decoupling — bars per race, grouped by quartile",
|
||||
"fig, ax = plt.subplots(figsize=(11, 4.5))",
|
||||
"races = sorted(quarts_df['race'].unique())",
|
||||
"x = np.arange(len(races))",
|
||||
"w = 0.2",
|
||||
"colors = ['#2a9d8f', '#e9c46a', '#f4a261', '#e76f51'] # cool → hot",
|
||||
"for i in range(1, 5):",
|
||||
" vals = [quarts_df[(quarts_df.race == r) & (quarts_df.segment == i)]['decoupling_pct'].iloc[0]",
|
||||
" for r in races]",
|
||||
" ax.bar(x + (i - 2.5) * w, vals, w, color=colors[i-1], label=f'Q{i}')",
|
||||
"ax.axhline(0, color='black', lw=0.5)",
|
||||
"ax.axhline(10, color='gray', ls='--', lw=1, label='Friel \"unsustainable\" threshold')",
|
||||
"ax.set_xticks(x)",
|
||||
"ax.set_xticklabels([str(r) for r in races])",
|
||||
"ax.set_ylabel('decoupling (%)')",
|
||||
"ax.set_title('Per-second decoupling by race quartile — the wall lands in Q3 every time')",
|
||||
"ax.legend(loc='upper left', ncol=5)",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### Rolling efficiency curves — when does the wheels-come-off moment hit?",
|
||||
"",
|
||||
"5-minute rolling speed/HR over elapsed time. Flat = pacing matches HR. Falling curve = decoupling in progress. The y-axis is the same physical quantity Friel's method aggregates, just plotted continuously.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"fig, axes = plt.subplots(len(race_meta), 1, figsize=(13, 2.5 * len(race_meta)),",
|
||||
" sharex=True, squeeze=False)",
|
||||
"axes = axes.flatten()",
|
||||
"for ax, (_, r) in zip(axes, race_meta.iterrows()):",
|
||||
" aid = int(r['activity_id'])",
|
||||
" rolled = fit_rolling_efficiency(race_records[aid], window_s=300)",
|
||||
" valid = rolled.dropna(subset=['rolling_efficiency'])",
|
||||
" ax.plot(valid['elapsed_min'], valid['rolling_efficiency'], color='#264653', lw=1.5)",
|
||||
" # Normalise against the first 30 minutes' mean to show % drop",
|
||||
" base = valid.loc[valid['elapsed_min'] < 30, 'rolling_efficiency'].mean()",
|
||||
" if base and base > 0:",
|
||||
" ax2 = ax.twinx()",
|
||||
" ax2.plot(valid['elapsed_min'], (valid['rolling_efficiency'] / base - 1) * 100,",
|
||||
" color='#e76f51', lw=1, alpha=0.6)",
|
||||
" ax2.set_ylabel('% vs first 30 min', color='#e76f51', fontsize=9)",
|
||||
" ax2.axhline(0, color='#e76f51', ls=':', lw=0.6, alpha=0.5)",
|
||||
" ax.set_ylabel('speed / HR')",
|
||||
" ax.set_title(f\"{r['start_time_local'].date()} — {r['km']:.1f} km, avg HR {r['avg_hr']:.0f}\",",
|
||||
" fontsize=10)",
|
||||
"axes[-1].set_xlabel('elapsed minutes')",
|
||||
"fig.suptitle('Rolling efficiency through each race (5-min window)', y=1.01)",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### HR and pace traces, side by side",
|
||||
"",
|
||||
"Same data, separated: HR (left axis, magma colour-scale) and pace (right axis, inverted so faster is up). The interesting moments are where the curves *diverge* — HR climbing while pace stays flat (drift) or HR steady while pace falls (just tired legs).",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"fig, axes = plt.subplots(len(race_meta), 1, figsize=(13, 2.8 * len(race_meta)),",
|
||||
" sharex=True, squeeze=False)",
|
||||
"axes = axes.flatten()",
|
||||
"for ax, (_, r) in zip(axes, race_meta.iterrows()):",
|
||||
" aid = int(r['activity_id'])",
|
||||
" rec = race_records[aid].dropna(subset=['heart_rate','speed_mps','elapsed_s'])",
|
||||
" rec = rec[rec['speed_mps'] > 0.5]",
|
||||
" em = rec['elapsed_s'] / 60",
|
||||
" rolled_hr = rec['heart_rate'].rolling(300, min_periods=30).mean()",
|
||||
" rolled_pace = (1 / rec['speed_mps']) * 1000 / 60",
|
||||
" rolled_pace = rolled_pace.rolling(300, min_periods=30).mean()",
|
||||
" ax.plot(em, rolled_hr, color='#9b2226', lw=1.4, label='HR (5-min avg)')",
|
||||
" ax.set_ylabel('HR (bpm)', color='#9b2226')",
|
||||
" ax.tick_params(axis='y', labelcolor='#9b2226')",
|
||||
" ax2 = ax.twinx()",
|
||||
" ax2.plot(em, rolled_pace, color='#264653', lw=1.4, label='pace (5-min avg)')",
|
||||
" ax2.set_ylabel('pace (min/km)', color='#264653')",
|
||||
" ax2.tick_params(axis='y', labelcolor='#264653')",
|
||||
" ax2.invert_yaxis() # faster = up",
|
||||
" ax.set_title(f\"{r['start_time_local'].date()} — {r['km']:.1f} km\", fontsize=10)",
|
||||
"axes[-1].set_xlabel('elapsed minutes')",
|
||||
"fig.suptitle('HR (red) and pace (dark) — divergence = decoupling', y=1.01)",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### Per-second vs per-mile decoupling — sanity check",
|
||||
"",
|
||||
"How does the FIT-derived number compare to the lap-level decoupling we computed in §1? Per-second is correctly excluding stopped time and lap rounding, so should be **lower** than the per-mile number for the same race — but the qualitative ranking should agree.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"# Pull the per-mile (§1) value for each race and compare to per-second",
|
||||
"lap_dec = dec.set_index('activity_id')['decoupling_pct']",
|
||||
"rows = []",
|
||||
"for _, r in race_meta.iterrows():",
|
||||
" aid = int(r['activity_id'])",
|
||||
" ps = halves_df[(halves_df.race == r['start_time_local'].date()) & (halves_df.segment == 2)]['decoupling_pct'].iloc[0]",
|
||||
" lap = lap_dec.get(aid, float('nan'))",
|
||||
" rows.append({'race': r['start_time_local'].date(), 'km': r['km'],",
|
||||
" 'per_mile_decoupling_pct': round(lap, 1),",
|
||||
" 'per_second_decoupling_pct': round(ps, 1),",
|
||||
" 'delta': round(lap - ps, 1) if not pd.isna(lap) else None})",
|
||||
"pd.DataFrame(rows)",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### What this means for the 50-mile",
|
||||
"",
|
||||
"The per-second view localises the drift: in every prior race the wheels come off around the **4-hour mark** (between Q2 and Q3). For the 50-mile that's roughly halfway through the race — exactly when fueling errors stop being recoverable.",
|
||||
"",
|
||||
"Three concrete implications:",
|
||||
"",
|
||||
"1. **Front-load fueling.** The textbook glycogen depletion curve says 90 min of running on stored glycogen, then performance falls off without external carbs. Q1 (the easy half) shouldn't be a fueling holiday — every aid station, every hour, from the start.",
|
||||
"2. **Recalibrate pace by HR, not by feel.** The rolling-efficiency plots show HR rising while pace falls. Setting an HR ceiling (e.g. Z2 top = 143 bpm for the long run, slightly higher for race) and *enforcing it* would flatten the Q3 collapse.",
|
||||
"3. **What success looks like on Sept 12.** A 50-mile race executed cleanly should look like the *first half* of these 50K curves repeated twice. If the Q3 wall reappears around hour 4–5, treat it as a planned aid-station break to top up calories before continuing.",
|
||||
))
|
||||
|
||||
# ----- Section 2: cadence + stride -----
|
||||
cells.append(md(
|
||||
"## 2. Cadence and stride length",
|
||||
"",
|
||||
"At a given pace, faster runners tend to have **higher cadence and shorter stride**. Watching cadence-vs-pace and stride-vs-pace by year shows whether form is shifting independently of fitness.",
|
||||
"",
|
||||
"Garmin's `averageRunCadence` per split is already **both-legs** steps-per-minute (typical running range 150–185). `strideLength` is in cm.",
|
||||
))
|
||||
|
||||
# Section 2 code cell: cadence-vs-pace and stride-vs-pace scatter plots, one
# colour per year.
# The two panels are created with sharex=True, so the x-axis must be inverted
# exactly once. Calling invert_xaxis() on each shared axis toggles the shared
# limits twice and leaves the axis un-inverted.
cells.append(code(
    "form = splits.dropna(subset=['averageRunCadence', 'strideLength', 'pace_min_per_km']).copy()",
    "form = form[form['averageRunCadence'] > 0].copy() # zeros = walking/standing intervals",
    "form['cadence_spm'] = form['averageRunCadence'] # already both-legs SPM",
    "form['stride_m'] = form['strideLength'] / 100",
    "form = form[(form['cadence_spm'] >= 140) & (form['cadence_spm'] <= 200)] # drop walks/junk",
    "print(f'{len(form):,} clean form splits, {form.activity_id.nunique()} runs')",
    "",
    "fig, axes = plt.subplots(1, 2, figsize=(13, 5), sharex=True)",
    "years = sorted(form['year'].unique())",
    "cmap = plt.cm.viridis(np.linspace(0, 0.9, len(years)))",
    "",
    "for c, y in zip(cmap, years):",
    "    d = form[form['year'] == y]",
    "    axes[0].scatter(d['pace_min_per_km'], d['cadence_spm'], s=8, alpha=0.35, color=c, label=str(y))",
    "    axes[1].scatter(d['pace_min_per_km'], d['stride_m'], s=8, alpha=0.35, color=c, label=str(y))",
    "",
    "axes[0].set_ylabel('cadence (steps/min, both legs)')",
    "axes[1].set_ylabel('stride length (m)')",
    "for ax in axes:",
    "    ax.set_xlabel('pace (min/km)')",
    "# x-axis is shared (sharex=True): invert once so faster runs are on the right",
    "axes[0].invert_xaxis()",
    "axes[0].legend(title='year', loc='lower left', fontsize=8)",
    "axes[0].set_title('Cadence vs pace')",
    "axes[1].set_title('Stride length vs pace')",
    "fig.tight_layout()",
))
|
||||
|
||||
cells.append(md(
|
||||
"### Form at a controlled pace",
|
||||
"",
|
||||
"Bin splits into a narrow easy-pace band (5:30–6:30 min/km) and look at cadence / stride / vertical metrics year-over-year. Holding pace constant strips out the obvious \"faster = higher cadence\" effect and isolates technique drift.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"band = form[(form['pace_min_per_km'] >= 5.5) & (form['pace_min_per_km'] <= 6.5)].copy()",
|
||||
"vert = splits.dropna(subset=['verticalOscillation', 'verticalRatio', 'groundContactTime'])",
|
||||
"vert = vert[(vert['pace_min_per_km'] >= 5.5) & (vert['pace_min_per_km'] <= 6.5)]",
|
||||
"",
|
||||
"summary = band.groupby('year').agg(",
|
||||
" n_splits=('cadence_spm', 'size'),",
|
||||
" cadence_med=('cadence_spm', 'median'),",
|
||||
" stride_med=('stride_m', 'median'),",
|
||||
")",
|
||||
"vsum = vert.groupby('year').agg(",
|
||||
" vert_osc_cm=('verticalOscillation', 'median'),",
|
||||
" vert_ratio=('verticalRatio', 'median'),",
|
||||
" gct_ms=('groundContactTime', 'median'),",
|
||||
")",
|
||||
"summary = summary.join(vsum).round(2)",
|
||||
"summary",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(14, 4))",
|
||||
"summary['cadence_med'].plot(kind='bar', ax=axes[0], color='#264653')",
|
||||
"axes[0].set_ylabel('cadence (spm)'); axes[0].set_title('Cadence at easy pace')",
|
||||
"axes[0].set_ylim(summary['cadence_med'].min() - 3, summary['cadence_med'].max() + 3)",
|
||||
"",
|
||||
"summary['stride_med'].plot(kind='bar', ax=axes[1], color='#2a9d8f')",
|
||||
"axes[1].set_ylabel('stride length (m)'); axes[1].set_title('Stride at easy pace')",
|
||||
"axes[1].set_ylim(summary['stride_med'].min() - 0.05, summary['stride_med'].max() + 0.05)",
|
||||
"",
|
||||
"if summary['vert_osc_cm'].notna().any():",
|
||||
" summary['vert_osc_cm'].plot(kind='bar', ax=axes[2], color='#e76f51')",
|
||||
" axes[2].set_ylabel('vertical oscillation (cm)'); axes[2].set_title('Vertical bounce at easy pace')",
|
||||
" axes[2].set_ylim(summary['vert_osc_cm'].min() - 0.5, summary['vert_osc_cm'].max() + 0.5)",
|
||||
"else:",
|
||||
" axes[2].text(0.5, 0.5, 'no vertical-osc data', ha='center', va='center', transform=axes[2].transAxes)",
|
||||
"",
|
||||
"for ax in axes:",
|
||||
" ax.set_xlabel('year')",
|
||||
"fig.suptitle('Form metrics in the 5:30–6:30 min/km band, year over year')",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
# ----- Section 3: GPS route clustering -----
|
||||
cells.append(md(
|
||||
"## 3. Route clustering — pace controlled for terrain",
|
||||
"",
|
||||
"Raw pace year-over-year mixes terrain, weather, intent. Cluster runs by their **start coordinates** (greedy haversine, 250 m radius) and you get \"my usual routes.\" Within a cluster the route is roughly the same, so pace differences are mostly fitness, not geography.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"starts = (splits.dropna(subset=['startLatitude', 'startLongitude'])",
|
||||
" .groupby('activity_id')",
|
||||
" .agg(lat=('startLatitude', 'first'),",
|
||||
" lon=('startLongitude', 'first'),",
|
||||
" start_time=('start_time_local', 'first'),",
|
||||
" distance_km=('distance_m', lambda s: s.sum() / 1000),",
|
||||
" avg_hr=('avg_hr', 'mean'),",
|
||||
" avg_pace=('pace_min_per_km', 'mean')))",
|
||||
"starts['cluster'] = cluster_routes(starts['lat'].values, starts['lon'].values, radius_km=0.25)",
|
||||
"print(f'{len(starts)} runs with start coords; {(starts.cluster >= 0).sum()} clustered, "
|
||||
"{(starts.cluster == -1).sum()} singletons')",
|
||||
"",
|
||||
"top = (starts[starts.cluster >= 0]",
|
||||
" .groupby('cluster')",
|
||||
" .agg(n=('cluster', 'size'),",
|
||||
" lat=('lat', 'median'),",
|
||||
" lon=('lon', 'median'),",
|
||||
" first=('start_time', 'min'),",
|
||||
" last=('start_time', 'max'),",
|
||||
" med_dist=('distance_km', 'median'))",
|
||||
" .sort_values('n', ascending=False))",
|
||||
"top.head(10)",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"fig, ax = plt.subplots(figsize=(9, 7))",
|
||||
"",
|
||||
"# unclustered points faint",
|
||||
"unc = starts[starts.cluster == -1]",
|
||||
"ax.scatter(unc['lon'], unc['lat'], color='lightgray', s=12, alpha=0.6, label='singletons')",
|
||||
"",
|
||||
"# top-10 clusters colored",
|
||||
"top10 = top.head(10).index.tolist()",
|
||||
"cmap = plt.cm.tab10(np.linspace(0, 1, len(top10)))",
|
||||
"for c, cl in zip(cmap, top10):",
|
||||
" pts = starts[starts.cluster == cl]",
|
||||
" ax.scatter(pts['lon'], pts['lat'], color=c, s=40, alpha=0.8,",
|
||||
" label=f'route #{cl} (n={len(pts)})')",
|
||||
"",
|
||||
"ax.set_xlabel('longitude'); ax.set_ylabel('latitude')",
|
||||
"ax.set_title('Run start points — top 10 recurring routes')",
|
||||
"ax.legend(loc='best', fontsize=8)",
|
||||
"ax.set_aspect('equal', adjustable='datalim')",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### Pace progression within each frequent route",
|
||||
"",
|
||||
"Now restrict to clusters with ≥5 runs and plot pace over time per route. Slopes here are much cleaner than the global pace trend because terrain is held constant.",
|
||||
))
|
||||
|
||||
# Cell: one scatter + rolling-median panel per recurring route (clusters with >= 5 runs).
# The panels share their y-axis, so the axis is inverted exactly once after the loop:
# calling invert_yaxis() on every panel toggles the shared axis back and forth and
# leaves it un-inverted whenever the number of routes is even.
cells.append(code(
    "frequent = top[top['n'] >= 5].index.tolist()",
    "print(f'{len(frequent)} routes with ≥5 runs')",
    "",
    "if frequent:",
    "    n_cols = min(3, len(frequent))",
    "    n_rows = (len(frequent) + n_cols - 1) // n_cols",
    "    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 3.2 * n_rows),",
    "                             squeeze=False, sharey=True)",
    "    for ax, cl in zip(axes.flat, frequent):",
    "        d = (starts[starts.cluster == cl]",
    "             .dropna(subset=['avg_pace'])",
    "             .sort_values('start_time'))",
    "        ax.scatter(d['start_time'], d['avg_pace'], s=30, alpha=0.75, color='#264653')",
    "        if len(d) >= 3:",
    "            # rolling median by date",
    "            rd = d.set_index('start_time')['avg_pace'].rolling('120D', min_periods=2).median()",
    "            ax.plot(rd.index, rd.values, color='#e76f51', lw=1.5)",
    "        ax.set_title(f'route #{cl} — n={len(d)}, ~{top.loc[cl, \"med_dist\"]:.1f} km')",
    "        ax.set_ylabel('pace (min/km)')",
    "    # y is shared across panels: invert once (per-panel calls would toggle it back)",
    "    axes[0, 0].invert_yaxis()  # faster up",
    "    # blank unused axes",
    "    for ax in axes.flat[len(frequent):]:",
    "        ax.set_visible(False)",
    "    fig.suptitle('Per-route pace over time (terrain held roughly constant)')",
    "    fig.tight_layout()",
    "else:",
    "    print('No clusters with ≥5 runs.')",
))
|
||||
|
||||
# ----- Section 4: HR zones (Garmin-configured, FIT-based when available) -----
|
||||
cells.append(md(
|
||||
"## 4. HR-zone time-in-zone (Garmin-configured zones, per-second when possible)",
|
||||
"",
|
||||
"**Zones come from Garmin's `heartRateZones.json` (training method: HR_MAX),** not estimated from observed HR. Lactate-threshold HR sits at 182 inside Z4.",
|
||||
"",
|
||||
"| Zone | range (bpm) | feel | role |",
|
||||
"|------|-------------|------|------|",
|
||||
"| Z1 | 102–122 | walk / recovery | active rest |",
|
||||
"| Z2 | 123–143 | conversational | **long-run target** |",
|
||||
"| Z3 | 144–164 | tempo | the \"junk-miles middle\" |",
|
||||
"| Z4 | 165–185 | threshold (LTHR=182) | hard sustained |",
|
||||
"| Z5 | 186–209 | VO₂ max | intervals |",
|
||||
"",
|
||||
"For each activity, time-in-zone comes from `activity_time_in_zone` (precomputed by `compute_time_in_zone.py`):",
|
||||
"- **`source='fit'`** — per-second HR from the FIT file. Each record's `dt` (typically 1 s) goes into whichever zone its HR falls in. Accurate even when laps span zone boundaries.",
|
||||
"- **`source='lap'`** — fallback for activities without a linked FIT. The whole lap's duration is assigned to whichever zone the *lap's average* HR sits in. Smears across boundaries, biases toward middle zones.",
|
||||
"",
|
||||
"**Polarized-training rule (Seiler):** elites accumulate ~80% of weekly time in Z1+Z2 and ~20% in Z4+Z5, with little Z3.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"from analysis import HR_ZONES_USER",
|
||||
"",
|
||||
"tiz = pd.read_sql('''",
|
||||
" SELECT t.activity_id, t.z1_s, t.z2_s, t.z3_s, t.z4_s, t.z5_s, t.total_s, t.source,",
|
||||
" a.start_time_local, a.activity_type, a.distance_m, a.duration_s",
|
||||
" FROM activity_time_in_zone t",
|
||||
" JOIN activities a USING(activity_id)",
|
||||
" WHERE a.activity_type IN ('running','trail_running')",
|
||||
"''', conn, parse_dates=['start_time_local'])",
|
||||
"tiz['week'] = tiz['start_time_local'].dt.to_period('W-MON').dt.start_time",
|
||||
"tiz['year'] = tiz['start_time_local'].dt.year",
|
||||
"",
|
||||
"print(f'{len(tiz)} activities with cached time-in-zone')",
|
||||
"print(' source breakdown:')",
|
||||
"print(tiz['source'].value_counts().to_string())",
|
||||
"print(f' fit coverage: {(tiz.source==\"fit\").mean()*100:.0f}% of running activities')",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### Sanity check: FIT vs lap method on the same race",
|
||||
"",
|
||||
"On the same activity, how different are the two estimates? Take the 2025-09-20 race (8 hours, 28k FIT records) and compute both, then compare. The lap method should over-weight whichever zone the typical lap average falls in (here, Z3) and under-count time spent in adjacent zones because boundary-crossing laps get rounded to one zone.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"from analysis import load_fit_records, time_in_zone_from_fit, time_in_zone_from_splits",
|
||||
"",
|
||||
"race_aid = race_meta.iloc[-1]['activity_id'] # most recent 50K (2025-09-20)",
|
||||
"race_date = race_meta.iloc[-1]['start_time_local'].date()",
|
||||
"",
|
||||
"fit_tiz = time_in_zone_from_fit(race_records[int(race_aid)])",
|
||||
"race_splits = pd.read_sql(",
|
||||
" 'SELECT avg_hr, duration_s FROM activity_splits WHERE activity_id = ?',",
|
||||
" conn, params=[int(race_aid)]",
|
||||
")",
|
||||
"lap_tiz = time_in_zone_from_splits(race_splits)",
|
||||
"",
|
||||
"compare = pd.DataFrame({",
|
||||
" 'FIT (per-sec) min': {k: round(v / 60, 1) for k, v in fit_tiz.items()},",
|
||||
" 'lap (avg-HR) min': {k: round(v / 60, 1) for k, v in lap_tiz.items()},",
|
||||
"}).reindex(['Z1','Z2','Z3','Z4','Z5']).fillna(0)",
|
||||
"compare.loc['total'] = compare.sum()",
|
||||
"compare['delta (min)'] = (compare['FIT (per-sec) min'] - compare['lap (avg-HR) min']).round(1)",
|
||||
"print(f'Race {race_date} — FIT vs lap time-in-zone:')",
|
||||
"compare",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"# Weekly time-in-zone (hours) using whichever method was available per activity",
|
||||
"wk_cols = ['z1_s','z2_s','z3_s','z4_s','z5_s']",
|
||||
"weekly = tiz.groupby('week')[wk_cols].sum() / 3600",
|
||||
"weekly.columns = ['Z1','Z2','Z3','Z4','Z5']",
|
||||
"",
|
||||
"fig, ax = plt.subplots(figsize=(14, 4.8))",
|
||||
"colors = ['#264653', '#2a9d8f', '#e9c46a', '#f4a261', '#e76f51']",
|
||||
"ax.stackplot(weekly.index, weekly.T.values, labels=weekly.columns, colors=colors, alpha=0.92)",
|
||||
"ax.set_ylabel('hours / week')",
|
||||
"ax.set_title('Weekly running time by HR zone (Garmin-configured zones)')",
|
||||
"ax.legend(loc='upper left', ncol=5, fontsize=9)",
|
||||
"fig.autofmt_xdate()",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### Polarized index over time",
|
||||
"",
|
||||
"Collapse to three buckets:",
|
||||
"- **easy** = Z1 + Z2 (HR ≤ 143)",
|
||||
"- **moderate \"junk\"** = Z3 (144–164)",
|
||||
"- **hard** = Z4 + Z5 (HR ≥ 165, threshold and up)",
|
||||
"",
|
||||
"Polarized: high easy, low moderate, modest hard. Pyramidal: high easy, modest moderate, low hard. Threshold-heavy: lots of moderate.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"buckets = pd.DataFrame({",
|
||||
" 'easy': weekly[['Z1','Z2']].sum(axis=1),",
|
||||
" 'moderate': weekly['Z3'],",
|
||||
" 'hard': weekly[['Z4','Z5']].sum(axis=1),",
|
||||
"})",
|
||||
"totals = buckets.sum(axis=1)",
|
||||
"pct = buckets.div(totals.replace(0, np.nan), axis=0) * 100",
|
||||
"",
|
||||
"fig, axes = plt.subplots(2, 1, figsize=(14, 7), sharex=True)",
|
||||
"",
|
||||
"ax = axes[0]",
|
||||
"ax.stackplot(pct.index, pct[['easy','moderate','hard']].T.values,",
|
||||
" labels=['easy (Z1+Z2)', 'moderate (Z3)', 'hard (Z4+Z5)'],",
|
||||
" colors=['#2a9d8f', '#e9c46a', '#e76f51'], alpha=0.9)",
|
||||
"ax.axhline(80, color='black', ls='--', lw=0.8, label='80% easy target')",
|
||||
"ax.set_ylabel('% of weekly time'); ax.set_ylim(0, 100)",
|
||||
"ax.set_title('Polarized-training split (weekly)')",
|
||||
"ax.legend(loc='lower left', ncol=4, fontsize=9)",
|
||||
"",
|
||||
"ax = axes[1]",
|
||||
"rolling_pct = pct.rolling(4, min_periods=2).mean()",
|
||||
"for col, c in [('easy','#2a9d8f'), ('moderate','#e9c46a'), ('hard','#e76f51')]:",
|
||||
" ax.plot(rolling_pct.index, rolling_pct[col], color=c, lw=2, label=col)",
|
||||
"ax.axhline(80, color='#2a9d8f', ls='--', lw=0.8)",
|
||||
"ax.axhline(20, color='#e76f51', ls='--', lw=0.8)",
|
||||
"ax.set_ylabel('% (4-week rolling mean)')",
|
||||
"ax.legend(loc='best', fontsize=9)",
|
||||
"fig.autofmt_xdate()",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"### Yearly summary + FIT-method coverage",
|
||||
"",
|
||||
"`fit_coverage_%` shows what fraction of each year's activities had a linked FIT (and therefore got per-second zones). 2026's lower coverage reflects activities that synced via the live API but aren't in the takeout dump.",
|
||||
))
|
||||
|
||||
# Cell: per-year hours and percentage in each HR zone, plus the share of activities
# whose zones came from the FIT file. Percentages are derived from unrounded hours
# (rounding happens only for display), and FIT coverage is a plain grouped mean
# instead of groupby.apply(..., include_groups=False).
cells.append(code(
    "# Keep full precision for the maths; round only what is displayed.",
    "yearly_hours = tiz.groupby('year')[wk_cols].sum() / 3600",
    "yearly_hours.columns = ['Z1','Z2','Z3','Z4','Z5']",
    "yearly_pct = yearly_hours.div(yearly_hours.sum(axis=1).replace(0, np.nan), axis=0) * 100",
    "",
    "out = pd.concat({'hours': yearly_hours.round(1), '%': yearly_pct.round(1)}, axis=1)",
    "out['easy_pct'] = (yearly_pct[['Z1','Z2']].sum(axis=1)).round(1)",
    "out['hard_pct'] = (yearly_pct[['Z4','Z5']].sum(axis=1)).round(1)",
    "# Share of each year's activities whose time-in-zone came from the FIT file.",
    "fit_coverage = (tiz['source'].eq('fit').groupby(tiz['year']).mean() * 100).round(0)",
    "out['fit_coverage_%'] = fit_coverage",
    "out",
))
|
||||
|
||||
cells.append(md(
|
||||
"### Race-build vs base-period zone distribution",
|
||||
"",
|
||||
"Compare what training looked like in the 12 weeks before each prior 50K race vs the rest of the year. A serious build should shift time into Z2 (long aerobic) and Z4 (threshold/tempo) and away from Z3.",
|
||||
))
|
||||
|
||||
# Cell: label each activity as 'base' or 'build <year>' (the 12 weeks before a race)
# and compare the easy / junk / hard split per phase. Hours and percentages stay
# unrounded until the final display so rounding errors do not compound.
cells.append(code(
    "race_dates = race_meta['start_time_local']",
    "tiz['phase'] = 'base'",
    "for rd in race_dates:",
    "    build_start = rd - pd.Timedelta(weeks=12)",
    "    mask = (tiz['start_time_local'] >= build_start) & (tiz['start_time_local'] < rd)",
    "    tiz.loc[mask, 'phase'] = f'build {rd.year}'",
    "",
    "# Full precision here; round once, at display time.",
    "phase_hours = tiz.groupby('phase')[wk_cols].sum() / 3600",
    "phase_hours.columns = ['Z1','Z2','Z3','Z4','Z5']",
    "phase_pct = phase_hours.div(phase_hours.sum(axis=1).replace(0, np.nan), axis=0) * 100",
    "phase_pct['easy_Z1+Z2'] = phase_pct['Z1'] + phase_pct['Z2']",
    "phase_pct['junk_Z3'] = phase_pct['Z3']",
    "phase_pct['hard_Z4+Z5'] = phase_pct['Z4'] + phase_pct['Z5']",
    "phase_pct[['easy_Z1+Z2','junk_Z3','hard_Z4+Z5']].sort_index().round(1)",
))
|
||||
|
||||
cells.append(md(
|
||||
"## What's left when FIT files are ingested",
|
||||
"",
|
||||
"All four sections now use per-second FIT data where it's linked (349 of 378 activities, 92%). Remaining lap-only activities are mostly old multi-sport / triathlon legs that no FIT was uploaded for. Useful follow-ups:",
|
||||
"",
|
||||
"- **Cadence stability** — plot cadence over elapsed time within a long run; quantify the drop in the final 15 %.",
|
||||
"- **GPS polylines for route clustering** — current §3 uses start coordinates only; with full FIT GPS tracks, match routes by Hausdorff distance (more accurate than start-only).",
|
||||
"- **Decoupling vs fueling protocol** — once the user logs even informal fueling notes for a few long runs, regress decoupling against carb intake.",
|
||||
))
|
||||
|
||||
# Assemble the notebook document from the cells accumulated above.
notebook = {
    "cells": cells,
    "metadata": {
        "kernelspec": {"display_name": ".venv", "language": "python", "name": "python3"},
        "language_info": {"name": "python"},
    },
    "nbformat": 4,
    # NOTE(review): nbformat 4.5 expects an `id` on every cell — confirm the cell
    # helpers in this script add one, otherwise declare minor version 4.
    "nbformat_minor": 5,
}

# Write the notebook next to this script, then report its size and cell count.
out = Path(__file__).parent / "05_intra_run.ipynb"
out.write_text(json.dumps(notebook, indent=1))
print(f"wrote {out} ({out.stat().st_size:,} bytes, {len(cells)} cells)")
|
||||
547
examples/notebooks/_build_06.py
Normal file
547
examples/notebooks/_build_06.py
Normal file
@@ -0,0 +1,547 @@
|
||||
"""One-shot builder for notebooks/06_race_plan.ipynb.
|
||||
|
||||
Run with: uv run python notebooks/_build_06.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def md(*lines: str) -> dict:
    """Return a markdown cell dict whose ``source`` is built from ``lines``."""
    cell = {"cell_type": "markdown", "metadata": {}}
    cell["source"] = _join(lines)
    return cell
|
||||
|
||||
|
||||
def code(*lines: str) -> dict:
    """Return an unexecuted code cell dict (no outputs) whose ``source`` is built from ``lines``."""
    return dict(
        cell_type="code",
        execution_count=None,
        metadata={},
        outputs=[],
        source=_join(lines),
    )
|
||||
|
||||
|
||||
def _join(lines):
|
||||
text = "\n".join(lines)
|
||||
parts = text.split("\n")
|
||||
return [p + ("\n" if i < len(parts) - 1 else "") for i, p in enumerate(parts)]
|
||||
|
||||
|
||||
cells: list[dict] = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"# 06 — Race plan & tracker",
|
||||
"",
|
||||
"**Three-race progression in 17 weeks:**",
|
||||
"",
|
||||
"| race | date | week | role |",
|
||||
"|---------------|--------------|------|---------------------------------------------|",
|
||||
"| **30K** | Sat 2026-06-13 | wk 4 | hard long run; race-day fuel/kit rehearsal |",
|
||||
"| **50K** | Sat 2026-07-25 | wk 10 | peak-fitness tune-up; race-pace calibration |",
|
||||
"| **50 mile** | Sat 2026-09-12 | wk 17 | A-race |",
|
||||
"",
|
||||
"Plan philosophy: the two earlier races *are* the tune-ups — no separate dress rehearsals needed. The 50K does double-duty as the biggest pre-race effort, leaving 7 weeks for one final back-to-back peak, taper, and race.",
|
||||
"",
|
||||
"Built on Ethan's proven 2023–2025 50K formula (~22 km/wk mean, ~29 km longest training run) scaled up for the 50-mile. Recorded Garmin volume **does not** include hikes, strength, or unrecorded efforts — adherence numbers below are a floor, not a ceiling.",
|
||||
))
|
||||
|
||||
# Imports + plan definition
|
||||
cells.append(code(
|
||||
"import sys; sys.path.insert(0, '..')",
|
||||
"import numpy as np",
|
||||
"import pandas as pd",
|
||||
"import matplotlib.pyplot as plt",
|
||||
"from analysis import open_conn, load_activities",
|
||||
"",
|
||||
"PLAN_START = pd.Timestamp('2026-05-18')",
|
||||
"RACES = {",
|
||||
" 'wk 4 — 30K': pd.Timestamp('2026-06-13'),",
|
||||
" 'wk 10 — 50K': pd.Timestamp('2026-07-25'),",
|
||||
" 'wk 17 — 50 MILE': pd.Timestamp('2026-09-12'),",
|
||||
"}",
|
||||
"RACE_DATE = RACES['wk 17 — 50 MILE']",
|
||||
"TODAY = pd.Timestamp.today().normalize()",
|
||||
"",
|
||||
"_rows = [",
|
||||
" # phase, kind, long_km, week_km, notes",
|
||||
" ('P1: base', 'build', 22, 30, 'rebuild frequency; long-run HR ≤ 145'),",
|
||||
" ('P1: base', 'build', 26, 38, 'add 1 trail/vert run; practice fueling'),",
|
||||
" ('P1: base', 'pre-race ease', 22, 35, 'short shakeouts late-week, legs fresh for Sat'),",
|
||||
" ('P1: 30K', '30K RACE', 30, 40, 'aerobic effort, race-day fuel + kit rehearsal'),",
|
||||
" ('P2: build', 'recovery', 18, 28, 'one week easy; trail/strength fine, no hard runs'),",
|
||||
" ('P2: build', 'build', 28, 45, ''),",
|
||||
" ('P2: build', 'build', 32, 55, 'fueling: 60–90 g carb/hr non-negotiable on long'),",
|
||||
" ('P2: build', 'peak before 50K', 35, 60, 'optional B2B: 28 Sat + 15 Sun'),",
|
||||
" ('P2: build', 'pre-race taper', 22, 42, 'cut volume ~30%, keep frequency'),",
|
||||
" ('P3: 50K', '50K RACE', 52, 60, 'controlled effort; this is the calibration run'),",
|
||||
" ('P4: recover', 'deep recovery', 15, 30, 'no hard efforts; walk-jog week'),",
|
||||
" ('P4: recover', 'easy rebuild', 25, 50, 'all runs by feel, HR < 150'),",
|
||||
" ('P5: peak', 'build', 35, 70, ''),",
|
||||
" ('P5: peak', 'B2B peak', 40, 80, 'BACK-TO-BACK: 40 km Sat + 22 km Sun, full race kit. The single most important week.'),",
|
||||
" ('P6: taper', 'taper', 28, 55, ''),",
|
||||
" ('P6: taper', 'deep taper', 18, 35, 'last meaningful long run'),",
|
||||
" ('P6: 50 MILE', '50 MILE RACE', 80, 90, 'shakeouts 5–6 km early-week; RACE Sat'),",
|
||||
"]",
|
||||
"PLAN = pd.DataFrame(_rows, columns=['phase','kind','long_run_km','weekly_km','notes'])",
|
||||
"PLAN.index = pd.date_range(PLAN_START, periods=len(PLAN), freq='W-MON')",
|
||||
"PLAN.index.name = 'week_start'",
|
||||
"PLAN['week_num'] = range(1, len(PLAN) + 1)",
|
||||
"PLAN['date_range'] = [f\"{d.strftime('%b %d')}–{(d + pd.Timedelta(days=6)).strftime('%b %d')}\" for d in PLAN.index]",
|
||||
"PLAN[['week_num','date_range','phase','kind','long_run_km','weekly_km','notes']]",
|
||||
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"## Plan vs actual (tracker)",
|
||||
"",
|
||||
"Coloured `status` column scores each elapsed week against planned km:",
|
||||
"",
|
||||
"- **on track** — 85–115% of plan",
|
||||
"- **over** — > 115% *(usually fine; watch fatigue if two in a row)*",
|
||||
"- **under** — 50–85% *(recoverable)*",
|
||||
"- **missed** — < 50% *(adjust the plan; don't try to make it up next week)*",
|
||||
"- **—** — future week, not scored",
|
||||
"",
|
||||
"Race weeks (4 / 10 / 17) are bolded.",
|
||||
))
|
||||
|
||||
# Cell: join recorded weekly running volume onto PLAN and score each elapsed week.
# PLAN is indexed by Mondays (date_range with freq='W-MON'). A pandas weekly *period*
# ends on its anchor day, so runs must be bucketed with 'W-SUN' (Mon–Sun) for the
# period start to be that same Monday; 'W-MON' periods start on Tuesday and the
# join would never match, leaving every week at 0 km.
cells.append(code(
    "conn = open_conn()",
    "acts = load_activities(conn)",
    "runs = acts[acts['activity_type'].isin(['running','trail_running'])].copy()",
    "# Weekly periods end on the anchor day: 'W-SUN' = Mon–Sun, so start_time is the",
    "# Monday that PLAN is indexed by ('W-MON' would start on Tuesday and never join).",
    "runs['week_start'] = runs['start_time_local'].dt.to_period('W-SUN').dt.start_time",
    "",
    "weekly_actual = runs.groupby('week_start').agg(",
    "    actual_km=('distance_km', 'sum'),",
    "    longest_run_km=('distance_km', 'max'),",
    "    n_runs=('activity_id', 'size'),",
    ").round(1)",
    "",
    "tracker = PLAN.join(weekly_actual, how='left')",
    "tracker['actual_km'] = tracker['actual_km'].fillna(0)",
    "tracker['longest_run_km'] = tracker['longest_run_km'].fillna(0)",
    "tracker['n_runs'] = tracker['n_runs'].fillna(0).astype(int)",
    "tracker['weekly_delta_km'] = (tracker['actual_km'] - tracker['weekly_km']).round(1)",
    "tracker['long_run_delta_km'] = (tracker['longest_run_km'] - tracker['long_run_km']).round(1)",
    "",
    "elapsed_mask = tracker.index <= TODAY",
    "ratio = tracker['actual_km'] / tracker['weekly_km']",
    "status = pd.Series('—', index=tracker.index)",
    "status[elapsed_mask & (ratio >= 0.85) & (ratio < 1.15)] = 'on track'",
    "status[elapsed_mask & (ratio >= 1.15)] = 'over'",
    "status[elapsed_mask & (ratio >= 0.50) & (ratio < 0.85)] = 'under'",
    "status[elapsed_mask & (ratio < 0.50)] = 'missed'",
    "tracker['status'] = status",
    "tracker['is_race'] = tracker['kind'].str.contains('RACE', na=False)",
    "",
    "view = tracker[['week_num', 'date_range', 'phase', 'kind',",
    "                'long_run_km', 'longest_run_km', 'long_run_delta_km',",
    "                'weekly_km', 'actual_km', 'weekly_delta_km',",
    "                'n_runs', 'status', 'notes']]",
    "",
    "_status_colors = {",
    "    'on track': 'background-color:#a8dadc',",
    "    'over': 'background-color:#fff3b0',",
    "    'under': 'background-color:#f4a261',",
    "    'missed': 'background-color:#e76f51;color:white',",
    "    '—': 'color:#bbb',",
    "}",
    "",
    "def _color_status(v):",
    "    return _status_colors.get(v, '')",
    "",
    "def _bold_race(row):",
    "    return ['font-weight:bold' if tracker.loc[row.name, 'is_race'] else ''] * len(row)",
    "",
    "(view.style",
    "     .map(_color_status, subset=['status'])",
    "     .apply(_bold_race, axis=1))",
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"## Where are we?",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"elapsed = tracker[tracker.index <= TODAY]",
|
||||
"weeks_done = len(elapsed)",
|
||||
"weeks_left = len(tracker) - weeks_done",
|
||||
"",
|
||||
"print(f'Today: {TODAY.date()} Race day: {RACE_DATE.date()} ({(RACE_DATE - TODAY).days} days / ~{(RACE_DATE - TODAY).days // 7} weeks to A-race)')",
|
||||
"print(f'Plan progress: {weeks_done}/{len(tracker)} weeks elapsed, {weeks_left} remaining')",
|
||||
"print()",
|
||||
"print('Upcoming races:')",
|
||||
"for label, d in RACES.items():",
|
||||
" days = (d - TODAY).days",
|
||||
" marker = '✓ done' if days < 0 else f'in {days} d'",
|
||||
" print(f' {label:25s} {d.date()} ({marker})')",
|
||||
"",
|
||||
"print()",
|
||||
"if weeks_done == 0:",
|
||||
" nxt = tracker.iloc[0]",
|
||||
" print(f'Plan starts {tracker.index[0].date()} — first week:')",
|
||||
" print(f' wk 1 ({nxt.date_range}): {nxt.phase} — {nxt.kind}')",
|
||||
" print(f' target {nxt.weekly_km} km, long run {nxt.long_run_km} km')",
|
||||
" if nxt.notes: print(f' note: {nxt.notes}')",
|
||||
"else:",
|
||||
" cur = elapsed.iloc[-1]",
|
||||
" print(f'Current week (wk {int(cur.week_num)}, {cur.date_range}):')",
|
||||
" print(f' phase: {cur.phase} — {cur.kind}')",
|
||||
" print(f' target: {cur.weekly_km} km, long run {cur.long_run_km} km')",
|
||||
" print(f' actual: {cur.actual_km} km, longest {cur.longest_run_km} km ({cur.n_runs} runs)')",
|
||||
" print(f' status: {cur.status}')",
|
||||
" if cur.notes: print(f' note: {cur.notes}')",
|
||||
" if weeks_left > 0:",
|
||||
" nxt = tracker.iloc[weeks_done]",
|
||||
" print()",
|
||||
" print(f'Next up (wk {int(nxt.week_num)}, {nxt.date_range}):')",
|
||||
" print(f' {nxt.phase} — {nxt.kind}: {nxt.weekly_km} km, long {nxt.long_run_km} km')",
|
||||
" if nxt.notes: print(f' note: {nxt.notes}')",
|
||||
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"## Weekly volume — planned vs actual",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"fig, ax = plt.subplots(figsize=(15, 5.5))",
|
||||
"x = np.arange(len(tracker))",
|
||||
"w = 0.4",
|
||||
"ax.bar(x - w/2, tracker['weekly_km'], width=w, color='#264653', label='planned', alpha=0.9)",
|
||||
"",
|
||||
"_bar_colors = {",
|
||||
" 'on track': '#2a9d8f',",
|
||||
" 'over': '#e9c46a',",
|
||||
" 'under': '#f4a261',",
|
||||
" 'missed': '#9b2226',",
|
||||
" '—': '#dcdcdc',",
|
||||
"}",
|
||||
"actual_colors = [_bar_colors.get(s, '#dcdcdc') for s in tracker['status']]",
|
||||
"ax.bar(x + w/2, tracker['actual_km'], width=w, color=actual_colors, label='actual', alpha=0.95)",
|
||||
"",
|
||||
"max_y = max(tracker['weekly_km'].max(), tracker['actual_km'].max()) * 1.22",
|
||||
"ax.set_ylim(0, max_y)",
|
||||
"",
|
||||
"# Phase shading + labels",
|
||||
"phase_groups = tracker.reset_index().groupby('phase', sort=False)",
|
||||
"for p, g in phase_groups:",
|
||||
" i0, i1 = int(g.index.min()), int(g.index.max())",
|
||||
" ax.axvspan(i0 - 0.5, i1 + 0.5, color='gray', alpha=0.05)",
|
||||
" ax.text((i0 + i1) / 2, max_y * 0.96, p, ha='center', fontsize=8.5, color='#444')",
|
||||
"",
|
||||
"# Race markers — star above the actual bar",
|
||||
"race_x = np.where(tracker['is_race'].to_numpy())[0]",
|
||||
"for rx in race_x:",
|
||||
" ax.scatter([rx], [max_y * 0.88], marker='*', s=220, color='#d62828', zorder=10)",
|
||||
" ax.text(rx, max_y * 0.82, tracker['kind'].iloc[rx].replace(' RACE',''),",
|
||||
" ha='center', fontsize=8.5, color='#d62828', fontweight='bold')",
|
||||
"",
|
||||
"today_x = sum(tracker.index <= TODAY) - 0.5",
|
||||
"if -0.5 <= today_x <= len(tracker) - 0.5:",
|
||||
" ax.axvline(today_x, color='red', ls=':', lw=1.5, label='today')",
|
||||
"",
|
||||
"ax.set_xticks(x)",
|
||||
"ax.set_xticklabels([f'wk{n}' for n in tracker['week_num']], fontsize=8)",
|
||||
"ax.set_ylabel('km / week')",
|
||||
"ax.set_title('Weekly volume')",
|
||||
"ax.legend(loc='upper left')",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"## Long-run progression",
|
||||
"",
|
||||
"Race weeks (red stars) replace the planned long run with the race itself. Note the staircase: 22 → 26 → 22 (pre-race ease) → **30K** → recover → 28 → 32 → 35 → 22 (pre-race ease) → **50K** → recover → 25 → 35 → **40 (B2B peak)** → 28 → 18 → **50 mile**.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"fig, ax = plt.subplots(figsize=(15, 4.5))",
|
||||
"x = np.arange(len(tracker))",
|
||||
"ax.plot(x, tracker['long_run_km'], 'o-', color='#264653', lw=2, label='planned long run')",
|
||||
"",
|
||||
"el = np.asarray(tracker.index <= TODAY)",
|
||||
"if el.any():",
|
||||
" ax.scatter(x[el], tracker.loc[el, 'longest_run_km'].to_numpy(),",
|
||||
" s=80, color='#e76f51', zorder=5, label='actual longest of week')",
|
||||
"",
|
||||
"race_x = np.where(tracker['is_race'].to_numpy())[0]",
|
||||
"ax.scatter(race_x, tracker['long_run_km'].iloc[race_x].to_numpy(),",
|
||||
" marker='*', s=280, color='#d62828', zorder=10, label='race')",
|
||||
"",
|
||||
"today_x = sum(tracker.index <= TODAY) - 0.5",
|
||||
"if -0.5 <= today_x <= len(tracker) - 0.5:",
|
||||
" ax.axvline(today_x, color='red', ls=':', lw=1.5)",
|
||||
"",
|
||||
"ax.set_xticks(x)",
|
||||
"ax.set_xticklabels([f'wk{n}' for n in tracker['week_num']], fontsize=8)",
|
||||
"ax.set_ylabel('km')",
|
||||
"ax.set_title('Long-run progression — planned, actual, races')",
|
||||
"ax.legend(loc='upper left')",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"## Cumulative volume",
|
||||
"",
|
||||
"The forgiving lens — a missed week is recoverable as long as the cumulative line is within ~10% of plan. Chronic divergence for 3+ weeks is the signal to adjust the plan, not push harder.",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"tracker['cum_planned'] = tracker['weekly_km'].cumsum()",
|
||||
"actual_for_cum = tracker['actual_km'].where(tracker.index <= TODAY, other=np.nan)",
|
||||
"tracker['cum_actual'] = actual_for_cum.cumsum()",
|
||||
"",
|
||||
"fig, ax = plt.subplots(figsize=(15, 4.5))",
|
||||
"ax.plot(tracker.index, tracker['cum_planned'], color='#264653', lw=2, label='planned cumulative')",
|
||||
"ax.plot(tracker.index, tracker['cum_actual'], color='#2a9d8f', lw=2, label='actual cumulative')",
|
||||
"",
|
||||
"for label, d in RACES.items():",
|
||||
" if tracker.index.min() <= d <= tracker.index.max() + pd.Timedelta(days=7):",
|
||||
" ax.axvline(d, color='#d62828', alpha=0.5, lw=1)",
|
||||
" ax.text(d, ax.get_ylim()[1] * 0.02, label.split('—')[1].strip(),",
|
||||
" rotation=90, ha='right', va='bottom', fontsize=8, color='#d62828')",
|
||||
"",
|
||||
"if PLAN_START <= TODAY <= tracker.index.max() + pd.Timedelta(days=7):",
|
||||
" ax.axvline(TODAY, color='red', ls=':', lw=1.5, label='today')",
|
||||
"",
|
||||
"ax.set_ylabel('cumulative km')",
|
||||
"ax.set_title('Cumulative volume')",
|
||||
"ax.legend(loc='upper left')",
|
||||
"fig.autofmt_xdate()",
|
||||
"fig.tight_layout()",
|
||||
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"## Adherence summary",
|
||||
))
|
||||
|
||||
cells.append(code(
|
||||
"elapsed_only = tracker[tracker.index <= TODAY]",
|
||||
"if len(elapsed_only) == 0:",
|
||||
" print('Plan has not started yet — no completed weeks to score.')",
|
||||
"else:",
|
||||
" counts = (elapsed_only['status']",
|
||||
" .value_counts()",
|
||||
" .reindex(['on track', 'over', 'under', 'missed'])",
|
||||
" .fillna(0).astype(int))",
|
||||
" print('Completed weeks by status:')",
|
||||
" print(counts.to_string())",
|
||||
" total_planned = elapsed_only['weekly_km'].sum()",
|
||||
" total_actual = elapsed_only['actual_km'].sum()",
|
||||
" print()",
|
||||
" print(f'Planned km through today: {total_planned:.0f}')",
|
||||
" print(f'Actual km through today: {total_actual:.0f}')",
|
||||
" print(f'Recorded adherence: {total_actual / total_planned * 100:.0f}% of plan')",
|
||||
" print()",
|
||||
" print('Off-watch training (hikes, strength, unrecorded runs) is not in this number.')",
|
||||
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"## Projected fitness / fatigue / form (Banister PMC)",
|
||||
"",
|
||||
"Combine historical training-load (from `activities.training_load`) with a forecast built from the plan above to project **CTL / ATL / TSB** through race day. The shape matters more than the absolute values — a clean taper should land TSB in **+10 to +25** on Sept 12.",
|
||||
"",
|
||||
"Forecasting assumption: each plan week's km are converted to daily training-load by multiplying weekly km × the historical median TL/km from recent running, then distributing evenly Mon–Sun. The Banister EWMAs (τ=42 for CTL, τ=7 for ATL) smooth out the day-of-week pattern anyway.",
|
||||
))
|
||||
|
||||
# Forecast cell: derives a TL/km factor from the last 12 months of runs, expands
# each plan week into daily loads, splices that onto the historical series and
# feeds the result to `banister`. Uses `conn`, `pd`, `RACES`, `tracker` and
# `TODAY`, none of which are defined in this cell.
cells.append(code(
    "from analysis import banister, daily_training_load_series",
    "",
    "# Historical daily load (running + trail) up to today",
    "hist = daily_training_load_series(conn)",
    "",
    "# TL/km conversion factor — median across the last 12 months of running",
    "recent_acts = pd.read_sql('''",
    "    SELECT distance_m, training_load",
    "    FROM activities",
    "    WHERE activity_type IN ('running','trail_running')",
    "      AND training_load IS NOT NULL",
    "      AND date(start_time_local) >= date('now','-12 months')",
    "      AND distance_m >= 2000",
    "''', conn)",
    "recent_acts['tl_per_km'] = recent_acts['training_load'] / (recent_acts['distance_m'] / 1000)",
    "tl_per_km = recent_acts['tl_per_km'].median()",
    "# An empty query result gives a NaN median, which would silently turn the whole forecast into NaN.",
    "if pd.isna(tl_per_km):",
    "    raise ValueError('No running activities with training_load in the last 12 months; cannot derive TL/km.')",
    "print(f'historical TL/km (last 12 mo, median): {tl_per_km:.1f}')",
    "",
    "# Build forecast: distribute weekly load across days.",
    "# For race weeks: race day gets most of the km, lead-in days are tapered shakeouts.",
    "# For training weeks: long run Sat gets ~40%, two mid-week runs ~20% each, rest distributed.",
    "# Race-day TL/km is empirically lower (~7) than training (~11) — long ultras spread out load.",
    "RACE_DAY_TL_PER_KM = 7.0  # observed from prior 50K races (mean ~7.5)",
    "",
    "race_day_set = set(RACES.values())",
    "forecast_rows = []",
    "for week_start, row in tracker.iterrows():",
    "    week_days = [week_start + pd.Timedelta(days=i) for i in range(7)]",
    "    is_race_week = any(d in race_day_set for d in week_days)",
    "    if is_race_week:",
    "        race_d = next(d for d in week_days if d in race_day_set)",
    "        # Race itself: long_run_km on race day at race TL/km",
    "        race_load = row['long_run_km'] * RACE_DAY_TL_PER_KM",
    "        # Remaining km this week (shakeouts) at training TL/km",
    "        rem_km = max(row['weekly_km'] - row['long_run_km'], 0)",
    "        rem_load = rem_km * tl_per_km",
    "        # Mon/Wed/Fri shakeouts, but only on days before the race; dividing by the",
    "        # actual count keeps the week's load intact when the race is not on a weekend.",
    "        shakeout_days = [d for d in week_days if d < race_d and d.weekday() in (0, 2, 4)]",
    "        for d in week_days:",
    "            if d == race_d:",
    "                forecast_rows.append((d, race_load))",
    "            elif d in shakeout_days:",
    "                forecast_rows.append((d, rem_load / len(shakeout_days)))",
    "            else:",
    "                forecast_rows.append((d, 0.0))",
    "    else:",
    "        # Training week: long run on Sat (40%), Tue+Thu mid-runs (20% each), Mon/Wed easy (10% each)",
    "        weights = {0:0.10, 1:0.20, 2:0.10, 3:0.20, 4:0.00, 5:0.40, 6:0.00}",
    "        total_load = row['weekly_km'] * tl_per_km",
    "        for d in week_days:",
    "            forecast_rows.append((d, total_load * weights.get(d.weekday(), 0)))",
    "forecast = pd.Series(dict(forecast_rows))",
    "forecast.index.name = 'd'",
    "",
    "# Splice: historical actual until today, forecast from tomorrow onward",
    "combined = pd.concat([",
    "    hist[hist.index <= TODAY],",
    "    forecast[forecast.index > TODAY],",
    "]).sort_index()",
    "combined = combined[~combined.index.duplicated(keep='first')]",
    "",
    "pmc = banister(combined)",
    "print(f'PMC range: {pmc.index.min().date()} → {pmc.index.max().date()}')",
))
|
||||
|
||||
# Plot cell: CTL/ATL on the top axis, TSB with the race-ready and overload bands
# on the bottom axis, and one annotation per entry of `RACES` found in `pmc`.
cells.append(code(
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 7.5), sharex=True)",
    "",
    "# Fitness + fatigue",
    "ax1.plot(pmc.index, pmc['CTL'], color='#2a9d8f', lw=2.2, label='CTL (fitness, 42d)')",
    "ax1.plot(pmc.index, pmc['ATL'], color='#e76f51', lw=1.2, alpha=0.85, label='ATL (fatigue, 7d)')",
    "ax1.axvline(TODAY, color='red', ls=':', lw=1.5)",
    "ax1.text(TODAY, ax1.get_ylim()[1]*0.95, ' today ', color='red', fontsize=8, va='top')",
    "ax1.set_ylabel('training load')",
    "ax1.legend(loc='upper left')",
    "ax1.grid(alpha=0.3)",
    "ax1.set_title('Projected Performance Management Chart — historical + plan-based forecast')",
    "",
    "# Form (TSB)",
    "ax2.plot(pmc.index, pmc['TSB'], color='#264653', lw=1.5)",
    "ax2.axhspan(10, 25, color='#2a9d8f', alpha=0.12, label='race-ready (+10 to +25)')",
    "ax2.axhspan(-30, -10, color='#e9c46a', alpha=0.12, label='productive overload (−30 to −10)')",
    "ax2.axhline(0, color='gray', lw=0.6)",
    "ax2.axhline(-30, color='#e76f51', ls='--', lw=0.8)",
    "ax2.axvline(TODAY, color='red', ls=':', lw=1.5)",
    "",
    "# Annotate planned races",
    "for label, d in RACES.items():",
    "    if d in pmc.index:",
    "        tsb = pmc.loc[d, 'TSB']",
    "        ax2.axvline(d, color='#d62828', alpha=0.55, lw=1)",
    "        ax2.scatter([d], [tsb], color='#d62828', s=70, zorder=5)",
    "        # [-1] keeps the text after the em dash and falls back to the whole label when there is none.",
    "        ax2.annotate(f\"{label.split('—')[-1].strip()}\\nTSB={tsb:+.0f}\",",
    "                     xy=(d, tsb), xytext=(8, 12),",
    "                     textcoords='offset points', fontsize=8.5, color='#d62828',",
    "                     fontweight='bold')",
    "",
    "ax2.set_ylabel('TSB (form)')",
    "ax2.legend(loc='lower left', fontsize=9)",
    "ax2.grid(alpha=0.3)",
    "fig.autofmt_xdate()",
    "fig.tight_layout()",
))
|
||||
|
||||
# Text report cell: prints CTL/ATL/TSB and a readiness tag for each planned race,
# then the CTL/TSB values on a fixed list of earlier race dates for comparison.
cells.append(code(
    "# Race-day TSB check",
    "print('Projected fitness/fatigue/form on each race day:')",
    "print()",
    "for label, d in RACES.items():",
    "    if d not in pmc.index:",
    "        print(f' {label}: outside PMC range')",
    "        continue",
    "    row = pmc.loc[d]",
    "    tsb = row['TSB']",
    "    if tsb < -30: tag = 'severely fatigued — UNSAFE'",
    "    elif tsb < -10: tag = 'productive overload — not a race-day state'",
    "    elif tsb < 0: tag = 'balanced — slightly underrested'",
    "    elif tsb < 10: tag = 'sharpening — taper short'",
    "    # Upper bound is inclusive so the tag agrees with the +10 to +25 band drawn on the chart.",
    "    elif tsb <= 25: tag = 'fresh / peaked — IDEAL race-day window'",
    "    else: tag = 'detrained — taper too long'",
    "    print(f' {label} {d.date()}')",
    "    print(f' CTL={row[\"CTL\"]:>5.1f} ATL={row[\"ATL\"]:>5.1f} TSB={tsb:>+5.1f} → {tag}')",
    "    print()",
    "",
    "# Historical comparison — what TSB did prior 50K races land at?",
    "print('Historical comparison — TSB on prior 50K races:')",
    "race_history = pd.to_datetime(['2023-09-23','2024-09-21','2025-09-06','2025-09-20'])",
    "for rd in race_history:",
    "    if rd in pmc.index:",
    "        print(f' {rd.date()} CTL={pmc.loc[rd,\"CTL\"]:>5.1f} TSB={pmc.loc[rd,\"TSB\"]:+5.1f}')",
    "    else:",
    "        # Say so explicitly instead of dropping the row, so a short history is visible.",
    "        print(f' {rd.date()} outside PMC range')",
))
|
||||
|
||||
# Interpretation notes for the PMC chart and the race-day TSB printout.
cells.append(md(
    "**Reading the chart**",
    "",
    "- **CTL slope after today** is what the plan *promises*. A flat or declining CTL means the plan isn't building fitness — usually because volume isn't ramping fast enough (or you've already peaked).",
    "- **ATL spikes** at each race are expected — that's the race effort hitting the 7-day window. During the taper into a race, ATL bleeds off faster than CTL.",
    "- **TSB on race day** is the actionable number. If a race lands below +10, the plan's taper into that race is too short; if above +25, too long.",
    "- The two earlier races (30K, 50K) are mid-build B-races; **TSB at those races doesn't need to be in the +10–25 sweet spot** — slightly negative is fine, since they're training stimuli, not peak performances.",
    "- The **50-mile (wk 17)** is the A-race; TSB here is the one that matters. Adjust the wk 14–17 plan rows if it's outside +10 to +25.",
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
cells.append(md(
|
||||
"## Race-specific notes",
|
||||
"",
|
||||
"### Wk 4 — 30K (June 13)",
|
||||
"",
|
||||
"- **Role:** longest pre-race long run + first dress rehearsal. Don't taper hard; this is training.",
|
||||
"- **Effort:** controlled aerobic, target avg HR < 155. Negative split is the win.",
|
||||
"- **Rehearse:** pack, shoes, nutrition cadence, salt. Whatever fails here gets fixed before the 50K.",
|
||||
"- **Recovery:** one easy week (wk 5) is enough.",
|
||||
"",
|
||||
"### Wk 10 — 50K (July 25)",
|
||||
"",
|
||||
"- **Role:** real race, but also the calibration run for the 50-mile. Pace it at projected 50-mile effort + 5–10 bpm.",
|
||||
"- **Effort:** controlled. Goal is to finish strong, not PR.",
|
||||
"- **Calibration:** the average HR you can hold for this distance comfortably ≈ your 50-mile ceiling. Note the number. Use it Sep 12.",
|
||||
"- **Recovery:** 2 weeks before resuming hard training (wk 11 deep recovery, wk 12 easy rebuild).",
|
||||
"",
|
||||
"### Wk 17 — 50 MILE (September 12)",
|
||||
"",
|
||||
"- **Pacing rule:** the average HR from the 50K is the *ceiling*, not the target. Start 5–10 bpm below it.",
|
||||
"- **Fueling:** 60–90 g carb/hr from minute 30. Don't skip aid stations.",
|
||||
"- **Walking strategy:** walk every climb from the start. The race is won in the final 20 km by people who walked the first 20 climbs.",
|
||||
"- **Drop-bag essentials:** chafe-prevention, headlamp/spare batteries if late finish, dry socks at midway.",
|
||||
))
|
||||
|
||||
cells.append(md(
|
||||
"## Key sessions to nail",
|
||||
"",
|
||||
"If everything else slips, these workouts move finish probability most:",
|
||||
"",
|
||||
"- **Wk 4 — 30K race.** Validates fueling and kit. A messy 30K is a 50K-fix-it list.",
|
||||
"- **Wk 8 — peak long before 50K** (35 km, optionally B2B 28+15). Last big training stimulus before the calibration race.",
|
||||
"- **Wk 10 — 50K race.** The calibration. Pace data here drives 50-mile pacing.",
|
||||
"- **Wk 14 — B2B peak (40 + 22).** The single most important week. Running on tired legs is the 50-mile race in miniature.",
|
||||
"- **Wk 17 — race week.** Don't add miles. Stay healthy. Show up.",
|
||||
))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Assemble the notebook document (nbformat 4.5 header, `.venv` python3 kernel)
# from the accumulated cells and write it next to this script.
notebook = {
    "cells": cells,
    "metadata": {
        "kernelspec": {"display_name": ".venv", "language": "python", "name": "python3"},
        "language_info": {"name": "python"},
    },
    "nbformat": 4,
    "nbformat_minor": 5,
}

out = Path(__file__).parent / "06_race_plan.ipynb"
# ensure_ascii=False with explicit UTF-8 keeps characters such as "—", "→" and "τ"
# literal in the file instead of \uXXXX escapes, and the trailing newline matches
# what Jupyter writes, so re-saving the notebook does not produce a noisy diff.
out.write_text(
    json.dumps(notebook, indent=1, ensure_ascii=False) + "\n",
    encoding="utf-8",
)
print(f"wrote {out} ({out.stat().st_size:,} bytes, {len(cells)} cells)")
|
||||
181
examples/notebooks/exploration.ipynb
Normal file
181
examples/notebooks/exploration.ipynb
Normal file
@@ -0,0 +1,181 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Garmin data exploration\n",
|
||||
"\n",
|
||||
"Run `uv run auth.py` and `uv run sync.py --full` once before using this notebook.\n",
|
||||
"\n",
|
||||
"In VSCode, pick the `.venv` Python interpreter at the top right of this notebook as the kernel."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlite3\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"DB = Path('..') / 'data' / 'garmin.db'\n",
|
||||
"conn = sqlite3.connect(DB)\n",
|
||||
"\n",
|
||||
"# What tables do we have, and how many rows in each?\n",
|
||||
"tables = pd.read_sql(\"SELECT name FROM sqlite_master WHERE type='table' ORDER BY name\", conn)\n",
|
||||
"for t in tables['name']:\n",
|
||||
" n = conn.execute(f'SELECT COUNT(*) FROM {t}').fetchone()[0]\n",
|
||||
" print(f'{t:30s} {n:>6}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Activities — load into pandas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"activities = pd.read_sql(\n",
|
||||
" \"SELECT activity_id, start_time_local, activity_type, activity_name, \"\n",
|
||||
" \"distance_m, duration_s, avg_hr, max_hr, calories, elevation_gain_m, \"\n",
|
||||
" \"training_load, aerobic_te, anaerobic_te \"\n",
|
||||
" \"FROM activities ORDER BY start_time_local DESC\",\n",
|
||||
" conn,\n",
|
||||
" parse_dates=['start_time_local'],\n",
|
||||
")\n",
|
||||
"activities['distance_km'] = activities['distance_m'] / 1000\n",
|
||||
"activities['duration_min'] = activities['duration_s'] / 60\n",
|
||||
"activities['pace_min_per_km'] = activities['duration_min'] / activities['distance_km']\n",
|
||||
"activities['week'] = activities['start_time_local'].dt.to_period('W').dt.start_time\n",
|
||||
"activities.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"activities['activity_type'].value_counts()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Weekly running mileage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"runs = activities[activities['activity_type'].str.contains('running', case=False, na=False)]\n",
|
||||
"weekly = runs.groupby('week')['distance_km'].sum()\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(12, 4))\n",
|
||||
"weekly.plot(kind='bar', ax=ax)\n",
|
||||
"ax.set_ylabel('km')\n",
|
||||
"ax.set_title('Weekly running mileage')\n",
|
||||
"ax.set_xlabel('')\n",
|
||||
"plt.xticks(rotation=45, ha='right')\n",
|
||||
"plt.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Sleep, stress, HRV — daily timeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"wellness = pd.read_sql(\n",
|
||||
" \"\"\"\n",
|
||||
" SELECT s.calendar_date,\n",
|
||||
" s.total_steps,\n",
|
||||
" sl.sleep_score,\n",
|
||||
" st.avg_stress,\n",
|
||||
" h.last_night_avg AS hrv,\n",
|
||||
" rh.resting_hr\n",
|
||||
" FROM daily_steps s\n",
|
||||
" LEFT JOIN daily_sleep sl ON sl.calendar_date = s.calendar_date\n",
|
||||
" LEFT JOIN daily_stress st ON st.calendar_date = s.calendar_date\n",
|
||||
" LEFT JOIN daily_hrv h ON h.calendar_date = s.calendar_date\n",
|
||||
" LEFT JOIN daily_resting_hr rh ON rh.calendar_date = s.calendar_date\n",
|
||||
" ORDER BY s.calendar_date\n",
|
||||
" \"\"\",\n",
|
||||
" conn,\n",
|
||||
" parse_dates=['calendar_date'],\n",
|
||||
").set_index('calendar_date')\n",
|
||||
"wellness.tail(14)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, axes = plt.subplots(4, 1, figsize=(12, 8), sharex=True)\n",
|
||||
"wellness['sleep_score'].plot(ax=axes[0], title='Sleep score')\n",
|
||||
"wellness['resting_hr'].plot(ax=axes[1], title='Resting HR')\n",
|
||||
"wellness['hrv'].plot(ax=axes[2], title='HRV (last night avg)')\n",
|
||||
"wellness['avg_stress'].plot(ax=axes[3], title='Avg stress')\n",
|
||||
"plt.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying the raw JSON\n",
|
||||
"\n",
|
||||
"Every table has a `raw` column with the full Garmin response. Use SQLite's JSON1 functions, or load and `pd.json_normalize`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"raw_rows = pd.read_sql('SELECT activity_id, raw FROM activities LIMIT 5', conn)\n",
|
||||
"expanded = pd.json_normalize([json.loads(r) for r in raw_rows['raw']])\n",
|
||||
"print(f'{len(expanded.columns)} columns in the raw activity payload')\n",
|
||||
"expanded.columns.tolist()[:30]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user