Files
openrun/tests/integration/test_loaders.py
2026-05-19 08:34:22 -04:00

181 lines
6.9 KiB
Python

"""Schema round-trip tests: ingest a synthetic JSON payload through the
real handler, then read it back via the loader and assert the parsed/derived
values are right.
This is the contract test for every table: any future schema or unit-handling
change has to keep these green. Fixtures are inline JSON (anonymised) rather
than pinned files — these tests exercise *parsing*, not realistic data shapes.
"""
from __future__ import annotations
import json
from pathlib import Path
import pandas as pd
import pytest
from openrun.ingest.garmin_export import (
handle_activities,
handle_body_battery,
handle_hrv,
handle_intensity_minutes,
handle_resting_hr,
handle_sleep,
handle_steps,
handle_stress,
)
from openrun.model import (
load_activities,
load_sleep_stages,
load_wellness,
)
def _write_json(tmp_path: Path, name: str, payload) -> Path:
p = tmp_path / name
p.write_text(json.dumps(payload))
return p
def _seed_steps(conn, tmp_path: Path, date: str = "2026-05-04") -> None:
    """Ingest a minimal one-step record for `date` through `handle_steps`.

    `load_wellness` joins everything onto daily_steps, so a wellness test has
    to seed at least one steps row on the date it checks; otherwise there is
    nothing to anchor the join.
    """
    anchor_record = {"calendarDate": date, "totalSteps": 1}
    anchor_file = _write_json(tmp_path, "steps_anchor.json", [anchor_record])
    handle_steps(conn, anchor_file)
# ---------------------------------------------------------------------------
# activities — exercises the Takeout scaled-int unit conversion
# ---------------------------------------------------------------------------
def test_activities_roundtrip_takeout_units(tmp_conn, tmp_path: Path) -> None:
    """Round-trip one Takeout `summarizedActivities` row through the loader.

    The synthetic row is written in the export's scaled-int units (noted per
    field below); `load_activities` must hand it back in SI, with the derived
    `distance_km` column correct.
    """
    activity = {
        "activityId": 12345678,
        "activityName": "Morning Run",
        "startTimeLocal": "2026-05-04 06:00:00",
        "startTimeGmt": "2026-05-04 10:00:00",
        "activityType": {"typeKey": "running"},
        "distance": 1_000_000,       # 10 000 m, encoded as cm
        "duration": 3_600_000,       # 3 600 s, encoded as ms
        "movingDuration": 3_500_000,
        "averageSpeed": 0.2778,      # 2.778 m/s stored as m/s ÷ 10
        "maxSpeed": 0.4167,          # 4.167 m/s stored as m/s ÷ 10
        "averageHR": 145,
        "maxHR": 168,
        "calories": 540,
        "elevationGain": 5000,       # 50 m, encoded as cm
        "elevationLoss": 4800,
        "activityTrainingLoad": 110.0,
        "aerobicTrainingEffect": 3.5,
        "anaerobicTrainingEffect": 0.4,
        "vO2MaxValue": 52.0,
    }
    export_file = _write_json(
        tmp_path,
        "activities.json",
        [{"summarizedActivitiesExport": [activity]}],
    )

    # The handler's return value is expected to be 1 for this single-row export.
    ingested = handle_activities(tmp_conn, export_file)
    assert ingested == 1

    running = load_activities(tmp_conn, type="running")
    assert len(running) == 1

    got = running.iloc[0]
    assert got["activity_id"] == 12345678
    assert got["distance_m"] == pytest.approx(10_000.0)
    assert got["distance_km"] == pytest.approx(10.0)
    assert got["duration_s"] == pytest.approx(3600.0)

    # Expected pace: 3 500 s (the movingDuration above) in minutes, over 10 km.
    moving_minutes = 3500 / 60
    assert got["pace_min_per_km"] == pytest.approx(moving_minutes / 10.0, rel=1e-4)

    assert got["avg_speed_mps"] == pytest.approx(2.778, rel=1e-3)
    assert got["elevation_gain_m"] == pytest.approx(50.0)
    assert got["training_load"] == 110.0
# ---------------------------------------------------------------------------
# Wellness daily tables — same shape: handler reads JSON, loader joins
# ---------------------------------------------------------------------------
def test_steps_roundtrip(tmp_conn, tmp_path: Path) -> None:
    """One steps record ingested by `handle_steps` comes back from `load_wellness`.

    Asserts the frame holds exactly one row and that the row indexed by the
    record's date carries the ingested `total_steps`.
    """
    record = {
        "calendarDate": "2026-05-04",
        "totalSteps": 12345,
        "stepGoal": 10000,
        "totalDistance": 9234.5,
    }
    steps_file = _write_json(tmp_path, "steps.json", [record])
    handle_steps(tmp_conn, steps_file)

    wellness = load_wellness(tmp_conn)
    assert len(wellness) == 1

    day = wellness.loc[pd.Timestamp("2026-05-04")]
    assert day["total_steps"] == 12345
def test_sleep_roundtrip_through_load_sleep_stages(tmp_conn, tmp_path: Path) -> None:
    """Check the derived columns on the handle_sleep → daily_sleep → load_sleep_stages path."""
    deep_s, light_s, rem_s, awake_s = 3600, 10800, 5400, 1800
    night = {
        "calendarDate": "2026-05-04",
        "deepSleepSeconds": deep_s,
        "lightSleepSeconds": light_s,
        "remSleepSeconds": rem_s,
        "awakeSleepSeconds": awake_s,
        "sleepScore": 78,
    }
    handle_sleep(tmp_conn, _write_json(tmp_path, "sleep.json", [night]))

    stages = load_sleep_stages(tmp_conn)
    got = stages.loc[pd.Timestamp("2026-05-04")]

    assert got["sleep_score"] == 78

    # The expected hours sum deep + light + REM only; awake seconds are left out.
    expected_hours = (deep_s + light_s + rem_s) / 3600.0
    assert got["sleep_hours"] == pytest.approx(expected_hours)

    # test_sleep_stages.py locks this invariant on the in-memory frame; it is
    # repeated here so the *roundtrip* is shown to preserve it as well.
    stage_share_total = got["deep_pct"] + got["light_pct"] + got["rem_pct"]
    assert stage_share_total == pytest.approx(1.0)
def test_stress_roundtrip(tmp_conn, tmp_path: Path) -> None:
    """A stress record ingested by `handle_stress` surfaces as `avg_stress` in `load_wellness`."""
    # Anchor row first, so the wellness frame has a row for the date under test.
    _seed_steps(tmp_conn, tmp_path)

    record = {
        "calendarDate": "2026-05-04",
        "overallStressLevel": 42,
        "maxStressLevel": 88,
    }
    stress_file = _write_json(tmp_path, "stress.json", [record])
    handle_stress(tmp_conn, stress_file)

    wellness = load_wellness(tmp_conn)
    avg_stress = wellness.loc[pd.Timestamp("2026-05-04"), "avg_stress"]
    assert avg_stress == 42
def test_hrv_roundtrip(tmp_conn, tmp_path: Path) -> None:
    """An HRV record ingested by `handle_hrv` comes back through `load_wellness`.

    Checks the `hrv_last_night`, `hrv_weekly` and `hrv_status` columns for the
    seeded date.
    """
    # Anchor row first, so the wellness frame has a row for the date under test.
    _seed_steps(tmp_conn, tmp_path)

    record = {
        "calendarDate": "2026-05-04",
        "weeklyAvg": 55.0,
        "lastNightAvg": 60.0,
        "lastNight5MinHigh": 72.0,
        "status": "BALANCED",
    }
    hrv_file = _write_json(tmp_path, "hrv.json", [record])
    handle_hrv(tmp_conn, hrv_file)

    day = load_wellness(tmp_conn).loc[pd.Timestamp("2026-05-04")]
    expected = {
        "hrv_last_night": 60.0,
        "hrv_weekly": 55.0,
        "hrv_status": "BALANCED",
    }
    for column, value in expected.items():
        assert day[column] == value
def test_resting_hr_roundtrip(tmp_conn, tmp_path: Path) -> None:
    """A resting-HR record ingested by `handle_resting_hr` surfaces as `resting_hr`."""
    # Anchor row first, so the wellness frame has a row for the date under test.
    _seed_steps(tmp_conn, tmp_path)

    record = {"calendarDate": "2026-05-04", "restingHeartRate": 52}
    rhr_file = _write_json(tmp_path, "rhr.json", [record])
    handle_resting_hr(tmp_conn, rhr_file)

    wellness = load_wellness(tmp_conn)
    resting_hr = wellness.loc[pd.Timestamp("2026-05-04"), "resting_hr"]
    assert resting_hr == 52
def test_intensity_minutes_roundtrip(tmp_conn, tmp_path: Path) -> None:
    """Intensity minutes ingested by `handle_intensity_minutes` come back via `load_wellness`.

    Checks the `moderate_minutes` and `vigorous_minutes` columns for the
    seeded date.
    """
    # Anchor row first, so the wellness frame has a row for the date under test.
    _seed_steps(tmp_conn, tmp_path)

    record = {
        "calendarDate": "2026-05-04",
        "moderateIntensityMinutes": 30,
        "vigorousIntensityMinutes": 15,
    }
    minutes_file = _write_json(tmp_path, "im.json", [record])
    handle_intensity_minutes(tmp_conn, minutes_file)

    day = load_wellness(tmp_conn).loc[pd.Timestamp("2026-05-04")]
    for column, value in (("moderate_minutes", 30), ("vigorous_minutes", 15)):
        assert day[column] == value
def test_body_battery_roundtrip(tmp_conn, tmp_path: Path) -> None:
    """A body-battery record ingested by `handle_body_battery` comes back via `load_wellness`.

    Only the `bb_charged` and `bb_lowest` columns are asserted; the payload's
    `drained` and `highest` fields are ingested but not checked here.
    """
    # Anchor row first, so the wellness frame has a row for the date under test.
    _seed_steps(tmp_conn, tmp_path)

    record = {
        "calendarDate": "2026-05-04",
        "charged": 60,
        "drained": 45,
        "highest": 95,
        "lowest": 20,
    }
    battery_file = _write_json(tmp_path, "bb.json", [record])
    handle_body_battery(tmp_conn, battery_file)

    day = load_wellness(tmp_conn).loc[pd.Timestamp("2026-05-04")]
    for column, value in (("bb_charged", 60), ("bb_lowest", 20)):
        assert day[column] == value