228 lines
7.4 KiB
Python
228 lines
7.4 KiB
Python
"""Tests for `openrun.ingest.fit_linker`.
|
|
|
|
We don't exercise the FIT-parsing path here — the linker's responsibilities at
|
|
the DB boundary are (1) match a session timestamp to an activity, (2) store
|
|
absolute paths, (3) rewrite them when an export moves, and (4) refuse to
|
|
silently overwrite an already-linked activity. All four are testable without
|
|
a real .fit fixture.
|
|
"""
|
|
|
|
from __future__ import annotations

import os
from datetime import datetime, timezone
from pathlib import Path

from openrun.ingest.fit_linker import (
    _match_activity,
    link,
    record_link,
    relink,
)
|
|
|
|
|
|
def _seed_activity(conn, aid: int, when: datetime) -> None:
    """Insert one minimal ``activities`` row for ``aid`` and commit it.

    ``when`` is converted to UTC and written to ``start_time_gmt`` as an
    ISO-8601 string with the ``+00:00`` offset suffix stripped. The ``raw`` and
    ``fetched_at`` columns receive the fixed placeholders ``'{}'`` and ``'now'``.
    """
    utc_when = when.astimezone(timezone.utc)
    # Store the timestamp without its UTC offset suffix.
    start_time_gmt = utc_when.isoformat().replace("+00:00", "")
    insert_sql = "INSERT INTO activities (activity_id, start_time_gmt, raw, fetched_at) VALUES (?, ?, '{}', 'now')"
    conn.execute(insert_sql, (aid, start_time_gmt))
    conn.commit()
|
|
|
|
|
|
def _row(conn, aid: int) -> tuple[int, str]:
    """Return the first ``activity_fit_files`` row for ``aid``.

    The result is whatever ``fetchone()`` yields for the
    ``(activity_id, fit_path)`` query. Callers in this file index it by column
    name (``["fit_path"]``), so the connection presumably uses a name-addressable
    row factory — confirm against the ``tmp_conn`` fixture.
    """
    query = "SELECT activity_id, fit_path FROM activity_fit_files WHERE activity_id = ?"
    cursor = conn.execute(query, (aid,))
    return cursor.fetchone()
|
|
|
|
|
|
def test_record_link_stores_absolute_path(tmp_conn, tmp_path: Path) -> None:
    """A relative path passed to record_link is stored as its resolved absolute path.

    The working directory is switched to ``tmp_path`` around the call so the
    relative path points at a real file, and is restored afterwards even if
    ``record_link`` raises.
    """
    fit = tmp_path / "sub" / "run.fit"
    fit.parent.mkdir(parents=True)
    fit.write_bytes(b"")  # contents irrelevant — record_link doesn't parse

    # Pass a relative path on purpose to verify resolution.
    rel = fit.relative_to(tmp_path)
    # Guard the premise: if this were absolute the test would prove nothing.
    assert not rel.is_absolute(), f"test setup error: {rel} should be relative"

    cwd_before = Path.cwd()
    os.chdir(tmp_path)
    try:
        record_link(tmp_conn, 1001, rel)
    finally:
        # Always restore the cwd so later tests are unaffected.
        os.chdir(cwd_before)
    tmp_conn.commit()

    stored = _row(tmp_conn, 1001)["fit_path"]
    assert Path(stored).is_absolute(), f"expected absolute, got {stored}"
    assert Path(stored) == fit.resolve()
|
|
|
|
|
|
def test_record_link_is_idempotent(tmp_conn, tmp_path: Path) -> None:
    """Recording the same activity/file link twice leaves exactly one row."""
    fit_file = tmp_path / "run.fit"
    fit_file.write_bytes(b"")

    # Issue the identical call twice, back to back.
    for _ in range(2):
        record_link(tmp_conn, 42, fit_file)
    tmp_conn.commit()

    cursor = tmp_conn.execute(
        "SELECT COUNT(*) FROM activity_fit_files WHERE activity_id = 42"
    )
    row_count = cursor.fetchone()[0]
    assert row_count == 1
|
|
|
|
|
|
def test_relink_rewrites_paths_by_basename(tmp_conn, tmp_path: Path) -> None:
    """Relinking to a new export root rewrites only rows whose file exists there.

    Two activities are linked under ``old/fit``. Only ``a.fit`` is recreated
    under ``new/fit``, so ``relink`` is expected to report one updated and one
    unmatched row, and to leave the unmatched row's stored path alone.
    """
    old_fit_dir = tmp_path / "old" / "fit"
    new_root = tmp_path / "new"
    new_fit_dir = new_root / "fit"
    old_fit_dir.mkdir(parents=True)
    new_fit_dir.mkdir(parents=True)

    # Activity id -> file under the old root.
    old_paths = {1: old_fit_dir / "a.fit", 2: old_fit_dir / "b.fit"}
    for old_path in old_paths.values():
        old_path.write_bytes(b"")
    for aid, old_path in old_paths.items():
        record_link(tmp_conn, aid, old_path)
    tmp_conn.commit()

    # Only a.fit reappears under the new root; b.fit is deliberately absent
    # so the unmatched branch is exercised.
    moved_a = new_fit_dir / "a.fit"
    moved_a.write_bytes(b"")

    updated, unmatched = relink(tmp_conn, new_root)
    assert updated == 1
    assert unmatched == 1

    path_after = {aid: _row(tmp_conn, aid)["fit_path"] for aid in (1, 2)}
    assert Path(path_after[1]) == moved_a.resolve()
    # The unmatched row is left untouched, still pointing at its original location.
    assert Path(path_after[2]) == old_paths[2].resolve()
|
|
|
|
|
|
def test_relink_rejects_non_directory(tmp_conn, tmp_path: Path) -> None:
    """``relink`` raises ``FileNotFoundError`` mentioning "not a directory" for a missing root."""
    missing_root = tmp_path / "does_not_exist"

    raised = None
    try:
        relink(tmp_conn, missing_root)
    except FileNotFoundError as exc:
        # Keep a reference: the `as` name is unbound once the handler exits.
        raised = exc

    if raised is None:
        raise AssertionError("expected FileNotFoundError for missing relink root")
    assert "not a directory" in str(raised)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# _match_activity — pure function, no DB
# ---------------------------------------------------------------------------
|
|
|
|
def test_match_activity_within_tolerance() -> None:
    """A query 30 away from a key, with tolerance 60, resolves to that key's activity."""
    activity_by_ts = {1000: 42, 2000: 99}
    sorted_ts = sorted(activity_by_ts)

    matched = _match_activity(1030, sorted_ts, activity_by_ts, tolerance_s=60)

    assert matched == 42
|
|
|
|
|
|
def test_match_activity_picks_closest() -> None:
    """When two keys fall inside the tolerance, the nearer one is chosen."""
    activity_by_ts = {1000: 42, 1050: 99}
    sorted_ts = sorted(activity_by_ts)

    # 1030 sits 30 after 1000 but only 20 before 1050, so 1050 (activity 99) wins.
    matched = _match_activity(1030, sorted_ts, activity_by_ts, tolerance_s=60)

    assert matched == 99
|
|
|
|
|
|
def test_match_activity_outside_tolerance_returns_none() -> None:
    """A query 500 away from the only key, with tolerance 60, yields ``None``."""
    activity_by_ts = {1000: 42}
    sorted_ts = sorted(activity_by_ts)

    matched = _match_activity(1500, sorted_ts, activity_by_ts, tolerance_s=60)

    assert matched is None
|
|
|
|
|
|
def test_match_activity_empty_index() -> None:
    """With no keys and an empty index there is nothing to match, so the result is ``None``."""
    no_keys = []
    no_index = {}

    matched = _match_activity(1000, no_keys, no_index, tolerance_s=60)

    assert matched is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# link() collision handling — uses fit_iter injection so we never parse a FIT
# ---------------------------------------------------------------------------
|
|
|
|
def test_link_writes_match(tmp_conn, tmp_path: Path) -> None:
    """One injected FIT whose timestamp equals the seeded activity's gets linked.

    The summary must report exactly one link and nothing else, and the stored
    path for activity 7 must be the resolved path of the injected file.
    """
    session_start = datetime(2026, 5, 1, 12, 0, 0, tzinfo=timezone.utc)
    _seed_activity(tmp_conn, aid=7, when=session_start)

    fit_file = tmp_path / "good.fit"
    fit_file.write_bytes(b"")

    # fit_iter injects (path, session time) pairs so no real FIT is parsed.
    injected = [(fit_file, session_start)]
    summary = link(
        export_root=tmp_path,
        conn=tmp_conn,
        dry_run=False,
        min_size_kb=0,
        tolerance_s=60,
        fit_iter=injected,
    )

    expected_summary = {"linked": 1, "unmatched": 0, "parse_failed": 0, "collisions": 0}
    assert summary == expected_summary

    cursor = tmp_conn.execute(
        "SELECT fit_path FROM activity_fit_files WHERE activity_id = 7"
    )
    linked_row = cursor.fetchone()
    assert Path(linked_row["fit_path"]) == fit_file.resolve()
|
|
|
|
|
|
def test_link_warns_and_skips_on_collision(tmp_conn, tmp_path: Path, capsys) -> None:
    """Two FITs matching the *same* activity: the first is linked, the second is not.

    The second file must be counted as a collision, must not replace the stored
    path, and the collision must be reported on stderr naming both files.
    """
    session_start = datetime(2026, 5, 1, 12, 0, 0, tzinfo=timezone.utc)
    _seed_activity(tmp_conn, aid=7, when=session_start)

    first = tmp_path / "first.fit"
    second = tmp_path / "second.fit"
    for fit_file in (first, second):
        fit_file.write_bytes(b"")

    # Identical session starts, so both entries resolve to aid=7.
    injected = [(first, session_start), (second, session_start)]
    summary = link(
        export_root=tmp_path,
        conn=tmp_conn,
        dry_run=False,
        min_size_kb=0,
        tolerance_s=60,
        fit_iter=injected,
    )

    assert summary["linked"] == 1
    assert summary["collisions"] == 1

    # The winner's path must survive; the second file must not have clobbered it.
    cursor = tmp_conn.execute(
        "SELECT fit_path FROM activity_fit_files WHERE activity_id = 7"
    )
    stored = cursor.fetchone()["fit_path"]
    assert Path(stored) == first.resolve()

    # The stderr report should carry the word "collision" and both filenames.
    captured = capsys.readouterr()
    err = captured.err
    assert "collision" in err
    assert "second.fit" in err
    assert "first.fit" in err
|
|
|
|
|
|
def test_link_counts_parse_failures_and_unmatched(tmp_conn, tmp_path: Path) -> None:
    """Unparseable and unmatchable FITs are tallied separately and nothing is linked.

    An injected entry with a ``None`` timestamp is expected under
    ``parse_failed``. An entry whose timestamp is years away from the only
    seeded activity is expected under ``unmatched``.
    """
    session_start = datetime(2026, 5, 1, 12, 0, 0, tzinfo=timezone.utc)
    _seed_activity(tmp_conn, aid=7, when=session_start)

    # Far outside the 60 tolerance used below.
    far_off = datetime(2030, 1, 1, tzinfo=timezone.utc)
    broken_fit = tmp_path / "broken.fit"
    orphan_fit = tmp_path / "orphan.fit"
    for fit_file in (broken_fit, orphan_fit):
        fit_file.write_bytes(b"")

    injected = [(broken_fit, None), (orphan_fit, far_off)]
    summary = link(
        export_root=tmp_path,
        conn=tmp_conn,
        dry_run=False,
        min_size_kb=0,
        tolerance_s=60,
        fit_iter=injected,
    )

    assert summary["parse_failed"] == 1
    assert summary["unmatched"] == 1
    assert summary["linked"] == 0
|