"""Tests for `openrun.ingest.fit_linker`. We don't exercise the FIT-parsing path here — the linker's responsibilities at the DB boundary are (1) match a session timestamp to an activity, (2) store absolute paths, (3) rewrite them when an export moves, and (4) refuse to silently overwrite an already-linked activity. All four are testable without a real .fit fixture. """ from __future__ import annotations from datetime import datetime, timezone from pathlib import Path from openrun.ingest.fit_linker import ( _match_activity, link, record_link, relink, ) def _seed_activity(conn, aid: int, when: datetime) -> None: conn.execute( "INSERT INTO activities (activity_id, start_time_gmt, raw, fetched_at) VALUES (?, ?, '{}', 'now')", (aid, when.astimezone(timezone.utc).isoformat().replace("+00:00", "")), ) conn.commit() def _row(conn, aid: int) -> tuple[int, str]: return conn.execute( "SELECT activity_id, fit_path FROM activity_fit_files WHERE activity_id = ?", (aid,), ).fetchone() def test_record_link_stores_absolute_path(tmp_conn, tmp_path: Path) -> None: """A relative path passed to record_link is resolved to absolute on disk.""" fit = tmp_path / "sub" / "run.fit" fit.parent.mkdir(parents=True) fit.write_bytes(b"") # contents irrelevant — record_link doesn't parse # Pass a relative path on purpose to verify resolution. rel = fit.relative_to(tmp_path) cwd_before = Path.cwd() import os os.chdir(tmp_path) try: record_link(tmp_conn, 1001, Path(rel)) finally: os.chdir(cwd_before) tmp_conn.commit() stored = _row(tmp_conn, 1001)["fit_path"] assert Path(stored).is_absolute(), f"expected absolute, got {stored}" assert Path(stored) == fit.resolve() def test_record_link_is_idempotent(tmp_conn, tmp_path: Path) -> None: fit = tmp_path / "run.fit" fit.write_bytes(b"") record_link(tmp_conn, 42, fit) record_link(tmp_conn, 42, fit) tmp_conn.commit() count = tmp_conn.execute( "SELECT COUNT(*) FROM activity_fit_files WHERE activity_id = 42" ).fetchone()[0] assert count == 1 def test_relink_rewrites_paths_by_basename(tmp_conn, tmp_path: Path) -> None: """After moving an export, `relink` should update stored absolute paths to the new root.""" old_root = tmp_path / "old" new_root = tmp_path / "new" (old_root / "fit").mkdir(parents=True) (new_root / "fit").mkdir(parents=True) # Two activities, one of which will move; one filename has no counterpart in new_root. fit_a_old = old_root / "fit" / "a.fit" fit_b_old = old_root / "fit" / "b.fit" fit_a_old.write_bytes(b"") fit_b_old.write_bytes(b"") record_link(tmp_conn, 1, fit_a_old) record_link(tmp_conn, 2, fit_b_old) tmp_conn.commit() fit_a_new = new_root / "fit" / "a.fit" fit_a_new.write_bytes(b"") # Intentionally leave `b.fit` absent from new_root to test the unmatched path. updated, unmatched = relink(tmp_conn, new_root) assert updated == 1 assert unmatched == 1 a_after = _row(tmp_conn, 1)["fit_path"] b_after = _row(tmp_conn, 2)["fit_path"] assert Path(a_after) == fit_a_new.resolve() # Unmatched row left untouched, still pointing at the old (now non-existent) location. assert Path(b_after) == fit_b_old.resolve() def test_relink_rejects_non_directory(tmp_conn, tmp_path: Path) -> None: missing = tmp_path / "does_not_exist" try: relink(tmp_conn, missing) except FileNotFoundError as exc: assert "not a directory" in str(exc) else: raise AssertionError("expected FileNotFoundError for missing relink root") # --------------------------------------------------------------------------- # _match_activity — pure function, no DB # --------------------------------------------------------------------------- def test_match_activity_within_tolerance() -> None: index = {1000: 42, 2000: 99} keys = sorted(index) assert _match_activity(1030, keys, index, tolerance_s=60) == 42 def test_match_activity_picks_closest() -> None: """Two candidates both within tolerance → closest one wins.""" index = {1000: 42, 1050: 99} keys = sorted(index) # 1030 is 30s after 1000 and 20s before 1050 → picks 1050 assert _match_activity(1030, keys, index, tolerance_s=60) == 99 def test_match_activity_outside_tolerance_returns_none() -> None: index = {1000: 42} keys = sorted(index) assert _match_activity(1500, keys, index, tolerance_s=60) is None def test_match_activity_empty_index() -> None: assert _match_activity(1000, [], {}, tolerance_s=60) is None # --------------------------------------------------------------------------- # link() collision handling — uses fit_iter injection so we never parse a FIT # --------------------------------------------------------------------------- def test_link_writes_match(tmp_conn, tmp_path: Path) -> None: when = datetime(2026, 5, 1, 12, 0, 0, tzinfo=timezone.utc) _seed_activity(tmp_conn, aid=7, when=when) fit = tmp_path / "good.fit" fit.write_bytes(b"") summary = link( export_root=tmp_path, conn=tmp_conn, dry_run=False, min_size_kb=0, tolerance_s=60, fit_iter=[(fit, when)], ) assert summary == {"linked": 1, "unmatched": 0, "parse_failed": 0, "collisions": 0} row = tmp_conn.execute( "SELECT fit_path FROM activity_fit_files WHERE activity_id = 7" ).fetchone() assert Path(row["fit_path"]) == fit.resolve() def test_link_warns_and_skips_on_collision(tmp_conn, tmp_path: Path, capsys) -> None: """Two FITs whose session times both match the *same* activity: first wins, second is reported as a collision and not written.""" when = datetime(2026, 5, 1, 12, 0, 0, tzinfo=timezone.utc) _seed_activity(tmp_conn, aid=7, when=when) first = tmp_path / "first.fit" second = tmp_path / "second.fit" first.write_bytes(b"") second.write_bytes(b"") summary = link( export_root=tmp_path, conn=tmp_conn, dry_run=False, min_size_kb=0, tolerance_s=60, # Both FITs report the same session start → both match aid=7. fit_iter=[(first, when), (second, when)], ) assert summary["linked"] == 1 assert summary["collisions"] == 1 # The first FIT's path must still be in the table — second must not have clobbered. stored = tmp_conn.execute( "SELECT fit_path FROM activity_fit_files WHERE activity_id = 7" ).fetchone()["fit_path"] assert Path(stored) == first.resolve() # Collision is reported on stderr with both filenames for diagnosis. err = capsys.readouterr().err assert "collision" in err assert "second.fit" in err assert "first.fit" in err def test_link_counts_parse_failures_and_unmatched(tmp_conn, tmp_path: Path) -> None: when = datetime(2026, 5, 1, 12, 0, 0, tzinfo=timezone.utc) _seed_activity(tmp_conn, aid=7, when=when) far_off = datetime(2030, 1, 1, tzinfo=timezone.utc) parse_fail = tmp_path / "broken.fit" orphan = tmp_path / "orphan.fit" parse_fail.write_bytes(b"") orphan.write_bytes(b"") summary = link( export_root=tmp_path, conn=tmp_conn, dry_run=False, min_size_kb=0, tolerance_s=60, fit_iter=[(parse_fail, None), (orphan, far_off)], ) assert summary["parse_failed"] == 1 assert summary["unmatched"] == 1 assert summary["linked"] == 0