499 lines
17 KiB
Python
499 lines
17 KiB
Python
|
|
"""Ingest a Garmin Connect data export (zip or unzipped directory) into SQLite.
|
|||
|
|
|
|||
|
|
Usage:
|
|||
|
|
uv run ingest_export.py path/to/export.zip
|
|||
|
|
uv run ingest_export.py path/to/unzipped_export_dir/
|
|||
|
|
|
|||
|
|
Garmin's export contains a tree like:
|
|||
|
|
DI_CONNECT/
|
|||
|
|
DI-Connect-Fitness/ # activity JSONs + .fit files
|
|||
|
|
DI-Connect-Wellness/ # daily wellness JSONs
|
|||
|
|
DI-Connect-Aggregator/ # rolled-up summaries
|
|||
|
|
DI-Connect-User/ # profile
|
|||
|
|
...
|
|||
|
|
|
|||
|
|
File names vary by account / export date. We dispatch on filename substrings
|
|||
|
|
and log anything unrecognized so you can tell us about new shapes.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import argparse
|
|||
|
|
import json
|
|||
|
|
import sqlite3
|
|||
|
|
import sys
|
|||
|
|
import tempfile
|
|||
|
|
import zipfile
|
|||
|
|
from collections import defaultdict
|
|||
|
|
from datetime import datetime
|
|||
|
|
from pathlib import Path
|
|||
|
|
from typing import Any, Callable, Iterable
|
|||
|
|
|
|||
|
|
from db import connect, set_state
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# helpers
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def _load_json(path: Path) -> Any:
    """Parse the JSON document stored at *path*.

    The file is decoded as UTF-8 (a leading byte-order mark is tolerated)
    rather than with the platform's locale encoding, so exports containing
    non-ASCII text load the same way on every OS.

    Returns the decoded value, or ``None`` when the file cannot be read,
    is not valid UTF-8, or is not valid JSON; the failure is reported on
    stderr so the caller can simply skip the file.
    """
    try:
        # "utf-8-sig" strips a BOM if present; json would otherwise reject it.
        with path.open(encoding="utf-8-sig") as fh:
            return json.load(fh)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        print(f" ! failed to read {path.name}: {exc}", file=sys.stderr)
        return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _as_list(payload: Any) -> list[dict]:
    """Normalize a decoded JSON document to a list of record dicts.

    Garmin files are sometimes a bare list and sometimes a ``{"key": [...]}``
    envelope:

    * list  -> its dict elements (anything else is dropped);
    * dict  -> the dict elements of its first list-valued entry, or the dict
      itself as a single record when it has no list-valued entry;
    * other -> an empty list.
    """
    if isinstance(payload, dict):
        # Envelope form: unwrap the first value that is a list, if there is one.
        wrapped = next(
            (value for value in payload.values() if isinstance(value, list)),
            None,
        )
        if wrapped is None:
            return [payload]
        payload = wrapped
    elif not isinstance(payload, list):
        return []
    return [entry for entry in payload if isinstance(entry, dict)]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _dump(obj: Any) -> str:
    """Serialize *obj* as compact JSON text.

    No whitespace is emitted after separators, and any value the json module
    cannot encode natively is converted with ``str()``.
    """
    compact_separators = (",", ":")
    return json.dumps(obj, default=str, separators=compact_separators)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _first(d: dict, *keys: str) -> Any:
    """Return the value of the first key in *keys* that *d* maps to a non-None value.

    Garmin renames fields between exports, so callers list every known
    spelling in priority order.  Falsy values such as ``0`` or ``""`` count
    as present; ``None`` is returned when no candidate key has a value.
    """
    candidates = (d.get(name) for name in keys)
    return next((value for value in candidates if value is not None), None)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _date_key(d: dict) -> str | None:
    """Return the calendar date of a daily-stats record as a 10-character key.

    The first populated field among ``calendarDate``, ``date``,
    ``summaryDate`` and ``statisticsStartDate`` is used; its string form is
    truncated to 10 characters (presumably the ``YYYY-MM-DD`` prefix of an
    ISO date or timestamp; verify against real export data).  Returns
    ``None`` when no such field holds a truthy value.
    """
    value = _first(d, "calendarDate", "date", "summaryDate", "statisticsStartDate")
    # A falsy value covers both "no date field present" and an empty one.
    return str(value)[:10] if value else None
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# handlers — one per data category
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def _activity_records(payload: Any) -> list[dict]:
    """Flatten a decoded activities document into a list of activity dicts."""
    if isinstance(payload, dict):
        return _as_list(payload)
    if not isinstance(payload, list):
        return []
    records: list[dict] = []
    for entry in payload:
        if not isinstance(entry, dict):
            # Stray scalars / nulls would otherwise crash the field lookups.
            continue
        if "summarizedActivitiesExport" in entry:
            # summarizedActivities format:
            #   [{"summarizedActivitiesExport": [{...}, {...}]}]
            wrapped = entry["summarizedActivitiesExport"]
            if isinstance(wrapped, list):
                records.extend(item for item in wrapped if isinstance(item, dict))
        else:
            records.append(entry)
    return records


def handle_activities(conn: sqlite3.Connection, path: Path) -> int:
    """Upsert activity summaries from the JSON file at *path* into ``activities``.

    Accepted document shapes:

    * ``[{"summarizedActivitiesExport": [{...}, ...]}, ...]`` (every envelope
      in the outer list is read, not only the first one);
    * a bare list of activity dicts;
    * a dict, unwrapped via ``_as_list``.

    Entries that are not dicts, or that carry no activity id, are skipped.
    On an ``activity_id`` conflict only ``activity_name``, ``raw`` and
    ``fetched_at`` are refreshed; the other columns keep their stored values.

    Returns the number of rows written (0 when the file cannot be loaded).
    """
    payload = _load_json(path)
    if payload is None:
        return 0
    items = _activity_records(payload)

    # The Garmin Takeout `summarizedActivities` export uses scaled integer units:
    #   distance   cm → m         (÷100)
    #   duration   ms → s         (÷1000)
    #   elevation  cm → m         (÷100)
    #   speed      m/s ÷ 10 → m/s (×10)
    # The live API (`sync.py`) returns these in SI directly, so we only convert
    # when the source is this export and the raw values are present in the scaled form.
    def _scale(v, factor):
        return None if v is None else v * factor

    n = 0
    for raw in items:
        aid = _first(raw, "activityId", "activityIdLocal")
        if aid is None:
            continue
        atype = raw.get("activityType")
        if isinstance(atype, dict):
            type_key = atype.get("typeKey")
        else:
            type_key = atype  # sometimes a plain string in exports
        conn.execute(
            """
            INSERT INTO activities (
                activity_id, start_time_local, start_time_gmt, activity_type,
                activity_name, distance_m, duration_s, moving_duration_s,
                avg_speed_mps, max_speed_mps, avg_hr, max_hr, calories,
                elevation_gain_m, elevation_loss_m, training_load,
                aerobic_te, anaerobic_te, vo2_max, raw, fetched_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
            ON CONFLICT(activity_id) DO UPDATE SET
                activity_name=excluded.activity_name,
                raw=excluded.raw,
                fetched_at=excluded.fetched_at
            """,
            (
                aid,
                _first(raw, "startTimeLocal", "beginTimestamp"),
                _first(raw, "startTimeGmt", "startTimeGMT"),
                type_key,
                _first(raw, "activityName", "name"),
                _scale(_first(raw, "distance"), 0.01),
                _scale(_first(raw, "duration"), 0.001),
                _scale(_first(raw, "movingDuration"), 0.001),
                _scale(_first(raw, "averageSpeed", "avgSpeed"), 10.0),
                _scale(_first(raw, "maxSpeed"), 10.0),
                _first(raw, "averageHR", "avgHr"),
                _first(raw, "maxHR", "maxHr"),
                _first(raw, "calories"),
                _scale(_first(raw, "elevationGain"), 0.01),
                _scale(_first(raw, "elevationLoss"), 0.01),
                _first(raw, "activityTrainingLoad"),
                _first(raw, "aerobicTrainingEffect"),
                _first(raw, "anaerobicTrainingEffect"),
                _first(raw, "vO2MaxValue"),
                _dump(raw),
            ),
        )
        n += 1
    conn.commit()
    return n
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_sleep(conn: sqlite3.Connection, path: Path) -> int:
    """Write one ``daily_sleep`` row per dated record in the JSON file at *path*.

    Records without a usable date key are skipped; an existing row for the
    same date is replaced.  Returns the number of rows written (0 when the
    file cannot be loaded).
    """
    document = _load_json(path)
    if document is None:
        return 0

    sql = """INSERT OR REPLACE INTO daily_sleep
             (calendar_date, sleep_start_gmt, sleep_end_gmt, deep_s, light_s, rem_s, awake_s, sleep_score, raw, fetched_at)
             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))"""
    written = 0
    for record in _as_list(document):
        day = _date_key(record)
        if day:
            row = (
                day,
                _first(record, "sleepStartTimestampGMT", "sleepStartTimeGmt"),
                _first(record, "sleepEndTimestampGMT", "sleepEndTimeGmt"),
                _first(record, "deepSleepSeconds"),
                _first(record, "lightSleepSeconds"),
                _first(record, "remSleepSeconds"),
                _first(record, "awakeSleepSeconds", "awakeSeconds"),
                _first(record, "sleepScore", "overallSleepScore", "value"),
                _dump(record),
            )
            conn.execute(sql, row)
            written += 1
    conn.commit()
    return written
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_steps(conn: sqlite3.Connection, path: Path) -> int:
    """Write one ``daily_steps`` row per dated record in the JSON file at *path*.

    Records without a usable date key are skipped; an existing row for the
    same date is replaced.  Returns the number of rows written (0 when the
    file cannot be loaded).
    """
    document = _load_json(path)
    if document is None:
        return 0

    sql = """INSERT OR REPLACE INTO daily_steps
             (calendar_date, total_steps, step_goal, distance_m, raw, fetched_at)
             VALUES (?, ?, ?, ?, ?, datetime('now'))"""
    written = 0
    for record in _as_list(document):
        day = _date_key(record)
        if day:
            row = (
                day,
                _first(record, "totalSteps", "steps"),
                _first(record, "stepGoal", "dailyStepGoal"),
                _first(record, "totalDistance", "distance"),
                _dump(record),
            )
            conn.execute(sql, row)
            written += 1
    conn.commit()
    return written
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_stress(conn: sqlite3.Connection, path: Path) -> int:
    """Write one ``daily_stress`` row per dated record in the JSON file at *path*.

    Records without a usable date key are skipped; an existing row for the
    same date is replaced.  Returns the number of rows written (0 when the
    file cannot be loaded).
    """
    document = _load_json(path)
    if document is None:
        return 0

    sql = """INSERT OR REPLACE INTO daily_stress
             (calendar_date, avg_stress, max_stress, raw, fetched_at)
             VALUES (?, ?, ?, ?, datetime('now'))"""
    written = 0
    for record in _as_list(document):
        day = _date_key(record)
        if day:
            row = (
                day,
                _first(record, "overallStressLevel", "averageStressLevel", "avgStress"),
                _first(record, "maxStressLevel", "maxStress"),
                _dump(record),
            )
            conn.execute(sql, row)
            written += 1
    conn.commit()
    return written
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_hrv(conn: sqlite3.Connection, path: Path) -> int:
    """Write one ``daily_hrv`` row per dated record in the JSON file at *path*.

    Records without a usable date key are skipped; an existing row for the
    same date is replaced.  Returns the number of rows written (0 when the
    file cannot be loaded).
    """
    document = _load_json(path)
    if document is None:
        return 0

    sql = """INSERT OR REPLACE INTO daily_hrv
             (calendar_date, weekly_avg, last_night_avg, last_night_5min, status, raw, fetched_at)
             VALUES (?, ?, ?, ?, ?, ?, datetime('now'))"""
    written = 0
    for record in _as_list(document):
        day = _date_key(record)
        if day:
            row = (
                day,
                _first(record, "weeklyAvg"),
                _first(record, "lastNightAvg"),
                _first(record, "lastNight5MinHigh"),
                _first(record, "status"),
                _dump(record),
            )
            conn.execute(sql, row)
            written += 1
    conn.commit()
    return written
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_resting_hr(conn: sqlite3.Connection, path: Path) -> int:
    """Write one ``daily_resting_hr`` row per usable record in the JSON file at *path*.

    A record is used only when it has both a date key and a resting heart
    rate (``restingHeartRate``, falling back to ``value``); an existing row
    for the same date is replaced.  Returns the number of rows written
    (0 when the file cannot be loaded).
    """
    document = _load_json(path)
    if document is None:
        return 0

    sql = """INSERT OR REPLACE INTO daily_resting_hr
             (calendar_date, resting_hr, raw, fetched_at)
             VALUES (?, ?, ?, datetime('now'))"""
    written = 0
    for record in _as_list(document):
        day = _date_key(record)
        if not day:
            continue
        resting = _first(record, "restingHeartRate", "value")
        if resting is not None:
            conn.execute(sql, (day, resting, _dump(record)))
            written += 1
    conn.commit()
    return written
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_intensity_minutes(conn: sqlite3.Connection, path: Path) -> int:
    """Write one ``daily_intensity_minutes`` row per dated record in the file at *path*.

    Records without a usable date key are skipped; an existing row for the
    same date is replaced.  Returns the number of rows written (0 when the
    file cannot be loaded).
    """
    document = _load_json(path)
    if document is None:
        return 0

    sql = """INSERT OR REPLACE INTO daily_intensity_minutes
             (calendar_date, moderate_minutes, vigorous_minutes, raw, fetched_at)
             VALUES (?, ?, ?, ?, datetime('now'))"""
    written = 0
    for record in _as_list(document):
        day = _date_key(record)
        if day:
            row = (
                day,
                _first(record, "moderateIntensityMinutes", "moderateValue"),
                _first(record, "vigorousIntensityMinutes", "vigorousValue"),
                _dump(record),
            )
            conn.execute(sql, row)
            written += 1
    conn.commit()
    return written
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_body_battery(conn: sqlite3.Connection, path: Path) -> int:
    """Write one ``daily_body_battery`` row per dated record in the file at *path*.

    Records without a usable date key are skipped; an existing row for the
    same date is replaced.  Returns the number of rows written (0 when the
    file cannot be loaded).
    """
    document = _load_json(path)
    if document is None:
        return 0

    sql = """INSERT OR REPLACE INTO daily_body_battery
             (calendar_date, charged, drained, highest, lowest, raw, fetched_at)
             VALUES (?, ?, ?, ?, ?, ?, datetime('now'))"""
    written = 0
    for record in _as_list(document):
        day = _date_key(record)
        if day:
            row = (
                day,
                _first(record, "charged", "bodyBatteryChargedValue"),
                _first(record, "drained", "bodyBatteryDrainedValue"),
                _first(record, "highest", "highestBatteryLevel", "bodyBatteryHighestValue"),
                _first(record, "lowest", "lowestBatteryLevel", "bodyBatteryLowestValue"),
                _dump(record),
            )
            conn.execute(sql, row)
            written += 1
    conn.commit()
    return written
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_fit(conn: sqlite3.Connection, path: Path, export_root: Path) -> int:
    """Index a .fit file's location by activity ID. Don't parse the binary here.

    Garmin uses a few filename formats — the classic ``<id>_<name>.fit`` and
    the Takeout variant ``<email>_<id>.fit``.  Every ``_``-separated chunk of
    the file stem made of 8 or more ASCII digits is treated as a candidate
    ID, and the first candidate that exists in ``activities`` is used.

    The Garmin Takeout dump uses *upload IDs* in FIT filenames (a different
    ID space from activity IDs), so inserting unverified IDs would create
    thousands of orphaned rows.  When no candidate is a known activity the
    file is skipped — link_fit_files.py handles the by-content matching for
    takeout-format exports.

    Args:
        conn: open database connection.
        path: location of the .fit file; must lie beneath *export_root*.
        export_root: root of the export; the stored path is relative to it.

    Returns:
        1 if a row was written to ``activity_fit_files``, otherwise 0.
    """
    sqlite_max_int = 2**63 - 1  # larger values cannot be bound as INTEGER

    aid = None
    for chunk in path.stem.split("_"):
        # isdigit() alone also accepts characters such as superscript digits
        # that int() rejects, so restrict candidates to ASCII digits.
        if len(chunk) < 8 or not (chunk.isascii() and chunk.isdigit()):
            continue
        candidate = int(chunk)
        if candidate > sqlite_max_int:
            continue
        # Verify the parsed number is actually an activity_id we know about.
        row = conn.execute(
            "SELECT 1 FROM activities WHERE activity_id = ? LIMIT 1", (candidate,)
        ).fetchone()
        if row is not None:
            aid = candidate
            break
    if aid is None:
        # no chunk of the filename is a recognisable, known activity id
        return 0

    rel = path.relative_to(export_root)
    conn.execute(
        """INSERT OR REPLACE INTO activity_fit_files (activity_id, fit_path, indexed_at)
           VALUES (?, ?, datetime('now'))""",
        (aid, str(rel)),
    )
    conn.commit()
    return 1
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# dispatch — pattern → handler
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
# Signature shared by the per-category JSON handlers: (connection, file) -> row count.
Handler = Callable[[sqlite3.Connection, Path], int]

# Each row is (filename needle, summary label, handler).  Rows are consulted
# in order and the first needle found in the file name wins, so more specific
# needles are listed before their generic fallbacks.  Matching is
# case-insensitive: `classify` lowercases both the needle and the file name.
DISPATCH: list[tuple[str, str, Handler]] = [
    # activities
    ("summarizedActivities", "activities", handle_activities),
    # sleep
    ("sleepData", "sleep", handle_sleep),
    ("sleep", "sleep", handle_sleep),
    # steps — daily steps file naming varies
    ("UDSFile", "steps", handle_steps),
    ("step", "steps", handle_steps),
    # stress
    ("stressLevel", "stress", handle_stress),
    ("stress", "stress", handle_stress),
    # heart-rate variability
    ("hrvStatus", "hrv", handle_hrv),
    ("hrv", "hrv", handle_hrv),
    # resting heart rate
    ("restingHeart", "resting_hr", handle_resting_hr),
    ("RHR", "resting_hr", handle_resting_hr),
    # intensity minutes
    ("intensityMinute", "intensity", handle_intensity_minutes),
    # body battery
    ("bodyBattery", "body_batt", handle_body_battery),
    ("BBS", "body_batt", handle_body_battery),
]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def classify(name: str) -> tuple[str, Handler] | None:
    """Pick the handler for a file name.

    Scans ``DISPATCH`` in order and returns ``(label, handler)`` for the
    first entry whose needle occurs in *name*, compared case-insensitively.
    Returns ``None`` when no entry matches.
    """
    haystack = name.lower()
    matches = (
        (label, handler)
        for needle, label, handler in DISPATCH
        if needle.lower() in haystack
    )
    return next(matches, None)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# entry
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def iter_files(root: Path) -> Iterable[Path]:
    """Lazily yield every file found anywhere beneath *root* (directories are skipped)."""
    yield from (entry for entry in root.rglob("*") if entry.is_file())
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main() -> None:
    """Command-line entry point: ingest a Garmin export into the database.

    The source may be an export ``.zip`` (unpacked into a temporary directory
    that is removed afterwards) or an already unzipped directory.  JSON files
    are routed through ``classify``; FIT files are indexed with
    ``handle_fit`` only after every JSON file has been processed, because
    ``handle_fit`` links a file only to an activity that already exists in
    the database.

    With ``--dry-run`` nothing is written and the database is never opened:
    the summary then reports matching *file* counts instead of row counts.

    Exits with an error message when the source path is missing, is neither
    a zip file nor a directory, or is not a readable zip archive.
    """
    # Local import: only needed for the timezone-aware replacement of the
    # deprecated ``datetime.utcnow()`` below.
    from datetime import timezone

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("source", help="Path to export.zip or unzipped export directory")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would be ingested without writing to the DB")
    args = parser.parse_args()

    src = Path(args.source).expanduser().resolve()
    if not src.exists():
        sys.exit(f"path does not exist: {src}")

    cleanup_dir: tempfile.TemporaryDirectory | None = None
    conn: sqlite3.Connection | None = None
    try:
        if src.is_file() and src.suffix.lower() == ".zip":
            cleanup_dir = tempfile.TemporaryDirectory(prefix="garmin_export_")
            export_root = Path(cleanup_dir.name)
            print(f"unzipping {src.name} → {export_root}")
            try:
                with zipfile.ZipFile(src) as zf:
                    zf.extractall(export_root)
            except zipfile.BadZipFile as exc:
                sys.exit(f"not a valid zip archive: {src} ({exc})")
        elif src.is_dir():
            export_root = src
        else:
            sys.exit(f"unsupported source: {src}")

        # A dry run must not touch the database, so only connect for real runs.
        if not args.dry_run:
            conn = connect()

        counts: dict[str, int] = defaultdict(int)
        unknown: list[str] = []
        fit_paths: list[Path] = []

        for path in iter_files(export_root):
            suffix = path.suffix.lower()
            if suffix == ".fit":
                # Deferred: activities must be ingested before FIT files can
                # be matched against them, whatever order the walk yields.
                fit_paths.append(path)
                continue
            if suffix != ".json":
                continue

            matched = classify(path.name)
            if not matched:
                unknown.append(str(path.relative_to(export_root)))
                continue

            label, fn = matched
            if args.dry_run:
                counts[label] += 1
                continue
            try:
                counts[label] += fn(conn, path)
            except Exception as exc:  # noqa: BLE001
                # Best effort: one bad file must not abort the whole ingest.
                print(f" ! error in {label} handler for {path.name}: {exc}", file=sys.stderr)

        if args.dry_run:
            fit_count = len(fit_paths)
        else:
            fit_count = 0
            for path in fit_paths:
                try:
                    fit_count += handle_fit(conn, path, export_root)
                except Exception as exc:  # noqa: BLE001
                    print(f" ! error in fit handler for {path.name}: {exc}", file=sys.stderr)

        print("\n=== ingest summary ===")
        for label, n in sorted(counts.items()):
            print(f" {label:20s} {n:>6} rows" + (" (file count, dry run)" if args.dry_run else ""))
        print(f" fit_files {fit_count:>6}")
        if unknown:
            print(f"\n unrecognized JSON files ({len(unknown)}):")
            for name in unknown[:25]:
                print(f" {name}")
            if len(unknown) > 25:
                print(f" ... and {len(unknown) - 25} more")

        if not args.dry_run:
            # Same naive-UTC "YYYY-MM-DDTHH:MM:SS" text that utcnow() produced.
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            set_state(conn, "last_ingest_utc", now_utc.isoformat(timespec="seconds"))
            set_state(conn, "last_ingest_source", str(src))
            conn.commit()
    finally:
        # Release resources on every exit path, including errors and sys.exit.
        if conn is not None:
            conn.close()
        if cleanup_dir is not None:
            cleanup_dir.cleanup()

    print("\n✓ done")


if __name__ == "__main__":
    main()
|