58 lines
1.5 KiB
Bash
Executable File
58 lines
1.5 KiB
Bash
Executable File
#!/usr/bin/env bash
# DVC pull → decompress .dvc-store/ back into data/.
#
# Counterpart: scripts/data-push.sh
#
# Usage:
#   ./scripts/data-pull.sh           # pull + decompress all
#   ./scripts/data-pull.sh --local   # decompress only (skip dvc pull, use existing cache)

set -euo pipefail

# Repository root: the parent of the directory that contains this script.
# `--` keeps a path beginning with '-' from being read as an option.
REPO_ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"
DATA_DIR="$REPO_ROOT/data"         # decompressed files are written below here
STORE_DIR="$REPO_ROOT/.dvc-store"  # compressed *.zst files are read from here
SKIP_PULL=false                    # switched to true by --local

#######################################
# Parse the command-line flags.
# Globals:   SKIP_PULL (written)
# Arguments: the script's own arguments ("$@")
# Outputs:   error and usage line on stderr for an unrecognized argument
# Returns:   0 on success; exits the script with status 2 on an unrecognized
#            argument, so a mistyped --local cannot silently start a pull
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --local)
        SKIP_PULL=true
        ;;
      *)
        printf 'Error: unknown argument: %s\n' "$arg" >&2
        printf 'Usage: %s [--local]\n' "${0##*/}" >&2
        exit 2
        ;;
    esac
  done
}
parse_args "$@"

# Thread count passed to zstd: nproc, else sysctl hw.ncpu, else a fallback of 4.
THREADS=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)

# Refresh the DVC-tracked store unless the caller asked to reuse what is
# already on disk (SKIP_PULL is true after --local).
if [[ "$SKIP_PULL" == false ]]; then
  printf '%s\n' "=== DVC pull ==="
  # dvc is run from the repository root.
  cd "$REPO_ROOT"
  # The "public" remote is served over plain HTTP, so no credentials are needed.
  uvx --with 'dvc[s3]' dvc pull -r public
  printf '\n'
fi

# Nothing can be unpacked without the compressed store directory, so stop
# here with a hint on stderr when it is missing.
[[ -d "$STORE_DIR" ]] || {
  echo "Error: .dvc-store/ not found — run dvc pull first or check .dvc-store.dvc exists" >&2
  exit 1
}

#######################################
# Map a compressed file inside the store to its decompressed destination.
# Arguments: $1 - store root directory
#            $2 - data root directory
#            $3 - path of a .zst file located under $1
# Outputs:   destination path on stdout: $3 re-rooted under $2 with the
#            trailing ".zst" removed
#######################################
dest_path_for() {
  local store_root=$1 data_root=$2 src=$3
  # The prefix is quoted so that glob characters in the store path ([, *, ?)
  # are matched literally instead of being interpreted as a pattern.
  local rel=${src#"$store_root"/}
  printf '%s\n' "$data_root/${rel%.zst}"
}

echo "=== Decompressing .dvc-store/ → data/ ==="
echo "Threads: $THREADS"
echo ""

count=0 # files decompressed in this run; skipped files are not counted

# find emits NUL-delimited paths so names with spaces or newlines survive, and
# the loop reads from a process substitution so `count` stays in this shell.
while IFS= read -r -d '' zstfile; do
  dstfile="$(dest_path_for "$STORE_DIR" "$DATA_DIR" "$zstfile")"

  # Skip if destination exists and is newer than compressed source
  if [[ -f "$dstfile" && "$dstfile" -nt "$zstfile" ]]; then
    continue
  fi

  mkdir -p "$(dirname "$dstfile")"
  # --force overwrites an existing (stale) destination without prompting.
  zstd -d -T"$THREADS" -q --force "$zstfile" -o "$dstfile"
  count=$((count + 1))
done < <(find "$STORE_DIR" -name '*.zst' -type f -print0)

echo "Decompressed $count files into data/"
echo ""
echo "=== Done ==="