branch: main
push-test.sh
10704 bytesRaw
#!/usr/bin/env bash
#
# push-test.sh — End-to-end incremental push test for ripgit.
#
# Clones a repo (if needed), configures the ripgit remote, deletes the remote
# repo to start clean, enables bulk mode, pushes incrementally, rebuilds
# indexes, and verifies.
#
# Usage:
# ./scripts/push-test.sh [options]
#
# Options:
# -r, --repo PATH Local repo path (default: ../curl)
# -n, --name NAME Repo name on ripgit (default: basename of repo path)
# -u, --owner NAME Owner name on ripgit (default: test)
# -t, --token TOKEN Access token (from /settings). Embeds in git URL, sent as Bearer for curl.
# -o, --origin URL Git clone URL (used if local repo doesn't exist)
# -s, --step SIZE First-parent commits per push (default: 200)
# -w, --worker URL Worker base URL (default: https://git.theagents.company)
# -b, --branch BRANCH Branch to push (default: main)
# --no-delete Don't delete the remote repo first (resume mode). Currently a no-op: deletion is off by default.
# --no-rebuild Skip post-push index rebuilds
# --skip-to N Skip first N checkpoints (resume from checkpoint N+1)
# -h, --help Show this help
#
# Examples:
# # Fresh push of openclaw with 200-commit steps
# ./scripts/push-test.sh -r ../openclaw -s 200
#
# # Resume from checkpoint 50 (e.g., after fixing a bug mid-push)
# ./scripts/push-test.sh -r ../openclaw --no-delete --skip-to 50
#
# # Push a different repo
# ./scripts/push-test.sh -r /tmp/myrepo -o https://github.com/user/myrepo -s 100
#
# Strict mode: stop on errors, on unset variables, and on a failure in any
# stage of a pipeline.
set -euo pipefail

# --- Defaults ---
# Every value below can be overridden by a command-line option.

# Local checkout, and the URL it is cloned from when the checkout is missing.
REPO_PATH='../curl'
ORIGIN_URL=''

# Where the repo lives on the worker.
WORKER_URL='https://git.theagents.company'
OWNER='test'
REPO_NAME=''  # empty -> derived from the basename of REPO_PATH
TOKEN=''      # empty -> no credentials are sent

# What gets pushed and in what increments.
BRANCH='main'
STEP=200      # first-parent commits per checkpoint
SKIP_TO=0     # number of leading checkpoints to skip

# Phase switches. They are executed as commands (`if $DO_DELETE; then`), so
# the values must remain the literal words true / false.
# NOTE(review): the file header says the remote repo is deleted to start clean,
# but DO_DELETE starts out false and no option sets it to true, so the delete
# step never runs. Confirm whether that is intentional.
DO_DELETE=false
DO_REBUILD=true
# --- Parse args ---
# parse_args ARGS...
#   Applies command-line options to the global settings (REPO_PATH, REPO_NAME,
#   OWNER, TOKEN, ORIGIN_URL, STEP, WORKER_URL, BRANCH, DO_DELETE, DO_REBUILD,
#   SKIP_TO).
#   Exits 0 after printing help; exits 1 (message on stderr) on an unknown
#   option or when a value-taking option is the last argument.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Options that consume a value must actually be followed by one. Without
    # this check, "$2" trips `set -u` with a bare "unbound variable" error.
    case "$1" in
      -r|--repo|-n|--name|-u|--owner|-t|--token|-o|--origin|-s|--step|-w|--worker|-b|--branch|--skip-to)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -r|--repo)    REPO_PATH="$2"; shift 2 ;;
      -n|--name)    REPO_NAME="$2"; shift 2 ;;
      -u|--owner)   OWNER="$2"; shift 2 ;;
      -t|--token)   TOKEN="$2"; shift 2 ;;
      -o|--origin)  ORIGIN_URL="$2"; shift 2 ;;
      -s|--step)    STEP="$2"; shift 2 ;;
      -w|--worker)  WORKER_URL="$2"; shift 2 ;;
      -b|--branch)  BRANCH="$2"; shift 2 ;;
      --no-tail)    shift ;; # no-op, kept for backwards compat
      --no-delete)  DO_DELETE=false; shift ;;
      --no-rebuild) DO_REBUILD=false; shift ;;
      --skip-to)    SKIP_TO="$2"; shift 2 ;;
      -h|--help)
        # Print the leading comment block of this script as the help text.
        sed -n '2,/^$/{ s/^# //; s/^#//; p }' "$0"
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# resolve_repo_path PATH
#   Prints PATH as an absolute path: the absolute form of its parent directory
#   joined with its final component. The final component itself does not need
#   to exist (it may be cloned later), but the parent directory must.
#   Returns 1 with a message on stderr when the parent cannot be entered.
resolve_repo_path() {
  local path="$1"
  local parent
  if ! parent="$(cd "$(dirname -- "$path")" 2>/dev/null && pwd)"; then
    echo "Parent directory of '$path' does not exist or is not accessible" >&2
    return 1
  fi
  # Strip a trailing slash so a parent of "/" yields "/name", not "//name".
  printf '%s/%s\n' "${parent%/}" "$(basename -- "$path")"
}

# Resolve repo path to absolute.
# This is done through a function because in `VAR="$(a)/$(b)"` the exit status
# is that of the LAST substitution only; a failed `cd` would go unnoticed and
# leave REPO_PATH as "/<basename>".
REPO_PATH="$(resolve_repo_path "$REPO_PATH")" || exit 1

# Default repo name from path
if [[ -z "$REPO_NAME" ]]; then
  REPO_NAME="$(basename "$REPO_PATH")"
fi

BASE_URL="${WORKER_URL}/${OWNER}/${REPO_NAME}"
REMOTE_NAME="ripgit"
# --- Colors ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

# Tagged, colorized status lines. These are defined BEFORE the credential
# setup below because that setup calls warn(); calling it before definition
# aborts the script under `set -e`.
log() { echo -e "${CYAN}[ripgit]${NC} $*"; }
ok() { echo -e "${GREEN}[ ok ]${NC} $*"; }
warn() { echo -e "${YELLOW}[ warn ]${NC} $*"; }
fail() { echo -e "${RED}[FAIL ]${NC} $*"; }

# Git needs credentials embedded in the URL.
# curl sends the token as a Bearer header instead of embedding it in the URL.
# NOTE(review): the header is still passed as a curl command-line argument,
# and GIT_URL (token included) is what gets stored as the git remote URL.
if [[ -n "$TOKEN" ]]; then
  SCHEME="${WORKER_URL%%://*}"
  HOST="${WORKER_URL#*://}"
  GIT_URL="${SCHEME}://${OWNER}:${TOKEN}@${HOST}/${OWNER}/${REPO_NAME}"
  CURL_OPTS=(-H "Authorization: Bearer ${TOKEN}")
else
  GIT_URL="${BASE_URL}"
  # Keep the array non-empty: with `set -u`, bash older than 4.4 treats
  # "${CURL_OPTS[@]}" on an empty array as an unbound variable. Every curl
  # call in this script already passes -s, so repeating it changes nothing.
  CURL_OPTS=(-s)
  warn "No --token provided. Push will fail if auth is required."
fi
# --- Cleanup on exit ---
# PID of a background helper process to stop on exit; empty means none.
TAIL_PID=""

# cleanup: EXIT handler. Terminates and reaps the process recorded in
# TAIL_PID, ignoring errors (it may already be gone). Does nothing when
# TAIL_PID is empty.
cleanup() {
  [[ -z "$TAIL_PID" ]] && return 0
  kill "$TAIL_PID" 2>/dev/null || true
  wait "$TAIL_PID" 2>/dev/null || true
}
trap cleanup EXIT
# --- 1. Ensure local repo exists ---
# Three cases: the checkout is already there (nothing to do), it is missing
# but an origin URL was given (clone it), or it is missing with no origin
# (fatal).
if [[ -d "$REPO_PATH/.git" ]]; then
  :
elif [[ -n "$ORIGIN_URL" ]]; then
  log "Cloning $ORIGIN_URL -> $REPO_PATH ..."
  git clone "$ORIGIN_URL" "$REPO_PATH"
else
  fail "Local repo not found at $REPO_PATH and no --origin URL provided"
  exit 1
fi
ok "Local repo: $REPO_PATH"
# --- 3. Configure ripgit remote ---
# All later git commands run from inside the checkout.
cd "$REPO_PATH"
if ! git remote get-url "$REMOTE_NAME" &>/dev/null; then
  # The remote is not registered yet.
  git remote add "$REMOTE_NAME" "$GIT_URL"
else
  # The remote exists; repoint it only when its URL differs from GIT_URL.
  CURRENT_URL=$(git remote get-url "$REMOTE_NAME")
  if [[ "$CURRENT_URL" != "$GIT_URL" ]]; then
    git remote set-url "$REMOTE_NAME" "$GIT_URL"
    warn "Updated remote $REMOTE_NAME URL"
  fi
fi
ok "Remote: $REMOTE_NAME -> ${BASE_URL}" # log without token
# --- 4. Gather commit info ---
# STEP is used as a modulus when picking checkpoints, so it has to be a
# positive integer; reject anything else up front with a clear message.
if ! [[ "$STEP" =~ ^[1-9][0-9]*$ ]]; then
  fail "--step must be a positive integer (got '$STEP')"
  exit 1
fi

TOTAL_FP=$(git rev-list --first-parent --count "$BRANCH")
TOTAL_ALL=$(git rev-list --count "$BRANCH")
log "Branch $BRANCH: $TOTAL_FP first-parent commits ($TOTAL_ALL total), step size $STEP"

# Build checkpoint list: every STEP-th commit of the first-parent history,
# oldest first, one SHA per line. STEP is handed to awk with -v rather than
# being spliced into the awk program text.
CHECKPOINTS_FILE=$(mktemp)
git log --first-parent --reverse --format='%H' "$BRANCH" \
  | awk -v step="$STEP" 'NR % step == 0' > "$CHECKPOINTS_FILE"
NUM_CHECKPOINTS=$(wc -l < "$CHECKPOINTS_FILE" | tr -d ' ')
log "Generated $NUM_CHECKPOINTS checkpoints (every ${STEP} fp commits)"
# --- 5. Delete remote repo ---
# Runs only when DO_DELETE holds the word "true". The response body is
# compared against the literal "deleted"; anything else is reported as a
# warning rather than treated as fatal.
if $DO_DELETE; then
  log "Deleting remote repo $REPO_NAME ..."
  RESULT=$(curl --silent "${CURL_OPTS[@]}" --request DELETE "${BASE_URL}/")
  case "$RESULT" in
    deleted) ok "Remote repo deleted" ;;
    *)       warn "Delete response: $RESULT" ;;
  esac
fi
# --- 6. Enable bulk mode ---
# Sets the skip_fts config key to 1 on the server; the response body is
# discarded.
log "Setting skip_fts=1 (bulk mode) ..."
curl --silent "${CURL_OPTS[@]}" --request PUT \
  "${BASE_URL}/admin/config?key=skip_fts&value=1" > /dev/null
ok "Bulk mode enabled"

# Reset remote tracking. Best effort: a failing fetch is deliberately ignored.
git fetch "$REMOTE_NAME" 2>/dev/null || true
# --- 7. Push checkpoints ---
MAX_PACK_BYTES=30000000 # 30 MB — keeps well within 128 MB DO memory limit

# push_sha: push a single SHA to refs/heads/$BRANCH on the remote.
#
# Arguments:
#   $1 - commit SHA to push
#   $2 - label printed after the OK / FAILED marker (may be empty)
# Globals:
#   REMOTE_NAME, BRANCH, GREEN, RED, NC (read); PUSH_OK (incremented on success)
# Outputs:
#   A status line on stdout; on failure also the first 5 lines of git's output.
# Returns:
#   0 on success, 1 on failure.
push_sha() {
  local target_sha="$1"
  local label="$2"
  # Kept local so that nothing leaks into (or is clobbered in) the caller.
  local rc=0
  local output
  output=$(git push "$REMOTE_NAME" "${target_sha}:refs/heads/${BRANCH}" 2>&1) || rc=$?
  if [[ $rc -eq 0 ]]; then
    echo -e "${GREEN}OK${NC} ${label}"
    PUSH_OK=$((PUSH_OK+1))
    return 0
  else
    echo -e "${RED}FAILED${NC} ${label}"
    echo "$output" | head -5
    return 1
  fi
}
# push_range: push from current remote HEAD to target_sha.
# If pack would exceed MAX_PACK_BYTES, recursively split in half.
#
# Arguments:
#   $1 - commit SHA to push up to
#   $2 - first-parent index of range start
#   $3 - first-parent index of target
# Globals read:
#   REMOTE_NAME, BRANCH, MAX_PACK_BYTES, ALL_FP_FILE (one first-parent SHA per
#   line, oldest first; line N is first-parent index N)
# Returns:
#   0 when every push in the range succeeded, 1 otherwise.
push_range() {
  local target_sha="$1"
  local fp_start="$2"
  local fp_end="$3"

  # Get current remote ref. --verify --quiet makes rev-parse print nothing
  # for an unknown ref; without --verify it echoes the ref name to stdout
  # before failing, which would make the "no ref yet" test below never fire.
  local current_ref
  current_ref=$(git rev-parse --verify --quiet "refs/remotes/${REMOTE_NAME}/${BRANCH}" 2>/dev/null || true)
  if [[ -z "$current_ref" ]]; then
    # No remote ref yet (first push) — just push directly
    push_sha "$target_sha" ""
    return $?
  fi

  # Estimate pack size by generating the thin pack for current_ref..target_sha
  # and counting its bytes.
  local pack_size
  pack_size=$(git pack-objects --revs --stdout --thin <<PACKEOF 2>/dev/null | wc -c
${target_sha}
^${current_ref}
PACKEOF
)
  # Some wc implementations pad the count with spaces.
  pack_size=${pack_size//[[:space:]]/}

  if [[ "$pack_size" -le "$MAX_PACK_BYTES" ]]; then
    # Small enough — push directly
    push_sha "$target_sha" "(${pack_size} bytes)"
    return $?
  fi

  # Too large — split in half
  local range_size=$(( fp_end - fp_start ))
  if [[ "$range_size" -le 1 ]]; then
    # Can't split further — try anyway
    warn "Single commit with ${pack_size} byte pack, pushing anyway"
    push_sha "$target_sha" "(${pack_size} bytes, unsplittable)"
    return $?
  fi

  local mid=$(( fp_start + range_size / 2 ))
  local mid_sha
  mid_sha=$(sed -n "${mid}p" "$ALL_FP_FILE")
  if [[ -z "$mid_sha" ]]; then
    # Never continue with an empty SHA: the resulting refspec
    # ":refs/heads/$BRANCH" would delete the remote branch.
    warn "No commit at first-parent index ${mid} in ${ALL_FP_FILE}; cannot split"
    return 1
  fi
  warn "Pack too large ($(( pack_size / 1048576 )) MB) — splitting fp ${fp_start}..${fp_end} at fp ${mid}"

  push_range "$mid_sha" "$fp_start" "$mid" || return 1
  # Fetch to update remote tracking after the mid-point push
  git fetch "$REMOTE_NAME" 2>/dev/null || true
  push_range "$target_sha" "$mid" "$fp_end" || return 1
}
log "Starting incremental push (auto-split packs > $(( MAX_PACK_BYTES / 1000000 )) MB)..."
echo ""

PUSH_OK=0
PUSH_FAIL=0
START_TIME=$(date +%s)
i=0
PREV_FP=0

# Keep the full fp commit list for splitting (read by push_range).
# It is written to a fresh temp file on every run: a fixed, reused path would
# hand push_range the SHAs of whatever repo/branch was pushed last time.
ALL_FP_FILE=$(mktemp)
git log --first-parent --reverse --format='%H' "$BRANCH" > "$ALL_FP_FILE"

# One iteration per checkpoint SHA; checkpoint i is first-parent commit i*STEP.
while IFS= read -r sha; do
  i=$((i+1))

  # Resume support: pass over the first SKIP_TO checkpoints without pushing.
  if [[ $i -le $SKIP_TO ]]; then
    PREV_FP=$((i * STEP))
    continue
  fi

  FP_NUM=$((i * STEP))
  ELAPSED=$(( $(date +%s) - START_TIME ))
  if [[ $PUSH_OK -gt 0 ]]; then
    # ETA = average seconds per successful push so far * checkpoints left.
    AVG=$(( ELAPSED / PUSH_OK ))
    ETA=$(( AVG * (NUM_CHECKPOINTS - i) ))
    ETA_MIN=$(( ETA / 60 ))
    PROGRESS="[${ELAPSED}s elapsed, ~${ETA_MIN}m remaining]"
  else
    PROGRESS=""
  fi
  printf "${BOLD}[%d/%d]${NC} fp %-6d %s " "$i" "$NUM_CHECKPOINTS" "$FP_NUM" "$PROGRESS"

  if ! push_range "$sha" "$PREV_FP" "$FP_NUM"; then
    PUSH_FAIL=$((PUSH_FAIL+1))
    REF_STATE=$(curl -s "${CURL_OPTS[@]}" "${BASE_URL}/refs" 2>/dev/null || echo "unreachable")
    fail "Push $i failed. Server refs: $REF_STATE"
    fail "Stopping. To resume: $0 --no-delete --skip-to $((i-1)) -r $REPO_PATH -s $STEP"
    break
  fi

  # Update remote tracking after each checkpoint
  git fetch "$REMOTE_NAME" 2>/dev/null || true
  PREV_FP=$FP_NUM
done < "$CHECKPOINTS_FILE"

# Both temp files are only needed while the loop runs.
rm -f -- "$CHECKPOINTS_FILE" "$ALL_FP_FILE"
echo ""
# --- 8. Final push of HEAD ---
# Runs only if no checkpoint push failed. Pushes the branch tip, which covers
# any commits after the last checkpoint. git's own output is shown, with
# stderr merged into stdout.
if (( PUSH_FAIL == 0 )); then
  log "Pushing HEAD ($BRANCH) ..."
  if ! git push "$REMOTE_NAME" "$BRANCH" 2>&1; then
    fail "HEAD push failed"
    PUSH_FAIL=1
  else
    ok "HEAD pushed"
  fi
fi
# --- 9. Rebuild indexes ---
# Runs only when every push succeeded and rebuilds were not disabled.
# First sets skip_fts back to 0, then calls the three admin rebuild endpoints
# in order, printing each response body.
if [[ $PUSH_FAIL -eq 0 ]] && $DO_REBUILD; then
  echo ""
  log "Disabling bulk mode ..."
  curl --silent "${CURL_OPTS[@]}" --request PUT \
    "${BASE_URL}/admin/config?key=skip_fts&value=0" > /dev/null
  ok "Bulk mode disabled"

  # Each entry is "<what is being rebuilt>|<admin endpoint>".
  for rebuild_step in \
    "commit graph|rebuild-graph" \
    "fts_commits|rebuild-fts-commits" \
    "fts_head (code search)|rebuild-fts"; do
    log "Rebuilding ${rebuild_step%%|*} ..."
    RESULT=$(curl --silent "${CURL_OPTS[@]}" --request PUT "${BASE_URL}/admin/${rebuild_step#*|}")
    ok "$RESULT"
  done
fi
# --- 10. Stats ---
# stats_field JSON KEY DEFAULT
#   Prints the numeric value that follows "KEY": in JSON (first occurrence),
#   or DEFAULT when the key is missing or has no digits after it.
#   Always returns 0: under `set -e` with pipefail, a grep that matches
#   nothing would otherwise abort the script before the summary is printed.
stats_field() {
  local json="$1"
  local key="$2"
  local fallback="$3"
  local value
  value=$(printf '%s\n' "$json" \
    | grep -o "\"${key}\":[0-9.]*" \
    | head -n 1 \
    | cut -d: -f2) || true
  printf '%s\n' "${value:-$fallback}"
}

echo ""
ELAPSED=$(( $(date +%s) - START_TIME ))

# Best effort: an unreachable stats endpoint must not hide the push summary.
STATS=$(curl -s "${CURL_OPTS[@]}" "${BASE_URL}/stats") || STATS=""
COMMITS=$(stats_field "$STATS" commits "?")
BLOBS=$(stats_field "$STATS" blobs "?")
RATIO=$(stats_field "$STATS" compression_ratio "?")
DB_SIZE=$(stats_field "$STATS" database_size_bytes 0)
# Only a plain integer is safe to use in the arithmetic below.
[[ "$DB_SIZE" =~ ^[0-9]+$ ]] || DB_SIZE=0
DB_MB=$(( DB_SIZE / 1048576 ))

log "Results:"
echo " Pushes: $PUSH_OK ok, $PUSH_FAIL failed"
echo " Time: ${ELAPSED}s"
echo " Commits: $COMMITS"
echo " Blobs: $BLOBS"
echo " Compression: ${RATIO}x"
echo " DB size: ${DB_MB} MB"
echo ""

# A non-zero exit status signals that at least one push failed.
if [[ $PUSH_FAIL -gt 0 ]]; then
  exit 1
fi