Files
psyc/scripts/deploy.sh
m17hr1l 9c3447723a stage-28 fix: deploy.sh — auto-trust Gitea host (TOFU), never touch identity keys
Reinstating the auto known_hosts entry on first deploy. Clear scope:
host trust (TOFU known_hosts entry) is automated — same as
'ssh -o StrictHostKeyChecking=accept-new' would do; identity keypairs
(~/.ssh/id_*) are never generated/copied/modified by deploy.sh.
PSYC_SKIP_HOST_TRUST=1 disables the auto-trust step if you'd rather
verify fingerprints manually.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 16:36:18 +02:00

192 lines
8.5 KiB
Bash
Executable File

#!/usr/bin/env bash
# deploy.sh — sync this branch to the prod box and verify the cockpit is serving.
#
# Usage: scripts/deploy.sh
#
# Env vars (all have defaults — override only if your setup differs):
# PSYC_PROD_HOST default: neuronetz@cloud.neuronetz.ai
# PSYC_PROD_PATH default: /home/neuronetz/docker-public/neuro-psyc
# PSYC_PROD_URL default: https://psyc.neuronetz.ai
# PSYC_PROD_GPU set to 1 to also bring up the inference (GPU) service
# PSYC_GIT_REMOTE default: origin
# PSYC_BRANCH default: the currently checked-out branch
# PSYC_SKIP_HOST_TRUST set to 1 to skip the automatic known_hosts entry for the git host on prod
#
# What it does (idempotent — safe to re-run):
# 1. push the branch (PSYC_BRANCH) to the git remote (PSYC_GIT_REMOTE, default origin)
# 2. ssh into prod, trust the git host key on first deploy, clone the repo if missing, pull the branch
# 3. docker compose up -d --build (+ gpu profile if PSYC_PROD_GPU=1)
# 4. probe :8767/healthz on the prod box + the public URL; report state
#
# What it does NOT do:
# • overwrite an existing .env on the prod box (set keys there once, manually — gitignored);
#   if .env is missing it is seeded from .env.example so compose can start
# • transfer data/ or model artifacts (gitignored; prod fetches its own)
# • configure DNS or TLS (that's the reverse-proxy + acme-companion side)
set -euo pipefail

# Deployment settings. Each value can be overridden through the PSYC_*
# environment variable named on the same line.
HOST="${PSYC_PROD_HOST:-neuronetz@cloud.neuronetz.ai}"
REMOTE_PATH="${PSYC_PROD_PATH:-/home/neuronetz/docker-public/neuro-psyc}"
PUBLIC_URL="${PSYC_PROD_URL:-https://psyc.neuronetz.ai}"
GIT_REMOTE="${PSYC_GIT_REMOTE:-origin}"
WITH_GPU="${PSYC_PROD_GPU:-}"

# Branch to deploy: PSYC_BRANCH when set, otherwise the checked-out branch.
# The lookup is done explicitly (not inside the default expansion) so that a
# missing git binary, a directory that is not a repository, or a detached HEAD
# ends with a readable message instead of a bare set -e exit. The message is
# printed directly because the [deploy] output helpers are defined further down.
BRANCH="${PSYC_BRANCH:-}"
if [[ -z "$BRANCH" ]]; then
  BRANCH="$(git rev-parse --abbrev-ref HEAD 2>/dev/null)" || BRANCH=""
  # --abbrev-ref prints the literal word HEAD when no branch is checked out.
  if [[ -z "$BRANCH" || "$BRANCH" == "HEAD" ]]; then
    printf '%s\n' "[deploy] cannot determine the branch to deploy — run from the psyc repo root with a branch checked out, or set PSYC_BRANCH" >&2
    exit 1
  fi
fi
# ── tty styling ─────────────────────────────────────────────────────────
# Style codes default to empty strings and are only filled in when stdout is
# a terminal, so piped or redirected output stays free of escape sequences.
B=''; D=''; R=''; G=''; Y=''; C=''; Z=''
if [[ -t 1 ]]; then
  B=$'\e[1m'    # bold
  D=$'\e[2m'    # dim
  R=$'\e[31m'   # red
  G=$'\e[32m'   # green
  Y=$'\e[33m'   # yellow
  C=$'\e[36m'   # cyan
  Z=$'\e[0m'    # reset
fi

# _tinted COLOUR MESSAGE — one line with both the [deploy] tag and the
# message wrapped in COLOUR.
_tinted() {
  printf '%s[deploy]%s %s%s%s\n' "$1" "$Z" "$1" "$2" "$Z"
}

# say MSG… — progress line: cyan tag, uncoloured message.
say() {
  printf '%s[deploy]%s %s\n' "$C" "$Z" "$*"
}

# ok MSG… — success line in green.
ok() {
  _tinted "$G" "$*"
}

# warn MSG… — warning line in yellow (stdout).
warn() {
  _tinted "$Y" "$*"
}

# fail MSG… — error line in red on stderr, then exit the script with status 1.
fail() {
  _tinted "$R" "$*" >&2
  exit 1
}
# ── 0. preflight ────────────────────────────────────────────────────────
# Tools the local half of the deploy needs.
for _tool in ssh git curl; do
  command -v "$_tool" >/dev/null || fail "${_tool} not installed locally"
done
unset _tool

# The script works on the repository in the current directory.
if [[ ! -d .git ]]; then
  fail "run from the psyc repo root (no .git here)"
fi

# Only commits travel with git push, so point out edits (working tree or
# index) that differ from HEAD. This is informational; the deploy continues.
if git diff --quiet HEAD -- 2>/dev/null && git diff --cached --quiet 2>/dev/null; then
  : # nothing uncommitted
else
  warn "local working tree has uncommitted changes — they won't be deployed (git push only sends commits)."
fi

# URL of the configured remote; it is handed to the prod box for the clone.
if ! GIT_URL=$(git config --get "remote.${GIT_REMOTE}.url"); then
  fail "no remote '${GIT_REMOTE}' configured locally"
fi
# Parse the git URL to pull out the SSH host + port so the prod box can
# pre-trust the Gitea host key before its first clone. Supports both
#   ssh://[user@]host[:port]/path   and   user@host:path
# Any other form (https://, file://, a local path) leaves GIT_HOST empty.

#######################################
# Extract the SSH endpoint from a git remote URL.
# Globals:   GIT_HOST (written; empty when the URL is not an SSH remote)
#            GIT_PORT (written; "22" unless the URL names a port)
# Arguments: $1 - git remote URL
#######################################
parse_git_ssh_endpoint() {
  local url=$1
  # The optional user part may not contain '/', so an '@' later in the path
  # cannot be mistaken for the user/host separator.
  local url_form='^ssh://([^@/]+@)?([^:/]+)(:([0-9]+))?/'
  # The user part may contain neither ':' nor '/', which keeps
  # scheme://user@host:port URLs (e.g. https with an embedded user name)
  # from being treated as scp-style SSH remotes.
  local scp_form='^[^@/:]+@([^:/]+):'

  GIT_HOST=""
  GIT_PORT="22"
  if [[ "$url" =~ $url_form ]]; then
    GIT_HOST="${BASH_REMATCH[2]}"
    if [[ -n "${BASH_REMATCH[4]:-}" ]]; then
      GIT_PORT="${BASH_REMATCH[4]}"
    fi
  elif [[ "$url" =~ $scp_form ]]; then
    GIT_HOST="${BASH_REMATCH[1]}"
  fi
}

parse_git_ssh_endpoint "$GIT_URL"
# ── 1. local push ───────────────────────────────────────────────────────
say "pushing ${B}${BRANCH}${Z} to ${B}${GIT_REMOTE}${Z}"
git push "${GIT_REMOTE}" "${BRANCH}" || fail "git push failed — fix and retry"
# Report the commit of the ref that was pushed. PSYC_BRANCH may name a branch
# other than the checked-out one, in which case HEAD would be the wrong
# revision to print here and in the final summary line.
LOCAL_REV=$(git rev-parse --short "${BRANCH}") \
  || fail "cannot resolve '${BRANCH}' to a local commit"
ok "pushed ${BRANCH} @ ${LOCAL_REV}"
# ── 2. remote bring-up ──────────────────────────────────────────────────
say "deploying to ${B}${HOST}:${REMOTE_PATH}${Z}"
# Extra arguments for 'docker compose' on the prod box. --profile is a global
# option of 'docker compose', so the remote script places this string between
# 'docker compose' and the subcommand.
COMPOSE_PROFILES=""
if [[ -n "$WITH_GPU" ]]; then
  COMPOSE_PROFILES="--profile gpu"
fi
# heredoc runs on the prod box. Local vars are interpolated by THIS shell;
# remote vars start with \$ so they're set on the remote side. Because the
# heredoc is unquoted, comments inside it are expanded locally as well — keep
# dollar signs and backticks out of them.
ssh -o StrictHostKeyChecking=accept-new -T "${HOST}" bash -s <<REMOTE
set -euo pipefail
HOST_PATH="${REMOTE_PATH}"
BRANCH="${BRANCH}"
GIT_URL="${GIT_URL}"
GIT_HOST="${GIT_HOST}"
GIT_PORT="${GIT_PORT}"
# Not called COMPOSE_PROFILES on this side: Compose reads an environment
# variable of that name as a list of profile names.
PROFILE_ARGS="${COMPOSE_PROFILES}"
prn() { printf ' · %s\n' "\$*"; }
# 2a. trust the Gitea SSH host on first deploy.
#
# Boundary, intentional and narrow:
# • host trust (~/.ssh/known_hosts entry) → AUTO on first run. This is TOFU,
# same as what 'ssh -o StrictHostKeyChecking=accept-new' would do.
# • identity keys (~/.ssh/id_*) → NEVER touched. We won't
# generate, copy, or modify your private/public keypairs.
# Skip the auto-trust by setting PSYC_SKIP_HOST_TRUST=1 on your laptop.
if [[ -n "\$GIT_HOST" && -z "${PSYC_SKIP_HOST_TRUST:-}" ]]; then
  mkdir -p ~/.ssh && chmod 700 ~/.ssh
  # known_hosts (and ssh-keyscan output) use the bare host name for port 22
  # and the bracketed [host]:port form only for other ports. Looking up the
  # bracketed form for port 22 would never match, so every deploy would scan
  # again and append whatever key the host presents at that moment.
  if [[ "\$GIT_PORT" == "22" ]]; then
    KH_ENTRY="\$GIT_HOST"
  else
    KH_ENTRY="[\$GIT_HOST]:\$GIT_PORT"
  fi
  if ! ssh-keygen -F "\$KH_ENTRY" -f ~/.ssh/known_hosts >/dev/null 2>&1; then
    prn "adding \$KH_ENTRY to ~/.ssh/known_hosts (TOFU on first deploy)"
    # Scan into a variable first: an unreachable host can yield no keys
    # without a failing exit status, and nothing should be appended then.
    SCANNED_KEYS=\$(ssh-keyscan -T 5 -p "\$GIT_PORT" "\$GIT_HOST" 2>/dev/null) || SCANNED_KEYS=""
    if [[ -z "\$SCANNED_KEYS" ]]; then
      echo "[deploy] couldn't reach \$GIT_HOST:\$GIT_PORT to fetch host key" >&2
      exit 1
    fi
    printf '%s\n' "\$SCANNED_KEYS" >> ~/.ssh/known_hosts
    chmod 600 ~/.ssh/known_hosts
  fi
fi
# 2b. ensure dir + working tree
if [[ ! -d "\$HOST_PATH/.git" ]]; then
  prn "no working tree at \$HOST_PATH — cloning \$GIT_URL"
  mkdir -p "\$(dirname "\$HOST_PATH")"
  if ! git clone "\$GIT_URL" "\$HOST_PATH"; then
    cat >&2 <<HINT
[deploy] git clone failed. Likely causes (check in order):
• Host key wasn't trusted → ssh -p \$GIT_PORT -T git@\$GIT_HOST to accept it once.
• No SSH identity key here, or its pubkey isn't in Gitea for this user.
ls ~/.ssh/id_* 2>/dev/null
(none?) → ssh-keygen -t ed25519
then: cat ~/.ssh/id_ed25519.pub # paste into Gitea → Settings → SSH Keys
• Repo URL wrong or you're not a collaborator on m17hr1l/psyc.
deploy.sh will NOT modify ~/.ssh — fix it once and re-run.
HINT
    exit 1
  fi
fi
cd "\$HOST_PATH"
# 2c. fetch + checkout + pull
prn "git fetch origin"
git fetch --quiet origin
prn "git checkout \$BRANCH"
# Fall back to creating the local branch from origin when it does not exist yet.
git checkout --quiet "\$BRANCH" 2>/dev/null || git checkout --quiet -b "\$BRANCH" "origin/\$BRANCH"
prn "git pull --ff-only origin \$BRANCH"
git pull --quiet --ff-only origin "\$BRANCH"
REMOTE_REV=\$(git rev-parse --short HEAD)
prn "now at \$REMOTE_REV"
# 2d. .env sanity
if [[ ! -f .env ]]; then
  prn "WARNING: .env missing — copying .env.example. Edit it before psyc fetch-all will work."
  cp .env.example .env
fi
# 2e. external 'backend' network for nginx-proxy
if ! docker network ls --format '{{.Name}}' | grep -qx backend; then
  prn "creating external docker network 'backend'"
  docker network create backend
fi
# 2f. compose up. PROFILE_ARGS is left unquoted on purpose so that it splits
# into separate arguments, or into nothing when no profile was requested.
prn "docker compose \${PROFILE_ARGS:+\$PROFILE_ARGS }up -d --build"
docker compose \$PROFILE_ARGS up -d --build
prn "container status:"
docker compose ps --format "table {{.Name}}\t{{.Status}}" | sed 's/^/ /'
REMOTE
ok "remote bring-up complete"
# ── 3. internal health probe (on the prod box localhost) ───────────────
say "probing ${B}127.0.0.1:8767/healthz${Z} on prod (up to 90s)…"
# Script executed on the prod box: up to 45 attempts with a 2s pause between
# them. It prints OK on the first successful request and TIMEOUT when every
# attempt failed. Single quotes keep the text from being expanded locally.
HEALTH_PROBE='
for i in $(seq 1 45); do
if curl -fs http://127.0.0.1:8767/healthz >/dev/null 2>&1; then echo OK; exit 0; fi
sleep 2
done
echo TIMEOUT'
REMOTE_HEALTH=$(ssh -o StrictHostKeyChecking=accept-new "${HOST}" "${HEALTH_PROBE}")
case "${REMOTE_HEALTH}" in
  *OK*)
    ;;
  *)
    fail "cockpit unhealthy on prod after 90s — ssh ${HOST}, cd ${REMOTE_PATH}, run 'docker compose logs cockpit' to debug"
    ;;
esac
ok "cockpit healthy on prod"
# ── 4. external probe via the public URL ────────────────────────────────
# A failure here is reported as a warning only: the internal probe has
# already confirmed that the cockpit answers on the prod box itself.
say "probing ${B}${PUBLIC_URL}/healthz${Z} from here…"
if curl --max-time 8 -fs "${PUBLIC_URL}/healthz" >/dev/null 2>&1; then
  # Extra detail for the summary; an empty JSON object stands in when the
  # status endpoint cannot be fetched.
  INF=$(curl --max-time 5 -s "${PUBLIC_URL}/api/inference-status" || printf '%s' '{}')
  ok "${PUBLIC_URL} is LIVE"
  printf " inference: %s\n" "${INF}"
else
  # Host name of PUBLIC_URL (scheme, path, user info and port removed) so the
  # dig hint stays correct when PSYC_PROD_URL overrides the default.
  PUBLIC_HOST="${PUBLIC_URL#*://}"
  PUBLIC_HOST="${PUBLIC_HOST%%/*}"
  PUBLIC_HOST="${PUBLIC_HOST##*@}"
  PUBLIC_HOST="${PUBLIC_HOST%%:*}"
  warn "public URL not reachable from here — most likely DNS or TLS isn't finished"
  warn " diag:"
  warn " dig +short ${PUBLIC_HOST} → expect A record to prod IP"
  warn " on the prod-host: docker logs acme-companion --tail 30"
  warn " cockpit IS healthy on prod-internal :8767 — the app is fine, the front isn't there yet"
fi
ok "done — deployed ${BRANCH} @ ${LOCAL_REV}"