Files
psyc/scripts/deploy.sh
m17hr1l 2c2ead6149 stage-28 fix: deploy.sh pre-trusts the Gitea SSH host key (first-clone)
A fresh prod box has never SSH'd to gitea.neuronetz.ai before, so the
first 'git clone' failed with 'Host key verification failed'. The
script now parses the git remote URL to extract host+port, and on the
prod box does an ssh-keyscan into ~/.ssh/known_hosts before the clone
when the entry is missing. TOFU — if you want to verify the fingerprint
out-of-band, pre-populate known_hosts manually and the script will see
the entry and skip the scan.

Also: if the clone still fails after the host key is trusted (likely a
missing SSH key on Gitea side), the script now prints a clear hint
pointing at where to register it. Supports both ssh://user@host:port/
and user@host: URL forms.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 15:32:44 +02:00

185 lines
8.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# deploy.sh — sync this branch to the prod box and verify the cockpit is serving.
#
# Usage: scripts/deploy.sh
#
# Env vars (all have defaults — override only if your setup differs):
# PSYC_PROD_HOST default: neuronetz@cloud.neuronetz.ai
# PSYC_PROD_PATH default: /home/neuronetz/docker-public/neuro-psyc
# PSYC_PROD_URL default: https://psyc.neuronetz.ai
# PSYC_PROD_GPU set to any non-empty value (e.g. 1) to also bring up the inference (GPU) service
# PSYC_GIT_REMOTE default: origin
# PSYC_BRANCH default: the currently checked-out branch
#
# What it does (idempotent — safe to re-run):
# 1. push the branch (PSYC_BRANCH) to the git remote (PSYC_GIT_REMOTE, default: origin)
# 2. ssh into prod, clone the repo if missing, pull the branch
# 3. docker compose up -d --build (+ gpu profile if PSYC_PROD_GPU=1)
# 4. probe :8767/healthz on the prod box + the public URL; report state
#
# What it does NOT do:
# • overwrite an existing .env on the prod box (a missing one is seeded from .env.example; set keys there once, manually — gitignored)
# • transfer data/ or model artifacts (gitignored; prod fetches its own)
# • configure DNS or TLS (that's the reverse-proxy + acme-companion side)
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Deployment settings. A PSYC_* override wins when it is set and non-empty;
# otherwise the built-in default is used.
HOST=${PSYC_PROD_HOST:-neuronetz@cloud.neuronetz.ai}
REMOTE_PATH=${PSYC_PROD_PATH:-/home/neuronetz/docker-public/neuro-psyc}
PUBLIC_URL=${PSYC_PROD_URL:-https://psyc.neuronetz.ai}
GIT_REMOTE=${PSYC_GIT_REMOTE:-origin}

# Branch to deploy: the explicit override, else whatever is checked out now.
# git is only consulted when no override is given.
if [[ -n "${PSYC_BRANCH:-}" ]]; then
  BRANCH=${PSYC_BRANCH}
else
  BRANCH=$(git rev-parse --abbrev-ref HEAD)
fi

# Empty means "no GPU profile"; any non-empty value enables it further down.
WITH_GPU=${PSYC_PROD_GPU:-}
# ── tty styling ─────────────────────────────────────────────────────────
# Style codes start out empty so piped or redirected output stays plain; the
# ANSI sequences are filled in only when stdout is a terminal.
B='' D='' R='' G='' Y='' C='' Z=''
if [[ -t 1 ]]; then
  B=$'\e[1m'   # bold
  D=$'\e[2m'   # dim
  R=$'\e[31m'  # red
  G=$'\e[32m'  # green
  Y=$'\e[33m'  # yellow
  C=$'\e[36m'  # cyan
  Z=$'\e[0m'   # reset
fi

# _deploy_line TAG_STYLE BODY_STYLE BODY_RESET MESSAGE
# Prints "[deploy] MESSAGE": the tag is wrapped in TAG_STYLE…Z and the message
# in BODY_STYLE…BODY_RESET (both may be empty for an unstyled message).
_deploy_line() {
  printf '%s[deploy]%s %s%s%s\n' "$1" "${Z}" "$2" "$4" "$3"
}

# say: neutral progress line (cyan tag, plain message) on stdout.
say() { _deploy_line "${C}" '' '' "$*"; }
# ok: success line, green tag and message, on stdout.
ok() { _deploy_line "${G}" "${G}" "${Z}" "$*"; }
# warn: warning line, yellow tag and message, on stdout.
warn() { _deploy_line "${Y}" "${Y}" "${Z}" "$*"; }
# fail: error line, red tag and message, on stderr; then exits the script with 1.
fail() {
  _deploy_line "${R}" "${R}" "${Z}" "$*" >&2
  exit 1
}
# ── 0. preflight ────────────────────────────────────────────────────────
# Local tools this script shells out to.
for tool in ssh git curl; do
  command -v "$tool" >/dev/null || fail "$tool not installed locally"
done

# Must run from the repo root. A linked worktree (git worktree add) or a
# submodule checkout has a .git *file* instead of a directory, so test for
# existence rather than for a directory.
[[ -e .git ]] || fail "run from the psyc repo root (no .git here)"

# Uncommitted work is only warned about, never a reason to abort: the deploy
# ships commits, so whatever is dirty simply stays behind.
if ! git diff --quiet HEAD -- 2>/dev/null || ! git diff --cached --quiet 2>/dev/null; then
  warn "local working tree has uncommitted changes — they won't be deployed (git push only sends commits)."
fi

# URL of the remote we push to; abort early when that remote is not configured.
GIT_URL=$(git config --get "remote.${GIT_REMOTE}.url") \
  || fail "no remote '${GIT_REMOTE}' configured locally"
# Parse the git URL to pull out the SSH host + port so the prod box can
# pre-trust the Gitea host key before its first clone. Supports both
#   ssh://[user@]host[:port]/path   and   user@host:path
# Anything else (https://…, a local path, …) leaves GIT_HOST empty.

#######################################
# Extract the SSH endpoint from a git remote URL.
# Globals:   GIT_HOST (written; "" when the URL is not an SSH remote)
#            GIT_PORT (written; "22" unless the URL names a port)
# Arguments: $1 - git remote URL
# Returns:   0 always
#######################################
parse_git_ssh_endpoint() {
  local url=${1:-}
  # The user part is optional in ssh:// URLs.
  local -r re_ssh_url='^ssh://([^@/]+@)?([^:/@]+)(:([0-9]+))?/'
  # The user part may not contain '/' or ':' so that scheme URLs such as
  # https://user@host:3000/… are not mistaken for the scp-like form.
  local -r re_scp_like='^[^@/:]+@([^:/]+):'

  GIT_HOST=""
  GIT_PORT="22"
  if [[ "$url" =~ $re_ssh_url ]]; then
    GIT_HOST="${BASH_REMATCH[2]}"
    if [[ -n "${BASH_REMATCH[4]:-}" ]]; then
      GIT_PORT="${BASH_REMATCH[4]}"
    fi
  elif [[ "$url" =~ $re_scp_like ]]; then
    GIT_HOST="${BASH_REMATCH[1]}"
  fi
  # Explicit success so a non-matching URL never trips `set -e` at the call site.
  return 0
}

parse_git_ssh_endpoint "${GIT_URL:-}"
# ── 1. local push ───────────────────────────────────────────────────────
say "pushing ${B}${BRANCH}${Z} to ${B}${GIT_REMOTE}${Z}"
git push "${GIT_REMOTE}" "${BRANCH}" || fail "git push failed — fix and retry"
# Report the commit that was actually pushed. When PSYC_BRANCH names a branch
# other than the checked-out one, HEAD is the wrong commit, so resolve the
# branch itself and fall back to HEAD only if that lookup fails.
LOCAL_REV=$(git rev-parse --verify --quiet --short "${BRANCH}^{commit}") \
  || LOCAL_REV=$(git rev-parse --short HEAD)
ok "pushed ${BRANCH} @ ${LOCAL_REV}"
# ── 2. remote bring-up ──────────────────────────────────────────────────
say "deploying to ${B}${HOST}:${REMOTE_PATH}${Z}"
# Extra options for 'docker compose' on the prod box (empty = default services).
COMPOSE_PROFILES=""
if [[ -n "$WITH_GPU" ]]; then
  COMPOSE_PROFILES="--profile gpu"
fi
# heredoc runs on the prod box. Local vars are interpolated by THIS shell;
# remote vars start with \$ so they're set on the remote side.
# NOTE(review): the interpolated values end up inside double quotes in the
# remote script, so a path/branch/URL containing a double quote, dollar sign,
# backtick or backslash would be re-interpreted on the prod box. Not changed
# here to keep existing overrides behaving the same; keep such characters out
# of PSYC_PROD_PATH / PSYC_BRANCH.
ssh -o StrictHostKeyChecking=accept-new -T "${HOST}" bash -s <<REMOTE
set -euo pipefail
HOST_PATH="${REMOTE_PATH}"
BRANCH="${BRANCH}"
GIT_URL="${GIT_URL}"
GIT_HOST="${GIT_HOST}"
GIT_PORT="${GIT_PORT}"
# Not named COMPOSE_PROFILES on this side: docker compose itself reads an
# environment variable of that name.
PROFILE_ARGS="${COMPOSE_PROFILES}"
prn() { printf ' · %s\n' "\$*"; }

# 2a. pre-trust the Gitea SSH host key so the first clone doesn't fail with
#     'Host key verification failed'. This is TOFU — we accept whatever the
#     server currently presents. If you want to verify the fingerprint
#     manually, do so once and place it in ~/.ssh/known_hosts yourself.
if [[ -n "\$GIT_HOST" ]]; then
  mkdir -p ~/.ssh && chmod 700 ~/.ssh
  # known_hosts (and ssh-keyscan output) name the default port by the bare
  # host and use the bracketed [host]:port form only for other ports, so the
  # lookup key has to follow the same rule or it never matches on port 22.
  if [[ "\$GIT_PORT" == "22" ]]; then
    KH_ENTRY="\$GIT_HOST"
  else
    KH_ENTRY="[\$GIT_HOST]:\$GIT_PORT"
  fi
  if ! ssh-keygen -F "\$KH_ENTRY" -f ~/.ssh/known_hosts >/dev/null 2>&1; then
    prn "adding \$KH_ENTRY to ~/.ssh/known_hosts (first time on this prod box)"
    # Judge the scan by its output, not only by its exit status: a scan that
    # printed no key must count as a failure and must not touch known_hosts.
    SCANNED_KEYS=\$(ssh-keyscan -T 5 -p "\$GIT_PORT" "\$GIT_HOST" 2>/dev/null) || SCANNED_KEYS=""
    if [[ -z "\$SCANNED_KEYS" ]]; then
      echo "[deploy] could not reach \$GIT_HOST:\$GIT_PORT from this box" >&2
      exit 1
    fi
    printf '%s\n' "\$SCANNED_KEYS" >> ~/.ssh/known_hosts
    chmod 600 ~/.ssh/known_hosts
  fi
fi

# 2b. ensure dir + working tree
if [[ ! -d "\$HOST_PATH/.git" ]]; then
  prn "no working tree at \$HOST_PATH — cloning \$GIT_URL"
  mkdir -p "\$(dirname "\$HOST_PATH")"
  if ! git clone "\$GIT_URL" "\$HOST_PATH"; then
    cat >&2 <<HINT
[deploy] git clone failed. Two likely causes:
• This prod box has no SSH key registered in Gitea for this user.
Run on prod: cat ~/.ssh/id_*.pub (or ssh-keygen -t ed25519 if none)
Then in Gitea: Settings → SSH Keys → add it.
• The repo URL is wrong / private and you're not a collaborator.
HINT
    exit 1
  fi
fi
cd "\$HOST_PATH"

# 2c. fetch + checkout + pull
prn "git fetch origin"
git fetch --quiet origin
prn "git checkout \$BRANCH"
# Use the local branch if it exists, else create it from the remote branch.
git checkout --quiet "\$BRANCH" 2>/dev/null || git checkout --quiet -b "\$BRANCH" "origin/\$BRANCH"
prn "git pull --ff-only origin \$BRANCH"
git pull --quiet --ff-only origin "\$BRANCH"
REMOTE_REV=\$(git rev-parse --short HEAD)
prn "now at \$REMOTE_REV"

# 2d. .env sanity
if [[ ! -f .env ]]; then
  prn "WARNING: .env missing — copying .env.example. Edit it before psyc fetch-all will work."
  cp .env.example .env
fi

# 2e. external 'backend' network for nginx-proxy
# inspect instead of 'network ls | grep -q': under pipefail an early-exiting
# grep can make the listing side fail and an existing network look absent.
if ! docker network inspect backend >/dev/null 2>&1; then
  prn "creating external docker network 'backend'"
  docker network create backend
fi

# 2f. compose up
# --profile is an option of 'docker compose' itself and must come before the
# subcommand. PROFILE_ARGS is left unquoted on purpose: it is either empty or
# the two words "--profile gpu".
prn "docker compose \${PROFILE_ARGS:+\$PROFILE_ARGS }up -d --build"
docker compose \$PROFILE_ARGS up -d --build
prn "container status:"
docker compose ps --format "table {{.Name}}\t{{.Status}}" | sed 's/^/ /'
REMOTE
ok "remote bring-up complete"
# ── 3. internal health probe (on the prod box localhost) ───────────────
say "probing ${B}127.0.0.1:8767/healthz${Z} on prod (up to 90s)"
# The single-quoted script runs on the prod box: 45 attempts, 2s apart; it
# prints OK on the first successful request and TIMEOUT when none succeeds.
# A non-zero ssh exit (connection/transport problem) is mapped to a sentinel
# so it gets its own diagnostic instead of silently tripping `set -e` here.
REMOTE_HEALTH=$(ssh -o StrictHostKeyChecking=accept-new "${HOST}" '
for i in $(seq 1 45); do
  if curl -fs http://127.0.0.1:8767/healthz >/dev/null 2>&1; then echo OK; exit 0; fi
  sleep 2
done
echo TIMEOUT') || REMOTE_HEALTH="SSH_FAILED"
if [[ "${REMOTE_HEALTH}" == "SSH_FAILED" ]]; then
  fail "could not run the health probe — ssh to ${HOST} failed"
fi
if [[ "${REMOTE_HEALTH}" != *OK* ]]; then
  fail "cockpit unhealthy on prod after 90s — ssh ${HOST}, cd ${REMOTE_PATH}, run 'docker compose logs cockpit' to debug"
fi
ok "cockpit healthy on prod"
# ── 4. external probe via the public URL ────────────────────────────────

#######################################
# Print the host name contained in a URL.
# Arguments: $1 - URL (scheme, userinfo, port and path are all optional)
# Outputs:   the bare host name on stdout
#######################################
public_host_of() {
  local host=${1:-}
  host=${host#*://}   # drop the scheme
  host=${host%%/*}    # drop the path
  host=${host##*@}    # drop userinfo
  host=${host%%:*}    # drop the port
  printf '%s\n' "$host"
}

say "probing ${B}${PUBLIC_URL}/healthz${Z} from here…"
if curl --max-time 8 -fs "${PUBLIC_URL}/healthz" >/dev/null 2>&1; then
  # Extra detail only; fall back to an empty JSON object if this call fails.
  INF=$(curl --max-time 5 -s "${PUBLIC_URL}/api/inference-status" || printf '%s' '{}')
  ok "${PUBLIC_URL} is LIVE"
  printf " inference: %s\n" "${INF}"
else
  # A failing public probe is only a warning: the internal probe has already
  # passed by the time this runs. The DNS hint uses the host from PUBLIC_URL
  # so it stays correct when PSYC_PROD_URL is overridden.
  PUBLIC_HOST=$(public_host_of "${PUBLIC_URL}")
  warn "public URL not reachable from here — most likely DNS or TLS isn't finished"
  warn " diag:"
  warn " dig +short ${PUBLIC_HOST} → expect A record to prod IP"
  warn " on the prod-host: docker logs acme-companion --tail 30"
  warn " cockpit IS healthy on prod-internal :8767 — the app is fine, the front isn't there yet"
fi
ok "done — deployed ${BRANCH} @ ${LOCAL_REV}"