Files
neuronetz-gateway/demo.sh
Stephan Berbig b47a09db91 demo + playground + docs
One-command demo so the gateway can be exercised end-to-end without a GPU or a
real model download:

- demo/mock-ollama/ — tiny FastAPI service emulating Ollama (/api/tags,
  /api/chat + /api/generate NDJSON streaming with realistic prompt_eval_count
  and eval_count on the final frame, /api/embed, /api/show, /api/version).
  Non-root multi-stage Dockerfile, never published (internal network only).
- docker-compose.demo.yml — postgres + redis + mock-ollama + gateway, with
  PLAYGROUND_ENABLED=true and ./playground mounted read-only at /app/playground.
  Mirrors the prod posture (mock-ollama not exposed).
- demo.sh — brings the stack up, waits on /healthz, creates a demo tenant with
  allow_all_models and a fresh API key via the bootstrap CLI inside the
  container, then prints the key, the playground URL, and five ready-to-paste
  curl commands (SSE chat, NDJSON chat, /v1/models, a 401, a 403 /api/pull).
  ./demo.sh --down tears everything back down with volumes.
- playground/index.html — single-file dark-themed UI served same-origin by
  the gateway at /playground (CORS-free). Per-endpoint About card with method/
  auth/streaming badges, a real description, sample request body, sample
  response, and a footer note. Live SSE/NDJSON rendering of the response.
  A live, copyable curl box that mirrors exactly what Run sends. Run + Refresh
  are visibly gated until an API key is in the field; the Base URL is
  force-pinned to location.origin three times to defeat browser autofill.
- docs/ — API.md (full endpoint reference with curl, streaming formats, error
  model, SPEC §6.5 response headers), ARCHITECTURE.md (incl. §4.6 discovery
  + the request lifecycle), DEPLOYMENT.md (Ollama-never-exposed rule,
  pointing at a real Ollama backend, env reference), THREAT_MODEL.md
  (SPEC §3 table + the allow_all_models opt-in notes), OPERATIONS.md
  (key/budget/model/usage runbook + fail-closed table), PLAYGROUND.md.
  mkdocs.yml (Material theme) wires them together.
2026-05-26 20:52:33 +02:00

205 lines
8.9 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# demo.sh — the neuronetz-gateway one-command presentation.
#
# Brings up the demo stack (postgres + redis + mock-ollama + gateway) with NO
# GPU and NO model downloads, creates a demo tenant + API key via the bootstrap
# CLI *inside the gateway container*, and prints a clean summary with the key,
# the playground URL, and ready-to-paste curl commands.
#
# Usage:
# ./demo.sh # build + start, bootstrap a tenant/key, print summary
# ./demo.sh --down # tear the whole stack down (and remove volumes)
# ./demo.sh --help # this help
#
# Re-runnable: an existing demo tenant is reused. The full API key is only ever
# printed once at creation (SPEC §11), so every run mints a fresh key with a
# unique, timestamp-suffixed name and prints it.
# Strict mode: abort on errors, on unset variables, and on failures anywhere
# in a pipeline.
set -o errexit -o nounset -o pipefail
# ──────────────────────────────────────────────────────────────────────────
# Configuration
# ──────────────────────────────────────────────────────────────────────────
# Names used for the demo tenant and as the prefix of every generated key name.
TENANT_NAME='demo'
KEY_NAME='demo-key'
# Where the gateway is reached from the host, and the playground beneath it.
GATEWAY_URL='http://localhost:8080'
PLAYGROUND_URL="${GATEWAY_URL}/playground"
# Absolute directory of this script, so the compose file is located correctly
# no matter which directory the script is invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
COMPOSE_FILE="${SCRIPT_DIR}/docker-compose.demo.yml"
# Compose invocation kept as an array so each argument stays a separate word.
COMPOSE=(docker compose -f "${COMPOSE_FILE}")
# ANSI colour/style escapes. They are only populated when stdout is a
# terminal; otherwise every variable is an empty string so redirected output
# stays free of escape sequences.
if [[ -t 1 ]]; then
  BOLD=$'\033[1m'
  DIM=$'\033[2m'
  RESET=$'\033[0m'
  CYAN=$'\033[36m'
  GREEN=$'\033[32m'
  YELLOW=$'\033[33m'
else
  BOLD=''
  DIM=''
  RESET=''
  CYAN=''
  GREEN=''
  YELLOW=''
fi
# Print a progress line on stdout: a cyan "==>" marker followed by the
# arguments (joined into one string) in bold.
log() {
  printf '%s==>%s %s%s%s\n' "${CYAN}" "${RESET}" "${BOLD}" "$*" "${RESET}"
}
# Print a warning on stderr: a yellow "!!" marker followed by the arguments
# joined into one string.
warn() {
  printf '%s!!%s %s\n' "${YELLOW}" "${RESET}" "$*" >&2
}
# Print a fatal message on stderr (yellow "xx" marker followed by the
# arguments) and terminate the shell with exit status 1.
die() {
  printf '%sxx%s %s\n' "${YELLOW}" "${RESET}" "$*" >&2
  exit 1
}
# ──────────────────────────────────────────────────────────────────────────
# Subcommands
# ──────────────────────────────────────────────────────────────────────────
# Print the help text, which is the header comment block at the top of this
# script with the leading "# " stripped from every line.
#
# The block is located by content rather than by a fixed line range: lines 1-2
# (the shebang and the bare "#" spacer) are skipped, and printing stops at the
# first line that is not a comment. Editing the header therefore never leaks
# code into the help output or truncates it.
#
# Outputs: the help text on stdout.
usage() {
  awk '
    NR < 3 { next }
    !/^#/  { exit }
    { sub(/^# ?/, ""); print }
  ' "${BASH_SOURCE[0]}"
}
# Tear the demo stack down via the configured compose command, removing the
# named volumes and any orphaned containers as well.
down() {
  local -a teardown_flags=(--volumes --remove-orphans)

  log "Tearing down the demo stack (containers + volumes)…"
  "${COMPOSE[@]}" down "${teardown_flags[@]}"
  log "Done. The demo stack is gone."
}
# Invoke the `neuronetz-gateway` CLI inside the `gateway` compose service.
# Arguments: passed through unchanged to the CLI.
# Returns:   the exit status of the compose `exec` invocation.
gw_cli() {
  # -T: no pseudo-TTY is allocated for the exec session.
  local -a exec_prefix=(exec -T gateway neuronetz-gateway)
  "${COMPOSE[@]}" "${exec_prefix[@]}" "$@"
}
# Poll ${GATEWAY_URL}/healthz until it answers successfully.
#
# Each probe is capped with curl's --max-time. Without a cap, a connection that
# is accepted but never answered would block a single probe indefinitely and
# the overall deadline below would never be evaluated.
#
# Globals: GATEWAY_URL (read), COMPOSE (read).
# Returns: 0 once the endpoint is healthy. If it is still unhealthy after the
#          overall timeout, the last 50 gateway log lines are written to
#          stderr and the script is aborted through die.
wait_for_health() {
  local -r overall_timeout_s=180 poll_interval_s=2 probe_timeout_s=5
  local deadline

  log "Waiting for the gateway to become healthy at ${GATEWAY_URL}/healthz …"
  deadline=$(( $(date +%s) + overall_timeout_s ))

  until curl -fsS --max-time "${probe_timeout_s}" "${GATEWAY_URL}/healthz" >/dev/null 2>&1; do
    if (( $(date +%s) >= deadline )); then
      warn "Gateway did not become healthy in time. Recent gateway logs:"
      # Best effort: failing to fetch logs must not hide the real error.
      "${COMPOSE[@]}" logs --tail=50 gateway >&2 || true
      die "Aborting."
    fi
    sleep "${poll_interval_s}"
  done

  log "Gateway is up."
}
# Create the demo tenant, tolerating the case where it already exists.
#
# Runs `create-tenant --name ${TENANT_NAME} --allow-all-models` through gw_cli
# and captures stdout and stderr together. On success the CLI output is echoed
# dimmed. On failure the output is searched (case-insensitively) for
# "exist", "duplicate" or "unique": a hit is reported as "already exists",
# anything else is surfaced as a warning. Either way the function carries on,
# because this step is deliberately best-effort.
#
# The search reads the captured text from a here-string instead of a
# `printf | grep -q` pipeline. Under `set -o pipefail`, grep -q exiting on an
# early match can kill the writer with SIGPIPE when the output is large, which
# would make the pipeline fail and misreport an existing tenant as an unknown
# error.
#
# Globals: TENANT_NAME, DIM, RESET (read).
ensure_tenant() {
  local out

  log "Creating demo tenant '${TENANT_NAME}' (allow-all-models) …"
  if out="$(gw_cli create-tenant --name "${TENANT_NAME}" --allow-all-models 2>&1)"; then
    printf '%s\n' "${DIM}${out}${RESET}"
    return 0
  fi

  # Already-exists (or similar) is fine — surface it but keep going.
  if grep -qiE 'exist|duplicate|unique' <<<"${out}"; then
    log "Tenant '${TENANT_NAME}' already exists — reusing it."
  else
    warn "create-tenant reported:"
    printf '%s\n' "${out}" >&2
    warn "Continuing; the tenant may already be present."
  fi
}
# Mint a new API key for the demo tenant and write just the key to stdout.
#
# Every key gets a timestamp-suffixed name, so each run creates a new key
# rather than colliding with one from an earlier run. Progress and error
# messages go to stderr because callers capture stdout to obtain the key.
#
# Globals: KEY_NAME, TENANT_NAME (read).
# Outputs: the key on stdout, without a trailing newline.
# Returns: 0 on success; 1 if create-key fails or no key can be found in
#          its output.
create_key() {
  local stamp
  stamp="$(date +%Y%m%d-%H%M%S)"
  local unique_name="${KEY_NAME}-${stamp}"
  log "Creating API key '${unique_name}' for tenant '${TENANT_NAME}' …" >&2

  local cli_output
  cli_output="$(gw_cli create-key --tenant "${TENANT_NAME}" --name "${unique_name}" 2>&1)" || {
    warn "create-key failed:" >&2
    printf '%s\n' "${cli_output}" >&2
    return 1
  }

  # The CLI prints a short key prefix as well as the full key, and both match
  # nz_[A-Za-z0-9]+. Keep the longest match (the first one on ties): that is
  # the full key, never the prefix.
  local candidate key=""
  while IFS= read -r candidate; do
    if (( ${#candidate} > ${#key} )); then
      key="${candidate}"
    fi
  done < <(printf '%s' "${cli_output}" | grep -oE 'nz_[A-Za-z0-9]+')

  if [[ -z "${key}" ]]; then
    warn "Could not parse an API key from create-key output:" >&2
    printf '%s\n' "${cli_output}" >&2
    return 1
  fi

  printf '%s' "${key}"
}
# Print the closing summary on stdout: gateway and playground URLs, the API
# key, the backend/model overview, and five ready-to-paste curl commands.
#
# Arguments: $1 - the API key to show and to embed in the curl examples.
# Globals:   GATEWAY_URL, PLAYGROUND_URL and the colour variables (read).
#
# The heredoc delimiter is unquoted, so variables expand inside it and each
# "\\" is emitted as a single backslash (the curl line continuations).
print_summary() {
  local -r api_key="$1"
  local -r content_type='application/json'

  cat <<EOF
${GREEN}${BOLD}════════════════════════════════════════════════════════════════════════${RESET}
${GREEN}${BOLD} neuronetz-gateway demo is live${RESET}
${GREEN}${BOLD}════════════════════════════════════════════════════════════════════════${RESET}
${BOLD}API base URL${RESET} ${CYAN}${GATEWAY_URL}${RESET}
${BOLD}Playground${RESET} ${CYAN}${PLAYGROUND_URL}${RESET}
${BOLD}API key${RESET} ${YELLOW}${api_key}${RESET}
${DIM}(printed once — copy it now; re-run ./demo.sh to mint another)${RESET}
${BOLD}Model backend${RESET} mock-ollama (internal network only, never published)
${BOLD}Models${RESET} llama3.1:8b · mistral:7b · qwen2.5:3b · nomic-embed-text
${BOLD}── Ready-to-paste curl commands ───────────────────────────────────────${RESET}
${DIM}# 1) Streaming chat — OpenAI-compatible SSE (data: {...} … data: [DONE])${RESET}
curl -N ${GATEWAY_URL}/v1/chat/completions \\
-H "Authorization: Bearer ${api_key}" \\
-H "Content-Type: ${content_type}" \\
-d '{"model":"llama3.1:8b","stream":true,"messages":[{"role":"user","content":"Say hello in one sentence."}]}'
${DIM}# 2) Streaming chat — native Ollama NDJSON (one JSON object per line)${RESET}
curl -N ${GATEWAY_URL}/api/chat \\
-H "Authorization: Bearer ${api_key}" \\
-H "Content-Type: ${content_type}" \\
-d '{"model":"llama3.1:8b","stream":true,"messages":[{"role":"user","content":"Say hello in one sentence."}]}'
${DIM}# 3) List models — the tenant's effective (live-discovered) set, OpenAI format${RESET}
curl ${GATEWAY_URL}/v1/models \\
-H "Authorization: Bearer ${api_key}"
${DIM}# 4) 401 Unauthorized — no/invalid key, fail-closed, no upstream details leaked${RESET}
curl -i ${GATEWAY_URL}/v1/models \\
-H "Authorization: Bearer nz_invalidKEYdoesNotExist000000000000000000"
${DIM}# 5) 403 Forbidden — model-mutating endpoint is hard-blocked (not configurable)${RESET}
curl -i ${GATEWAY_URL}/api/pull \\
-H "Authorization: Bearer ${api_key}" \\
-H "Content-Type: ${content_type}" \\
-d '{"model":"llama3.1:8b"}'
${BOLD}───────────────────────────────────────────────────────────────────────${RESET}
Tear it all down with: ${CYAN}./demo.sh --down${RESET}
EOF
}
# Default action: verify prerequisites, build and start the stack, wait for
# the gateway, make sure the demo tenant exists, mint a key, and print the
# summary. Any failed prerequisite or key creation aborts through die.
up() {
  local tool
  for tool in docker curl; do
    command -v "${tool}" >/dev/null 2>&1 || die "${tool} is required but not found on PATH."
  done
  if [[ ! -f "${COMPOSE_FILE}" ]]; then
    die "Missing ${COMPOSE_FILE}"
  fi

  log "Building and starting the demo stack (postgres + redis + mock-ollama + gateway) …"
  "${COMPOSE[@]}" up --build -d

  wait_for_health
  ensure_tenant

  local key
  key="$(create_key)" || die "Could not create/parse an API key. See logs above."
  print_summary "${key}"
}
# ──────────────────────────────────────────────────────────────────────────
# Entry point
# ──────────────────────────────────────────────────────────────────────────
# Dispatch on the first command-line argument:
#   (none or empty)      -> up
#   --down | -d | down   -> down
#   --help | -h | help   -> usage
#   anything else        -> die with an "Unknown argument" message
# Arguments after the first are not inspected.
main() {
  local action="${1:-}"

  if [[ -z "${action}" ]]; then
    up
  elif [[ "${action}" == "--down" || "${action}" == "-d" || "${action}" == "down" ]]; then
    down
  elif [[ "${action}" == "--help" || "${action}" == "-h" || "${action}" == "help" ]]; then
    usage
  else
    die "Unknown argument: $1 (try --help)"
  fi
}
# Script entry: hand every command-line argument, unchanged, to main.
main "$@"