One-command demo so the gateway can be exercised end-to-end without a GPU or a real model download: - demo/mock-ollama/ — tiny FastAPI service emulating Ollama (/api/tags, /api/chat + /api/generate NDJSON streaming with realistic prompt_eval_count and eval_count on the final frame, /api/embed, /api/show, /api/version). Non-root multi-stage Dockerfile, never published (internal network only). - docker-compose.demo.yml — postgres + redis + mock-ollama + gateway, with PLAYGROUND_ENABLED=true and ./playground mounted read-only at /app/playground. Mirrors the prod posture (mock-ollama not exposed). - demo.sh — brings the stack up, waits on /healthz, creates a demo tenant with allow_all_models and a fresh API key via the bootstrap CLI inside the container, then prints the key, the playground URL, and five ready-to-paste curl commands (SSE chat, NDJSON chat, /v1/models, a 401, a 403 /api/pull). ./demo.sh --down tears everything back down with volumes. - playground/index.html — single-file dark-themed UI served same-origin by the gateway at /playground (CORS-free). Per-endpoint About card with method/auth/streaming badges, a real description, sample request body, sample response, and a footer note. Live SSE/NDJSON rendering of the response. A live, copyable curl box that mirrors exactly what Run sends. Run + Refresh are visibly gated until an API key is in the field; the Base URL is force-pinned to location.origin three times to defeat browser autofill. - docs/ — API.md (full endpoint reference with curl, streaming formats, error model, SPEC §6.5 response headers), ARCHITECTURE.md (incl. §4.6 discovery + the request lifecycle), DEPLOYMENT.md (Ollama-never-exposed rule, pointing at a real Ollama backend, env reference), THREAT_MODEL.md (SPEC §3 table + the allow_all_models opt-in notes), OPERATIONS.md (key/budget/model/usage runbook + fail-closed table), PLAYGROUND.md. mkdocs.yml (Material theme) wires them together.
205 lines
8.9 KiB
Bash
Executable File
205 lines
8.9 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#
|
|
# demo.sh — the neuronetz-gateway one-command presentation.
|
|
#
|
|
# Brings up the demo stack (postgres + redis + mock-ollama + gateway) with NO
|
|
# GPU and NO model downloads, creates a demo tenant + API key via the bootstrap
|
|
# CLI *inside the gateway container*, and prints a clean summary with the key,
|
|
# the playground URL, and ready-to-paste curl commands.
|
|
#
|
|
# Usage:
|
|
# ./demo.sh # build + start, bootstrap a tenant/key, print summary
|
|
# ./demo.sh --down # tear the whole stack down (and remove volumes)
|
|
# ./demo.sh --help # this help
|
|
#
|
|
# Re-runnable: existing tenant/key are handled gracefully. The full API key is
|
|
# only ever printed once at creation (SPEC §11), so on a re-run where the key
|
|
# already exists this script creates a fresh, uniquely-named key and prints it.
|
|
set -euo pipefail
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────
|
|
# Configuration
|
|
# ──────────────────────────────────────────────────────────────────────────
|
|
# Resolve everything relative to this script so it can be launched from any
# working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
COMPOSE_FILE="${SCRIPT_DIR}/docker-compose.demo.yml"
# Kept as an array so a SCRIPT_DIR containing spaces survives expansion.
COMPOSE=(docker compose -f "${COMPOSE_FILE}")

GATEWAY_URL="http://localhost:8080"
PLAYGROUND_URL="${GATEWAY_URL}/playground"
TENANT_NAME="demo"
KEY_NAME="demo-key"

# Colours: enabled only when stdout is a TTY and the user has not opted out by
# setting a non-empty NO_COLOR. ANSI-C quoting yields the same escape bytes as
# printf '\033[…' without forking a subshell per colour.
if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then
  BOLD=$'\033[1m'; DIM=$'\033[2m'; RESET=$'\033[0m'
  CYAN=$'\033[36m'; GREEN=$'\033[32m'; YELLOW=$'\033[33m'
else
  BOLD=""; DIM=""; RESET=""; CYAN=""; GREEN=""; YELLOW=""
fi
|
|
|
|
# Print a progress line ("==> message") on stdout; all arguments are joined
# with spaces into the message.
log() {
  printf '%s==>%s %s%s%s\n' "${CYAN}" "${RESET}" "${BOLD}" "$*" "${RESET}"
}
|
|
# Print a warning line ("!! message") on stderr; all arguments are joined with
# spaces into the message.
warn() {
  printf '%s!!%s %s\n' "${YELLOW}" "${RESET}" "$*" >&2
}
|
|
# Print a fatal error line ("xx message") on stderr and exit the shell with
# status 1.
die() {
  printf '%sxx%s %s\n' "${YELLOW}" "${RESET}" "$*" >&2
  exit 1
}
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────
|
|
# Subcommands
|
|
# ──────────────────────────────────────────────────────────────────────────
|
|
# Print the help text, i.e. the comment header at the top of the script.
#
# Arguments: $1 - optional file to read the header from (defaults to the file
#                 this function is defined in).
# Outputs:   the header on stdout, starting at line 3 (skipping the shebang and
#            the bare "#" after it), with the leading "#" and one optional
#            space removed from each line.
#
# The header is read up to the first non-comment line instead of a fixed line
# range, so code that follows the header (such as `set -euo pipefail`) can
# never leak into the help output, and the header can grow or shrink freely.
usage() {
  local src="${1:-${BASH_SOURCE[0]}}"
  awk '
    NR < 3 { next }
    !/^#/  { exit }
           { sub(/^# ?/, ""); print }
  ' "${src}"
}
|
|
|
|
# Tear the demo stack down through docker compose, removing its volumes and
# any orphaned containers as well.
down() {
  local -a teardown_args=(down --volumes --remove-orphans)

  log "Tearing down the demo stack (containers + volumes)…"
  "${COMPOSE[@]}" "${teardown_args[@]}"
  log "Done. The demo stack is gone."
}
|
|
|
|
# Invoke the neuronetz-gateway CLI inside the running "gateway" compose
# service. All arguments are passed through to the CLI unchanged; -T disables
# pseudo-TTY allocation so the output can be captured by the caller.
gw_cli() {
  local -a exec_cmd=("${COMPOSE[@]}" exec -T gateway neuronetz-gateway)
  "${exec_cmd[@]}" "$@"
}
|
|
|
|
# Block until the gateway answers ${GATEWAY_URL}/healthz successfully.
#
# Globals: GATEWAY_URL (read), COMPOSE (read, only on timeout)
# Outputs: progress via log; on timeout, the last 50 gateway log lines on
#          stderr.
# Returns: 0 once healthy; exits the script through die after 180 seconds.
wait_for_health() {
  local -r timeout_s=180 poll_s=2 probe_s=5
  local deadline
  deadline=$(( $(date +%s) + timeout_s ))

  log "Waiting for the gateway to become healthy at ${GATEWAY_URL}/healthz …"
  # --max-time bounds every probe: without it, a connection that is accepted
  # but never answered would hang curl and the deadline below would never be
  # re-checked.
  until curl -fsS --max-time "${probe_s}" "${GATEWAY_URL}/healthz" >/dev/null 2>&1; do
    if (( $(date +%s) >= deadline )); then
      warn "Gateway did not become healthy in time. Recent gateway logs:"
      # Best effort: failing to fetch logs must not mask the real error.
      "${COMPOSE[@]}" logs --tail=50 gateway >&2 || true
      die "Aborting."
    fi
    sleep "${poll_s}"
  done
  log "Gateway is up."
}
|
|
|
|
# Create the demo tenant, tolerating the case where it already exists.
#
# Globals: TENANT_NAME, DIM, RESET (read)
# Outputs: the CLI output (dimmed) on success; otherwise either a "reusing"
#          note or a warning plus the raw CLI output on stderr.
# Returns: 0 in every case. A create-tenant failure is deliberately non-fatal
#          because the tenant may already be present from an earlier run.
ensure_tenant() {
  log "Creating demo tenant '${TENANT_NAME}' (allow-all-models) …"

  local out
  if out="$(gw_cli create-tenant --name "${TENANT_NAME}" --allow-all-models 2>&1)"; then
    printf '%s\n' "${DIM}${out}${RESET}"
    return 0
  fi

  # A here-string is used instead of `printf … | grep -q`: under `pipefail`,
  # grep -q exiting on its first match can kill the writer with SIGPIPE when
  # the output is large, which would turn a genuine match into a "no match".
  if grep -qiE 'exist|duplicate|unique' <<<"${out}"; then
    log "Tenant '${TENANT_NAME}' already exists — reusing it."
  else
    warn "create-tenant reported:"
    printf '%s\n' "${out}" >&2
    warn "Continuing; the tenant may already be present."
  fi
}
|
|
|
|
# Mint a new API key for the demo tenant and write it (and nothing else) to
# stdout, so callers can capture it with $(create_key).
#
# Each key gets a timestamp-suffixed name so a re-run does not collide with a
# key created by an earlier run. All progress and error text goes to stderr to
# keep stdout clean for the key itself.
#
# Globals: KEY_NAME, TENANT_NAME (read)
# Returns: 0 with the key on stdout; 1 if the CLI fails or no key is found.
create_key() {
  local unique_name
  printf -v unique_name '%s-%s' "${KEY_NAME}" "$(date +%Y%m%d-%H%M%S)"
  log "Creating API key '${unique_name}' for tenant '${TENANT_NAME}' …" >&2

  local cli_output
  cli_output="$(gw_cli create-key --tenant "${TENANT_NAME}" --name "${unique_name}" 2>&1)" || {
    warn "create-key failed:"
    printf '%s\n' "${cli_output}" >&2
    return 1
  }

  # The CLI output contains both the short key prefix and the full key, and
  # both match nz_[A-Za-z0-9]+. Keep the longest match (the first one wins on
  # a tie), which is the full key.
  local longest="" candidate
  while IFS= read -r candidate; do
    if (( ${#candidate} > ${#longest} )); then
      longest="${candidate}"
    fi
  done < <(printf '%s' "${cli_output}" | grep -oE 'nz_[A-Za-z0-9]+')

  if [[ -z "${longest}" ]]; then
    warn "Could not parse an API key from create-key output:"
    printf '%s\n' "${cli_output}" >&2
    return 1
  fi

  printf '%s' "${longest}"
}
|
|
|
|
# Print the closing banner: gateway and playground URLs, the freshly minted
# API key, and five copy-paste curl examples.
#
# Arguments: $1 - the full API key to display and embed in the examples.
# Globals:   GATEWAY_URL, PLAYGROUND_URL and the colour variables (read).
# Outputs:   the summary on stdout. The heredoc delimiter is unquoted, so
#            variables expand and each "\\" is emitted as a single backslash
#            (the line continuations of the printed curl commands).
print_summary() {
  local api_key="$1"
  local content_type='application/json'

  cat <<EOF

${GREEN}${BOLD}════════════════════════════════════════════════════════════════════════${RESET}
${GREEN}${BOLD} neuronetz-gateway demo is live${RESET}
${GREEN}${BOLD}════════════════════════════════════════════════════════════════════════${RESET}

${BOLD}API base URL${RESET} ${CYAN}${GATEWAY_URL}${RESET}
${BOLD}Playground${RESET} ${CYAN}${PLAYGROUND_URL}${RESET}
${BOLD}API key${RESET} ${YELLOW}${api_key}${RESET}
${DIM}(printed once — copy it now; re-run ./demo.sh to mint another)${RESET}

${BOLD}Model backend${RESET} mock-ollama (internal network only, never published)
${BOLD}Models${RESET} llama3.1:8b · mistral:7b · qwen2.5:3b · nomic-embed-text

${BOLD}── Ready-to-paste curl commands ───────────────────────────────────────${RESET}

${DIM}# 1) Streaming chat — OpenAI-compatible SSE (data: {...} … data: [DONE])${RESET}
curl -N ${GATEWAY_URL}/v1/chat/completions \\
-H "Authorization: Bearer ${api_key}" \\
-H "Content-Type: ${content_type}" \\
-d '{"model":"llama3.1:8b","stream":true,"messages":[{"role":"user","content":"Say hello in one sentence."}]}'

${DIM}# 2) Streaming chat — native Ollama NDJSON (one JSON object per line)${RESET}
curl -N ${GATEWAY_URL}/api/chat \\
-H "Authorization: Bearer ${api_key}" \\
-H "Content-Type: ${content_type}" \\
-d '{"model":"llama3.1:8b","stream":true,"messages":[{"role":"user","content":"Say hello in one sentence."}]}'

${DIM}# 3) List models — the tenant's effective (live-discovered) set, OpenAI format${RESET}
curl ${GATEWAY_URL}/v1/models \\
-H "Authorization: Bearer ${api_key}"

${DIM}# 4) 401 Unauthorized — no/invalid key, fail-closed, no upstream details leaked${RESET}
curl -i ${GATEWAY_URL}/v1/models \\
-H "Authorization: Bearer nz_invalidKEYdoesNotExist000000000000000000"

${DIM}# 5) 403 Forbidden — model-mutating endpoint is hard-blocked (not configurable)${RESET}
curl -i ${GATEWAY_URL}/api/pull \\
-H "Authorization: Bearer ${api_key}" \\
-H "Content-Type: ${content_type}" \\
-d '{"model":"llama3.1:8b"}'

${BOLD}───────────────────────────────────────────────────────────────────────${RESET}
Tear it all down with: ${CYAN}./demo.sh --down${RESET}

EOF
}
|
|
|
|
# Default action: verify prerequisites, build and start the demo stack, wait
# until the gateway is healthy, bootstrap the tenant and an API key, then
# print the summary. Any failed prerequisite or key creation ends the script
# through die.
up() {
  local tool
  for tool in docker curl; do
    command -v "${tool}" >/dev/null 2>&1 \
      || die "${tool} is required but not found on PATH."
  done
  if [[ ! -f "${COMPOSE_FILE}" ]]; then
    die "Missing ${COMPOSE_FILE}"
  fi

  log "Building and starting the demo stack (postgres + redis + mock-ollama + gateway) …"
  "${COMPOSE[@]}" up --build -d

  wait_for_health
  ensure_tenant

  # create_key writes only the key to stdout; its diagnostics go to stderr.
  local key
  key="$(create_key)" || die "Could not create/parse an API key. See logs above."

  print_summary "${key}"
}
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────
|
|
# Entry point
|
|
# ──────────────────────────────────────────────────────────────────────────
|
|
# Dispatch on the first command-line argument:
#   (none)             -> up
#   --down | -d | down -> down
#   --help | -h | help -> usage
# Anything else is rejected through die. Arguments after the first are ignored.
main() {
  local action="${1:-}"

  if [[ -z "${action}" ]]; then
    up
  elif [[ "${action}" == "--down" || "${action}" == "-d" || "${action}" == "down" ]]; then
    down
  elif [[ "${action}" == "--help" || "${action}" == "-h" || "${action}" == "help" ]]; then
    usage
  else
    die "Unknown argument: ${action} (try --help)"
  fi
}
|
|
|
|
# Script entry: forward every command-line argument to main unchanged.
main "$@"
|