demo + playground + docs

One-command demo so the gateway can be exercised end-to-end without a GPU or a
real model download:

- demo/mock-ollama/ — tiny FastAPI service emulating Ollama (/api/tags,
  /api/chat + /api/generate NDJSON streaming with realistic prompt_eval_count
  and eval_count on the final frame, /api/embed, /api/show, /api/version).
  Non-root multi-stage Dockerfile, never published (internal network only).
- docker-compose.demo.yml — postgres + redis + mock-ollama + gateway, with
  PLAYGROUND_ENABLED=true and ./playground mounted read-only at /app/playground.
  Mirrors the prod posture (mock-ollama not exposed).
- demo.sh — brings the stack up, waits on /healthz, creates a demo tenant with
  allow_all_models and a fresh API key via the bootstrap CLI inside the
  container, then prints the key, the playground URL, and five ready-to-paste
  curl commands (SSE chat, NDJSON chat, /v1/models, a 401, a 403 /api/pull).
  ./demo.sh --down tears the whole stack back down, volumes included.
- playground/index.html — single-file dark-themed UI served same-origin by
  the gateway at /playground (CORS-free). Per-endpoint About card with method/
  auth/streaming badges, a real description, sample request body, sample
  response, and a footer note. Live SSE/NDJSON rendering of the response.
  A live, copyable curl box that mirrors exactly what Run sends. Run + Refresh
  are visibly gated until an API key is in the field; the Base URL is
  force-pinned to location.origin three times to defeat browser autofill.
- docs/ — API.md (full endpoint reference with curl, streaming formats, error
  model, SPEC §6.5 response headers), ARCHITECTURE.md (incl. §4.6 discovery
  + the request lifecycle), DEPLOYMENT.md (Ollama-never-exposed rule,
  pointing at a real Ollama backend, env reference), THREAT_MODEL.md
  (SPEC §3 table + the allow_all_models opt-in notes), OPERATIONS.md
  (key/budget/model/usage runbook + fail-closed table), PLAYGROUND.md.
  mkdocs.yml (Material theme) wires them together.
This commit is contained in:
Stephan Berbig
2026-05-26 20:52:33 +02:00
parent 844b02aade
commit b47a09db91
13 changed files with 2501 additions and 0 deletions

146
docker-compose.demo.yml Normal file
View File

@@ -0,0 +1,146 @@
# neuronetz-gateway — DEMO stack (postgres + redis + mock-ollama + gateway).
#
# This is the one-command presentation stack. It runs the real gateway image
# (built from the repo Dockerfile) against a MOCK Ollama backend, so the whole
# thing comes up with NO GPU and NO model downloads.
#
# ./demo.sh # bring it up, create a demo tenant+key, print curls
# ./demo.sh --down # tear it all down
#
# Differs from the production stack (docker-compose.yml):
# * NO caddy — the gateway is published directly on 127.0.0.1:8080.
# * mock-ollama instead of the real ollama image.
# * playground enabled — the gateway serves /playground from a mounted file.
#
# ┌─────────────────────────────────────────────────────────────────────────┐
# │ SECURITY POSTURE (mirrors prod): │
# │ `mock-ollama` has NO `ports:` mapping. The model backend is reachable │
# │ only on the internal Docker network as `mock-ollama:11434`, exactly │
# │ like real Ollama in production. Only the gateway is published, and only │
# │ on the loopback interface (127.0.0.1:8080). │
# └─────────────────────────────────────────────────────────────────────────┘
services:
  # ─────────────────────────────────────────────────────────────────────────
  # gateway — the only service with a `ports:` mapping, bound to loopback.
  # Built from the repo Dockerfile and pointed at the mock backend below.
  # ─────────────────────────────────────────────────────────────────────────
  gateway:
    build:
      context: .
      dockerfile: Dockerfile
    # Run the Alembic migrations first, then replace the shell with the
    # server process (mirrors docker-compose.dev.yml).
    command:
      - "sh"
      - "-c"
      - "alembic upgrade head && exec python -m neuronetz_gateway"
    restart: unless-stopped
    # Wait until every backing service reports healthy before starting.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      mock-ollama:
        condition: service_healthy
    ports:
      # Host side is loopback-only; the container listens on 0.0.0.0:8080.
      - "127.0.0.1:8080:8080"
    networks:
      - internal
    volumes:
      # Source of the /playground page, which the gateway reads at request
      # time. Mounted read-only so the container can never modify it.
      - "./playground:/app/playground:ro"
    environment:
      GATEWAY_BIND_HOST: "0.0.0.0"
      GATEWAY_BIND_PORT: "8080"
      GATEWAY_LOG_LEVEL: "${GATEWAY_LOG_LEVEL:-INFO}"
      GATEWAY_LOG_FORMAT: "${GATEWAY_LOG_FORMAT:-console}"
      GATEWAY_REQUEST_ID_HEADER: "${GATEWAY_REQUEST_ID_HEADER:-X-Request-ID}"
      GATEWAY_TRUSTED_PROXIES: "${GATEWAY_TRUSTED_PROXIES:-127.0.0.1}"
      # Interactive playground, served from the mounted file. The feature is
      # flag-gated and OFF by default in prod. See playground/index.html.
      PLAYGROUND_ENABLED: "true"
      PLAYGROUND_FILE: "/app/playground/index.html"
      # Backend is the mock Ollama, addressed by service name on the
      # internal network.
      OLLAMA_BASE_URL: "http://mock-ollama:11434"
      OLLAMA_CONNECT_TIMEOUT_S: "${OLLAMA_CONNECT_TIMEOUT_S:-5}"
      OLLAMA_READ_TIMEOUT_S: "${OLLAMA_READ_TIMEOUT_S:-600}"
      OLLAMA_MAX_CONNECTIONS: "${OLLAMA_MAX_CONNECTIONS:-64}"
      # Short discovery interval so the demo feels live.
      MODEL_DISCOVERY_REFRESH_S: "${MODEL_DISCOVERY_REFRESH_S:-15}"
      MODEL_DISCOVERY_CACHE_TTL_S: "${MODEL_DISCOVERY_CACHE_TTL_S:-60}"
      # Same POSTGRES_* variables (and defaults) as the postgres service, so
      # the credentials in this URL always match the database below.
      DATABASE_URL: "postgresql+asyncpg://${POSTGRES_USER:-gateway}:${POSTGRES_PASSWORD:-gateway}@postgres:5432/${POSTGRES_DB:-neuronetz}"
      DATABASE_POOL_SIZE: "${DATABASE_POOL_SIZE:-10}"
      DATABASE_POOL_OVERFLOW: "${DATABASE_POOL_OVERFLOW:-20}"
      REDIS_URL: "redis://redis:6379/0"
      REDIS_KEY_CACHE_TTL_S: "${REDIS_KEY_CACHE_TTL_S:-60}"
      DEFAULT_RPM: "${DEFAULT_RPM:-60}"
      DEFAULT_TPM: "${DEFAULT_TPM:-100000}"
      DEFAULT_CONCURRENT: "${DEFAULT_CONCURRENT:-8}"
      MAX_REQUEST_BODY_BYTES: "${MAX_REQUEST_BODY_BYTES:-262144}"
      MAX_NUM_PREDICT: "${MAX_NUM_PREDICT:-4096}"
      ARGON2_TIME_COST: "${ARGON2_TIME_COST:-3}"
      ARGON2_MEMORY_COST_KIB: "${ARGON2_MEMORY_COST_KIB:-65536}"
      ARGON2_PARALLELISM: "${ARGON2_PARALLELISM:-4}"
      AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN: "${AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN:-20}"
      AUDIT_BUFFER_SIZE: "${AUDIT_BUFFER_SIZE:-1000}"
      PROMPT_LOG_DEFAULT_RETENTION_DAYS: "${PROMPT_LOG_DEFAULT_RETENTION_DAYS:-30}"
      AUDIT_LOG_DEFAULT_RETENTION_DAYS: "${AUDIT_LOG_DEFAULT_RETENTION_DAYS:-365}"
    healthcheck:
      # Probes /healthz from inside the container.
      # NOTE(review): relies on `curl` being present in the gateway image —
      # confirm against the repo Dockerfile.
      test:
        - "CMD"
        - "curl"
        - "-fsS"
        - "http://127.0.0.1:8080/healthz"
      interval: 10s
      timeout: 3s
      retries: 5
      start_period: 30s

  # ─────────────────────────────────────────────────────────────────────────
  # mock-ollama — INTERNAL NETWORK ONLY. Stand-in for the real Ollama backend.
  # It deliberately has NO `ports:` mapping, mirroring the production rule
  # that Ollama is never exposed. The gateway container reaches it as
  # `http://mock-ollama:11434`.
  # ─────────────────────────────────────────────────────────────────────────
  mock-ollama:
    build:
      context: ./demo/mock-ollama
      dockerfile: Dockerfile
    restart: unless-stopped
    # !!! Do NOT add `ports:` here — the model backend is never published. !!!
    networks:
      - internal
    healthcheck:
      # NOTE(review): relies on `curl` being present in the mock-ollama
      # image — confirm against demo/mock-ollama/Dockerfile.
      test:
        - "CMD"
        - "curl"
        - "-fsS"
        - "http://127.0.0.1:11434/api/version"
      interval: 10s
      timeout: 3s
      retries: 5
      start_period: 5s

  postgres:
    image: postgres:16-alpine
    restart: unless-stopped
    # Internal-only: no `ports:` mapping.
    networks:
      - internal
    environment:
      POSTGRES_USER: "${POSTGRES_USER:-gateway}"
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-gateway}"
      POSTGRES_DB: "${POSTGRES_DB:-neuronetz}"
    volumes:
      - "postgres_demo_data:/var/lib/postgresql/data"
    healthcheck:
      # CMD-SHELL form: the string is run through the container's shell.
      test:
        - "CMD-SHELL"
        - "pg_isready -U ${POSTGRES_USER:-gateway} -d ${POSTGRES_DB:-neuronetz}"
      interval: 5s
      timeout: 3s
      retries: 10

  redis:
    image: redis:7-alpine
    restart: unless-stopped
    # Started with an empty `--save` argument and `--appendonly no`.
    command:
      - "redis-server"
      - "--save"
      - ""
      - "--appendonly"
      - "no"
    # Internal-only: no `ports:` mapping.
    networks:
      - internal
    healthcheck:
      test:
        - "CMD"
        - "redis-cli"
        - "ping"
      interval: 5s
      timeout: 3s
      retries: 10
networks:
  # Bridge network that carries all inter-service traffic; services address
  # each other by service name on it. In this file only the gateway declares
  # a `ports:` mapping, so it is the only service published on the host.
  # NOTE(review): this is a plain bridge (no `internal: true`), so the name
  # describes intent rather than an enforced isolation setting.
  internal:
    driver: bridge
volumes:
  # Named volume mounted by the postgres service at /var/lib/postgresql/data.
  # No driver or options are set, so the defaults apply.
  postgres_demo_data: {}