demo + playground + docs

One-command demo so the gateway can be exercised end-to-end without a GPU or a real model download: - demo/mock-ollama/ — tiny FastAPI service emulating Ollama (/api/tags, /api/chat + /api/generate NDJSON streaming with realistic prompt_eval_count and eval_count on the final frame, /api/embed, /api/show, /api/version). Non-root multi-stage Dockerfile; the service is never published (internal network only). - docker-compose.demo.yml — postgres + redis + mock-ollama + gateway, with PLAYGROUND_ENABLED=true and ./playground mounted read-only at /app/playground. Mirrors the prod posture (mock-ollama not exposed). - demo.sh — brings the stack up, waits on /healthz, creates a demo tenant with allow_all_models and a fresh API key via the bootstrap CLI inside the container, then prints the key, the playground URL, and five ready-to-paste curl commands (SSE chat, NDJSON chat, /v1/models, a 401, a 403 on /api/pull). ./demo.sh --down tears everything back down with volumes. - playground/index.html — single-file dark-themed UI served same-origin by the gateway at /playground (CORS-free). Per-endpoint About card with method/auth/streaming badges, a real description, sample request body, sample response, and a footer note. Live SSE/NDJSON rendering of the response. A live, copyable curl box that mirrors exactly what Run sends. Run + Refresh are visibly gated until an API key is in the field; the Base URL is force-pinned to location.origin three times to defeat browser autofill. - docs/ — API.md (full endpoint reference with curl, streaming formats, error model, SPEC §6.5 response headers), ARCHITECTURE.md (incl. §4.6 discovery + the request lifecycle), DEPLOYMENT.md (Ollama-never-exposed rule, pointing at a real Ollama backend, env reference), THREAT_MODEL.md (SPEC §3 table + the allow_all_models opt-in notes), OPERATIONS.md (key/budget/model/usage runbook + fail-closed table), PLAYGROUND.md. mkdocs.yml (Material theme) wires them together.
2026-05-26 20:52:33 +02:00
parent 844b02aade
commit b47a09db91
13 changed files with 2501 additions and 0 deletions
--- a/docker-compose.demo.yml
+++ b/docker-compose.demo.yml
@@ -0,0 +1,146 @@
+# neuronetz-gateway — DEMO stack (postgres + redis + mock-ollama + gateway).
+#
+# This is the one-command presentation stack. It runs the real gateway image
+# (built from the repo Dockerfile) against a MOCK Ollama backend, so the whole
+# thing comes up with NO GPU and NO model downloads.
+#
+#   ./demo.sh            # bring it up, create a demo tenant+key, print curls
+#   ./demo.sh --down     # tear it all down
+#
+# Differs from the production stack (docker-compose.yml):
+#   * NO caddy           — the gateway is published directly on 127.0.0.1:8080.
+#   * mock-ollama         instead of the real ollama image.
+#   * playground enabled  — the gateway serves /playground from a mounted file.
+#
+#  ┌─────────────────────────────────────────────────────────────────────────┐
+#  │ SECURITY POSTURE (mirrors prod):                                          │
+#  │   `mock-ollama` has NO `ports:` mapping. The model backend is reachable   │
+#  │   only on the internal Docker network as `mock-ollama:11434`, exactly     │
+#  │   like real Ollama in production. Only the gateway is published, and only │
+#  │   on the loopback interface (127.0.0.1:8080).                             │
+#  └─────────────────────────────────────────────────────────────────────────┘
+
+services:
+  gateway:
+    # The real gateway image, built from the Dockerfile at the repo root.
+    build:
+      dockerfile: Dockerfile
+      context: .
+    restart: unless-stopped
+    networks:
+      - internal
+    # The only `ports:` mapping in this stack, bound to the loopback interface.
+    ports:
+      - "127.0.0.1:8080:8080"
+    # Wait until every backing service reports healthy before starting.
+    depends_on:
+      mock-ollama:
+        condition: service_healthy
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    # Run the migrations first, then hand the process over to the server
+    # (mirrors docker-compose.dev.yml).
+    command:
+      - sh
+      - "-c"
+      - "alembic upgrade head && exec python -m neuronetz_gateway"
+    healthcheck:
+      test:
+        - CMD
+        - curl
+        - "-fsS"
+        - "http://127.0.0.1:8080/healthz"
+      start_period: 30s
+      interval: 10s
+      timeout: 3s
+      retries: 5
+    volumes:
+      # Read-only mount of ./playground: the gateway serves /playground by
+      # reading the file at request time, and the container can never
+      # modify it.
+      - ./playground:/app/playground:ro
+    environment:
+      # --- Listener, logging, request metadata ---
+      GATEWAY_BIND_HOST: 0.0.0.0
+      GATEWAY_BIND_PORT: "8080"
+      GATEWAY_LOG_LEVEL: ${GATEWAY_LOG_LEVEL:-INFO}
+      GATEWAY_LOG_FORMAT: ${GATEWAY_LOG_FORMAT:-console}
+      GATEWAY_REQUEST_ID_HEADER: ${GATEWAY_REQUEST_ID_HEADER:-X-Request-ID}
+      GATEWAY_TRUSTED_PROXIES: ${GATEWAY_TRUSTED_PROXIES:-127.0.0.1}
+      # --- Playground (flag-gated; OFF by default in prod) ---
+      # Served from the mounted file; see playground/index.html.
+      PLAYGROUND_ENABLED: "true"
+      PLAYGROUND_FILE: /app/playground/index.html
+      # --- Model backend: the mock Ollama on the internal network ---
+      OLLAMA_BASE_URL: http://mock-ollama:11434
+      OLLAMA_CONNECT_TIMEOUT_S: ${OLLAMA_CONNECT_TIMEOUT_S:-5}
+      OLLAMA_READ_TIMEOUT_S: ${OLLAMA_READ_TIMEOUT_S:-600}
+      OLLAMA_MAX_CONNECTIONS: ${OLLAMA_MAX_CONNECTIONS:-64}
+      # Short discovery refresh so the demo feels live.
+      MODEL_DISCOVERY_REFRESH_S: ${MODEL_DISCOVERY_REFRESH_S:-15}
+      MODEL_DISCOVERY_CACHE_TTL_S: ${MODEL_DISCOVERY_CACHE_TTL_S:-60}
+      # --- Postgres ---
+      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-gateway}:${POSTGRES_PASSWORD:-gateway}@postgres:5432/${POSTGRES_DB:-neuronetz}
+      DATABASE_POOL_SIZE: ${DATABASE_POOL_SIZE:-10}
+      DATABASE_POOL_OVERFLOW: ${DATABASE_POOL_OVERFLOW:-20}
+      # --- Redis ---
+      REDIS_URL: redis://redis:6379/0
+      REDIS_KEY_CACHE_TTL_S: ${REDIS_KEY_CACHE_TTL_S:-60}
+      # --- Default limits and request caps ---
+      DEFAULT_RPM: ${DEFAULT_RPM:-60}
+      DEFAULT_TPM: ${DEFAULT_TPM:-100000}
+      DEFAULT_CONCURRENT: ${DEFAULT_CONCURRENT:-8}
+      MAX_REQUEST_BODY_BYTES: ${MAX_REQUEST_BODY_BYTES:-262144}
+      MAX_NUM_PREDICT: ${MAX_NUM_PREDICT:-4096}
+      # --- Argon2 parameters and auth-failure rate limit ---
+      ARGON2_TIME_COST: ${ARGON2_TIME_COST:-3}
+      ARGON2_MEMORY_COST_KIB: ${ARGON2_MEMORY_COST_KIB:-65536}
+      ARGON2_PARALLELISM: ${ARGON2_PARALLELISM:-4}
+      AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN: ${AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN:-20}
+      # --- Audit buffer and log retention ---
+      AUDIT_BUFFER_SIZE: ${AUDIT_BUFFER_SIZE:-1000}
+      PROMPT_LOG_DEFAULT_RETENTION_DAYS: ${PROMPT_LOG_DEFAULT_RETENTION_DAYS:-30}
+      AUDIT_LOG_DEFAULT_RETENTION_DAYS: ${AUDIT_LOG_DEFAULT_RETENTION_DAYS:-365}
+
+  # ───────────────────────────────────────────────────────────────────────────
+  # mock-ollama — INTERNAL NETWORK ONLY. Stands in for the real Ollama backend.
+  # NO `ports:` mapping, mirroring the production "Ollama is never exposed" rule.
+  # Reachable only as `http://mock-ollama:11434` from the gateway container.
+  # ───────────────────────────────────────────────────────────────────────────
+  mock-ollama:
+    # Deliberately has NO `ports:` key — the model backend is never published.
+    build:
+      dockerfile: Dockerfile
+      context: ./demo/mock-ollama
+    restart: unless-stopped
+    networks:
+      - internal
+    # Healthy once the version endpoint answers on the container's loopback.
+    healthcheck:
+      test:
+        - CMD
+        - curl
+        - "-fsS"
+        - "http://127.0.0.1:11434/api/version"
+      start_period: 5s
+      interval: 10s
+      timeout: 3s
+      retries: 5
+
+  postgres:
+    # Internal-only database: this service defines no `ports:` mapping.
+    image: postgres:16-alpine
+    restart: unless-stopped
+    networks:
+      - internal
+    environment:
+      POSTGRES_DB: ${POSTGRES_DB:-neuronetz}
+      POSTGRES_USER: ${POSTGRES_USER:-gateway}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-gateway}
+    volumes:
+      - postgres_demo_data:/var/lib/postgresql/data
+    # Probe readiness with pg_isready for the configured user and database.
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - "pg_isready -U ${POSTGRES_USER:-gateway} -d ${POSTGRES_DB:-neuronetz}"
+      interval: 5s
+      timeout: 3s
+      retries: 10
+
+  redis:
+    # Internal-only cache: this service defines no `ports:` mapping.
+    image: redis:7-alpine
+    restart: unless-stopped
+    networks:
+      - internal
+    # Started with an empty `--save` value and `--appendonly no`.
+    command:
+      - redis-server
+      - "--save"
+      - ""
+      - "--appendonly"
+      - "no"
+    healthcheck:
+      test:
+        - CMD
+        - redis-cli
+        - ping
+      interval: 5s
+      timeout: 3s
+      retries: 10
+
+networks:
+  # Shared bridge network for inter-service traffic. NOTE: it is not declared
+  # `internal: true`; the backends stay unpublished because their services
+  # define no `ports:` mappings, not because of the network itself.
+  internal:
+    driver: bridge
+
+volumes:
+  # Named volume mounted by the postgres service at /var/lib/postgresql/data.
+  postgres_demo_data: