# neuronetz-gateway/docker-compose.demo.yml

# neuronetz-gateway — DEMO stack (postgres + redis + mock-ollama + gateway).
#
# This is the one-command presentation stack. It runs the real gateway image
# (built from the repo Dockerfile) against a MOCK Ollama backend, so the whole
# thing comes up with NO GPU and NO model downloads.
#
#   ./demo.sh            # bring it up, create a demo tenant+key, print curls
#   ./demo.sh --down     # tear it all down
#
# Differs from the production stack (docker-compose.yml):
#   * NO caddy           — the gateway is published directly on 127.0.0.1:8080.
#   * mock-ollama         instead of the real ollama image.
#   * playground enabled  — the gateway serves /playground from a mounted file.
#
#  ┌─────────────────────────────────────────────────────────────────────────┐
#  │ SECURITY POSTURE (mirrors prod):                                          │
#  │   `mock-ollama` has NO `ports:` mapping. The model backend is reachable   │
#  │   only on the internal Docker network as `mock-ollama:11434`, exactly     │
#  │   like real Ollama in production. Only the gateway is published, and only │
#  │   on the loopback interface (127.0.0.1:8080).                             │
#  └─────────────────────────────────────────────────────────────────────────┘

services:
  gateway:
    # The real gateway image, built from the Dockerfile at the repo root.
    build:
      context: .
      dockerfile: Dockerfile
    restart: unless-stopped
    # Run pending Alembic migrations, then start the server; `exec` makes the
    # Python process replace the wrapping shell (mirrors
    # docker-compose.dev.yml).
    command:
      - "sh"
      - "-c"
      - "alembic upgrade head && exec python -m neuronetz_gateway"
    # Do not start until every backing service reports healthy.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      mock-ollama:
        condition: service_healthy
    networks:
      - internal
    # The only published port in this stack, bound to the host loopback only.
    ports:
      - "127.0.0.1:8080:8080"
    volumes:
      # Source of the /playground page; the gateway reads it at request time.
      # Mounted read-only so the container cannot modify the host copy.
      - ./playground:/app/playground:ro
    # Values written as ${NAME:-default} can be overridden from the host
    # environment (or an .env file); the rest are fixed for the demo.
    environment:
      # --- HTTP listener, logging, proxy handling ---
      GATEWAY_BIND_HOST: "0.0.0.0"
      GATEWAY_BIND_PORT: "8080"
      GATEWAY_LOG_LEVEL: "${GATEWAY_LOG_LEVEL:-INFO}"
      GATEWAY_LOG_FORMAT: "${GATEWAY_LOG_FORMAT:-console}"
      GATEWAY_REQUEST_ID_HEADER: "${GATEWAY_REQUEST_ID_HEADER:-X-Request-ID}"
      GATEWAY_TRUSTED_PROXIES: "${GATEWAY_TRUSTED_PROXIES:-127.0.0.1}"
      # --- Playground (flag-gated; OFF by default in prod) ---
      # Served from the file mounted under /app/playground; see
      # playground/index.html.
      PLAYGROUND_ENABLED: "true"
      PLAYGROUND_FILE: "/app/playground/index.html"
      # --- Model backend: the mock Ollama on the internal network ---
      OLLAMA_BASE_URL: "http://mock-ollama:11434"
      OLLAMA_CONNECT_TIMEOUT_S: "${OLLAMA_CONNECT_TIMEOUT_S:-5}"
      OLLAMA_READ_TIMEOUT_S: "${OLLAMA_READ_TIMEOUT_S:-600}"
      OLLAMA_MAX_CONNECTIONS: "${OLLAMA_MAX_CONNECTIONS:-64}"
      # Short discovery interval so the demo feels live.
      MODEL_DISCOVERY_REFRESH_S: "${MODEL_DISCOVERY_REFRESH_S:-15}"
      MODEL_DISCOVERY_CACHE_TTL_S: "${MODEL_DISCOVERY_CACHE_TTL_S:-60}"
      # --- Datastores (credentials match the postgres service below) ---
      DATABASE_URL: "postgresql+asyncpg://${POSTGRES_USER:-gateway}:${POSTGRES_PASSWORD:-gateway}@postgres:5432/${POSTGRES_DB:-neuronetz}"
      DATABASE_POOL_SIZE: "${DATABASE_POOL_SIZE:-10}"
      DATABASE_POOL_OVERFLOW: "${DATABASE_POOL_OVERFLOW:-20}"
      REDIS_URL: "redis://redis:6379/0"
      REDIS_KEY_CACHE_TTL_S: "${REDIS_KEY_CACHE_TTL_S:-60}"
      # --- Default limits and request caps ---
      DEFAULT_RPM: "${DEFAULT_RPM:-60}"
      DEFAULT_TPM: "${DEFAULT_TPM:-100000}"
      DEFAULT_CONCURRENT: "${DEFAULT_CONCURRENT:-8}"
      MAX_REQUEST_BODY_BYTES: "${MAX_REQUEST_BODY_BYTES:-262144}"
      MAX_NUM_PREDICT: "${MAX_NUM_PREDICT:-4096}"
      # --- Argon2 parameters and auth-failure throttling ---
      ARGON2_TIME_COST: "${ARGON2_TIME_COST:-3}"
      ARGON2_MEMORY_COST_KIB: "${ARGON2_MEMORY_COST_KIB:-65536}"
      ARGON2_PARALLELISM: "${ARGON2_PARALLELISM:-4}"
      AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN: "${AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN:-20}"
      # --- Audit buffer and log retention ---
      AUDIT_BUFFER_SIZE: "${AUDIT_BUFFER_SIZE:-1000}"
      PROMPT_LOG_DEFAULT_RETENTION_DAYS: "${PROMPT_LOG_DEFAULT_RETENTION_DAYS:-30}"
      AUDIT_LOG_DEFAULT_RETENTION_DAYS: "${AUDIT_LOG_DEFAULT_RETENTION_DAYS:-365}"
    # Probe /healthz from inside the container; the 30s start period leaves
    # room for the migration step in `command` before failures are counted.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-fsS"
        - "http://127.0.0.1:8080/healthz"
      start_period: 30s
      interval: 10s
      timeout: 3s
      retries: 5

  # ───────────────────────────────────────────────────────────────────────────
  # mock-ollama — INTERNAL NETWORK ONLY. Stands in for the real Ollama backend.
  # NO `ports:` mapping, mirroring the production "Ollama is never exposed" rule.
  # Reachable only as `http://mock-ollama:11434` from the gateway container.
  # ───────────────────────────────────────────────────────────────────────────
  mock-ollama:
    # Built locally from the mock backend sources under ./demo/mock-ollama.
    build:
      context: ./demo/mock-ollama
      dockerfile: Dockerfile
    restart: unless-stopped
    # Attached to the internal network only. There is deliberately NO `ports:`
    # key here — the model backend must never be published on the host.
    networks:
      - internal
    # Probe the version endpoint from inside the container; the gateway's
    # `depends_on` waits for this check to pass.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-fsS"
        - "http://127.0.0.1:11434/api/version"
      start_period: 5s
      interval: 10s
      timeout: 3s
      retries: 5

  postgres:
    image: postgres:16-alpine
    restart: unless-stopped
    # Internal-only: no `ports:` key, so Postgres is never published.
    networks:
      - internal
    # Same variables (and defaults) that the gateway's DATABASE_URL is built
    # from, so the two stay in sync when overridden from the host environment.
    environment:
      POSTGRES_USER: "${POSTGRES_USER:-gateway}"
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-gateway}"
      POSTGRES_DB: "${POSTGRES_DB:-neuronetz}"
    volumes:
      # Named volume holding the database files.
      - postgres_demo_data:/var/lib/postgresql/data
    # Healthy once pg_isready accepts connections for the demo user/database.
    # The ${...} placeholders are substituted by Compose before the shell runs.
    healthcheck:
      test:
        - "CMD-SHELL"
        - "pg_isready -U ${POSTGRES_USER:-gateway} -d ${POSTGRES_DB:-neuronetz}"
      interval: 5s
      timeout: 3s
      retries: 10

  redis:
    image: redis:7-alpine
    restart: unless-stopped
    # Internal-only: no `ports:` key, so Redis is never published.
    networks:
      - internal
    # Start with an empty `--save` schedule and `--appendonly no`. Note that
    # "no" must stay quoted so YAML does not read it as a boolean.
    command:
      - "redis-server"
      - "--save"
      - ""
      - "--appendonly"
      - "no"
    # Healthy once `redis-cli ping` succeeds inside the container.
    healthcheck:
      test:
        - "CMD"
        - "redis-cli"
        - "ping"
      interval: 5s
      timeout: 3s
      retries: 10

networks:
  # User-defined bridge network shared by all four services for inter-service
  # traffic. It is named `internal` but does not set `internal: true`;
  # services stay unpublished on the host simply by omitting `ports:` (only
  # the gateway publishes one, on 127.0.0.1:8080).
  internal:
    driver: bridge

volumes:
  # Named volume mounted by the postgres service at /var/lib/postgresql/data.
  # Declared with default settings (explicit empty mapping).
  postgres_demo_data: {}