Two production-hardening changes triggered by real issues found on the
first prod attempt against neuronetz-ai-01.
1. Upstream auth (the production Ollama is fronted by an auth proxy):
- New config: OLLAMA_AUTH_TOKEN (pydantic SecretStr — never appears in
repr/logs/errors), plus OLLAMA_AUTH_HEADER (default "Authorization")
and OLLAMA_AUTH_SCHEME (default "Bearer") for stacks that expect a
non-standard header like X-API-Key.
- lifespan._build_upstream_headers() injects the configured header into
the single shared httpx client used by both the proxy hot path AND
the discovery poller, so /api/tags + /api/chat both authenticate
against the upstream automatically.
- New CLI: `neuronetz-gateway probe-ollama` — uses the same client
config to GET /api/version and /api/tags, reports success/transport-
error/HTTP-status, lists the first few discovered models, exits 1 on
any failure. The token itself is never printed (only whether one
was attached). Lets ops verify upstream reachability before letting
real traffic through.
- docker-compose.yml passes OLLAMA_AUTH_TOKEN/HEADER/SCHEME through;
.env.example documents them with a leave-blank-for-internal-Ollama
default.
2. Volume adoption (don't lose existing model data on re-deploy):
- docker-compose.yml now pins absolute Docker volume NAMES for both
postgres_data and ollama_data, configurable via POSTGRES_DATA_VOLUME
and OLLAMA_DATA_VOLUME. The defaults (neuro-gateway_postgres_data and
neuro-gateway_ollama_data) are the names compose generated under the
`neuro-gateway` project, so existing deployments aren't disturbed.
- This addresses the scenario where deploying this compose under a new
project directory created fresh, empty volumes alongside an existing
`neuro-ollama_ollama-data` volume containing pre-pulled models (incl.
deepseek-r1:14b, qwen2.5:14b, gemma3:12b, ...). Setting
OLLAMA_DATA_VOLUME=neuro-ollama_ollama-data in .env tells the new
stack to mount the existing volume in place — no copy, no downtime.
- .env.example documents the override with the exact host's volume name
as an example.
Both changes are ruff + mypy --strict clean.
182 lines
8.8 KiB
YAML
182 lines
8.8 KiB
YAML
# neuronetz-gateway — FULL production stack, hosted behind jwilder-proxy
# (the same setup used by neuronetz.ai / neuro-landing).
#
#   Internet ──TLS──▶ nginx-proxy (jwilder) ──HTTP/1.1──▶ gateway
#                                                           │
#                                                           └─▶ postgres / redis / ollama
#                                                               (private network only)
#
# Public TLS traffic is terminated by the jwilder/nginx-proxy that already runs
# on this host. The gateway joins its external `proxy` network and advertises
# itself with VIRTUAL_HOST / VIRTUAL_PORT; letsencrypt-nginx-proxy-companion
# obtains and renews the cert for api.neuronetz.ai automatically.
#
# ┌─────────────────────────────────────────────────────────────────────────┐
# │ SECURITY NON-NEGOTIABLE:                                                │
# │ The `ollama` service has NO `ports:` mapping and MUST NEVER get one.    │
# │ Ollama is reachable only on the internal Docker network via the         │
# │ service name `ollama:11434`. Publishing it would re-open the exact      │
# │ unauthenticated exposure this whole project exists to close.            │
# └─────────────────────────────────────────────────────────────────────────┘
#
# Prerequisites on the host:
#   - A jwilder-proxy stack (nginx-proxy + acme-companion) already running and
#     attached to an external Docker network named `proxy`.
#   - A DNS A/AAAA record for api.neuronetz.ai pointing at this host.
#
# Bring it up:
#   cp .env.example .env   # set POSTGRES_PASSWORD and any overrides
#   docker compose up -d --build
#
# Users without jwilder-proxy can use the Caddy sidecar example in
# ops/caddy/Caddyfile.example instead — see docs/DEPLOYMENT.md.

services:
  # Public API gateway: the only service attached to the external `proxy`
  # network. Everything else lives on `internal` only.
  gateway:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: neuronetz-gateway
    restart: unless-stopped
    # NOTE: deliberately NO `ports:` — the gateway is reached only via the
    # jwilder nginx-proxy on the shared external `proxy` network.
    expose:
      - "8080"
    environment:
      # jwilder/nginx-proxy + acme-companion routing (matches neuro-landing).
      VIRTUAL_HOST: "${GATEWAY_VIRTUAL_HOST:-api.neuronetz.ai}"
      VIRTUAL_PORT: "8080"
      LETSENCRYPT_HOST: "${GATEWAY_VIRTUAL_HOST:-api.neuronetz.ai}"
      LETSENCRYPT_EMAIL: "${LETSENCRYPT_EMAIL:-admin@neuronetz.ai}"
      # ─── Gateway runtime ──────────────────────────────────────────────
      GATEWAY_BIND_HOST: "0.0.0.0"
      GATEWAY_BIND_PORT: "8080"
      GATEWAY_LOG_LEVEL: "${GATEWAY_LOG_LEVEL:-INFO}"
      GATEWAY_LOG_FORMAT: "${GATEWAY_LOG_FORMAT:-json}"
      GATEWAY_REQUEST_ID_HEADER: "${GATEWAY_REQUEST_ID_HEADER:-X-Request-ID}"
      # nginx-proxy forwards from the `proxy` network — trust its IP space.
      # NOTE(review): the default lists the hostname `nginx-proxy`, not an
      # IP/CIDR — confirm the gateway resolves hostnames in this setting.
      GATEWAY_TRUSTED_PROXIES: "${GATEWAY_TRUSTED_PROXIES:-127.0.0.1,nginx-proxy}"
      # ─── Internal service addressing ──────────────────────────────────
      # POSTGRES_PASSWORD is REQUIRED: compose aborts with the message below
      # instead of silently falling back to a well-known default credential.
      # The value is interpolated verbatim into the URL, so use a URL-safe
      # password (no `@`, `:`, `/`, `?`, `#`).
      DATABASE_URL: "postgresql+asyncpg://${POSTGRES_USER:-gateway}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env}@postgres:5432/${POSTGRES_DB:-neuronetz}"
      DATABASE_POOL_SIZE: "${DATABASE_POOL_SIZE:-10}"
      DATABASE_POOL_OVERFLOW: "${DATABASE_POOL_OVERFLOW:-20}"
      REDIS_URL: "redis://redis:6379/0"
      REDIS_KEY_CACHE_TTL_S: "${REDIS_KEY_CACHE_TTL_S:-60}"
      OLLAMA_BASE_URL: "${OLLAMA_BASE_URL:-http://ollama:11434}"
      OLLAMA_CONNECT_TIMEOUT_S: "${OLLAMA_CONNECT_TIMEOUT_S:-5}"
      OLLAMA_READ_TIMEOUT_S: "${OLLAMA_READ_TIMEOUT_S:-600}"
      OLLAMA_MAX_CONNECTIONS: "${OLLAMA_MAX_CONNECTIONS:-64}"
      # Optional Bearer token for an externally-fronted Ollama (default empty:
      # the in-stack ollama service needs no auth on the private network).
      OLLAMA_AUTH_TOKEN: "${OLLAMA_AUTH_TOKEN:-}"
      OLLAMA_AUTH_HEADER: "${OLLAMA_AUTH_HEADER:-Authorization}"
      OLLAMA_AUTH_SCHEME: "${OLLAMA_AUTH_SCHEME:-Bearer}"
      MODEL_DISCOVERY_REFRESH_S: "${MODEL_DISCOVERY_REFRESH_S:-60}"
      MODEL_DISCOVERY_CACHE_TTL_S: "${MODEL_DISCOVERY_CACHE_TTL_S:-120}"
      DEFAULT_RPM: "${DEFAULT_RPM:-60}"
      DEFAULT_TPM: "${DEFAULT_TPM:-100000}"
      DEFAULT_CONCURRENT: "${DEFAULT_CONCURRENT:-8}"
      MAX_REQUEST_BODY_BYTES: "${MAX_REQUEST_BODY_BYTES:-262144}"
      MAX_NUM_PREDICT: "${MAX_NUM_PREDICT:-4096}"
      ARGON2_TIME_COST: "${ARGON2_TIME_COST:-3}"
      ARGON2_MEMORY_COST_KIB: "${ARGON2_MEMORY_COST_KIB:-65536}"
      ARGON2_PARALLELISM: "${ARGON2_PARALLELISM:-4}"
      AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN: "${AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN:-20}"
      AUDIT_BUFFER_SIZE: "${AUDIT_BUFFER_SIZE:-1000}"
      PROMPT_LOG_DEFAULT_RETENTION_DAYS: "${PROMPT_LOG_DEFAULT_RETENTION_DAYS:-30}"
      AUDIT_LOG_DEFAULT_RETENTION_DAYS: "${AUDIT_LOG_DEFAULT_RETENTION_DAYS:-365}"
      # Playground + auto-docs OFF by default in prod.
      PLAYGROUND_ENABLED: "${PLAYGROUND_ENABLED:-false}"
      DOCS_ENABLED: "${DOCS_ENABLED:-false}"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      # ollama defines no healthcheck, so only "started" can be awaited.
      ollama:
        condition: service_started
    # Apply migrations, then start the server. `exec` replaces the shell so
    # the Python process receives container signals directly.
    command: ["sh", "-c", "alembic upgrade head && exec python -m neuronetz_gateway"]
    healthcheck:
      # NOTE(review): requires `curl` inside the gateway image — confirm the
      # Dockerfile installs it.
      test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8080/healthz"]
      interval: 15s
      timeout: 3s
      retries: 5
      start_period: 30s
    networks:
      - proxy     # for nginx-proxy / acme-companion (TLS-fronted public traffic)
      - internal  # for postgres / redis / ollama (private)

  postgres:
    image: postgres:16-alpine
    container_name: neuronetz-postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: "${POSTGRES_USER:-gateway}"
      # REQUIRED — no default credential in a production stack. The official
      # image applies this only when the data directory is first initialised;
      # when an existing volume is adopted, the password already stored in
      # that volume stays in effect and must match this value.
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env}"
      POSTGRES_DB: "${POSTGRES_DB:-neuronetz}"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    # No `ports:` — Postgres is internal-only.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-gateway} -d ${POSTGRES_DB:-neuronetz}"]
      interval: 5s
      timeout: 3s
      retries: 10
    networks:
      - internal

  redis:
    image: redis:7-alpine
    container_name: neuronetz-redis
    restart: unless-stopped
    # Persistence is switched off on purpose: empty `--save` disables RDB
    # snapshots and `--appendonly no` disables the AOF.
    command: ["redis-server", "--save", "", "--appendonly", "no"]
    # No `ports:` — Redis is internal-only.
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 10
    networks:
      - internal

  # ───────────────────────────────────────────────────────────────────────────
  # Ollama — INTERNAL NETWORK ONLY. DO NOT ADD A `ports:` MAPPING.
  # Reachable only as `http://ollama:11434` from the gateway container.
  # ───────────────────────────────────────────────────────────────────────────
  ollama:
    # `:latest` is a floating tag; set OLLAMA_IMAGE in .env to pin a tested
    # release for reproducible deploys. The default keeps the previous image.
    image: "${OLLAMA_IMAGE:-ollama/ollama:latest}"
    container_name: neuronetz-ollama
    restart: unless-stopped
    # !!! NO `ports:` — never publish Ollama to the host or the internet. !!!
    volumes:
      - ollama_data:/root/.ollama
    networks:
      - internal

networks:
  # Pre-existing network owned by the host's jwilder-proxy stack (the same one
  # neuronetz-web / neuronetz-www are attached to). Compose only joins it and
  # never creates or removes it.
  proxy:
    external: true

  # Private bridge network for inter-service traffic. No service attached to
  # it publishes ports, so nothing here is exposed on the host's external
  # interfaces. It is not declared `internal: true`, so containers keep
  # outbound connectivity.
  # NOTE(review): outbound access is presumably needed for pulling models —
  # confirm before tightening this to `internal: true`.
  internal:
    driver: bridge

volumes:
  # Both volumes carry an explicit, absolute `name:` so this stack can ADOPT a
  # volume created by an earlier deployment under a different compose project.
  # Without `name:`, compose prefixes volumes with the project (directory)
  # name, so a rename or re-clone silently creates fresh, empty volumes next
  # to the old data. That happened on the first deployment of this stack: the
  # original models lived in `neuro-ollama_ollama-data`, and a new, empty
  # `neuro-gateway_ollama_data` was created beside it, orphaning the models.
  #
  # Override via .env if your existing volumes are named differently:
  #   POSTGRES_DATA_VOLUME=neuro-api_postgres-data
  #   OLLAMA_DATA_VOLUME=neuro-ollama_ollama-data
  postgres_data:
    name: "${POSTGRES_DATA_VOLUME:-neuro-gateway_postgres_data}"

  ollama_data:
    name: "${OLLAMA_DATA_VOLUME:-neuro-gateway_ollama_data}"