diff --git a/.env.example b/.env.example index 825036d..2e5a2d0 100644 --- a/.env.example +++ b/.env.example @@ -9,7 +9,14 @@ GATEWAY_BIND_PORT=8080 GATEWAY_LOG_LEVEL=INFO GATEWAY_LOG_FORMAT=json # json|console GATEWAY_REQUEST_ID_HEADER=X-Request-ID -GATEWAY_TRUSTED_PROXIES=127.0.0.1,caddy # for X-Forwarded-For +GATEWAY_TRUSTED_PROXIES=127.0.0.1,nginx-proxy # for X-Forwarded-For + +# ──────────── Public hostname (jwilder-proxy / acme-companion) ─────── +# These are consumed by docker-compose.yml's gateway service so that the +# host's nginx-proxy stack routes TLS-terminated traffic for your domain. +# Mirrors the pattern used by neuro-landing. +GATEWAY_VIRTUAL_HOST=api.neuronetz.ai +LETSENCRYPT_EMAIL=admin@neuronetz.ai # ──────────────────────────── Upstream ─────────────────────────── OLLAMA_BASE_URL=http://ollama:11434 diff --git a/docker-compose.yml b/docker-compose.yml index 6a2d0ae..3d662cd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,10 +1,15 @@ -# neuronetz-gateway — FULL production stack (SPEC §4.1). +# neuronetz-gateway — FULL production stack, hosted behind jwilder-proxy +# (the same setup used by neuronetz.ai / neuro-landing). # -# Internet ──TLS──▶ caddy ──HTTP/1.1 internal──▶ gateway ──▶ postgres / redis / ollama +# Internet ──TLS──▶ nginx-proxy (jwilder) ──HTTP/1.1──▶ gateway +# │ +# └─▶ postgres / redis / ollama +# (private network only) # -# Only Caddy publishes ports to the host. The gateway is reachable solely through -# Caddy on the internal network. Postgres, Redis and (critically) Ollama are NOT -# published to the host at all. +# Public traffic is terminated by the jwilder/nginx-proxy that already runs on +# this host. The gateway joins its external `proxy` network and advertises +# itself with VIRTUAL_HOST / VIRTUAL_PORT; acme-companion +# obtains and renews the cert for api.neuronetz.ai automatically.
# # ┌─────────────────────────────────────────────────────────────────────────┐ # │ SECURITY NON-NEGOTIABLE: │ @@ -14,45 +19,44 @@ # │ unauthenticated exposure this whole project exists to close. │ # └─────────────────────────────────────────────────────────────────────────┘ # -# Copy `.env.example` to `.env` and adjust before running: +# Prerequisites on the host: +# - A jwilder-proxy stack (nginx-proxy + acme-companion) already running and +# attached to an external Docker network named `proxy`. +# - DNS A/AAAA record for api.neuronetz.ai pointing at this host. +# +# Bring it up: +# cp .env.example .env # set POSTGRES_PASSWORD and any overrides # docker compose up -d --build +# +# Users without jwilder-proxy can use the Caddy sidecar example in +# ops/caddy/Caddyfile.example instead — see docs/DEPLOYMENT.md. services: - caddy: - image: caddy:2-alpine - restart: unless-stopped - depends_on: - gateway: - condition: service_healthy - ports: - - "80:80" - - "443:443" - - "443:443/udp" # HTTP/3 - volumes: - - ./ops/caddy/Caddyfile.example:/etc/caddy/Caddyfile:ro - - caddy_data:/data - - caddy_config:/config - networks: - - edge - - internal - gateway: build: context: . dockerfile: Dockerfile + container_name: neuronetz-gateway restart: unless-stopped - # NOTE: deliberately NO `ports:` — the gateway is internal-only and is - # reached exclusively through Caddy. + # NOTE: deliberately NO `ports:` — the gateway is reached only via the + # jwilder nginx-proxy on the shared external `proxy` network. expose: - "8080" environment: + # jwilder/nginx-proxy + acme-companion routing (matches neuro-landing). 
+ VIRTUAL_HOST: ${GATEWAY_VIRTUAL_HOST:-api.neuronetz.ai} + VIRTUAL_PORT: "8080" + LETSENCRYPT_HOST: ${GATEWAY_VIRTUAL_HOST:-api.neuronetz.ai} + LETSENCRYPT_EMAIL: ${LETSENCRYPT_EMAIL:-admin@neuronetz.ai} + # ─── Gateway runtime ────────────────────────────────────────────── GATEWAY_BIND_HOST: 0.0.0.0 GATEWAY_BIND_PORT: "8080" GATEWAY_LOG_LEVEL: ${GATEWAY_LOG_LEVEL:-INFO} GATEWAY_LOG_FORMAT: ${GATEWAY_LOG_FORMAT:-json} GATEWAY_REQUEST_ID_HEADER: ${GATEWAY_REQUEST_ID_HEADER:-X-Request-ID} - GATEWAY_TRUSTED_PROXIES: ${GATEWAY_TRUSTED_PROXIES:-127.0.0.1,caddy} - # Service-name addressing on the internal network. + # nginx-proxy forwards from the `proxy` network — trust it by container name / IP. + GATEWAY_TRUSTED_PROXIES: ${GATEWAY_TRUSTED_PROXIES:-127.0.0.1,nginx-proxy} + # ─── Internal service addressing ────────────────────────────────── DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-gateway}:${POSTGRES_PASSWORD:-changeme}@postgres:5432/${POSTGRES_DB:-neuronetz} DATABASE_POOL_SIZE: ${DATABASE_POOL_SIZE:-10} DATABASE_POOL_OVERFLOW: ${DATABASE_POOL_OVERFLOW:-20} @@ -62,6 +66,8 @@ services: OLLAMA_CONNECT_TIMEOUT_S: ${OLLAMA_CONNECT_TIMEOUT_S:-5} OLLAMA_READ_TIMEOUT_S: ${OLLAMA_READ_TIMEOUT_S:-600} OLLAMA_MAX_CONNECTIONS: ${OLLAMA_MAX_CONNECTIONS:-64} + MODEL_DISCOVERY_REFRESH_S: ${MODEL_DISCOVERY_REFRESH_S:-60} + MODEL_DISCOVERY_CACHE_TTL_S: ${MODEL_DISCOVERY_CACHE_TTL_S:-120} DEFAULT_RPM: ${DEFAULT_RPM:-60} DEFAULT_TPM: ${DEFAULT_TPM:-100000} DEFAULT_CONCURRENT: ${DEFAULT_CONCURRENT:-8} @@ -74,6 +80,9 @@ services: AUDIT_BUFFER_SIZE: ${AUDIT_BUFFER_SIZE:-1000} PROMPT_LOG_DEFAULT_RETENTION_DAYS: ${PROMPT_LOG_DEFAULT_RETENTION_DAYS:-30} AUDIT_LOG_DEFAULT_RETENTION_DAYS: ${AUDIT_LOG_DEFAULT_RETENTION_DAYS:-365} + # Playground + auto-docs OFF by default in prod.
+ PLAYGROUND_ENABLED: ${PLAYGROUND_ENABLED:-false} + DOCS_ENABLED: ${DOCS_ENABLED:-false} depends_on: postgres: condition: service_healthy @@ -90,10 +99,12 @@ services: retries: 5 start_period: 30s networks: - - internal + - proxy # for nginx-proxy / acme-companion (TLS-fronted public traffic) + - internal # for postgres / redis / ollama (private) postgres: image: postgres:16-alpine + container_name: neuronetz-postgres restart: unless-stopped environment: POSTGRES_USER: ${POSTGRES_USER:-gateway} @@ -112,6 +123,7 @@ services: redis: image: redis:7-alpine + container_name: neuronetz-redis restart: unless-stopped command: ["redis-server", "--save", "", "--appendonly", "no"] # No `ports:` — Redis is internal-only. @@ -129,6 +141,7 @@ services: # ─────────────────────────────────────────────────────────────────────────── ollama: image: ollama/ollama:latest + container_name: neuronetz-ollama restart: unless-stopped # !!! NO `ports:` — never publish Ollama to the host or the internet. !!! volumes: @@ -137,16 +150,14 @@ services: - internal networks: - # Public-facing network: only Caddy is attached alongside `internal`. - edge: - driver: bridge + # External network managed by the host's jwilder-proxy stack + # (the same network neuronetz-web / neuronetz-www are attached to). + proxy: + external: true # Private network for inter-service traffic; not reachable from the host. internal: driver: bridge - internal: false volumes: postgres_data: ollama_data: - caddy_data: - caddy_config: diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 55ac663..9696c56 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -1,8 +1,17 @@ # neuronetz-gateway — Deployment -Production deployment is a single Docker Compose stack: **Caddy + gateway + Postgres + Redis -+ Ollama**. Caddy is the only public-facing component; it terminates TLS via Let's Encrypt -for `api.neuronetz.ai` and reverse-proxies to the internal-only gateway. 
+Production deployment is a Docker Compose stack — **gateway + Postgres + Redis + Ollama** — +that sits behind the host's existing **jwilder/nginx-proxy** stack (the same one already +serving `neuronetz.ai` / `neuro-landing`). Public traffic enters via `nginx-proxy`, +which terminates TLS; `acme-companion` obtains and renews the Let's Encrypt certificate for +`api.neuronetz.ai`. The gateway joins the host's external `proxy` Docker network alongside +the other public-facing containers and advertises itself with `VIRTUAL_HOST` / +`VIRTUAL_PORT`. Postgres, Redis, and Ollama stay on a private internal network with no +published ports. + +> ▶ Don't have jwilder-proxy on the host? See +> [§ "Alternative: TLS via Caddy sidecar"](#alternative-tls-via-caddy-sidecar) — the +> `ops/caddy/Caddyfile.example` is shipped for that case. > For the local, no-GPU demo (mock Ollama + playground), see [`PLAYGROUND.md`](PLAYGROUND.md) > and run `./demo.sh`. This document is the **production** path. @@ -18,53 +27,61 @@ for `api.neuronetz.ai` and reverse-proxies to the internal-only gateway. > Publishing it would re-open the exact unauthenticated exposure this whole project exists > to close (SPEC §1, §3; AGENT_PROMPT non-negotiable #2). -The same posture applies to **Postgres** and **Redis** in the production compose file — no -published ports. Only **Caddy** binds host ports (80/443, 443/udp for HTTP/3). +The same posture applies to **Postgres**, **Redis**, and the gateway itself in the +production compose file — **no published ports anywhere in this compose file**. Only +the host's jwilder `nginx-proxy` container binds 80/443; the gateway is reached via the +shared external `proxy` Docker network. --- ## Prerequisites - A host with Docker + Docker Compose. -- DNS: `api.neuronetz.ai` → the host's public IP (for Let's Encrypt). -- Ports 80 and 443 reachable from the internet (ACME HTTP/TLS challenge + serving).
+- A jwilder-proxy stack already running on the host, attached to an external Docker + network named `proxy`. Typically `jwilder/nginx-proxy` + `nginxproxy/acme-companion`, + the same setup serving `neuronetz.ai` / `neuro-landing`. +- DNS: `api.neuronetz.ai` → the host's public IP. +- Ports 80 and 443 already published by the jwilder-proxy container on that host (for + ACME HTTP-01 + serving). This compose file does **not** publish them itself. --- -## Steps +## Steps (production — jwilder-proxy) ```bash -git clone neuronetz-gateway && cd neuronetz-gateway +git clone ssh://git@gitea.neuronetz.ai:222/m17hr1l/neuronetz-gateway.git +cd neuronetz-gateway # 1. Configure. Copy the example env and change EVERY secret. cp .env.example .env -# - POSTGRES_PASSWORD: a strong, unique value -# - DATABASE_URL: must match the POSTGRES_* values -# - GATEWAY_LOG_FORMAT=json for production +# - POSTGRES_PASSWORD : a strong, unique value +# - GATEWAY_VIRTUAL_HOST : api.neuronetz.ai (read by nginx-proxy) +# - LETSENCRYPT_EMAIL : admin@neuronetz.ai (read by acme-companion) +# - GATEWAY_LOG_FORMAT=json : for production +# - GATEWAY_TRUSTED_PROXIES : 127.0.0.1,nginx-proxy -# 2. Configure Caddy for your domain + ACME email. -cp ops/caddy/Caddyfile.example ops/caddy/Caddyfile # then edit the site + email -# (docker-compose.yml mounts Caddyfile.example by default; point it at your edited file -# or edit in place.) - -# 3. Bring up the full stack. The gateway runs `alembic upgrade head`, then serves. +# 2. Bring up the stack. The gateway joins the external `proxy` network and +# runs `alembic upgrade head` before serving. docker compose up -d --build +# nginx-proxy observes the new container, generates an nginx vhost for +# api.neuronetz.ai, and acme-companion issues the cert via Let's Encrypt. +# Cert renewals are automatic. -# 4. Bootstrap a tenant + key (CLI runs inside the gateway container). +# 3. Bootstrap a tenant + key (CLI runs inside the gateway container). 
docker compose exec gateway neuronetz-gateway create-tenant --name acme --rpm 120 --tpm 200000 docker compose exec gateway neuronetz-gateway create-key --tenant acme --name prod-server-1 # ^ prints the full key ONCE — store it in your secret manager now. -# 5. Smoke test (through Caddy / TLS). +# 4. Smoke test through public TLS. curl https://api.neuronetz.ai/healthz curl -N https://api.neuronetz.ai/v1/chat/completions \ -H "Authorization: Bearer nz_…" -H "Content-Type: application/json" \ -d '{"model":"llama3.1:8b","stream":true,"messages":[{"role":"user","content":"hi"}]}' ``` -Caddy obtains and renews the certificate automatically. For local testing without a public -domain, use the `localhost { tls internal … }` block documented in `Caddyfile.example` -(trust Caddy's local CA or pass `-k` to curl). +The compose file pins `container_name: neuronetz-gateway` (and `neuronetz-postgres` / +`neuronetz-redis` / `neuronetz-ollama`) for stable identification by nginx-proxy and +for ops scripts. --- @@ -102,7 +119,9 @@ All configuration is via environment variables, validated by Pydantic Settings o | `GATEWAY_LOG_LEVEL` | `INFO` | | | `GATEWAY_LOG_FORMAT` | `json` | `json` in prod, `console` for local dev. | | `GATEWAY_REQUEST_ID_HEADER` | `X-Request-ID` | | -| `GATEWAY_TRUSTED_PROXIES` | `127.0.0.1,caddy` | Sources trusted for `X-Forwarded-For`. | +| `GATEWAY_TRUSTED_PROXIES` | `127.0.0.1,nginx-proxy` | Sources trusted for `X-Forwarded-For`. Set to your front-proxy's container name / IP. | +| `GATEWAY_VIRTUAL_HOST` | `api.neuronetz.ai` | Read by jwilder `nginx-proxy` and `acme-companion`. | +| `LETSENCRYPT_EMAIL` | `admin@neuronetz.ai` | Read by `acme-companion`. 
| ### Upstream (Ollama) | Var | Default | Notes | @@ -157,17 +176,31 @@ All configuration is via environment variables, validated by Pydantic Settings o --- -## TLS & security headers (Caddy) +## TLS & security headers -`ops/caddy/Caddyfile.example` already sets: +In the canonical (jwilder-proxy) setup, TLS termination and security headers belong on +the host's `nginx-proxy` container, not in this repo. Use the standard nginx-proxy +custom-config mechanism (`/etc/nginx/vhost.d/api.neuronetz.ai`) to add HSTS and the rest: -- **HSTS** `max-age=63072000; includeSubDomains; preload` -- `X-Content-Type-Options: nosniff` -- `X-Frame-Options: DENY` -- `Referrer-Policy: no-referrer` -- strips `Server` and `X-Powered-By` +``` +add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload" always; +add_header X-Content-Type-Options "nosniff" always; +add_header X-Frame-Options "DENY" always; +add_header Referrer-Policy "no-referrer" always; +``` -Edit the site address and ACME `email` before deploying. +If you prefer to terminate TLS in this repo (no jwilder-proxy on the host), see the +section below. + + +## Alternative: TLS via Caddy sidecar + +`ops/caddy/Caddyfile.example` is provided for hosts without jwilder-proxy. It sets HSTS +and the other security headers above, strips the `Server` header, and obtains a Let's Encrypt +cert. To use it, add a `caddy` service to your local copy of `docker-compose.yml` +(binding host 80/443), drop the gateway's `VIRTUAL_HOST` / `LETSENCRYPT_HOST` env vars, +and remove the `proxy` external-network requirement. The Caddyfile itself is +self-documenting; edit the site address and ACME `email` before deploying. ---