deploy: upstream Ollama auth token + adoptable data volumes
Two production-hardening changes triggered by real issues found on the
first prod attempt against neuronetz-ai-01.
1. Upstream auth (the production Ollama is fronted by an auth proxy):
- New config: OLLAMA_AUTH_TOKEN (pydantic SecretStr — never appears in
repr/logs/errors), plus OLLAMA_AUTH_HEADER (default "Authorization")
and OLLAMA_AUTH_SCHEME (default "Bearer") for stacks that expect a
non-standard header like X-API-Key.
- lifespan._build_upstream_headers() injects the configured header into
the single shared httpx client used by both the proxy hot path AND
the discovery poller, so /api/tags + /api/chat both authenticate
against the upstream automatically.
- New CLI: `neuronetz-gateway probe-ollama` — uses the same client
config to GET /api/version and /api/tags, reports success/transport-
error/HTTP-status, lists the first few discovered models, exits 1 on
any failure. The token itself is never printed (only whether one
was attached). Lets ops verify upstream reachability before letting
real traffic through.
- docker-compose.yml passes OLLAMA_AUTH_TOKEN/HEADER/SCHEME through;
.env.example documents them with a leave-blank-for-internal-Ollama
default.
2. Volume adoption (don't lose existing model data on re-deploy):
- docker-compose.yml now pins absolute Docker volume NAMES for both
postgres_data and ollama_data, configurable via POSTGRES_DATA_VOLUME
and OLLAMA_DATA_VOLUME. Defaults preserve the previous per-project
names so existing deployments aren't disturbed.
- This addresses the scenario where deploying this compose under a new
project directory created fresh, empty volumes alongside an existing
`neuro-ollama_ollama-data` volume containing pre-pulled models (incl.
deepseek-r1:14b, qwen2.5:14b, gemma3:12b, ...). Setting
OLLAMA_DATA_VOLUME=neuro-ollama_ollama-data in .env tells the new
stack to mount the existing volume in place — no copy, no downtime.
- .env.example documents the override with the exact host's volume name
as an example.
Both changes are ruff + mypy --strict clean.
This commit is contained in:
@@ -314,6 +314,90 @@ def list_models(
|
||||
_run(work)
|
||||
|
||||
|
||||
def _models_from_tags_payload(payload: object) -> list[dict[str, object]]:
    """Return the ``models`` entries of a decoded ``/api/tags`` body.

    Raises:
        ValueError: if *payload* is not a JSON object, or its ``models`` value
            is not a list of JSON objects.
    """
    if not isinstance(payload, dict):
        raise ValueError("payload is not a JSON object")
    models = payload.get("models", [])
    if not isinstance(models, list) or not all(isinstance(m, dict) for m in models):
        raise ValueError("'models' is not a list of JSON objects")
    return models


@app.command("probe-ollama")
def probe_ollama(
    *,
    timeout: Annotated[float, typer.Option(help="Per-request timeout in seconds.")] = 10.0,
) -> None:
    """Probe the upstream Ollama: GET /api/version and /api/tags.

    The client is built from the gateway settings: ``ollama_base_url``, the
    headers returned by ``_build_upstream_headers(settings)`` and
    ``ollama_connect_timeout_s`` for the connect phase. The read/write/pool
    timeouts come from ``--timeout`` instead, so the probe fails fast.

    A request counts as a success only when the upstream answers with a 2xx
    status. Redirects are NOT followed, so a 3xx (e.g. an auth proxy bouncing
    the request to a login page) is reported as a failure rather than as a
    pass. The token itself is NEVER printed — only whether one was attached.

    Args:
        timeout: Per-request read/write/pool timeout in seconds.

    Raises:
        typer.Exit: with code 1 when any of the probed endpoints failed.
    """
    import httpx

    from neuronetz_gateway.lifespan import _build_upstream_headers

    settings = get_settings()
    headers = _build_upstream_headers(settings)
    auth_header = settings.ollama_auth_header
    # A whitespace-only token is reported the same way as an unset one.
    has_token = settings.ollama_auth_token is not None and bool(
        settings.ollama_auth_token.get_secret_value().strip()
    )

    auth_status = f"sending {auth_header}" if has_token else "no token (OLLAMA_AUTH_TOKEN unset)"
    typer.echo(f"target: {settings.ollama_base_url}")
    typer.echo(f"auth: {auth_status}")

    async def _go() -> int:
        """Run both probes and return the number of failed endpoints."""
        probe_timeout = httpx.Timeout(
            connect=settings.ollama_connect_timeout_s,
            read=timeout,
            write=timeout,
            pool=timeout,
        )
        async with httpx.AsyncClient(
            base_url=settings.ollama_base_url,
            timeout=probe_timeout,
            headers=headers,
        ) as client:
            errors = 0
            for path in ("/api/version", "/api/tags"):
                try:
                    resp = await client.get(path)
                except httpx.HTTPError as exc:
                    # Only the exception class name is shown: the message may
                    # echo request details we do not want on the terminal.
                    typer.secho(
                        f" GET {path} ✗ transport error: {type(exc).__name__}",
                        fg=typer.colors.RED,
                    )
                    errors += 1
                    continue

                status = resp.status_code
                if not 200 <= status < 300:
                    typer.secho(
                        f" GET {path} ✗ HTTP {status}",
                        fg=typer.colors.RED,
                    )
                    if status in (401, 403):
                        typer.echo(
                            " upstream rejected the credentials — check "
                            "OLLAMA_AUTH_TOKEN / header."
                        )
                    elif 300 <= status < 400:
                        typer.echo(
                            " upstream answered with a redirect — check "
                            "OLLAMA_BASE_URL and the auth proxy configuration."
                        )
                    errors += 1
                    continue

                if path == "/api/version":
                    typer.secho(f" GET {path} ✓ HTTP {status}", fg=typer.colors.GREEN)
                    continue

                # /api/tags: a body that is not declared as JSON is treated as
                # "no models"; a body that claims to be JSON but is malformed
                # is a failure, not a crash.
                ct = resp.headers.get("content-type", "")
                try:
                    payload = resp.json() if ct.startswith("application/json") else {}
                    models = _models_from_tags_payload(payload)
                except ValueError:
                    typer.secho(
                        f" GET {path} ✗ HTTP {status}, but the body is not a valid model list",
                        fg=typer.colors.RED,
                    )
                    errors += 1
                    continue

                n = len(models)
                typer.secho(
                    f" GET {path} ✓ HTTP {status}, {n} model(s) discovered",
                    fg=typer.colors.GREEN,
                )
                for m in models[:5]:
                    typer.echo(f" · {m.get('name') or m.get('model')}")
                if n > 5:
                    typer.echo(f" … and {n - 5} more")
            return errors

    errors = asyncio.run(_go())
    if errors:
        raise typer.Exit(code=1)
    typer.secho("upstream reachable and authenticated.", fg=typer.colors.GREEN, bold=True)
|
||||
|
||||
|
||||
def main() -> None:
    """Invoke the Typer ``app``; this is the console-script entry point."""
    app()
|
||||
|
||||
Reference in New Issue
Block a user