diff --git a/.env.example b/.env.example index 2e5a2d0..16c16ce 100644 --- a/.env.example +++ b/.env.example @@ -18,11 +18,33 @@ GATEWAY_TRUSTED_PROXIES=127.0.0.1,nginx-proxy # for X-Forwarded-For GATEWAY_VIRTUAL_HOST=api.neuronetz.ai LETSENCRYPT_EMAIL=admin@neuronetz.ai +# ──────────────────────── Volume adoption ──────────────────────── +# Override the Docker volume names if an EXISTING volume on the host holds +# data this stack should adopt (e.g. models pulled by a previous Ollama +# deployment). Leave unset to use the default per-project names. +# +# Example (matches the neuronetz-ai-01 host): +# OLLAMA_DATA_VOLUME=neuro-ollama_ollama-data +# POSTGRES_DATA_VOLUME=neuro-gateway_postgres_data +OLLAMA_DATA_VOLUME= +POSTGRES_DATA_VOLUME= + # ──────────────────────────── Upstream ─────────────────────────── OLLAMA_BASE_URL=http://ollama:11434 OLLAMA_CONNECT_TIMEOUT_S=5 OLLAMA_READ_TIMEOUT_S=600 OLLAMA_MAX_CONNECTIONS=64 +# If you front Ollama with an auth proxy (e.g. an external host like +# https://ollama.neuronetz.ai requiring a Bearer token), set the token here. +# The value never appears in logs/errors — it's wrapped in pydantic SecretStr. +# Leave empty to send no Authorization header (the default for an in-stack +# ollama service on the private Docker network). +OLLAMA_AUTH_TOKEN= +# Override only if your auth proxy expects a non-standard header. For +# Authorization the scheme prefix (default: Bearer) is included; for any other +# header name the raw token is sent. 
+OLLAMA_AUTH_HEADER=Authorization +OLLAMA_AUTH_SCHEME=Bearer # ──────────────────────── Model discovery (§4.6) ───────────────── MODEL_DISCOVERY_REFRESH_S=60 diff --git a/docker-compose.yml b/docker-compose.yml index 3d662cd..bd2040a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -62,10 +62,15 @@ services: DATABASE_POOL_OVERFLOW: ${DATABASE_POOL_OVERFLOW:-20} REDIS_URL: redis://redis:6379/0 REDIS_KEY_CACHE_TTL_S: ${REDIS_KEY_CACHE_TTL_S:-60} - OLLAMA_BASE_URL: http://ollama:11434 + OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://ollama:11434} OLLAMA_CONNECT_TIMEOUT_S: ${OLLAMA_CONNECT_TIMEOUT_S:-5} OLLAMA_READ_TIMEOUT_S: ${OLLAMA_READ_TIMEOUT_S:-600} OLLAMA_MAX_CONNECTIONS: ${OLLAMA_MAX_CONNECTIONS:-64} + # Optional Bearer token for an externally-fronted Ollama (default empty: + # the in-stack ollama service needs no auth on the private network). + OLLAMA_AUTH_TOKEN: ${OLLAMA_AUTH_TOKEN:-} + OLLAMA_AUTH_HEADER: ${OLLAMA_AUTH_HEADER:-Authorization} + OLLAMA_AUTH_SCHEME: ${OLLAMA_AUTH_SCHEME:-Bearer} MODEL_DISCOVERY_REFRESH_S: ${MODEL_DISCOVERY_REFRESH_S:-60} MODEL_DISCOVERY_CACHE_TTL_S: ${MODEL_DISCOVERY_CACHE_TTL_S:-120} DEFAULT_RPM: ${DEFAULT_RPM:-60} @@ -159,5 +164,18 @@ networks: driver: bridge volumes: + # Pin absolute volume NAMES so the stack can ADOPT an existing volume that was + # created by a previous deployment under a different compose project. Without + # an explicit `name:`, compose namespaces volumes by project (directory) name, + # so a rename or re-clone silently creates fresh, empty volumes alongside the + # old data. We hit that the first time this stack was deployed (the original + # models lived in `neuro-ollama_ollama-data` and a fresh `neuro-gateway_ + # ollama_data` was created next to them, leaving the models orphaned). 
@app.command("probe-ollama")
def probe_ollama(
    *,
    timeout: Annotated[float, typer.Option(help="Per-request timeout in seconds.")] = 10.0,
) -> None:
    """Probe the upstream Ollama: GET /api/version and /api/tags.

    Uses the gateway's own upstream settings (base URL, connect timeout, and
    the OLLAMA_AUTH_TOKEN header if set). Read/write/pool timeouts come from
    --timeout. Only 2xx responses count as success: redirects are not followed,
    so a 3xx is reported as a failure rather than a pass. The token itself is
    NEVER printed — only whether one was attached.

    Exits with status 1 if any probe request fails.
    """
    import httpx

    from neuronetz_gateway.lifespan import _build_upstream_headers

    settings = get_settings()
    headers = _build_upstream_headers(settings)
    auth_header = settings.ollama_auth_header
    has_token = settings.ollama_auth_token is not None and bool(
        settings.ollama_auth_token.get_secret_value().strip()
    )

    auth_status = f"sending {auth_header}" if has_token else "no token (OLLAMA_AUTH_TOKEN unset)"
    typer.echo(f"target: {settings.ollama_base_url}")
    typer.echo(f"auth: {auth_status}")

    def _model_names(resp: httpx.Response) -> list[str]:
        """Best-effort list of model names from an /api/tags response.

        Returns an empty list for non-JSON, malformed, or unexpectedly shaped
        bodies instead of raising, so a odd upstream never produces a traceback.
        """
        if not resp.headers.get("content-type", "").startswith("application/json"):
            return []
        try:
            body = resp.json()
        except ValueError:  # JSONDecodeError / UnicodeDecodeError
            return []
        models = body.get("models") if isinstance(body, dict) else None
        if not isinstance(models, list):
            return []
        return [str(m.get("name") or m.get("model")) for m in models if isinstance(m, dict)]

    async def _go() -> int:
        """Run both probes and return the number of failed requests."""
        probe_timeout = httpx.Timeout(
            connect=settings.ollama_connect_timeout_s,
            read=timeout,
            write=timeout,
            pool=timeout,
        )
        errors = 0
        async with httpx.AsyncClient(
            base_url=settings.ollama_base_url,
            timeout=probe_timeout,
            headers=headers,
        ) as client:
            for path in ("/api/version", "/api/tags"):
                try:
                    resp = await client.get(path)
                except httpx.HTTPError as exc:
                    # Only the exception type is shown: the message could echo
                    # request details we do not want in terminal output.
                    typer.secho(
                        f" GET {path} ✗ transport error: {type(exc).__name__}",
                        fg=typer.colors.RED,
                    )
                    errors += 1
                    continue

                status = resp.status_code
                if not 200 <= status < 300:
                    typer.secho(f" GET {path} ✗ HTTP {status}", fg=typer.colors.RED)
                    if status in (401, 403):
                        typer.echo(
                            " upstream rejected the credentials — check "
                            "OLLAMA_AUTH_TOKEN / header."
                        )
                    elif 300 <= status < 400:
                        # A redirect is not a usable answer: this client does
                        # not follow it, so treat it as a misconfigured URL.
                        typer.echo(
                            " upstream answered with a redirect, which is not "
                            "followed — check OLLAMA_BASE_URL."
                        )
                    errors += 1
                    continue

                if path == "/api/version":
                    typer.secho(f" GET {path} ✓ HTTP {status}", fg=typer.colors.GREEN)
                    continue

                names = _model_names(resp)
                n = len(names)
                typer.secho(
                    f" GET {path} ✓ HTTP {status}, {n} model(s) discovered",
                    fg=typer.colors.GREEN,
                )
                for name in names[:5]:
                    typer.echo(f" · {name}")
                if n > 5:
                    typer.echo(f" … and {n - 5} more")
        return errors

    errors = asyncio.run(_go())
    if errors:
        raise typer.Exit(code=1)
    # Do not claim "authenticated" when no credentials were actually sent.
    summary = (
        "upstream reachable and authenticated."
        if has_token
        else "upstream reachable (no auth token sent)."
    )
    typer.secho(summary, fg=typer.colors.GREEN, bold=True)


def main() -> None:
    """Console-script entry point."""
    app()
def _build_upstream_headers(settings: Settings) -> dict[str, str]:
    """Compose default headers for the upstream Ollama client.

    Always sets ``User-Agent``. If ``OLLAMA_AUTH_TOKEN`` is set to a non-blank
    value, the configured auth header is attached as well. The scheme prefix
    (``Bearer`` by default) is included only when the header is the standard
    ``Authorization`` (compared case-insensitively); for custom headers like
    ``X-API-Key`` the raw token is sent.

    The header name and scheme are whitespace-stripped, and a blank header
    name falls back to ``Authorization``, so a stray ``OLLAMA_AUTH_HEADER=``
    in ``.env`` cannot produce an empty/invalid header name.

    The SecretStr is unwrapped only here, never logged.

    Args:
        settings: Gateway settings; reads ``ollama_auth_token``,
            ``ollama_auth_header`` and ``ollama_auth_scheme``.

    Returns:
        A new dict of default headers for the upstream httpx client.
    """
    headers: dict[str, str] = {"User-Agent": "neuronetz-gateway"}

    secret = settings.ollama_auth_token
    if secret is None:
        return headers
    token = secret.get_secret_value().strip()
    if not token:
        # Empty / whitespace-only token means "send no auth header".
        return headers

    header = (settings.ollama_auth_header or "").strip() or "Authorization"
    if header.lower() == "authorization":
        scheme = (settings.ollama_auth_scheme or "").strip()
        # .strip() drops the separator when the scheme is intentionally empty.
        headers[header] = f"{scheme} {token}".strip()
    else:
        headers[header] = token
    return headers