Two production-hardening changes triggered by real issues found on the
first prod attempt against neuronetz-ai-01.
1. Upstream auth (the production Ollama is fronted by an auth proxy):
- New config: OLLAMA_AUTH_TOKEN (pydantic SecretStr — never appears in
repr/logs/errors), plus OLLAMA_AUTH_HEADER (default "Authorization")
and OLLAMA_AUTH_SCHEME (default "Bearer") for stacks that expect a
non-standard header like X-API-Key.
- lifespan._build_upstream_headers() injects the configured header into
the single shared httpx client used by both the proxy hot path AND
the discovery poller, so /api/tags + /api/chat both authenticate
against the upstream automatically.
- New CLI: `neuronetz-gateway probe-ollama` — uses the same client
config to GET /api/version and /api/tags, reports success/transport-
error/HTTP-status, lists the first few discovered models, exits 1 on
any failure. The token itself is never printed (only whether one
was attached). Lets ops verify upstream reachability before letting
real traffic through.
- docker-compose.yml passes OLLAMA_AUTH_TOKEN/HEADER/SCHEME through;
.env.example documents them with a leave-blank-for-internal-Ollama
default.
2. Volume adoption (don't lose existing model data on re-deploy):
- docker-compose.yml now pins absolute Docker volume NAMES for both
postgres_data and ollama_data, configurable via POSTGRES_DATA_VOLUME
and OLLAMA_DATA_VOLUME. Defaults preserve the previous per-project
names so existing deployments aren't disturbed.
- This addresses the scenario where deploying this compose under a new
project directory created fresh, empty volumes alongside an existing
`neuro-ollama_ollama-data` volume containing pre-pulled models (incl.
deepseek-r1:14b, qwen2.5:14b, gemma3:12b, ...). Setting
OLLAMA_DATA_VOLUME=neuro-ollama_ollama-data in .env tells the new
stack to mount the existing volume in place — no copy, no downtime.
- .env.example documents the override, using the existing host's exact
volume name as an example.
Both changes are ruff + mypy --strict clean.
97 lines
3.8 KiB
Python
97 lines
3.8 KiB
Python
"""Application configuration via Pydantic Settings v2.
|
|
|
|
Reads every environment variable documented in SPEC §7 with the documented
|
|
defaults. Boot fails loudly (ValidationError) on invalid config.
|
|
"""
|
|
|
|
from __future__ import annotations

from functools import lru_cache

from pydantic import Field, SecretStr, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Gateway runtime configuration. All fields map to SPEC §7 env vars.

    Values are loaded from the process environment and from a UTF-8 ``.env``
    file. Variable names are matched case-insensitively and variables that do
    not correspond to a field are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False,
    )

    # --- Service ---
    gateway_bind_host: str = Field(default="0.0.0.0")  # noqa: S104 - bind-all is intended in container
    gateway_bind_port: int = Field(default=8080)
    gateway_log_level: str = Field(default="INFO")
    gateway_log_format: str = Field(default="json")  # json|console
    gateway_request_id_header: str = Field(default="X-Request-ID")
    # Comma-separated list; use ``trusted_proxies_list`` for the parsed form.
    gateway_trusted_proxies: str = Field(default="127.0.0.1,caddy")

    # --- Upstream (Ollama) ---
    ollama_base_url: str = Field(default="http://ollama:11434")
    ollama_connect_timeout_s: int = Field(default=5)
    ollama_read_timeout_s: int = Field(default=600)
    ollama_max_connections: int = Field(default=64)
    # Optional Bearer token sent to the upstream Ollama on EVERY request from the
    # gateway (proxy hot path + the discovery poller). Use SecretStr so the value
    # never appears in repr(), logs, or error messages. Empty/unset = no header:
    # a blank or whitespace-only value is normalized to ``None`` by
    # ``_blank_auth_token_is_unset`` so that an ``OLLAMA_AUTH_TOKEN=`` line
    # left empty in ``.env`` behaves exactly like an unset variable.
    ollama_auth_token: SecretStr | None = Field(default=None)
    # If you front Ollama with an auth proxy that expects a non-standard header
    # name (e.g. ``X-API-Key`` instead of ``Authorization``), override here.
    # The scheme prefix (``Bearer ``) is dropped automatically when the header
    # isn't ``Authorization``.
    ollama_auth_header: str = Field(default="Authorization")
    ollama_auth_scheme: str = Field(default="Bearer")

    # --- Model discovery (SPEC §4.6) ---
    model_discovery_refresh_s: int = Field(default=60)
    model_discovery_cache_ttl_s: int = Field(default=120)

    # --- Database ---
    database_url: str = Field(
        default="postgresql+asyncpg://gateway:gateway@postgres:5432/neuronetz",
    )
    database_pool_size: int = Field(default=10)
    database_pool_overflow: int = Field(default=20)

    # --- Redis ---
    redis_url: str = Field(default="redis://redis:6379/0")
    redis_key_cache_ttl_s: int = Field(default=60)

    # --- Limits ---
    default_rpm: int = Field(default=60)
    default_tpm: int = Field(default=100_000)
    default_concurrent: int = Field(default=8)
    max_request_body_bytes: int = Field(default=262_144)
    max_num_predict: int = Field(default=4096)

    # --- Security ---
    argon2_time_cost: int = Field(default=3)
    argon2_memory_cost_kib: int = Field(default=65_536)
    argon2_parallelism: int = Field(default=4)
    auth_failure_rate_limit_per_ip_per_min: int = Field(default=20)

    # --- Audit ---
    audit_buffer_size: int = Field(default=1000)
    prompt_log_default_retention_days: int = Field(default=30)
    audit_log_default_retention_days: int = Field(default=365)

    # --- Playground / docs (prod-safe defaults: both OFF) ---
    playground_enabled: bool = Field(default=False)
    playground_file: str = Field(default="/app/playground/index.html")
    docs_enabled: bool = Field(default=False)

    @field_validator("ollama_auth_token", mode="before")
    @classmethod
    def _blank_auth_token_is_unset(cls, value: object) -> object:
        """Map a blank upstream auth token to ``None``.

        Runs before type coercion, so ``value`` is the raw input (typically
        the environment string). Without this step an empty string would be
        wrapped as ``SecretStr("")`` and look like a configured token to any
        caller that only checks ``is not None``.

        Args:
            value: Raw value supplied for ``ollama_auth_token``.

        Returns:
            ``None`` when the value is an empty or whitespace-only string
            (plain or wrapped in ``SecretStr``); otherwise ``value`` unchanged.
        """
        raw = value.get_secret_value() if isinstance(value, SecretStr) else value
        if isinstance(raw, str) and not raw.strip():
            return None
        return value

    @property
    def trusted_proxies_list(self) -> list[str]:
        """Parse the comma-separated trusted-proxy list into individual hosts.

        Returns:
            The entries of ``gateway_trusted_proxies`` with surrounding
            whitespace stripped; empty entries are skipped.
        """
        return [p.strip() for p in self.gateway_trusted_proxies.split(",") if p.strip()]
|
|
|
|
|
|
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Build the gateway settings from the environment once and reuse them.

    The function takes no arguments, so the ``lru_cache`` wrapper hands back
    the same ``Settings`` object on every call after the first.

    Returns:
        The cached ``Settings`` instance.
    """
    settings = Settings()
    return settings
|