# neuronetz-gateway/src/neuronetz_gateway/lifespan.py

"""Application lifespan: connect/dispose backends and run background tasks.

Startup connects Postgres + Redis + one upstream httpx client per configured
backend, builds the argon2 hasher and the buffered audit writer, and launches
the background tasks: the model-discovery poller (SPEC §4.6) and, when both
Redis and the database are available, the Postgres revocation NOTIFY listener
(SPEC §4.5). Connection failures are tolerated so ``/healthz`` always serves;
``/readyz`` reports true readiness. All handles live on ``app.state``.
"""

from __future__ import annotations

import asyncio
import contextlib
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from typing import TYPE_CHECKING

import httpx
import redis.asyncio as redis

from neuronetz_gateway.audit.writer import AuditWriter
from neuronetz_gateway.auth.hashing import build_hasher
from neuronetz_gateway.config import Settings, get_settings
from neuronetz_gateway.db.session import create_engine, create_session_factory
from neuronetz_gateway.observability.logging import get_logger
from neuronetz_gateway.proxy.discovery import DiscoveryCache, discovery_loop_multi
from neuronetz_gateway.proxy.ollama import OllamaClient
from neuronetz_gateway.proxy.router import BackendRouter, build_http_clients
from neuronetz_gateway.revocation import revocation_listener

if TYPE_CHECKING:
    from fastapi import FastAPI

# Module-level logger, obtained from the project's logging helper under the
# name "lifespan"; used for all startup/shutdown events in this module.
_log = get_logger("lifespan")


def _build_upstream_headers(settings: Settings) -> dict[str, str]:
    """Return the default header set sent with every upstream Ollama request.

    A ``User-Agent`` is always present. When ``settings.ollama_auth_token``
    holds a non-blank secret, it is added under ``settings.ollama_auth_header``:
    prefixed with ``settings.ollama_auth_scheme`` if that header is
    ``Authorization`` (compared case-insensitively), or as the bare token for
    any other header name. The secret is unwrapped here and is never logged.
    """
    result: dict[str, str] = {"User-Agent": "neuronetz-gateway"}

    secret = settings.ollama_auth_token
    if secret is None:
        return result

    token = secret.get_secret_value().strip()
    if not token:
        # A blank/whitespace-only token is treated the same as "not configured".
        return result

    header_name = settings.ollama_auth_header
    uses_scheme = header_name.lower() == "authorization"
    # The outer strip() drops the leading space left behind by an empty scheme.
    result[header_name] = (
        f"{settings.ollama_auth_scheme} {token}".strip() if uses_scheme else token
    )
    return result


def _build_http_client(settings: Settings) -> httpx.AsyncClient:
    """Create the shared ``httpx.AsyncClient`` pointed at the Ollama base URL.

    The connect timeout setting is applied to both the ``connect`` and ``pool``
    phases, and the read timeout setting to both ``read`` and ``write``. The
    connection pool is capped at ``settings.ollama_max_connections`` and the
    default headers come from ``_build_upstream_headers``.
    """
    connect_s = settings.ollama_connect_timeout_s
    read_s = settings.ollama_read_timeout_s
    return httpx.AsyncClient(
        base_url=settings.ollama_base_url,
        timeout=httpx.Timeout(
            connect=connect_s,
            read=read_s,
            write=read_s,
            pool=connect_s,
        ),
        limits=httpx.Limits(max_connections=settings.ollama_max_connections),
        headers=_build_upstream_headers(settings),
    )


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Bring all backends up before the app serves and tear them down after.

    Every handle is published on ``app.state``. A failure while creating the
    database engine/session factory or the Redis client is logged and the
    affected handle(s) are stored as ``None`` rather than aborting startup.
    On exit, ``_shutdown`` is always invoked with the tasks started here.
    """
    state = app.state
    settings: Settings = get_settings()
    state.settings = settings
    state.hasher = build_hasher(settings)
    cache = DiscoveryCache()
    state.discovery_cache = cache

    # Database: either both handles are usable or both are None.
    try:
        db_engine = create_engine(settings)
        session_factory = create_session_factory(db_engine)
    except Exception as err:  # noqa: BLE001 - tolerate so /healthz still serves
        _log.error("db_engine_init_failed", error=str(err))
        db_engine = session_factory = None
    state.db_engine = db_engine
    state.db_sessionmaker = session_factory

    # Redis client.
    try:
        redis_client = redis.from_url(settings.redis_url, decode_responses=True)
    except Exception as err:  # noqa: BLE001 - tolerate so /healthz still serves
        _log.error("redis_init_failed", error=str(err))
        redis_client = None
    state.redis = redis_client

    # One upstream httpx client per entry in OLLAMA_BACKENDS (or a single
    # "default" backend synthesized from the legacy OLLAMA_BASE_URL).
    http_clients, order = build_http_clients(settings)
    state.backend_http_clients = http_clients
    state.backend_order = order
    # Legacy single-client attribute: the FIRST backend's httpx client, kept
    # for code paths (and tests) not yet migrated to the router. New code
    # should reach upstream via the router instead.
    state.http_client = http_clients[order[0]]
    wrapped = {name: OllamaClient(raw) for name, raw in http_clients.items()}
    state.backend_router = BackendRouter(
        clients=wrapped,
        order=order,
        discovery=cache,
    )
    _log.info("backends_configured", backends=order)

    writer = AuditWriter(settings.audit_buffer_size, session_factory)
    writer.start()
    state.audit_writer = writer

    # Background tasks (cancelled on shutdown). Discovery always runs; the
    # revocation listener needs both Redis and the DB session factory.
    discovery_task = asyncio.create_task(
        discovery_loop_multi(http_clients, order, redis_client, cache, settings)
    )
    background: list[asyncio.Task[None]] = [discovery_task]
    if not (redis_client is None or session_factory is None):
        listener_task = asyncio.create_task(
            revocation_listener(settings, redis_client, session_factory)
        )
        background.append(listener_task)
    state.background_tasks = background

    _log.info("gateway_startup_complete")
    try:
        yield
    finally:
        await _shutdown(app, background, writer)


async def _shutdown(
    app: FastAPI, tasks: list[asyncio.Task[None]], audit_writer: AuditWriter
) -> None:
    """Cancel background tasks and dispose of all backend handles.

    Teardown is best-effort: every step runs even if an earlier one fails.

    Args:
        app: The application whose ``app.state`` holds the backend handles
            (``backend_http_clients``, ``redis``, ``db_engine``). Missing
            attributes are treated as "nothing to close".
        tasks: Background tasks started during startup; each is cancelled and
            then awaited.
        audit_writer: The audit writer to stop before connections are closed.
    """
    for task in tasks:
        task.cancel()
    for task in tasks:
        try:
            await task
        except asyncio.CancelledError:
            # Expected outcome of the cancel() above.
            pass
        except Exception as exc:  # noqa: BLE001 - teardown must continue
            # The task had already crashed before shutdown; awaiting it
            # re-raises that error. Log it instead of letting it abort the
            # remaining cleanup (which would leak every handle below).
            _log.error("background_task_failed", task=task.get_name(), error=str(exc))

    with contextlib.suppress(Exception):
        await audit_writer.stop()

    # Close every per-backend httpx client (the legacy `http_client` attr is
    # one of these, so we only need to iterate the dict).
    backend_clients: dict[str, httpx.AsyncClient] = getattr(
        app.state, "backend_http_clients", {}
    )
    for client in backend_clients.values():
        with contextlib.suppress(Exception):
            await client.aclose()

    redis_client = getattr(app.state, "redis", None)
    if redis_client is not None:
        with contextlib.suppress(Exception):
            await redis_client.aclose()

    engine = getattr(app.state, "db_engine", None)
    if engine is not None:
        with contextlib.suppress(Exception):
            await engine.dispose()

    _log.info("gateway_shutdown_complete")


# Public API: only the lifespan context manager is exported; the helper
# functions above are underscore-prefixed and stay module-private.
__all__ = ["lifespan"]