scaffold: project skeleton, schema, healthz/readyz, CI
Initial project structure for neuronetz-gateway per scope-docs/SPEC.md:
- Python 3.12 / FastAPI / SQLAlchemy 2.0 (async) / Redis / Postgres stack managed by uv. Multi-stage non-root Dockerfile, prod + dev compose files (ollama service is NEVER published in either), Caddyfile + systemd unit, justfile, GitHub Actions CI (ruff, mypy --strict, pytest, bandit, pip-audit).
- Pydantic-Settings config covering every env var from SPEC §7, including the MODEL_DISCOVERY_* keys for the dynamic-discovery feature (§4.6).
- Alembic 0001_initial creates the full gateway schema (8 tables, 3 enums, notify_key_revoked() trigger), incl. allow_all_models on tenant_limits and key_limits for the per-tenant auto-grant toggle.
- Working /healthz, /readyz (fail-closed when deps unreachable), and a Prometheus /metrics stub. Sanitizing error handlers that attach X-Request-ID to every response and never leak upstream internals.
- SPEC + AGENT_PROMPT included under scope-docs/ (source of truth).
This commit is contained in:
131
src/neuronetz_gateway/lifespan.py
Normal file
131
src/neuronetz_gateway/lifespan.py
Normal file
@@ -0,0 +1,131 @@
|
||||
"""Application lifespan: connect/dispose backends and run background tasks.
|
||||
|
||||
Startup connects Postgres + Redis + the upstream httpx client, builds the
|
||||
argon2 hasher and the buffered audit writer, and launches the background tasks:
|
||||
the model-discovery poller (SPEC §4.6) and the Postgres revocation NOTIFY
|
||||
listener (SPEC §4.5). Connection failures are tolerated so ``/healthz`` always
|
||||
serves; ``/readyz`` reports true readiness. All handles live on ``app.state``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import httpx
|
||||
import redis.asyncio as redis
|
||||
|
||||
from neuronetz_gateway.audit.writer import AuditWriter
|
||||
from neuronetz_gateway.auth.hashing import build_hasher
|
||||
from neuronetz_gateway.config import Settings, get_settings
|
||||
from neuronetz_gateway.db.session import create_engine, create_session_factory
|
||||
from neuronetz_gateway.observability.logging import get_logger
|
||||
from neuronetz_gateway.proxy.discovery import DiscoveryCache, discovery_loop
|
||||
from neuronetz_gateway.revocation import revocation_listener
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fastapi import FastAPI
|
||||
|
||||
# Module-level logger shared by the startup and shutdown paths in this file.
_log = get_logger("lifespan")
|
||||
|
||||
|
||||
def _build_http_client(settings: Settings) -> httpx.AsyncClient:
    """Create the shared async HTTP client that talks to Ollama.

    The client is rooted at ``settings.ollama_base_url`` and capped at
    ``settings.ollama_max_connections`` connections. Only two timeout
    settings are consulted: the connect timeout also bounds pool
    acquisition, and the read timeout also bounds writes.
    """
    connect_s = settings.ollama_connect_timeout_s
    read_s = settings.ollama_read_timeout_s
    return httpx.AsyncClient(
        base_url=settings.ollama_base_url,
        timeout=httpx.Timeout(
            connect=connect_s,
            read=read_s,
            write=read_s,
            pool=connect_s,
        ),
        limits=httpx.Limits(max_connections=settings.ollama_max_connections),
    )
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Bring up every backend handle and background task, then tear them down.

    Everything built here is published on ``app.state``: ``settings``,
    ``hasher``, ``discovery_cache``, ``db_engine``, ``db_sessionmaker``,
    ``redis``, ``http_client``, ``audit_writer`` and ``background_tasks``.

    A failure while building the database engine / session factory or the
    Redis client is logged and the matching attributes are set to ``None``
    rather than aborting startup. The discovery loop is always scheduled;
    the revocation listener is scheduled only when both Redis and the
    session factory are available. When the application exits, ``_shutdown``
    is awaited with the scheduled tasks and the audit writer.
    """
    state = app.state
    cfg: Settings = get_settings()
    state.settings = cfg
    state.hasher = build_hasher(cfg)
    state.discovery_cache = DiscoveryCache()

    # Database engine + session factory: publish both, or neither.
    try:
        db_engine = create_engine(cfg)
        session_factory = create_session_factory(db_engine)
    except Exception as exc:  # noqa: BLE001 - tolerate so /healthz still serves
        _log.error("db_engine_init_failed", error=str(exc))
        state.db_engine = None
        state.db_sessionmaker = None
    else:
        state.db_engine = db_engine
        state.db_sessionmaker = session_factory

    # Redis client.
    try:
        state.redis = redis.from_url(cfg.redis_url, decode_responses=True)
    except Exception as exc:  # noqa: BLE001 - tolerate so /healthz still serves
        _log.error("redis_init_failed", error=str(exc))
        state.redis = None

    upstream = _build_http_client(cfg)
    state.http_client = upstream

    writer = AuditWriter(cfg.audit_buffer_size, state.db_sessionmaker)
    writer.start()
    state.audit_writer = writer

    # Background tasks (cancelled on shutdown).
    background: list[asyncio.Task[None]] = [
        asyncio.create_task(
            discovery_loop(upstream, state.redis, state.discovery_cache, cfg)
        )
    ]
    if not (state.redis is None or state.db_sessionmaker is None):
        listener = asyncio.create_task(
            revocation_listener(cfg, state.redis, state.db_sessionmaker)
        )
        background.append(listener)
    state.background_tasks = background

    _log.info("gateway_startup_complete")
    try:
        yield
    finally:
        await _shutdown(app, background, writer)
|
||||
|
||||
|
||||
async def _shutdown(
    app: FastAPI, tasks: list[asyncio.Task[None]], audit_writer: AuditWriter
) -> None:
    """Cancel background tasks and dispose of all backend handles.

    Teardown order: cancel and await every task, stop the audit writer, then
    close the HTTP client, the Redis client and the database engine. Each
    step is best-effort so that one failing step never prevents the
    following handles from being released.

    Args:
        app: Application whose ``app.state`` may carry ``http_client``,
            ``redis`` and ``db_engine``; missing or ``None`` entries are
            skipped.
        tasks: Background tasks to cancel and wait for.
        audit_writer: Writer whose ``stop()`` is awaited before the backends
            are closed.
    """
    for task in tasks:
        task.cancel()
    for task in tasks:
        try:
            await task
        except asyncio.CancelledError:
            # Expected outcome of the cancel() above.
            pass
        except Exception as exc:  # noqa: BLE001 - teardown must keep going
            # A task that had already crashed before cancellation re-raises
            # its own exception when awaited. Record it instead of letting
            # it abort the teardown and leak the handles closed below.
            _log.error("background_task_failed", task=task.get_name(), error=str(exc))

    with contextlib.suppress(Exception):
        await audit_writer.stop()

    http_client: httpx.AsyncClient | None = getattr(app.state, "http_client", None)
    if http_client is not None:
        with contextlib.suppress(Exception):
            await http_client.aclose()

    redis_client = getattr(app.state, "redis", None)
    if redis_client is not None:
        with contextlib.suppress(Exception):
            await redis_client.aclose()

    engine = getattr(app.state, "db_engine", None)
    if engine is not None:
        with contextlib.suppress(Exception):
            await engine.dispose()

    _log.info("gateway_shutdown_complete")
|
||||
|
||||
|
||||
# Public API of this module: only the ``lifespan`` context manager is exported.
__all__ = ["lifespan"]
|
||||
Reference in New Issue
Block a user