scaffold: project skeleton, schema, healthz/readyz, CI

Initial project structure for neuronetz-gateway per scope-docs/SPEC.md:

- Python 3.12 / FastAPI / SQLAlchemy 2.0 (async) / Redis / Postgres stack
  managed by uv. Multi-stage non-root Dockerfile, prod + dev compose files
  (ollama service is NEVER published in either), Caddyfile + systemd unit,
  justfile, GitHub Actions CI (ruff, mypy --strict, pytest, bandit, pip-audit).
- Pydantic-Settings config covering every env var from SPEC §7, including the
  MODEL_DISCOVERY_* keys for the dynamic-discovery feature (§4.6).
- Alembic 0001_initial creates the full gateway schema (8 tables, 3 enums,
  notify_key_revoked() trigger), incl. allow_all_models on tenant_limits and
  key_limits for the per-tenant auto-grant toggle.
- Working /healthz, /readyz (fail-closed when deps unreachable), and a
  Prometheus /metrics stub. Sanitizing error handlers that attach X-Request-ID
  to every response and never leak upstream internals.
- SPEC + AGENT_PROMPT included under scope-docs/ (source of truth).
This commit is contained in:
Stephan Berbig
2026-05-26 20:50:35 +02:00
commit d79f17b3bb
32 changed files with 3610 additions and 0 deletions

View File

@@ -0,0 +1,180 @@
"""FastAPI dependency-injection providers.

Exposes typed accessors for the handles placed on ``app.state`` by the lifespan
(Redis, the upstream httpx client, the DB session factory, the discovery cache,
the audit writer) plus the request principal and the proxy client.

QA override contract
--------------------
Routes obtain the upstream proxy via :func:`get_ollama_client`. Tests override
the *Ollama backend* by overriding this provider::

    from neuronetz_gateway.deps import get_ollama_client
    from neuronetz_gateway.proxy.ollama import OllamaClient
    import httpx
    from tests.integration.mock_ollama import create_mock_ollama

    transport = httpx.ASGITransport(app=create_mock_ollama())
    mock_http = httpx.AsyncClient(transport=transport, base_url="http://ollama")
    app.dependency_overrides[get_ollama_client] = lambda: OllamaClient(mock_http)

Because ``get_ollama_client`` returns a fully-built :class:`OllamaClient`, an
override needs no access to ``app.state`` and can point at the in-process mock.
"""
from __future__ import annotations
from collections.abc import AsyncIterator
from typing import Annotated
import httpx
import redis.asyncio as redis
from fastapi import Depends, Request
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from neuronetz_gateway.audit.writer import AuditWriter
from neuronetz_gateway.auth.principal import Principal
from neuronetz_gateway.budget.counter import BudgetCounter
from neuronetz_gateway.config import Settings, get_settings
from neuronetz_gateway.errors import AuthenticationError, DependencyUnavailableError
from neuronetz_gateway.proxy.discovery import DiscoveryCache
from neuronetz_gateway.proxy.ollama import OllamaClient
from neuronetz_gateway.proxy.pipeline import Pipeline
from neuronetz_gateway.ratelimit.concurrency import ConcurrencyLimiter
from neuronetz_gateway.ratelimit.sliding_window import SlidingWindowLimiter
def get_config() -> Settings:
    """Return the application settings object.

    Thin provider around :func:`get_settings` so that routes can receive the
    configuration through FastAPI's ``Depends`` mechanism.
    """
    settings = get_settings()
    return settings
def get_redis(request: Request) -> redis.Redis:
    """Return the shared Redis client stored on ``app.state.redis``.

    Raises:
        DependencyUnavailableError: when the attribute is missing or ``None``,
            so the request fails closed instead of running without Redis.
    """
    redis_handle: redis.Redis | None = getattr(request.app.state, "redis", None)
    if redis_handle is not None:
        return redis_handle
    raise DependencyUnavailableError(internal_detail="redis client not initialised")
def get_http_client(request: Request) -> httpx.AsyncClient:
    """Return the shared upstream httpx client stored on ``app.state.http_client``.

    Raises:
        DependencyUnavailableError: when the attribute is missing or ``None``.
    """
    upstream_http: httpx.AsyncClient | None = getattr(
        request.app.state, "http_client", None
    )
    if upstream_http is not None:
        return upstream_http
    raise DependencyUnavailableError(internal_detail="http client not initialised")
def get_ollama_client(request: Request) -> OllamaClient:
    """Build the upstream Ollama proxy client for this request.

    Wraps the shared httpx client from :func:`get_http_client` in a new
    :class:`OllamaClient`. This provider is the override target for tests
    that swap in a mock Ollama backend.
    """
    upstream_http = get_http_client(request)
    return OllamaClient(upstream_http)
def get_discovery_cache(request: Request) -> DiscoveryCache:
    """Return the discovery cache stored on ``app.state.discovery_cache``.

    Raises:
        DependencyUnavailableError: when the attribute is missing or ``None``.
    """
    discovery: DiscoveryCache | None = getattr(
        request.app.state, "discovery_cache", None
    )
    if discovery is not None:
        return discovery
    raise DependencyUnavailableError(internal_detail="discovery cache not initialised")
def get_principal(request: Request) -> Principal:
    """Return the authenticated principal found on ``request.state.principal``.

    The auth middleware is expected to attach the principal before routing.
    If it is missing (or ``None``) on a route that requires authentication,
    that is treated as a programming error and the request fails closed.

    Raises:
        AuthenticationError: when no principal is present on the request state.
    """
    caller: Principal | None = getattr(request.state, "principal", None)
    if caller is not None:
        return caller
    raise AuthenticationError(internal_detail="principal missing on authenticated route")
def get_audit_writer(request: Request) -> AuditWriter:
    """Return the shared audit writer stored on ``app.state.audit_writer``.

    Raises:
        DependencyUnavailableError: when the attribute is missing or ``None``.
    """
    audit_sink: AuditWriter | None = getattr(request.app.state, "audit_writer", None)
    if audit_sink is not None:
        return audit_sink
    raise DependencyUnavailableError(internal_detail="audit writer not initialised")
def get_pipeline(
    request: Request,
    principal: Annotated[Principal, Depends(get_principal)],
    settings: Annotated[Settings, Depends(get_config)],
    ollama: Annotated[OllamaClient, Depends(get_ollama_client)],
    discovery: Annotated[DiscoveryCache, Depends(get_discovery_cache)],
    redis_client: Annotated[redis.Redis, Depends(get_redis)],
    audit: Annotated[AuditWriter, Depends(get_audit_writer)],
) -> Pipeline:
    """Build the per-request :class:`Pipeline` from the injected dependencies.

    A fresh sliding-window rate limiter, concurrency limiter and budget
    counter are created for every request, all sharing the injected Redis
    client. The pipeline is described as owning the hot-path checks (rate
    limit, budget, concurrency, model/endpoint allowlist) and the
    streaming-with-bookkeeping contract.

    The DB session factory is read from ``app.state.db_sessionmaker`` and
    handed over as-is, so it may be ``None``; this provider does not fail
    closed on a missing factory. Audit deny-mode is noted as enforcing
    fail-closed behaviour at the route layer instead.
    """
    # Looked up leniently on purpose: unlike ``_get_sessionmaker`` a missing
    # factory is passed through as ``None`` rather than raising here.
    db_factory: async_sessionmaker[AsyncSession] | None = getattr(
        request.app.state, "db_sessionmaker", None
    )

    # All three enforcement helpers share the same Redis connection.
    window_limiter = SlidingWindowLimiter(redis_client)
    inflight_limiter = ConcurrencyLimiter(redis_client)
    budget_counter = BudgetCounter(redis_client)

    return Pipeline(
        request=request,
        principal=principal,
        settings=settings,
        ollama=ollama,
        discovery=discovery,
        rate_limiter=window_limiter,
        concurrency=inflight_limiter,
        budget=budget_counter,
        audit=audit,
        sessionmaker=db_factory,
    )
def _get_sessionmaker(request: Request) -> async_sessionmaker[AsyncSession]:
    """Return the DB session factory stored on ``app.state.db_sessionmaker``.

    Raises:
        DependencyUnavailableError: when the attribute is missing or ``None``,
            so callers fail closed instead of proceeding without a database.
    """
    session_factory: async_sessionmaker[AsyncSession] | None = getattr(
        request.app.state, "db_sessionmaker", None
    )
    if session_factory is not None:
        return session_factory
    raise DependencyUnavailableError(internal_detail="db session factory not initialised")
async def get_db_session(request: Request) -> AsyncIterator[AsyncSession]:
    """Yield a request-scoped async DB session.

    The session is opened from the factory returned by
    :func:`_get_sessionmaker` and is used as an async context manager, so it
    is exited once the consumer of this generator is done with it. A missing
    factory surfaces as the error raised by :func:`_get_sessionmaker`.
    """
    async with _get_sessionmaker(request)() as db_session:
        yield db_session
# Ready-made ``Annotated`` aliases pairing each provider above with the type
# it returns. A route can declare e.g. ``redis: RedisDep`` instead of spelling
# out ``Annotated[redis.Redis, Depends(get_redis)]`` in every signature.
ConfigDep = Annotated[Settings, Depends(get_config)]
RedisDep = Annotated[redis.Redis, Depends(get_redis)]
HttpClientDep = Annotated[httpx.AsyncClient, Depends(get_http_client)]
OllamaClientDep = Annotated[OllamaClient, Depends(get_ollama_client)]
DiscoveryCacheDep = Annotated[DiscoveryCache, Depends(get_discovery_cache)]
PrincipalDep = Annotated[Principal, Depends(get_principal)]
AuditWriterDep = Annotated[AuditWriter, Depends(get_audit_writer)]
PipelineDep = Annotated[Pipeline, Depends(get_pipeline)]
DbSessionDep = Annotated[AsyncSession, Depends(get_db_session)]
# Public API of this module: the ``*Dep`` aliases first, then the provider
# functions. The underscore-prefixed ``_get_sessionmaker`` helper is not
# exported.
__all__ = [
"AuditWriterDep",
"ConfigDep",
"DbSessionDep",
"DiscoveryCacheDep",
"HttpClientDep",
"OllamaClientDep",
"PipelineDep",
"PrincipalDep",
"RedisDep",
"get_audit_writer",
"get_config",
"get_db_session",
"get_discovery_cache",
"get_http_client",
"get_ollama_client",
"get_pipeline",
"get_principal",
"get_redis",
]