auth + cli: argon2id keys, bearer middleware, bootstrap commands

- argon2id hash/verify/needs_rehash; constant-time path; parameters from config.
- Key format nz_<prefix><secret> (12-char stored prefix incl. nz_, 32-char
  random secret); the full key is generated with secrets, hashed argon2id, and
  printed exactly once at creation — never persisted, never logged.
- Bearer auth middleware: extract → resolve prefix → Redis cache (TTL from
  REDIS_KEY_CACHE_TTL_S) → DB → argon2 verify → cache the resolved Principal.
  Fail-closed; uniform sanitized 401 with X-Request-ID; per-IP auth-failure
  counter to slow brute force. Exempt paths: /healthz /readyz /metrics /, and
  /playground when enabled.
- Bootstrap CLI (Typer) per SPEC §11: create-tenant (with --allow-all-models),
  create-key, list-keys, revoke-key, set-budget, set-models (--models or
  --allow-all / --no-allow-all), show-usage, list-models.
- Async repositories for tenants, api_keys, key_limits, budget_usage,
  revocations, audit_log — including the join+inheritance flatten that
  produces a Principal with effective rpm/tpm/concurrent/allowed_models/
  allow_all_models for the auth cache.
This commit is contained in:
Stephan Berbig
2026-05-26 20:52:33 +02:00
parent d79f17b3bb
commit 6431b2f72c
8 changed files with 1148 additions and 0 deletions

View File

@@ -0,0 +1,323 @@
"""Bootstrap CLI (Typer) per SPEC §11.

Entry point: ``neuronetz-gateway = neuronetz_gateway.cli.manage:app``.

This is the *only* supported way to create tenants and keys (AGENT_PROMPT
non-negotiable #10: the CLI must work before the first manual ``curl``). Each
command opens its own short-lived async engine against ``DATABASE_URL``, does
its unit of work in a transaction, and exits. The full API key is printed
exactly once, at creation, and never stored or logged.

``list-models`` reads the discovery cache from Redis (SPEC §4.6); with
``--tenant`` it also resolves and prints that tenant's effective model set.
"""
from __future__ import annotations
import asyncio
from collections.abc import Awaitable, Callable
from typing import Annotated
import typer
from neuronetz_gateway.auth.hashing import build_hasher, hash_secret
from neuronetz_gateway.auth.keys import generate_key
from neuronetz_gateway.config import Settings, get_settings
from neuronetz_gateway.db.models import BudgetPeriod, KeyStatus
from neuronetz_gateway.db.repositories import (
ApiKeyRepository,
BudgetRepository,
KeyLimitRepository,
RevocationRepository,
TenantRepository,
)
from neuronetz_gateway.db.session import create_engine, create_session_factory, session_scope
from neuronetz_gateway.proxy.allowlist import resolve_effective_models
from neuronetz_gateway.proxy.discovery import read_discovered_from_redis
# Root Typer application. Every ``@app.command(...)`` function below registers
# itself on this object as a subcommand.
app = typer.Typer(
    name="neuronetz-gateway",
    help="Bootstrap CLI for the neuronetz-gateway (tenants, keys, budgets).",
    # Invoking the CLI with no subcommand prints the help text.
    no_args_is_help=True,
    # Do not add the shell-completion install/show options to the CLI.
    add_completion=False,
)
def _run[T](coro_factory: Callable[[Settings], Awaitable[T]]) -> T:
"""Execute an async unit of work against a fresh engine, then dispose it."""
async def _main() -> T:
settings = get_settings()
engine = create_engine(settings)
try:
return await coro_factory(settings)
finally:
await engine.dispose()
return asyncio.run(_main())
@app.command("create-tenant")
def create_tenant(
    name: Annotated[str, typer.Option("--name", help="Unique tenant name.")],
    rpm: Annotated[int, typer.Option("--rpm", help="Requests-per-minute limit.")] = 60,
    tpm: Annotated[int, typer.Option("--tpm", help="Tokens-per-minute limit.")] = 100_000,
    concurrent: Annotated[
        int, typer.Option("--concurrent", help="Concurrent-connection cap.")
    ] = 8,
    allow_all_models: Annotated[
        bool,
        typer.Option(
            "--allow-all-models/--no-allow-all-models",
            help="Opt the tenant into using any installed model.",
        ),
    ] = False,
) -> None:
    """Create a tenant with optional rate limits and model policy."""
    # The docstring above is the command's --help text, so the details live
    # in comments: the command fails with typer.BadParameter when a tenant
    # called ``name`` already exists.

    async def work(settings: Settings) -> None:
        """Insert the tenant row, then report its name and id."""
        # Keep a handle on the engine this command opens so its pool can be
        # released before the event loop shuts down.
        engine = create_engine(settings)
        try:
            factory = create_session_factory(engine)
            async with session_scope(factory) as session:
                tenants = TenantRepository(session)
                if await tenants.get_by_name(name) is not None:
                    raise typer.BadParameter(f"tenant {name!r} already exists")
                tenant = await tenants.create(
                    name=name,
                    rpm=rpm,
                    tpm=tpm,
                    concurrent=concurrent,
                    allow_all_models=allow_all_models,
                )
                # Read the reported attributes while the session is open.
                tenant_name, tenant_id = tenant.name, tenant.id
            # Report only once the session scope has exited without raising.
            typer.echo(f"created tenant {tenant_name} ({tenant_id})")
            typer.echo(f" allow_all_models={allow_all_models} rpm={rpm} tpm={tpm}")
        finally:
            await engine.dispose()

    _run(work)
@app.command("create-key")
def create_key(
    tenant: Annotated[str, typer.Option("--tenant", help="Owning tenant name.")],
    name: Annotated[str, typer.Option("--name", help="Human-readable key name.")],
    scopes: Annotated[
        str, typer.Option("--scopes", help="Comma-separated scopes.")
    ] = "chat,embeddings",
) -> None:
    """Create an API key for a tenant. The full key is printed exactly once."""
    # The docstring above is the command's --help text, so the details live
    # in comments: the command fails with typer.BadParameter when ``tenant``
    # does not name an existing tenant.

    async def work(settings: Settings) -> None:
        """Generate a key, store its hash and prefix, then print the key."""
        # Keep a handle on the engine this command opens so its pool can be
        # released before the event loop shuts down.
        engine = create_engine(settings)
        try:
            factory = create_session_factory(engine)
            hasher = build_hasher(settings)
            # Blank entries ("a,,b", trailing commas) are dropped.
            scope_list = [s.strip() for s in scopes.split(",") if s.strip()]
            async with session_scope(factory) as session:
                tenant_row = await TenantRepository(session).get_by_name(tenant)
                if tenant_row is None:
                    raise typer.BadParameter(f"unknown tenant {tenant!r}")
                generated = generate_key()
                # Only the hash and the prefix are handed to the repository;
                # the full key stays in ``generated``.
                key_hash = hash_secret(hasher, generated.full_key)
                created = await ApiKeyRepository(session).create(
                    tenant_id=tenant_row.id,
                    prefix=generated.prefix,
                    key_hash=key_hash,
                    name=name,
                    scopes=scope_list,
                )
                # Read the reported attributes while the session is open.
                key_name, key_prefix = created.name, created.prefix
            # Print the secret only after the session scope exited cleanly, so
            # a failed transaction never shows a key that was not stored.
            typer.echo(f"created key {key_name} for tenant {tenant} (prefix {key_prefix})")
            typer.echo("")
            typer.secho(
                "API KEY (shown once — store it now):", fg=typer.colors.YELLOW, bold=True
            )
            typer.secho(generated.full_key, fg=typer.colors.GREEN, bold=True)
        finally:
            await engine.dispose()

    _run(work)
@app.command("revoke-key")
def revoke_key(
    prefix: Annotated[str, typer.Option("--prefix", help="Key prefix to revoke.")],
) -> None:
    """Revoke a key by its prefix (sets status + writes the revocation outbox)."""
    # The docstring above is the command's --help text, so the details live
    # in comments: the command fails with typer.BadParameter when no key has
    # the given prefix.

    async def work(settings: Settings) -> None:
        """Mark the key revoked and record the revocation in one session."""
        # Keep a handle on the engine this command opens so its pool can be
        # released before the event loop shuts down.
        engine = create_engine(settings)
        try:
            factory = create_session_factory(engine)
            async with session_scope(factory) as session:
                keys = ApiKeyRepository(session)
                key = await keys.get_by_prefix(prefix)
                if key is None:
                    raise typer.BadParameter(f"no key with prefix {prefix!r}")
                # Read the id while the session is open; it is reused below.
                key_id = key.id
                # Both writes share the session scope.
                await keys.set_status(key_id, KeyStatus.revoked)
                await RevocationRepository(session).insert(key_id, reason="cli revoke")
            # Report only once the session scope has exited without raising.
            typer.echo(f"revoked key {prefix} ({key_id})")
        finally:
            await engine.dispose()

    _run(work)
@app.command("list-keys")
def list_keys(
    tenant: Annotated[str, typer.Option("--tenant", help="Tenant whose keys to list.")],
) -> None:
    """List a tenant's keys (prefixes and metadata, never full keys)."""
    # The docstring above is the command's --help text, so the details live
    # in comments: the command fails with typer.BadParameter when ``tenant``
    # does not name an existing tenant.

    async def work(settings: Settings) -> None:
        """Print one line per key: prefix, status, name and creation time."""
        # Keep a handle on the engine this command opens so its pool can be
        # released before the event loop shuts down.
        engine = create_engine(settings)
        try:
            factory = create_session_factory(engine)
            async with session_scope(factory) as session:
                tenant_row = await TenantRepository(session).get_by_name(tenant)
                if tenant_row is None:
                    raise typer.BadParameter(f"unknown tenant {tenant!r}")
                rows = await ApiKeyRepository(session).list_for_tenant(tenant_row.id)
                if not rows:
                    typer.echo("(no keys)")
                    return
                # Rows are printed while the session is still open so their
                # attributes are read from live objects.
                for key in rows:
                    typer.echo(
                        f"{key.prefix} status={key.status.value:<8} "
                        f"name={key.name!r} created={key.created_at.isoformat()}"
                    )
        finally:
            await engine.dispose()

    _run(work)
@app.command("show-usage")
def show_usage(
    tenant: Annotated[str, typer.Option("--tenant", help="Tenant to report usage for.")],
    period: Annotated[str, typer.Option("--period", help="Period: day|month|total.")] = "day",
) -> None:
    """Show token/request usage for a tenant in a period."""
    # The docstring above is the command's --help text, so the details live
    # in comments: the command fails with typer.BadParameter when ``period``
    # is not a BudgetPeriod value or ``tenant`` does not exist.

    async def work(settings: Settings) -> None:
        """Validate the period, fetch the tenant's usage and print it."""
        # Reject a bad period before any database work is attempted.
        try:
            period_enum = BudgetPeriod(period)
        except ValueError as exc:
            raise typer.BadParameter("period must be one of day|month|total") from exc

        # Keep a handle on the engine this command opens so its pool can be
        # released before the event loop shuts down.
        engine = create_engine(settings)
        try:
            factory = create_session_factory(engine)
            async with session_scope(factory) as session:
                tenant_row = await TenantRepository(session).get_by_name(tenant)
                if tenant_row is None:
                    raise typer.BadParameter(f"unknown tenant {tenant!r}")
                tokens_in, tokens_out, requests = await BudgetRepository(
                    session
                ).usage_for_tenant(tenant_row.id, period_enum)
            typer.echo(f"usage for {tenant} (period={period}):")
            typer.echo(f" requests={requests} tokens_in={tokens_in} tokens_out={tokens_out}")
        finally:
            await engine.dispose()

    _run(work)
@app.command("set-budget")
def set_budget(
    key: Annotated[str, typer.Option("--key", help="Key prefix to set budget on.")],
    daily: Annotated[int | None, typer.Option("--daily", help="Daily token budget.")] = None,
    monthly: Annotated[
        int | None, typer.Option("--monthly", help="Monthly token budget.")
    ] = None,
    total: Annotated[int | None, typer.Option("--total", help="Lifetime token budget.")] = None,
) -> None:
    """Set per-key token budgets."""
    # The docstring above is the command's --help text, so the details live
    # in comments: the command fails with typer.BadParameter when none of
    # --daily/--monthly/--total is given or no key has the given prefix.

    async def work(settings: Settings) -> None:
        """Upsert the budget values for the key identified by its prefix."""
        # Reject an empty request before any database work is attempted.
        if daily is None and monthly is None and total is None:
            raise typer.BadParameter("provide at least one of --daily/--monthly/--total")

        # Keep a handle on the engine this command opens so its pool can be
        # released before the event loop shuts down.
        engine = create_engine(settings)
        try:
            factory = create_session_factory(engine)
            async with session_scope(factory) as session:
                key_row = await ApiKeyRepository(session).get_by_prefix(key)
                if key_row is None:
                    raise typer.BadParameter(f"no key with prefix {key!r}")
                # Omitted options are forwarded as None.
                await KeyLimitRepository(session).upsert_budget(
                    key_row.id,
                    tokens_daily=daily,
                    tokens_monthly=monthly,
                    tokens_total=total,
                )
            # Report only once the session scope has exited without raising.
            typer.echo(f"set budget on {key}: daily={daily} monthly={monthly} total={total}")
        finally:
            await engine.dispose()

    _run(work)
@app.command("set-models")
def set_models(
    tenant: Annotated[str, typer.Option("--tenant", help="Tenant to set models for.")],
    models: Annotated[
        str | None, typer.Option("--models", help="Comma-separated model allowlist.")
    ] = None,
    allow_all: Annotated[
        bool | None,
        typer.Option(
            "--allow-all/--no-allow-all",
            help="Opt into / out of allow_all_models for the tenant.",
        ),
    ] = None,
) -> None:
    """Set a tenant's model allowlist and/or its allow_all_models flag."""
    # The docstring above is the command's --help text, so the details live
    # in comments: the command fails with typer.BadParameter when neither
    # --models nor --allow-all/--no-allow-all is given, or the tenant is
    # unknown.

    async def work(settings: Settings) -> None:
        """Parse the allowlist and forward it together with the flag."""
        # Reject an empty request before any database work is attempted.
        if models is None and allow_all is None:
            raise typer.BadParameter("provide --models and/or --allow-all/--no-allow-all")

        # None means --models was not passed; a passed value is split on
        # commas with blank entries dropped (so "" becomes an empty list).
        allowed = (
            [m.strip() for m in models.split(",") if m.strip()] if models is not None else None
        )

        # Keep a handle on the engine this command opens so its pool can be
        # released before the event loop shuts down.
        engine = create_engine(settings)
        try:
            factory = create_session_factory(engine)
            async with session_scope(factory) as session:
                tenants = TenantRepository(session)
                tenant_row = await tenants.get_by_name(tenant)
                if tenant_row is None:
                    raise typer.BadParameter(f"unknown tenant {tenant!r}")
                await tenants.set_models(
                    tenant_row.id, allowed_models=allowed, allow_all_models=allow_all
                )
            # Report only once the session scope has exited without raising.
            typer.echo(f"updated models for {tenant}: allowed={allowed} allow_all={allow_all}")
        finally:
            await engine.dispose()

    _run(work)
@app.command("list-models")
def list_models(
    tenant: Annotated[
        str | None, typer.Option("--tenant", help="Also show this tenant's effective set.")
    ] = None,
) -> None:
    """Show live-discovered models (and, with --tenant, the effective set)."""
    # The docstring above is the command's --help text, so the details live
    # in comments: with --tenant the command fails with typer.BadParameter
    # when the tenant is unknown or has no limits row.

    # Imported here rather than at module level, as in the original code.
    import redis.asyncio as redis

    async def work(settings: Settings) -> None:
        """Print the discovered models, then optionally a tenant's subset."""
        client = redis.from_url(settings.redis_url, decode_responses=True)
        try:
            discovered = await read_discovered_from_redis(client)
        finally:
            # Close the Redis client even if the read failed.
            await client.aclose()

        discovered_names = sorted(discovered)
        typer.echo("discovered models (live from Ollama via discovery cache):")
        if discovered_names:
            for name in discovered_names:
                typer.echo(f" {name}")
        else:
            typer.echo(" (none — discovery cache empty or expired; requests fail closed)")

        # Without --tenant there is nothing to look up in the database.
        if tenant is None:
            return

        # Keep a handle on the engine this command opens so its pool can be
        # released before the event loop shuts down.
        engine = create_engine(settings)
        try:
            factory = create_session_factory(engine)
            async with session_scope(factory) as session:
                tenants = TenantRepository(session)
                tenant_row = await tenants.get_by_name(tenant)
                if tenant_row is None:
                    raise typer.BadParameter(f"unknown tenant {tenant!r}")
                limits = await tenants.get_limits(tenant_row.id)
                if limits is None:
                    raise typer.BadParameter(f"tenant {tenant!r} has no limits row")
                # Resolve while the session is open so the limits attributes
                # are read from a live object.
                effective = resolve_effective_models(
                    allow_all=limits.allow_all_models,
                    allowed_models=tuple(limits.allowed_models),
                    discovered=discovered,
                )
            typer.echo(f"effective set for tenant {tenant}:")
            for name in sorted(effective):
                typer.echo(f" {name}")
        finally:
            await engine.dispose()

    _run(work)
def main() -> None:
    """Console-script entry point.

    Invokes the module-level Typer ``app``, which parses the command line and
    dispatches to the selected subcommand.
    """
    app()
# Run the CLI when this module is executed directly as a script.
if __name__ == "__main__":
    main()