# neuronetz-gateway/tests/integration/test_budget.py

"""Integration tests for token budgets (SPEC §4.3 step 5, §6.5, §12).

  * A request returns the SPEC §6.5 budget headers
    (``X-Budget-Period``, ``X-Budget-Tokens-Remaining``).
  * When the daily budget is exhausted the next request is blocked with a
    sanitized ``budget_exceeded`` error.
"""

from __future__ import annotations

import asyncio

import httpx
import pytest

from neuronetz_gateway.budget.counter import BudgetCounter
from neuronetz_gateway.db.models import BudgetPeriod
from tests.integration.conftest import (
    IntegrationApp,
    _create_tenant_and_key,
)
from tests.integration.mock_ollama import DEFAULT_MODELS

pytestmark = pytest.mark.asyncio


async def _chat(client: httpx.AsyncClient, key_full: str) -> httpx.Response:
    return await client.post(
        "/api/chat",
        headers={"Authorization": f"Bearer {key_full}"},
        json={
            "model": "llama3.1:8b",
            "messages": [{"role": "user", "content": "hello"}],
            "stream": False,
        },
    )


async def test_budget_headers_present_on_response(
    integration_app: IntegrationApp, client: httpx.AsyncClient
) -> None:
    """A successful chat response carries the SPEC §6.5 budget headers.

    Creates a key with a daily budget of one million tokens, sends one chat
    request and checks that ``X-Budget-Period`` holds one of the known
    period names and that ``X-Budget-Tokens-Remaining`` is present.
    """
    api_key = await _create_tenant_and_key(
        integration_app,
        allowed_models=list(DEFAULT_MODELS),
        tokens_daily=1_000_000,
    )

    response = await _chat(client, api_key.full_key)
    assert response.status_code == 200

    # SPEC §6.5: the period must be a known name; the remaining-token
    # header only has to exist, its value is not checked here.
    headers = response.headers
    assert headers.get("X-Budget-Period") in {"day", "month", "total"}
    assert headers.get("X-Budget-Tokens-Remaining") is not None


async def test_budget_blocks_when_exhausted(
    integration_app: IntegrationApp, client: httpx.AsyncClient
) -> None:
    """A request made after the daily counter passes its limit is refused.

    The key is created with a daily limit of one token, and 1000 tokens are
    then recorded against it directly through ``BudgetCounter``. The chat
    request that follows must not return 200, must carry a structured error
    with a request id, and must not mention backend components in its
    message.
    """
    api_key = await _create_tenant_and_key(
        integration_app,
        allowed_models=list(DEFAULT_MODELS),
        tokens_daily=1,
    )

    # Record the spend directly on the counter instead of sending a first
    # request, so the outcome does not depend on when post-stream
    # accounting runs.
    budget = BudgetCounter(integration_app.app.state.redis)
    await budget.consume(str(api_key.key_id), BudgetPeriod.day, 1000)
    # Brief pause before the request so the consumed value is observed.
    await asyncio.sleep(0.01)

    response = await _chat(client, api_key.full_key)
    # Anything but success is accepted here; the error code is checked below.
    assert response.status_code != 200

    error = response.json()["error"]
    assert error["code"] in {"budget_exceeded", "rate_limited"}
    assert error["request_id"]

    # The message may be descriptive but must stay sanitized: no names of
    # upstream or internal components, and no traceback text.
    message = error["message"].lower()
    forbidden = ("ollama", "redis", "postgres", "traceback")
    leaked = [word for word in forbidden if word in message]
    assert not leaked