Real test bodies (not stubs), driven against an in-process httpx.ASGITransport override of the gateway's get_ollama_client dependency pointing at tests/integration/mock_ollama.py. Unit (target 100% on auth/, ratelimit/, budget/): - argon2id roundtrip, wrong-key, garbage encoding, needs_rehash on param change - key format/uniqueness/prefix extraction - token counter (prompt_eval_count + eval_count, embeddings, missing-counts) - translate (OpenAI <-> Ollama for chat/completion/embeddings, streaming chunks, /v1/models list shape) - allowlist (hard-blocks, effective-set semantics across allow_all/inheritance/empty-discovered) - discovery (parse, cache roundtrip with TTL, fail-closed, tolerates redis=None) - sliding window (allow/block/reset/per-key vs per-tenant/cost-weighted) Integration (testcontainers postgres + redis + in-process mock Ollama): - auth flow (no/malformed/wrong key all return identical sanitized 401) - proxy stream (NDJSON roundtrip, audit row's token counts match, hard-blocked endpoints uniformly 403) - openai_compat (SSE chunks, data: [DONE], non-stream shape, /v1/models) - model_discovery (allow_all sees all, default-deny sees allowed ∩ discovered, /v1/models filtered, unpermitted-but-installed = nonexistent = 403, empty cache denies even allow_all) - rate_limit (429 + Retry-After + headers; Redis down ⇒ 503, never 200) - budget (decrement + headers; pre-burned counter blocks next request) - revocation (INSERT into gateway.revocations → NOTIFY → cache evicted → 401 ≤ 1s) Includes a known-issue xfail flagging a bug in ratelimit/sliding_window.py: the per-hit ZSET member uses id(object()), which can return the same id on consecutive calls (the temporary object is freed immediately, so its address may be reused), causing same-millisecond hits to overwrite each other instead of stacking. To be fixed in a follow-up commit.
130 lines
4.7 KiB
Python
130 lines
4.7 KiB
Python
"""Integration tests for live model discovery + the effective set (SPEC §4.6, §12).

Covers the acceptance criteria around discovery:

* ``allow_all_models`` tenant sees every installed model in ``/api/tags`` and
  ``/v1/models``.
* Default-deny tenant sees only ``allowed_models ∩ discovered``.
* Request for a model outside the effective set => 403 with a generic body
  (no existence disclosure: installed-but-unpermitted vs not-installed are
  indistinguishable, SPEC §13.6).
* Discovery unavailable (empty cache) => deny, even for ``allow_all``.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
from neuronetz_gateway.proxy.discovery import DiscoveredModel
|
|
from tests.integration.conftest import IntegrationApp, IntegrationKey
|
|
from tests.integration.mock_ollama import DEFAULT_MODELS
|
|
|
|
# Module-level marker: applies ``pytest.mark.asyncio`` to every test in this
# file, so the ``async def`` tests below need no per-function decorator.
pytestmark = pytest.mark.asyncio
|
|
|
|
|
|
async def test_allow_all_tenant_sees_all_discovered(
    client: httpx.AsyncClient, allow_all_key: IntegrationKey
) -> None:
    """An ``allow_all_models`` tenant sees every installed model on both list endpoints.

    Queries the native ``/api/tags`` listing and the OpenAI-compatible
    ``/v1/models`` listing with the allow-all key and asserts that each
    response contains at least every name in ``DEFAULT_MODELS``.
    """
    auth_headers = {"Authorization": f"Bearer {allow_all_key.full_key}"}
    expected = set(DEFAULT_MODELS)

    tags = await client.get("/api/tags", headers=auth_headers)
    assert tags.status_code == 200
    tag_names = {m["name"] for m in tags.json()["models"]}
    # Subset rather than equality: the criterion is that nothing installed is
    # hidden, not that the listing contains nothing else.
    assert expected <= tag_names

    # The module docstring names ``/v1/models`` as part of this criterion, so
    # the OpenAI-compatible listing is held to the same expectation.
    models = await client.get("/v1/models", headers=auth_headers)
    assert models.status_code == 200
    model_ids = {m["id"] for m in models.json()["data"]}
    assert expected <= model_ids
|
|
|
|
|
|
async def test_default_deny_tenant_sees_only_allowed_intersect_discovered(
    client: httpx.AsyncClient, restricted_key: IntegrationKey
) -> None:
    """A default-deny tenant's ``/api/tags`` listing is exactly its allowlisted model."""
    auth_headers = {"Authorization": f"Bearer {restricted_key.full_key}"}
    listing = await client.get("/api/tags", headers=auth_headers)
    assert listing.status_code == 200

    visible = set()
    for entry in listing.json()["models"]:
        visible.add(entry["name"])

    # The fixture allowlists only llama3.1:8b, so equality (not subset) is
    # required: any extra name would be a leak.
    assert visible == {"llama3.1:8b"}
|
|
|
|
|
|
async def test_v1_models_filtered_by_effective_set(
    client: httpx.AsyncClient, restricted_key: IntegrationKey
) -> None:
    """``/v1/models`` exposes only ``llama3.1:8b`` when called with the restricted key."""
    bearer = f"Bearer {restricted_key.full_key}"
    listing = await client.get("/v1/models", headers={"Authorization": bearer})
    assert listing.status_code == 200

    payload = listing.json()
    exposed_ids = {item["id"] for item in payload["data"]}
    assert exposed_ids == {"llama3.1:8b"}
|
|
|
|
|
|
async def test_request_for_unpermitted_model_returns_403(
    client: httpx.AsyncClient, restricted_key: IntegrationKey
) -> None:
    """An installed-but-unpermitted model is rejected exactly like a missing one.

    Sends two ``/api/chat`` requests with the restricted key: one for
    ``mistral:7b`` (installed in the mock catalogue but not allowlisted for
    this tenant) and one for a name that is not installed at all. Both must
    return 403 with code ``forbidden``, and apart from ``request_id`` the two
    error objects must be equal, so a caller cannot probe which models exist
    (SPEC §13.6).
    """
    auth_headers = {"Authorization": f"Bearer {restricted_key.full_key}"}

    async def chat(model: str) -> httpx.Response:
        # Same tenant, same payload shape; only the model name varies.
        return await client.post(
            "/api/chat",
            headers=auth_headers,
            json={
                "model": model,
                "messages": [{"role": "user", "content": "hi"}],
                "stream": False,
            },
        )

    def without_request_id(error: dict) -> dict:
        # NOTE(review): assumes ``request_id`` is the only per-request field
        # in the error object; extend the exclusion if the gateway adds more.
        return {key: value for key, value in error.items() if key != "request_id"}

    # ``mistral:7b`` IS installed (in the mock catalogue) but NOT in this
    # tenant's allowlist.
    unpermitted = await chat("mistral:7b")
    assert unpermitted.status_code == 403
    unpermitted_err = unpermitted.json()["error"]
    assert unpermitted_err["code"] == "forbidden"
    assert unpermitted_err["request_id"]

    # A name that is not installed anywhere, requested by the same tenant.
    missing = await chat("ghost-model-not-installed")
    assert missing.status_code == 403
    missing_err = missing.json()["error"]
    assert missing_err["code"] == "forbidden"
    assert missing_err["request_id"]

    # No existence disclosure: the two rejections must be indistinguishable.
    assert without_request_id(unpermitted_err) == without_request_id(missing_err)
|
|
|
|
|
|
async def test_request_for_nonexistent_model_returns_same_generic_403(
    client: httpx.AsyncClient, allow_all_key: IntegrationKey
) -> None:
    """A model name that is not installed yields 403 ``forbidden`` even for ``allow_all``."""
    # For an ``allow_all`` tenant the effective set is whatever is discovered,
    # so a name that isn't installed is outside it and must be rejected.
    auth_headers = {"Authorization": f"Bearer {allow_all_key.full_key}"}
    chat_request = {
        "model": "ghost-model-not-installed",
        "messages": [{"role": "user", "content": "hi"}],
        "stream": False,
    }

    rejected = await client.post("/api/chat", headers=auth_headers, json=chat_request)

    assert rejected.status_code == 403
    error = rejected.json()["error"]
    assert error["code"] == "forbidden"
|
|
|
|
|
|
async def test_discovery_unavailable_denies_even_allow_all(
    client: httpx.AsyncClient,
    integration_app: IntegrationApp,
    allow_all_key: IntegrationKey,
) -> None:
    """With an empty discovery cache, an ``allow_all`` tenant is denied and sees no models.

    Empties the app's in-process discovery cache, then checks that a chat
    request is rejected with 403 ``forbidden`` and that ``/api/tags`` answers
    200 with an empty model list. The cache is repopulated from
    ``DEFAULT_MODELS`` afterwards, whether or not the assertions pass.
    """
    discovery = integration_app.app.state.discovery_cache

    # Simulate a stale/expired discovery cache: an empty set means every model
    # resolution fails (fail-closed per SPEC §4.6, §13.5).
    await discovery.set([])
    try:
        chat_request = {
            "model": "llama3.1:8b",
            "messages": [{"role": "user", "content": "hi"}],
            "stream": False,
        }
        denied = await client.post(
            "/api/chat",
            headers={"Authorization": f"Bearer {allow_all_key.full_key}"},
            json=chat_request,
        )
        assert denied.status_code == 403
        denied_error = denied.json()["error"]
        assert denied_error["code"] == "forbidden"

        # /api/tags still returns 200 but the list is empty (no leakage, no list).
        listing = await client.get(
            "/api/tags",
            headers={"Authorization": f"Bearer {allow_all_key.full_key}"},
        )
        assert listing.status_code == 200
        assert listing.json()["models"] == []
    finally:
        # Restore so other tests aren't affected by ordering. The family is
        # the part of the model name before the first ":".
        restored = []
        for model_name in DEFAULT_MODELS:
            family = model_name.partition(":")[0]
            restored.append(DiscoveredModel(name=model_name, family=family))
        await discovery.set(restored)
|