neuronetz-gateway/tests/integration/test_model_discovery.py

"""Integration tests for live model discovery + the effective set (SPEC §4.6, §12).

Covers the acceptance criteria around discovery:
  * ``allow_all_models`` tenant sees every installed model in ``/api/tags`` and
    ``/v1/models``.
  * Default-deny tenant sees only ``allowed_models ∩ discovered``.
  * Request for a model outside the effective set => 403 with a generic body
    (no existence disclosure: installed-but-unpermitted vs not-installed are
    indistinguishable, SPEC §13.6).
  * Discovery unavailable (empty cache) => deny, even for ``allow_all``.
"""

from __future__ import annotations

import httpx
import pytest

from neuronetz_gateway.proxy.discovery import DiscoveredModel
from tests.integration.conftest import IntegrationApp, IntegrationKey
from tests.integration.mock_ollama import DEFAULT_MODELS

pytestmark = pytest.mark.asyncio


async def test_allow_all_tenant_sees_all_discovered(
    client: httpx.AsyncClient, allow_all_key: IntegrationKey
) -> None:
    """``/api/tags`` for an allow-all key must list every name in ``DEFAULT_MODELS``."""
    auth = {"Authorization": f"Bearer {allow_all_key.full_key}"}
    response = await client.get("/api/tags", headers=auth)
    assert response.status_code == 200

    listed = {entry["name"] for entry in response.json()["models"]}
    # Superset check only: additional entries in the listing are tolerated.
    assert listed.issuperset(DEFAULT_MODELS)


async def test_default_deny_tenant_sees_only_allowed_intersect_discovered(
    client: httpx.AsyncClient, restricted_key: IntegrationKey
) -> None:
    """``/api/tags`` for the restricted key must list exactly its one permitted model."""
    auth = {"Authorization": f"Bearer {restricted_key.full_key}"}
    response = await client.get("/api/tags", headers=auth)
    assert response.status_code == 200

    # The restricted-key fixture is expected to allowlist only llama3.1:8b.
    expected = {"llama3.1:8b"}
    visible = {entry["name"] for entry in response.json()["models"]}
    assert visible == expected


async def test_v1_models_filtered_by_effective_set(
    client: httpx.AsyncClient, restricted_key: IntegrationKey
) -> None:
    """``/v1/models`` for the restricted key must expose only ``llama3.1:8b`` in ``data``."""
    auth = {"Authorization": f"Bearer {restricted_key.full_key}"}
    response = await client.get("/v1/models", headers=auth)
    assert response.status_code == 200

    # This endpoint reports models under ``data`` keyed by ``id``.
    expected = {"llama3.1:8b"}
    visible = {entry["id"] for entry in response.json()["data"]}
    assert visible == expected


async def test_request_for_unpermitted_model_returns_403(
    client: httpx.AsyncClient, restricted_key: IntegrationKey
) -> None:
    """A chat request for a model outside the key's allowlist is rejected with 403.

    ``mistral:7b`` is part of the mock catalogue (i.e. installed) yet absent
    from the restricted key's allowlist. The gateway must answer with the
    generic ``forbidden`` error it would also use for a model that does not
    exist at all (SPEC §13.6), so nothing about installation status leaks.
    """
    auth = {"Authorization": f"Bearer {restricted_key.full_key}"}
    payload = {
        "model": "mistral:7b",
        "messages": [{"role": "user", "content": "hi"}],
        "stream": False,
    }

    response = await client.post("/api/chat", headers=auth, json=payload)
    assert response.status_code == 403

    error = response.json()["error"]
    assert error["code"] == "forbidden"
    # The error envelope must carry a non-empty request id.
    assert error["request_id"]


async def test_request_for_nonexistent_model_returns_same_generic_403(
    client: httpx.AsyncClient, allow_all_key: IntegrationKey
) -> None:
    """A chat request for a model that is not installed yields the generic 403.

    For an ``allow_all`` tenant the effective set is whatever discovery
    reports, so a model name that isn't installed is rejected too. The error
    envelope is checked for both ``code`` and a non-empty ``request_id`` so
    this test pins the same body shape as the unpermitted-model test
    (no existence disclosure, SPEC §13.6).
    """
    resp = await client.post(
        "/api/chat",
        headers={"Authorization": f"Bearer {allow_all_key.full_key}"},
        json={
            "model": "ghost-model-not-installed",
            "messages": [{"role": "user", "content": "hi"}],
            "stream": False,
        },
    )
    assert resp.status_code == 403
    err = resp.json()["error"]
    assert err["code"] == "forbidden"
    # Same generic envelope as the installed-but-unpermitted case.
    assert err["request_id"]


async def test_discovery_unavailable_denies_even_allow_all(
    client: httpx.AsyncClient,
    integration_app: IntegrationApp,
    allow_all_key: IntegrationKey,
) -> None:
    """With an empty discovery cache, even an allow-all key is denied (fail-closed).

    The app's in-process discovery cache is emptied to mimic a stale/expired
    cache (SPEC §4.6, §13.5). A chat request must then get the generic 403 and
    ``/api/tags`` must answer 200 with an empty list. The cache is refilled
    from ``DEFAULT_MODELS`` afterwards, whatever the outcome.
    """
    discovery_cache = integration_app.app.state.discovery_cache
    await discovery_cache.set([])
    try:
        auth = {"Authorization": f"Bearer {allow_all_key.full_key}"}
        payload = {
            "model": "llama3.1:8b",
            "messages": [{"role": "user", "content": "hi"}],
            "stream": False,
        }

        chat = await client.post("/api/chat", headers=auth, json=payload)
        assert chat.status_code == 403
        assert chat.json()["error"]["code"] == "forbidden"

        # Listing still succeeds, but reveals nothing: the model list is empty.
        listing = await client.get("/api/tags", headers=auth)
        assert listing.status_code == 200
        assert listing.json()["models"] == []
    finally:
        # Refill the cache so later tests do not depend on execution order.
        restored = []
        for model_name in DEFAULT_MODELS:
            # Family is the part of the name before the first ":" (whole name if none).
            family = model_name.partition(":")[0]
            restored.append(DiscoveredModel(name=model_name, family=family))
        await discovery_cache.set(restored)