# neuronetz-gateway/tests/integration/test_openai_compat.py

"""Integration tests for the OpenAI-compatible surface (SPEC §6.3, §12).

  * ``/v1/chat/completions`` streaming SSE: every event is ``data: {...}\\n\\n``
    and the stream terminates with ``data: [DONE]\\n\\n``.
  * Non-streaming ``/v1/chat/completions`` returns the OpenAI ``chat.completion``
    shape with a single ``choices[0].message`` and ``usage``.
  * ``/v1/models`` returns the tenant's *effective* discovered set in the
    OpenAI model-list format.
"""

from __future__ import annotations

import json

import httpx
import pytest

from tests.integration.conftest import IntegrationKey
from tests.integration.mock_ollama import DEFAULT_MODELS

pytestmark = pytest.mark.asyncio


async def test_chat_completions_sse_ends_with_done(
    client: httpx.AsyncClient, api_key: IntegrationKey
) -> None:
    """Stream a chat completion and verify SSE framing and the ``[DONE]`` terminator.

    POSTs a ``stream: True`` request to ``/v1/chat/completions``, collects every
    non-empty line of the response, and checks that:

    * the response is ``200`` with a ``text/event-stream`` content type,
    * every collected line starts with ``data: ``,
    * the last collected line is ``data: [DONE]``,
    * the first non-terminator event parses as JSON with ``object`` equal to
      ``chat.completion.chunk`` and ``choices[0].index`` equal to ``0``.
    """
    async with client.stream(
        "POST",
        "/v1/chat/completions",
        headers={"Authorization": f"Bearer {api_key.full_key}"},
        json={
            "model": "llama3.1:8b",
            "messages": [{"role": "user", "content": "hi"}],
            "stream": True,
        },
    ) as resp:
        assert resp.status_code == 200
        assert "text/event-stream" in resp.headers.get("content-type", "")
        # Empty lines are dropped so only lines carrying content remain.
        events = [line async for line in resp.aiter_lines() if line]

    # Fail with a readable message instead of an IndexError on ``events[-1]``.
    assert events, "stream produced no SSE lines"
    # SSE framing: every line we kept is a ``data: `` line.
    assert all(e.startswith("data: ") for e in events), events
    assert events[-1] == "data: [DONE]"

    # Parse one delta chunk to confirm OpenAI shape. A bare ``next()`` without
    # a default would raise StopIteration inside this coroutine, which Python
    # reports as an opaque RuntimeError; assert explicitly instead.
    chunk_lines = [e for e in events if e != "data: [DONE]"]
    assert chunk_lines, f"stream contained no chunk events: {events}"
    payload = json.loads(chunk_lines[0].removeprefix("data: "))
    assert payload["object"] == "chat.completion.chunk"
    assert payload["choices"][0]["index"] == 0


async def test_chat_completions_non_streaming_shape(
    client: httpx.AsyncClient, api_key: IntegrationKey
) -> None:
    """Check the body shape of a non-streaming ``/v1/chat/completions`` call.

    Sends a ``stream: False`` request and asserts a ``200`` response whose JSON
    body has ``object == "chat.completion"``, an ``assistant`` role on
    ``choices[0].message``, and a non-negative ``usage.total_tokens``.
    """
    auth_headers = {"Authorization": f"Bearer {api_key.full_key}"}
    request_body = {
        "model": "llama3.1:8b",
        "messages": [{"role": "user", "content": "hi"}],
        "stream": False,
    }

    resp = await client.post(
        "/v1/chat/completions", headers=auth_headers, json=request_body
    )
    # Include the raw body in the failure output to ease debugging.
    assert resp.status_code == 200, resp.text

    completion = resp.json()
    assert completion["object"] == "chat.completion"

    first_choice = completion["choices"][0]
    assert first_choice["message"]["role"] == "assistant"

    usage = completion["usage"]
    assert usage["total_tokens"] >= 0


async def test_v1_models_returns_effective_set(
    client: httpx.AsyncClient, api_key: IntegrationKey
) -> None:
    """Check that ``/v1/models`` lists the expected models in OpenAI list format.

    Asserts a ``200`` response whose JSON body has ``object == "list"``, whose
    ``data`` entries cover every id in ``DEFAULT_MODELS``, and whose entries
    each carry ``object == "model"``.
    """
    auth_headers = {"Authorization": f"Bearer {api_key.full_key}"}
    resp = await client.get("/v1/models", headers=auth_headers)
    assert resp.status_code == 200

    listing = resp.json()
    assert listing["object"] == "list"

    advertised_ids = {entry["id"] for entry in listing["data"]}
    # ``api_key``'s tenant was created with the full DEFAULT_MODELS allowlist.
    assert set(DEFAULT_MODELS).issubset(advertised_ids)

    for entry in listing["data"]:
        assert entry["object"] == "model"