Real test bodies (not stubs), driven against an in-process httpx.ASGITransport override of the gateway's get_ollama_client dependency pointing at tests/integration/mock_ollama.py. Unit (target 100% on auth/, ratelimit/, budget/): - argon2id roundtrip, wrong-key, garbage encoding, needs_rehash on param change - key format/uniqueness/prefix extraction - token counter (prompt_eval_count + eval_count, embeddings, missing-counts) - translate (OpenAI <-> Ollama for chat/completion/embeddings, streaming chunks, /v1/models list shape) - allowlist (hard-blocks, effective-set semantics across allow_all/inheritance/ empty-discovered) - discovery (parse, cache roundtrip with TTL, fail-closed, tolerates redis=None) - sliding window (allow/block/reset/per-key vs per-tenant/cost-weighted) Integration (testcontainers postgres + redis + in-process mock Ollama): - auth flow (no/malformed/wrong key all return identical sanitized 401) - proxy stream (NDJSON roundtrip, audit row's token counts match, hard-blocked endpoints uniformly 403) - openai_compat (SSE chunks, data: [DONE], non-stream shape, /v1/models) - model_discovery (allow_all sees all, default-deny sees allowed ∩ discovered, /v1/models filtered, unpermitted-but-installed = nonexistent = 403, empty cache denies even allow_all) - rate_limit (429 + Retry-After + headers; Redis down ⇒ 503, never 200) - budget (decrement + headers; pre-burned counter blocks next request) - revocation (INSERT into gateway.revocations → NOTIFY → cache evicted → 401 ≤ 1s) Includes a known-issue xfail flagging a bug in ratelimit/sliding_window.py: the per-hit ZSET member uses id(object()) which returns the same id on consecutive calls, causing same-millisecond hits to overwrite instead of stacking. To be fixed in a follow-up commit.
180 lines
6.2 KiB
Python
180 lines
6.2 KiB
Python
"""Unit tests for ``neuronetz_gateway.proxy.translate`` (OpenAI <-> Ollama).
|
|
|
|
Golden-fixture tests for the OpenAI-compat layer (SPEC §6.3):
|
|
* OpenAI chat/completion/embeddings request -> Ollama request body
|
|
* Ollama stream frame -> OpenAI ``chat.completion.chunk`` (delta + final usage)
|
|
* Ollama non-stream response -> OpenAI ``chat.completion`` / ``text_completion``
|
|
* model name list -> OpenAI ``/v1/models`` list shape
|
|
|
|
The streaming chunk shape is anchored to ``mock_ollama``'s reference helper
|
|
``ollama_chunks_to_openai_sse``. SSE *framing* (``data: {...}\\n\\n`` +
|
|
``data: [DONE]``) is asserted in the integration test_openai_compat.py.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
from neuronetz_gateway.proxy import translate
|
|
|
|
|
|
def _as_dict(value: object) -> dict[str, Any]:
    """Return ``value`` itself once it has been checked to be a ``dict``.

    Translator results reach the tests typed as ``object``; this helper
    hands the assertions a ``dict[str, Any]`` they can index into. A
    non-dict input fails the ``assert`` with the offending value as the
    assertion message.
    """
    assert isinstance(value, dict), value
    return value
|
|
|
|
|
|
def _as_list(value: object) -> list[Any]:
    """Return ``value`` itself once it has been checked to be a ``list``.

    Translator results reach the tests typed as ``object``; this helper
    hands the assertions a ``list[Any]`` they can index and iterate. A
    non-list input fails the ``assert`` with the offending value as the
    assertion message.
    """
    assert isinstance(value, list), value
    return value
|
|
|
|
# --- request translation: OpenAI -> Ollama ---------------------------------
|
|
|
|
|
|
def test_openai_chat_request_to_ollama_preserves_messages_and_model() -> None:
    """Chat request translation keeps ``model``, ``messages`` and ``stream``."""
    conversation: list[dict[str, str]] = [
        {"role": "system", "content": "be terse"},
        {"role": "user", "content": "hi"},
    ]
    request: dict[str, Any] = {
        "model": "llama3.1:8b",
        "messages": conversation,
        "stream": True,
    }

    translated = translate.openai_chat_to_ollama(request)

    assert translated["model"] == "llama3.1:8b"
    # Compared against the request's own message list, as it was passed in.
    assert translated["messages"] == request["messages"]
    assert translated["stream"] is True
|
|
|
|
|
|
def test_openai_chat_options_mapped() -> None:
    """OpenAI sampling fields are expected inside the Ollama ``options`` dict."""
    request: dict[str, Any] = {
        "model": "llama3.1:8b",
        "messages": [{"role": "user", "content": "hi"}],
        "temperature": 0.2,
        "max_tokens": 128,
        "stream": False,
    }

    translated = translate.openai_chat_to_ollama(request)
    ollama_options = _as_dict(translated["options"])

    assert ollama_options["temperature"] == 0.2
    # ``max_tokens`` (OpenAI) is expected under ``num_predict`` (Ollama).
    assert ollama_options["num_predict"] == 128
    assert translated["stream"] is False
|
|
|
|
|
|
def test_openai_completion_to_ollama_generate() -> None:
    """Completion request translation keeps ``model``, ``prompt`` and ``stream``."""
    prompt_text = "once upon a time"
    request: dict[str, Any] = {
        "model": "llama3.1:8b",
        "prompt": prompt_text,
        "stream": True,
    }

    translated = translate.openai_completion_to_ollama(request)

    assert translated["model"] == "llama3.1:8b"
    assert translated["prompt"] == prompt_text
    assert translated["stream"] is True
|
|
|
|
|
|
def test_openai_embeddings_to_ollama_embed() -> None:
    """Embeddings request translation keeps ``model`` and ``input``."""
    embed_model = "nomic-embed-text"
    text = "hello world"
    request: dict[str, Any] = {"model": embed_model, "input": text}

    translated = translate.openai_embeddings_to_ollama(request)

    assert translated["model"] == embed_model
    assert translated["input"] == text
|
|
|
|
|
|
# --- streaming response translation: Ollama frame -> OpenAI chunk ----------
|
|
|
|
|
|
def test_chat_delta_chunk_to_openai() -> None:
    """A non-final Ollama chat frame becomes a ``chat.completion.chunk`` delta."""
    ollama_frame: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": "Echo:"},
        "done": False,
    }

    chunk = translate.ollama_chat_chunk_to_openai(
        ollama_frame,
        completion_id="chatcmpl-x",
        model="llama3.1:8b",
        created=1700,
    )

    assert chunk["object"] == "chat.completion.chunk"
    first_choice = _as_dict(_as_list(chunk["choices"])[0])
    assert _as_dict(first_choice["delta"])["content"] == "Echo:"
    # The frame has ``done`` False, so no finish reason is expected yet.
    assert first_choice["finish_reason"] is None
|
|
|
|
|
|
def test_chat_final_chunk_carries_usage_and_finish_reason() -> None:
    """A ``done`` frame yields the finish reason plus prompt/completion/total usage."""
    final_frame: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": ""},
        "done": True,
        "done_reason": "stop",
        "prompt_eval_count": 4,
        "eval_count": 6,
    }

    chunk = translate.ollama_chat_chunk_to_openai(
        final_frame,
        completion_id="chatcmpl-x",
        model="llama3.1:8b",
        created=1700,
    )

    first_choice = _as_dict(_as_list(chunk["choices"])[0])
    assert first_choice["finish_reason"] == "stop"

    usage = _as_dict(chunk["usage"])
    # Checked field by field, in this order; extra usage keys are tolerated.
    expected_counts = (
        ("prompt_tokens", 4),
        ("completion_tokens", 6),
        ("total_tokens", 10),
    )
    for field, count in expected_counts:
        assert usage[field] == count
|
|
|
|
|
|
# --- non-streaming response translation ------------------------------------
|
|
|
|
|
|
def test_nonstream_chat_to_openai_completion() -> None:
    """A complete Ollama chat response becomes an OpenAI ``chat.completion``."""
    assistant_message = {"role": "assistant", "content": "Echo: hi"}
    response: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": "Echo: hi"},
        "done": True,
        "prompt_eval_count": 2,
        "eval_count": 3,
    }

    completion = translate.ollama_chat_to_openai(response)

    assert completion["object"] == "chat.completion"
    first_choice = _as_dict(_as_list(completion["choices"])[0])
    assert first_choice["message"] == assistant_message
    assert first_choice["finish_reason"] == "stop"
    # 2 prompt tokens + 3 generated tokens.
    total_tokens = _as_dict(completion["usage"])["total_tokens"]
    assert total_tokens == 5
|
|
|
|
|
|
def test_nonstream_generate_to_openai() -> None:
    """A complete Ollama generate response becomes an OpenAI ``text_completion``."""
    generated_text = "once upon a time"
    response: dict[str, Any] = {
        "model": "llama3.1:8b",
        "response": generated_text,
        "done": True,
        "prompt_eval_count": 1,
        "eval_count": 4,
    }

    completion = translate.ollama_generate_to_openai(response)

    assert completion["object"] == "text_completion"
    first_choice = _as_dict(_as_list(completion["choices"])[0])
    assert first_choice["text"] == generated_text
    # 1 prompt token + 4 generated tokens.
    total_tokens = _as_dict(completion["usage"])["total_tokens"]
    assert total_tokens == 5
|
|
|
|
|
|
def test_embed_to_openai_shape() -> None:
    """An Ollama embed response becomes an OpenAI embeddings ``list``.

    Every returned entry is checked, not just the first, so a translator
    that mis-numbers ``index`` or attaches the wrong vector to a later
    entry is caught.
    """
    ollama_resp: dict[str, Any] = {
        "model": "nomic-embed-text",
        "embeddings": [[0.0, 0.1], [0.2, 0.3]],
        "prompt_eval_count": 7,
    }
    # Kept as separate literals so the expectation never aliases the input.
    expected_vectors = [[0.0, 0.1], [0.2, 0.3]]

    out = translate.ollama_embed_to_openai(ollama_resp, model="nomic-embed-text")

    assert out["object"] == "list"
    data = _as_list(out["data"])
    assert len(data) == 2
    for position, vector in enumerate(expected_vectors):
        assert data[position] == {
            "object": "embedding",
            "index": position,
            "embedding": vector,
        }
    # Embeddings charge prompt tokens only (SPEC §13.1).
    assert out["usage"] == {"prompt_tokens": 7, "total_tokens": 7}
|
|
|
|
|
|
def test_models_to_openai_list_shape() -> None:
    """A list of model names becomes an OpenAI ``/v1/models``-style ``list``.

    The ids are compared as a set, so ordering is not pinned; the explicit
    length check keeps duplicated entries from hiding behind that set
    comparison.
    """
    model_names = ["llama3.1:8b", "mistral:7b"]

    out = translate.models_to_openai_list(model_names)

    assert out["object"] == "list"
    data = _as_list(out["data"])
    # One entry per input name: the set comparison below would not notice
    # a repeated entry on its own.
    assert len(data) == 2
    ids = {_as_dict(m)["id"] for m in data}
    assert ids == {"llama3.1:8b", "mistral:7b"}
    assert all(_as_dict(m)["object"] == "model" for m in data)
|