Files
neuronetz-gateway/tests/unit/test_translate.py
Stephan Berbig 844b02aade tests: unit + integration suite (99 tests; ruff + mypy --strict clean)
Real test bodies (not stubs), driven against an in-process httpx.ASGITransport
override of the gateway's get_ollama_client dependency pointing at
tests/integration/mock_ollama.py.

Unit (target 100% on auth/, ratelimit/, budget/):
- argon2id roundtrip, wrong-key, garbage encoding, needs_rehash on param change
- key format/uniqueness/prefix extraction
- token counter (prompt_eval_count + eval_count, embeddings, missing-counts)
- translate (OpenAI <-> Ollama for chat/completion/embeddings, streaming chunks,
  /v1/models list shape)
- allowlist (hard-blocks, effective-set semantics across allow_all/inheritance/
  empty-discovered)
- discovery (parse, cache roundtrip with TTL, fail-closed, tolerates redis=None)
- sliding window (allow/block/reset/per-key vs per-tenant/cost-weighted)

Integration (testcontainers postgres + redis + in-process mock Ollama):
- auth flow (no/malformed/wrong key all return identical sanitized 401)
- proxy stream (NDJSON roundtrip, audit row's token counts match, hard-blocked
  endpoints uniformly 403)
- openai_compat (SSE chunks, data: [DONE], non-stream shape, /v1/models)
- model_discovery (allow_all sees all, default-deny sees allowed ∩ discovered,
  /v1/models filtered, unpermitted-but-installed = nonexistent = 403,
  empty cache denies even allow_all)
- rate_limit (429 + Retry-After + headers; Redis down ⇒ 503, never 200)
- budget (decrement + headers; pre-burned counter blocks next request)
- revocation (INSERT into gateway.revocations → NOTIFY → cache evicted → 401 ≤ 1s)

Includes a known-issue xfail flagging a bug in ratelimit/sliding_window.py:
the per-hit ZSET member uses id(object()), which can return the same id on
consecutive calls (the temporary object is freed immediately, so CPython may
reuse its address), causing same-millisecond hits to overwrite each other
instead of stacking. To be fixed in a follow-up commit.
2026-05-26 20:52:33 +02:00

180 lines
6.2 KiB
Python

"""Unit tests for ``neuronetz_gateway.proxy.translate`` (OpenAI <-> Ollama).
Golden-fixture tests for the OpenAI-compat layer (SPEC §6.3):
* OpenAI chat/completion/embeddings request -> Ollama request body
* Ollama stream frame -> OpenAI ``chat.completion.chunk`` (delta + final usage)
* Ollama non-stream response -> OpenAI ``chat.completion`` / ``text_completion``
* model name list -> OpenAI ``/v1/models`` list shape
The streaming chunk shape is anchored to ``mock_ollama``'s reference helper
``ollama_chunks_to_openai_sse``. SSE *framing* (``data: {...}\\n\\n`` +
``data: [DONE]``) is asserted in the integration test_openai_compat.py.
"""
from __future__ import annotations
from typing import Any
from neuronetz_gateway.proxy import translate
def _as_dict(value: object) -> dict[str, Any]:
"""Narrow a translator-returned ``object`` to a typed dict for assertions."""
assert isinstance(value, dict), value
return value
def _as_list(value: object) -> list[Any]:
"""Narrow a translator-returned ``object`` to a typed list for assertions."""
assert isinstance(value, list), value
return value
# --- request translation: OpenAI -> Ollama ---------------------------------
def test_openai_chat_request_to_ollama_preserves_messages_and_model() -> None:
    """Chat translation carries ``model``, ``messages`` and ``stream`` over."""
    conversation = [
        {"role": "system", "content": "be terse"},
        {"role": "user", "content": "hi"},
    ]
    payload: dict[str, Any] = {
        "model": "llama3.1:8b",
        "messages": conversation,
        "stream": True,
    }

    translated = translate.openai_chat_to_ollama(payload)

    assert translated["model"] == "llama3.1:8b"
    # Compared against the request's own message list.
    assert translated["messages"] == payload["messages"]
    assert translated["stream"] is True
def test_openai_chat_options_mapped() -> None:
    """Sampling parameters are expected under the Ollama ``options`` dict."""
    payload: dict[str, Any] = {
        "model": "llama3.1:8b",
        "messages": [{"role": "user", "content": "hi"}],
        "temperature": 0.2,
        "max_tokens": 128,
        "stream": False,
    }

    translated = translate.openai_chat_to_ollama(payload)
    ollama_options = _as_dict(translated["options"])

    assert ollama_options["temperature"] == 0.2
    # OpenAI ``max_tokens`` is renamed to Ollama ``num_predict``.
    assert ollama_options["num_predict"] == 128
    assert translated["stream"] is False
def test_openai_completion_to_ollama_generate() -> None:
    """Completion translation carries ``model``, ``prompt`` and ``stream`` over."""
    payload: dict[str, Any] = {
        "model": "llama3.1:8b",
        "prompt": "once upon a time",
        "stream": True,
    }

    translated = translate.openai_completion_to_ollama(payload)

    assert translated["model"] == "llama3.1:8b"
    assert translated["prompt"] == "once upon a time"
    assert translated["stream"] is True
def test_openai_embeddings_to_ollama_embed() -> None:
    """Embeddings translation carries ``model`` and ``input`` over."""
    payload: dict[str, Any] = {
        "model": "nomic-embed-text",
        "input": "hello world",
    }

    translated = translate.openai_embeddings_to_ollama(payload)

    assert translated["model"] == "nomic-embed-text"
    assert translated["input"] == "hello world"
# --- streaming response translation: Ollama frame -> OpenAI chunk ----------
def test_chat_delta_chunk_to_openai() -> None:
    """A non-final Ollama frame becomes a ``chat.completion.chunk`` delta."""
    ollama_frame: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": "Echo:"},
        "done": False,
    }

    chunk = translate.ollama_chat_chunk_to_openai(
        ollama_frame,
        completion_id="chatcmpl-x",
        model="llama3.1:8b",
        created=1700,
    )

    assert chunk["object"] == "chat.completion.chunk"
    choices = _as_list(chunk["choices"])
    first_choice = _as_dict(choices[0])
    content_delta = _as_dict(first_choice["delta"])
    assert content_delta["content"] == "Echo:"
    # The frame has ``done: False``, so no finish reason is expected.
    assert first_choice["finish_reason"] is None
def test_chat_final_chunk_carries_usage_and_finish_reason() -> None:
    """The ``done`` frame yields a finish reason and a ``usage`` block."""
    final_frame: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": ""},
        "done": True,
        "done_reason": "stop",
        "prompt_eval_count": 4,
        "eval_count": 6,
    }

    chunk = translate.ollama_chat_chunk_to_openai(
        final_frame,
        completion_id="chatcmpl-x",
        model="llama3.1:8b",
        created=1700,
    )

    choices = _as_list(chunk["choices"])
    first_choice = _as_dict(choices[0])
    assert first_choice["finish_reason"] == "stop"

    token_usage = _as_dict(chunk["usage"])
    assert token_usage["prompt_tokens"] == 4
    assert token_usage["completion_tokens"] == 6
    # Total is prompt (4) + completion (6).
    assert token_usage["total_tokens"] == 10
# --- non-streaming response translation ------------------------------------
def test_nonstream_chat_to_openai_completion() -> None:
    """A non-stream Ollama chat response becomes a ``chat.completion``."""
    reply: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": "Echo: hi"},
        "done": True,
        "prompt_eval_count": 2,
        "eval_count": 3,
    }

    completion = translate.ollama_chat_to_openai(reply)

    assert completion["object"] == "chat.completion"
    choices = _as_list(completion["choices"])
    first_choice = _as_dict(choices[0])
    assert first_choice["message"] == {"role": "assistant", "content": "Echo: hi"}
    assert first_choice["finish_reason"] == "stop"
    token_usage = _as_dict(completion["usage"])
    # Total is prompt (2) + completion (3).
    assert token_usage["total_tokens"] == 5
def test_nonstream_generate_to_openai() -> None:
    """A non-stream Ollama generate response becomes a ``text_completion``."""
    reply: dict[str, Any] = {
        "model": "llama3.1:8b",
        "response": "once upon a time",
        "done": True,
        "prompt_eval_count": 1,
        "eval_count": 4,
    }

    completion = translate.ollama_generate_to_openai(reply)

    assert completion["object"] == "text_completion"
    choices = _as_list(completion["choices"])
    first_choice = _as_dict(choices[0])
    assert first_choice["text"] == "once upon a time"
    token_usage = _as_dict(completion["usage"])
    # Total is prompt (1) + completion (4).
    assert token_usage["total_tokens"] == 5
def test_embed_to_openai_shape() -> None:
    """An Ollama embed response becomes an OpenAI ``list`` of embeddings."""
    reply: dict[str, Any] = {
        "model": "nomic-embed-text",
        "embeddings": [[0.0, 0.1], [0.2, 0.3]],
        "prompt_eval_count": 7,
    }

    listing = translate.ollama_embed_to_openai(reply, model="nomic-embed-text")

    assert listing["object"] == "list"
    vectors = _as_list(listing["data"])
    assert len(vectors) == 2
    expected_first = {"object": "embedding", "index": 0, "embedding": [0.0, 0.1]}
    assert vectors[0] == expected_first
    # Embeddings charge prompt tokens only (SPEC §13.1).
    assert listing["usage"] == {"prompt_tokens": 7, "total_tokens": 7}
def test_models_to_openai_list_shape() -> None:
    """Model names become an OpenAI ``list`` of ``model`` entries."""
    model_names = ["llama3.1:8b", "mistral:7b"]

    listing = translate.models_to_openai_list(model_names)

    assert listing["object"] == "list"
    entries = [_as_dict(item) for item in _as_list(listing["data"])]
    # Ids are compared as a set, so entry order is not checked.
    assert {entry["id"] for entry in entries} == {"llama3.1:8b", "mistral:7b"}
    assert all(entry["object"] == "model" for entry in entries)