"""Unit tests for ``neuronetz_gateway.proxy.translate`` (OpenAI <-> Ollama). Golden-fixture tests for the OpenAI-compat layer (SPEC §6.3): * OpenAI chat/completion/embeddings request -> Ollama request body * Ollama stream frame -> OpenAI ``chat.completion.chunk`` (delta + final usage) * Ollama non-stream response -> OpenAI ``chat.completion`` / ``text_completion`` * model name list -> OpenAI ``/v1/models`` list shape The streaming chunk shape is anchored to ``mock_ollama``'s reference helper ``ollama_chunks_to_openai_sse``. SSE *framing* (``data: {...}\\n\\n`` + ``data: [DONE]``) is asserted in the integration test_openai_compat.py. """ from __future__ import annotations from typing import Any from neuronetz_gateway.proxy import translate def _as_dict(value: object) -> dict[str, Any]: """Narrow a translator-returned ``object`` to a typed dict for assertions.""" assert isinstance(value, dict), value return value def _as_list(value: object) -> list[Any]: """Narrow a translator-returned ``object`` to a typed list for assertions.""" assert isinstance(value, list), value return value # --- request translation: OpenAI -> Ollama --------------------------------- def test_openai_chat_request_to_ollama_preserves_messages_and_model() -> None: openai_req: dict[str, Any] = { "model": "llama3.1:8b", "messages": [ {"role": "system", "content": "be terse"}, {"role": "user", "content": "hi"}, ], "stream": True, } ollama = translate.openai_chat_to_ollama(openai_req) assert ollama["model"] == "llama3.1:8b" assert ollama["messages"] == openai_req["messages"] assert ollama["stream"] is True def test_openai_chat_options_mapped() -> None: openai_req: dict[str, Any] = { "model": "llama3.1:8b", "messages": [{"role": "user", "content": "hi"}], "temperature": 0.2, "max_tokens": 128, "stream": False, } ollama = translate.openai_chat_to_ollama(openai_req) options = _as_dict(ollama["options"]) assert options["temperature"] == 0.2 # OpenAI ``max_tokens`` => Ollama ``num_predict``. assert options["num_predict"] == 128 assert ollama["stream"] is False def test_openai_completion_to_ollama_generate() -> None: openai_req: dict[str, Any] = { "model": "llama3.1:8b", "prompt": "once upon a time", "stream": True, } ollama = translate.openai_completion_to_ollama(openai_req) assert ollama["model"] == "llama3.1:8b" assert ollama["prompt"] == "once upon a time" assert ollama["stream"] is True def test_openai_embeddings_to_ollama_embed() -> None: openai_req: dict[str, Any] = {"model": "nomic-embed-text", "input": "hello world"} ollama = translate.openai_embeddings_to_ollama(openai_req) assert ollama["model"] == "nomic-embed-text" assert ollama["input"] == "hello world" # --- streaming response translation: Ollama frame -> OpenAI chunk ---------- def test_chat_delta_chunk_to_openai() -> None: frame: dict[str, Any] = { "model": "llama3.1:8b", "message": {"role": "assistant", "content": "Echo:"}, "done": False, } out = translate.ollama_chat_chunk_to_openai( frame, completion_id="chatcmpl-x", model="llama3.1:8b", created=1700 ) assert out["object"] == "chat.completion.chunk" choice = _as_dict(_as_list(out["choices"])[0]) delta = _as_dict(choice["delta"]) assert delta["content"] == "Echo:" assert choice["finish_reason"] is None def test_chat_final_chunk_carries_usage_and_finish_reason() -> None: frame: dict[str, Any] = { "model": "llama3.1:8b", "message": {"role": "assistant", "content": ""}, "done": True, "done_reason": "stop", "prompt_eval_count": 4, "eval_count": 6, } out = translate.ollama_chat_chunk_to_openai( frame, completion_id="chatcmpl-x", model="llama3.1:8b", created=1700 ) choice = _as_dict(_as_list(out["choices"])[0]) assert choice["finish_reason"] == "stop" usage = _as_dict(out["usage"]) assert usage["prompt_tokens"] == 4 assert usage["completion_tokens"] == 6 assert usage["total_tokens"] == 10 # --- non-streaming response translation ------------------------------------ def test_nonstream_chat_to_openai_completion() -> None: ollama_resp: dict[str, Any] = { "model": "llama3.1:8b", "message": {"role": "assistant", "content": "Echo: hi"}, "done": True, "prompt_eval_count": 2, "eval_count": 3, } out = translate.ollama_chat_to_openai(ollama_resp) assert out["object"] == "chat.completion" choice = _as_dict(_as_list(out["choices"])[0]) assert choice["message"] == {"role": "assistant", "content": "Echo: hi"} assert choice["finish_reason"] == "stop" assert _as_dict(out["usage"])["total_tokens"] == 5 def test_nonstream_generate_to_openai() -> None: ollama_resp: dict[str, Any] = { "model": "llama3.1:8b", "response": "once upon a time", "done": True, "prompt_eval_count": 1, "eval_count": 4, } out = translate.ollama_generate_to_openai(ollama_resp) assert out["object"] == "text_completion" choice = _as_dict(_as_list(out["choices"])[0]) assert choice["text"] == "once upon a time" assert _as_dict(out["usage"])["total_tokens"] == 5 def test_embed_to_openai_shape() -> None: ollama_resp: dict[str, Any] = { "model": "nomic-embed-text", "embeddings": [[0.0, 0.1], [0.2, 0.3]], "prompt_eval_count": 7, } out = translate.ollama_embed_to_openai(ollama_resp, model="nomic-embed-text") assert out["object"] == "list" data = _as_list(out["data"]) assert len(data) == 2 assert data[0] == {"object": "embedding", "index": 0, "embedding": [0.0, 0.1]} # Embeddings charge prompt tokens only (SPEC §13.1). assert out["usage"] == {"prompt_tokens": 7, "total_tokens": 7} def test_models_to_openai_list_shape() -> None: out = translate.models_to_openai_list(["llama3.1:8b", "mistral:7b"]) assert out["object"] == "list" data = _as_list(out["data"]) ids = {_as_dict(m)["id"] for m in data} assert ids == {"llama3.1:8b", "mistral:7b"} assert all(_as_dict(m)["object"] == "model" for m in data)