neuronetz-gateway/tests/unit/test_translate.py

"""Unit tests for ``neuronetz_gateway.proxy.translate`` (OpenAI <-> Ollama).

Golden-fixture tests for the OpenAI-compat layer (SPEC §6.3):
  * OpenAI chat/completion/embeddings request -> Ollama request body
  * Ollama stream frame -> OpenAI ``chat.completion.chunk`` (delta + final usage)
  * Ollama non-stream response -> OpenAI ``chat.completion`` / ``text_completion``
  * Ollama embed response -> OpenAI embeddings ``list`` (prompt-token usage only)
  * model name list -> OpenAI ``/v1/models`` list shape

The streaming chunk shape is anchored to ``mock_ollama``'s reference helper
``ollama_chunks_to_openai_sse``. SSE *framing* (``data: {...}\\n\\n`` +
``data: [DONE]``) is asserted in the integration test ``test_openai_compat.py``.
"""

from __future__ import annotations

from typing import Any

from neuronetz_gateway.proxy import translate


def _as_dict(value: object) -> dict[str, Any]:
    """Return ``value`` unchanged after asserting that it is a ``dict``.

    Gives the assertions below a typed mapping to index into; anything that
    is not a ``dict`` fails with the offending value as the assertion message.
    """
    assert isinstance(value, dict), value
    mapping: dict[str, Any] = value
    return mapping


def _as_list(value: object) -> list[Any]:
    """Return ``value`` unchanged after asserting that it is a ``list``.

    Gives the assertions below a typed sequence to index into; anything that
    is not a ``list`` fails with the offending value as the assertion message.
    """
    assert isinstance(value, list), value
    items: list[Any] = value
    return items

# --- request translation: OpenAI -> Ollama ---------------------------------


def test_openai_chat_request_to_ollama_preserves_messages_and_model() -> None:
    """Model, messages and the stream flag are expected to carry over as-is."""
    system_turn = {"role": "system", "content": "be terse"}
    user_turn = {"role": "user", "content": "hi"}
    request: dict[str, Any] = {
        "model": "llama3.1:8b",
        "messages": [system_turn, user_turn],
        "stream": True,
    }

    translated = translate.openai_chat_to_ollama(request)

    assert translated["model"] == "llama3.1:8b"
    # Compared against the request's own list, exactly as handed to the translator.
    assert translated["messages"] == request["messages"]
    assert translated["stream"] is True


def test_openai_chat_options_mapped() -> None:
    """Sampling parameters are expected under the translated ``options`` dict."""
    temperature = 0.2
    token_cap = 128
    request: dict[str, Any] = {
        "model": "llama3.1:8b",
        "messages": [{"role": "user", "content": "hi"}],
        "temperature": temperature,
        "max_tokens": token_cap,
        "stream": False,
    }

    translated = translate.openai_chat_to_ollama(request)
    mapped = _as_dict(translated["options"])

    assert mapped["temperature"] == temperature
    # The OpenAI ``max_tokens`` value is expected under Ollama's ``num_predict``.
    assert mapped["num_predict"] == token_cap
    assert translated["stream"] is False


def test_openai_completion_to_ollama_generate() -> None:
    """Model, prompt and stream flag are expected to carry over unchanged."""
    prompt_text = "once upon a time"
    request: dict[str, Any] = {
        "model": "llama3.1:8b",
        "prompt": prompt_text,
        "stream": True,
    }

    translated = translate.openai_completion_to_ollama(request)

    assert translated["model"] == "llama3.1:8b"
    assert translated["prompt"] == prompt_text
    assert translated["stream"] is True


def test_openai_embeddings_to_ollama_embed() -> None:
    """Model and input text are expected to carry over unchanged."""
    request: dict[str, Any] = {
        "model": "nomic-embed-text",
        "input": "hello world",
    }

    translated = translate.openai_embeddings_to_ollama(request)

    assert translated["model"] == "nomic-embed-text"
    assert translated["input"] == "hello world"


# --- streaming response translation: Ollama frame -> OpenAI chunk ----------


def test_chat_delta_chunk_to_openai() -> None:
    """A not-yet-done frame should yield a chunk with content and no finish reason."""
    partial_frame: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": "Echo:"},
        "done": False,
    }

    chunk = translate.ollama_chat_chunk_to_openai(
        partial_frame,
        completion_id="chatcmpl-x",
        model="llama3.1:8b",
        created=1700,
    )

    assert chunk["object"] == "chat.completion.chunk"
    choices = _as_list(chunk["choices"])
    first_choice = _as_dict(choices[0])
    text_delta = _as_dict(first_choice["delta"])
    assert text_delta["content"] == "Echo:"
    # The frame is not done, so no finish reason is expected yet.
    assert first_choice["finish_reason"] is None


def test_chat_final_chunk_carries_usage_and_finish_reason() -> None:
    """The ``done`` frame should yield a finish reason plus token usage totals."""
    final_frame: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": ""},
        "done": True,
        "done_reason": "stop",
        "prompt_eval_count": 4,
        "eval_count": 6,
    }

    chunk = translate.ollama_chat_chunk_to_openai(
        final_frame,
        completion_id="chatcmpl-x",
        model="llama3.1:8b",
        created=1700,
    )

    first_choice = _as_dict(_as_list(chunk["choices"])[0])
    assert first_choice["finish_reason"] == "stop"

    usage = _as_dict(chunk["usage"])
    # prompt_eval_count=4 and eval_count=6 in the frame; the total is their sum.
    expected_counts = {
        "prompt_tokens": 4,
        "completion_tokens": 6,
        "total_tokens": 10,
    }
    for field, count in expected_counts.items():
        assert usage[field] == count


# --- non-streaming response translation ------------------------------------


def test_nonstream_chat_to_openai_completion() -> None:
    """A complete chat response should become a single ``chat.completion``."""
    response: dict[str, Any] = {
        "model": "llama3.1:8b",
        "message": {"role": "assistant", "content": "Echo: hi"},
        "done": True,
        "prompt_eval_count": 2,
        "eval_count": 3,
    }

    completion = translate.ollama_chat_to_openai(response)

    assert completion["object"] == "chat.completion"
    choices = _as_list(completion["choices"])
    first_choice = _as_dict(choices[0])
    assert first_choice["message"] == {"role": "assistant", "content": "Echo: hi"}
    assert first_choice["finish_reason"] == "stop"
    usage = _as_dict(completion["usage"])
    # 2 prompt tokens + 3 generated tokens.
    assert usage["total_tokens"] == 5


def test_nonstream_generate_to_openai() -> None:
    """A complete generate response should become a ``text_completion``."""
    response: dict[str, Any] = {
        "model": "llama3.1:8b",
        "response": "once upon a time",
        "done": True,
        "prompt_eval_count": 1,
        "eval_count": 4,
    }

    completion = translate.ollama_generate_to_openai(response)

    assert completion["object"] == "text_completion"
    choices = _as_list(completion["choices"])
    first_choice = _as_dict(choices[0])
    assert first_choice["text"] == "once upon a time"
    usage = _as_dict(completion["usage"])
    # 1 prompt token + 4 generated tokens.
    assert usage["total_tokens"] == 5


def test_embed_to_openai_shape() -> None:
    """An embed response should become an OpenAI-style embeddings ``list``.

    Both entries are checked so that each vector is verified against its own
    ``index``, not just the first one.
    """
    ollama_resp: dict[str, Any] = {
        "model": "nomic-embed-text",
        "embeddings": [[0.0, 0.1], [0.2, 0.3]],
        "prompt_eval_count": 7,
    }
    out = translate.ollama_embed_to_openai(ollama_resp, model="nomic-embed-text")
    assert out["object"] == "list"
    data = _as_list(out["data"])
    assert len(data) == 2
    assert data[0] == {"object": "embedding", "index": 0, "embedding": [0.0, 0.1]}
    # The second entry must keep its position and vector paired; checking only
    # data[0] would miss an off-by-one or constant index.
    assert data[1] == {"object": "embedding", "index": 1, "embedding": [0.2, 0.3]}
    # Embeddings charge prompt tokens only (SPEC §13.1).
    assert out["usage"] == {"prompt_tokens": 7, "total_tokens": 7}


def test_models_to_openai_list_shape() -> None:
    """Each model name should appear once as a ``model`` object in a ``list``."""
    listing = translate.models_to_openai_list(["llama3.1:8b", "mistral:7b"])

    assert listing["object"] == "list"
    entries = _as_list(listing["data"])
    # Compared as a set: this test does not pin the order of the entries.
    listed_ids = {_as_dict(entry)["id"] for entry in entries}
    assert listed_ids == {"llama3.1:8b", "mistral:7b"}
    for entry in entries:
        assert _as_dict(entry)["object"] == "model"