# neuronetz-gateway/tests/unit/test_token_counter.py

"""Unit tests for ``neuronetz_gateway.proxy.token_counter``.

Tokens are read precisely from Ollama's final frame: ``prompt_eval_count``
(input) and ``eval_count`` (output) — never estimated (SPEC §2, §4.3 step 12,
§13.1). Embeddings carry only ``prompt_eval_count`` (SPEC §13.1).
"""

from __future__ import annotations

from neuronetz_gateway.proxy.token_counter import TokenUsage, extract_usage
from tests._skip import call_or_skip


def test_extract_from_final_chat_frame() -> None:
    """A terminal chat frame yields a ``TokenUsage`` carrying both counters.

    The frame mirrors the terminal NDJSON object emitted by mock_ollama
    (``_final_metrics``): ``prompt_eval_count`` is expected to surface as
    ``tokens_in`` and ``eval_count`` as ``tokens_out``.
    """
    expected_in, expected_out = 11, 7
    chat_frame = {
        "model": "llama3.1:8b",
        "done": True,
        "done_reason": "stop",
        "total_duration": 1_234_567_890,
        "prompt_eval_count": expected_in,
        "eval_count": expected_out,
    }

    result = call_or_skip(extract_usage, chat_frame)

    # The return type is checked first so later attribute reads are meaningful.
    assert isinstance(result, TokenUsage)
    assert result.tokens_in == expected_in
    assert result.tokens_out == expected_out


def test_extract_from_generate_frame() -> None:
    """A final frame that also carries ``context`` yields both counters.

    Extra keys such as ``context`` must not disturb the extracted counts.
    """
    generate_frame = {
        "done": True,
        "context": [1, 2, 3],
        "prompt_eval_count": 5,
        "eval_count": 42,
    }

    result = call_or_skip(extract_usage, generate_frame)

    # Compare both counters at once as an (input, output) pair.
    observed = (result.tokens_in, result.tokens_out)
    assert observed == (5, 42)


def test_embeddings_frame_only_prompt_eval_count() -> None:
    """An embeddings frame reports input tokens and zero output tokens.

    Embeddings: Ollama returns no ``eval_count`` (SPEC §13.1), so
    ``tokens_out`` is expected to be 0.
    """
    prompt_tokens = 9
    embeddings_frame = {
        "embedding": [0.0, 0.1],
        "prompt_eval_count": prompt_tokens,
    }

    result = call_or_skip(extract_usage, embeddings_frame)

    assert result.tokens_in == prompt_tokens
    assert result.tokens_out == 0


def test_missing_counts_default_to_zero() -> None:
    """A frame without counter fields yields zero for both directions.

    Such a frame must not raise: charge nothing rather than crash the
    audit/budget path.
    """
    counterless_frame = {"done": True}

    result = call_or_skip(extract_usage, counterless_frame)

    assert result.tokens_in == 0
    assert result.tokens_out == 0