"""Unit tests for ``neuronetz_gateway.proxy.token_counter``. Tokens are read precisely from Ollama's final frame: ``prompt_eval_count`` (input) and ``eval_count`` (output) — never estimated (SPEC §2, §4.3 step 12, §13.1). Embeddings carry only ``prompt_eval_count`` (SPEC §13.1). """ from __future__ import annotations from neuronetz_gateway.proxy.token_counter import TokenUsage, extract_usage from tests._skip import call_or_skip def test_extract_from_final_chat_frame() -> None: # Mirrors the terminal NDJSON object emitted by mock_ollama (_final_metrics). final = { "model": "llama3.1:8b", "done": True, "done_reason": "stop", "total_duration": 1_234_567_890, "prompt_eval_count": 11, "eval_count": 7, } usage = call_or_skip(extract_usage, final) assert isinstance(usage, TokenUsage) assert usage.tokens_in == 11 assert usage.tokens_out == 7 def test_extract_from_generate_frame() -> None: final = {"done": True, "context": [1, 2, 3], "prompt_eval_count": 5, "eval_count": 42} usage = call_or_skip(extract_usage, final) assert (usage.tokens_in, usage.tokens_out) == (5, 42) def test_embeddings_frame_only_prompt_eval_count() -> None: # Embeddings: Ollama returns no eval_count (SPEC §13.1) => tokens_out == 0. frame = {"embedding": [0.0, 0.1], "prompt_eval_count": 9} usage = call_or_skip(extract_usage, frame) assert usage.tokens_in == 9 assert usage.tokens_out == 0 def test_missing_counts_default_to_zero() -> None: # A frame lacking the counter fields must not raise; charge nothing rather # than crash the audit/budget path. usage = call_or_skip(extract_usage, {"done": True}) assert usage.tokens_in == 0 assert usage.tokens_out == 0