mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
When using ollama token count was not coming in (#375)
When using Ollama, the token count was not coming through, resulting in the token count and other metrics showing up as zero. This did not cause tracing to break.
This commit is contained in:
parent
fcd8cfb9fc
commit
6887d52750
1 changed files with 7 additions and 4 deletions
|
|
@ -483,11 +483,14 @@ impl HttpContext for StreamContext {
|
|||
let tokens_str = chat_completions_chunk_response_events.to_string();
|
||||
//HACK: add support for tokenizing mistral and other models
|
||||
//filed issue https://github.com/katanemo/arch/issues/222
|
||||
if model.as_ref().unwrap().starts_with("mistral")
|
||||
|| model.as_ref().unwrap().starts_with("ministral")
|
||||
{
|
||||
model = Some("gpt-4".to_string());
|
||||
if !model.as_ref().unwrap().starts_with("gpt") {
|
||||
warn!(
|
||||
"tiktoken_rs: unsupported model: {}, using gpt-4 to compute token count",
|
||||
model.as_ref().unwrap()
|
||||
);
|
||||
}
|
||||
model = Some("gpt-4".to_string());
|
||||
|
||||
let token_count =
|
||||
match tokenizer::token_count(model.as_ref().unwrap().as_str(), tokens_str.as_str())
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue