From 6887d527502d78daf5f7ccd44a65a5cde8ff3d63 Mon Sep 17 00:00:00 2001
From: Adil Hafeez
Date: Tue, 21 Jan 2025 18:01:56 -0800
Subject: [PATCH] When using ollama token count was not coming in (#375)

When using Ollama, the token count was not coming in, which resulted in
the token count and other metrics showing up as zero. This did not cause
tracing to break.
---
 crates/llm_gateway/src/stream_context.rs | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 39d4c58f..6939f1d8 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -483,11 +483,14 @@ impl HttpContext for StreamContext {
         let tokens_str = chat_completions_chunk_response_events.to_string();
         //HACK: add support for tokenizing mistral and other models
         //filed issue https://github.com/katanemo/arch/issues/222
-        if model.as_ref().unwrap().starts_with("mistral")
-            || model.as_ref().unwrap().starts_with("ministral")
-        {
-            model = Some("gpt-4".to_string());
+        if !model.as_ref().unwrap().starts_with("gpt") {
+            warn!(
+                "tiktoken_rs: unsupported model: {}, using gpt-4 to compute token count",
+                model.as_ref().unwrap()
+            );
         }
+        model = Some("gpt-4".to_string());
+
         let token_count =
             match tokenizer::token_count(model.as_ref().unwrap().as_str(), tokens_str.as_str()) {
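
Reviewer note: the hunk above only changes the fallback condition; the actual
counting happens in tokenizer::token_count, whose body is not part of this
diff. For reference, here is a minimal sketch of what such a helper plausibly
looks like on top of the public tiktoken_rs API (get_bpe_from_model and
CoreBPE::encode_with_special_tokens). The module path, signature, and error
type are assumptions inferred from the call site, not the project's actual
code:

    // Hypothetical crates/llm_gateway/src/tokenizer.rs (assumed, not part of
    // this diff); requires the tiktoken_rs and anyhow crates.
    use anyhow::Result;
    use tiktoken_rs::get_bpe_from_model;

    /// Count tokens in `text` with the tokenizer mapped to `model`.
    pub fn token_count(model: &str, text: &str) -> Result<usize> {
        // get_bpe_from_model errors out on model names that tiktoken_rs does
        // not recognize (for example, Ollama models such as "llama3"), which
        // is why the caller rewrites every non-"gpt" model to "gpt-4" first.
        let bpe = get_bpe_from_model(model)?;
        // The token count is the length of the encoded sequence; special
        // tokens are included so that chat markup is counted as well.
        Ok(bpe.encode_with_special_tokens(text).len())
    }

Counting with gpt-4's tokenizer is only an approximation for non-OpenAI
models, but an approximate count is what lets the token metrics come out
non-zero for Ollama until https://github.com/katanemo/arch/issues/222 adds
real per-model support.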