diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index a3f67dfc..8e879eda 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -87,9 +87,11 @@ impl StreamContext { )); debug!( - "request received: llm provider hint: {:?}, selected llm: {}", - self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER), - self.llm_provider.as_ref().unwrap().name + "request received: llm provider hint: {}, selected llm: {}, model: {}", + self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER) + .unwrap_or_default(), + self.llm_provider.as_ref().unwrap().name, + self.llm_provider.as_ref().unwrap().model ); } @@ -494,7 +496,7 @@ impl HttpContext for StreamContext { //HACK: add support for tokenizing mistral and other models //filed issue https://github.com/katanemo/arch/issues/222 if !model.as_ref().unwrap().starts_with("gpt") { - warn!( + trace!( "tiktoken_rs: unsupported model: {}, using gpt-4 to compute token count", model.as_ref().unwrap() ); diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 777d3790..b18a0d29 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -25,7 +25,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { Some("x-arch-llm-provider-hint"), ) .returning(None) - .expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: Some(\"default\"), selected llm: open-ai-gpt-4")) + .expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: default, selected llm: open-ai-gpt-4, model: gpt-4")) .expect_add_header_map_value( Some(MapType::HttpRequestHeaders), Some("x-arch-llm-provider"), diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index e3238484..b4f87698 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -29,5 +29,12 @@ llm_providers: provider_interface: mistral model: ministral-3b-latest + - name: deepseek + access_key: $DEEPSEEK_API_KEY + provider_interface: openai + model: deepseek-reasoner + endpoint: api.deepseek.com + protocol: https + tracing: random_sampling: 100