From 4fe66c291842cd85ad7f293070bb6a21df7a8d46 Mon Sep 17 00:00:00 2001 From: Teja Sunku Date: Wed, 18 Feb 2026 01:15:46 -0800 Subject: [PATCH 1/6] docs: Fix incorrect routing preferences in OpenClaw demo config (#765) --- demos/llm_routing/openclaw_routing/config.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/demos/llm_routing/openclaw_routing/config.yaml b/demos/llm_routing/openclaw_routing/config.yaml index b94dbd7f..3106b5dd 100644 --- a/demos/llm_routing/openclaw_routing/config.yaml +++ b/demos/llm_routing/openclaw_routing/config.yaml @@ -12,7 +12,6 @@ listeners: timeout: 30s llm_providers: - # Kimi K2.5 — Moonshot AI's open model (1T MoE, 32B active params) # Great for general conversation, agentic tasks, and multimodal work # OpenAI-compatible API at $0.60/M input, $2.50/M output tokens @@ -21,13 +20,13 @@ llm_providers: base_url: https://api.moonshot.ai/v1 default: true routing_preferences: - - name: code generation - description: generating code, writing scripts, implementing functions, and building tool integrations + - name: general conversation + description: general chat, greetings, casual conversation, Q&A, and everyday questions # Claude — Anthropic's most capable model # Best for complex reasoning, code, tool use, and evaluation - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY routing_preferences: - - name: general conversation - description: general chat, greetings, casual conversation, Q&A, and everyday questions + - name: code generation + description: generating code, writing scripts, implementing functions, and building tool integrations From 98b979ce544602ec821e2f9069977143e6775619 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 18 Feb 2026 01:19:20 -0800 Subject: [PATCH 2/6] Upstream TLS validation and configurable connect timeout (#766) --- cli/planoai/config_generator.py | 8 ++++ cli/uv.lock | 2 +- config/envoy.template.yaml | 69 ++++++++++++++++++++++++++------- config/plano_config_schema.yaml | 6 +++ 4 files changed, 71 insertions(+), 14 deletions(-) diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index 8354b8dc..522968c9 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -460,6 +460,12 @@ def validate_and_render_schema(): print("agent_orchestrator: ", agent_orchestrator) + overrides = config_yaml.get("overrides", {}) + upstream_connect_timeout = overrides.get("upstream_connect_timeout", "5s") + upstream_tls_ca_path = overrides.get( + "upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt" + ) + data = { "prompt_gateway_listener": prompt_gateway, "llm_gateway_listener": llm_gateway, @@ -471,6 +477,8 @@ def validate_and_render_schema(): "local_llms": llms_with_endpoint, "agent_orchestrator": agent_orchestrator, "listeners": listeners, + "upstream_connect_timeout": upstream_connect_timeout, + "upstream_tls_ca_path": upstream_tls_ca_path, } rendered = template.render(data) diff --git a/cli/uv.lock b/cli/uv.lock index 5f18604b..45ccf82e 100644 --- a/cli/uv.lock +++ b/cli/uv.lock @@ -337,7 +337,7 @@ wheels = [ [[package]] name = "planoai" -version = "0.4.6" +version = "0.4.7" source = { editable = "." } dependencies = [ { name = "click" }, diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index f514e728..a780c3f1 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -595,7 +595,7 @@ static_resources: clusters: - name: arch - connect_timeout: 5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -618,9 +618,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: anthropic - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -643,9 +646,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: deepseek - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -668,9 +674,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: xai - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -693,9 +702,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: moonshotai - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -718,9 +730,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: zhipu - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -743,9 +758,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: together_ai - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -768,9 +786,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: gemini - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -793,9 +814,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: groq - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -818,9 +842,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: mistral - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -839,9 +866,16 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext sni: api.mistral.ai + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: openai - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -864,6 +898,9 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: mistral_7b_instruct connect_timeout: 0.5s type: STRICT_DNS @@ -884,7 +921,7 @@ static_resources: {% if cluster.connect_timeout -%} connect_timeout: {{ cluster.connect_timeout }} {% else -%} - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} {% endif -%} type: LOGICAL_DNS dns_lookup_family: V4_ONLY @@ -913,12 +950,15 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} {% endif %} {% endfor %} {% for local_llm_provider in local_llms %} - name: {{ local_llm_provider.cluster_name }} - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -946,6 +986,9 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} {% endif %} {% endfor %} diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 0f3cefb7..cd736eb6 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -265,6 +265,12 @@ properties: type: boolean use_agent_orchestrator: type: boolean + upstream_connect_timeout: + type: string + description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'." + upstream_tls_ca_path: + type: string + description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'." system_prompt: type: string prompt_targets: From 7b5f1549a52cdd225c2a0c491fc587ba03fcc25a Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 18 Feb 2026 01:52:55 -0800 Subject: [PATCH 3/6] release 0.4.8 (#767) Co-authored-by: Claude Opus 4.6 --- .github/workflows/ci.yml | 4 ++-- apps/www/src/components/Hero.tsx | 2 +- build_filter_image.sh | 2 +- cli/planoai/__init__.py | 2 +- cli/planoai/consts.py | 2 +- cli/pyproject.toml | 2 +- config/validate_plano_config.sh | 2 +- demos/llm_routing/preference_based_routing/README.md | 4 ++-- docs/source/conf.py | 2 +- docs/source/get_started/quickstart.rst | 6 +++--- docs/source/resources/deployment.rst | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9070ee20..9c3698fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,13 +79,13 @@ jobs: load: true tags: | ${{ env.PLANO_DOCKER_IMAGE }} - ${{ env.DOCKER_IMAGE }}:0.4.7 + ${{ env.DOCKER_IMAGE }}:0.4.8 ${{ env.DOCKER_IMAGE }}:latest cache-from: type=gha cache-to: type=gha,mode=max - name: Save image as artifact - run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.7 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar + run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.8 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar - name: Upload image artifact uses: actions/upload-artifact@v4 diff --git a/apps/www/src/components/Hero.tsx b/apps/www/src/components/Hero.tsx index f98bc4d2..2b490b7b 100644 --- a/apps/www/src/components/Hero.tsx +++ b/apps/www/src/components/Hero.tsx @@ -24,7 +24,7 @@ export function Hero() { >
- v0.4.7 + v0.4.8 — diff --git a/build_filter_image.sh b/build_filter_image.sh index 318fa542..7c79d45c 100644 --- a/build_filter_image.sh +++ b/build_filter_image.sh @@ -1 +1 @@ -docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.7 +docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.8 diff --git a/cli/planoai/__init__.py b/cli/planoai/__init__.py index 9e014320..03c28daa 100644 --- a/cli/planoai/__init__.py +++ b/cli/planoai/__init__.py @@ -1,3 +1,3 @@ """Plano CLI - Intelligent Prompt Gateway.""" -__version__ = "0.4.7" +__version__ = "0.4.8" diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py index fa94efb6..84b4439f 100644 --- a/cli/planoai/consts.py +++ b/cli/planoai/consts.py @@ -5,5 +5,5 @@ PLANO_COLOR = "#969FF4" SERVICE_NAME_ARCHGW = "plano" PLANO_DOCKER_NAME = "plano" -PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.7") +PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.8") DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317" diff --git a/cli/pyproject.toml b/cli/pyproject.toml index 673e821b..44b3a553 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "planoai" -version = "0.4.7" +version = "0.4.8" description = "Python-based CLI tool to manage Plano." authors = [{name = "Katanemo Labs, Inc."}] readme = "README.md" diff --git a/config/validate_plano_config.sh b/config/validate_plano_config.sh index 8eafd344..5291341d 100644 --- a/config/validate_plano_config.sh +++ b/config/validate_plano_config.sh @@ -5,7 +5,7 @@ failed_files=() for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml); do echo "Validating ${file}..." touch $(pwd)/${file}_rendered - if ! docker run --rm -v "$(pwd)/${file}:/app/plano_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/plano_config_rendered.yaml:rw" --entrypoint /bin/sh ${PLANO_DOCKER_IMAGE:-katanemo/plano:0.4.7} -c "python -m planoai.config_generator" 2>&1 > /dev/null ; then + if ! docker run --rm -v "$(pwd)/${file}:/app/plano_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/plano_config_rendered.yaml:rw" --entrypoint /bin/sh ${PLANO_DOCKER_IMAGE:-katanemo/plano:0.4.8} -c "python -m planoai.config_generator" 2>&1 > /dev/null ; then echo "Validation failed for $file" failed_files+=("$file") fi diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md index 6c8ace08..e1e16ec0 100644 --- a/demos/llm_routing/preference_based_routing/README.md +++ b/demos/llm_routing/preference_based_routing/README.md @@ -15,9 +15,9 @@ Make sure your machine is up to date with [latest version of plano]([url](https: ```bash (venv) $ planoai up --service plano --foreground # Or if installed with uv: uvx planoai up --service plano --foreground -2025-05-30 18:00:09,953 - planoai.main - INFO - Starting plano cli version: 0.4.7 +2025-05-30 18:00:09,953 - planoai.main - INFO - Starting plano cli version: 0.4.8 2025-05-30 18:00:09,953 - planoai.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/llm_routing/preference_based_routing/config.yaml -2025-05-30 18:00:10,422 - cli.core - INFO - Starting plano gateway, image name: plano, tag: katanemo/plano:0.4.7 +2025-05-30 18:00:10,422 - cli.core - INFO - Starting plano gateway, image name: plano, tag: katanemo/plano:0.4.8 2025-05-30 18:00:10,662 - cli.core - INFO - plano status: running, health status: starting 2025-05-30 18:00:11,712 - cli.core - INFO - plano status: running, health status: starting 2025-05-30 18:00:12,761 - cli.core - INFO - plano is running and is healthy! diff --git a/docs/source/conf.py b/docs/source/conf.py index fafef73e..c4f20ea0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons project = "Plano Docs" copyright = "2025, Katanemo Labs, Inc" author = "Katanemo Labs, Inc" -release = " v0.4.7" +release = " v0.4.8" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index 5fda423c..e52e349b 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -37,7 +37,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins .. code-block:: console - $ uv tool install planoai==0.4.7 + $ uv tool install planoai==0.4.8 **Option 2: Install with pip (Traditional)** @@ -45,7 +45,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins $ python -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate - $ pip install planoai==0.4.7 + $ pip install planoai==0.4.8 .. _llm_routing_quickstart: @@ -90,7 +90,7 @@ Start Plano: $ planoai up plano_config.yaml # Or if installed with uv tool: uvx planoai up plano_config.yaml - 2024-12-05 11:24:51,288 - planoai.main - INFO - Starting plano cli version: 0.4.7 + 2024-12-05 11:24:51,288 - planoai.main - INFO - Starting plano cli version: 0.4.8 2024-12-05 11:24:51,825 - planoai.utils - INFO - Schema validation successful! 2024-12-05 11:24:51,825 - planoai.main - INFO - Starting plano ... diff --git a/docs/source/resources/deployment.rst b/docs/source/resources/deployment.rst index 2456a80b..c9c75886 100644 --- a/docs/source/resources/deployment.rst +++ b/docs/source/resources/deployment.rst @@ -25,7 +25,7 @@ Create a ``docker-compose.yml`` file with the following configuration: # docker-compose.yml services: plano: - image: katanemo/plano:0.4.7 + image: katanemo/plano:0.4.8 container_name: plano ports: - "10000:10000" # ingress (client -> plano) From baeee56f6b8f0c4faf22218b7eab3e1c9343b318 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 18 Feb 2026 04:43:59 -0800 Subject: [PATCH 4/6] Make model field optional in request types, resolve from default provider (#768) --- .../src/handlers/agent_chat_completions.rs | 45 ++++++++++++++----- crates/brightstaff/src/handlers/llm.rs | 23 +++++++++- crates/brightstaff/src/main.rs | 1 + crates/hermesllm/src/apis/anthropic.rs | 1 + crates/hermesllm/src/apis/openai.rs | 1 + crates/hermesllm/src/apis/openai_responses.rs | 1 + 6 files changed, 61 insertions(+), 11 deletions(-) diff --git a/crates/brightstaff/src/handlers/agent_chat_completions.rs b/crates/brightstaff/src/handlers/agent_chat_completions.rs index adfdce02..22722895 100644 --- a/crates/brightstaff/src/handlers/agent_chat_completions.rs +++ b/crates/brightstaff/src/handlers/agent_chat_completions.rs @@ -2,15 +2,17 @@ use std::sync::Arc; use std::time::Instant; use bytes::Bytes; +use common::llm_providers::LlmProviders; use hermesllm::apis::OpenAIMessage; use hermesllm::clients::SupportedAPIsFromClient; use hermesllm::providers::request::ProviderRequest; use hermesllm::ProviderRequestType; use http_body_util::combinators::BoxBody; use http_body_util::BodyExt; -use hyper::{Request, Response}; +use hyper::{Request, Response, StatusCode}; use opentelemetry::trace::get_active_span; use serde::ser::Error as SerError; +use tokio::sync::RwLock; use tracing::{debug, info, info_span, warn, Instrument}; use super::agent_selector::{AgentSelectionError, AgentSelector}; @@ -40,6 +42,7 @@ pub async fn agent_chat( _: String, agents_list: Arc>>>, listeners: Arc>>, + llm_providers: Arc>, ) -> Result>, hyper::Error> { // Extract request_id from headers or generate a new one let request_id: String = match request @@ -71,6 +74,7 @@ pub async fn agent_chat( orchestrator_service, agents_list, listeners, + llm_providers, request_id, ) .await @@ -155,6 +159,7 @@ async fn handle_agent_chat_inner( orchestrator_service: Arc, agents_list: Arc>>>, listeners: Arc>>, + llm_providers: Arc>, request_id: String, ) -> Result>, AgentFilterChainError> { // Initialize services @@ -221,16 +226,36 @@ async fn handle_agent_chat_inner( AgentFilterChainError::RequestParsing(serde_json::Error::custom(err_msg)) })?; - let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) { - Ok(request) => request, - Err(err) => { - warn!("failed to parse request as ProviderRequestType: {}", err); - let err_msg = format!("Failed to parse request: {}", err); - return Err(AgentFilterChainError::RequestParsing( - serde_json::Error::custom(err_msg), - )); + let mut client_request = + match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) { + Ok(request) => request, + Err(err) => { + warn!("failed to parse request as ProviderRequestType: {}", err); + let err_msg = format!("Failed to parse request: {}", err); + return Err(AgentFilterChainError::RequestParsing( + serde_json::Error::custom(err_msg), + )); + } + }; + + // If model is not specified in the request, resolve from default provider + if client_request.model().is_empty() { + match llm_providers.read().await.default() { + Some(default_provider) => { + let default_model = default_provider.name.clone(); + info!(default_model = %default_model, "no model specified in request, using default provider"); + client_request.set_model(default_model); + } + None => { + let err_msg = "No model specified in request and no default provider configured"; + warn!("{}", err_msg); + let mut bad_request = + Response::new(ResponseHandler::create_full_body(err_msg.to_string())); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } } - }; + } let message: Vec = client_request.get_messages(); diff --git a/crates/brightstaff/src/handlers/llm.rs b/crates/brightstaff/src/handlers/llm.rs index 10a68c1a..435fb6f5 100644 --- a/crates/brightstaff/src/handlers/llm.rs +++ b/crates/brightstaff/src/handlers/llm.rs @@ -150,9 +150,30 @@ async fn llm_chat_inner( Some(SupportedAPIsFromClient::OpenAIResponsesAPI(_)) ); + // If model is not specified in the request, resolve from default provider + let model_from_request = client_request.model().to_string(); + let model_from_request = if model_from_request.is_empty() { + match llm_providers.read().await.default() { + Some(default_provider) => { + let default_model = default_provider.name.clone(); + info!(default_model = %default_model, "no model specified in request, using default provider"); + client_request.set_model(default_model.clone()); + default_model + } + None => { + let err_msg = "No model specified in request and no default provider configured"; + warn!("{}", err_msg); + let mut bad_request = Response::new(full(err_msg.to_string())); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } + } + } else { + model_from_request + }; + // Model alias resolution: update model field in client_request immediately // This ensures all downstream objects use the resolved model - let model_from_request = client_request.model().to_string(); let temperature = client_request.get_temperature(); let is_streaming_request = client_request.is_streaming(); let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases); diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index fff69b00..87deda6a 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -202,6 +202,7 @@ async fn main() -> Result<(), Box> { fully_qualified_url, agents_list, listeners, + llm_providers, ) .with_context(parent_cx) .await; diff --git a/crates/hermesllm/src/apis/anthropic.rs b/crates/hermesllm/src/apis/anthropic.rs index 6e53e6db..3cb06828 100644 --- a/crates/hermesllm/src/apis/anthropic.rs +++ b/crates/hermesllm/src/apis/anthropic.rs @@ -102,6 +102,7 @@ pub struct McpServer { #[skip_serializing_none] #[derive(Serialize, Deserialize, Debug, Clone)] pub struct MessagesRequest { + #[serde(default)] pub model: String, pub messages: Vec, pub max_tokens: u32, diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs index cd4e7d0b..53eee442 100644 --- a/crates/hermesllm/src/apis/openai.rs +++ b/crates/hermesllm/src/apis/openai.rs @@ -74,6 +74,7 @@ impl ApiDefinition for OpenAIApi { #[derive(Serialize, Deserialize, Debug, Clone, Default)] pub struct ChatCompletionsRequest { pub messages: Vec, + #[serde(default)] pub model: String, // pub audio: Option