diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9070ee20..9c3698fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,13 +79,13 @@ jobs: load: true tags: | ${{ env.PLANO_DOCKER_IMAGE }} - ${{ env.DOCKER_IMAGE }}:0.4.7 + ${{ env.DOCKER_IMAGE }}:0.4.8 ${{ env.DOCKER_IMAGE }}:latest cache-from: type=gha cache-to: type=gha,mode=max - name: Save image as artifact - run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.7 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar + run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.8 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar - name: Upload image artifact uses: actions/upload-artifact@v4 diff --git a/CLAUDE.md b/CLAUDE.md index b8c1c1bd..71c94303 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -137,6 +137,12 @@ To prepare a release (e.g., bumping from `0.4.6` to `0.4.7`), update the version Commit message format: `release X.Y.Z` +## Workflow Preferences + +- **Git commits:** Do NOT add `Co-Authored-By` lines. Keep commit messages short and concise (one line, no verbose descriptions). NEVER commit and push directly to `main`—always use a feature branch and PR. +- **Git branches:** Use the format `<username>/<branch-name>` when creating branches for PRs. Determine the username from `gh api user --jq .login`. +- **GitHub issues:** When a GitHub issue URL is pasted, fetch all requirements and context from the issue first. The end goal is always a PR with all tests passing. + ## Key Conventions - Rust edition 2021, formatted with `cargo fmt`, linted with `cargo clippy -D warnings` diff --git a/apps/www/src/components/Hero.tsx b/apps/www/src/components/Hero.tsx index f98bc4d2..2b490b7b 100644 --- a/apps/www/src/components/Hero.tsx +++ b/apps/www/src/components/Hero.tsx @@ -24,7 +24,7 @@ export function Hero() { >
- v0.4.7 + v0.4.8 — diff --git a/build_filter_image.sh b/build_filter_image.sh index 318fa542..7c79d45c 100644 --- a/build_filter_image.sh +++ b/build_filter_image.sh @@ -1 +1 @@ -docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.7 +docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.8 diff --git a/cli/planoai/__init__.py b/cli/planoai/__init__.py index 9e014320..03c28daa 100644 --- a/cli/planoai/__init__.py +++ b/cli/planoai/__init__.py @@ -1,3 +1,3 @@ """Plano CLI - Intelligent Prompt Gateway.""" -__version__ = "0.4.7" +__version__ = "0.4.8" diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index 8354b8dc..522968c9 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -460,6 +460,12 @@ def validate_and_render_schema(): print("agent_orchestrator: ", agent_orchestrator) + overrides = config_yaml.get("overrides", {}) + upstream_connect_timeout = overrides.get("upstream_connect_timeout", "5s") + upstream_tls_ca_path = overrides.get( + "upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt" + ) + data = { "prompt_gateway_listener": prompt_gateway, "llm_gateway_listener": llm_gateway, @@ -471,6 +477,8 @@ def validate_and_render_schema(): "local_llms": llms_with_endpoint, "agent_orchestrator": agent_orchestrator, "listeners": listeners, + "upstream_connect_timeout": upstream_connect_timeout, + "upstream_tls_ca_path": upstream_tls_ca_path, } rendered = template.render(data) diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py index fa94efb6..84b4439f 100644 --- a/cli/planoai/consts.py +++ b/cli/planoai/consts.py @@ -5,5 +5,5 @@ PLANO_COLOR = "#969FF4" SERVICE_NAME_ARCHGW = "plano" PLANO_DOCKER_NAME = "plano" -PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.7") +PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.8") DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317" diff --git a/cli/pyproject.toml 
b/cli/pyproject.toml index 673e821b..44b3a553 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "planoai" -version = "0.4.7" +version = "0.4.8" description = "Python-based CLI tool to manage Plano." authors = [{name = "Katanemo Labs, Inc."}] readme = "README.md" diff --git a/cli/uv.lock b/cli/uv.lock index 5f18604b..45ccf82e 100644 --- a/cli/uv.lock +++ b/cli/uv.lock @@ -337,7 +337,7 @@ wheels = [ [[package]] name = "planoai" -version = "0.4.6" +version = "0.4.7" source = { editable = "." } dependencies = [ { name = "click" }, diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index f514e728..a780c3f1 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -595,7 +595,7 @@ static_resources: clusters: - name: arch - connect_timeout: 5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -618,9 +618,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: anthropic - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -643,9 +646,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: deepseek - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -668,9 +674,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ 
upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: xai - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -693,9 +702,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: moonshotai - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -718,9 +730,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: zhipu - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -743,9 +758,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: together_ai - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -768,9 +786,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: gemini - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -793,9 +814,12 @@ static_resources: tls_params: 
tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: groq - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -818,9 +842,12 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: mistral - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -839,9 +866,16 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext sni: api.mistral.ai + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: openai - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -864,6 +898,9 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} - name: mistral_7b_instruct connect_timeout: 0.5s type: STRICT_DNS @@ -884,7 +921,7 @@ static_resources: {% if cluster.connect_timeout -%} connect_timeout: {{ cluster.connect_timeout }} {% else -%} - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} {% endif -%} type: LOGICAL_DNS dns_lookup_family: V4_ONLY @@ -913,12 +950,15 @@ 
static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} {% endif %} {% endfor %} {% for local_llm_provider in local_llms %} - name: {{ local_llm_provider.cluster_name }} - connect_timeout: 0.5s + connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -946,6 +986,9 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + validation_context: + trusted_ca: + filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }} {% endif %} {% endfor %} diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 9ff43bfb..7c35d2cf 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -265,6 +265,12 @@ properties: type: boolean use_agent_orchestrator: type: boolean + upstream_connect_timeout: + type: string + description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'." + upstream_tls_ca_path: + type: string + description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'." system_prompt: type: string prompt_targets: diff --git a/config/validate_plano_config.sh b/config/validate_plano_config.sh index 8eafd344..5291341d 100644 --- a/config/validate_plano_config.sh +++ b/config/validate_plano_config.sh @@ -5,7 +5,7 @@ failed_files=() for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml); do echo "Validating ${file}..." touch $(pwd)/${file}_rendered - if ! 
docker run --rm -v "$(pwd)/${file}:/app/plano_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/plano_config_rendered.yaml:rw" --entrypoint /bin/sh ${PLANO_DOCKER_IMAGE:-katanemo/plano:0.4.7} -c "python -m planoai.config_generator" 2>&1 > /dev/null ; then + if ! docker run --rm -v "$(pwd)/${file}:/app/plano_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/plano_config_rendered.yaml:rw" --entrypoint /bin/sh ${PLANO_DOCKER_IMAGE:-katanemo/plano:0.4.8} -c "python -m planoai.config_generator" 2>&1 > /dev/null ; then echo "Validation failed for $file" failed_files+=("$file") fi diff --git a/crates/brightstaff/src/handlers/agent_chat_completions.rs b/crates/brightstaff/src/handlers/agent_chat_completions.rs index 73f5cb2a..9e094a46 100644 --- a/crates/brightstaff/src/handlers/agent_chat_completions.rs +++ b/crates/brightstaff/src/handlers/agent_chat_completions.rs @@ -3,15 +3,17 @@ use std::time::Instant; use bytes::Bytes; use common::configuration::SpanAttributes; +use common::llm_providers::LlmProviders; use hermesllm::apis::OpenAIMessage; use hermesllm::clients::SupportedAPIsFromClient; use hermesllm::providers::request::ProviderRequest; use hermesllm::ProviderRequestType; use http_body_util::combinators::BoxBody; use http_body_util::BodyExt; -use hyper::{Request, Response}; +use hyper::{Request, Response, StatusCode}; use opentelemetry::trace::get_active_span; use serde::ser::Error as SerError; +use tokio::sync::RwLock; use tracing::{debug, info, info_span, warn, Instrument}; use super::agent_selector::{AgentSelectionError, AgentSelector}; @@ -42,6 +44,7 @@ pub async fn agent_chat( agents_list: Arc>>>, listeners: Arc>>, span_attributes: Arc>, + llm_providers: Arc>, ) -> Result>, hyper::Error> { let custom_attrs = collect_custom_trace_attributes(request.headers(), span_attributes.as_ref().as_ref()); @@ -75,6 +78,7 @@ pub async fn agent_chat( orchestrator_service, agents_list, listeners, + llm_providers, request_id, custom_attrs, ) @@ -160,6 +164,7 @@ async fn 
handle_agent_chat_inner( orchestrator_service: Arc, agents_list: Arc>>>, listeners: Arc>>, + llm_providers: Arc>, request_id: String, custom_attrs: std::collections::HashMap, ) -> Result>, AgentFilterChainError> { @@ -230,16 +235,36 @@ async fn handle_agent_chat_inner( AgentFilterChainError::RequestParsing(serde_json::Error::custom(err_msg)) })?; - let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) { - Ok(request) => request, - Err(err) => { - warn!("failed to parse request as ProviderRequestType: {}", err); - let err_msg = format!("Failed to parse request: {}", err); - return Err(AgentFilterChainError::RequestParsing( - serde_json::Error::custom(err_msg), - )); + let mut client_request = + match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) { + Ok(request) => request, + Err(err) => { + warn!("failed to parse request as ProviderRequestType: {}", err); + let err_msg = format!("Failed to parse request: {}", err); + return Err(AgentFilterChainError::RequestParsing( + serde_json::Error::custom(err_msg), + )); + } + }; + + // If model is not specified in the request, resolve from default provider + if client_request.model().is_empty() { + match llm_providers.read().await.default() { + Some(default_provider) => { + let default_model = default_provider.name.clone(); + info!(default_model = %default_model, "no model specified in request, using default provider"); + client_request.set_model(default_model); + } + None => { + let err_msg = "No model specified in request and no default provider configured"; + warn!("{}", err_msg); + let mut bad_request = + Response::new(ResponseHandler::create_full_body(err_msg.to_string())); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } } - }; + } let message: Vec = client_request.get_messages(); diff --git a/crates/brightstaff/src/handlers/llm.rs b/crates/brightstaff/src/handlers/llm.rs index 6e7ac226..7adfcff0 100644 --- 
a/crates/brightstaff/src/handlers/llm.rs +++ b/crates/brightstaff/src/handlers/llm.rs @@ -162,9 +162,30 @@ async fn llm_chat_inner( Some(SupportedAPIsFromClient::OpenAIResponsesAPI(_)) ); + // If model is not specified in the request, resolve from default provider + let model_from_request = client_request.model().to_string(); + let model_from_request = if model_from_request.is_empty() { + match llm_providers.read().await.default() { + Some(default_provider) => { + let default_model = default_provider.name.clone(); + info!(default_model = %default_model, "no model specified in request, using default provider"); + client_request.set_model(default_model.clone()); + default_model + } + None => { + let err_msg = "No model specified in request and no default provider configured"; + warn!("{}", err_msg); + let mut bad_request = Response::new(full(err_msg.to_string())); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } + } + } else { + model_from_request + }; + // Model alias resolution: update model field in client_request immediately // This ensures all downstream objects use the resolved model - let model_from_request = client_request.model().to_string(); let temperature = client_request.get_temperature(); let is_streaming_request = client_request.is_streaming(); let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases); diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index bfcadb63..97345556 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -211,6 +211,7 @@ async fn main() -> Result<(), Box> { agents_list, listeners, span_attributes, + llm_providers, ) .with_context(parent_cx) .await; diff --git a/crates/hermesllm/src/apis/anthropic.rs b/crates/hermesllm/src/apis/anthropic.rs index 6e53e6db..3cb06828 100644 --- a/crates/hermesllm/src/apis/anthropic.rs +++ b/crates/hermesllm/src/apis/anthropic.rs @@ -102,6 +102,7 @@ pub struct McpServer { 
#[skip_serializing_none] #[derive(Serialize, Deserialize, Debug, Clone)] pub struct MessagesRequest { + #[serde(default)] pub model: String, pub messages: Vec, pub max_tokens: u32, diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs index cd4e7d0b..53eee442 100644 --- a/crates/hermesllm/src/apis/openai.rs +++ b/crates/hermesllm/src/apis/openai.rs @@ -74,6 +74,7 @@ impl ApiDefinition for OpenAIApi { #[derive(Serialize, Deserialize, Debug, Clone, Default)] pub struct ChatCompletionsRequest { pub messages: Vec, + #[serde(default)] pub model: String, // pub audio: Option