diff --git a/cli/planoai/core.py b/cli/planoai/core.py
index db07baeb..174f37c0 100644
--- a/cli/planoai/core.py
+++ b/cli/planoai/core.py
@@ -159,18 +159,18 @@ def _resolve_cli_agent_endpoint(plano_config_yaml: dict) -> tuple[str, int]:
 
     if isinstance(listeners, dict):
         egress_config = listeners.get("egress_traffic", {})
-        host = egress_config.get("host") or egress_config.get("address") or "127.0.0.1"
+        host = egress_config.get("host") or egress_config.get("address") or "0.0.0.0"
         port = egress_config.get("port", 12000)
         return host, port
 
     if isinstance(listeners, list):
         for listener in listeners:
             if listener.get("type") in ["model", "model_listener"]:
-                host = listener.get("host") or listener.get("address") or "127.0.0.1"
+                host = listener.get("host") or listener.get("address") or "0.0.0.0"
                 port = listener.get("port", 12000)
                 return host, port
 
-    return "127.0.0.1", 12000
+    return "0.0.0.0", 12000
 
 
 def _apply_non_interactive_env(env: dict, additional_settings: dict) -> None:
diff --git a/cli/test/test_cli_agent.py b/cli/test/test_cli_agent.py
deleted file mode 100644
index eb10dc48..00000000
--- a/cli/test/test_cli_agent.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from unittest import mock
-
-from planoai.core import start_cli_agent
-
-
-PLANO_CONFIG = """
-version: v0.3.0
-listeners:
-  egress_traffic:
-    host: 127.0.0.1
-    port: 12000
-"""
-
-
-def test_start_cli_agent_codex_defaults():
-    with mock.patch("builtins.open", mock.mock_open(read_data=PLANO_CONFIG)):
-        with mock.patch("subprocess.run") as mock_run:
-            start_cli_agent("fake_plano_config.yaml", "codex", "{}")
-
-    mock_run.assert_called_once()
-    args, kwargs = mock_run.call_args
-    assert args[0] == ["codex", "--model", "gpt-5.3-codex"]
-    assert kwargs["check"] is True
-    assert kwargs["env"]["OPENAI_BASE_URL"] == "http://127.0.0.1:12000/v1"
-    assert kwargs["env"]["OPENAI_API_KEY"] == "test"
-
-
-def test_start_cli_agent_claude_keeps_existing_flow():
-    with mock.patch("builtins.open", mock.mock_open(read_data=PLANO_CONFIG)):
-        with mock.patch("subprocess.run") as mock_run:
-            start_cli_agent(
-                "fake_plano_config.yaml",
-                "claude",
-                '{"NON_INTERACTIVE_MODE": true}',
-            )
-
-    mock_run.assert_called_once()
-    args, kwargs = mock_run.call_args
-    assert args[0] == ["claude"]
-    assert kwargs["check"] is True
-    assert kwargs["env"]["ANTHROPIC_BASE_URL"] == "http://127.0.0.1:12000"
-    assert kwargs["env"]["ANTHROPIC_AUTH_TOKEN"] == "test"
diff --git a/crates/brightstaff/src/handlers/llm.rs b/crates/brightstaff/src/handlers/llm.rs
index 367d485d..d42985d6 100644
--- a/crates/brightstaff/src/handlers/llm.rs
+++ b/crates/brightstaff/src/handlers/llm.rs
@@ -4,9 +4,9 @@ use common::consts::{
     ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
 };
 use common::llm_providers::LlmProviders;
-use hermesllm::apis::openai_responses::{InputParam, ResponsesAPIRequest, Tool as ResponsesTool};
+use hermesllm::apis::openai_responses::InputParam;
 use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
-use hermesllm::{ProviderId, ProviderRequest, ProviderRequestType};
+use hermesllm::{ProviderRequest, ProviderRequestType};
 use http_body_util::combinators::BoxBody;
 use http_body_util::BodyExt;
 use hyper::header::{self};
@@ -236,12 +236,11 @@ async fn llm_chat_inner(
         if client_request.remove_metadata_key("plano_preference_config") {
             debug!("removed plano_preference_config from metadata");
         }
-        if provider_id == ProviderId::XAI {
-            if let ProviderRequestType::ResponsesAPIRequest(ref mut responses_req) = client_request {
-                normalize_responses_tools_for_xai(responses_req);
-            }
+        if let Some(ref client_api_kind) = client_api {
+            let upstream_api =
+                provider_id.compatible_api_for_client(client_api_kind, is_streaming_request);
+            client_request.normalize_for_upstream(provider_id, &upstream_api);
         }
-
         // === v1/responses state management: Determine upstream API and combine input if needed ===
         // Do this BEFORE routing since routing consumes the request
         // Only process state if state_storage is configured
@@ -487,7 +486,6 @@ async fn llm_chat_inner(
             .into_response()),
         }
     }
-
 /// Resolves model aliases by looking up the requested model in the model_aliases map.
 /// Returns the target model if an alias is found, otherwise returns the original model.
 fn resolve_model_alias(
@@ -557,89 +555,3 @@ async fn get_provider_info(
         (hermesllm::ProviderId::OpenAI, None)
     }
 }
-
-fn normalize_xai_responses_tool(tool: ResponsesTool, idx: usize) -> ResponsesTool {
-    match tool {
-        ResponsesTool::Custom {
-            name, description, ..
-        } => ResponsesTool::Function {
-            name: name.unwrap_or_else(|| format!("custom_tool_{}", idx + 1)),
-            description,
-            parameters: Some(serde_json::json!({
-                "type": "object",
-                "properties": {
-                    "input": { "type": "string" }
-                },
-                "required": ["input"],
-                "additionalProperties": true
-            })),
-            strict: Some(false),
-        },
-        other => other,
-    }
-}
-
-fn normalize_responses_tools_for_xai(req: &mut ResponsesAPIRequest) {
-    if let Some(tools) = req.tools.take() {
-        req.tools = Some(
-            tools
-                .into_iter()
-                .enumerate()
-                .map(|(idx, tool)| normalize_xai_responses_tool(tool, idx))
-                .collect(),
-        );
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::{normalize_xai_responses_tool, ResponsesTool};
-
-    #[test]
-    fn test_normalize_xai_custom_tool_to_function() {
-        let normalized = normalize_xai_responses_tool(
-            ResponsesTool::Custom {
-                name: Some("run_patch".to_string()),
-                description: Some("Apply patch text".to_string()),
-                format: Some(serde_json::json!({"kind":"patch","version":"v1"})),
-            },
-            0,
-        );
-
-        match normalized {
-            ResponsesTool::Function {
-                name,
-                description,
-                parameters,
-                strict,
-            } => {
-                assert_eq!(name, "run_patch");
-                assert_eq!(description.as_deref(), Some("Apply patch text"));
-                assert!(parameters.is_some());
-                assert_eq!(strict, Some(false));
-            }
-            _ => panic!("expected function tool after xAI normalization"),
-        }
-    }
-
-    #[test]
-    fn test_normalize_xai_non_custom_tool_unchanged() {
-        let normalized = normalize_xai_responses_tool(
-            ResponsesTool::Function {
-                name: "search_docs".to_string(),
-                description: Some("Search docs".to_string()),
-                parameters: Some(serde_json::json!({"type":"object"})),
-                strict: Some(true),
-            },
-            1,
-        );
-
-        match normalized {
-            ResponsesTool::Function { name, strict, .. } => {
-                assert_eq!(name, "search_docs");
-                assert_eq!(strict, Some(true));
-            }
-            _ => panic!("expected function tool to remain unchanged"),
-        }
-    }
-}
diff --git a/crates/brightstaff/src/state/mod.rs b/crates/brightstaff/src/state/mod.rs
index bc51fbe9..3d59f359 100644
--- a/crates/brightstaff/src/state/mod.rs
+++ b/crates/brightstaff/src/state/mod.rs
@@ -147,3 +147,101 @@ pub async fn retrieve_and_combine_input(
     let combined_input = storage.merge(&prev_state, current_input);
     Ok(combined_input)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::extract_input_items;
+    use hermesllm::apis::openai_responses::{
+        InputContent, InputItem, InputMessage, InputParam, MessageContent, MessageRole,
+    };
+
+    #[test]
+    fn test_extract_input_items_converts_text_to_user_message_item() {
+        let extracted = extract_input_items(&InputParam::Text("hello world".to_string()));
+        assert_eq!(extracted.len(), 1);
+
+        let InputItem::Message(message) = &extracted[0] else {
+            panic!("expected InputItem::Message");
+        };
+        assert!(matches!(message.role, MessageRole::User));
+
+        let MessageContent::Items(items) = &message.content else {
+            panic!("expected MessageContent::Items");
+        };
+        assert_eq!(items.len(), 1);
+
+        let InputContent::InputText { text } = &items[0] else {
+            panic!("expected InputContent::InputText");
+        };
+        assert_eq!(text, "hello world");
+    }
+
+    #[test]
+    fn test_extract_input_items_preserves_single_item() {
+        let item = InputItem::Message(InputMessage {
+            role: MessageRole::Assistant,
+            content: MessageContent::Items(vec![InputContent::InputText {
+                text: "assistant note".to_string(),
+            }]),
+        });
+
+        let extracted = extract_input_items(&InputParam::SingleItem(item.clone()));
+        assert_eq!(extracted.len(), 1);
+        let InputItem::Message(message) = &extracted[0] else {
+            panic!("expected InputItem::Message");
+        };
+        assert!(matches!(message.role, MessageRole::Assistant));
+        let MessageContent::Items(items) = &message.content else {
+            panic!("expected MessageContent::Items");
+        };
+        let InputContent::InputText { text } = &items[0] else {
+            panic!("expected InputContent::InputText");
+        };
+        assert_eq!(text, "assistant note");
+    }
+
+    #[test]
+    fn test_extract_input_items_preserves_items_list() {
+        let items = vec![
+            InputItem::Message(InputMessage {
+                role: MessageRole::User,
+                content: MessageContent::Items(vec![InputContent::InputText {
+                    text: "first".to_string(),
+                }]),
+            }),
+            InputItem::Message(InputMessage {
+                role: MessageRole::Assistant,
+                content: MessageContent::Items(vec![InputContent::InputText {
+                    text: "second".to_string(),
+                }]),
+            }),
+        ];
+
+        let extracted = extract_input_items(&InputParam::Items(items.clone()));
+        assert_eq!(extracted.len(), items.len());
+
+        let InputItem::Message(first) = &extracted[0] else {
+            panic!("expected first item to be message");
+        };
+        assert!(matches!(first.role, MessageRole::User));
+        let MessageContent::Items(first_items) = &first.content else {
+            panic!("expected MessageContent::Items");
+        };
+        let InputContent::InputText { text: first_text } = &first_items[0] else {
+            panic!("expected InputContent::InputText");
+        };
+        assert_eq!(first_text, "first");
+
+        let InputItem::Message(second) = &extracted[1] else {
+            panic!("expected second item to be message");
+        };
+        assert!(matches!(second.role, MessageRole::Assistant));
+        let MessageContent::Items(second_items) = &second.content else {
+            panic!("expected MessageContent::Items");
+        };
+        let InputContent::InputText { text: second_text } = &second_items[0] else {
+            panic!("expected InputContent::InputText");
+        };
+        assert_eq!(second_text, "second");
+    }
+}
diff --git a/crates/hermesllm/src/apis/openai_responses.rs b/crates/hermesllm/src/apis/openai_responses.rs
index 8c1936f9..09999586 100644
--- a/crates/hermesllm/src/apis/openai_responses.rs
+++ b/crates/hermesllm/src/apis/openai_responses.rs
@@ -147,8 +147,6 @@ pub enum InputItem {
         call_id: String,
         output: serde_json::Value,
     },
-    /// Forward-compat fallback for unknown input item shapes.
-    Unknown(serde_json::Value),
 }
 
 /// Input message with role and content
@@ -201,9 +199,6 @@ pub enum InputContent {
         data: Option<String>,
         format: Option<String>,
     },
-    /// Forward-compat fallback for unknown content parts.
-    #[serde(other)]
-    Unknown,
 }
 
 /// Modality options
@@ -1055,61 +1050,39 @@ impl ProviderRequest for ResponsesAPIRequest {
     }
 
     fn extract_messages_text(&self) -> String {
+        fn content_items_to_text(content_items: &[InputContent]) -> String {
+            content_items.iter().fold(String::new(), |acc, content| {
+                acc + " "
+                    + &match content {
+                        InputContent::InputText { text } => text.clone(),
+                        InputContent::InputImage { .. } => "[Image]".to_string(),
+                        InputContent::InputFile { .. } => "[File]".to_string(),
+                        InputContent::InputAudio { .. } => "[Audio]".to_string(),
+                    }
+            })
+        }
+
+        fn message_content_to_text(content: &MessageContent) -> String {
+            match content {
+                MessageContent::Text(text) => text.clone(),
+                MessageContent::Items(content_items) => content_items_to_text(content_items),
+            }
+        }
+
         match &self.input {
             InputParam::Text(text) => text.clone(),
            InputParam::SingleItem(item) => {
                 // Normalize single-item input for extraction behavior parity.
                 match item {
-                    InputItem::Message(msg) => match &msg.content {
-                        MessageContent::Text(text) => text.clone(),
-                        MessageContent::Items(content_items) => {
-                            content_items.iter().fold(String::new(), |acc, content| {
-                                acc + " "
-                                    + &match content {
-                                        InputContent::InputText { text } => text.clone(),
-                                        InputContent::InputImage { .. } => "[Image]".to_string(),
-                                        InputContent::InputFile { .. } => "[File]".to_string(),
-                                        InputContent::InputAudio { .. } => "[Audio]".to_string(),
-                                        InputContent::Unknown => String::new(),
-                                    }
-                            })
-                        }
-                    },
+                    InputItem::Message(msg) => message_content_to_text(&msg.content),
                     _ => String::new(),
                 }
             }
-            InputParam::Items(items) => {
-                items.iter().fold(String::new(), |acc, item| {
-                    match item {
-                        InputItem::Message(msg) => {
-                            let content_text = match &msg.content {
-                                MessageContent::Text(text) => text.clone(),
-                                MessageContent::Items(content_items) => {
-                                    content_items.iter().fold(String::new(), |acc, content| {
-                                        acc + " "
-                                            + &match content {
-                                                InputContent::InputText { text } => text.clone(),
-                                                InputContent::InputImage { .. } => {
-                                                    "[Image]".to_string()
-                                                }
-                                                InputContent::InputFile { .. } => {
-                                                    "[File]".to_string()
-                                                }
-                                                InputContent::InputAudio { .. } => {
-                                                    "[Audio]".to_string()
-                                                }
-                                                InputContent::Unknown => String::new(),
-                                            }
-                                    })
-                                }
-                            };
-                            acc + " " + &content_text
-                        }
-                        // Skip non-message items (references, outputs, etc.)
-                        _ => acc,
-                    }
-                })
-            }
+            InputParam::Items(items) => items.iter().fold(String::new(), |acc, item| match item {
+                InputItem::Message(msg) => acc + " " + &message_content_to_text(&msg.content),
+                // Skip non-message items (references, outputs, etc.)
+                _ => acc,
+            }),
         }
     }
 
diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs
index 23e14604..26240ffb 100644
--- a/crates/hermesllm/src/clients/endpoints.rs
+++ b/crates/hermesllm/src/clients/endpoints.rs
@@ -185,7 +185,7 @@ impl SupportedAPIsFromClient {
                 // For Responses API, check if provider supports it, otherwise translate to chat/completions
                 match provider_id {
                     // Providers that support /v1/responses natively
-                    ProviderId::OpenAI | ProviderId::XAI => route_by_provider("/responses"),
+                    ProviderId::OpenAI => route_by_provider("/responses"),
                     // All other providers: translate to /chat/completions
                     _ => route_by_provider("/chat/completions"),
                 }
@@ -656,7 +656,7 @@ mod tests {
     }
 
     #[test]
-    fn test_responses_api_targets_xai_native_responses_endpoint() {
+    fn test_responses_api_targets_xai_chat_completions_endpoint() {
         let api = SupportedAPIsFromClient::OpenAIResponsesAPI(OpenAIApi::Responses);
         assert_eq!(
             api.target_endpoint_for_provider(
@@ -666,7 +666,7 @@ mod tests {
                 false,
                 None
             ),
-            "/v1/responses"
+            "/v1/chat/completions"
         );
     }
 }
diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs
index 11008711..fe76b8b8 100644
--- a/crates/hermesllm/src/providers/id.rs
+++ b/crates/hermesllm/src/providers/id.rs
@@ -166,11 +166,10 @@ impl ProviderId {
                 SupportedAPIsFromClient::OpenAIChatCompletions(_),
             ) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
 
-            // OpenAI Responses API - OpenAI and xAI support this natively
-            (
-                ProviderId::OpenAI | ProviderId::XAI,
-                SupportedAPIsFromClient::OpenAIResponsesAPI(_),
-            ) => SupportedUpstreamAPIs::OpenAIResponsesAPI(OpenAIApi::Responses),
+            // OpenAI Responses API
+            (ProviderId::OpenAI, SupportedAPIsFromClient::OpenAIResponsesAPI(_)) => {
+                SupportedUpstreamAPIs::OpenAIResponsesAPI(OpenAIApi::Responses)
+            }
 
             // Amazon Bedrock natively supports Bedrock APIs
             (ProviderId::AmazonBedrock, SupportedAPIsFromClient::OpenAIChatCompletions(_)) => {
@@ -331,14 +330,14 @@ mod tests {
     }
 
     #[test]
-    fn test_xai_uses_responses_api_for_responses_clients() {
+    fn test_xai_uses_chat_completions_for_responses_clients() {
         use crate::clients::endpoints::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
 
         let client_api = SupportedAPIsFromClient::OpenAIResponsesAPI(OpenAIApi::Responses);
         let upstream = ProviderId::XAI.compatible_api_for_client(&client_api, false);
         assert!(matches!(
             upstream,
-            SupportedUpstreamAPIs::OpenAIResponsesAPI(OpenAIApi::Responses)
+            SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions)
         ));
     }
 }
diff --git a/crates/hermesllm/src/providers/request.rs b/crates/hermesllm/src/providers/request.rs
index e97e8a68..92688133 100644
--- a/crates/hermesllm/src/providers/request.rs
+++ b/crates/hermesllm/src/providers/request.rs
@@ -5,6 +5,7 @@ use crate::apis::amazon_bedrock::{ConverseRequest, ConverseStreamRequest};
 use crate::apis::openai_responses::ResponsesAPIRequest;
 use crate::clients::endpoints::SupportedAPIsFromClient;
 use crate::clients::endpoints::SupportedUpstreamAPIs;
+use crate::ProviderId;
 use serde_json::Value;
 use std::collections::HashMap;
 
@@ -70,6 +71,25 @@
             Self::ResponsesAPIRequest(r) => r.set_messages(messages),
         }
     }
+
+    /// Apply provider-specific request normalization before sending upstream.
+    pub fn normalize_for_upstream(
+        &mut self,
+        provider_id: ProviderId,
+        upstream_api: &SupportedUpstreamAPIs,
+    ) {
+        if provider_id == ProviderId::XAI
+            && matches!(
+                upstream_api,
+                SupportedUpstreamAPIs::OpenAIChatCompletions(_)
+            )
+        {
+            if let Self::ChatCompletionsRequest(req) = self {
+                // xAI's legacy live-search shape is deprecated on chat/completions.
+                req.web_search_options = None;
+            }
+        }
+    }
 }
 
 impl ProviderRequest for ProviderRequestType {
@@ -787,6 +807,62 @@
         }
     }
 
+    #[test]
+    fn test_normalize_for_upstream_xai_clears_chat_web_search_options() {
+        use crate::apis::openai::{Message, MessageContent, OpenAIApi, Role};
+
+        let mut request = ProviderRequestType::ChatCompletionsRequest(ChatCompletionsRequest {
+            model: "grok-4".to_string(),
+            messages: vec![Message {
+                role: Role::User,
+                content: Some(MessageContent::Text("hello".to_string())),
+                name: None,
+                tool_calls: None,
+                tool_call_id: None,
+            }],
+            web_search_options: Some(serde_json::json!({"search_context_size":"medium"})),
+            ..Default::default()
+        });
+
+        request.normalize_for_upstream(
+            ProviderId::XAI,
+            &SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
+        );
+
+        let ProviderRequestType::ChatCompletionsRequest(req) = request else {
+            panic!("expected chat request");
+        };
+        assert!(req.web_search_options.is_none());
+    }
+
+    #[test]
+    fn test_normalize_for_upstream_non_xai_keeps_chat_web_search_options() {
+        use crate::apis::openai::{Message, MessageContent, OpenAIApi, Role};
+
+        let mut request = ProviderRequestType::ChatCompletionsRequest(ChatCompletionsRequest {
+            model: "gpt-4o".to_string(),
+            messages: vec![Message {
+                role: Role::User,
+                content: Some(MessageContent::Text("hello".to_string())),
+                name: None,
+                tool_calls: None,
+                tool_call_id: None,
+            }],
+            web_search_options: Some(serde_json::json!({"search_context_size":"medium"})),
+            ..Default::default()
+        });
+
+        request.normalize_for_upstream(
+            ProviderId::OpenAI,
+            &SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
+        );
+
+        let ProviderRequestType::ChatCompletionsRequest(req) = request else {
+            panic!("expected chat request");
+        };
+        assert!(req.web_search_options.is_some());
+    }
+
     #[test]
     fn test_responses_api_to_anthropic_messages_conversion() {
         use crate::apis::anthropic::AnthropicApi::Messages;
diff --git a/crates/hermesllm/src/transforms/request/from_openai.rs b/crates/hermesllm/src/transforms/request/from_openai.rs
index 780d96d1..f2e8ab0d 100644
--- a/crates/hermesllm/src/transforms/request/from_openai.rs
+++ b/crates/hermesllm/src/transforms/request/from_openai.rs
@@ -136,7 +136,6 @@ impl TryFrom for Vec {
                         }
                         InputContent::InputFile { .. } => None, // Skip files for now
                         InputContent::InputAudio { .. } => None, // Skip audio for now
-                        InputContent::Unknown => None,
                     })
                     .collect(),
             )
@@ -162,7 +161,6 @@ impl TryFrom for Vec {
                         }
                         InputContent::InputFile { .. } => None, // Skip files for now
                         InputContent::InputAudio { .. } => None, // Skip audio for now
-                        InputContent::Unknown => None,
                     })
                     .collect(),
             )
@@ -228,7 +226,7 @@ impl TryFrom for Vec {
                     tool_calls: Some(vec![tool_call]),
                 });
             }
-            InputItem::ItemReference { .. } | InputItem::Unknown(_) => {
+            InputItem::ItemReference { .. } => {
                 // Item references/unknown entries are metadata-like and can be skipped
                 // for chat-completions conversion.
             }
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index fdc3eac1..7a353bcb 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -20,7 +20,6 @@ use common::llm_providers::LlmProviders;
 use common::ratelimit::Header;
 use common::stats::{IncrementingMetric, RecordingMetric};
 use common::{ratelimit, routing, tokenizer};
-use hermesllm::apis::openai_responses::{ResponsesAPIRequest, Tool as ResponsesTool};
 use hermesllm::apis::streaming_shapes::amazon_bedrock_binary_frame::BedrockBinaryFrameDecoder;
 use hermesllm::apis::streaming_shapes::sse::{SseEvent, SseStreamBuffer, SseStreamBufferTrait};
 use hermesllm::apis::streaming_shapes::sse_chunk_processor::SseChunkProcessor;
@@ -1048,11 +1047,7 @@ impl HttpContext for StreamContext {
 
                 match ProviderRequestType::try_from((deserialized_client_request, upstream)) {
                     Ok(mut request) => {
-                        normalize_xai_responses_tools_for_upstream(
-                            &mut request,
-                            self.get_provider_id(),
-                            upstream,
-                        );
+                        request.normalize_for_upstream(self.get_provider_id(), upstream);
                         debug!(
                             "request_id={}: upstream request payload: {}",
                             self.request_identifier(),
@@ -1231,51 +1226,6 @@ impl HttpContext for StreamContext {
     }
 }
 
-fn normalize_xai_responses_tools_for_upstream(
-    request: &mut ProviderRequestType,
-    provider_id: ProviderId,
-    resolved_api: &SupportedUpstreamAPIs,
-) {
-    if provider_id != ProviderId::XAI {
-        return;
-    }
-    if !matches!(resolved_api, SupportedUpstreamAPIs::OpenAIResponsesAPI(_)) {
-        return;
-    }
-    if let ProviderRequestType::ResponsesAPIRequest(responses_req) = request {
-        normalize_responses_tools_for_xai(responses_req);
-    }
-}
-
-fn normalize_responses_tools_for_xai(req: &mut ResponsesAPIRequest) {
-    if let Some(tools) = req.tools.take() {
-        req.tools = Some(
-            tools
-                .into_iter()
-                .enumerate()
-                .map(|(idx, tool)| match tool {
-                    ResponsesTool::Custom {
-                        name, description, ..
-                    } => ResponsesTool::Function {
-                        name: name.unwrap_or_else(|| format!("custom_tool_{}", idx + 1)),
-                        description,
-                        parameters: Some(serde_json::json!({
-                            "type": "object",
-                            "properties": {
-                                "input": { "type": "string" }
-                            },
-                            "required": ["input"],
-                            "additionalProperties": true
-                        })),
-                        strict: Some(false),
-                    },
-                    other => other,
-                })
-                .collect(),
-        );
-    }
-}
-
 fn current_time_ns() -> u128 {
     SystemTime::now()
         .duration_since(UNIX_EPOCH)