diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 16b14343..f2507142 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -601,6 +601,56 @@ static_resources: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 + - name: moonshotai + connect_timeout: 0.5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: moonshotai + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.moonshot.ai + port_value: 443 + hostname: "api.moonshot.ai" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: api.moonshot.ai + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + + - name: zhipu + connect_timeout: 0.5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: zhipu + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: open.bigmodel.cn + port_value: 443 + hostname: "open.bigmodel.cn" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: open.bigmodel.cn + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + - name: together_ai connect_timeout: 0.5s type: LOGICAL_DNS diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 965bf040..2d86dbef 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -18,6 +18,8 @@ SUPPORTED_PROVIDERS = [ "azure_openai", "xai", "ollama", + "moonshotai", + "zhipu", ] diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 9ad2dc0a..a37a0c80 100644 --- 
a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -175,6 +175,10 @@ pub enum LlmProviderType { AzureOpenAI, #[serde(rename = "ollama")] Ollama, + #[serde(rename = "moonshotai")] + Moonshotai, + #[serde(rename = "zhipu")] + Zhipu, } impl Display for LlmProviderType { @@ -191,6 +195,8 @@ impl Display for LlmProviderType { LlmProviderType::TogetherAI => write!(f, "together_ai"), LlmProviderType::AzureOpenAI => write!(f, "azure_openai"), LlmProviderType::Ollama => write!(f, "ollama"), + LlmProviderType::Moonshotai => write!(f, "moonshotai"), + LlmProviderType::Zhipu => write!(f, "zhipu"), } } } @@ -242,7 +248,7 @@ impl IntoModels for Vec { .iter() .map(|provider| ModelDetail { id: provider.name.clone(), - object: "model".to_string(), + object: Some("model".to_string()), created: 0, owned_by: "system".to_string(), }) diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs index d7d6ea70..63b5fc58 100644 --- a/crates/hermesllm/src/apis/openai.rs +++ b/crates/hermesllm/src/apis/openai.rs @@ -380,7 +380,7 @@ pub enum StaticContentType { #[derive(Serialize, Deserialize, Debug, Clone)] pub struct ChatCompletionsResponse { pub id: String, - pub object: String, + pub object: Option, pub created: u64, pub model: String, pub choices: Vec, @@ -393,7 +393,7 @@ impl Default for ChatCompletionsResponse { fn default() -> Self { ChatCompletionsResponse { id: String::new(), - object: String::new(), + object: None, created: 0, model: String::new(), choices: vec![], @@ -486,7 +486,7 @@ impl Default for Choice { #[derive(Serialize, Deserialize, Debug, Clone)] pub struct ChatCompletionsStreamResponse { pub id: String, - pub object: String, + pub object: Option, pub created: u64, pub model: String, pub choices: Vec, @@ -549,7 +549,7 @@ pub struct StreamOptions { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ModelDetail { pub id: String, - pub object: String, + pub object: Option, pub created: usize, pub owned_by: 
String, } @@ -1233,7 +1233,7 @@ mod tests { let response: ChatCompletionsResponse = serde_json::from_str(json_response).unwrap(); assert_eq!(response.id, "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2"); - assert_eq!(response.object, "chat.completion"); + assert_eq!(response.object.as_deref(), Some("chat.completion")); assert_eq!(response.created, 1756574706); assert_eq!(response.model, "gpt-4o-2024-08-06"); assert_eq!(response.service_tier, Some("default".to_string())); diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs index 2b0f1ca8..fd4e0149 100644 --- a/crates/hermesllm/src/clients/endpoints.rs +++ b/crates/hermesllm/src/clients/endpoints.rs @@ -80,6 +80,13 @@ impl SupportedAPIs { default_endpoint } } + ProviderId::Zhipu => { + if request_path.starts_with("/v1/") { + "/api/paas/v4/chat/completions".to_string() + } else { + default_endpoint + } + } ProviderId::AzureOpenAI => { if request_path.starts_with("/v1/") { format!("/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview", model_id) diff --git a/crates/hermesllm/src/clients/transformer.rs b/crates/hermesllm/src/clients/transformer.rs index 0856c359..f6e508d4 100644 --- a/crates/hermesllm/src/clients/transformer.rs +++ b/crates/hermesllm/src/clients/transformer.rs @@ -210,7 +210,7 @@ impl TryFrom for ChatCompletionsResponse { Ok(ChatCompletionsResponse { id: resp.id, - object: "chat.completion".to_string(), + object: Some("chat.completion".to_string()), created: current_timestamp(), model: resp.model, choices: vec![choice], @@ -329,7 +329,7 @@ impl TryFrom for ChatCompletionsStreamResponse { MessagesStreamEvent::Ping => { Ok(ChatCompletionsStreamResponse { id: "stream".to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: current_timestamp(), model: "unknown".to_string(), choices: vec![], @@ -709,7 +709,7 @@ fn create_openai_chunk( ) -> ChatCompletionsStreamResponse { 
ChatCompletionsStreamResponse { id: id.to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: current_timestamp(), model: model.to_string(), choices: vec![StreamChoice { @@ -1254,7 +1254,7 @@ mod tests { let openai_resp: ChatCompletionsStreamResponse = event.try_into().unwrap(); assert_eq!(openai_resp.id, "msg_stream_123"); - assert_eq!(openai_resp.object, "chat.completion.chunk"); + assert_eq!(openai_resp.object.as_deref(), Some("chat.completion.chunk")); assert_eq!(openai_resp.model, "claude-3"); assert_eq!(openai_resp.choices.len(), 1); @@ -1276,7 +1276,7 @@ mod tests { let openai_resp: ChatCompletionsStreamResponse = event.try_into().unwrap(); - assert_eq!(openai_resp.object, "chat.completion.chunk"); + assert_eq!(openai_resp.object.as_deref(), Some("chat.completion.chunk")); assert_eq!(openai_resp.choices.len(), 1); let choice = &openai_resp.choices[0]; @@ -1376,7 +1376,7 @@ mod tests { let openai_resp: ChatCompletionsStreamResponse = event.try_into().unwrap(); - assert_eq!(openai_resp.object, "chat.completion.chunk"); + assert_eq!(openai_resp.object.as_deref(), Some("chat.completion.chunk")); assert_eq!(openai_resp.choices.len(), 0); // Ping has no choices } @@ -1384,7 +1384,7 @@ mod tests { fn test_openai_to_anthropic_streaming_role_start() { let openai_resp = ChatCompletionsStreamResponse { id: "chatcmpl-123".to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: 1234567890, model: "gpt-4".to_string(), choices: vec![StreamChoice { @@ -1420,7 +1420,7 @@ mod tests { fn test_openai_to_anthropic_streaming_content_delta() { let openai_resp = ChatCompletionsStreamResponse { id: "chatcmpl-123".to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: 1234567890, model: "gpt-4".to_string(), choices: vec![StreamChoice { @@ -1460,7 +1460,7 @@ mod tests { fn 
test_openai_to_anthropic_streaming_tool_calls() { let openai_resp = ChatCompletionsStreamResponse { id: "chatcmpl-123".to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: 1234567890, model: "gpt-4".to_string(), choices: vec![StreamChoice { @@ -1509,7 +1509,7 @@ mod tests { fn test_openai_to_anthropic_streaming_final_usage() { let openai_resp = ChatCompletionsStreamResponse { id: "chatcmpl-123".to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: 1234567890, model: "gpt-4".to_string(), choices: vec![StreamChoice { @@ -1551,7 +1551,7 @@ mod tests { fn test_openai_empty_choices_to_anthropic_ping() { let openai_resp = ChatCompletionsStreamResponse { id: "chatcmpl-123".to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: 1234567890, model: "gpt-4".to_string(), choices: vec![], // Empty choices @@ -1690,7 +1690,7 @@ mod tests { // Test that malformed streaming events are handled gracefully let openai_resp_with_missing_data = ChatCompletionsStreamResponse { id: "test".to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: 1234567890, model: "test".to_string(), choices: vec![StreamChoice { @@ -1722,7 +1722,7 @@ mod tests { let openai_resp: ChatCompletionsStreamResponse = event.try_into().unwrap(); // ContentBlockStop should produce an empty chunk - assert_eq!(openai_resp.object, "chat.completion.chunk"); + assert_eq!(openai_resp.object.as_deref(), Some("chat.completion.chunk")); assert_eq!(openai_resp.choices.len(), 1); let choice = &openai_resp.choices[0]; diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs index 649e730c..d1756a08 100644 --- a/crates/hermesllm/src/providers/id.rs +++ b/crates/hermesllm/src/providers/id.rs @@ -17,6 +17,8 @@ pub enum ProviderId { 
XAI, TogetherAI, Ollama, + Moonshotai, + Zhipu, } impl From<&str> for ProviderId { @@ -34,6 +36,8 @@ impl From<&str> for ProviderId { "xai" => ProviderId::XAI, "together_ai" => ProviderId::TogetherAI, "ollama" => ProviderId::Ollama, + "moonshotai" => ProviderId::Moonshotai, + "zhipu" => ProviderId::Zhipu, _ => panic!("Unknown provider: {}", value), } } @@ -58,7 +62,9 @@ impl ProviderId { | ProviderId::AzureOpenAI | ProviderId::XAI | ProviderId::TogetherAI - | ProviderId::Ollama, + | ProviderId::Ollama + | ProviderId::Moonshotai + | ProviderId::Zhipu, SupportedAPIs::AnthropicMessagesAPI(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), (ProviderId::OpenAI @@ -71,7 +77,9 @@ impl ProviderId { | ProviderId::AzureOpenAI | ProviderId::XAI | ProviderId::TogetherAI - | ProviderId::Ollama, + | ProviderId::Ollama + | ProviderId::Moonshotai + | ProviderId::Zhipu, SupportedAPIs::OpenAIChatCompletions(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), } } @@ -92,6 +100,8 @@ impl Display for ProviderId { ProviderId::XAI => write!(f, "xai"), ProviderId::TogetherAI => write!(f, "together_ai"), ProviderId::Ollama => write!(f, "ollama"), + ProviderId::Moonshotai => write!(f, "moonshotai"), + ProviderId::Zhipu => write!(f, "zhipu"), } } } diff --git a/crates/hermesllm/src/providers/response.rs b/crates/hermesllm/src/providers/response.rs index 3087cf5b..6bc4e25f 100644 --- a/crates/hermesllm/src/providers/response.rs +++ b/crates/hermesllm/src/providers/response.rs @@ -787,7 +787,7 @@ mod tests { // Test OpenAI event type (should be None) let openai_event = ChatCompletionsStreamResponse { id: "test".to_string(), - object: "chat.completion.chunk".to_string(), + object: Some("chat.completion.chunk".to_string()), created: 123456789, model: "gpt-4".to_string(), choices: vec![], diff --git a/docs/source/concepts/llm_providers/supported_providers.rst b/docs/source/concepts/llm_providers/supported_providers.rst index 4c32b40b..3531e2e6 100644 --- 
a/docs/source/concepts/llm_providers/supported_providers.rst +++ b/docs/source/concepts/llm_providers/supported_providers.rst @@ -402,6 +402,94 @@ xAI - model: xai/grok-beta access_key: $XAI_API_KEY +Moonshot AI +~~~~~~~~~~~ + +**Provider Prefix:** ``moonshotai/`` + +**API Endpoint:** ``/v1/chat/completions`` + +**Authentication:** API Key - Get your Moonshot AI API key from `Moonshot AI Platform <https://platform.moonshot.ai/>`_. + +**Supported Chat Models:** All Moonshot AI chat models including Kimi K2, Moonshot v1, and all future releases. + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Model Name + - Model ID for Config + - Description + * - Kimi K2 Preview + - ``moonshotai/kimi-k2-0905-preview`` + - Foundation model optimized for agentic tasks with 32B activated parameters + * - Moonshot v1 32K + - ``moonshotai/moonshot-v1-32k`` + - Extended context model with 32K tokens + * - Moonshot v1 128K + - ``moonshotai/moonshot-v1-128k`` + - Long context model with 128K tokens + +**Configuration Examples:** + +.. code-block:: yaml + + llm_providers: + # Latest K2 models for agentic tasks + - model: moonshotai/kimi-k2-0905-preview + access_key: $MOONSHOTAI_API_KEY + + # V1 models with different context lengths + - model: moonshotai/moonshot-v1-32k + access_key: $MOONSHOTAI_API_KEY + + - model: moonshotai/moonshot-v1-128k + access_key: $MOONSHOTAI_API_KEY + + +Zhipu AI +~~~~~~~~ + +**Provider Prefix:** ``zhipu/`` + +**API Endpoint:** ``/api/paas/v4/chat/completions`` + +**Authentication:** API Key - Get your Zhipu AI API key from `Zhipu AI Platform <https://open.bigmodel.cn/>`_. + +**Supported Chat Models:** All Zhipu AI GLM models including GLM-4, GLM-4 Flash, and all future releases. + +.. 
list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Model Name + - Model ID for Config + - Description + * - GLM-4.6 + - ``zhipu/glm-4.6`` + - Latest and most capable GLM model with enhanced reasoning abilities + * - GLM-4.5 + - ``zhipu/glm-4.5`` + - High-performance model with multimodal capabilities + * - GLM-4.5 Air + - ``zhipu/glm-4.5-air`` + - Lightweight and fast model optimized for efficiency + +**Configuration Examples:** + +.. code-block:: yaml + + llm_providers: + # Latest GLM models + - model: zhipu/glm-4.6 + access_key: $ZHIPU_API_KEY + + - model: zhipu/glm-4.5 + access_key: $ZHIPU_API_KEY + + - model: zhipu/glm-4.5-air + access_key: $ZHIPU_API_KEY + Providers Requiring Base URL ---------------------------- @@ -451,6 +539,7 @@ Ollama - model: ollama/codellama base_url: http://host.docker.internal:11434 + OpenAI-Compatible Providers ~~~~~~~~~~~~~~~~~~~~~~~~~~~