adding support for moonshot and z-ai (#578)

* adding support for moonshot and z-ai

* Revert unwanted changes to arch_config.yaml

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
This commit is contained in:
Salman Paracha 2025-09-30 12:24:06 -07:00 committed by GitHub
parent 7df1b8cdb0
commit 045a5e9751
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 186 additions and 22 deletions

View file

@ -601,6 +601,56 @@ static_resources:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
# Upstream cluster for the Moonshot AI API.
# Resolves api.moonshot.ai through logical DNS (IPv4 only) and connects on
# port 443 with a 0.5s connect timeout. Traffic is wrapped in TLS (minimum
# TLSv1_2, maximum TLSv1_3) with SNI set to the same hostname.
- name: moonshotai
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: moonshotai
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.moonshot.ai
port_value: 443
hostname: "api.moonshot.ai"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.moonshot.ai
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
# Upstream cluster for the Zhipu AI API.
# Resolves open.bigmodel.cn through logical DNS (IPv4 only) and connects on
# port 443 with a 0.5s connect timeout. Traffic is wrapped in TLS (minimum
# TLSv1_2, maximum TLSv1_3) with SNI set to the same hostname.
- name: zhipu
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: zhipu
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: open.bigmodel.cn
port_value: 443
hostname: "open.bigmodel.cn"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: open.bigmodel.cn
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: together_ai
connect_timeout: 0.5s
type: LOGICAL_DNS

View file

@ -18,6 +18,8 @@ SUPPORTED_PROVIDERS = [
"azure_openai",
"xai",
"ollama",
"moonshotai",
"zhipu",
]

View file

@ -175,6 +175,10 @@ pub enum LlmProviderType {
AzureOpenAI,
#[serde(rename = "ollama")]
Ollama,
#[serde(rename = "moonshotai")]
Moonshotai,
#[serde(rename = "zhipu")]
Zhipu,
}
impl Display for LlmProviderType {
@ -191,6 +195,8 @@ impl Display for LlmProviderType {
LlmProviderType::TogetherAI => write!(f, "together_ai"),
LlmProviderType::AzureOpenAI => write!(f, "azure_openai"),
LlmProviderType::Ollama => write!(f, "ollama"),
LlmProviderType::Moonshotai => write!(f, "moonshotai"),
LlmProviderType::Zhipu => write!(f, "zhipu"),
}
}
}
@ -242,7 +248,7 @@ impl IntoModels for Vec<LlmProvider> {
.iter()
.map(|provider| ModelDetail {
id: provider.name.clone(),
object: "model".to_string(),
object: Some("model".to_string()),
created: 0,
owned_by: "system".to_string(),
})

View file

@ -380,7 +380,7 @@ pub enum StaticContentType {
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ChatCompletionsResponse {
pub id: String,
pub object: String,
pub object: Option<String>,
pub created: u64,
pub model: String,
pub choices: Vec<Choice>,
@ -393,7 +393,7 @@ impl Default for ChatCompletionsResponse {
fn default() -> Self {
ChatCompletionsResponse {
id: String::new(),
object: String::new(),
object: None,
created: 0,
model: String::new(),
choices: vec![],
@ -486,7 +486,7 @@ impl Default for Choice {
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ChatCompletionsStreamResponse {
pub id: String,
pub object: String,
pub object: Option<String>,
pub created: u64,
pub model: String,
pub choices: Vec<StreamChoice>,
@ -549,7 +549,7 @@ pub struct StreamOptions {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelDetail {
pub id: String,
pub object: String,
pub object: Option<String>,
pub created: usize,
pub owned_by: String,
}
@ -1233,7 +1233,7 @@ mod tests {
let response: ChatCompletionsResponse = serde_json::from_str(json_response).unwrap();
assert_eq!(response.id, "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2");
assert_eq!(response.object, "chat.completion");
assert_eq!(response.object.as_deref(), Some("chat.completion"));
assert_eq!(response.created, 1756574706);
assert_eq!(response.model, "gpt-4o-2024-08-06");
assert_eq!(response.service_tier, Some("default".to_string()));

View file

@ -80,6 +80,13 @@ impl SupportedAPIs {
default_endpoint
}
}
// Zhipu: any request path beginning with "/v1/" is rewritten to the
// provider's "/api/paas/v4/chat/completions" endpoint; every other path
// keeps `default_endpoint` unchanged.
// NOTE(review): the rewrite applies to all "/v1/" paths, not only chat
// completions — confirm that is intended.
ProviderId::Zhipu => {
if request_path.starts_with("/v1/") {
"/api/paas/v4/chat/completions".to_string()
} else {
default_endpoint
}
}
ProviderId::AzureOpenAI => {
if request_path.starts_with("/v1/") {
format!("/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview", model_id)

View file

@ -210,7 +210,7 @@ impl TryFrom<MessagesResponse> for ChatCompletionsResponse {
Ok(ChatCompletionsResponse {
id: resp.id,
object: "chat.completion".to_string(),
object: Some("chat.completion".to_string()),
created: current_timestamp(),
model: resp.model,
choices: vec![choice],
@ -329,7 +329,7 @@ impl TryFrom<MessagesStreamEvent> for ChatCompletionsStreamResponse {
MessagesStreamEvent::Ping => {
Ok(ChatCompletionsStreamResponse {
id: "stream".to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: current_timestamp(),
model: "unknown".to_string(),
choices: vec![],
@ -709,7 +709,7 @@ fn create_openai_chunk(
) -> ChatCompletionsStreamResponse {
ChatCompletionsStreamResponse {
id: id.to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: current_timestamp(),
model: model.to_string(),
choices: vec![StreamChoice {
@ -1254,7 +1254,7 @@ mod tests {
let openai_resp: ChatCompletionsStreamResponse = event.try_into().unwrap();
assert_eq!(openai_resp.id, "msg_stream_123");
assert_eq!(openai_resp.object, "chat.completion.chunk");
assert_eq!(openai_resp.object.as_deref(), Some("chat.completion.chunk"));
assert_eq!(openai_resp.model, "claude-3");
assert_eq!(openai_resp.choices.len(), 1);
@ -1276,7 +1276,7 @@ mod tests {
let openai_resp: ChatCompletionsStreamResponse = event.try_into().unwrap();
assert_eq!(openai_resp.object, "chat.completion.chunk");
assert_eq!(openai_resp.object.as_deref(), Some("chat.completion.chunk"));
assert_eq!(openai_resp.choices.len(), 1);
let choice = &openai_resp.choices[0];
@ -1376,7 +1376,7 @@ mod tests {
let openai_resp: ChatCompletionsStreamResponse = event.try_into().unwrap();
assert_eq!(openai_resp.object, "chat.completion.chunk");
assert_eq!(openai_resp.object.as_deref(), Some("chat.completion.chunk"));
assert_eq!(openai_resp.choices.len(), 0); // Ping has no choices
}
@ -1384,7 +1384,7 @@ mod tests {
fn test_openai_to_anthropic_streaming_role_start() {
let openai_resp = ChatCompletionsStreamResponse {
id: "chatcmpl-123".to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: 1234567890,
model: "gpt-4".to_string(),
choices: vec![StreamChoice {
@ -1420,7 +1420,7 @@ mod tests {
fn test_openai_to_anthropic_streaming_content_delta() {
let openai_resp = ChatCompletionsStreamResponse {
id: "chatcmpl-123".to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: 1234567890,
model: "gpt-4".to_string(),
choices: vec![StreamChoice {
@ -1460,7 +1460,7 @@ mod tests {
fn test_openai_to_anthropic_streaming_tool_calls() {
let openai_resp = ChatCompletionsStreamResponse {
id: "chatcmpl-123".to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: 1234567890,
model: "gpt-4".to_string(),
choices: vec![StreamChoice {
@ -1509,7 +1509,7 @@ mod tests {
fn test_openai_to_anthropic_streaming_final_usage() {
let openai_resp = ChatCompletionsStreamResponse {
id: "chatcmpl-123".to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: 1234567890,
model: "gpt-4".to_string(),
choices: vec![StreamChoice {
@ -1551,7 +1551,7 @@ mod tests {
fn test_openai_empty_choices_to_anthropic_ping() {
let openai_resp = ChatCompletionsStreamResponse {
id: "chatcmpl-123".to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: 1234567890,
model: "gpt-4".to_string(),
choices: vec![], // Empty choices
@ -1690,7 +1690,7 @@ mod tests {
// Test that malformed streaming events are handled gracefully
let openai_resp_with_missing_data = ChatCompletionsStreamResponse {
id: "test".to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: 1234567890,
model: "test".to_string(),
choices: vec![StreamChoice {
@ -1722,7 +1722,7 @@ mod tests {
let openai_resp: ChatCompletionsStreamResponse = event.try_into().unwrap();
// ContentBlockStop should produce an empty chunk
assert_eq!(openai_resp.object, "chat.completion.chunk");
assert_eq!(openai_resp.object.as_deref(), Some("chat.completion.chunk"));
assert_eq!(openai_resp.choices.len(), 1);
let choice = &openai_resp.choices[0];

View file

@ -17,6 +17,8 @@ pub enum ProviderId {
XAI,
TogetherAI,
Ollama,
Moonshotai,
Zhipu,
}
impl From<&str> for ProviderId {
@ -34,6 +36,8 @@ impl From<&str> for ProviderId {
"xai" => ProviderId::XAI,
"together_ai" => ProviderId::TogetherAI,
"ollama" => ProviderId::Ollama,
"moonshotai" => ProviderId::Moonshotai,
"zhipu" => ProviderId::Zhipu,
_ => panic!("Unknown provider: {}", value),
}
}
@ -58,7 +62,9 @@ impl ProviderId {
| ProviderId::AzureOpenAI
| ProviderId::XAI
| ProviderId::TogetherAI
| ProviderId::Ollama,
| ProviderId::Ollama
| ProviderId::Moonshotai
| ProviderId::Zhipu,
SupportedAPIs::AnthropicMessagesAPI(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
(ProviderId::OpenAI
@ -71,7 +77,9 @@ impl ProviderId {
| ProviderId::AzureOpenAI
| ProviderId::XAI
| ProviderId::TogetherAI
| ProviderId::Ollama,
| ProviderId::Ollama
| ProviderId::Moonshotai
| ProviderId::Zhipu,
SupportedAPIs::OpenAIChatCompletions(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
}
}
@ -92,6 +100,8 @@ impl Display for ProviderId {
ProviderId::XAI => write!(f, "xai"),
ProviderId::TogetherAI => write!(f, "together_ai"),
ProviderId::Ollama => write!(f, "ollama"),
ProviderId::Moonshotai => write!(f, "moonshotai"),
ProviderId::Zhipu => write!(f, "zhipu"),
}
}
}

View file

@ -787,7 +787,7 @@ mod tests {
// Test OpenAI event type (should be None)
let openai_event = ChatCompletionsStreamResponse {
id: "test".to_string(),
object: "chat.completion.chunk".to_string(),
object: Some("chat.completion.chunk".to_string()),
created: 123456789,
model: "gpt-4".to_string(),
choices: vec![],

View file

@ -402,6 +402,94 @@ xAI
- model: xai/grok-beta
access_key: $XAI_API_KEY
Moonshot AI
~~~~~~~~~~~
**Provider Prefix:** ``moonshotai/``
**API Endpoint:** ``/v1/chat/completions``
**Authentication:** API Key - Get your Moonshot AI API key from `Moonshot AI Platform <https://platform.moonshot.ai/>`_.
**Supported Chat Models:** All Moonshot AI chat models including Kimi K2, Moonshot v1, and all future releases.
.. list-table::
:header-rows: 1
:widths: 30 20 50
* - Model Name
- Model ID for Config
- Description
* - Kimi K2 Preview
- ``moonshotai/kimi-k2-0905-preview``
- Foundation model with 32B activated parameters, optimized for agentic tasks
* - Moonshot v1 32K
- ``moonshotai/moonshot-v1-32k``
- Extended context model with 32K tokens
* - Moonshot v1 128K
- ``moonshotai/moonshot-v1-128k``
- Long context model with 128K tokens
**Configuration Examples:**
.. code-block:: yaml
llm_providers:
# Latest K2 models for agentic tasks
- model: moonshotai/kimi-k2-0905-preview
access_key: $MOONSHOTAI_API_KEY
# V1 models with different context lengths
- model: moonshotai/moonshot-v1-32k
access_key: $MOONSHOTAI_API_KEY
- model: moonshotai/moonshot-v1-128k
access_key: $MOONSHOTAI_API_KEY
Zhipu AI
~~~~~~~~
**Provider Prefix:** ``zhipu/``
**API Endpoint:** ``/api/paas/v4/chat/completions``
**Authentication:** API Key - Get your Zhipu AI API key from `Zhipu AI Platform <https://open.bigmodel.cn/console/overview/>`_.
**Supported Chat Models:** All Zhipu AI GLM models including GLM-4.6, GLM-4.5, GLM-4.5 Air, and all future releases.
.. list-table::
:header-rows: 1
:widths: 30 20 50
* - Model Name
- Model ID for Config
- Description
* - GLM-4.6
- ``zhipu/glm-4.6``
- Latest and most capable GLM model with enhanced reasoning abilities
* - GLM-4.5
- ``zhipu/glm-4.5``
- High-performance model with multimodal capabilities
* - GLM-4.5 Air
- ``zhipu/glm-4.5-air``
- Lightweight and fast model optimized for efficiency
**Configuration Examples:**
.. code-block:: yaml
llm_providers:
# Latest GLM models
- model: zhipu/glm-4.6
access_key: $ZHIPU_API_KEY
- model: zhipu/glm-4.5
access_key: $ZHIPU_API_KEY
- model: zhipu/glm-4.5-air
access_key: $ZHIPU_API_KEY
Providers Requiring Base URL
----------------------------
@ -451,6 +539,7 @@ Ollama
- model: ollama/codellama
base_url: http://host.docker.internal:11434
OpenAI-Compatible Providers
~~~~~~~~~~~~~~~~~~~~~~~~~~~