mirror of
https://github.com/katanemo/plano.git
synced 2026-05-15 11:02:39 +02:00
add support for gemini (#505)
This commit is contained in:
parent
e734b76086
commit
aa9d747fa9
8 changed files with 42 additions and 16 deletions
|
|
@ -80,6 +80,7 @@ properties:
|
||||||
- groq
|
- groq
|
||||||
- mistral
|
- mistral
|
||||||
- openai
|
- openai
|
||||||
|
- gemini
|
||||||
access_key:
|
access_key:
|
||||||
type: string
|
type: string
|
||||||
model:
|
model:
|
||||||
|
|
|
||||||
|
|
@ -450,6 +450,7 @@ static_resources:
|
||||||
name: envoy.compression.brotli.compressor
|
name: envoy.compression.brotli.compressor
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
|
"@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
|
||||||
|
chunk_size: 8192
|
||||||
- name: envoy.filters.http.compressor
|
- name: envoy.filters.http.compressor
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||||
|
|
@ -483,7 +484,6 @@ static_resources:
|
||||||
name: decompress
|
name: decompress
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
||||||
window_bits: 9
|
|
||||||
chunk_size: 8192
|
chunk_size: 8192
|
||||||
# If this ratio is set too low, then body data will not be decompressed completely.
|
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||||
max_inflate_ratio: 1000
|
max_inflate_ratio: 1000
|
||||||
|
|
@ -494,6 +494,7 @@ static_resources:
|
||||||
name: envoy.compression.brotli.decompressor
|
name: envoy.compression.brotli.decompressor
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
|
"@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
|
||||||
|
chunk_size: 8192
|
||||||
- name: envoy.filters.http.router
|
- name: envoy.filters.http.router
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
nodaemon=true
|
nodaemon=true
|
||||||
|
|
||||||
[program:brightstaff]
|
[program:brightstaff]
|
||||||
command=sh -c "/app/brightstaff 2>&1 | tee /var/log/brightstaff.log"
|
command=sh -c "RUST_LOG=info /app/brightstaff 2>&1 | tee /var/log/brightstaff.log"
|
||||||
stdout_logfile=/dev/stdout
|
stdout_logfile=/dev/stdout
|
||||||
redirect_stderr=true
|
redirect_stderr=true
|
||||||
stdout_logfile_maxbytes=0
|
stdout_logfile_maxbytes=0
|
||||||
|
|
|
||||||
|
|
@ -158,6 +158,8 @@ pub enum LlmProviderType {
|
||||||
Mistral,
|
Mistral,
|
||||||
#[serde(rename = "openai")]
|
#[serde(rename = "openai")]
|
||||||
OpenAI,
|
OpenAI,
|
||||||
|
#[serde(rename = "gemini")]
|
||||||
|
Gemini,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for LlmProviderType {
|
impl Display for LlmProviderType {
|
||||||
|
|
@ -167,6 +169,7 @@ impl Display for LlmProviderType {
|
||||||
LlmProviderType::Claude => write!(f, "claude"),
|
LlmProviderType::Claude => write!(f, "claude"),
|
||||||
LlmProviderType::Deepseek => write!(f, "deepseek"),
|
LlmProviderType::Deepseek => write!(f, "deepseek"),
|
||||||
LlmProviderType::Groq => write!(f, "groq"),
|
LlmProviderType::Groq => write!(f, "groq"),
|
||||||
|
LlmProviderType::Gemini => write!(f, "gemini"),
|
||||||
LlmProviderType::Mistral => write!(f, "mistral"),
|
LlmProviderType::Mistral => write!(f, "mistral"),
|
||||||
LlmProviderType::OpenAI => write!(f, "openai"),
|
LlmProviderType::OpenAI => write!(f, "openai"),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,7 @@ pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||||
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||||
pub const MESSAGES_KEY: &str = "messages";
|
pub const MESSAGES_KEY: &str = "messages";
|
||||||
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
|
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
|
||||||
pub const CHAT_COMPLETIONS_PATH: [&str; 2] =
|
pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
|
||||||
["/v1/chat/completions", "/openai/v1/chat/completions"];
|
|
||||||
pub const HEALTHZ_PATH: &str = "/healthz";
|
pub const HEALTHZ_PATH: &str = "/healthz";
|
||||||
pub const X_ARCH_STATE_HEADER: &str = "x-arch-state";
|
pub const X_ARCH_STATE_HEADER: &str = "x-arch-state";
|
||||||
pub const X_ARCH_API_RESPONSE: &str = "x-arch-api-response-message";
|
pub const X_ARCH_API_RESPONSE: &str = "x-arch-api-response-message";
|
||||||
|
|
|
||||||
|
|
@ -90,13 +90,26 @@ impl StreamContext {
|
||||||
provider_hint,
|
provider_hint,
|
||||||
));
|
));
|
||||||
|
|
||||||
if self.llm_provider.as_ref().unwrap().provider_interface == LlmProviderType::Groq {
|
match self.llm_provider.as_ref().unwrap().provider_interface {
|
||||||
if let Some(path) = self.get_http_request_header(":path") {
|
LlmProviderType::Groq => {
|
||||||
if path.starts_with("/v1/") {
|
if let Some(path) = self.get_http_request_header(":path") {
|
||||||
let new_path = format!("/openai{}", path);
|
if path.starts_with("/v1/") {
|
||||||
self.set_http_request_header(":path", Some(new_path.as_str()));
|
let new_path = format!("/openai{}", path);
|
||||||
|
self.set_http_request_header(":path", Some(new_path.as_str()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
LlmProviderType::Gemini => {
|
||||||
|
if let Some(path) = self.get_http_request_header(":path") {
|
||||||
|
if path == "/v1/chat/completions" {
|
||||||
|
self.set_http_request_header(
|
||||||
|
":path",
|
||||||
|
Some("/v1beta/openai/chat/completions"),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
|
|
@ -202,6 +215,8 @@ impl HttpContext for StreamContext {
|
||||||
return Action::Continue;
|
return Action::Continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.is_chat_completions_request = CHAT_COMPLETIONS_PATH == request_path;
|
||||||
|
|
||||||
let use_agent_orchestrator = match self.overrides.as_ref() {
|
let use_agent_orchestrator = match self.overrides.as_ref() {
|
||||||
Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
|
Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
|
||||||
None => false,
|
None => false,
|
||||||
|
|
@ -242,9 +257,6 @@ impl HttpContext for StreamContext {
|
||||||
self.delete_content_length_header();
|
self.delete_content_length_header();
|
||||||
self.save_ratelimit_header();
|
self.save_ratelimit_header();
|
||||||
|
|
||||||
let request_path = self.get_http_request_header(":path").unwrap_or_default();
|
|
||||||
self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());
|
|
||||||
|
|
||||||
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
|
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
|
||||||
self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
|
self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
|
||||||
|
|
||||||
|
|
@ -392,10 +404,10 @@ impl HttpContext for StreamContext {
|
||||||
Action::Continue
|
Action::Continue
|
||||||
}
|
}
|
||||||
|
|
||||||
fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
|
fn on_http_response_headers(&mut self, _num_headers: usize, end_of_stream: bool) -> Action {
|
||||||
debug!(
|
debug!(
|
||||||
"on_http_response_headers [S={}] end_stream={}",
|
"on_http_response_headers [S={}] end_stream={}",
|
||||||
self.context_id, _end_of_stream
|
self.context_id, end_of_stream
|
||||||
);
|
);
|
||||||
|
|
||||||
self.set_property(
|
self.set_property(
|
||||||
|
|
@ -542,6 +554,13 @@ impl HttpContext for StreamContext {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if log::log_enabled!(log::Level::Debug) {
|
||||||
|
debug!(
|
||||||
|
"response data (converted to utf8): {}",
|
||||||
|
String::from_utf8_lossy(&body)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
let llm_provider_str = self.llm_provider().provider_interface.to_string();
|
let llm_provider_str = self.llm_provider().provider_interface.to_string();
|
||||||
let hermes_llm_provider = Provider::from(llm_provider_str.as_str());
|
let hermes_llm_provider = Provider::from(llm_provider_str.as_str());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -54,8 +54,6 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
||||||
.returning(Some("selector-key"))
|
.returning(Some("selector-key"))
|
||||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
|
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
|
||||||
.returning(Some("selector-value"))
|
.returning(Some("selector-value"))
|
||||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
|
|
||||||
.returning(Some("/v1/chat/completions"))
|
|
||||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id"))
|
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id"))
|
||||||
.returning(None)
|
.returning(None)
|
||||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("traceparent"))
|
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("traceparent"))
|
||||||
|
|
|
||||||
|
|
@ -45,5 +45,10 @@ llm_providers:
|
||||||
provider_interface: groq
|
provider_interface: groq
|
||||||
model: llama-3.1-8b-instant
|
model: llama-3.1-8b-instant
|
||||||
|
|
||||||
|
- name: gemini
|
||||||
|
access_key: $GEMINI_API_KEY
|
||||||
|
provider_interface: gemini
|
||||||
|
model: gemini-1.5-pro-latest
|
||||||
|
|
||||||
tracing:
|
tracing:
|
||||||
random_sampling: 100
|
random_sampling: 100
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue