mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
use req/resp from hermesllm in llm gateway
This commit is contained in:
parent
f10e0fcece
commit
b0c1e97dc5
6 changed files with 35 additions and 14 deletions
|
|
@ -773,7 +773,7 @@ static_resources:
|
|||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
address: host.docker.internal
|
||||
port_value: 9091
|
||||
hostname: localhost
|
||||
|
||||
|
|
|
|||
|
|
@ -64,6 +64,8 @@ def docker_start_archgw_detached(
|
|||
item for volume in volume_mappings for item in ("-v", volume)
|
||||
]
|
||||
|
||||
print("using custom release path")
|
||||
|
||||
options = [
|
||||
"docker",
|
||||
"run",
|
||||
|
|
@ -76,6 +78,7 @@ def docker_start_archgw_detached(
|
|||
"--add-host",
|
||||
"host.docker.internal:host-gateway",
|
||||
ARCHGW_DOCKER_IMAGE,
|
||||
"/Users/adilhafeez/src/intelligent-prompt-gateway/crates/target/wasm32-wasip1/release:/etc/envoy/proxy-wasm-plugins:ro",
|
||||
]
|
||||
|
||||
result = subprocess.run(options, capture_output=True, text=True, check=False)
|
||||
|
|
|
|||
1
crates/Cargo.lock
generated
1
crates/Cargo.lock
generated
|
|
@ -1615,6 +1615,7 @@ dependencies = [
|
|||
"common",
|
||||
"derivative",
|
||||
"governor",
|
||||
"hermesllm",
|
||||
"http 1.1.0",
|
||||
"log",
|
||||
"md5",
|
||||
|
|
|
|||
|
|
@ -57,6 +57,12 @@ pub struct Message {
|
|||
pub content: Option<ContentType>,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StreamOptions {
|
||||
pub include_usage: bool,
|
||||
}
|
||||
|
||||
#[skip_serializing_none]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChatCompletionsRequest {
|
||||
|
|
@ -70,6 +76,7 @@ pub struct ChatCompletionsRequest {
|
|||
pub stop: Option<Vec<String>>,
|
||||
pub presence_penalty: Option<f32>,
|
||||
pub frequency_penalty: Option<f32>,
|
||||
pub stream_options: Option<StreamOptions>,
|
||||
}
|
||||
|
||||
impl Default for ChatCompletionsRequest {
|
||||
|
|
@ -85,6 +92,7 @@ impl Default for ChatCompletionsRequest {
|
|||
stop: None,
|
||||
presence_penalty: None,
|
||||
frequency_penalty: None,
|
||||
stream_options: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -110,9 +118,9 @@ pub struct Choice {
|
|||
#[skip_serializing_none]
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct Usage {
|
||||
pub prompt_tokens: u32,
|
||||
pub completion_tokens: u32,
|
||||
pub total_tokens: u32,
|
||||
pub prompt_tokens: usize,
|
||||
pub completion_tokens: usize,
|
||||
pub total_tokens: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
|
@ -127,6 +135,7 @@ pub struct OpenAIRequestBuilder {
|
|||
stop: Option<Vec<String>>,
|
||||
presence_penalty: Option<f32>,
|
||||
frequency_penalty: Option<f32>,
|
||||
stream_options: Option<StreamOptions>,
|
||||
}
|
||||
|
||||
impl OpenAIRequestBuilder {
|
||||
|
|
@ -142,6 +151,7 @@ impl OpenAIRequestBuilder {
|
|||
stop: None,
|
||||
presence_penalty: None,
|
||||
frequency_penalty: None,
|
||||
stream_options: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -185,6 +195,12 @@ impl OpenAIRequestBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn stream_options(mut self, include_usage: bool) -> Self {
|
||||
self.stream = Some(true);
|
||||
self.stream_options = Some(StreamOptions { include_usage });
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(self) -> Result<ChatCompletionsRequest, &'static str> {
|
||||
let request = ChatCompletionsRequest {
|
||||
model: self.model,
|
||||
|
|
@ -197,6 +213,7 @@ impl OpenAIRequestBuilder {
|
|||
stop: self.stop,
|
||||
presence_penalty: self.presence_penalty,
|
||||
frequency_penalty: self.frequency_penalty,
|
||||
stream_options: self.stream_options,
|
||||
};
|
||||
Ok(request)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ rand = "0.8.5"
|
|||
thiserror = "1.0.64"
|
||||
derivative = "2.2.0"
|
||||
sha2 = "0.10.8"
|
||||
hermesllm = { version = "0.1.0", path = "../hermesllm" }
|
||||
|
||||
[dev-dependencies]
|
||||
proxy-wasm-test-framework = { git = "https://github.com/katanemo/test-framework.git", branch = "new" }
|
||||
|
|
|
|||
|
|
@ -1,8 +1,5 @@
|
|||
use crate::metrics::Metrics;
|
||||
use common::api::open_ai::{
|
||||
ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
|
||||
ContentType, Message, StreamOptions,
|
||||
};
|
||||
use common::api::open_ai::ChatCompletionStreamResponseServerEvents;
|
||||
use common::configuration::{LlmProvider, LlmProviderType, Overrides};
|
||||
use common::consts::{
|
||||
ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
|
||||
|
|
@ -14,6 +11,10 @@ use common::ratelimit::Header;
|
|||
use common::stats::{IncrementingMetric, RecordingMetric};
|
||||
use common::tracing::{Event, Span, TraceData, Traceparent};
|
||||
use common::{ratelimit, routing, tokenizer};
|
||||
use hermesllm::providers::openai::types::ChatCompletionsRequest;
|
||||
use hermesllm::providers::openai::types::{
|
||||
ChatCompletionsResponse, ContentType, Message, StreamOptions,
|
||||
};
|
||||
use http::StatusCode;
|
||||
use log::{debug, info, warn};
|
||||
use proxy_wasm::hostcalls::get_current_time;
|
||||
|
|
@ -302,10 +303,6 @@ impl HttpContext for StreamContext {
|
|||
}
|
||||
};
|
||||
|
||||
for message in deserialized_body.messages.iter_mut() {
|
||||
message.model = None;
|
||||
}
|
||||
|
||||
self.user_message = deserialized_body
|
||||
.messages
|
||||
.iter()
|
||||
|
|
@ -355,10 +352,12 @@ impl HttpContext for StreamContext {
|
|||
chat_completion_request_str
|
||||
);
|
||||
|
||||
if deserialized_body.stream {
|
||||
if deserialized_body.stream.unwrap_or_default() {
|
||||
self.streaming_response = true;
|
||||
}
|
||||
if deserialized_body.stream && deserialized_body.stream_options.is_none() {
|
||||
if deserialized_body.stream.unwrap_or_default()
|
||||
&& deserialized_body.stream_options.is_none()
|
||||
{
|
||||
deserialized_body.stream_options = Some(StreamOptions {
|
||||
include_usage: true,
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue