This commit is contained in:
Adil Hafeez 2024-11-05 11:57:22 -08:00
parent c18dc04a7d
commit 3163a1c149
4 changed files with 70 additions and 7 deletions

View file

@ -26,4 +26,4 @@ COPY arch/envoy.template.yaml .
COPY arch/arch_config_schema.yaml .
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug"]
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:trace"]

View file

@ -7,7 +7,7 @@ static_resources:
address:
socket_address:
address: 0.0.0.0
port_value: 10000
port_value: 9000
traffic_direction: INBOUND
filter_chains:
- filters:
@ -40,6 +40,10 @@ static_resources:
path: "/var/log/access_ingress.log"
route_config:
name: local_routes
request_headers_to_add:
- header:
key: "x-envoy-force-trace"
value: "true"
virtual_hosts:
- name: local_service
domains:
@ -60,7 +64,7 @@ static_resources:
address:
socket_address:
address: 0.0.0.0
port_value: 10001
port_value: 10000
traffic_direction: INBOUND
filter_chains:
- filters:
@ -93,6 +97,10 @@ static_resources:
path: "/var/log/access_ingress_prompt.log"
route_config:
name: local_routes
request_headers_to_add:
- header:
key: "x-envoy-force-trace"
value: "true"
virtual_hosts:
- name: local_service
domains:
@ -184,6 +192,10 @@ static_resources:
path: "/var/log/access_internal.log"
route_config:
name: local_routes
request_headers_to_add:
- header:
key: "x-envoy-force-trace"
value: "true"
virtual_hosts:
- name: local_service
domains:
@ -256,6 +268,10 @@ static_resources:
path: "/var/log/access_llm.log"
route_config:
name: local_routes
request_headers_to_add:
- header:
key: "x-envoy-force-trace"
value: "true"
virtual_hosts:
- name: local_service
domains:
@ -454,7 +470,7 @@ static_resources:
address:
socket_address:
address: 0.0.0.0
port_value: 10001
port_value: 10000
hostname: arch_prompt_gateway_listener
- name: arch_llm_listener

View file

@ -1,4 +1,4 @@
use std::{collections::HashMap, time::Duration};
use std::{collections::HashMap, fmt::Write, time::Duration};
use common::{
common_types::{
@ -8,16 +8,20 @@ use common::{
PromptGuardRequest, PromptGuardTask,
},
consts::{
ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER, ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, GUARD_INTERNAL_HOST, HEALTHZ_PATH, REQUEST_ID_HEADER, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE
ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER,
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, GUARD_INTERNAL_HOST,
HEALTHZ_PATH, REQUEST_ID_HEADER, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
},
errors::ServerError,
http::{CallArgs, Client},
};
use http::StatusCode;
use log::{debug, trace, warn};
use log::{debug, info, trace, warn};
use proxy_wasm::{traits::HttpContext, types::Action};
use serde_json::Value;
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use crate::stream_context::{ResponseHandlerType, StreamCallContext, StreamContext};
// HttpContext is the trait that allows the Rust code to interact with HTTP objects.
@ -51,13 +55,38 @@ impl HttpContext for StreamContext {
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
if self.traceparent.is_none() {
// let trace_id: String = generate_random_hex_string(16);
// self.set_http_request_header("x-client-trace-id", Some(trace_id.as_str()));
}
// let trace_id: String = generate_random_hex_string(16);
// let parent_id: String = generate_random_hex_string(8);
// // let's add a traceparent header if it's not present
// let trace_version = "00";
// //TODO: derive trace flags from the sampling decision: 01 if sampled, 00 if not sampled. Hard coded for now.
// let trace_flags = "01";
// let trace_id = format!(
// "{}-{}-{}-{}",
// trace_version, trace_id, parent_id, trace_flags
// );
// debug!("attaching traceparent header: {}", trace_id);
// self.traceparent = Some(trace_id.clone());
// self.set_http_request_header(TRACE_PARENT_HEADER, Some(trace_id.as_str()));
// }
self.set_http_request_header("x-envoy-force-trace", Some("true"));
Action::Continue
}
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
// Let the client send the gateway all the data before sending to the LLM_provider.
// TODO: consider a streaming API.
if !end_of_stream {
return Action::Pause;
}
@ -360,3 +389,15 @@ impl HttpContext for StreamContext {
Action::Continue
}
}
/// Generates a random lowercase hexadecimal string from `len` random bytes.
///
/// Each byte is rendered as exactly two hex digits (zero-padded), so the
/// returned string is `2 * len` characters long; `len == 0` yields an empty
/// string. Randomness comes from `rand::thread_rng()`.
fn generate_random_hex_string(len: usize) -> String {
    let mut rng = thread_rng();
    // Two hex digits are written per byte, so reserve twice `len` up front
    // to avoid reallocating while the string is being built.
    let mut hex_string = String::with_capacity(len.saturating_mul(2));
    for _ in 0..len {
        let byte = rng.gen::<u8>();
        write!(&mut hex_string, "{:02x}", byte).expect("writing to a String cannot fail");
    }
    hex_string
}

View file

@ -155,6 +155,7 @@ impl StreamContext {
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
("x-envoy-upstream-rq-timeout-ms", "60000"),
("x-envoy-force-trace", "true"),
];
if self.request_id.is_some() {
@ -284,6 +285,7 @@ impl StreamContext {
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
("x-envoy-upstream-rq-timeout-ms", "60000"),
("x-envoy-force-trace", "true"),
];
if self.request_id.is_some() {
@ -484,6 +486,7 @@ impl StreamContext {
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()),
("x-envoy-force-trace", "true"),
];
if self.request_id.is_some() {
@ -635,6 +638,7 @@ impl StreamContext {
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()),
("x-envoy-force-trace", "true"),
];
if self.request_id.is_some() {
@ -815,6 +819,7 @@ impl StreamContext {
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
("x-envoy-upstream-rq-timeout-ms", "60000"),
("x-envoy-force-trace", "true"),
];
if self.request_id.is_some() {
@ -870,6 +875,7 @@ impl StreamContext {
(":authority", endpoint.name.as_str()),
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
("x-envoy-force-trace", "true"),
];
if self.request_id.is_some() {