use custom tags to store user_message

This commit is contained in:
Adil Hafeez 2024-11-15 10:44:27 -08:00
parent d3c17c7abd
commit 1508743eeb
2 changed files with 65 additions and 0 deletions

View file

@ -321,6 +321,23 @@ static_resources:
service_name: llm_gateway
random_sampling:
value: {{ arch_tracing.random_sampling }}
# Extra span tags whose values are read from metadata stored under the
# `llm_filter` key. The Lua HTTP filter's `envoy_on_response` in this file
# writes `user_prompt` and `time_to_first_token` into that namespace.
custom_tags:
# Span tag `user_prompt` <- metadata llm_filter -> user_prompt
- tag: user_prompt
metadata:
kind:
request: {}
metadata_key:
key: llm_filter
path:
- key: user_prompt
# Span tag `time_to_first_token` <- metadata llm_filter -> time_to_first_token
- tag: time_to_first_token
metadata:
kind:
request: {}
metadata_key:
key: llm_filter
path:
- key: time_to_first_token
{% endif %}
stat_prefix: arch_listener_http
codec_type: AUTO
@ -372,6 +389,28 @@ static_resources:
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
memory_level: 3
window_bits: 10
- name: envoy.filters.http.lua
typed_config:
'@type': type.googleapis.com/envoy.extensions.filters.http.lua.v3.Lua
default_source_code:
inline_string: |
-- Moves telemetry values out of response headers and into dynamic metadata.
-- For each known header that is present on the response, the value is stored
-- under the "llm_filter" metadata namespace and the header is then removed
-- from the response.
function envoy_on_response(response_handle)
  -- Ordered list of { response header name, key inside "llm_filter" }.
  local header_to_metadata_key = {
    { "x-user-message", "user_prompt" },
    { "x-time-to-first-token", "time_to_first_token" },
  }

  for _, mapping in ipairs(header_to_metadata_key) do
    local header_name, metadata_key = mapping[1], mapping[2]
    local header_value = response_handle:headers():get(header_name)
    if header_value then
      response_handle:logInfo("setting " .. header_name)
      response_handle:streamInfo():dynamicMetadata():set("llm_filter", metadata_key, header_value)
      response_handle:headers():remove(header_name)
    end
  end
end
- name: envoy.filters.http.wasm
typed_config:
"@type": type.googleapis.com/udpa.type.v1.TypedStruct

View file

@ -288,6 +288,31 @@ impl HttpContext for StreamContext {
Action::Continue
}
/// Handles the response headers by attaching telemetry headers to the response.
///
/// * `x-user-message` is set to the content of `self.user_message`, when both
///   the message and its content are present.
/// * `x-time-to-first-token` is set to the number of milliseconds elapsed
///   between `self.start_time` and the moment this callback runs.
///
/// The timing header is best-effort: if `start_time` is unset, the current
/// time cannot be read, or the clock went backwards, the header is skipped
/// instead of panicking inside the filter.
///
/// Always returns `Action::Continue`.
fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
    debug!(
        "on_http_response_headers [S={}] end_stream={}",
        self.context_id, _end_of_stream
    );

    // NOTE(review): the prompt is copied verbatim into a header value; prompts
    // containing newlines or other control characters may not be valid header
    // values. Confirm whether it should be encoded or sanitized first.
    if let Some(prompt) = self
        .user_message
        .as_ref()
        .and_then(|message| message.content.as_ref())
    {
        debug!("setting user-message header: {}", prompt);
        self.set_http_response_header("x-user-message", Some(prompt));
    }

    match (self.start_time, get_current_time()) {
        (Some(start), Ok(now)) => match now.duration_since(start) {
            Ok(elapsed) => {
                let ttft_ms = elapsed.as_millis().to_string();
                self.set_http_response_header("x-time-to-first-token", Some(&ttft_ms));
            }
            // `duration_since` fails when `now` is earlier than `start`.
            Err(err) => debug!(
                "skipping x-time-to-first-token [S={}]: {}",
                self.context_id, err
            ),
        },
        _ => debug!(
            "skipping x-time-to-first-token [S={}]: start or current time unavailable",
            self.context_id
        ),
    }

    Action::Continue
}
fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
debug!(
"on_http_response_body [S={}] bytes={} end_stream={}",
@ -364,6 +389,7 @@ impl HttpContext for StreamContext {
}
}
llm_span.add_attribute("model".to_string(), self.llm_provider().name.to_string());
llm_span.add_event(Event::new(
"time_to_first_token".to_string(),
self.ttft_time