mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
use custom tags to store user_message
This commit is contained in:
parent
d3c17c7abd
commit
1508743eeb
2 changed files with 65 additions and 0 deletions
|
|
@ -321,6 +321,23 @@ static_resources:
|
|||
service_name: llm_gateway
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
custom_tags:
|
||||
- tag: user_prompt
|
||||
metadata:
|
||||
kind:
|
||||
request: {}
|
||||
metadata_key:
|
||||
key: llm_filter
|
||||
path:
|
||||
- key: user_prompt
|
||||
- tag: time_to_first_token
|
||||
metadata:
|
||||
kind:
|
||||
request: {}
|
||||
metadata_key:
|
||||
key: llm_filter
|
||||
path:
|
||||
- key: time_to_first_token
|
||||
{% endif %}
|
||||
stat_prefix: arch_listener_http
|
||||
codec_type: AUTO
|
||||
|
|
@ -372,6 +389,28 @@ static_resources:
|
|||
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||
memory_level: 3
|
||||
window_bits: 10
|
||||
- name: envoy.filters.http.lua
|
||||
typed_config:
|
||||
'@type': type.googleapis.com/envoy.extensions.filters.http.lua.v3.Lua
|
||||
default_source_code:
|
||||
inline_string: |
|
||||
-- Response hook: moves selected response headers into the "llm_filter"
-- dynamic-metadata namespace and strips them from the response.
-- Headers that are absent are skipped.
function envoy_on_response(response_handle)
  -- Each entry: { response header, metadata key, log line }.
  -- Order matters only for log ordering; entries are handled top to bottom.
  local promoted_headers = {
    { "x-user-message", "user_prompt", "setting x-user-message" },
    { "x-time-to-first-token", "time_to_first_token", "setting x-time-to-first-token" },
  }

  for _, entry in ipairs(promoted_headers) do
    local header_name, metadata_key, log_line = entry[1], entry[2], entry[3]
    local header_value = response_handle:headers():get(header_name)

    if header_value then
      response_handle:logInfo(log_line)
      response_handle:streamInfo():dynamicMetadata():set("llm_filter", metadata_key, header_value)
      -- Remove the header once its value has been copied into metadata.
      response_handle:headers():remove(header_name)
    end
  end
end
|
||||
- name: envoy.filters.http.wasm
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
||||
|
|
|
|||
|
|
@ -288,6 +288,31 @@ impl HttpContext for StreamContext {
|
|||
Action::Continue
|
||||
}
|
||||
|
||||
fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
|
||||
debug!(
|
||||
"on_http_response_headers [S={}] end_stream={}",
|
||||
self.context_id, _end_of_stream
|
||||
);
|
||||
|
||||
if let Some(user_message) = self.user_message.as_ref() {
|
||||
if let Some(prompt) = user_message.content.as_ref() {
|
||||
debug!("setting user-message header: {}", prompt);
|
||||
self.set_http_response_header("x-user-message", Some(&prompt));
|
||||
}
|
||||
}
|
||||
|
||||
let tftt_time_ms = get_current_time()
|
||||
.unwrap()
|
||||
.duration_since(self.start_time.unwrap())
|
||||
.unwrap()
|
||||
.as_millis();
|
||||
|
||||
let tftt_time = tftt_time_ms.to_string();
|
||||
self.set_http_response_header("x-time-to-first-token", Some(&tftt_time));
|
||||
|
||||
Action::Continue
|
||||
}
|
||||
|
||||
fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
|
||||
debug!(
|
||||
"on_http_response_body [S={}] bytes={} end_stream={}",
|
||||
|
|
@ -364,6 +389,7 @@ impl HttpContext for StreamContext {
|
|||
}
|
||||
}
|
||||
llm_span.add_attribute("model".to_string(), self.llm_provider().name.to_string());
|
||||
|
||||
llm_span.add_event(Event::new(
|
||||
"time_to_first_token".to_string(),
|
||||
self.ttft_time
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue