diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index 52671f99..39735bc4 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -321,6 +321,23 @@ static_resources:
                       service_name: llm_gateway
                   random_sampling:
                     value: {{ arch_tracing.random_sampling }}
+                  custom_tags:  # extra span tags populated from dynamic metadata
+                    - tag: user_prompt  # name of the tag attached to the span
+                      metadata:
+                        kind:
+                          request: {}  # read the dynamic metadata associated with the request
+                        metadata_key:
+                          key: llm_filter  # metadata namespace; the Lua filter in this file writes it
+                          path:
+                            - key: user_prompt  # field inside the llm_filter namespace
+                    - tag: time_to_first_token  # name of the tag attached to the span
+                      metadata:
+                        kind:
+                          request: {}  # read the dynamic metadata associated with the request
+                        metadata_key:
+                          key: llm_filter  # same namespace as the user_prompt tag above
+                          path:
+                            - key: time_to_first_token  # field inside the llm_filter namespace
                 {% endif %}
                 stat_prefix: arch_listener_http
                 codec_type: AUTO
@@ -372,6 +389,28 @@ static_resources:
                           "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                           memory_level: 3
                           window_bits: 10
+                  - name: envoy.filters.http.lua
+                    typed_config:
+                      '@type': type.googleapis.com/envoy.extensions.filters.http.lua.v3.Lua
+                      default_source_code:
+                        inline_string: |
+                          -- Moves one response header into dynamic metadata: when `header` is
+                          -- present its value is stored under `key` in the "llm_filter"
+                          -- namespace and the header is then removed from the response.
+                          local function promote_header(handle, header, key)
+                            local value = handle:headers():get(header)
+                            if value then
+                              handle:logInfo("setting " .. header)
+                              handle:streamInfo():dynamicMetadata():set("llm_filter", key, value)
+                              handle:headers():remove(header)
+                            end
+                          end
+
+                          -- Response hook: headers that are absent are simply left alone.
+                          function envoy_on_response(response_handle)
+                            promote_header(response_handle, "x-user-message", "user_prompt")
+                            promote_header(response_handle, "x-time-to-first-token", "time_to_first_token")
+                          end
                   - name: envoy.filters.http.wasm
                     typed_config:
                       "@type": type.googleapis.com/udpa.type.v1.TypedStruct
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 7e35e7f2..cdfdbeb2 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -288,6 +288,31 @@ impl HttpContext for StreamContext {
         Action::Continue
     }
 
+    /// Adds the `x-user-message` (if a prompt is known) and `x-time-to-first-token` headers.
+    /// Skips the timing header instead of panicking when the elapsed time is unavailable.
+    fn on_http_response_headers(&mut self, _num_headers: usize, end_of_stream: bool) -> Action {
+        debug!(
+            "on_http_response_headers [S={}] end_stream={}",
+            self.context_id, end_of_stream
+        );
+        let prompt = self.user_message.as_ref().and_then(|m| m.content.as_deref());
+        if let Some(prompt) = prompt {
+            debug!("setting user-message header: {}", prompt);
+            self.set_http_response_header("x-user-message", Some(prompt));
+        }
+        // Milliseconds from self.start_time until these response headers were observed.
+        let ttft_ms = get_current_time()
+            .ok()
+            .zip(self.start_time)
+            .and_then(|(now, start)| now.duration_since(start).ok())
+            .map(|elapsed| elapsed.as_millis().to_string());
+        match ttft_ms.as_deref() {
+            Some(ms) => self.set_http_response_header("x-time-to-first-token", Some(ms)),
+            None => debug!("elapsed time unavailable; skipping x-time-to-first-token header"),
+        }
+        Action::Continue
+    }
+
     fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
         debug!(
             "on_http_response_body [S={}] bytes={} end_stream={}",
@@ -364,6 +389,7 @@ impl HttpContext for StreamContext {
                     }
                 }
                 llm_span.add_attribute("model".to_string(), self.llm_provider().name.to_string());
+
                 llm_span.add_event(Event::new(
                     "time_to_first_token".to_string(),
                     self.ttft_time