diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs
index c518e244..23f30899 100644
--- a/crates/llm_gateway/src/filter_context.rs
+++ b/crates/llm_gateway/src/filter_context.rs
@@ -21,7 +21,7 @@ pub struct WasmMetrics {
     pub time_per_output_token: Histogram,
     pub latency: Histogram,
     pub output_sequence_length: Histogram,
-    // TODO: Add Input Sequence Length
+    pub input_sequence_length: Histogram,
 }
 
 impl WasmMetrics {
@@ -33,6 +33,7 @@ impl WasmMetrics {
             time_per_output_token: Histogram::new(String::from("time_per_output_token")),
             latency: Histogram::new(String::from("latency")),
             output_sequence_length: Histogram::new(String::from("output_sequence_length")),
+            input_sequence_length: Histogram::new(String::from("input_sequence_length")),
         }
     }
 }
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 298a9f68..8c3ce4c1 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -133,16 +133,49 @@ impl StreamContext {
         model: &str,
         json_string: &str,
     ) -> Result<(), ratelimit::Error> {
+        // Tokenize once; the count feeds both the metric and the rate limiter.
+        // A tokenizer failure is kept as `None` rather than collapsed to 0, so it
+        // neither skews the histogram nor becomes a zero count for the limiter.
+        let token_count = tokenizer::token_count(model, json_string).ok();
+
+        match token_count {
+            Some(count) => {
+                self.metrics.input_sequence_length.record(count as u64);
+                log::debug!("Recorded input token count: {}", count);
+            }
+            None => log::debug!("Could not tokenize request for model: {}", model),
+        }
+
+        // Check if rate limiting needs to be applied.
         if let Some(selector) = self.ratelimit_selector.take() {
-            // Tokenize and Ratelimit.
-            if let Ok(token_count) = tokenizer::token_count(model, json_string) {
-                ratelimit::ratelimits(None).read().unwrap().check_limit(
-                    model.to_owned(),
-                    selector,
-                    NonZero::new(token_count as u32).unwrap(),
-                )?;
+            // `NonZero::new` returns `None` for 0, so a failed or empty
+            // tokenization skips the limiter instead of panicking on `unwrap`.
+            match token_count.and_then(|count| NonZero::new(count as u32)) {
+                Some(tokens) => {
+                    log::debug!("Rate limiting applied for model: {}", model);
+                    ratelimit::ratelimits(None)
+                        .read()
+                        .unwrap()
+                        .check_limit(model.to_owned(), selector, tokens)
+                        .map_err(|e| {
+                            log::debug!(
+                                "Rate limit check failed for model: {} with error: {:?}",
+                                model,
+                                e
+                            );
+                            e
+                        })?;
+                    log::debug!("Rate limit check passed for model: {}", model);
+                }
+                None => log::debug!(
+                    "Skipping rate limit for model: {} (no tokens counted)",
+                    model
+                ),
             }
+        } else {
+            log::debug!("No rate limit applied for model: {}", model);
         }
+
         Ok(())
     }
 }