diff --git a/crates/common/src/stats.rs b/crates/common/src/stats.rs
index 9479fadf..59604f0b 100644
--- a/crates/common/src/stats.rs
+++ b/crates/common/src/stats.rs
@@ -2,7 +2,6 @@ use log::error;
 use proxy_wasm::hostcalls;
 use proxy_wasm::types::*;
 
-#[allow(unused)]
 pub trait Metric {
     fn id(&self) -> u32;
     fn value(&self) -> Result<u64, String> {
@@ -14,7 +13,6 @@ pub trait Metric {
     }
 }
 
-#[allow(unused)]
 pub trait IncrementingMetric: Metric {
     fn increment(&self, offset: i64) {
         match hostcalls::increment_metric(self.id(), offset) {
@@ -24,7 +22,6 @@ pub trait IncrementingMetric: Metric {
     }
 }
 
-#[allow(unused)]
 pub trait RecordingMetric: Metric {
     fn record(&self, value: u64) {
         match hostcalls::record_metric(self.id(), value) {
@@ -39,7 +36,6 @@ pub struct Counter {
     id: u32,
 }
 
-#[allow(unused)]
 impl Counter {
     pub fn new(name: String) -> Counter {
         let returned_id = hostcalls::define_metric(MetricType::Counter, &name)
@@ -85,7 +81,6 @@ pub struct Histogram {
     id: u32,
 }
 
-#[allow(unused)]
 impl Histogram {
     pub fn new(name: String) -> Histogram {
         let returned_id = hostcalls::define_metric(MetricType::Histogram, &name)
diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs
index b5b279de..16580063 100644
--- a/crates/llm_gateway/src/filter_context.rs
+++ b/crates/llm_gateway/src/filter_context.rs
@@ -1,3 +1,4 @@
+use crate::metrics::Metrics;
 use crate::stream_context::StreamContext;
 use common::configuration::Configuration;
 use common::consts::OTEL_COLLECTOR_HTTP;
@@ -6,9 +7,7 @@ use common::http::CallArgs;
 use common::http::Client;
 use common::llm_providers::LlmProviders;
 use common::ratelimit;
-use common::stats::Counter;
 use common::stats::Gauge;
-use common::stats::Histogram;
 use common::tracing::TraceData;
 use log::debug;
 use log::warn;
@@ -22,39 +21,12 @@ use std::time::Duration;
 
 use std::sync::{Arc, Mutex};
 
-#[derive(Copy, Clone, Debug)]
-pub struct WasmMetrics {
-    pub active_http_calls: Gauge,
-    pub ratelimited_rq: Counter,
-    pub time_to_first_token: Histogram,
-    pub time_per_output_token: Histogram,
-    pub tokens_per_second: Histogram,
-    pub request_latency: Histogram,
-    pub output_sequence_length: Histogram,
-    pub input_sequence_length: Histogram,
-}
-
-impl WasmMetrics {
-    fn new() -> WasmMetrics {
-        WasmMetrics {
-            active_http_calls: Gauge::new(String::from("active_http_calls")),
-            ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
-            time_to_first_token: Histogram::new(String::from("time_to_first_token")),
-            time_per_output_token: Histogram::new(String::from("time_per_output_token")),
-            tokens_per_second: Histogram::new(String::from("tokens_per_second")),
-            request_latency: Histogram::new(String::from("request_latency")),
-            output_sequence_length: Histogram::new(String::from("output_sequence_length")),
-            input_sequence_length: Histogram::new(String::from("input_sequence_length")),
-        }
-    }
-}
-
 #[derive(Debug)]
 pub struct CallContext {}
 
 #[derive(Debug)]
 pub struct FilterContext {
-    metrics: Rc<WasmMetrics>,
+    metrics: Rc<Metrics>,
     // callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
     callouts: RefCell<HashMap<u32, CallContext>>,
     llm_providers: Option<Rc<LlmProviders>>,
@@ -65,7 +37,7 @@ impl FilterContext {
     pub fn new() -> FilterContext {
         FilterContext {
             callouts: RefCell::new(HashMap::new()),
-            metrics: Rc::new(WasmMetrics::new()),
+            metrics: Rc::new(Metrics::new()),
             llm_providers: None,
             traces_queue: Arc::new(Mutex::new(VecDeque::new())),
         }
diff --git a/crates/llm_gateway/src/lib.rs b/crates/llm_gateway/src/lib.rs
index e2ad9025..f585ba0e 100644
--- a/crates/llm_gateway/src/lib.rs
+++ b/crates/llm_gateway/src/lib.rs
@@ -3,6 +3,7 @@ use proxy_wasm::traits::*;
 use proxy_wasm::types::*;
 
 mod filter_context;
+mod metrics;
 mod stream_context;
 
 proxy_wasm::main! {{
diff --git a/crates/llm_gateway/src/metrics.rs b/crates/llm_gateway/src/metrics.rs
new file mode 100644
index 00000000..58eb6a86
--- /dev/null
+++ b/crates/llm_gateway/src/metrics.rs
@@ -0,0 +1,28 @@
+use common::stats::{Counter, Gauge, Histogram};
+
+#[derive(Copy, Clone, Debug)]
+pub struct Metrics {
+    pub active_http_calls: Gauge,
+    pub ratelimited_rq: Counter,
+    pub time_to_first_token: Histogram,
+    pub time_per_output_token: Histogram,
+    pub tokens_per_second: Histogram,
+    pub request_latency: Histogram,
+    pub output_sequence_length: Histogram,
+    pub input_sequence_length: Histogram,
+}
+
+impl Metrics {
+    pub fn new() -> Metrics {
+        Metrics {
+            active_http_calls: Gauge::new(String::from("active_http_calls")),
+            ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
+            time_to_first_token: Histogram::new(String::from("time_to_first_token")),
+            time_per_output_token: Histogram::new(String::from("time_per_output_token")),
+            tokens_per_second: Histogram::new(String::from("tokens_per_second")),
+            request_latency: Histogram::new(String::from("request_latency")),
+            output_sequence_length: Histogram::new(String::from("output_sequence_length")),
+            input_sequence_length: Histogram::new(String::from("input_sequence_length")),
+        }
+    }
+}
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 5d8669a2..f0b26ffe 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -1,4 +1,4 @@
-use crate::filter_context::WasmMetrics;
+use crate::metrics::Metrics;
 use common::api::open_ai::{
     ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
     Message, StreamOptions,
@@ -12,25 +12,23 @@ use common::errors::ServerError;
 use common::llm_providers::LlmProviders;
 use common::pii::obfuscate_auth_header;
 use common::ratelimit::Header;
+use common::stats::{IncrementingMetric, RecordingMetric};
 use common::tracing::{Event, Span, TraceData, Traceparent};
 use common::{ratelimit, routing, tokenizer};
 use http::StatusCode;
 use log::{debug, trace, warn};
+use proxy_wasm::hostcalls::get_current_time;
 use proxy_wasm::traits::*;
 use proxy_wasm::types::*;
 use std::collections::VecDeque;
 use std::num::NonZero;
 use std::rc::Rc;
 use std::sync::{Arc, Mutex};
-
-use common::stats::{IncrementingMetric, RecordingMetric};
-
-use proxy_wasm::hostcalls::get_current_time;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
 
 pub struct StreamContext {
     context_id: u32,
-    metrics: Rc<WasmMetrics>,
+    metrics: Rc<Metrics>,
     ratelimit_selector: Option<Header>,
     streaming_response: bool,
     response_tokens: usize,
@@ -50,7 +48,7 @@ pub struct StreamContext {
 impl StreamContext {
     pub fn new(
         context_id: u32,
-        metrics: Rc<WasmMetrics>,
+        metrics: Rc<Metrics>,
         llm_providers: Rc<LlmProviders>,
         traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
     ) -> Self {
diff --git a/crates/prompt_gateway/src/filter_context.rs b/crates/prompt_gateway/src/filter_context.rs
index 966765fe..449be126 100644
--- a/crates/prompt_gateway/src/filter_context.rs
+++ b/crates/prompt_gateway/src/filter_context.rs
@@ -1,4 +1,5 @@
 use crate::embeddings::EmbeddingType;
+use crate::metrics::Metrics;
 use crate::stream_context::StreamContext;
 use common::configuration::{Configuration, Overrides, PromptGuards, PromptTarget, Tracing};
 use common::consts::ARCH_UPSTREAM_HOST_HEADER;
@@ -21,19 +22,6 @@ use std::collections::HashMap;
 use std::rc::Rc;
 use std::time::Duration;
 
-#[derive(Copy, Clone, Debug)]
-pub struct WasmMetrics {
-    pub active_http_calls: Gauge,
-}
-
-impl WasmMetrics {
-    fn new() -> WasmMetrics {
-        WasmMetrics {
-            active_http_calls: Gauge::new(String::from("active_http_calls")),
-        }
-    }
-}
-
 pub type EmbeddingTypeMap = HashMap<EmbeddingType, Vec<f64>>;
 pub type EmbeddingsStore = HashMap<String, EmbeddingTypeMap>;
 
@@ -45,7 +33,7 @@ pub struct FilterCallContext {
 
 #[derive(Debug)]
 pub struct FilterContext {
-    metrics: Rc<WasmMetrics>,
+    metrics: Rc<Metrics>,
     // callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
     callouts: RefCell<HashMap<u32, FilterCallContext>>,
     overrides: Rc<Option<Overrides>>,
@@ -62,7 +50,7 @@ impl FilterContext {
     pub fn new() -> FilterContext {
         FilterContext {
             callouts: RefCell::new(HashMap::new()),
-            metrics: Rc::new(WasmMetrics::new()),
+            metrics: Rc::new(Metrics::new()),
             system_prompt: Rc::new(None),
             prompt_targets: Rc::new(HashMap::new()),
             overrides: Rc::new(None),
diff --git a/crates/prompt_gateway/src/lib.rs b/crates/prompt_gateway/src/lib.rs
index 8fe85f75..9d828dac 100644
--- a/crates/prompt_gateway/src/lib.rs
+++ b/crates/prompt_gateway/src/lib.rs
@@ -6,6 +6,7 @@ mod context;
 mod embeddings;
 mod filter_context;
 mod http_context;
+mod metrics;
 mod stream_context;
 
 proxy_wasm::main! {{
diff --git a/crates/prompt_gateway/src/metrics.rs b/crates/prompt_gateway/src/metrics.rs
new file mode 100644
index 00000000..ff891636
--- /dev/null
+++ b/crates/prompt_gateway/src/metrics.rs
@@ -0,0 +1,14 @@
+use common::stats::Gauge;
+
+#[derive(Copy, Clone, Debug)]
+pub struct Metrics {
+    pub active_http_calls: Gauge,
+}
+
+impl Metrics {
+    pub fn new() -> Metrics {
+        Metrics {
+            active_http_calls: Gauge::new(String::from("active_http_calls")),
+        }
+    }
+}
diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs
index 3a4d2733..8671af63 100644
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@@ -1,5 +1,6 @@
 use crate::embeddings::EmbeddingType;
-use crate::filter_context::{EmbeddingsStore, WasmMetrics};
+use crate::filter_context::EmbeddingsStore;
+use crate::metrics::Metrics;
 use acap::cos;
 use common::api::hallucination::{
     extract_messages_for_hallucination, HallucinationClassificationRequest,
@@ -67,7 +68,7 @@ pub struct StreamContext {
     pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
     pub embeddings_store: Option<Rc<EmbeddingsStore>>,
     overrides: Rc<Option<Overrides>>,
-    pub metrics: Rc<WasmMetrics>,
+    pub metrics: Rc<Metrics>,
     pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
     pub context_id: u32,
     pub tool_calls: Option<Vec<ToolCall>>,
@@ -90,7 +91,7 @@ impl StreamContext {
     #[allow(clippy::too_many_arguments)]
     pub fn new(
         context_id: u32,
-        metrics: Rc<WasmMetrics>,
+        metrics: Rc<Metrics>,
         system_prompt: Rc<Option<String>>,
         prompt_targets: Rc<HashMap<String, PromptTarget>>,
         prompt_guards: Rc<PromptGuards>,