Break apart metrics into their own module (#335)

Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
This commit is contained in:
José Ulises Niño Rivera 2024-12-09 10:46:46 -08:00 committed by GitHub
parent d002b2042a
commit cd1b561192
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 59 additions and 61 deletions

View file

@ -2,7 +2,6 @@ use log::error;
use proxy_wasm::hostcalls;
use proxy_wasm::types::*;
#[allow(unused)]
pub trait Metric {
fn id(&self) -> u32;
fn value(&self) -> Result<u64, String> {
@ -14,7 +13,6 @@ pub trait Metric {
}
}
#[allow(unused)]
pub trait IncrementingMetric: Metric {
fn increment(&self, offset: i64) {
match hostcalls::increment_metric(self.id(), offset) {
@ -24,7 +22,6 @@ pub trait IncrementingMetric: Metric {
}
}
#[allow(unused)]
pub trait RecordingMetric: Metric {
fn record(&self, value: u64) {
match hostcalls::record_metric(self.id(), value) {
@ -39,7 +36,6 @@ pub struct Counter {
id: u32,
}
#[allow(unused)]
impl Counter {
pub fn new(name: String) -> Counter {
let returned_id = hostcalls::define_metric(MetricType::Counter, &name)
@ -85,7 +81,6 @@ pub struct Histogram {
id: u32,
}
#[allow(unused)]
impl Histogram {
pub fn new(name: String) -> Histogram {
let returned_id = hostcalls::define_metric(MetricType::Histogram, &name)

View file

@ -1,3 +1,4 @@
use crate::metrics::Metrics;
use crate::stream_context::StreamContext;
use common::configuration::Configuration;
use common::consts::OTEL_COLLECTOR_HTTP;
@ -6,9 +7,7 @@ use common::http::CallArgs;
use common::http::Client;
use common::llm_providers::LlmProviders;
use common::ratelimit;
use common::stats::Counter;
use common::stats::Gauge;
use common::stats::Histogram;
use common::tracing::TraceData;
use log::debug;
use log::warn;
@ -22,39 +21,12 @@ use std::time::Duration;
use std::sync::{Arc, Mutex};
#[derive(Copy, Clone, Debug)]
pub struct WasmMetrics {
pub active_http_calls: Gauge,
pub ratelimited_rq: Counter,
pub time_to_first_token: Histogram,
pub time_per_output_token: Histogram,
pub tokens_per_second: Histogram,
pub request_latency: Histogram,
pub output_sequence_length: Histogram,
pub input_sequence_length: Histogram,
}
impl WasmMetrics {
fn new() -> WasmMetrics {
WasmMetrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
time_to_first_token: Histogram::new(String::from("time_to_first_token")),
time_per_output_token: Histogram::new(String::from("time_per_output_token")),
tokens_per_second: Histogram::new(String::from("tokens_per_second")),
request_latency: Histogram::new(String::from("request_latency")),
output_sequence_length: Histogram::new(String::from("output_sequence_length")),
input_sequence_length: Histogram::new(String::from("input_sequence_length")),
}
}
}
#[derive(Debug)]
pub struct CallContext {}
#[derive(Debug)]
pub struct FilterContext {
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
callouts: RefCell<HashMap<u32, CallContext>>,
llm_providers: Option<Rc<LlmProviders>>,
@ -65,7 +37,7 @@ impl FilterContext {
pub fn new() -> FilterContext {
FilterContext {
callouts: RefCell::new(HashMap::new()),
metrics: Rc::new(WasmMetrics::new()),
metrics: Rc::new(Metrics::new()),
llm_providers: None,
traces_queue: Arc::new(Mutex::new(VecDeque::new())),
}

View file

@ -3,6 +3,7 @@ use proxy_wasm::traits::*;
use proxy_wasm::types::*;
mod filter_context;
mod metrics;
mod stream_context;
proxy_wasm::main! {{

View file

@ -0,0 +1,28 @@
use common::stats::{Counter, Gauge, Histogram};
#[derive(Copy, Clone, Debug)]
pub struct Metrics {
pub active_http_calls: Gauge,
pub ratelimited_rq: Counter,
pub time_to_first_token: Histogram,
pub time_per_output_token: Histogram,
pub tokens_per_second: Histogram,
pub request_latency: Histogram,
pub output_sequence_length: Histogram,
pub input_sequence_length: Histogram,
}
impl Metrics {
pub fn new() -> Metrics {
Metrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
time_to_first_token: Histogram::new(String::from("time_to_first_token")),
time_per_output_token: Histogram::new(String::from("time_per_output_token")),
tokens_per_second: Histogram::new(String::from("tokens_per_second")),
request_latency: Histogram::new(String::from("request_latency")),
output_sequence_length: Histogram::new(String::from("output_sequence_length")),
input_sequence_length: Histogram::new(String::from("input_sequence_length")),
}
}
}

View file

@ -1,4 +1,4 @@
use crate::filter_context::WasmMetrics;
use crate::metrics::Metrics;
use common::api::open_ai::{
ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
Message, StreamOptions,
@ -12,25 +12,23 @@ use common::errors::ServerError;
use common::llm_providers::LlmProviders;
use common::pii::obfuscate_auth_header;
use common::ratelimit::Header;
use common::stats::{IncrementingMetric, RecordingMetric};
use common::tracing::{Event, Span, TraceData, Traceparent};
use common::{ratelimit, routing, tokenizer};
use http::StatusCode;
use log::{debug, trace, warn};
use proxy_wasm::hostcalls::get_current_time;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::collections::VecDeque;
use std::num::NonZero;
use std::rc::Rc;
use std::sync::{Arc, Mutex};
use common::stats::{IncrementingMetric, RecordingMetric};
use proxy_wasm::hostcalls::get_current_time;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
pub struct StreamContext {
context_id: u32,
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
ratelimit_selector: Option<Header>,
streaming_response: bool,
response_tokens: usize,
@ -50,7 +48,7 @@ pub struct StreamContext {
impl StreamContext {
pub fn new(
context_id: u32,
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
llm_providers: Rc<LlmProviders>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
) -> Self {

View file

@ -1,4 +1,5 @@
use crate::embeddings::EmbeddingType;
use crate::metrics::Metrics;
use crate::stream_context::StreamContext;
use common::configuration::{Configuration, Overrides, PromptGuards, PromptTarget, Tracing};
use common::consts::ARCH_UPSTREAM_HOST_HEADER;
@ -21,19 +22,6 @@ use std::collections::HashMap;
use std::rc::Rc;
use std::time::Duration;
#[derive(Copy, Clone, Debug)]
pub struct WasmMetrics {
pub active_http_calls: Gauge,
}
impl WasmMetrics {
fn new() -> WasmMetrics {
WasmMetrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
}
}
}
pub type EmbeddingTypeMap = HashMap<EmbeddingType, Vec<f64>>;
pub type EmbeddingsStore = HashMap<String, EmbeddingTypeMap>;
@ -45,7 +33,7 @@ pub struct FilterCallContext {
#[derive(Debug)]
pub struct FilterContext {
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
callouts: RefCell<HashMap<u32, FilterCallContext>>,
overrides: Rc<Option<Overrides>>,
@ -62,7 +50,7 @@ impl FilterContext {
pub fn new() -> FilterContext {
FilterContext {
callouts: RefCell::new(HashMap::new()),
metrics: Rc::new(WasmMetrics::new()),
metrics: Rc::new(Metrics::new()),
system_prompt: Rc::new(None),
prompt_targets: Rc::new(HashMap::new()),
overrides: Rc::new(None),

View file

@ -6,6 +6,7 @@ mod context;
mod embeddings;
mod filter_context;
mod http_context;
mod metrics;
mod stream_context;
proxy_wasm::main! {{

View file

@ -0,0 +1,14 @@
use common::stats::Gauge;
#[derive(Copy, Clone, Debug)]
pub struct Metrics {
pub active_http_calls: Gauge,
}
impl Metrics {
pub fn new() -> Metrics {
Metrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
}
}
}

View file

@ -1,5 +1,6 @@
use crate::embeddings::EmbeddingType;
use crate::filter_context::{EmbeddingsStore, WasmMetrics};
use crate::filter_context::EmbeddingsStore;
use crate::metrics::Metrics;
use acap::cos;
use common::api::hallucination::{
extract_messages_for_hallucination, HallucinationClassificationRequest,
@ -67,7 +68,7 @@ pub struct StreamContext {
pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
pub embeddings_store: Option<Rc<EmbeddingsStore>>,
overrides: Rc<Option<Overrides>>,
pub metrics: Rc<WasmMetrics>,
pub metrics: Rc<Metrics>,
pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
pub context_id: u32,
pub tool_calls: Option<Vec<ToolCall>>,
@ -90,7 +91,7 @@ impl StreamContext {
#[allow(clippy::too_many_arguments)]
pub fn new(
context_id: u32,
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
system_prompt: Rc<Option<String>>,
prompt_targets: Rc<HashMap<String, PromptTarget>>,
prompt_guards: Rc<PromptGuards>,