mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
Break apart metrics into their own module (#335)
Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
This commit is contained in:
parent
d002b2042a
commit
cd1b561192
9 changed files with 59 additions and 61 deletions
|
|
@ -2,7 +2,6 @@ use log::error;
|
|||
use proxy_wasm::hostcalls;
|
||||
use proxy_wasm::types::*;
|
||||
|
||||
#[allow(unused)]
|
||||
pub trait Metric {
|
||||
fn id(&self) -> u32;
|
||||
fn value(&self) -> Result<u64, String> {
|
||||
|
|
@ -14,7 +13,6 @@ pub trait Metric {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub trait IncrementingMetric: Metric {
|
||||
fn increment(&self, offset: i64) {
|
||||
match hostcalls::increment_metric(self.id(), offset) {
|
||||
|
|
@ -24,7 +22,6 @@ pub trait IncrementingMetric: Metric {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub trait RecordingMetric: Metric {
|
||||
fn record(&self, value: u64) {
|
||||
match hostcalls::record_metric(self.id(), value) {
|
||||
|
|
@ -39,7 +36,6 @@ pub struct Counter {
|
|||
id: u32,
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
impl Counter {
|
||||
pub fn new(name: String) -> Counter {
|
||||
let returned_id = hostcalls::define_metric(MetricType::Counter, &name)
|
||||
|
|
@ -85,7 +81,6 @@ pub struct Histogram {
|
|||
id: u32,
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
impl Histogram {
|
||||
pub fn new(name: String) -> Histogram {
|
||||
let returned_id = hostcalls::define_metric(MetricType::Histogram, &name)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
use crate::metrics::Metrics;
|
||||
use crate::stream_context::StreamContext;
|
||||
use common::configuration::Configuration;
|
||||
use common::consts::OTEL_COLLECTOR_HTTP;
|
||||
|
|
@ -6,9 +7,7 @@ use common::http::CallArgs;
|
|||
use common::http::Client;
|
||||
use common::llm_providers::LlmProviders;
|
||||
use common::ratelimit;
|
||||
use common::stats::Counter;
|
||||
use common::stats::Gauge;
|
||||
use common::stats::Histogram;
|
||||
use common::tracing::TraceData;
|
||||
use log::debug;
|
||||
use log::warn;
|
||||
|
|
@ -22,39 +21,12 @@ use std::time::Duration;
|
|||
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct WasmMetrics {
|
||||
pub active_http_calls: Gauge,
|
||||
pub ratelimited_rq: Counter,
|
||||
pub time_to_first_token: Histogram,
|
||||
pub time_per_output_token: Histogram,
|
||||
pub tokens_per_second: Histogram,
|
||||
pub request_latency: Histogram,
|
||||
pub output_sequence_length: Histogram,
|
||||
pub input_sequence_length: Histogram,
|
||||
}
|
||||
|
||||
impl WasmMetrics {
|
||||
fn new() -> WasmMetrics {
|
||||
WasmMetrics {
|
||||
active_http_calls: Gauge::new(String::from("active_http_calls")),
|
||||
ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
|
||||
time_to_first_token: Histogram::new(String::from("time_to_first_token")),
|
||||
time_per_output_token: Histogram::new(String::from("time_per_output_token")),
|
||||
tokens_per_second: Histogram::new(String::from("tokens_per_second")),
|
||||
request_latency: Histogram::new(String::from("request_latency")),
|
||||
output_sequence_length: Histogram::new(String::from("output_sequence_length")),
|
||||
input_sequence_length: Histogram::new(String::from("input_sequence_length")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CallContext {}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FilterContext {
|
||||
metrics: Rc<WasmMetrics>,
|
||||
metrics: Rc<Metrics>,
|
||||
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
|
||||
callouts: RefCell<HashMap<u32, CallContext>>,
|
||||
llm_providers: Option<Rc<LlmProviders>>,
|
||||
|
|
@ -65,7 +37,7 @@ impl FilterContext {
|
|||
pub fn new() -> FilterContext {
|
||||
FilterContext {
|
||||
callouts: RefCell::new(HashMap::new()),
|
||||
metrics: Rc::new(WasmMetrics::new()),
|
||||
metrics: Rc::new(Metrics::new()),
|
||||
llm_providers: None,
|
||||
traces_queue: Arc::new(Mutex::new(VecDeque::new())),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use proxy_wasm::traits::*;
|
|||
use proxy_wasm::types::*;
|
||||
|
||||
mod filter_context;
|
||||
mod metrics;
|
||||
mod stream_context;
|
||||
|
||||
proxy_wasm::main! {{
|
||||
|
|
|
|||
28
crates/llm_gateway/src/metrics.rs
Normal file
28
crates/llm_gateway/src/metrics.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
use common::stats::{Counter, Gauge, Histogram};
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct Metrics {
|
||||
pub active_http_calls: Gauge,
|
||||
pub ratelimited_rq: Counter,
|
||||
pub time_to_first_token: Histogram,
|
||||
pub time_per_output_token: Histogram,
|
||||
pub tokens_per_second: Histogram,
|
||||
pub request_latency: Histogram,
|
||||
pub output_sequence_length: Histogram,
|
||||
pub input_sequence_length: Histogram,
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
pub fn new() -> Metrics {
|
||||
Metrics {
|
||||
active_http_calls: Gauge::new(String::from("active_http_calls")),
|
||||
ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
|
||||
time_to_first_token: Histogram::new(String::from("time_to_first_token")),
|
||||
time_per_output_token: Histogram::new(String::from("time_per_output_token")),
|
||||
tokens_per_second: Histogram::new(String::from("tokens_per_second")),
|
||||
request_latency: Histogram::new(String::from("request_latency")),
|
||||
output_sequence_length: Histogram::new(String::from("output_sequence_length")),
|
||||
input_sequence_length: Histogram::new(String::from("input_sequence_length")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::filter_context::WasmMetrics;
|
||||
use crate::metrics::Metrics;
|
||||
use common::api::open_ai::{
|
||||
ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
|
||||
Message, StreamOptions,
|
||||
|
|
@ -12,25 +12,23 @@ use common::errors::ServerError;
|
|||
use common::llm_providers::LlmProviders;
|
||||
use common::pii::obfuscate_auth_header;
|
||||
use common::ratelimit::Header;
|
||||
use common::stats::{IncrementingMetric, RecordingMetric};
|
||||
use common::tracing::{Event, Span, TraceData, Traceparent};
|
||||
use common::{ratelimit, routing, tokenizer};
|
||||
use http::StatusCode;
|
||||
use log::{debug, trace, warn};
|
||||
use proxy_wasm::hostcalls::get_current_time;
|
||||
use proxy_wasm::traits::*;
|
||||
use proxy_wasm::types::*;
|
||||
use std::collections::VecDeque;
|
||||
use std::num::NonZero;
|
||||
use std::rc::Rc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use common::stats::{IncrementingMetric, RecordingMetric};
|
||||
|
||||
use proxy_wasm::hostcalls::get_current_time;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
pub struct StreamContext {
|
||||
context_id: u32,
|
||||
metrics: Rc<WasmMetrics>,
|
||||
metrics: Rc<Metrics>,
|
||||
ratelimit_selector: Option<Header>,
|
||||
streaming_response: bool,
|
||||
response_tokens: usize,
|
||||
|
|
@ -50,7 +48,7 @@ pub struct StreamContext {
|
|||
impl StreamContext {
|
||||
pub fn new(
|
||||
context_id: u32,
|
||||
metrics: Rc<WasmMetrics>,
|
||||
metrics: Rc<Metrics>,
|
||||
llm_providers: Rc<LlmProviders>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
) -> Self {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use crate::embeddings::EmbeddingType;
|
||||
use crate::metrics::Metrics;
|
||||
use crate::stream_context::StreamContext;
|
||||
use common::configuration::{Configuration, Overrides, PromptGuards, PromptTarget, Tracing};
|
||||
use common::consts::ARCH_UPSTREAM_HOST_HEADER;
|
||||
|
|
@ -21,19 +22,6 @@ use std::collections::HashMap;
|
|||
use std::rc::Rc;
|
||||
use std::time::Duration;
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct WasmMetrics {
|
||||
pub active_http_calls: Gauge,
|
||||
}
|
||||
|
||||
impl WasmMetrics {
|
||||
fn new() -> WasmMetrics {
|
||||
WasmMetrics {
|
||||
active_http_calls: Gauge::new(String::from("active_http_calls")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type EmbeddingTypeMap = HashMap<EmbeddingType, Vec<f64>>;
|
||||
pub type EmbeddingsStore = HashMap<String, EmbeddingTypeMap>;
|
||||
|
||||
|
|
@ -45,7 +33,7 @@ pub struct FilterCallContext {
|
|||
|
||||
#[derive(Debug)]
|
||||
pub struct FilterContext {
|
||||
metrics: Rc<WasmMetrics>,
|
||||
metrics: Rc<Metrics>,
|
||||
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
|
||||
callouts: RefCell<HashMap<u32, FilterCallContext>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
|
|
@ -62,7 +50,7 @@ impl FilterContext {
|
|||
pub fn new() -> FilterContext {
|
||||
FilterContext {
|
||||
callouts: RefCell::new(HashMap::new()),
|
||||
metrics: Rc::new(WasmMetrics::new()),
|
||||
metrics: Rc::new(Metrics::new()),
|
||||
system_prompt: Rc::new(None),
|
||||
prompt_targets: Rc::new(HashMap::new()),
|
||||
overrides: Rc::new(None),
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ mod context;
|
|||
mod embeddings;
|
||||
mod filter_context;
|
||||
mod http_context;
|
||||
mod metrics;
|
||||
mod stream_context;
|
||||
|
||||
proxy_wasm::main! {{
|
||||
|
|
|
|||
14
crates/prompt_gateway/src/metrics.rs
Normal file
14
crates/prompt_gateway/src/metrics.rs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
use common::stats::Gauge;
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct Metrics {
|
||||
pub active_http_calls: Gauge,
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
pub fn new() -> Metrics {
|
||||
Metrics {
|
||||
active_http_calls: Gauge::new(String::from("active_http_calls")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
use crate::embeddings::EmbeddingType;
|
||||
use crate::filter_context::{EmbeddingsStore, WasmMetrics};
|
||||
use crate::filter_context::EmbeddingsStore;
|
||||
use crate::metrics::Metrics;
|
||||
use acap::cos;
|
||||
use common::api::hallucination::{
|
||||
extract_messages_for_hallucination, HallucinationClassificationRequest,
|
||||
|
|
@ -67,7 +68,7 @@ pub struct StreamContext {
|
|||
pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
|
||||
pub embeddings_store: Option<Rc<EmbeddingsStore>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
pub metrics: Rc<WasmMetrics>,
|
||||
pub metrics: Rc<Metrics>,
|
||||
pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
|
||||
pub context_id: u32,
|
||||
pub tool_calls: Option<Vec<ToolCall>>,
|
||||
|
|
@ -90,7 +91,7 @@ impl StreamContext {
|
|||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
context_id: u32,
|
||||
metrics: Rc<WasmMetrics>,
|
||||
metrics: Rc<Metrics>,
|
||||
system_prompt: Rc<Option<String>>,
|
||||
prompt_targets: Rc<HashMap<String, PromptTarget>>,
|
||||
prompt_guards: Rc<PromptGuards>,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue