mirror of
https://github.com/katanemo/plano.git
synced 2026-05-10 16:22:42 +02:00
Add Prometheus metrics endpoint and Grafana dashboard for brightstaff (#904)
Some checks are pending
CI / pre-commit (push) Waiting to run
CI / plano-tools-tests (push) Waiting to run
CI / native-smoke-test (push) Waiting to run
CI / docker-build (push) Waiting to run
CI / validate-config (push) Waiting to run
CI / security-scan (push) Blocked by required conditions
CI / test-prompt-gateway (push) Blocked by required conditions
CI / test-model-alias-routing (push) Blocked by required conditions
CI / test-responses-api-with-state (push) Blocked by required conditions
CI / e2e-plano-tests (3.10) (push) Blocked by required conditions
CI / e2e-plano-tests (3.11) (push) Blocked by required conditions
CI / e2e-plano-tests (3.12) (push) Blocked by required conditions
CI / e2e-plano-tests (3.13) (push) Blocked by required conditions
CI / e2e-plano-tests (3.14) (push) Blocked by required conditions
CI / e2e-demo-preference (push) Blocked by required conditions
CI / e2e-demo-currency (push) Blocked by required conditions
Publish docker image (latest) / build-arm64 (push) Waiting to run
Publish docker image (latest) / build-amd64 (push) Waiting to run
Publish docker image (latest) / create-manifest (push) Blocked by required conditions
Build and Deploy Documentation / build (push) Waiting to run
Some checks are pending
CI / pre-commit (push) Waiting to run
CI / plano-tools-tests (push) Waiting to run
CI / native-smoke-test (push) Waiting to run
CI / docker-build (push) Waiting to run
CI / validate-config (push) Waiting to run
CI / security-scan (push) Blocked by required conditions
CI / test-prompt-gateway (push) Blocked by required conditions
CI / test-model-alias-routing (push) Blocked by required conditions
CI / test-responses-api-with-state (push) Blocked by required conditions
CI / e2e-plano-tests (3.10) (push) Blocked by required conditions
CI / e2e-plano-tests (3.11) (push) Blocked by required conditions
CI / e2e-plano-tests (3.12) (push) Blocked by required conditions
CI / e2e-plano-tests (3.13) (push) Blocked by required conditions
CI / e2e-plano-tests (3.14) (push) Blocked by required conditions
CI / e2e-demo-preference (push) Blocked by required conditions
CI / e2e-demo-currency (push) Blocked by required conditions
Publish docker image (latest) / build-arm64 (push) Waiting to run
Publish docker image (latest) / build-amd64 (push) Waiting to run
Publish docker image (latest) / create-manifest (push) Blocked by required conditions
Build and Deploy Documentation / build (push) Waiting to run
This commit is contained in:
parent
9812540602
commit
22f332f62d
17 changed files with 1682 additions and 6 deletions
|
|
@ -24,13 +24,14 @@ use crate::app_state::AppState;
|
|||
use crate::handlers::agents::pipeline::PipelineProcessor;
|
||||
use crate::handlers::extract_request_id;
|
||||
use crate::handlers::full;
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::state::response_state_processor::ResponsesStateProcessor;
|
||||
use crate::state::{
|
||||
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
|
||||
};
|
||||
use crate::streaming::{
|
||||
create_streaming_response, create_streaming_response_with_output_filter, truncate_message,
|
||||
ObservableStreamProcessor, StreamProcessor,
|
||||
LlmMetricsCtx, ObservableStreamProcessor, StreamProcessor,
|
||||
};
|
||||
use crate::tracing::{
|
||||
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
|
||||
|
|
@ -686,6 +687,13 @@ async fn send_upstream(
|
|||
|
||||
let request_start_time = std::time::Instant::now();
|
||||
|
||||
// Labels for LLM upstream metrics. We prefer `resolved_model` (post-routing)
|
||||
// and derive the provider from its `provider/model` prefix. This matches the
|
||||
// same model id the cost/latency router keys off.
|
||||
let (metric_provider_raw, metric_model_raw) = bs_metrics::split_provider_model(resolved_model);
|
||||
let metric_provider = metric_provider_raw.to_string();
|
||||
let metric_model = metric_model_raw.to_string();
|
||||
|
||||
let llm_response = match http_client
|
||||
.post(upstream_url)
|
||||
.headers(request_headers.clone())
|
||||
|
|
@ -695,6 +703,14 @@ async fn send_upstream(
|
|||
{
|
||||
Ok(res) => res,
|
||||
Err(err) => {
|
||||
let err_class = bs_metrics::llm_error_class_from_reqwest(&err);
|
||||
bs_metrics::record_llm_upstream(
|
||||
&metric_provider,
|
||||
&metric_model,
|
||||
0,
|
||||
err_class,
|
||||
request_start_time.elapsed(),
|
||||
);
|
||||
let err_msg = format!("Failed to send request: {}", err);
|
||||
let mut internal_error = Response::new(full(err_msg));
|
||||
*internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
|
|
@ -750,7 +766,12 @@ async fn send_upstream(
|
|||
span_name,
|
||||
request_start_time,
|
||||
messages_for_signals,
|
||||
);
|
||||
)
|
||||
.with_llm_metrics(LlmMetricsCtx {
|
||||
provider: metric_provider.clone(),
|
||||
model: metric_model.clone(),
|
||||
upstream_status: upstream_status.as_u16(),
|
||||
});
|
||||
|
||||
let output_filter_request_headers = if filter_pipeline.has_output_filters() {
|
||||
Some(request_headers.clone())
|
||||
|
|
|
|||
|
|
@ -5,10 +5,24 @@ use hyper::StatusCode;
|
|||
use std::sync::Arc;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::metrics::labels as metric_labels;
|
||||
use crate::router::orchestrator::OrchestratorService;
|
||||
use crate::streaming::truncate_message;
|
||||
use crate::tracing::routing;
|
||||
|
||||
/// Classify a request path into the fixed `route` label used on routing
/// metrics, based on the path's leading segment.
|
||||
/// Returns `metric_labels::ROUTE_AGENT` for paths starting with `/agents`,
/// `metric_labels::ROUTE_ROUTING` for paths starting with `/routing`, and
/// `metric_labels::ROUTE_LLM` for everything else.
///
/// NOTE(review): this comment previously stated that the caller has already
/// stripped the `/agents` or `/routing` prefix. If that is true, the first two
/// branches can never match and every request is labelled
/// `metric_labels::ROUTE_LLM` - confirm which form of the path callers pass.
|
||||
fn route_label_for_path(request_path: &str) -> &'static str {
|
||||
// Plain prefix match: any path beginning with "/agents" (including e.g.
// "/agentsfoo") takes this branch.
if request_path.starts_with("/agents") {
|
||||
metric_labels::ROUTE_AGENT
|
||||
} else if request_path.starts_with("/routing") {
|
||||
metric_labels::ROUTE_ROUTING
|
||||
} else {
|
||||
// Fallback label for every path that matched neither prefix.
metric_labels::ROUTE_LLM
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RoutingResult {
|
||||
/// Primary model to use (first in the ranked list).
|
||||
pub model_name: String,
|
||||
|
|
@ -106,15 +120,23 @@ pub async fn router_chat_get_upstream_model(
|
|||
)
|
||||
.await;
|
||||
|
||||
let determination_ms = routing_start_time.elapsed().as_millis() as i64;
|
||||
let determination_elapsed = routing_start_time.elapsed();
|
||||
let determination_ms = determination_elapsed.as_millis() as i64;
|
||||
let current_span = tracing::Span::current();
|
||||
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
||||
let route_label = route_label_for_path(request_path);
|
||||
|
||||
match routing_result {
|
||||
Ok(route) => match route {
|
||||
Some((route_name, ranked_models)) => {
|
||||
let model_name = ranked_models.first().cloned().unwrap_or_default();
|
||||
current_span.record("route.selected_model", model_name.as_str());
|
||||
bs_metrics::record_router_decision(
|
||||
route_label,
|
||||
&model_name,
|
||||
false,
|
||||
determination_elapsed,
|
||||
);
|
||||
Ok(RoutingResult {
|
||||
model_name,
|
||||
models: ranked_models,
|
||||
|
|
@ -126,6 +148,12 @@ pub async fn router_chat_get_upstream_model(
|
|||
// This signals to llm.rs to use the original validated request model
|
||||
current_span.record("route.selected_model", "none");
|
||||
info!("no route determined, using default model");
|
||||
bs_metrics::record_router_decision(
|
||||
route_label,
|
||||
"none",
|
||||
true,
|
||||
determination_elapsed,
|
||||
);
|
||||
|
||||
Ok(RoutingResult {
|
||||
model_name: "none".to_string(),
|
||||
|
|
@ -136,6 +164,7 @@ pub async fn router_chat_get_upstream_model(
|
|||
},
|
||||
Err(err) => {
|
||||
current_span.record("route.selected_model", "unknown");
|
||||
bs_metrics::record_router_decision(route_label, "unknown", true, determination_elapsed);
|
||||
Err(RoutingError::internal_error(format!(
|
||||
"Failed to determine route: {}",
|
||||
err
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ use tracing::{debug, info, info_span, warn, Instrument};
|
|||
|
||||
use super::extract_or_generate_traceparent;
|
||||
use crate::handlers::llm::model_selection::router_chat_get_upstream_model;
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::metrics::labels as metric_labels;
|
||||
use crate::router::orchestrator::OrchestratorService;
|
||||
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
|
||||
|
||||
|
|
@ -230,6 +232,17 @@ async fn routing_decision_inner(
|
|||
pinned: false,
|
||||
};
|
||||
|
||||
// Distinguish "decision served" (a concrete model picked) from
|
||||
// "no_candidates" (the sentinel "none" returned when nothing
|
||||
// matched). The handler still responds 200 in both cases, so RED
|
||||
// metrics alone can't tell them apart.
|
||||
let outcome = if response.models.first().map(|m| m == "none").unwrap_or(true) {
|
||||
metric_labels::ROUTING_SVC_NO_CANDIDATES
|
||||
} else {
|
||||
metric_labels::ROUTING_SVC_DECISION_SERVED
|
||||
};
|
||||
bs_metrics::record_routing_service_outcome(outcome);
|
||||
|
||||
info!(
|
||||
primary_model = %response.models.first().map(|s| s.as_str()).unwrap_or("none"),
|
||||
total_models = response.models.len(),
|
||||
|
|
@ -249,6 +262,7 @@ async fn routing_decision_inner(
|
|||
.unwrap())
|
||||
}
|
||||
Err(err) => {
|
||||
bs_metrics::record_routing_service_outcome(metric_labels::ROUTING_SVC_POLICY_ERROR);
|
||||
warn!(error = %err.message, "routing decision failed");
|
||||
Ok(BrightStaffError::InternalServerError(err.message).into_response())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue