mirror of
https://github.com/katanemo/plano.git
synced 2026-06-14 15:15:15 +02:00
docs: explain model_providers headers in configuration reference
Revert unrelated llm_gateway and demo test runner changes. Co-authored-by: Musa <musa@spherrrical.dev>
This commit is contained in:
parent
60da45a6ee
commit
432c94e2db
3 changed files with 36 additions and 36 deletions
|
|
@ -434,14 +434,17 @@ impl StreamContext {
|
|||
}
|
||||
Ok(streaming_chunk)
|
||||
} else {
|
||||
if body_size == 0 {
|
||||
return Err(Action::Continue);
|
||||
}
|
||||
debug!(
|
||||
"request_id={}: upstream response complete, streaming=false body_size={}",
|
||||
self.request_identifier(),
|
||||
body_size
|
||||
);
|
||||
match self.get_http_response_body(0, usize::MAX) {
|
||||
Some(body) if !body.is_empty() => Ok(body),
|
||||
_ => {
|
||||
match self.get_http_response_body(0, body_size) {
|
||||
Some(body) => Ok(body),
|
||||
None => {
|
||||
warn!(
|
||||
"request_id={}: non streaming response body empty",
|
||||
self.request_identifier()
|
||||
|
|
@ -1170,14 +1173,7 @@ impl HttpContext for StreamContext {
|
|||
}
|
||||
|
||||
let current_time = get_current_time().unwrap();
|
||||
|
||||
// Non-streaming upstream responses may arrive in multiple chunks; wait for the
|
||||
// full buffered body before parsing.
|
||||
if !self.streaming_response && !end_of_stream {
|
||||
return Action::Continue;
|
||||
}
|
||||
|
||||
if end_of_stream && body_size == 0 && self.streaming_response {
|
||||
if end_of_stream && body_size == 0 {
|
||||
debug!(
|
||||
"request_id={}: response body complete, total_bytes={}",
|
||||
self.request_identifier(),
|
||||
|
|
@ -1198,20 +1194,15 @@ impl HttpContext for StreamContext {
|
|||
);
|
||||
|
||||
// For error responses, forward the upstream error directly without parsing
|
||||
if let Ok(body) = self.read_raw_response_body(body_size) {
|
||||
if !body.is_empty() {
|
||||
if body_size > 0 {
|
||||
if let Ok(body) = self.read_raw_response_body(body_size) {
|
||||
debug!(
|
||||
"request_id={}: upstream error body: {}",
|
||||
self.request_identifier(),
|
||||
String::from_utf8_lossy(&body)
|
||||
);
|
||||
// Forward the error response as-is
|
||||
let replace_size = if body_size > 0 {
|
||||
body_size
|
||||
} else {
|
||||
body.len()
|
||||
};
|
||||
self.set_http_response_body(0, replace_size, &body);
|
||||
self.set_http_response_body(0, body_size, &body);
|
||||
}
|
||||
}
|
||||
return Action::Continue;
|
||||
|
|
@ -1241,19 +1232,6 @@ impl HttpContext for StreamContext {
|
|||
Err(action) => return action,
|
||||
};
|
||||
|
||||
if !self.streaming_response && body.is_empty() {
|
||||
if end_of_stream {
|
||||
self.handle_end_of_request_metrics_and_traces(current_time);
|
||||
}
|
||||
return Action::Continue;
|
||||
}
|
||||
|
||||
let replace_size = if body_size > 0 {
|
||||
body_size
|
||||
} else {
|
||||
body.len()
|
||||
};
|
||||
|
||||
debug!(
|
||||
"request_id={}: upstream raw response, body_size={} content={}",
|
||||
self.request_identifier(),
|
||||
|
|
@ -1265,14 +1243,14 @@ impl HttpContext for StreamContext {
|
|||
if self.streaming_response {
|
||||
match self.handle_streaming_response(&body, provider_id) {
|
||||
Ok(serialized_body) => {
|
||||
self.set_http_response_body(0, replace_size, &serialized_body);
|
||||
self.set_http_response_body(0, body_size, &serialized_body);
|
||||
}
|
||||
Err(action) => return action,
|
||||
}
|
||||
} else {
|
||||
match self.handle_non_streaming_response(&body, provider_id) {
|
||||
Ok(serialized_body) => {
|
||||
self.set_http_response_body(0, replace_size, &serialized_body);
|
||||
self.set_http_response_body(0, body_size, &serialized_body);
|
||||
}
|
||||
Err(action) => return action,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,8 +19,7 @@ run_hurl_with_retries() {
|
|||
local max_attempts=1
|
||||
local attempt=1
|
||||
|
||||
if [ "$demo_name" = "llm_routing/preference_based_routing" ] \
|
||||
|| [ "$demo_name" = "advanced/currency_exchange" ]; then
|
||||
if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then
|
||||
max_attempts=3
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,29 @@ The following is a complete reference of the ``plano_config.yml`` that controls
|
|||
the Plano gateway. This is where you enable capabilities like routing to upstream LLM providers, defining prompt_targets
|
||||
where prompts get routed, applying guardrails, and enabling critical agent observability features.
|
||||
|
||||
Model provider headers
|
||||
----------------------
|
||||
|
||||
Each entry under ``model_providers`` (or the legacy ``llm_providers`` alias) may include a ``headers`` map of extra
|
||||
HTTP headers that Plano adds to upstream LLM requests. Plano applies these headers after it sets authentication from
|
||||
``access_key`` or ``passthrough_auth``, so you can supply provider-specific metadata without replacing the configured
|
||||
credentials.
|
||||
|
||||
- **Type:** map of strings (header name → value)
|
||||
- **Optional:** yes
|
||||
- **Common uses:** required ``User-Agent`` values, organization or account identifiers, or other headers some APIs expect
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
model_providers:
|
||||
- model: moonshotai/kimi-for-coding
|
||||
access_key: $MOONSHOTAI_API_KEY
|
||||
base_url: https://api.kimi.com/coding/v1
|
||||
headers:
|
||||
User-Agent: "KimiCLI/1.3"
|
||||
|
||||
The example below includes this and other provider options in context.
|
||||
|
||||
.. literalinclude:: includes/plano_config_full_reference.yaml
|
||||
:language: yaml
|
||||
:linenos:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue