mirror of
https://github.com/katanemo/plano.git
synced 2026-05-11 08:42:48 +02:00
Merge origin/main into musa/chatgpt-subscription
This commit is contained in:
commit
6f67048c04
118 changed files with 11627 additions and 2194 deletions
|
|
@ -435,6 +435,12 @@ impl TokenUsage for MessagesResponse {
|
|||
fn total_tokens(&self) -> usize {
|
||||
(self.usage.input_tokens + self.usage.output_tokens) as usize
|
||||
}
|
||||
fn cached_input_tokens(&self) -> Option<usize> {
|
||||
self.usage.cache_read_input_tokens.map(|t| t as usize)
|
||||
}
|
||||
fn cache_creation_tokens(&self) -> Option<usize> {
|
||||
self.usage.cache_creation_input_tokens.map(|t| t as usize)
|
||||
}
|
||||
}
|
||||
|
||||
impl ProviderResponse for MessagesResponse {
|
||||
|
|
|
|||
|
|
@ -596,6 +596,18 @@ impl TokenUsage for Usage {
|
|||
fn total_tokens(&self) -> usize {
|
||||
self.total_tokens as usize
|
||||
}
|
||||
|
||||
fn cached_input_tokens(&self) -> Option<usize> {
|
||||
self.prompt_tokens_details
|
||||
.as_ref()
|
||||
.and_then(|d| d.cached_tokens.map(|t| t as usize))
|
||||
}
|
||||
|
||||
fn reasoning_tokens(&self) -> Option<usize> {
|
||||
self.completion_tokens_details
|
||||
.as_ref()
|
||||
.and_then(|d| d.reasoning_tokens.map(|t| t as usize))
|
||||
}
|
||||
}
|
||||
|
||||
/// Implementation of ProviderRequest for ChatCompletionsRequest
|
||||
|
|
|
|||
|
|
@ -710,6 +710,18 @@ impl crate::providers::response::TokenUsage for ResponseUsage {
|
|||
fn total_tokens(&self) -> usize {
|
||||
self.total_tokens as usize
|
||||
}
|
||||
|
||||
fn cached_input_tokens(&self) -> Option<usize> {
|
||||
self.input_tokens_details
|
||||
.as_ref()
|
||||
.map(|d| d.cached_tokens.max(0) as usize)
|
||||
}
|
||||
|
||||
fn reasoning_tokens(&self) -> Option<usize> {
|
||||
self.output_tokens_details
|
||||
.as_ref()
|
||||
.map(|d| d.reasoning_tokens.max(0) as usize)
|
||||
}
|
||||
}
|
||||
|
||||
/// Token details
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
use crate::apis::anthropic::MessagesStreamEvent;
|
||||
use crate::apis::anthropic::{
|
||||
MessagesMessageDelta, MessagesStopReason, MessagesStreamEvent, MessagesUsage,
|
||||
};
|
||||
use crate::apis::streaming_shapes::sse::{SseEvent, SseStreamBufferTrait};
|
||||
use crate::providers::streaming_response::ProviderStreamResponseType;
|
||||
use log::warn;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// SSE Stream Buffer for Anthropic Messages API streaming.
|
||||
|
|
@ -11,13 +14,24 @@ use std::collections::HashSet;
|
|||
///
|
||||
/// When converting from OpenAI to Anthropic format, this buffer injects the required
|
||||
/// ContentBlockStart and ContentBlockStop events to maintain proper Anthropic protocol.
|
||||
///
|
||||
/// Guarantees (Anthropic Messages API contract):
|
||||
/// 1. `message_stop` is never emitted unless a matching `message_start` was emitted first.
|
||||
/// 2. `message_stop` is emitted at most once per stream (no double-close).
|
||||
/// 3. If upstream terminates with no content (empty/filtered/errored response), a
|
||||
/// minimal but well-formed envelope is synthesized so the client's state machine
|
||||
/// stays consistent.
|
||||
pub struct AnthropicMessagesStreamBuffer {
|
||||
/// Buffered SSE events ready to be written to wire
|
||||
buffered_events: Vec<SseEvent>,
|
||||
|
||||
/// Track if we've seen a message_start event
|
||||
/// Track if we've emitted a message_start event
|
||||
message_started: bool,
|
||||
|
||||
/// Track if we've emitted a terminal message_stop event (for idempotency /
|
||||
/// double-close protection).
|
||||
message_stopped: bool,
|
||||
|
||||
/// Track content block indices that have received ContentBlockStart events
|
||||
content_block_start_indices: HashSet<i32>,
|
||||
|
||||
|
|
@ -42,6 +56,7 @@ impl AnthropicMessagesStreamBuffer {
|
|||
Self {
|
||||
buffered_events: Vec::new(),
|
||||
message_started: false,
|
||||
message_stopped: false,
|
||||
content_block_start_indices: HashSet::new(),
|
||||
needs_content_block_stop: false,
|
||||
seen_message_delta: false,
|
||||
|
|
@ -49,6 +64,66 @@ impl AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
}
|
||||
|
||||
/// Inject a `message_start` event into the buffer if one hasn't been emitted yet.
|
||||
/// This is the single source of truth for opening a message — every handler
|
||||
/// that can legitimately be the first event on the wire must call this before
|
||||
/// pushing its own event.
|
||||
fn ensure_message_started(&mut self) {
|
||||
if self.message_started {
|
||||
return;
|
||||
}
|
||||
let model = self.model.as_deref().unwrap_or("unknown");
|
||||
let message_start = AnthropicMessagesStreamBuffer::create_message_start_event(model);
|
||||
self.buffered_events.push(message_start);
|
||||
self.message_started = true;
|
||||
}
|
||||
|
||||
/// Inject a synthetic `message_delta` with `end_turn` / zero usage.
|
||||
/// Used when we must close a message but upstream never produced a terminal
|
||||
/// event (e.g. `[DONE]` arrives with no prior `finish_reason`).
|
||||
fn push_synthetic_message_delta(&mut self) {
|
||||
let event = MessagesStreamEvent::MessageDelta {
|
||||
delta: MessagesMessageDelta {
|
||||
stop_reason: MessagesStopReason::EndTurn,
|
||||
stop_sequence: None,
|
||||
},
|
||||
usage: MessagesUsage {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
cache_creation_input_tokens: None,
|
||||
cache_read_input_tokens: None,
|
||||
},
|
||||
};
|
||||
let sse_string: String = event.clone().into();
|
||||
self.buffered_events.push(SseEvent {
|
||||
data: None,
|
||||
event: Some("message_delta".to_string()),
|
||||
raw_line: sse_string.clone(),
|
||||
sse_transformed_lines: sse_string,
|
||||
provider_stream_response: Some(ProviderStreamResponseType::MessagesStreamEvent(event)),
|
||||
});
|
||||
self.seen_message_delta = true;
|
||||
}
|
||||
|
||||
/// Inject a `message_stop` event into the buffer, marking the stream as closed.
|
||||
/// Idempotent — subsequent calls are no-ops.
|
||||
fn push_message_stop(&mut self) {
|
||||
if self.message_stopped {
|
||||
return;
|
||||
}
|
||||
let message_stop = MessagesStreamEvent::MessageStop;
|
||||
let sse_string: String = message_stop.into();
|
||||
self.buffered_events.push(SseEvent {
|
||||
data: None,
|
||||
event: Some("message_stop".to_string()),
|
||||
raw_line: sse_string.clone(),
|
||||
sse_transformed_lines: sse_string,
|
||||
provider_stream_response: None,
|
||||
});
|
||||
self.message_stopped = true;
|
||||
self.seen_message_delta = false;
|
||||
}
|
||||
|
||||
/// Check if a content_block_start event has been sent for the given index
|
||||
fn has_content_block_start_been_sent(&self, index: i32) -> bool {
|
||||
self.content_block_start_indices.contains(&index)
|
||||
|
|
@ -149,6 +224,27 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
// We match on a reference first to determine the type, then move the event
|
||||
match &event.provider_stream_response {
|
||||
Some(ProviderStreamResponseType::MessagesStreamEvent(evt)) => {
|
||||
// If the message has already been closed, drop any trailing events
|
||||
// to avoid emitting data after `message_stop` (protocol violation).
|
||||
// This typically indicates a duplicate `[DONE]` from upstream or a
|
||||
// replay of previously-buffered bytes — worth surfacing so we can
|
||||
// spot misbehaving providers.
|
||||
if self.message_stopped {
|
||||
warn!(
|
||||
"anthropic stream buffer: dropping event after message_stop (variant={})",
|
||||
match evt {
|
||||
MessagesStreamEvent::MessageStart { .. } => "message_start",
|
||||
MessagesStreamEvent::ContentBlockStart { .. } => "content_block_start",
|
||||
MessagesStreamEvent::ContentBlockDelta { .. } => "content_block_delta",
|
||||
MessagesStreamEvent::ContentBlockStop { .. } => "content_block_stop",
|
||||
MessagesStreamEvent::MessageDelta { .. } => "message_delta",
|
||||
MessagesStreamEvent::MessageStop => "message_stop",
|
||||
MessagesStreamEvent::Ping => "ping",
|
||||
}
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
match evt {
|
||||
MessagesStreamEvent::MessageStart { .. } => {
|
||||
// Add the message_start event
|
||||
|
|
@ -157,14 +253,7 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
MessagesStreamEvent::ContentBlockStart { index, .. } => {
|
||||
let index = *index as i32;
|
||||
// Inject message_start if needed
|
||||
if !self.message_started {
|
||||
let model = self.model.as_deref().unwrap_or("unknown");
|
||||
let message_start =
|
||||
AnthropicMessagesStreamBuffer::create_message_start_event(model);
|
||||
self.buffered_events.push(message_start);
|
||||
self.message_started = true;
|
||||
}
|
||||
self.ensure_message_started();
|
||||
|
||||
// Add the content_block_start event (from tool calls or other sources)
|
||||
self.buffered_events.push(event);
|
||||
|
|
@ -173,14 +262,7 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
MessagesStreamEvent::ContentBlockDelta { index, .. } => {
|
||||
let index = *index as i32;
|
||||
// Inject message_start if needed
|
||||
if !self.message_started {
|
||||
let model = self.model.as_deref().unwrap_or("unknown");
|
||||
let message_start =
|
||||
AnthropicMessagesStreamBuffer::create_message_start_event(model);
|
||||
self.buffered_events.push(message_start);
|
||||
self.message_started = true;
|
||||
}
|
||||
self.ensure_message_started();
|
||||
|
||||
// Check if ContentBlockStart was sent for this index
|
||||
if !self.has_content_block_start_been_sent(index) {
|
||||
|
|
@ -196,6 +278,11 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
self.buffered_events.push(event);
|
||||
}
|
||||
MessagesStreamEvent::MessageDelta { usage, .. } => {
|
||||
// `message_delta` is only meaningful inside an open message.
|
||||
// Upstream can send it with no prior content (empty completion,
|
||||
// content filter, etc.), so we must open a message first.
|
||||
self.ensure_message_started();
|
||||
|
||||
// Inject ContentBlockStop before message_delta
|
||||
if self.needs_content_block_stop {
|
||||
let content_block_stop =
|
||||
|
|
@ -230,15 +317,52 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
MessagesStreamEvent::ContentBlockStop { .. } => {
|
||||
// ContentBlockStop received from upstream (e.g., Bedrock)
|
||||
self.ensure_message_started();
|
||||
// Clear the flag so we don't inject another one
|
||||
self.needs_content_block_stop = false;
|
||||
self.buffered_events.push(event);
|
||||
}
|
||||
MessagesStreamEvent::MessageStop => {
|
||||
// MessageStop received from upstream (e.g., OpenAI via [DONE])
|
||||
// Clear the flag so we don't inject another one
|
||||
self.seen_message_delta = false;
|
||||
// MessageStop received from upstream (e.g., OpenAI via [DONE]).
|
||||
//
|
||||
// The Anthropic protocol requires the full envelope
|
||||
// message_start → [content blocks] → message_delta → message_stop
|
||||
// so we must not emit a bare `message_stop`. Synthesize whatever
|
||||
// is missing to keep the client's state machine consistent.
|
||||
self.ensure_message_started();
|
||||
|
||||
if self.needs_content_block_stop {
|
||||
let content_block_stop =
|
||||
AnthropicMessagesStreamBuffer::create_content_block_stop_event();
|
||||
self.buffered_events.push(content_block_stop);
|
||||
self.needs_content_block_stop = false;
|
||||
}
|
||||
|
||||
// If no message_delta has been emitted yet (empty/filtered upstream
|
||||
// response), synthesize a minimal one carrying `end_turn`.
|
||||
if !self.seen_message_delta {
|
||||
// If we also never opened a content block, open and close one
|
||||
// so clients that expect at least one block are happy.
|
||||
if self.content_block_start_indices.is_empty() {
|
||||
let content_block_start =
|
||||
AnthropicMessagesStreamBuffer::create_content_block_start_event(
|
||||
);
|
||||
self.buffered_events.push(content_block_start);
|
||||
self.set_content_block_start_sent(0);
|
||||
let content_block_stop =
|
||||
AnthropicMessagesStreamBuffer::create_content_block_stop_event(
|
||||
);
|
||||
self.buffered_events.push(content_block_stop);
|
||||
}
|
||||
self.push_synthetic_message_delta();
|
||||
}
|
||||
|
||||
// Push the upstream-provided message_stop and mark closed.
|
||||
// `push_message_stop` is idempotent but we want to reuse the
|
||||
// original SseEvent so raw passthrough semantics are preserved.
|
||||
self.buffered_events.push(event);
|
||||
self.message_stopped = true;
|
||||
self.seen_message_delta = false;
|
||||
}
|
||||
_ => {
|
||||
// Other Anthropic event types (Ping, etc.), just accumulate
|
||||
|
|
@ -254,24 +378,23 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
|
||||
fn to_bytes(&mut self) -> Vec<u8> {
|
||||
// Convert all accumulated events to bytes and clear buffer
|
||||
// Convert all accumulated events to bytes and clear buffer.
|
||||
//
|
||||
// NOTE: We do NOT inject ContentBlockStop here because it's injected when we see MessageDelta
|
||||
// or MessageStop. Injecting it here causes premature ContentBlockStop in the middle of streaming.
|
||||
|
||||
// Inject MessageStop after MessageDelta if we've seen one
|
||||
// This completes the Anthropic Messages API event sequence
|
||||
if self.seen_message_delta {
|
||||
let message_stop = MessagesStreamEvent::MessageStop;
|
||||
let sse_string: String = message_stop.into();
|
||||
let message_stop_event = SseEvent {
|
||||
data: None,
|
||||
event: Some("message_stop".to_string()),
|
||||
raw_line: sse_string.clone(),
|
||||
sse_transformed_lines: sse_string,
|
||||
provider_stream_response: None,
|
||||
};
|
||||
self.buffered_events.push(message_stop_event);
|
||||
self.seen_message_delta = false;
|
||||
//
|
||||
// Inject a synthetic `message_stop` only when:
|
||||
// 1. A `message_delta` has been seen (otherwise we'd violate the Anthropic
|
||||
// protocol by emitting `message_stop` without a preceding `message_delta`), AND
|
||||
// 2. We haven't already emitted `message_stop` (either synthetic from a
|
||||
// previous flush, or real from an upstream `[DONE]`).
|
||||
//
|
||||
// Without the `!message_stopped` guard, a stream whose `finish_reason` chunk
|
||||
// and `[DONE]` marker land in separate HTTP body chunks would receive two
|
||||
// `message_stop` events, triggering Claude Code's "Received message_stop
|
||||
// without a current message" error.
|
||||
if self.seen_message_delta && !self.message_stopped {
|
||||
self.push_message_stop();
|
||||
}
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
|
|
@ -615,4 +738,133 @@ data: [DONE]"#;
|
|||
println!("✓ Stop reason: tool_use");
|
||||
println!("✓ Proper Anthropic tool_use protocol\n");
|
||||
}
|
||||
|
||||
/// Regression test for:
|
||||
/// Claude Code CLI error: "Received message_stop without a current message"
|
||||
///
|
||||
/// Reproduces the *double-close* scenario: OpenAI's final `finish_reason`
|
||||
/// chunk and the `[DONE]` marker arrive in **separate** HTTP body chunks, so
|
||||
/// `to_bytes()` is called between them. Before the fix, this produced two
|
||||
/// `message_stop` events on the wire (one synthetic, one from `[DONE]`).
|
||||
#[test]
|
||||
fn test_openai_to_anthropic_emits_single_message_stop_across_chunk_boundary() {
|
||||
let client_api = SupportedAPIsFromClient::AnthropicMessagesAPI(AnthropicApi::Messages);
|
||||
let upstream_api = SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
let mut buffer = AnthropicMessagesStreamBuffer::new();
|
||||
|
||||
// --- HTTP chunk 1: content + finish_reason (no [DONE] yet) -----------
|
||||
let chunk_1 = r#"data: {"id":"c1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant","content":"Hi"},"finish_reason":null}]}
|
||||
|
||||
data: {"id":"c1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}"#;
|
||||
|
||||
for raw in SseStreamIter::try_from(chunk_1.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let out_1 = String::from_utf8(buffer.to_bytes()).unwrap();
|
||||
|
||||
// --- HTTP chunk 2: just the [DONE] marker ----------------------------
|
||||
let chunk_2 = "data: [DONE]";
|
||||
for raw in SseStreamIter::try_from(chunk_2.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let out_2 = String::from_utf8(buffer.to_bytes()).unwrap();
|
||||
|
||||
let combined = format!("{}{}", out_1, out_2);
|
||||
let start_count = combined.matches("event: message_start").count();
|
||||
let stop_count = combined.matches("event: message_stop").count();
|
||||
|
||||
assert_eq!(
|
||||
start_count, 1,
|
||||
"Must emit exactly one message_start across chunks, got {start_count}. Output:\n{combined}"
|
||||
);
|
||||
assert_eq!(
|
||||
stop_count, 1,
|
||||
"Must emit exactly one message_stop across chunks (no double-close), got {stop_count}. Output:\n{combined}"
|
||||
);
|
||||
// Every message_stop must be preceded by a message_start earlier in the stream.
|
||||
let start_pos = combined.find("event: message_start").unwrap();
|
||||
let stop_pos = combined.find("event: message_stop").unwrap();
|
||||
assert!(
|
||||
start_pos < stop_pos,
|
||||
"message_start must come before message_stop. Output:\n{combined}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression test for:
|
||||
/// "Received message_stop without a current message" on empty upstream responses.
|
||||
///
|
||||
/// OpenAI returns only `[DONE]` with no content deltas and no `finish_reason`
|
||||
/// (this happens with content filters, truncated upstream streams, and some
|
||||
/// 5xx recoveries). Before the fix, the buffer emitted a bare `message_stop`
|
||||
/// with no preceding `message_start`. After the fix, it synthesizes a
|
||||
/// minimal but well-formed envelope.
|
||||
#[test]
|
||||
fn test_openai_done_only_stream_synthesizes_valid_envelope() {
|
||||
let client_api = SupportedAPIsFromClient::AnthropicMessagesAPI(AnthropicApi::Messages);
|
||||
let upstream_api = SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
let mut buffer = AnthropicMessagesStreamBuffer::new();
|
||||
|
||||
let raw_input = "data: [DONE]";
|
||||
for raw in SseStreamIter::try_from(raw_input.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let out = String::from_utf8(buffer.to_bytes()).unwrap();
|
||||
|
||||
assert!(
|
||||
out.contains("event: message_start"),
|
||||
"Empty upstream must still produce message_start. Output:\n{out}"
|
||||
);
|
||||
assert!(
|
||||
out.contains("event: message_delta"),
|
||||
"Empty upstream must produce a synthesized message_delta. Output:\n{out}"
|
||||
);
|
||||
assert_eq!(
|
||||
out.matches("event: message_stop").count(),
|
||||
1,
|
||||
"Empty upstream must produce exactly one message_stop. Output:\n{out}"
|
||||
);
|
||||
|
||||
// Protocol ordering: start < delta < stop.
|
||||
let p_start = out.find("event: message_start").unwrap();
|
||||
let p_delta = out.find("event: message_delta").unwrap();
|
||||
let p_stop = out.find("event: message_stop").unwrap();
|
||||
assert!(
|
||||
p_start < p_delta && p_delta < p_stop,
|
||||
"Bad ordering. Output:\n{out}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression test: events arriving after `message_stop` (e.g. a stray `[DONE]`
|
||||
/// echo, or late-arriving deltas from a racing upstream) must be dropped
|
||||
/// rather than written after the terminal frame.
|
||||
#[test]
|
||||
fn test_events_after_message_stop_are_dropped() {
|
||||
let client_api = SupportedAPIsFromClient::AnthropicMessagesAPI(AnthropicApi::Messages);
|
||||
let upstream_api = SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
let mut buffer = AnthropicMessagesStreamBuffer::new();
|
||||
|
||||
let first = r#"data: {"id":"c1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"ok"},"finish_reason":"stop"}]}
|
||||
|
||||
data: [DONE]"#;
|
||||
for raw in SseStreamIter::try_from(first.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let _ = buffer.to_bytes();
|
||||
|
||||
// Simulate a duplicate / late `[DONE]` after the stream was already closed.
|
||||
let late = "data: [DONE]";
|
||||
for raw in SseStreamIter::try_from(late.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let tail = String::from_utf8(buffer.to_bytes()).unwrap();
|
||||
assert!(
|
||||
tail.is_empty(),
|
||||
"No bytes should be emitted after message_stop, got: {tail:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ providers:
|
|||
anthropic:
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- anthropic/claude-opus-4-6
|
||||
- anthropic/claude-opus-4-7
|
||||
- anthropic/claude-opus-4-5-20251101
|
||||
- anthropic/claude-opus-4-5
|
||||
- anthropic/claude-haiku-4-5-20251001
|
||||
|
|
@ -332,7 +333,53 @@ providers:
|
|||
- chatgpt/gpt-5.4
|
||||
- chatgpt/gpt-5.3-codex
|
||||
- chatgpt/gpt-5.2
|
||||
digitalocean:
|
||||
- digitalocean/openai-gpt-4.1
|
||||
- digitalocean/openai-gpt-4o
|
||||
- digitalocean/openai-gpt-4o-mini
|
||||
- digitalocean/openai-gpt-5
|
||||
- digitalocean/openai-gpt-5-mini
|
||||
- digitalocean/openai-gpt-5-nano
|
||||
- digitalocean/openai-gpt-5.1-codex-max
|
||||
- digitalocean/openai-gpt-5.2
|
||||
- digitalocean/openai-gpt-5.2-pro
|
||||
- digitalocean/openai-gpt-5.3-codex
|
||||
- digitalocean/openai-gpt-5.4
|
||||
- digitalocean/openai-gpt-5.4-mini
|
||||
- digitalocean/openai-gpt-5.4-nano
|
||||
- digitalocean/openai-gpt-5.4-pro
|
||||
- digitalocean/openai-gpt-oss-120b
|
||||
- digitalocean/openai-gpt-oss-20b
|
||||
- digitalocean/openai-o1
|
||||
- digitalocean/openai-o3
|
||||
- digitalocean/openai-o3-mini
|
||||
- digitalocean/anthropic-claude-4.1-opus
|
||||
- digitalocean/anthropic-claude-4.5-sonnet
|
||||
- digitalocean/anthropic-claude-4.6-sonnet
|
||||
- digitalocean/anthropic-claude-haiku-4.5
|
||||
- digitalocean/anthropic-claude-opus-4
|
||||
- digitalocean/anthropic-claude-opus-4.5
|
||||
- digitalocean/anthropic-claude-opus-4.6
|
||||
- digitalocean/anthropic-claude-opus-4.7
|
||||
- digitalocean/anthropic-claude-sonnet-4
|
||||
- digitalocean/alibaba-qwen3-32b
|
||||
- digitalocean/arcee-trinity-large-thinking
|
||||
- digitalocean/deepseek-3.2
|
||||
- digitalocean/deepseek-r1-distill-llama-70b
|
||||
- digitalocean/gemma-4-31B-it
|
||||
- digitalocean/glm-5
|
||||
- digitalocean/kimi-k2.5
|
||||
- digitalocean/llama3.3-70b-instruct
|
||||
- digitalocean/minimax-m2.5
|
||||
- digitalocean/nvidia-nemotron-3-super-120b
|
||||
- digitalocean/qwen3-coder-flash
|
||||
- digitalocean/qwen3.5-397b-a17b
|
||||
- digitalocean/all-mini-lm-l6-v2
|
||||
- digitalocean/gte-large-en-v1.5
|
||||
- digitalocean/multi-qa-mpnet-base-dot-v1
|
||||
- digitalocean/qwen3-embedding-0.6b
|
||||
- digitalocean/router:software-engineering
|
||||
metadata:
|
||||
total_providers: 12
|
||||
total_models: 319
|
||||
last_updated: 2026-04-03T23:14:46.956158+00:00
|
||||
total_providers: 13
|
||||
total_models: 364
|
||||
last_updated: 2026-04-20T00:00:00.000000+00:00
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ pub enum ProviderId {
|
|||
Qwen,
|
||||
AmazonBedrock,
|
||||
ChatGPT,
|
||||
DigitalOcean,
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for ProviderId {
|
||||
|
|
@ -73,6 +74,9 @@ impl TryFrom<&str> for ProviderId {
|
|||
"amazon_bedrock" => Ok(ProviderId::AmazonBedrock),
|
||||
"amazon" => Ok(ProviderId::AmazonBedrock), // alias
|
||||
"chatgpt" => Ok(ProviderId::ChatGPT),
|
||||
"digitalocean" => Ok(ProviderId::DigitalOcean),
|
||||
"do" => Ok(ProviderId::DigitalOcean), // alias
|
||||
"do_ai" => Ok(ProviderId::DigitalOcean), // alias
|
||||
_ => Err(format!("Unknown provider: {}", value)),
|
||||
}
|
||||
}
|
||||
|
|
@ -98,6 +102,7 @@ impl ProviderId {
|
|||
ProviderId::Zhipu => "z-ai",
|
||||
ProviderId::Qwen => "qwen",
|
||||
ProviderId::ChatGPT => "chatgpt",
|
||||
ProviderId::DigitalOcean => "digitalocean",
|
||||
_ => return Vec::new(),
|
||||
};
|
||||
|
||||
|
|
@ -152,7 +157,8 @@ impl ProviderId {
|
|||
| ProviderId::Moonshotai
|
||||
| ProviderId::Zhipu
|
||||
| ProviderId::Qwen
|
||||
| ProviderId::ChatGPT,
|
||||
| ProviderId::ChatGPT
|
||||
| ProviderId::DigitalOcean,
|
||||
SupportedAPIsFromClient::AnthropicMessagesAPI(_),
|
||||
) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
||||
|
||||
|
|
@ -172,7 +178,8 @@ impl ProviderId {
|
|||
| ProviderId::Moonshotai
|
||||
| ProviderId::Zhipu
|
||||
| ProviderId::Qwen
|
||||
| ProviderId::ChatGPT,
|
||||
| ProviderId::ChatGPT
|
||||
| ProviderId::DigitalOcean,
|
||||
SupportedAPIsFromClient::OpenAIChatCompletions(_),
|
||||
) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
||||
|
||||
|
|
@ -240,6 +247,7 @@ impl Display for ProviderId {
|
|||
ProviderId::Qwen => write!(f, "qwen"),
|
||||
ProviderId::AmazonBedrock => write!(f, "amazon_bedrock"),
|
||||
ProviderId::ChatGPT => write!(f, "chatgpt"),
|
||||
ProviderId::DigitalOcean => write!(f, "digitalocean"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,31 @@ pub trait TokenUsage {
|
|||
fn completion_tokens(&self) -> usize;
|
||||
fn prompt_tokens(&self) -> usize;
|
||||
fn total_tokens(&self) -> usize;
|
||||
/// Tokens served from a prompt cache read (OpenAI `prompt_tokens_details.cached_tokens`,
|
||||
/// Anthropic `cache_read_input_tokens`, Google `cached_content_token_count`).
|
||||
fn cached_input_tokens(&self) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
/// Tokens used to write a cache entry (Anthropic `cache_creation_input_tokens`).
|
||||
fn cache_creation_tokens(&self) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
/// Reasoning tokens for reasoning models (OpenAI `completion_tokens_details.reasoning_tokens`,
|
||||
/// Google `thoughts_token_count`).
|
||||
fn reasoning_tokens(&self) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Rich usage breakdown extracted from a provider response.
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
pub struct UsageDetails {
|
||||
pub prompt_tokens: usize,
|
||||
pub completion_tokens: usize,
|
||||
pub total_tokens: usize,
|
||||
pub cached_input_tokens: Option<usize>,
|
||||
pub cache_creation_tokens: Option<usize>,
|
||||
pub reasoning_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
pub trait ProviderResponse: Send + Sync {
|
||||
|
|
@ -34,6 +59,18 @@ pub trait ProviderResponse: Send + Sync {
|
|||
self.usage()
|
||||
.map(|u| (u.prompt_tokens(), u.completion_tokens(), u.total_tokens()))
|
||||
}
|
||||
|
||||
/// Extract a rich usage breakdown including cached/cache-creation/reasoning tokens.
|
||||
fn extract_usage_details(&self) -> Option<UsageDetails> {
|
||||
self.usage().map(|u| UsageDetails {
|
||||
prompt_tokens: u.prompt_tokens(),
|
||||
completion_tokens: u.completion_tokens(),
|
||||
total_tokens: u.total_tokens(),
|
||||
cached_input_tokens: u.cached_input_tokens(),
|
||||
cache_creation_tokens: u.cache_creation_tokens(),
|
||||
reasoning_tokens: u.reasoning_tokens(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ProviderResponse for ProviderResponseType {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue