mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
/v1/messages works with transformations to and from /v1/chat/completions
This commit is contained in:
parent
2813a8cfa5
commit
ecf453ed70
10 changed files with 495 additions and 254 deletions
|
|
@ -149,8 +149,8 @@ pub struct EmbeddingProviver {
|
|||
pub enum LlmProviderType {
|
||||
#[serde(rename = "arch")]
|
||||
Arch,
|
||||
#[serde(rename = "claude")]
|
||||
Claude,
|
||||
#[serde(rename = "anthropic")]
|
||||
Anthropic,
|
||||
#[serde(rename = "deepseek")]
|
||||
Deepseek,
|
||||
#[serde(rename = "groq")]
|
||||
|
|
@ -167,7 +167,7 @@ impl Display for LlmProviderType {
|
|||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
LlmProviderType::Arch => write!(f, "arch"),
|
||||
LlmProviderType::Claude => write!(f, "claude"),
|
||||
LlmProviderType::Anthropic => write!(f, "anthropic"),
|
||||
LlmProviderType::Deepseek => write!(f, "deepseek"),
|
||||
LlmProviderType::Groq => write!(f, "groq"),
|
||||
LlmProviderType::Gemini => write!(f, "gemini"),
|
||||
|
|
|
|||
|
|
@ -3,11 +3,10 @@ use serde::{Deserialize, Serialize};
|
|||
use serde_json::Value;
|
||||
use serde_with::skip_serializing_none;
|
||||
use std::collections::HashMap;
|
||||
use std::error::Error;
|
||||
|
||||
use super::ApiDefinition;
|
||||
use crate::providers::request::{ProviderRequest, ProviderRequestError};
|
||||
use crate::providers::response::{ProviderStreamResponse, SseStreamIter};
|
||||
use crate::providers::response::ProviderStreamResponse;
|
||||
use crate::clients::transformer::ExtractText;
|
||||
|
||||
// Enum for all supported Anthropic APIs
|
||||
|
|
@ -520,55 +519,6 @@ impl MessagesRole {
|
|||
}
|
||||
}
|
||||
|
||||
// Anthropic SSE streaming iterator for MessagesStreamEvent
|
||||
pub struct AnthropicSseIter<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: AsRef<str>,
|
||||
{
|
||||
sse_stream: SseStreamIter<I>,
|
||||
}
|
||||
|
||||
impl<I> AnthropicSseIter<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: AsRef<str>,
|
||||
{
|
||||
pub fn new(sse_stream: SseStreamIter<I>) -> Self {
|
||||
Self { sse_stream }
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> Iterator for AnthropicSseIter<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: AsRef<str>,
|
||||
{
|
||||
type Item = Result<Box<dyn ProviderStreamResponse>, Box<dyn Error + Send + Sync>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
for line in &mut self.sse_stream.lines {
|
||||
let line = line.as_ref();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if line.starts_with("data: ") {
|
||||
let data = &line[6..];
|
||||
if data == "[DONE]" {
|
||||
return None;
|
||||
}
|
||||
// Anthropic-specific parsing of MessagesStreamEvent
|
||||
match serde_json::from_str::<MessagesStreamEvent>(data) {
|
||||
Ok(event) => return Some(Ok(Box::new(event))),
|
||||
Err(e) => return Some(Err(Box::new(e))),
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// Implement ProviderStreamResponse for MessagesStreamEvent
|
||||
impl ProviderStreamResponse for MessagesStreamEvent {
|
||||
fn content_delta(&self) -> Option<&str> {
|
||||
|
|
@ -594,6 +544,7 @@ impl ProviderStreamResponse for MessagesStreamEvent {
|
|||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use std::fmt::Display;
|
|||
use thiserror::Error;
|
||||
|
||||
use crate::providers::request::{ProviderRequest, ProviderRequestError};
|
||||
use crate::providers::response::{ProviderResponse, ProviderStreamResponse, TokenUsage, SseStreamIter};
|
||||
use crate::providers::response::{ProviderResponse, ProviderStreamResponse, TokenUsage};
|
||||
use super::ApiDefinition;
|
||||
use crate::clients::transformer::{ExtractText};
|
||||
|
||||
|
|
@ -615,68 +615,6 @@ impl ProviderResponse for ChatCompletionsResponse {
|
|||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// OPENAI SSE STREAMING ITERATOR
|
||||
// ============================================================================
|
||||
|
||||
/// OpenAI-specific SSE streaming iterator
|
||||
/// Handles OpenAI's specific SSE format and ChatCompletionsStreamResponse parsing
|
||||
pub struct OpenAISseIter<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: AsRef<str>,
|
||||
{
|
||||
sse_stream: SseStreamIter<I>,
|
||||
}
|
||||
|
||||
impl<I> OpenAISseIter<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: AsRef<str>,
|
||||
{
|
||||
pub fn new(sse_stream: SseStreamIter<I>) -> Self {
|
||||
Self { sse_stream }
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> Iterator for OpenAISseIter<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: AsRef<str>,
|
||||
{
|
||||
type Item = Result<Box<dyn ProviderStreamResponse>, Box<dyn std::error::Error + Send + Sync>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
for line in &mut self.sse_stream.lines {
|
||||
let line = line.as_ref();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if line.starts_with("data: ") {
|
||||
let data = &line[6..]; // Remove "data: " prefix
|
||||
if data == "[DONE]" {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Skip ping messages (usually from other providers, but handle gracefully)
|
||||
if data == r#"{"type": "ping"}"# {
|
||||
continue;
|
||||
}
|
||||
|
||||
// OpenAI-specific parsing of ChatCompletionsStreamResponse
|
||||
match serde_json::from_str::<ChatCompletionsStreamResponse>(data) {
|
||||
Ok(response) => return Some(Ok(Box::new(response))),
|
||||
Err(e) => return Some(Err(Box::new(
|
||||
OpenAIStreamError::InvalidStreamingData(format!("Error parsing OpenAI streaming data: {}, data: {}", e, data))
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// Direct implementation of ProviderStreamResponse trait on ChatCompletionsStreamResponse
|
||||
impl ProviderStreamResponse for ChatCompletionsStreamResponse {
|
||||
fn content_delta(&self) -> Option<&str> {
|
||||
|
|
@ -702,6 +640,7 @@ impl ProviderStreamResponse for ChatCompletionsStreamResponse {
|
|||
Role::Tool => "tool",
|
||||
}))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ impl SupportedAPIs {
|
|||
match self {
|
||||
SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages) => {
|
||||
match provider_id {
|
||||
ProviderId::Claude => "/v1/messages".to_string(),
|
||||
ProviderId::Anthropic => "/v1/messages".to_string(),
|
||||
_ => default_endpoint,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ pub mod clients;
|
|||
|
||||
// Re-export important types and traits
|
||||
pub use providers::request::{ProviderRequestType, ProviderRequest, ProviderRequestError};
|
||||
pub use providers::response::{ProviderResponseType, ProviderResponse, ProviderStreamResponse, ProviderStreamResponseIter, ProviderResponseError, TokenUsage};
|
||||
pub use providers::response::{ProviderResponseType, ProviderStreamResponseType, ProviderResponse, ProviderStreamResponse, ProviderResponseError, TokenUsage, SseEvent, SseStreamIter};
|
||||
pub use providers::id::ProviderId;
|
||||
pub use providers::adapters::{has_compatible_api, supported_apis};
|
||||
|
||||
|
|
@ -68,29 +68,38 @@ mod tests {
|
|||
// Test streaming response parsing with sample SSE data
|
||||
let sse_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}
|
||||
|
||||
data: [DONE]
|
||||
"#;
|
||||
data: [DONE]
|
||||
"#;
|
||||
|
||||
use crate::clients::endpoints::SupportedAPIs;
|
||||
let api = SupportedAPIs::OpenAIChatCompletions(crate::apis::OpenAIApi::ChatCompletions);
|
||||
let result = ProviderStreamResponseIter::try_from((sse_data.as_bytes(), &api, &ProviderId::OpenAI));
|
||||
assert!(result.is_ok());
|
||||
|
||||
let mut streaming_response = result.unwrap();
|
||||
// Test the new simplified architecture - create SseStreamIter directly
|
||||
let sse_iter = SseStreamIter::try_from(sse_data.as_bytes());
|
||||
assert!(sse_iter.is_ok());
|
||||
|
||||
// Test that we can iterate over chunks - it's just an iterator now!
|
||||
let first_chunk = streaming_response.next();
|
||||
assert!(first_chunk.is_some());
|
||||
let mut streaming_iter = sse_iter.unwrap();
|
||||
|
||||
let chunk_result = first_chunk.unwrap();
|
||||
assert!(chunk_result.is_ok());
|
||||
// Test that we can iterate over SseEvents
|
||||
let first_event = streaming_iter.next();
|
||||
assert!(first_event.is_some());
|
||||
|
||||
let chunk = chunk_result.unwrap();
|
||||
assert_eq!(chunk.content_delta(), Some("Hello"));
|
||||
assert!(!chunk.is_final());
|
||||
let sse_event = first_event.unwrap();
|
||||
|
||||
// Test that stream ends properly
|
||||
let final_chunk = streaming_response.next();
|
||||
assert!(final_chunk.is_none());
|
||||
// Test SseEvent properties
|
||||
assert!(!sse_event.is_done());
|
||||
assert!(sse_event.data.contains("Hello"));
|
||||
|
||||
// Test that we can parse the event into a provider stream response
|
||||
let provider_response = sse_event.to_provider_stream_response(&api);
|
||||
assert!(provider_response.is_ok());
|
||||
|
||||
let stream_response = provider_response.unwrap();
|
||||
assert_eq!(stream_response.content_delta(), Some("Hello"));
|
||||
assert!(!stream_response.is_final());
|
||||
|
||||
// Test that stream ends properly with [DONE] (SseStreamIter should stop before [DONE])
|
||||
let final_event = streaming_iter.next();
|
||||
assert!(final_event.is_none()); // Should be None because iterator stops at [DONE]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ pub fn get_provider_config(provider_id: &ProviderId) -> ProviderConfig {
|
|||
adapter_type: AdapterType::OpenAICompatible,
|
||||
}
|
||||
}
|
||||
ProviderId::Claude => {
|
||||
ProviderId::Anthropic => {
|
||||
ProviderConfig {
|
||||
supported_apis: &["/v1/messages", "/v1/chat/completions"],
|
||||
adapter_type: AdapterType::AnthropicCompatible,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ pub enum ProviderId {
|
|||
Deepseek,
|
||||
Groq,
|
||||
Gemini,
|
||||
Claude,
|
||||
Anthropic,
|
||||
GitHub,
|
||||
Arch,
|
||||
}
|
||||
|
|
@ -23,7 +23,7 @@ impl From<&str> for ProviderId {
|
|||
"deepseek" => ProviderId::Deepseek,
|
||||
"groq" => ProviderId::Groq,
|
||||
"gemini" => ProviderId::Gemini,
|
||||
"claude" => ProviderId::Claude,
|
||||
"anthropic" => ProviderId::Anthropic,
|
||||
"github" => ProviderId::GitHub,
|
||||
"arch" => ProviderId::Arch,
|
||||
_ => panic!("Unknown provider: {}", value),
|
||||
|
|
@ -36,9 +36,9 @@ impl ProviderId {
|
|||
pub fn compatible_api_for_client(&self, client_api: &SupportedAPIs) -> SupportedAPIs {
|
||||
match (self, client_api) {
|
||||
// Claude/Anthropic providers natively support Anthropic APIs
|
||||
(ProviderId::Claude, SupportedAPIs::AnthropicMessagesAPI(_)) => client_api.clone(),
|
||||
(ProviderId::Anthropic, SupportedAPIs::AnthropicMessagesAPI(_)) => client_api.clone(),
|
||||
// Claude/Anthropic providers can also support OpenAI chat completions by mapping to Anthropic Messages
|
||||
(ProviderId::Claude, SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions)) => SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages),
|
||||
(ProviderId::Anthropic, SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions)) => SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages),
|
||||
|
||||
// OpenAI-compatible providers only support OpenAI chat completions
|
||||
(ProviderId::OpenAI | ProviderId::Groq | ProviderId::Mistral | ProviderId::Deepseek | ProviderId::Arch | ProviderId::Gemini | ProviderId::GitHub, SupportedAPIs::AnthropicMessagesAPI(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
||||
|
|
@ -55,7 +55,7 @@ impl Display for ProviderId {
|
|||
ProviderId::Deepseek => write!(f, "Deepseek"),
|
||||
ProviderId::Groq => write!(f, "Groq"),
|
||||
ProviderId::Gemini => write!(f, "Gemini"),
|
||||
ProviderId::Claude => write!(f, "Claude"),
|
||||
ProviderId::Anthropic => write!(f, "Anthropic"),
|
||||
ProviderId::GitHub => write!(f, "GitHub"),
|
||||
ProviderId::Arch => write!(f, "Arch"),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,5 +10,5 @@ pub mod adapters;
|
|||
|
||||
pub use id::ProviderId;
|
||||
pub use request::{ProviderRequestType, ProviderRequest, ProviderRequestError} ;
|
||||
pub use response::{ProviderResponseType, ProviderStreamResponseIter, ProviderResponse, ProviderStreamResponse, TokenUsage };
|
||||
pub use response::{ProviderResponseType, ProviderResponse, ProviderStreamResponse, TokenUsage };
|
||||
pub use adapters::*;
|
||||
|
|
|
|||
|
|
@ -1,29 +1,152 @@
|
|||
use crate::providers::id::ProviderId;
|
||||
use serde::Serialize;
|
||||
use serde::{Serialize, Deserialize};
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use std::convert::TryFrom;
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::apis::openai::ChatCompletionsResponse;
|
||||
use crate::apis::OpenAISseIter;
|
||||
use crate::clients::endpoints::SupportedAPIs;
|
||||
use crate::apis::anthropic::AnthropicSseIter;
|
||||
use crate::apis::anthropic::MessagesResponse;
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
#[serde(untagged)]
|
||||
pub enum ProviderResponseType {
|
||||
ChatCompletionsResponse(ChatCompletionsResponse),
|
||||
MessagesResponse(MessagesResponse),
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub enum ProviderStreamResponseIter {
|
||||
ChatCompletionsStream(OpenAISseIter<std::vec::IntoIter<String>>),
|
||||
MessagesStream(AnthropicSseIter<std::vec::IntoIter<String>>),
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
#[serde(untagged)]
|
||||
pub enum ProviderStreamResponseType {
|
||||
ChatCompletionsStreamResponse(crate::apis::openai::ChatCompletionsStreamResponse),
|
||||
MessagesStreamEvent(crate::apis::anthropic::MessagesStreamEvent),
|
||||
}
|
||||
|
||||
pub trait ProviderResponse: Send + Sync {
|
||||
/// Get usage information if available - returns dynamic trait object
|
||||
fn usage(&self) -> Option<&dyn TokenUsage>;
|
||||
|
||||
/// Extract token counts for metrics
|
||||
fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
|
||||
self.usage().map(|u| (u.prompt_tokens(), u.completion_tokens(), u.total_tokens()))
|
||||
}
|
||||
}
|
||||
|
||||
impl ProviderResponse for ProviderResponseType {
|
||||
fn usage(&self) -> Option<&dyn TokenUsage> {
|
||||
match self {
|
||||
ProviderResponseType::ChatCompletionsResponse(resp) => resp.usage(),
|
||||
ProviderResponseType::MessagesResponse(resp) => resp.usage(),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
|
||||
match self {
|
||||
ProviderResponseType::ChatCompletionsResponse(resp) => resp.extract_usage_counts(),
|
||||
ProviderResponseType::MessagesResponse(resp) => resp.extract_usage_counts(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ProviderStreamResponse: Send + Sync {
|
||||
/// Get the content delta for this chunk
|
||||
fn content_delta(&self) -> Option<&str>;
|
||||
|
||||
/// Check if this is the final chunk in the stream
|
||||
fn is_final(&self) -> bool;
|
||||
|
||||
/// Get role information if available
|
||||
fn role(&self) -> Option<&str>;
|
||||
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// SSE EVENT CONTAINER
|
||||
// ============================================================================
|
||||
|
||||
/// Represents a single Server-Sent Event with the complete wire format
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SseEvent {
|
||||
#[serde(rename = "data")]
|
||||
pub data: String, // The JSON payload after "data: "
|
||||
|
||||
#[serde(skip_serializing, skip_deserializing)]
|
||||
pub raw_line: String, // The complete line as received including "data: " prefix and "\n\n"
|
||||
|
||||
#[serde(skip_serializing, skip_deserializing)]
|
||||
pub provider_stream_response: Option<ProviderStreamResponseType>, // Parsed provider stream response object
|
||||
}
|
||||
|
||||
impl SseEvent {
|
||||
/// Check if this event represents the end of the stream
|
||||
pub fn is_done(&self) -> bool {
|
||||
self.data == "[DONE]"
|
||||
}
|
||||
|
||||
/// Check if this event should be skipped during processing
|
||||
/// This includes ping messages and other provider-specific events that don't contain content
|
||||
pub fn should_skip(&self) -> bool {
|
||||
// Skip ping messages (commonly used by providers for connection keep-alive)
|
||||
self.data == r#"{"type": "ping"}"#
|
||||
}
|
||||
|
||||
/// Get the parsed provider response if available
|
||||
pub fn provider_response(&self) -> Option<&ProviderStreamResponseType> {
|
||||
self.provider_stream_response.as_ref()
|
||||
}
|
||||
|
||||
/// Parse the data field into a ProviderStreamResponse for the given API
|
||||
pub fn to_provider_stream_response(&self, client_api: &SupportedAPIs) -> Result<Box<dyn ProviderStreamResponse>, Box<dyn std::error::Error + Send + Sync>> {
|
||||
if self.is_done() {
|
||||
return Err("Cannot parse [DONE] event as ProviderStreamResponse".into());
|
||||
}
|
||||
|
||||
match client_api {
|
||||
SupportedAPIs::OpenAIChatCompletions(_) => {
|
||||
let response: crate::apis::openai::ChatCompletionsStreamResponse =
|
||||
serde_json::from_str(&self.data)?;
|
||||
Ok(Box::new(response))
|
||||
}
|
||||
SupportedAPIs::AnthropicMessagesAPI(_) => {
|
||||
let response: crate::apis::anthropic::MessagesStreamEvent =
|
||||
serde_json::from_str(&self.data)?;
|
||||
Ok(Box::new(response))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for SseEvent {
|
||||
type Err = SseParseError;
|
||||
|
||||
fn from_str(line: &str) -> Result<Self, Self::Err> {
|
||||
if line.starts_with("data: ") {
|
||||
let data = line[6..].to_string(); // Remove "data: " prefix
|
||||
if data.is_empty() {
|
||||
return Err(SseParseError {
|
||||
message: "Empty data field is not a valid SSE event".to_string(),
|
||||
});
|
||||
}
|
||||
// [DONE] marker is a valid SSE event that indicates end of stream
|
||||
Ok(SseEvent {
|
||||
data,
|
||||
raw_line: format!("{}\n\n", line), // Store complete SSE format
|
||||
provider_stream_response: None, // Will be populated later via TryFrom
|
||||
})
|
||||
} else {
|
||||
Err(SseParseError {
|
||||
message: format!("Line does not start with 'data: ': {}", line),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SseEvent {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.raw_line)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Response transformation logic for client API compatibility ---
|
||||
impl TryFrom<(&[u8], &SupportedAPIs, &ProviderId)> for ProviderResponseType {
|
||||
|
|
@ -73,83 +196,141 @@ impl TryFrom<(&[u8], &SupportedAPIs, &ProviderId)> for ProviderResponseType {
|
|||
}
|
||||
}
|
||||
|
||||
impl TryFrom<(&[u8], &SupportedAPIs, &ProviderId)> for ProviderStreamResponseIter {
|
||||
// Stream response transformation logic for client API compatibility
|
||||
impl TryFrom<(&[u8], &SupportedAPIs, &ProviderId)> for ProviderStreamResponseType {
|
||||
type Error = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
fn try_from((bytes, client_api, provider_id): (&[u8], &SupportedAPIs, &ProviderId)) -> Result<Self, Self::Error> {
|
||||
let upstream_api = provider_id.compatible_api_for_client(client_api);
|
||||
|
||||
// Step 1: Parse bytes using upstream API format (what the provider actually sent)
|
||||
// Step 2: Return response type that matches client API format (what client expects)
|
||||
match (&upstream_api, client_api) {
|
||||
// Upstream sent OpenAI format, client expects OpenAI format - direct pass-through
|
||||
(SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
|
||||
let s = std::str::from_utf8(bytes)?;
|
||||
let lines: Vec<String> = s.lines().map(|line| line.to_string()).collect();
|
||||
let sse_container = crate::providers::response::SseStreamIter::new(lines.into_iter());
|
||||
let iter = crate::apis::openai::OpenAISseIter::new(sse_container);
|
||||
Ok(ProviderStreamResponseIter::ChatCompletionsStream(iter))
|
||||
let resp: crate::apis::openai::ChatCompletionsStreamResponse = serde_json::from_slice(bytes)?;
|
||||
Ok(ProviderStreamResponseType::ChatCompletionsStreamResponse(resp))
|
||||
}
|
||||
// Upstream sent Anthropic format, client expects Anthropic format - direct pass-through
|
||||
(SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
|
||||
let s = std::str::from_utf8(bytes)?;
|
||||
let lines: Vec<String> = s.lines().map(|line| line.to_string()).collect();
|
||||
let sse_container = crate::providers::response::SseStreamIter::new(lines.into_iter());
|
||||
let iter = crate::apis::anthropic::AnthropicSseIter::new(sse_container);
|
||||
Ok(ProviderStreamResponseIter::MessagesStream(iter))
|
||||
let resp: crate::apis::anthropic::MessagesStreamEvent = serde_json::from_slice(bytes)?;
|
||||
Ok(ProviderStreamResponseType::MessagesStreamEvent(resp))
|
||||
}
|
||||
// Upstream sent Anthropic format, client expects OpenAI format - need transformation
|
||||
(SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
|
||||
// Parse as Anthropic Messages stream event first
|
||||
let anthropic_resp: crate::apis::anthropic::MessagesStreamEvent = serde_json::from_slice(bytes)?;
|
||||
|
||||
// Transform to OpenAI ChatCompletions stream format using the transformer
|
||||
let chat_resp: crate::apis::openai::ChatCompletionsStreamResponse = anthropic_resp.try_into()?;
|
||||
Ok(ProviderStreamResponseType::ChatCompletionsStreamResponse(chat_resp))
|
||||
}
|
||||
// Upstream sent OpenAI format, client expects Anthropic format - need transformation
|
||||
(SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
|
||||
let s = std::str::from_utf8(bytes)?;
|
||||
let lines: Vec<String> = s.lines().map(|line| line.to_string()).collect();
|
||||
let sse_container = crate::providers::response::SseStreamIter::new(lines.into_iter());
|
||||
let iter = crate::apis::anthropic::AnthropicSseIter::new(sse_container);
|
||||
Ok(ProviderStreamResponseIter::MessagesStream(iter))
|
||||
// Parse as OpenAI ChatCompletions stream response first
|
||||
let openai_resp: crate::apis::openai::ChatCompletionsStreamResponse = serde_json::from_slice(bytes)?;
|
||||
|
||||
// Transform to Anthropic Messages stream format using the transformer
|
||||
let messages_resp: crate::apis::anthropic::MessagesStreamEvent = openai_resp.try_into()?;
|
||||
Ok(ProviderStreamResponseType::MessagesStreamEvent(messages_resp))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TryFrom implementation to convert raw bytes to SseEvent with parsed provider response
|
||||
impl TryFrom<(&[u8], &SupportedAPIs, &ProviderId)> for SseEvent {
|
||||
type Error = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
fn try_from((bytes, client_api, provider_id): (&[u8], &SupportedAPIs, &ProviderId)) -> Result<Self, Self::Error> {
|
||||
// Convert bytes to string
|
||||
let body_str = std::str::from_utf8(bytes)?;
|
||||
let mut sse_event: SseEvent = body_str.parse()?;
|
||||
|
||||
// If not [DONE], parse the data as a provider stream response (business logic layer)
|
||||
if !sse_event.is_done() {
|
||||
// Use the new ProviderStreamResponseType::try_from to parse the JSON data
|
||||
let provider_response = ProviderStreamResponseType::try_from((sse_event.data.as_bytes(), client_api, provider_id))?;
|
||||
sse_event.provider_stream_response = Some(provider_response);
|
||||
}
|
||||
|
||||
Ok(sse_event)
|
||||
}
|
||||
}
|
||||
|
||||
// TryFrom implementation for transforming SseEvent between API formats
|
||||
impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
|
||||
type Error = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
fn try_from((mut event, upstream_api, client_api): (SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result<Self, Self::Error> {
|
||||
// If APIs are the same, no transformation needed
|
||||
if std::mem::discriminant(upstream_api) == std::mem::discriminant(client_api) {
|
||||
return Ok(event);
|
||||
}
|
||||
|
||||
// Handle [DONE] events - they don't need transformation
|
||||
if event.is_done() {
|
||||
return Ok(event);
|
||||
}
|
||||
|
||||
// Transform the data field based on API conversion
|
||||
let transformed_data = match (upstream_api, client_api) {
|
||||
(SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
|
||||
// Parse OpenAI response and convert to Anthropic
|
||||
let openai_response: crate::apis::openai::ChatCompletionsStreamResponse =
|
||||
serde_json::from_str(&event.data)?;
|
||||
let anthropic_response: crate::apis::anthropic::MessagesStreamEvent =
|
||||
openai_response.try_into()?;
|
||||
serde_json::to_string(&anthropic_response)?
|
||||
}
|
||||
(SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
|
||||
let s = std::str::from_utf8(bytes)?;
|
||||
let lines: Vec<String> = s.lines().map(|line| line.to_string()).collect();
|
||||
let sse_container = crate::providers::response::SseStreamIter::new(lines.into_iter());
|
||||
let iter = crate::apis::openai::OpenAISseIter::new(sse_container);
|
||||
Ok(ProviderStreamResponseIter::ChatCompletionsStream(iter))
|
||||
// Parse Anthropic response and convert to OpenAI
|
||||
let anthropic_response: crate::apis::anthropic::MessagesStreamEvent =
|
||||
serde_json::from_str(&event.data)?;
|
||||
let openai_response: crate::apis::openai::ChatCompletionsStreamResponse =
|
||||
anthropic_response.try_into()?;
|
||||
serde_json::to_string(&openai_response)?
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(format!("Unsupported API transformation: {:?} -> {:?}", upstream_api, client_api).into());
|
||||
}
|
||||
};
|
||||
|
||||
// Update the event with transformed data and reconstruct raw_line
|
||||
event.data = transformed_data;
|
||||
event.raw_line = format!("data: {}", event.data);
|
||||
|
||||
Ok(event)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for ProviderStreamResponseIter {
|
||||
type Item = Result<Box<dyn ProviderStreamResponse>, Box<dyn std::error::Error + Send + Sync>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self {
|
||||
ProviderStreamResponseIter::ChatCompletionsStream(iter) => iter.next(),
|
||||
ProviderStreamResponseIter::MessagesStream(iter) => iter.next(),
|
||||
}
|
||||
}
|
||||
}
|
||||
pub trait ProviderResponse: Send + Sync {
|
||||
/// Get usage information if available - returns dynamic trait object
|
||||
fn usage(&self) -> Option<&dyn TokenUsage>;
|
||||
|
||||
/// Extract token counts for metrics
|
||||
fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
|
||||
self.usage().map(|u| (u.prompt_tokens(), u.completion_tokens(), u.total_tokens()))
|
||||
// Into implementation to convert SseEvent to bytes for response buffer
|
||||
impl Into<Vec<u8>> for SseEvent {
|
||||
fn into(self) -> Vec<u8> {
|
||||
format!("{}\n\n", self.raw_line).into_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ProviderStreamResponse: Send + Sync {
|
||||
/// Get the content delta for this chunk
|
||||
fn content_delta(&self) -> Option<&str>;
|
||||
|
||||
/// Check if this is the final chunk in the stream
|
||||
fn is_final(&self) -> bool;
|
||||
|
||||
/// Get role information if available
|
||||
fn role(&self) -> Option<&str>;
|
||||
#[derive(Debug)]
|
||||
pub struct SseParseError {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
impl fmt::Display for SseParseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "SSE parse error: {}", self.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for SseParseError {}
|
||||
|
||||
// ============================================================================
|
||||
// GENERIC SSE STREAMING ITERATOR (Container Only)
|
||||
// ============================================================================
|
||||
|
||||
/// Generic SSE (Server-Sent Events) streaming iterator container
|
||||
/// This is just a simple wrapper - actual Iterator implementation is delegated to provider-specific modules
|
||||
/// Parses raw SSE lines into SseEvent objects
|
||||
pub struct SseStreamIter<I>
|
||||
where
|
||||
I: Iterator,
|
||||
|
|
@ -168,26 +349,41 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
// TryFrom implementation to parse bytes into SseStreamIter
|
||||
impl TryFrom<&[u8]> for SseStreamIter<std::vec::IntoIter<String>> {
|
||||
type Error = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
impl ProviderResponse for ProviderResponseType {
|
||||
fn usage(&self) -> Option<&dyn TokenUsage> {
|
||||
match self {
|
||||
ProviderResponseType::ChatCompletionsResponse(resp) => resp.usage(),
|
||||
ProviderResponseType::MessagesResponse(resp) => resp.usage(),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
|
||||
match self {
|
||||
ProviderResponseType::ChatCompletionsResponse(resp) => resp.extract_usage_counts(),
|
||||
ProviderResponseType::MessagesResponse(resp) => resp.extract_usage_counts(),
|
||||
}
|
||||
fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
|
||||
let s = std::str::from_utf8(bytes)?;
|
||||
let lines: Vec<String> = s.lines().map(|line| line.to_string()).collect();
|
||||
Ok(SseStreamIter::new(lines.into_iter()))
|
||||
}
|
||||
}
|
||||
|
||||
// Implement Send + Sync for the enum to match the original trait requirements
|
||||
unsafe impl Send for ProviderStreamResponseIter {}
|
||||
unsafe impl Sync for ProviderStreamResponseIter {}
|
||||
impl<I> Iterator for SseStreamIter<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: AsRef<str>,
|
||||
{
|
||||
type Item = SseEvent;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
for line in &mut self.lines {
|
||||
if let Ok(event) = line.as_ref().parse::<SseEvent>() {
|
||||
// Check if this is the [DONE] marker - if so, end the stream
|
||||
if event.is_done() {
|
||||
return None;
|
||||
}
|
||||
// Skip events that should be filtered at the transport layer
|
||||
if event.should_skip() {
|
||||
continue;
|
||||
}
|
||||
return Some(event);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for token usage information
|
||||
pub trait TokenUsage {
|
||||
|
|
@ -268,7 +464,7 @@ mod tests {
|
|||
"usage": { "input_tokens": 10, "output_tokens": 25, "cache_creation_input_tokens": 5, "cache_read_input_tokens": 3 }
|
||||
});
|
||||
let bytes = serde_json::to_vec(&resp).unwrap();
|
||||
let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages), &ProviderId::Claude));
|
||||
let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages), &ProviderId::Anthropic));
|
||||
assert!(result.is_ok());
|
||||
match result.unwrap() {
|
||||
ProviderResponseType::MessagesResponse(r) => {
|
||||
|
|
@ -326,7 +522,7 @@ mod tests {
|
|||
"usage": { "input_tokens": 10, "output_tokens": 25, "cache_creation_input_tokens": 5, "cache_read_input_tokens": 3 }
|
||||
});
|
||||
let bytes = serde_json::to_vec(&resp).unwrap();
|
||||
let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), &ProviderId::Claude));
|
||||
let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), &ProviderId::Anthropic));
|
||||
assert!(result.is_ok());
|
||||
match result.unwrap() {
|
||||
ProviderResponseType::ChatCompletionsResponse(r) => {
|
||||
|
|
@ -337,4 +533,122 @@ mod tests {
|
|||
_ => panic!("Expected ChatCompletionsResponse variant"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sse_event_parsing() {
|
||||
// Test valid SSE data line
|
||||
let line = r#"data: {"id":"test","object":"chat.completion.chunk"}"#;
|
||||
let event: Result<SseEvent, _> = line.parse();
|
||||
assert!(event.is_ok());
|
||||
let event = event.unwrap();
|
||||
assert_eq!(event.data, r#"{"id":"test","object":"chat.completion.chunk"}"#);
|
||||
|
||||
// Test conversion back to line using Display trait
|
||||
let wire_format = event.to_string();
|
||||
assert_eq!(wire_format, "data: {\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n");
|
||||
|
||||
// Test [DONE] marker - should be valid SSE event
|
||||
let done_line = "data: [DONE]";
|
||||
let done_result: Result<SseEvent, _> = done_line.parse();
|
||||
assert!(done_result.is_ok());
|
||||
let done_event = done_result.unwrap();
|
||||
assert_eq!(done_event.data, "[DONE]");
|
||||
assert!(done_event.is_done()); // Test the helper method
|
||||
|
||||
// Test non-DONE event
|
||||
assert!(!event.is_done());
|
||||
|
||||
// Test empty data - should return error
|
||||
let empty_line = "data: ";
|
||||
let empty_result: Result<SseEvent, _> = empty_line.parse();
|
||||
assert!(empty_result.is_err());
|
||||
|
||||
// Test non-data line - should return error
|
||||
let comment_line = ": this is a comment";
|
||||
let comment_result: Result<SseEvent, _> = comment_line.parse();
|
||||
assert!(comment_result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sse_event_serde() {
|
||||
// Test serialization and deserialization with serde
|
||||
let event = SseEvent {
|
||||
data: r#"{"id":"test","object":"chat.completion.chunk"}"#.to_string(),
|
||||
raw_line: r#"data: {"id":"test","object":"chat.completion.chunk"}
|
||||
|
||||
"#.to_string(),
|
||||
provider_stream_response: None,
|
||||
};
|
||||
|
||||
// Test JSON serialization - raw_line should be skipped
|
||||
let json = serde_json::to_string(&event).unwrap();
|
||||
assert!(json.contains("test"));
|
||||
assert!(json.contains("chat.completion.chunk"));
|
||||
assert!(!json.contains("raw_line")); // Should be excluded from serialization
|
||||
|
||||
// Test JSON deserialization
|
||||
let deserialized: SseEvent = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(deserialized.data, event.data);
|
||||
assert_eq!(deserialized.raw_line, ""); // Should be empty since it's skipped
|
||||
|
||||
// Test round trip for data field only
|
||||
assert_eq!(event.data, deserialized.data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sse_event_should_skip() {
|
||||
// Test ping message should be skipped
|
||||
let ping_event = SseEvent {
|
||||
data: r#"{"type": "ping"}"#.to_string(),
|
||||
raw_line: r#"data: {"type": "ping"}"#.to_string(),
|
||||
provider_stream_response: None,
|
||||
};
|
||||
assert!(ping_event.should_skip());
|
||||
assert!(!ping_event.is_done());
|
||||
|
||||
// Test normal event should not be skipped
|
||||
let normal_event = SseEvent {
|
||||
data: r#"{"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
|
||||
raw_line: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
|
||||
provider_stream_response: None,
|
||||
};
|
||||
assert!(!normal_event.should_skip());
|
||||
assert!(!normal_event.is_done());
|
||||
|
||||
// Test [DONE] event should not be skipped (but is handled separately)
|
||||
let done_event = SseEvent {
|
||||
data: "[DONE]".to_string(),
|
||||
raw_line: "data: [DONE]".to_string(),
|
||||
provider_stream_response: None,
|
||||
};
|
||||
assert!(!done_event.should_skip());
|
||||
assert!(done_event.is_done());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sse_stream_iter_filters_ping_messages() {
|
||||
// Create test data with ping messages mixed in
|
||||
let test_lines = vec![
|
||||
"data: {\"id\": \"msg1\", \"object\": \"chat.completion.chunk\"}".to_string(),
|
||||
"data: {\"type\": \"ping\"}".to_string(), // This should be filtered out
|
||||
"data: {\"id\": \"msg2\", \"object\": \"chat.completion.chunk\"}".to_string(),
|
||||
"data: {\"type\": \"ping\"}".to_string(), // This should be filtered out
|
||||
"data: [DONE]".to_string(), // This should end the stream
|
||||
];
|
||||
|
||||
let mut iter = SseStreamIter::new(test_lines.into_iter());
|
||||
|
||||
// First event should be msg1 (ping filtered out)
|
||||
let event1 = iter.next().unwrap();
|
||||
assert!(event1.data.contains("msg1"));
|
||||
assert!(!event1.should_skip());
|
||||
|
||||
// Second event should be msg2 (ping filtered out)
|
||||
let event2 = iter.next().unwrap();
|
||||
assert!(event2.data.contains("msg2"));
|
||||
assert!(!event2.should_skip());
|
||||
|
||||
// Iterator should end at [DONE] (no more events)
|
||||
assert!(iter.next().is_none());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ use common::stats::{IncrementingMetric, RecordingMetric};
|
|||
use common::tracing::{Event, Span, TraceData, Traceparent};
|
||||
use common::{ratelimit, routing, tokenizer};
|
||||
use hermesllm::clients::endpoints::SupportedAPIs;
|
||||
use hermesllm::providers::response::{ProviderResponse, ProviderStreamResponseIter};
|
||||
use hermesllm::providers::response::{ProviderResponse, SseEvent, SseStreamIter};
|
||||
use hermesllm::{ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType};
|
||||
|
||||
pub struct StreamContext {
|
||||
|
|
@ -129,9 +129,19 @@ impl StreamContext {
|
|||
),
|
||||
})?;
|
||||
|
||||
let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value);
|
||||
|
||||
self.set_http_request_header("Authorization", Some(&authorization_header_value));
|
||||
// Set API-specific headers based on the resolved upstream API
|
||||
match self.resolved_api.as_ref() {
|
||||
Some(SupportedAPIs::AnthropicMessagesAPI(_)) => {
|
||||
// Anthropic API requires x-api-key and anthropic-version headers
|
||||
self.set_http_request_header("x-api-key", Some(llm_provider_api_key_value));
|
||||
self.set_http_request_header("anthropic-version", Some("2023-06-01"));
|
||||
}
|
||||
Some(SupportedAPIs::OpenAIChatCompletions(_)) | None => {
|
||||
// OpenAI and default: use Authorization Bearer token
|
||||
let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value);
|
||||
self.set_http_request_header("Authorization", Some(&authorization_header_value));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -334,7 +344,7 @@ impl StreamContext {
|
|||
fn debug_log_body(&self, body: &[u8]) {
|
||||
if log::log_enabled!(log::Level::Debug) {
|
||||
debug!(
|
||||
"response data (converted to utf8): {}",
|
||||
"raw response data (converted to utf8): {}",
|
||||
String::from_utf8_lossy(body)
|
||||
);
|
||||
}
|
||||
|
|
@ -348,49 +358,67 @@ impl StreamContext {
|
|||
debug!("processing streaming response");
|
||||
match self.client_api.as_ref() {
|
||||
Some(client_api) => {
|
||||
match ProviderStreamResponseIter::try_from((body, client_api, &provider_id)) {
|
||||
Ok(mut streaming_response) => {
|
||||
while let Some(chunk_result) = streaming_response.next() {
|
||||
match chunk_result {
|
||||
Ok(chunk) => {
|
||||
self.record_ttft_if_needed();
|
||||
let client_api = client_api.clone(); // Clone to avoid borrowing issues
|
||||
let upstream_api = provider_id.compatible_api_for_client(&client_api);
|
||||
|
||||
if chunk.is_final() {
|
||||
debug!("Received final streaming chunk");
|
||||
}
|
||||
if let Some(content) = chunk.content_delta() {
|
||||
let estimated_tokens = content.len() / 4;
|
||||
self.response_tokens += estimated_tokens.max(1);
|
||||
}
|
||||
// Parse body into SSE iterator using TryFrom
|
||||
let sse_iter: SseStreamIter<std::vec::IntoIter<String>> =
|
||||
match SseStreamIter::try_from(body) {
|
||||
Ok(iter) => iter,
|
||||
Err(e) => {
|
||||
warn!("Failed to parse body into SSE iterator: {}", e);
|
||||
return Err(Action::Continue);
|
||||
}
|
||||
};
|
||||
|
||||
let mut response_buffer = Vec::new();
|
||||
|
||||
// Process each SSE event
|
||||
for sse_event in sse_iter {
|
||||
// Transform event if upstream API != client API
|
||||
let transformed_event: SseEvent =
|
||||
match SseEvent::try_from((sse_event, &upstream_api, &client_api)) {
|
||||
Ok(event) => event,
|
||||
Err(e) => {
|
||||
warn!("Failed to transform SSE event: {}", e);
|
||||
return Err(Action::Continue);
|
||||
}
|
||||
};
|
||||
|
||||
// Extract ProviderStreamResponse for processing (token counting, etc.)
|
||||
if !transformed_event.is_done() {
|
||||
match transformed_event.to_provider_stream_response(&client_api) {
|
||||
Ok(provider_response) => {
|
||||
self.record_ttft_if_needed();
|
||||
|
||||
if provider_response.is_final() {
|
||||
debug!("Received final streaming chunk");
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Error processing streaming chunk: {}", e);
|
||||
return Err(Action::Continue);
|
||||
|
||||
if let Some(content) = provider_response.content_delta() {
|
||||
let estimated_tokens = content.len() / 4;
|
||||
self.response_tokens += estimated_tokens.max(1);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Error processing streaming chunk: {}", e);
|
||||
return Err(Action::Continue);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to parse streaming response: {}", e);
|
||||
}
|
||||
|
||||
// Add transformed event to response buffer
|
||||
let bytes: Vec<u8> = transformed_event.into();
|
||||
response_buffer.extend_from_slice(&bytes);
|
||||
}
|
||||
|
||||
Ok(response_buffer)
|
||||
}
|
||||
None => {
|
||||
warn!("Missing client_api for non-streaming response");
|
||||
return Err(Action::Continue);
|
||||
Err(Action::Continue)
|
||||
}
|
||||
};
|
||||
// NOTE:
|
||||
// We currently pass-through the original SSE bytes for streaming responses.
|
||||
// Non-streaming responses are parsed into ProviderResponseType and re-serialized to
|
||||
// normalize the payload to the client API. Doing the same for streaming would require
|
||||
// a streaming serializer that emits normalized SSE events for the target client API.
|
||||
// That doesn't exist yet in hermesllm; implementing it is a follow-up.
|
||||
// TODO(salmanap): Add a normalized SSE serializer in hermesllm and use it here so both
|
||||
// streaming and non-streaming paths perform the same compatibility mapping.
|
||||
// Until then, we keep behavior unchanged and forward upstream SSE as-is.
|
||||
// For consistency of the method contract, still return Vec<u8>.
|
||||
Ok(body.to_vec())
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_non_streaming_response(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue