Fixed max_tokens handling and the processing of HTTP error codes such as 400 (#574)

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-257.local>
This commit is contained in:
Salman Paracha 2025-09-25 17:00:37 -07:00 committed by GitHub
parent 7ce8d44d8e
commit 03c2cf6f0d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 157 additions and 23 deletions

View file

@@ -203,9 +203,10 @@ pub async fn chat(
}
};
- // copy over the headers from the original response
+ // copy over the headers and status code from the original response
let response_headers = llm_response.headers().clone();
- let mut response = Response::builder();
+ let upstream_status = llm_response.status();
+ let mut response = Response::builder().status(upstream_status);
let headers = response.headers_mut().unwrap();
for (header_name, header_value) in response_headers.iter() {
headers.insert(header_name, header_value.clone());