mirror of https://github.com/katanemo/plano.git (synced 2026-04-30 03:16:28 +02:00)
add support for v1/messages and transformations (#558)
* pushing draft PR
* transformations are working; now need to add some tests next
* updated tests and added necessary response transformations for Anthropic's Messages response object
* fixed bugs for integration tests
* fixed doc tests
* fixed serialization issues with enums on response
* adding some debug logs to help
* fixed issues with non-streaming responses
* updated the stream_context to update response bytes
* the serialized bytes length must be set on the response side
* fixed the debug statement that was causing the integration tests for wasm to fail
* fixing JSON parsing errors
* intentionally removing the headers
* making sure that we convert the raw bytes to the correct provider type upstream
* fixing non-streaming responses to transform correctly
* /v1/messages works with transformations to and from /v1/chat/completions
* updating the CLI and demos to support anthropic vs. claude
* adding the anthropic key to the preference-based routing tests
* fixed test cases and added more structured logs
* fixed integration tests and cleaned up logs
* added Python client tests for anthropic and openai
* cleaned up logs and fixed issue with connectivity for llm gateway in weather forecast demo
* fixing the tests; Python dependency order was broken
* updated the OpenAI client to fix demos
* removed the raw response debug statement
* fixed the duplicate cloning issue and cleaned up the ProviderRequestType enum and traits
* fixing logs
* moved away from string literals to consts
* fixed streaming from Anthropic client to OpenAI
* removed debug statement that would likely trip up integration tests
* fixed integration tests for llm_gateway
* cleaned up test cases and removed unnecessary crates
* fixing comments from PR
* fixed bug whereby we were sending an OpenAIChatCompletions request object to llm_gateway even though the request may have been AnthropicMessages

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-4.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-9.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-10.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-41.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-136.local>
This commit is contained in:
parent bb71d041a0
commit fb0581fd39
38 changed files with 2842 additions and 919 deletions
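For context on the transformation the commit title refers to: the gateway accepts Anthropic-style /v1/messages requests, rewrites them into OpenAI-style /v1/chat/completions requests upstream, and transforms the responses back. Below is a minimal sketch of the request-side mapping, using illustrative struct names and only a subset of fields (the crate's actual types are richer, and Anthropic content can also be a list of blocks):

```rust
use serde::{Deserialize, Serialize};

// Illustrative subset of Anthropic's /v1/messages request body.
#[derive(Deserialize)]
struct MessagesRequest {
    model: String,
    system: Option<String>, // Anthropic carries the system prompt as a top-level field
    messages: Vec<Msg>,
    max_tokens: u32,
}

#[derive(Serialize, Deserialize)]
struct Msg {
    role: String,
    content: String,
}

// Illustrative subset of OpenAI's /v1/chat/completions request body.
#[derive(Serialize)]
struct ChatCompletionsRequest {
    model: String,
    messages: Vec<Msg>,
    max_tokens: u32,
}

// /v1/messages -> /v1/chat/completions: fold the top-level `system`
// prompt into the message list as a leading system-role message.
fn to_chat_completions(req: MessagesRequest) -> ChatCompletionsRequest {
    let mut messages = Vec::with_capacity(req.messages.len() + 1);
    if let Some(system) = req.system {
        messages.push(Msg { role: "system".into(), content: system });
    }
    messages.extend(req.messages);
    ChatCompletionsRequest {
        model: req.model,
        messages,
        max_tokens: req.max_tokens,
    }
}
```

The response side inverts this shape, which is why the commit touches both request parsing and the streaming/non-streaming response paths.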
```diff
@@ -149,8 +149,8 @@ pub struct EmbeddingProviver {
 pub enum LlmProviderType {
     #[serde(rename = "arch")]
     Arch,
-    #[serde(rename = "claude")]
-    Claude,
+    #[serde(rename = "anthropic")]
+    Anthropic,
     #[serde(rename = "deepseek")]
     Deepseek,
     #[serde(rename = "groq")]
```
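The #[serde(rename = ...)] attribute is what binds the provider name in config to the enum variant, so assuming the claude variant was replaced rather than aliased, configs that still say claude will fail to parse. A quick sketch of the expected round-trip behavior, assuming LlmProviderType derives Deserialize as the serde attributes imply:

```rust
#[test]
fn provider_name_round_trip() {
    // The new spelling maps to the Anthropic variant.
    let ok: Result<LlmProviderType, _> = serde_json::from_str("\"anthropic\"");
    assert!(ok.is_ok());

    // The old spelling no longer maps to any variant.
    let stale: Result<LlmProviderType, _> = serde_json::from_str("\"claude\"");
    assert!(stale.is_err());
}
```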
```diff
@@ -167,7 +167,7 @@ impl Display for LlmProviderType {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             LlmProviderType::Arch => write!(f, "arch"),
-            LlmProviderType::Claude => write!(f, "claude"),
+            LlmProviderType::Anthropic => write!(f, "anthropic"),
             LlmProviderType::Deepseek => write!(f, "deepseek"),
             LlmProviderType::Groq => write!(f, "groq"),
             LlmProviderType::Gemini => write!(f, "gemini"),
```
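Display is the reverse mapping, turning a variant back into the wire-level provider name used, for example, in the x-arch-llm-provider routing header. A small usage sketch:

```rust
#[test]
fn display_matches_serde_rename() {
    // Display and the serde rename must stay in agreement so a provider
    // name can round-trip between config, headers, and the enum.
    assert_eq!(LlmProviderType::Anthropic.to_string(), "anthropic");
    assert_eq!(LlmProviderType::Groq.to_string(), "groq");
}
```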
```diff
@@ -12,6 +12,7 @@ pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
 pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
 pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
+pub const MESSAGES_PATH: &str = "/v1/messages";
 pub const HEALTHZ_PATH: &str = "/healthz";
 pub const X_ARCH_STATE_HEADER: &str = "x-arch-state";
 pub const X_ARCH_API_RESPONSE: &str = "x-arch-api-response-message";
```
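With MESSAGES_PATH alongside CHAT_COMPLETIONS_PATH, endpoint dispatch can branch on consts instead of scattered string literals (per the commit's "moved away from string literals to consts"). A hedged sketch of such a dispatch; ProviderRequestType and both variant names appear in the commit message, but this function and its shape are assumed:

```rust
// Illustrative: classify an incoming request by path so the filter
// knows which provider request shape the body should parse as.
fn classify(path: &str) -> Option<ProviderRequestType> {
    match path {
        CHAT_COMPLETIONS_PATH => Some(ProviderRequestType::OpenAIChatCompletions),
        MESSAGES_PATH => Some(ProviderRequestType::AnthropicMessages),
        _ => None,
    }
}
```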
```diff
@@ -33,6 +33,7 @@ pub fn get_llm_provider(
         return provider;
     }
 
+
     if llm_providers.default().is_some() {
        return llm_providers.default().unwrap();
    }
```
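The hunk above sits inside provider selection: an explicit match (e.g., via the x-arch-llm-provider-hint header) wins, then the configured default is used. A condensed sketch of that order; default() appears in the hunk, while the get lookup is an assumption:

```rust
// Condensed selection order: hinted provider first, then the
// configured default. `get` is an assumed lookup by provider name.
fn select<'a>(hint: Option<&str>, providers: &'a LlmProviders) -> Option<&'a LlmProvider> {
    if let Some(name) = hint {
        if let Some(provider) = providers.get(name) {
            return Some(provider);
        }
    }
    providers.default()
}
```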
```diff
@@ -2,7 +2,7 @@ use log::debug;
 
 #[allow(dead_code)]
 pub fn token_count(model_name: &str, text: &str) -> Result<usize, String> {
-    debug!("getting token count model={}", model_name);
+    debug!("TOKENIZER: computing token count for model={}", model_name);
     //HACK: add support for tokenizing mistral and other models
     //filed issue https://github.com/katanemo/arch/issues/222
 
```
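Since token_count only supports some tokenizers (per the HACK note and issue #222), callers should treat Err as "count unavailable" rather than fatal. A hypothetical caller might degrade like this:

```rust
fn prompt_tokens(model: &str, prompt: &str) -> usize {
    // Fall back to a rough character heuristic (~4 chars per token,
    // an assumption) when the model's tokenizer isn't wired up yet.
    token_count(model, prompt).unwrap_or_else(|_| prompt.len() / 4)
}
```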