diff --git a/.github/workflows/e2e_archgw.yml b/.github/workflows/e2e_archgw.yml index 9f0f1061..4c3bade3 100644 --- a/.github/workflows/e2e_archgw.yml +++ b/.github/workflows/e2e_archgw.yml @@ -24,7 +24,7 @@ jobs: - name: build arch docker image run: | - cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.4 -t katanemo/archgw:latest + cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.7 -t katanemo/archgw:latest - name: start archgw env: diff --git a/.github/workflows/e2e_test_currency_convert.yml b/.github/workflows/e2e_test_currency_convert.yml index 6a80760b..352245f0 100644 --- a/.github/workflows/e2e_test_currency_convert.yml +++ b/.github/workflows/e2e_test_currency_convert.yml @@ -24,7 +24,7 @@ jobs: - name: build arch docker image run: | - docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.4 + docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.7 - name: install poetry run: | diff --git a/.github/workflows/e2e_test_preference_based_routing.yml b/.github/workflows/e2e_test_preference_based_routing.yml index e9564882..db8cb3d5 100644 --- a/.github/workflows/e2e_test_preference_based_routing.yml +++ b/.github/workflows/e2e_test_preference_based_routing.yml @@ -24,7 +24,7 @@ jobs: - name: build arch docker image run: | - docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.4 + docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.7 - name: install poetry run: | diff --git a/.github/workflows/validate_arch_config.yml b/.github/workflows/validate_arch_config.yml index e5fd6acb..901600ab 100644 --- a/.github/workflows/validate_arch_config.yml +++ b/.github/workflows/validate_arch_config.yml @@ -24,7 +24,7 @@ jobs: - name: build arch docker image run: | - docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.4 + docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.7 - name: validate arch config run: | diff --git a/README.md b/README.md index b0180137..e93db8a7 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently. ```console $ python3.12 -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate -$ pip install archgw==0.3.4 +$ pip install archgw==0.3.7 ``` ### Build Agentic Apps with Arch Gateway @@ -148,7 +148,7 @@ endpoints: ```sh $ archgw up arch_config.yaml -2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.4 +2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.7 2024-12-05 16:56:28,485 - cli.utils - INFO - Schema validation successful! 2024-12-05 16:56:28,485 - cli.main - INFO - Starting arch model server and arch gateway 2024-12-05 16:56:51,647 - cli.core - INFO - Container is healthy! diff --git a/arch/supervisord.conf b/arch/supervisord.conf index dfb4d0d2..bec147cc 100644 --- a/arch/supervisord.conf +++ b/arch/supervisord.conf @@ -2,14 +2,14 @@ nodaemon=true [program:brightstaff] -command=sh -c "RUST_LOG=info /app/brightstaff 2>&1 | tee /var/log/brightstaff.log" +command=sh -c "RUST_LOG=debug /app/brightstaff 2>&1 | tee /var/log/brightstaff.log" stdout_logfile=/dev/stdout redirect_stderr=true stdout_logfile_maxbytes=0 stderr_logfile_maxbytes=0 [program:envoy] -command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:info 2>&1 | tee /var/log//envoy.log" +command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log//envoy.log" stdout_logfile=/dev/stdout redirect_stderr=true stdout_logfile_maxbytes=0 diff --git a/arch/tools/README.md b/arch/tools/README.md index 7d098186..7a33998e 100644 --- a/arch/tools/README.md +++ b/arch/tools/README.md @@ -19,7 +19,7 @@ source venv/bin/activate ### Step 3: Run the build script ```bash -pip install archgw==0.3.4 +pip install archgw==0.3.7 ``` ## Uninstall Instructions: archgw CLI diff --git a/arch/tools/cli/consts.py b/arch/tools/cli/consts.py index d0307211..213f0134 100644 --- a/arch/tools/cli/consts.py +++ b/arch/tools/cli/consts.py @@ -10,4 +10,4 @@ SERVICE_NAME_MODEL_SERVER = "model_server" SERVICE_ALL = "all" MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log" ARCHGW_DOCKER_NAME = "archgw" -ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.4") +ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.7") diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock index d66eae75..a8d5e85f 100644 --- a/arch/tools/poetry.lock +++ b/arch/tools/poetry.lock @@ -2,7 +2,7 @@ [[package]] name = "archgw_modelserver" -version = "0.3.4" +version = "0.3.7" description = "A model server for serving models" optional = false python-versions = "*" @@ -104,13 +104,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jsonschema" -version = "4.24.0" +version = "4.25.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.9" files = [ - {file = "jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d"}, - {file = "jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196"}, + {file = "jsonschema-4.25.0-py3-none-any.whl", hash = "sha256:24c2e8da302de79c8b9382fee3e76b355e44d2a4364bb207159ce10b517bd716"}, + {file = "jsonschema-4.25.0.tar.gz", hash = "sha256:e63acf5c11762c0e6672ffb61482bdf57f0876684d8d249c0fe2d730d48bc55f"}, ] [package.dependencies] @@ -121,7 +121,7 @@ rpds-py = ">=0.7.1" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=24.6.0)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "rfc3987-syntax (>=1.1.0)", "uri-template", "webcolors (>=24.6.0)"] [[package]] name = "jsonschema-specifications" @@ -576,4 +576,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "d1e256d1bb05fab98731edc11ea15391e9611acf2c3461d00d25c5fa0040de23" +content-hash = "1875c613e62e116d557ad2d30491891557b4114a99c7c65b22b26d690e9e268b" diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml index 179c36d5..c62b8656 100644 --- a/arch/tools/pyproject.toml +++ b/arch/tools/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "archgw" -version = "0.3.4" +version = "0.3.7" description = "Python-based CLI tool to manage Arch Gateway." authors = ["Katanemo Labs, Inc."] packages = [ @@ -10,7 +10,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" -archgw_modelserver = "^0.3.4" +archgw_modelserver = "^0.3.7" click = "^8.1.7" jinja2 = "^3.1.4" jsonschema = "^4.23.0" diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/chat_completions.rs index bd5cab79..37da961f 100644 --- a/crates/brightstaff/src/handlers/chat_completions.rs +++ b/crates/brightstaff/src/handlers/chat_completions.rs @@ -32,6 +32,8 @@ pub async fn chat_completions( let chat_request_bytes = request.collect().await?.to_bytes(); + debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes)); + let chat_request_parsed = serde_json::from_slice::(&chat_request_bytes) .inspect_err(|err| { warn!( diff --git a/crates/brightstaff/src/handlers/mod.rs b/crates/brightstaff/src/handlers/mod.rs index febab6c2..6de38b5b 100644 --- a/crates/brightstaff/src/handlers/mod.rs +++ b/crates/brightstaff/src/handlers/mod.rs @@ -1,3 +1,2 @@ pub mod chat_completions; pub mod models; -pub mod preferences; diff --git a/crates/brightstaff/src/handlers/preferences.rs b/crates/brightstaff/src/handlers/preferences.rs deleted file mode 100644 index a9c5a65d..00000000 --- a/crates/brightstaff/src/handlers/preferences.rs +++ /dev/null @@ -1,135 +0,0 @@ -use bytes::Bytes; -use common::configuration::{LlmProvider, ModelUsagePreference}; -use http_body_util::{combinators::BoxBody, BodyExt, Full}; -use hyper::{Request, Response, StatusCode}; -use serde_json; -use std::{collections::HashMap, sync::Arc}; -use tracing::{info, warn}; - -pub async fn list_preferences( - llm_providers: Arc>>, -) -> Response> { - let prov = llm_providers.read().await; - // convert the LlmProvider to UsageBasedProvider - let providers_with_usage = prov - .iter() - .map(|provider| ModelUsagePreference { - name: provider.name.clone(), - model: provider.model.clone().unwrap_or_default(), - usage: provider.usage.clone(), - }) - .collect::>(); - - match serde_json::to_string(&providers_with_usage) { - Ok(json) => { - let body = Full::new(Bytes::from(json)) - .map_err(|never| match never {}) - .boxed(); - Response::builder() - .status(StatusCode::OK) - .header("Content-Type", "application/json") - .body(body) - .unwrap() - } - Err(_) => { - let body = Full::new(Bytes::from_static( - b"{\"error\":\"Failed to serialize models\"}", - )) - .map_err(|never| match never {}) - .boxed(); - Response::builder() - .status(StatusCode::INTERNAL_SERVER_ERROR) - .header("Content-Type", "application/json") - .body(body) - .unwrap() - } - } -} - -pub async fn update_preferences( - request: Request, - llm_providers: Arc>>, -) -> Result>, hyper::Error> { - let request_body = request.collect().await?.to_bytes(); - - let usage: Vec = match serde_json::from_slice(&request_body) { - Ok(usage) => usage, - Err(_) => { - let response_body = Full::new(Bytes::from_static(b"Invalid request body: ")) - .map_err(|never| match never {}) - .boxed(); - return Ok(Response::builder() - .status(StatusCode::BAD_REQUEST) - .header("Content-Type", "text/plain") - .body(response_body) - .unwrap()); - } - }; - - let usage_model_map: HashMap = - usage.into_iter().map(|u| (u.model.clone(), u)).collect(); - - info!( - "Updating usage preferences for models: {:?}", - usage_model_map.keys() - ); - - let mut llm_providers = llm_providers.write().await; - - // ensure that models coming in the request are valid - let llm_provider_names: Vec = llm_providers - .iter() - .map(|provider| provider.name.clone()) - .collect(); - - for model in usage_model_map.keys() { - if !llm_provider_names.contains(model) { - let model_not_found = format!("model not found: {}", model); - warn!("updating preferences: {}", model_not_found); - let response_body = Full::new(model_not_found.into()) - .map_err(|never| match never {}) - .boxed(); - return Ok(Response::builder() - .status(StatusCode::BAD_REQUEST) - .header("Content-Type", "text/plain") - .body(response_body) - .unwrap()); - } - } - - let mut updated_models_list = Vec::new(); - for provider in llm_providers.iter_mut() { - if let Some(usage_provider) = usage_model_map.get(&provider.name) { - provider.usage = usage_provider.usage.clone(); - updated_models_list.push(ModelUsagePreference { - name: provider.name.clone(), - model: provider.model.clone().unwrap_or_default(), - usage: provider.usage.clone(), - }); - } - } - - if !updated_models_list.is_empty() { - // return list of updated models - let response_body = Full::new(Bytes::from(format!( - "{{\"updated_models\": {}}}", - serde_json::to_string(&updated_models_list).unwrap() - ))) - .map_err(|never| match never {}) - .boxed(); - Ok(Response::builder() - .status(StatusCode::OK) - .header("Content-Type", "application/json") - .body(response_body) - .unwrap()) - } else { - let response_body = Full::new(Bytes::from_static(b"Provider not found")) - .map_err(|never| match never {}) - .boxed(); - Ok(Response::builder() - .status(StatusCode::NOT_FOUND) - .header("Content-Type", "text/plain") - .body(response_body) - .unwrap()) - } -} diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index dbcc9124..b5bf0204 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -1,6 +1,5 @@ use brightstaff::handlers::chat_completions::chat_completions; use brightstaff::handlers::models::list_models; -use brightstaff::handlers::preferences::{list_preferences, update_preferences}; use brightstaff::router::llm_router::RouterService; use brightstaff::utils::tracing::init_tracer; use bytes::Bytes; @@ -116,12 +115,6 @@ async fn main() -> Result<(), Box> { .with_context(parent_cx) .await } - (&Method::GET, "/v1/router/preferences") => { - Ok(list_preferences(llm_providers).await) - } - (&Method::PUT, "/v1/router/preferences") => { - update_preferences(req, llm_providers).await - } (&Method::GET, "/v1/models") => Ok(list_models(llm_providers).await), (&Method::OPTIONS, "/v1/models") => { let mut response = Response::new(empty()); diff --git a/crates/brightstaff/src/router/router_model_v1.rs b/crates/brightstaff/src/router/router_model_v1.rs index dc0e1563..bd06b525 100644 --- a/crates/brightstaff/src/router/router_model_v1.rs +++ b/crates/brightstaff/src/router/router_model_v1.rs @@ -73,7 +73,7 @@ impl RouterModel for RouterModelV1 { fn generate_request( &self, messages: &[Message], - usage_preferences: &Option>, + usage_preferences_from_request: &Option>, ) -> ChatCompletionsRequest { // remove system prompt, tool calls, tool call response and messages without content // if content is empty its likely a tool call @@ -150,31 +150,17 @@ impl RouterModel for RouterModelV1 { }) .collect::>(); - let llm_route_json = usage_preferences - .as_ref() - .map(|prefs| { - let llm_route: Vec = prefs - .iter() - .map(|pref| RoutingPreference { - name: pref.name.clone(), - description: pref.usage.clone().unwrap_or_default(), - }) - .collect(); - serde_json::to_string(&llm_route).unwrap_or_default() - }) - .unwrap_or_else(|| self.llm_route_json_str.clone()); - - let messages_content = ARCH_ROUTER_V1_SYSTEM_PROMPT - .replace("{routes}", &llm_route_json) - .replace( - "{conversation}", - &serde_json::to_string(&selected_conversation_list).unwrap_or_default(), - ); + // Generate the router request message based on the usage preferences. + // If preferences are passed in request then we use them otherwise we use the default routing model preferences. + let router_message = match convert_to_router_preferences(usage_preferences_from_request) { + Some(prefs) => generate_router_message(&prefs, &selected_conversation_list), + None => generate_router_message(&self.llm_route_json_str, &selected_conversation_list), + }; ChatCompletionsRequest { model: self.routing_model.clone(), messages: vec![Message { - content: Some(ContentType::Text(messages_content)), + content: Some(ContentType::Text(router_message)), role: USER_ROLE.to_string(), }], temperature: Some(0.01), @@ -201,12 +187,18 @@ impl RouterModel for RouterModelV1 { if let Some(usage_preferences) = usage_preferences { // If usage preferences are defined, we need to find the model that matches the selected route - let matching_preference = usage_preferences + let model_name: Option = usage_preferences .iter() - .find(|pref| pref.name == selected_route); + .map(|pref| { + pref.routing_preferences + .iter() + .find(|routing_pref| routing_pref.name == selected_route) + .map(|_| pref.model.clone()) + }) + .find_map(|model| model); - if let Some(preference) = matching_preference { - return Ok(Some((selected_route, preference.model.clone()))); + if let Some(model_name) = model_name { + return Ok(Some((selected_route, model_name))); } else { warn!( "No matching model found for route: {}, usage preferences: {:?}", @@ -216,7 +208,7 @@ impl RouterModel for RouterModelV1 { } } - // If no usage preferences are defined, we return the route with the routing model + // If no usage preferences are passed in request then use the default routing model preferences if let Some(model) = self.llm_route_to_model_map.get(&selected_route).cloned() { return Ok(Some((selected_route, model))); } @@ -234,6 +226,37 @@ impl RouterModel for RouterModelV1 { } } +fn generate_router_message(prefs: &str, selected_conversation_list: &Vec) -> String { + ARCH_ROUTER_V1_SYSTEM_PROMPT + .replace("{routes}", prefs) + .replace( + "{conversation}", + &serde_json::to_string(&selected_conversation_list).unwrap_or_default(), + ) +} + +fn convert_to_router_preferences( + prefs_from_request: &Option>, +) -> Option { + if let Some(usage_preferences) = prefs_from_request { + let routing_preferences = usage_preferences + .iter() + .flat_map(|pref| { + pref.routing_preferences + .iter() + .map(|routing_pref| RoutingPreference { + name: routing_pref.name.clone(), + description: routing_pref.description.clone(), + }) + }) + .collect::>(); + + return Some(serde_json::to_string(&routing_preferences).unwrap_or_default()); + } + + None +} + fn fix_json_response(body: &str) -> String { let mut updated_body = body.to_string(); @@ -299,7 +322,8 @@ Based on your analysis, provide your response in the following JSON formats if y ] } "#; - let llm_routes = serde_json::from_str::>>(routes_str).unwrap(); + let llm_routes = + serde_json::from_str::>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -356,7 +380,8 @@ Based on your analysis, provide your response in the following JSON formats if y ] } "#; - let llm_routes = serde_json::from_str::>>(routes_str).unwrap(); + let llm_routes = + serde_json::from_str::>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -379,9 +404,11 @@ Based on your analysis, provide your response in the following JSON formats if y let conversation: Vec = serde_json::from_str(conversation_str).unwrap(); let usage_preferences = Some(vec![ModelUsagePreference { - name: "code-generation".to_string(), model: "claude/claude-3-7-sonnet".to_string(), - usage: Some("generating new code snippets, functions, or boilerplate based on user prompts or requirements".to_string()), + routing_preferences: vec![RoutingPreference { + name: "code-generation".to_string(), + description: "generating new code snippets, functions, or boilerplate based on user prompts or requirements".to_string(), + }], }]); let req = router.generate_request(&conversation, &usage_preferences); @@ -419,7 +446,8 @@ Based on your analysis, provide your response in the following JSON formats if y ] } "#; - let llm_routes = serde_json::from_str::>>(routes_str).unwrap(); + let llm_routes = + serde_json::from_str::>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), 235); @@ -478,7 +506,8 @@ Based on your analysis, provide your response in the following JSON formats if y ] } "#; - let llm_routes = serde_json::from_str::>>(routes_str).unwrap(); + let llm_routes = + serde_json::from_str::>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200); @@ -538,7 +567,8 @@ Based on your analysis, provide your response in the following JSON formats if y ] } "#; - let llm_routes = serde_json::from_str::>>(routes_str).unwrap(); + let llm_routes = + serde_json::from_str::>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230); @@ -604,7 +634,8 @@ Based on your analysis, provide your response in the following JSON formats if y ] } "#; - let llm_routes = serde_json::from_str::>>(routes_str).unwrap(); + let llm_routes = + serde_json::from_str::>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -672,7 +703,8 @@ Based on your analysis, provide your response in the following JSON formats if y ] } "#; - let llm_routes = serde_json::from_str::>>(routes_str).unwrap(); + let llm_routes = + serde_json::from_str::>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -747,14 +779,18 @@ Based on your analysis, provide your response in the following JSON formats if y ] } "#; - let llm_routes = serde_json::from_str::>>(routes_str).unwrap(); + let llm_routes = + serde_json::from_str::>>(routes_str).unwrap(); let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000); // Case 1: Valid JSON with non-empty route let input = r#"{"route": "Image generation"}"#; let result = router.parse_response(input, &None).unwrap(); - assert_eq!(result, Some(("Image generation".to_string(), "gpt-4o".to_string()))); + assert_eq!( + result, + Some(("Image generation".to_string(), "gpt-4o".to_string())) + ); // Case 2: Valid JSON with empty route let input = r#"{"route": ""}"#; @@ -784,11 +820,17 @@ Based on your analysis, provide your response in the following JSON formats if y // Case 6: Single quotes and \n in JSON let input = "{'route': 'Image generation'}\\n"; let result = router.parse_response(input, &None).unwrap(); - assert_eq!(result, Some(("Image generation".to_string(), "gpt-4o".to_string()))); + assert_eq!( + result, + Some(("Image generation".to_string(), "gpt-4o".to_string())) + ); // Case 7: Code block marker let input = "```json\n{\"route\": \"Image generation\"}\n```"; let result = router.parse_response(input, &None).unwrap(); - assert_eq!(result, Some(("Image generation".to_string(), "gpt-4o".to_string()))); + assert_eq!( + result, + Some(("Image generation".to_string(), "gpt-4o".to_string())) + ); } } diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 0693c09b..186691dc 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -1,6 +1,5 @@ use hermesllm::providers::openai::types::{ModelDetail, ModelObject, Models}; use serde::{Deserialize, Serialize}; -use serde_with::skip_serializing_none; use std::collections::HashMap; use std::fmt::Display; @@ -178,12 +177,10 @@ impl Display for LlmProviderType { } } -#[skip_serializing_none] #[derive(Serialize, Deserialize, Debug)] pub struct ModelUsagePreference { - pub name: String, pub model: String, - pub usage: Option, + pub routing_preferences: Vec, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/hermesllm/src/providers/openai/types.rs b/crates/hermesllm/src/providers/openai/types.rs index d1c4430c..7dea64df 100644 --- a/crates/hermesllm/src/providers/openai/types.rs +++ b/crates/hermesllm/src/providers/openai/types.rs @@ -35,9 +35,16 @@ pub enum MultiPartContentType { ImageUrl, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct ImageUrl { + pub url: String, +} + +#[skip_serializing_none] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct MultiPartContent { pub text: Option, + pub image_url: Option, #[serde(rename = "type")] pub content_type: MultiPartContentType, } @@ -307,10 +314,12 @@ mod tests { MultiPartContent { text: Some("This is a text part.".to_string()), content_type: MultiPartContentType::Text, + image_url: None, }, MultiPartContent { text: Some("https://example.com/image.png".to_string()), content_type: MultiPartContentType::ImageUrl, + image_url: None, }, ]); assert_eq!(multi_part_content.to_string(), "This is a text part."); @@ -364,6 +373,61 @@ mod tests { } } + #[test] + fn test_chat_completions_request_image_content() { + const CHAT_COMPLETIONS_REQUEST: &str = r#" + { + "stream": true, + "model": "openai/gpt-4o", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "describe this photo pls" + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...==" + } + } + ] + } + ] + }"#; + + let chat_completions_request: ChatCompletionsRequest = + serde_json::from_str(CHAT_COMPLETIONS_REQUEST).unwrap(); + assert_eq!(chat_completions_request.model, "openai/gpt-4o"); + if let Some(ContentType::MultiPart(multi_part_content)) = + chat_completions_request.messages[0].content.as_ref() + { + assert_eq!(multi_part_content.len(), 2); + assert_eq!( + multi_part_content[0].content_type, + MultiPartContentType::Text + ); + assert_eq!( + multi_part_content[0].text, + Some("describe this photo pls".to_string()) + ); + assert_eq!( + multi_part_content[1].content_type, + MultiPartContentType::ImageUrl + ); + assert_eq!( + multi_part_content[1].image_url, + Some(ImageUrl { + url: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...==".to_string(), + }) + ); + } else { + panic!("Expected MultiPartContent"); + } + } + #[test] fn test_sse_streaming() { let json_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1700000000,"model":"gpt-3.5-turbo","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]} diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index cb3a42e6..addaae66 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -12,6 +12,9 @@ llm_providers: - access_key: $OPENAI_API_KEY model: openai/gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4.1 + - access_key: $OPENAI_API_KEY model: openai/gpt-4o default: true diff --git a/demos/use_cases/preference_based_routing/README.md b/demos/use_cases/preference_based_routing/README.md index 4031069d..1ac6b59b 100644 --- a/demos/use_cases/preference_based_routing/README.md +++ b/demos/use_cases/preference_based_routing/README.md @@ -14,9 +14,9 @@ Make sure your machine is up to date with [latest version of archgw]([url](https 2. start archgw in the foreground ```bash (venv) $ archgw up --service archgw --foreground -2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.4 +2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.7 2025-05-30 18:00:09,953 - cli.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/use_cases/preference_based_routing/arch_config.yaml -2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.4 +2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.7 2025-05-30 18:00:10,662 - cli.core - INFO - archgw status: running, health status: starting 2025-05-30 18:00:11,712 - cli.core - INFO - archgw status: running, health status: starting 2025-05-30 18:00:12,761 - cli.core - INFO - archgw is running and is healthy! diff --git a/docs/source/concepts/llm_provider.rst b/docs/source/concepts/llm_provider.rst index deffd3ad..eabdaa96 100644 --- a/docs/source/concepts/llm_provider.rst +++ b/docs/source/concepts/llm_provider.rst @@ -38,29 +38,29 @@ Adding custom LLM Provider We support any OpenAI compliant LLM for example mistral, openai, ollama etc. We also offer first class support for OpenAI, Anthropic, DeepSeek, Mistral, Groq, and Ollama based models. You can easily configure an LLM that communicates over the OpenAI API interface, by following the below guide. -For example following code block shows you how to add an ollama-supported LLM in the `arch_config.yaml` file. +For example following code block shows you how to add an ollama-supported LLM in the ``arch_config.yaml`` file. .. code-block:: yaml llm_providers: - - model: some_custom_llm_provider/llama3.2 - provider_interface: openai - base_url: http://host.docker.internal:11434 + - model: some_custom_llm_provider/llama3.2 + provider_interface: openai + base_url: http://host.docker.internal:11434 -And in the following code block shows you how to add mistral llm provider in the `arch_config.yaml` file. +And in the following code block shows you how to add mistral llm provider in the ``arch_config.yaml`` file. .. code-block:: yaml llm_providers: - - name: mistral/ministral-3b-latest - access_key: $MISTRAL_API_KEY + - name: mistral/ministral-3b-latest + access_key: $MISTRAL_API_KEY Example: Using the OpenAI Python SDK ------------------------------------ .. code-block:: python - from openai import OpenAI + from openai import OpenAI # Initialize the Arch client client = OpenAI(base_url="http://127.0.0.1:2000/") diff --git a/docs/source/conf.py b/docs/source/conf.py index 4790156e..4e2fd142 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons project = "Arch Docs" copyright = "2025, Katanemo Labs, Inc" author = "Katanemo Labs, Inc" -release = " v0.3.4" +release = " v0.3.7" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index bec37d90..7027c67a 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -25,7 +25,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently. $ python -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate - $ pip install archgw==0.3.4 + $ pip install archgw==0.3.7 Build AI Agent with Arch Gateway diff --git a/model_server/poetry.lock b/model_server/poetry.lock index 2debff66..26e65aed 100644 --- a/model_server/poetry.lock +++ b/model_server/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "accelerate" -version = "1.8.1" +version = "1.9.0" description = "Accelerate" optional = false python-versions = ">=3.9.0" files = [ - {file = "accelerate-1.8.1-py3-none-any.whl", hash = "sha256:c47b8994498875a2b1286e945bd4d20e476956056c7941d512334f4eb44ff991"}, - {file = "accelerate-1.8.1.tar.gz", hash = "sha256:f60df931671bc4e75077b852990469d4991ce8bd3a58e72375c3c95132034db9"}, + {file = "accelerate-1.9.0-py3-none-any.whl", hash = "sha256:c24739a97ade1d54af4549a65f8b6b046adc87e2b3e4d6c66516e32c53d5a8f1"}, + {file = "accelerate-1.9.0.tar.gz", hash = "sha256:0e8c61f81af7bf37195b6175a545ed292617dd90563c88f49020aea5b6a0b47f"}, ] [package.dependencies] @@ -29,7 +29,7 @@ sagemaker = ["sagemaker"] test-dev = ["bitsandbytes", "datasets", "diffusers", "evaluate", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"] test-fp8 = ["torchao"] test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist"] -test-trackers = ["comet-ml", "dvclive", "matplotlib", "mlflow", "swanlab", "tensorboard", "wandb"] +test-trackers = ["comet-ml", "dvclive", "matplotlib", "mlflow", "swanlab", "tensorboard", "trackio", "wandb"] testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"] [[package]] @@ -82,15 +82,26 @@ typing_extensions = {version = ">=4", markers = "python_version < \"3.11\""} [package.extras] tests = ["mypy (>=1.14.0)", "pytest", "pytest-asyncio"] +[[package]] +name = "backports-asyncio-runner" +version = "1.2.0" +description = "Backport of asyncio.Runner, a context manager that controls event loop life cycle." +optional = false +python-versions = "<3.11,>=3.8" +files = [ + {file = "backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5"}, + {file = "backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162"}, +] + [[package]] name = "certifi" -version = "2025.7.9" +version = "2025.7.14" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" files = [ - {file = "certifi-2025.7.9-py3-none-any.whl", hash = "sha256:d842783a14f8fdd646895ac26f719a061408834473cfc10203f6a575beb15d39"}, - {file = "certifi-2025.7.9.tar.gz", hash = "sha256:c1d2ec05395148ee10cf672ffc28cd37ea0ab0d99f9cc74c43e588cbd111b079"}, + {file = "certifi-2025.7.14-py3-none-any.whl", hash = "sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2"}, + {file = "certifi-2025.7.14.tar.gz", hash = "sha256:8ea99dbdfaaf2ba2f9bac77b9249ef62ec5218e7c2b2e903378ed5fccf765995"}, ] [[package]] @@ -324,13 +335,13 @@ typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "fsspec" -version = "2025.5.1" +version = "2025.7.0" description = "File-system specification" optional = false python-versions = ">=3.9" files = [ - {file = "fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462"}, - {file = "fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475"}, + {file = "fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21"}, + {file = "fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58"}, ] [package.extras] @@ -338,7 +349,7 @@ abfs = ["adlfs"] adl = ["adlfs"] arrow = ["pyarrow (>=1)"] dask = ["dask", "distributed"] -dev = ["pre-commit", "ruff"] +dev = ["pre-commit", "ruff (>=0.5)"] doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] dropbox = ["dropbox", "dropboxdrivefs", "requests"] full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] @@ -380,66 +391,66 @@ grpc = ["grpcio (>=1.44.0,<2.0.0)"] [[package]] name = "grpcio" -version = "1.73.1" +version = "1.74.0" description = "HTTP/2-based RPC framework" optional = false python-versions = ">=3.9" files = [ - {file = "grpcio-1.73.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:2d70f4ddd0a823436c2624640570ed6097e40935c9194482475fe8e3d9754d55"}, - {file = "grpcio-1.73.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:3841a8a5a66830261ab6a3c2a3dc539ed84e4ab019165f77b3eeb9f0ba621f26"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:628c30f8e77e0258ab788750ec92059fc3d6628590fb4b7cea8c102503623ed7"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67a0468256c9db6d5ecb1fde4bf409d016f42cef649323f0a08a72f352d1358b"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b84d65bbdebd5926eb5c53b0b9ec3b3f83408a30e4c20c373c5337b4219ec5"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c54796ca22b8349cc594d18b01099e39f2b7ffb586ad83217655781a350ce4da"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:75fc8e543962ece2f7ecd32ada2d44c0c8570ae73ec92869f9af8b944863116d"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6a6037891cd2b1dd1406b388660522e1565ed340b1fea2955b0234bdd941a862"}, - {file = "grpcio-1.73.1-cp310-cp310-win32.whl", hash = "sha256:cce7265b9617168c2d08ae570fcc2af4eaf72e84f8c710ca657cc546115263af"}, - {file = "grpcio-1.73.1-cp310-cp310-win_amd64.whl", hash = "sha256:6a2b372e65fad38842050943f42ce8fee00c6f2e8ea4f7754ba7478d26a356ee"}, - {file = "grpcio-1.73.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:ba2cea9f7ae4bc21f42015f0ec98f69ae4179848ad744b210e7685112fa507a1"}, - {file = "grpcio-1.73.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d74c3f4f37b79e746271aa6cdb3a1d7e4432aea38735542b23adcabaaee0c097"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5b9b1805a7d61c9e90541cbe8dfe0a593dfc8c5c3a43fe623701b6a01b01d710"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3215f69a0670a8cfa2ab53236d9e8026bfb7ead5d4baabe7d7dc11d30fda967"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc5eccfd9577a5dc7d5612b2ba90cca4ad14c6d949216c68585fdec9848befb1"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dc7d7fd520614fce2e6455ba89791458020a39716951c7c07694f9dbae28e9c0"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:105492124828911f85127e4825d1c1234b032cb9d238567876b5515d01151379"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:610e19b04f452ba6f402ac9aa94eb3d21fbc94553368008af634812c4a85a99e"}, - {file = "grpcio-1.73.1-cp311-cp311-win32.whl", hash = "sha256:d60588ab6ba0ac753761ee0e5b30a29398306401bfbceffe7d68ebb21193f9d4"}, - {file = "grpcio-1.73.1-cp311-cp311-win_amd64.whl", hash = "sha256:6957025a4608bb0a5ff42abd75bfbb2ed99eda29d5992ef31d691ab54b753643"}, - {file = "grpcio-1.73.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:921b25618b084e75d424a9f8e6403bfeb7abef074bb6c3174701e0f2542debcf"}, - {file = "grpcio-1.73.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:277b426a0ed341e8447fbf6c1d6b68c952adddf585ea4685aa563de0f03df887"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:96c112333309493c10e118d92f04594f9055774757f5d101b39f8150f8c25582"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48e862aed925ae987eb7084409a80985de75243389dc9d9c271dd711e589918"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83a6c2cce218e28f5040429835fa34a29319071079e3169f9543c3fbeff166d2"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:65b0458a10b100d815a8426b1442bd17001fdb77ea13665b2f7dc9e8587fdc6b"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0a9f3ea8dce9eae9d7cb36827200133a72b37a63896e0e61a9d5ec7d61a59ab1"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:de18769aea47f18e782bf6819a37c1c528914bfd5683b8782b9da356506190c8"}, - {file = "grpcio-1.73.1-cp312-cp312-win32.whl", hash = "sha256:24e06a5319e33041e322d32c62b1e728f18ab8c9dbc91729a3d9f9e3ed336642"}, - {file = "grpcio-1.73.1-cp312-cp312-win_amd64.whl", hash = "sha256:303c8135d8ab176f8038c14cc10d698ae1db9c480f2b2823f7a987aa2a4c5646"}, - {file = "grpcio-1.73.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:b310824ab5092cf74750ebd8a8a8981c1810cb2b363210e70d06ef37ad80d4f9"}, - {file = "grpcio-1.73.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:8f5a6df3fba31a3485096ac85b2e34b9666ffb0590df0cd044f58694e6a1f6b5"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:052e28fe9c41357da42250a91926a3e2f74c046575c070b69659467ca5aa976b"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c0bf15f629b1497436596b1cbddddfa3234273490229ca29561209778ebe182"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab860d5bfa788c5a021fba264802e2593688cd965d1374d31d2b1a34cacd854"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d958c31cc91ab050bd8a91355480b8e0683e21176522bacea225ce51163f2"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f43ffb3bd415c57224c7427bfb9e6c46a0b6e998754bfa0d00f408e1873dcbb5"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:686231cdd03a8a8055f798b2b54b19428cdf18fa1549bee92249b43607c42668"}, - {file = "grpcio-1.73.1-cp313-cp313-win32.whl", hash = "sha256:89018866a096e2ce21e05eabed1567479713ebe57b1db7cbb0f1e3b896793ba4"}, - {file = "grpcio-1.73.1-cp313-cp313-win_amd64.whl", hash = "sha256:4a68f8c9966b94dff693670a5cf2b54888a48a5011c5d9ce2295a1a1465ee84f"}, - {file = "grpcio-1.73.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:b4adc97d2d7f5c660a5498bda978ebb866066ad10097265a5da0511323ae9f50"}, - {file = "grpcio-1.73.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:c45a28a0cfb6ddcc7dc50a29de44ecac53d115c3388b2782404218db51cb2df3"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:10af9f2ab98a39f5b6c1896c6fc2036744b5b41d12739d48bed4c3e15b6cf900"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45cf17dcce5ebdb7b4fe9e86cb338fa99d7d1bb71defc78228e1ddf8d0de8cbb"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c502c2e950fc7e8bf05c047e8a14522ef7babac59abbfde6dbf46b7a0d9c71e"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6abfc0f9153dc4924536f40336f88bd4fe7bd7494f028675e2e04291b8c2c62a"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ed451a0e39c8e51eb1612b78686839efd1a920666d1666c1adfdb4fd51680c0f"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:07f08705a5505c9b5b0cbcbabafb96462b5a15b7236bbf6bbcc6b0b91e1cbd7e"}, - {file = "grpcio-1.73.1-cp39-cp39-win32.whl", hash = "sha256:ad5c958cc3d98bb9d71714dc69f1c13aaf2f4b53e29d4cc3f1501ef2e4d129b2"}, - {file = "grpcio-1.73.1-cp39-cp39-win_amd64.whl", hash = "sha256:42f0660bce31b745eb9d23f094a332d31f210dcadd0fc8e5be7e4c62a87ce86b"}, - {file = "grpcio-1.73.1.tar.gz", hash = "sha256:7fce2cd1c0c1116cf3850564ebfc3264fba75d3c74a7414373f1238ea365ef87"}, + {file = "grpcio-1.74.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:85bd5cdf4ed7b2d6438871adf6afff9af7096486fcf51818a81b77ef4dd30907"}, + {file = "grpcio-1.74.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:68c8ebcca945efff9d86d8d6d7bfb0841cf0071024417e2d7f45c5e46b5b08eb"}, + {file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:e154d230dc1bbbd78ad2fdc3039fa50ad7ffcf438e4eb2fa30bce223a70c7486"}, + {file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8978003816c7b9eabe217f88c78bc26adc8f9304bf6a594b02e5a49b2ef9c11"}, + {file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3d7bd6e3929fd2ea7fbc3f562e4987229ead70c9ae5f01501a46701e08f1ad9"}, + {file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:136b53c91ac1d02c8c24201bfdeb56f8b3ac3278668cbb8e0ba49c88069e1bdc"}, + {file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fe0f540750a13fd8e5da4b3eaba91a785eea8dca5ccd2bc2ffe978caa403090e"}, + {file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4e4181bfc24413d1e3a37a0b7889bea68d973d4b45dd2bc68bb766c140718f82"}, + {file = "grpcio-1.74.0-cp310-cp310-win32.whl", hash = "sha256:1733969040989f7acc3d94c22f55b4a9501a30f6aaacdbccfaba0a3ffb255ab7"}, + {file = "grpcio-1.74.0-cp310-cp310-win_amd64.whl", hash = "sha256:9e912d3c993a29df6c627459af58975b2e5c897d93287939b9d5065f000249b5"}, + {file = "grpcio-1.74.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:69e1a8180868a2576f02356565f16635b99088da7df3d45aaa7e24e73a054e31"}, + {file = "grpcio-1.74.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:8efe72fde5500f47aca1ef59495cb59c885afe04ac89dd11d810f2de87d935d4"}, + {file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:a8f0302f9ac4e9923f98d8e243939a6fb627cd048f5cd38595c97e38020dffce"}, + {file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f609a39f62a6f6f05c7512746798282546358a37ea93c1fcbadf8b2fed162e3"}, + {file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98e0b7434a7fa4e3e63f250456eaef52499fba5ae661c58cc5b5477d11e7182"}, + {file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:662456c4513e298db6d7bd9c3b8df6f75f8752f0ba01fb653e252ed4a59b5a5d"}, + {file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3d14e3c4d65e19d8430a4e28ceb71ace4728776fd6c3ce34016947474479683f"}, + {file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bf949792cee20d2078323a9b02bacbbae002b9e3b9e2433f2741c15bdeba1c4"}, + {file = "grpcio-1.74.0-cp311-cp311-win32.whl", hash = "sha256:55b453812fa7c7ce2f5c88be3018fb4a490519b6ce80788d5913f3f9d7da8c7b"}, + {file = "grpcio-1.74.0-cp311-cp311-win_amd64.whl", hash = "sha256:86ad489db097141a907c559988c29718719aa3e13370d40e20506f11b4de0d11"}, + {file = "grpcio-1.74.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:8533e6e9c5bd630ca98062e3a1326249e6ada07d05acf191a77bc33f8948f3d8"}, + {file = "grpcio-1.74.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:2918948864fec2a11721d91568effffbe0a02b23ecd57f281391d986847982f6"}, + {file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:60d2d48b0580e70d2e1954d0d19fa3c2e60dd7cbed826aca104fff518310d1c5"}, + {file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3601274bc0523f6dc07666c0e01682c94472402ac2fd1226fd96e079863bfa49"}, + {file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:176d60a5168d7948539def20b2a3adcce67d72454d9ae05969a2e73f3a0feee7"}, + {file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e759f9e8bc908aaae0412642afe5416c9f983a80499448fcc7fab8692ae044c3"}, + {file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e7c4389771855a92934b2846bd807fc25a3dfa820fd912fe6bd8136026b2707"}, + {file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cce634b10aeab37010449124814b05a62fb5f18928ca878f1bf4750d1f0c815b"}, + {file = "grpcio-1.74.0-cp312-cp312-win32.whl", hash = "sha256:885912559974df35d92219e2dc98f51a16a48395f37b92865ad45186f294096c"}, + {file = "grpcio-1.74.0-cp312-cp312-win_amd64.whl", hash = "sha256:42f8fee287427b94be63d916c90399ed310ed10aadbf9e2e5538b3e497d269bc"}, + {file = "grpcio-1.74.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2bc2d7d8d184e2362b53905cb1708c84cb16354771c04b490485fa07ce3a1d89"}, + {file = "grpcio-1.74.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:c14e803037e572c177ba54a3e090d6eb12efd795d49327c5ee2b3bddb836bf01"}, + {file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f6ec94f0e50eb8fa1744a731088b966427575e40c2944a980049798b127a687e"}, + {file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:566b9395b90cc3d0d0c6404bc8572c7c18786ede549cdb540ae27b58afe0fb91"}, + {file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1ea6176d7dfd5b941ea01c2ec34de9531ba494d541fe2057c904e601879f249"}, + {file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:64229c1e9cea079420527fa8ac45d80fc1e8d3f94deaa35643c381fa8d98f362"}, + {file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:0f87bddd6e27fc776aacf7ebfec367b6d49cad0455123951e4488ea99d9b9b8f"}, + {file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3b03d8f2a07f0fea8c8f74deb59f8352b770e3900d143b3d1475effcb08eec20"}, + {file = "grpcio-1.74.0-cp313-cp313-win32.whl", hash = "sha256:b6a73b2ba83e663b2480a90b82fdae6a7aa6427f62bf43b29912c0cfd1aa2bfa"}, + {file = "grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24"}, + {file = "grpcio-1.74.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:4bc5fca10aaf74779081e16c2bcc3d5ec643ffd528d9e7b1c9039000ead73bae"}, + {file = "grpcio-1.74.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:6bab67d15ad617aff094c382c882e0177637da73cbc5532d52c07b4ee887a87b"}, + {file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:655726919b75ab3c34cdad39da5c530ac6fa32696fb23119e36b64adcfca174a"}, + {file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a2b06afe2e50ebfd46247ac3ba60cac523f54ec7792ae9ba6073c12daf26f0a"}, + {file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f251c355167b2360537cf17bea2cf0197995e551ab9da6a0a59b3da5e8704f9"}, + {file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8f7b5882fb50632ab1e48cb3122d6df55b9afabc265582808036b6e51b9fd6b7"}, + {file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:834988b6c34515545b3edd13e902c1acdd9f2465d386ea5143fb558f153a7176"}, + {file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:22b834cef33429ca6cc28303c9c327ba9a3fafecbf62fae17e9a7b7163cc43ac"}, + {file = "grpcio-1.74.0-cp39-cp39-win32.whl", hash = "sha256:7d95d71ff35291bab3f1c52f52f474c632db26ea12700c2ff0ea0532cb0b5854"}, + {file = "grpcio-1.74.0-cp39-cp39-win_amd64.whl", hash = "sha256:ecde9ab49f58433abe02f9ed076c7b5be839cf0153883a6d23995937a82392fa"}, + {file = "grpcio-1.74.0.tar.gz", hash = "sha256:80d1f4fbb35b0742d3e3d3bb654b7381cd5f015f8497279a1e9c21ba623e01b1"}, ] [package.extras] -protobuf = ["grpcio-tools (>=1.73.1)"] +protobuf = ["grpcio-tools (>=1.74.0)"] [[package]] name = "h11" @@ -520,19 +531,19 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.33.4" +version = "0.34.1" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.33.4-py3-none-any.whl", hash = "sha256:09f9f4e7ca62547c70f8b82767eefadd2667f4e116acba2e3e62a5a81815a7bb"}, - {file = "huggingface_hub-0.33.4.tar.gz", hash = "sha256:6af13478deae120e765bfd92adad0ae1aec1ad8c439b46f23058ad5956cbca0a"}, + {file = "huggingface_hub-0.34.1-py3-none-any.whl", hash = "sha256:60d843dcb7bc335145b20e7d2f1dfe93910f6787b2b38a936fb772ce2a83757c"}, + {file = "huggingface_hub-0.34.1.tar.gz", hash = "sha256:6978ed89ef981de3c78b75bab100a214843be1cc9d24f8e9c0dc4971808ef1b1"}, ] [package.dependencies] filelock = "*" fsspec = ">=2023.5.0" -hf-xet = {version = ">=1.1.2,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""} +hf-xet = {version = ">=1.1.3,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""} packaging = ">=20.9" pyyaml = ">=5.1" requests = "*" @@ -540,16 +551,16 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] hf-transfer = ["hf-transfer (>=0.1.4)"] hf-xet = ["hf-xet (>=1.1.2,<2.0.0)"] inference = ["aiohttp"] mcp = ["aiohttp", "mcp (>=1.8.0)", "typer"] oauth = ["authlib (>=1.3.2)", "fastapi", "httpx", "itsdangerous"] -quality = ["libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "ruff (>=0.9.0)"] +quality = ["libcst (>=1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "ruff (>=0.9.0)"] tensorflow = ["graphviz", "pydot", "tensorflow"] tensorflow-testing = ["keras (<3.0)", "tensorflow"] testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] @@ -1023,13 +1034,13 @@ files = [ [[package]] name = "openai" -version = "1.95.1" +version = "1.97.1" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.95.1-py3-none-any.whl", hash = "sha256:8bbdfeceef231b1ddfabbc232b179d79f8b849aab5a7da131178f8d10e0f162f"}, - {file = "openai-1.95.1.tar.gz", hash = "sha256:f089b605282e2a2b6776090b4b46563ac1da77f56402a222597d591e2dcc1086"}, + {file = "openai-1.97.1-py3-none-any.whl", hash = "sha256:4e96bbdf672ec3d44968c9ea39d2c375891db1acc1794668d8149d5fa6000606"}, + {file = "openai-1.97.1.tar.gz", hash = "sha256:a744b27ae624e3d4135225da9b1c89c107a2a7e5bc4c93e5b7b5214772ce7a4e"}, ] [package.dependencies] @@ -1498,16 +1509,17 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests [[package]] name = "pytest-asyncio" -version = "1.0.0" +version = "1.1.0" description = "Pytest support for asyncio" optional = false python-versions = ">=3.9" files = [ - {file = "pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3"}, - {file = "pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f"}, + {file = "pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf"}, + {file = "pytest_asyncio-1.1.0.tar.gz", hash = "sha256:796aa822981e01b68c12e4827b8697108f7205020f24b5793b3c41555dab68ea"}, ] [package.dependencies] +backports-asyncio-runner = {version = ">=1.1,<2", markers = "python_version < \"3.11\""} pytest = ">=8.2,<9" [package.extras] @@ -2019,18 +2031,18 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "4.53.2" +version = "4.54.0" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.9.0" files = [ - {file = "transformers-4.53.2-py3-none-any.whl", hash = "sha256:db8f4819bb34f000029c73c3c557e7d06fc1b8e612ec142eecdae3947a9c78bf"}, - {file = "transformers-4.53.2.tar.gz", hash = "sha256:6c3ed95edfb1cba71c4245758f1b4878c93bf8cde77d076307dacb2cbbd72be2"}, + {file = "transformers-4.54.0-py3-none-any.whl", hash = "sha256:c96e607f848625965b76c677b2c2576f2c7b7097c1c5292b281919d90675a25e"}, + {file = "transformers-4.54.0.tar.gz", hash = "sha256:843da4d66a573cef3d1b2e7a1d767e77da054621e69d9f3faff761e55a1f8203"}, ] [package.dependencies] filelock = "*" -huggingface-hub = ">=0.30.0,<1.0" +huggingface-hub = ">=0.34.0,<1.0" numpy = ">=1.17" packaging = ">=20.0" pyyaml = ">=5.1" @@ -2042,15 +2054,15 @@ tqdm = ">=4.27" [package.extras] accelerate = ["accelerate (>=0.26.0)"] -all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<0.7)", "librosa", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<0.7)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"] codecarbon = ["codecarbon (>=2.8.1)"] deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"] -deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<0.7)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] -dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"] -dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.6.1,<0.7)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<0.7)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.6.1,<0.7)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] ftfy = ["ftfy"] @@ -2058,6 +2070,7 @@ hf-xet = ["hf_xet"] hub-kernels = ["kernels (>=0.6.1,<0.7)"] integrations = ["kernels (>=0.6.1,<0.7)", "optuna", "ray[tune] (>=2.7.0)", "sigopt"] ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)"] +mistral-common = ["mistral-common[opencv] (>=1.6.3)"] modelcreation = ["cookiecutter (==1.7.3)"] natten = ["natten (>=0.14.6,<0.15.0)"] num2words = ["num2words"] @@ -2065,27 +2078,27 @@ onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1 onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] open-telemetry = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"] optuna = ["optuna"] -quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "libcst", "pandas (<2.3.0)", "rich", "ruff (==0.11.2)", "urllib3 (<2.0.0)"] +quality = ["GitPython (<3.1.19)", "datasets (>=2.15.0)", "libcst", "pandas (<2.3.0)", "rich", "ruff (==0.11.2)", "urllib3 (<2.0.0)"] ray = ["ray[tune] (>=2.7.0)"] -retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +retrieval = ["datasets (>=2.15.0)", "faiss-cpu"] ruff = ["ruff (==0.11.2)"] sagemaker = ["sagemaker (>=2.31.0)"] sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] -serving = ["fastapi", "pydantic", "starlette", "uvicorn"] +serving = ["accelerate (>=0.26.0)", "fastapi", "openai", "pydantic (>=2)", "starlette", "torch (>=2.1)", "uvicorn"] sigopt = ["sigopt"] sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"] tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] tiktoken = ["blobfile", "tiktoken"] -timm = ["timm (<=1.0.11)"] +timm = ["timm (!=1.0.18,<=1.0.19)"] tokenizers = ["tokenizers (>=0.21,<0.22)"] torch = ["accelerate (>=0.26.0)", "torch (>=2.1)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] -torchhub = ["filelock", "huggingface-hub (>=0.30.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "tqdm (>=4.27)"] +torchhub = ["filelock", "huggingface-hub (>=0.34.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "tqdm (>=4.27)"] video = ["av"] vision = ["Pillow (>=10.0.1,<=15.0)"] diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml index 76807337..7abe7a40 100644 --- a/model_server/pyproject.toml +++ b/model_server/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "archgw_modelserver" -version = "0.3.4" +version = "0.3.7" description = "A model server for serving models" authors = ["Katanemo Labs, Inc "] license = "Apache 2.0" diff --git a/model_server/src/cli.py b/model_server/src/cli.py index 5245873e..d0d9e9e1 100644 --- a/model_server/src/cli.py +++ b/model_server/src/cli.py @@ -72,7 +72,7 @@ def start_server(port=51000, foreground=False): if foreground: process = subprocess.Popen( [ - "python", + sys.executable, "-m", "uvicorn", "src.main:app", @@ -85,7 +85,7 @@ def start_server(port=51000, foreground=False): else: process = subprocess.Popen( [ - "python", + sys.executable, "-m", "uvicorn", "src.main:app",