diff --git a/crates/brightstaff/src/router/llm_router.rs b/crates/brightstaff/src/router/llm_router.rs index afb0a94a..e9f988ad 100644 --- a/crates/brightstaff/src/router/llm_router.rs +++ b/crates/brightstaff/src/router/llm_router.rs @@ -85,7 +85,7 @@ impl RouterService { info!( "router_request: {}", - shorten_string(&serde_json::to_string(&router_request).unwrap()) + &serde_json::to_string(&router_request).unwrap() ); let mut llm_route_request_headers = header::HeaderMap::new(); diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml index b4a1116b..9233f2f8 100644 --- a/demos/use_cases/preference_based_routing/arch_config.yaml +++ b/demos/use_cases/preference_based_routing/arch_config.yaml @@ -1,9 +1,7 @@ version: "0.1-beta" routing: - model: gpt-4o - # model: archgw-router - # model: claude-3.7 + model: archgw-v1-router-model listeners: egress_traffic: @@ -14,22 +12,28 @@ listeners: llm_providers: + - name: archgw-v1-router-model + provider_interface: openai + model: cotran2/llama-1b-4-26 + base_url: http://35.192.87.187:8000/v1 + + - name: gpt-4o-mini + provider_interface: openai + access_key: $OPENAI_API_KEY + model: gpt-4o-mini + default: true + - name: gpt-4o provider_interface: openai access_key: $OPENAI_API_KEY model: gpt-4o - usage: | - - complex reasoning problem - - require multi step answer + usage: Generating original content such as scripts, articles, or creative materials. - name: o4-mini provider_interface: openai access_key: $OPENAI_API_KEY model: o4-mini - usage: | - - simple requests like hello, hi etc. - - basic fact retrieval - - easy to answer + usage: Requesting topic ideas specifically related to personal finance and budgeting. tracing: random_sampling: 100 diff --git a/demos/use_cases/preference_based_routing/test.rest b/demos/use_cases/preference_based_routing/test.rest index f7c18cfb..9fc6f6fe 100644 --- a/demos/use_cases/preference_based_routing/test.rest +++ b/demos/use_cases/preference_based_routing/test.rest @@ -1,4 +1,4 @@ -@arch_llm_router_endpoint = http://34.30.16.38:8000 +@arch_llm_router_endpoint = http://35.192.87.187:8000 POST {{arch_llm_router_endpoint}}/v1/chat/completions HTTP/1.1 Content-Type: application/json