diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs index 11ce7295..198c2af7 100644 --- a/crates/common/src/tokenizer.rs +++ b/crates/common/src/tokenizer.rs @@ -12,10 +12,15 @@ pub fn token_count(model_name: &str, text: &str) -> Result { "tiktoken_rs: unsupported model: {}, using gpt-4 to compute token count", model_name ); - "gpt-4" } - true => model_name, + true => { + if model_name.starts_with("gpt-4.1") { + "gpt-4o" + } else { + model_name + } + } }; // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton? diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml index 682527ca..c01f3ffc 100644 --- a/demos/use_cases/preference_based_routing/arch_config.yaml +++ b/demos/use_cases/preference_based_routing/arch_config.yaml @@ -1,7 +1,7 @@ version: "0.1-beta" routing: - model: gpt-4o + model: archgw-v1-router-model listeners: egress_traffic: @@ -14,7 +14,7 @@ llm_providers: - name: archgw-v1-router-model provider_interface: openai - model: cotran2/llama-4-epoch + model: cotran2/qwen-4-epoch-2600 base_url: http://34.46.85.85:8000/v1 - name: gpt-4o diff --git a/demos/use_cases/preference_based_routing/docker-compose.yaml b/demos/use_cases/preference_based_routing/docker-compose.yaml index c2d794c6..54158f73 100644 --- a/demos/use_cases/preference_based_routing/docker-compose.yaml +++ b/demos/use_cases/preference_based_routing/docker-compose.yaml @@ -1,17 +1,14 @@ services: - chatbot_ui: - build: - context: ../../shared/chatbot_ui - dockerfile: Dockerfile + open-web-ui: + image: ghcr.io/open-webui/open-webui:main + restart: always ports: - - "18080:8080" + - "8080:8080" environment: - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:12000/v1 - extra_hosts: - - "host.docker.internal:host-gateway" - volumes: - - ./arch_config.yaml:/app/arch_config.yaml + - DEFAULT_MODEL=gpt-4o-mini + - ENABLE_OPENAI_API=true + - OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1 jaeger: build: diff --git a/demos/use_cases/preference_based_routing/test_router_endpoint.rest b/demos/use_cases/preference_based_routing/test_router_endpoint.rest index 9fc6f6fe..d4b947c8 100644 --- a/demos/use_cases/preference_based_routing/test_router_endpoint.rest +++ b/demos/use_cases/preference_based_routing/test_router_endpoint.rest @@ -1,10 +1,10 @@ @arch_llm_router_endpoint = http://35.192.87.187:8000 -POST {{arch_llm_router_endpoint}}/v1/chat/completions HTTP/1.1 +POST http://34.46.85.85:8000/v1/chat/completions HTTP/1.1 Content-Type: application/json { - "model": "cotran2/llama-1b-4-26", + "model": "cotran2/qwen-4-epoch-2600", "messages": [ { "role": "user",