Update code to use new JSON-based system prompt for routing (#493)

2026-07-23 16:51:04 +02:00 · 2025-05-30 17:40:46 -07:00 · 2025-05-30 17:40:46 -07:00 · 0d190a6e5c
commit 0d190a6e5c
parent 8d12a9a6e0
12 changed files with 433 additions and 283 deletions
--- a/demos/use_cases/preference_based_routing/arch_config.yaml
+++ b/demos/use_cases/preference_based_routing/arch_config.yaml
@@ -1,7 +1,7 @@
 version: "0.1-beta"

 routing:
-  model: archgw-v1-router-model
+  model: arch-router

 listeners:
  egress_traffic:
@@ -12,10 +12,15 @@ listeners:

 llm_providers:

-  - name: archgw-v1-router-model
+  - name: arch-router
+    access_key: $OPENAI_API_KEY
+    provider_interface: arch
+    model: Arch-Router
+
+  - name: gpt-4o-mini
    provider_interface: openai
-    model: cotran2/qwen-4-epoch-2600
-    base_url: http://34.46.85.85:8000/v1
+    access_key: $OPENAI_API_KEY
+    model: gpt-4o-mini

  - name: gpt-4o
    provider_interface: openai
--- a/demos/use_cases/preference_based_routing/docker-compose.yaml
+++ b/demos/use_cases/preference_based_routing/docker-compose.yaml
@@ -6,7 +6,7 @@ services:
    ports:
      - "8080:8080"
    environment:
-      - DEFAULT_MODEL=gpt-4o-mini
+      - DEFAULT_MODELS=gpt-4o-mini
      - ENABLE_OPENAI_API=true
      - OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1

--- a/demos/use_cases/preference_based_routing/test_router_endpoint.rest
+++ b/demos/use_cases/preference_based_routing/test_router_endpoint.rest
@@ -1,6 +1,6 @@
@arch_llm_router_endpoint = http://35.192.87.187:8000

-POST http://34.46.85.85:8000/v1/chat/completions HTTP/1.1
+POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
 Content-Type: application/json

 {
@@ -21,4 +21,5 @@ Content-Type: application/json
 {"model":"cotran2/llama-1b-4-26","messages":[{"role":"user","content":"\nYou are an advanced Routing Assistant designed to select the optimal route based on user requests. \nYour task is to analyze conversations and match them to the most appropriate predefined route.\nReview the available routes config:\n\n# ROUTES CONFIG START\n- name: gpt-4o\n  description: simple requests, basic fact retrieval, easy to answer\n- name: o4-mini()\n  description: complex reasoning problem, require multi step answer\n# ROUTES CONFIG END\n\nExamine the following conversation between a user and an assistant:\n\n# CONVERSATION START\n[{\"role\":\"user\",\"content\":\"What is the capital of France?\"}]\n# CONVERSATION END\n\nYour goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:\n\n1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.\n2. Check if the user's request and scenario matches any of the routes in the routing configuration (focus on the description).\n3. Find the route that best matches.\n4. Use context clues from the entire conversation to determine the best fit.\n5. Return the best match possible. You only response the name of the route that best matches the user's request, use the exact name in the routes config.\n6. If no route relatively close to matches the user's latest intent or user last message is thank you or greeting, return an empty route ''. \n\n# OUTPUT FORMAT\nYour final output must follow this JSON format:\n{\n  \"route\": \"route_name\" # The matched route name, or empty string '' if no match\n}\n\nBased on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace.\n"}],"stream":false}

 ### get model list
-GET http://34.46.85.85:8000/v1/models HTTP/1.1
+# GET http://34.46.85.85:8000/v1/models HTTP/1.1
+GET https://archfc.katanemo.dev/arch-router/v1/models HTTP/1.1