Update code to use new JSON-based system prompt for routing (#493)

This commit is contained in:
Adil Hafeez 2025-05-30 17:40:46 -07:00 committed by GitHub
parent 8d12a9a6e0
commit 0d190a6e5c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 433 additions and 283 deletions

View file

@ -1,7 +1,7 @@
version: "0.1-beta"
routing:
model: archgw-v1-router-model
model: arch-router
listeners:
egress_traffic:
@ -12,10 +12,15 @@ listeners:
llm_providers:
- name: archgw-v1-router-model
- name: arch-router
access_key: $OPENAI_API_KEY
provider_interface: arch
model: Arch-Router
- name: gpt-4o-mini
provider_interface: openai
model: cotran2/qwen-4-epoch-2600
base_url: http://34.46.85.85:8000/v1
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
- name: gpt-4o
provider_interface: openai

View file

@ -6,7 +6,7 @@ services:
ports:
- "8080:8080"
environment:
- DEFAULT_MODEL=gpt-4o-mini
- DEFAULT_MODELS=gpt-4o-mini
- ENABLE_OPENAI_API=true
- OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1

View file

@ -1,6 +1,6 @@
@arch_llm_router_endpoint = http://35.192.87.187:8000
POST http://34.46.85.85:8000/v1/chat/completions HTTP/1.1
POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
@ -21,4 +21,5 @@ Content-Type: application/json
{"model":"cotran2/llama-1b-4-26","messages":[{"role":"user","content":"\nYou are an advanced Routing Assistant designed to select the optimal route based on user requests. \nYour task is to analyze conversations and match them to the most appropriate predefined route.\nReview the available routes config:\n\n# ROUTES CONFIG START\n- name: gpt-4o\n description: simple requests, basic fact retrieval, easy to answer\n- name: o4-mini()\n description: complex reasoning problem, require multi step answer\n# ROUTES CONFIG END\n\nExamine the following conversation between a user and an assistant:\n\n# CONVERSATION START\n[{\"role\":\"user\",\"content\":\"What is the capital of France?\"}]\n# CONVERSATION END\n\nYour goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:\n\n1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.\n2. Check if the user's request and scenario matches any of the routes in the routing configuration (focus on the description).\n3. Find the route that best matches.\n4. Use context clues from the entire conversation to determine the best fit.\n5. Return the best match possible. You only response the name of the route that best matches the user's request, use the exact name in the routes config.\n6. If no route relatively close to matches the user's latest intent or user last message is thank you or greeting, return an empty route ''. \n\n# OUTPUT FORMAT\nYour final output must follow this JSON format:\n{\n \"route\": \"route_name\" # The matched route name, or empty string '' if no match\n}\n\nBased on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace.\n"}],"stream":false}
### get model list
GET http://34.46.85.85:8000/v1/models HTTP/1.1
# GET http://34.46.85.85:8000/v1/models HTTP/1.1
GET https://archfc.katanemo.dev/arch-router/v1/models HTTP/1.1