mirror of
https://github.com/katanemo/plano.git
synced 2026-05-30 14:25:15 +02:00
add k8s deployment manifests and docs for self-hosted Arch-Router
This commit is contained in:
parent
f1b8c03e2f
commit
5b58bb60c3
7 changed files with 381 additions and 342 deletions
36
demos/llm_routing/model_routing_service/test.rest
Normal file
36
demos/llm_routing/model_routing_service/test.rest
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
### Code generation query (OpenAI format) — expects anthropic/claude-sonnet
|
||||
POST http://localhost:12000/routing/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o-mini",
|
||||
"messages": [{"role": "user", "content": "Write a Python function for binary search"}]
|
||||
}
|
||||
|
||||
### Complex reasoning query (OpenAI format) — expects openai/gpt-4o
|
||||
POST http://localhost:12000/routing/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o-mini",
|
||||
"messages": [{"role": "user", "content": "Analyze the trade-offs between microservices and monolithic architecture"}]
|
||||
}
|
||||
|
||||
### Simple query (OpenAI format) — no routing match, expects default model
|
||||
POST http://localhost:12000/routing/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o-mini",
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}
|
||||
|
||||
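### Code generation query (Anthropic format) — presumably expects anthropic/claude-sonnet, like the OpenAI-format code generation query (confirm against the routing config)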
|
||||
POST http://localhost:12000/routing/v1/messages
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{"role": "user", "content": "Write a REST API in Go using Gin"}]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue