mirror of
https://github.com/katanemo/plano.git
synced 2026-06-23 15:38:07 +02:00
feat: add provider arbitrage policy and fallback routing
This commit is contained in:
parent
de2d8847f3
commit
07ad4c6ae2
10 changed files with 670 additions and 57 deletions
38
demos/llm_routing/gpu_free_tier_arbitrage/README.md
Normal file
38
demos/llm_routing/gpu_free_tier_arbitrage/README.md
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# GPU Free-Tier Arbitrage Demo
|
||||
|
||||
This demo package showcases provider-level free-tier-first routing and deterministic fallback using a local Plano endpoint on `localhost:12000`.
|
||||
|
||||
## Files
|
||||
|
||||
- `config.yaml` - demo Plano config with `arbitrage_policy`
|
||||
- `demo.rest` - runnable REST requests for IDE REST clients
|
||||
|
||||
## Prerequisites
|
||||
|
||||
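Set the environment variables and local services that `config.yaml` references:

- `OPENAI_API_KEY_FAILURE` - read by the primary `openai/gpt-5.2` provider; the config describes it as a failure key used to test the arbitrage policy (`OPENAI_API_KEY` is only needed if you edit the config to use it)
- `GROQ_API_KEY` - read by the `groq/llama-3.1-8b-instant` candidate
- A local Ollama server at `http://localhost:11434` for the `ollama/qwen3:8b` candidate (`TOGETHER_API_KEY` is not referenced by this config)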
|
||||
|
||||
## Run the demo
|
||||
|
||||
From this directory:
|
||||
|
||||
```bash
|
||||
planoai up config.yaml
|
||||
```
|
||||
|
||||
Then run requests from `demo.rest` in your REST client.
|
||||
|
||||
## What to show during the demo
|
||||
|
||||
1. Run `free-tier-first showcase` and verify response success.
|
||||
2. Inspect logs/traces for provider selection reason and selected candidate.
|
||||
3. Force a retryable error on the first candidate (for example, temporarily invalid key), then run `fallback showcase`.
|
||||
4. Verify fallback metadata appears in traces/logs:
   - `routing.selection_reason`
   - `routing.is_fallback`
   - `routing.fallback_trigger`
   - `routing.next_candidate`
   - `routing.upstream_endpoint`
|
||||
30
demos/llm_routing/gpu_free_tier_arbitrage/config.yaml
Normal file
30
demos/llm_routing/gpu_free_tier_arbitrage/config.yaml
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# Demo Plano config for the free-tier arbitrage / fallback routing demo.
version: v0.3.0

# Single model listener; the demo requests target this port on localhost.
listeners:
  - type: model
    name: model_listener
    port: 12000
    max_retries: 1

model_providers:
  # Primary provider for the model.
  - model: openai/gpt-5.2
    # This is a failure key to test the arbitrage policy
    access_key: $OPENAI_API_KEY_FAILURE
    default: true
    # NOTE(review): nesting of arbitrage_policy under this provider entry is
    # reconstructed from line order - confirm against the upstream file.
    arbitrage_policy:
      enabled: true
      rank:
        # Demo low-cost/free-tier candidates (ordered).
        # Quoted because the model ID contains a colon.
        - "ollama/qwen3:8b"
        - groq/llama-3.1-8b-instant

  # Candidates referenced by arbitrage_policy.rank.
  - model: groq/llama-3.1-8b-instant
    access_key: $GROQ_API_KEY

  - model: "ollama/qwen3:8b"
    base_url: http://localhost:11434

tracing:
  random_sampling: 100
|
||||
31
demos/llm_routing/gpu_free_tier_arbitrage/demo.rest
Normal file
31
demos/llm_routing/gpu_free_tier_arbitrage/demo.rest
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# Base URL of the local Plano model listener used by every request in this file.
@llm_endpoint = http://localhost:12000

### free-tier-first showcase
# Non-streaming chat completion request addressed to "gpt-5.2".
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "model": "gpt-5.2",
  "stream": false,
  "messages": [
    {
      "role": "user",
      "content": "Reply with exactly: free-tier-first routing demo successful."
    }
  ]
}
|
||||
|
||||
### fallback showcase (run after forcing first candidate failure)
# Same request shape as the showcase above; only the prompt text differs.
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "model": "gpt-5.2",
  "stream": false,
  "messages": [
    {
      "role": "user",
      "content": "Reply with exactly: fallback routing demo successful."
    }
  ]
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue