mirror of
https://github.com/katanemo/plano.git
synced 2026-05-09 07:42:43 +02:00
add session pinning to llm_chat handler and rewrite session pinning demo
- Extend brightstaff `llm_chat_inner` to extract `X-Session-Id`, check the session cache before routing, and cache the result afterward — the same pattern as `routing_service.rs`.
- Replace the old urllib-based demo with a real FastAPI research agent that runs 3 independent tool-calling tasks with alternating intents so that Plano routes to different models; `demo.py` is a pure httpx client that shows the routing trace side by side, with and without session pinning.
This commit is contained in:
parent
71437d2b2c
commit
0105897692
7 changed files with 771 additions and 200 deletions
28
demos/llm_routing/session_pinning/start_agents.sh
Executable file
28
demos/llm_routing/session_pinning/start_agents.sh
Executable file
|
|
@ -0,0 +1,28 @@
|
|||
#!/bin/bash
# Starts the session-pinning demo agent (agent.py, run through `uv`) in the
# background and stays in the foreground until it exits.

# Abort on the first unhandled command failure.
set -e

# Absolute directory containing this script, so agent.py is located
# regardless of the caller's working directory. `--` stops a path that begins
# with '-' from being parsed as an option by cd/dirname.
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"

# PIDs of the background processes started by this script.
PIDS=()
|
||||
|
||||
# Write one line to stdout: the current date and time ('+%F %T' format),
# then " - ", then all arguments joined by the first character of IFS.
log() {
  printf '%s - %s\n' "$(date '+%F %T')" "$*"
}
|
||||
|
||||
#######################################
# Signal every process recorded in PIDS to terminate, then exit.
# Installed by this script as the handler for EXIT, INT and TERM.
# Globals:   PIDS (read)
# Arguments: none
# Outputs:   progress messages through log
# Returns:   does not return; exits with the status that was current when
#            the handler was entered, so a failed agent is not reported as
#            success.
#######################################
cleanup() {
  # Must be first: any other command would overwrite $?.
  local status=$?
  # Remove the handlers before doing anything else. Without this, the `exit`
  # at the bottom re-fires the EXIT trap after an INT/TERM and the whole
  # function runs (and logs) a second time.
  trap - EXIT INT TERM
  log "Stopping agents..."
  local pid
  for pid in "${PIDS[@]}"; do
    # kill fails when the process is already gone; that is not an error here,
    # so its message is suppressed and only successful stops are logged.
    if kill "$pid" 2>/dev/null; then
      log "Stopped process $pid"
    fi
  done
  exit "$status"
}
|
||||
|
||||
# Run cleanup on normal exit as well as on INT (Ctrl-C) and TERM.
trap cleanup EXIT INT TERM

# Exported so the agent process inherits them; values already present in the
# caller's environment take precedence over these defaults.
export PLANO_URL="${PLANO_URL:-http://localhost:12000}"
export AGENT_PORT="${AGENT_PORT:-8000}"

log "Starting research_agent on port $AGENT_PORT..."
uv run "$SCRIPT_DIR/agent.py" &
# Quoted so the PID is appended as exactly one array element.
PIDS+=("$!")

# Block until each started process has exited. If errexit (`set -e`) is in
# effect, a non-zero status from wait ends the script at this point.
for PID in "${PIDS[@]}"; do
  wait "$PID"
done
|
||||
Loading…
Add table
Add a link
Reference in a new issue