mirror of
https://github.com/katanemo/plano.git
synced 2026-05-09 07:42:43 +02:00
add session pinning to llm_chat handler and rewrite session pinning demo
- extend brightstaff llm_chat_inner to extract X-Session-Id, check the session cache before routing, and cache the result afterward — same pattern as routing_service.rs
- replace old urllib-based demo with a real FastAPI research agent that runs 3 independent tool-calling tasks with alternating intents so Plano routes to different models; demo.py is a pure httpx client that shows the routing trace side-by-side with and without session pinning
This commit is contained in:
parent
71437d2b2c
commit
0105897692
7 changed files with 771 additions and 200 deletions
|
|
@ -3,9 +3,17 @@ set -e
|
|||
|
||||
# Absolute path of the directory containing this script, so that sibling
# files can be referenced regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
# PLANO_URL keeps any non-empty value already in the environment; otherwise
# it falls back to http://localhost:12000.
export PLANO_URL="${PLANO_URL:-http://localhost:12000}"
|
||||
# AGENT_PORT keeps any non-empty value already in the environment; otherwise
# it falls back to 8000.
export AGENT_PORT="${AGENT_PORT:-8000}"
|
||||
# AGENT_URL is always rebuilt from AGENT_PORT; a pre-existing AGENT_URL in the
# environment is overwritten.
export AGENT_URL="http://localhost:$AGENT_PORT"
|
||||
|
||||
# Banner: announce the run and show which PLANO_URL is in effect.
echo "Running session pinning demo..."
|
||||
echo "PLANO_URL=$PLANO_URL"
|
||||
echo ""
|
||||
#######################################
# Stop the background process whose PID was recorded in AGENT_PID, if any.
# Safe to call more than once and safe to call before AGENT_PID is set.
# Globals:   AGENT_PID (read; cleared after the process has been signalled)
# Arguments: none
# Returns:   always 0 — cleanup is best-effort and must not report a failure
#            of its own (it is meant to run from a trap, where a non-zero
#            status could interfere with the script's exit under `set -e`).
#######################################
cleanup() {
  if [ -n "${AGENT_PID:-}" ]; then
    # The process may already have exited; that is not an error here.
    kill "$AGENT_PID" 2>/dev/null || true
    # Reap the child so it does not linger as a zombie. `wait` fails if the
    # PID is not (or no longer) a child of this shell, which is also fine.
    wait "$AGENT_PID" 2>/dev/null || true
    # Clear the PID so a second invocation (e.g. a signal trap followed by
    # the EXIT trap) does not signal an unrelated, recycled PID.
    AGENT_PID=""
  fi
  return 0
}
|
||||
# Run cleanup when the script exits and also when it receives INT or TERM.
# Because EXIT is included, cleanup may be invoked more than once on a
# signal-driven shutdown.
trap cleanup EXIT INT TERM
|
||||
|
||||
# NOTE(review): this runs demo.py with python3 before the agent is started,
# and the same demo.py is run again at the end via `uv run`. It looks like the
# pre-change side of the diff — confirm whether this line should remain.
python3 "$SCRIPT_DIR/demo.py"
|
||||
# Start the agent in the background
|
||||
"$SCRIPT_DIR/start_agents.sh" &
|
||||
# $! is the PID of the start_agents.sh process just launched; cleanup uses it.
# NOTE(review): any processes start_agents.sh spawns are not tracked here —
# confirm they terminate when start_agents.sh is killed.
AGENT_PID=$!
|
||||
|
||||
# Run the demo client
# NOTE(review): there is no visible wait for the agent to become ready before
# the client starts — confirm demo.py tolerates an agent that is still booting.
|
||||
uv run "$SCRIPT_DIR/demo.py"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue