support session pinning for consistent model selection in routing (#813)

This commit is contained in:
Adil Hafeez 2026-03-13 17:32:32 -07:00
parent 785bf7e021
commit 46a5bfd82d
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
8 changed files with 406 additions and 3 deletions

View file

@@ -117,4 +117,47 @@ curl -s "$PLANO_URL/routing/v1/messages" \
}' | python3 -m json.tool
echo ""
# --- Example 7: Session pinning - first call (fresh routing) ---
# POSTs a chat-completions request to $PLANO_URL with the header
# "X-Session-Id: demo-session-001" and pretty-prints the JSON reply.
# This is the first request in the demo that uses this session id.
printf '%s\n' "--- 7. Session pinning - first call (fresh routing decision) ---" ""
curl --silent \
  --header "Content-Type: application/json" \
  --header "X-Session-Id: demo-session-001" \
  --data '{
"model": "gpt-4o-mini",
"messages": [
{"role": "user", "content": "Write a Python function that implements binary search on a sorted array"}
]
}' \
  "$PLANO_URL/routing/v1/chat/completions" \
  | python3 -m json.tool
printf '\n'
# --- Example 8: Session pinning - second call (pinned result) ---
# Re-uses the session id from Example 7 ("demo-session-001") with a different
# user prompt. The banner tells the reader to look for "pinned": true in the
# pretty-printed response.
printf '%s\n' \
  "--- 8. Session pinning - second call (same session, pinned) ---" \
  " Notice: same model returned with \"pinned\": true, routing was skipped" \
  ""
curl --silent \
  --header "Content-Type: application/json" \
  --header "X-Session-Id: demo-session-001" \
  --data '{
"model": "gpt-4o-mini",
"messages": [
{"role": "user", "content": "Now explain how merge sort works and when to prefer it over quicksort"}
]
}' \
  "$PLANO_URL/routing/v1/chat/completions" \
  | python3 -m json.tool
printf '\n'
# --- Example 9: Different session gets fresh routing ---
# Same endpoint and model field as Examples 7 and 8, but sent under a new
# session id ("demo-session-002"); the response is pretty-printed as before.
printf '%s\n' "--- 9. Different session gets its own fresh routing ---" ""
curl --silent \
  --header "Content-Type: application/json" \
  --header "X-Session-Id: demo-session-002" \
  --data '{
"model": "gpt-4o-mini",
"messages": [
{"role": "user", "content": "Explain the trade-offs between microservices and monolithic architectures"}
]
}' \
  "$PLANO_URL/routing/v1/chat/completions" \
  | python3 -m json.tool
printf '\n'

# Closing banner for the whole demo run.
printf '%s\n' "=== Demo Complete ==="