randomize endpoint selection for bootstrapping ollamas

This commit is contained in:
Alpha Nerd 2025-09-18 18:49:11 +02:00
parent 96995cd37f
commit 8fe3880af7

View file

@ -6,7 +6,7 @@ version: 0.3
license: AGPL
"""
# -------------------------------------------------------------
import json, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, datetime
import json, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, datetime, random
from pathlib import Path
from typing import Dict, Set, List, Optional
from fastapi import FastAPI, Request, HTTPException
@ -376,11 +376,12 @@ async def choose_endpoint(model: str) -> str:
1 Query every endpoint for its advertised models (`/api/tags`).
2 Build a list of endpoints that contain the requested model.
3 For those endpoints, find those that have the model loaded
(`/api/ps`) *and* still have a free slot.
(`/api/ps`) *and* still have a free slot.
4 If none are both loaded and free, fall back to any endpoint
from the filtered list that simply has a free slot.
from the filtered list that simply has a free slot, choosing
one at random.
5 If all are saturated, pick any endpoint from the filtered list
(the request will queue on that endpoint).
(the request will queue on that endpoint).
6 If no endpoint advertises the model at all, raise an error.
"""
# 1⃣ Gather advertised-model sets for all endpoints concurrently
@ -436,8 +437,7 @@ async def choose_endpoint(model: str) -> str:
]
if endpoints_with_free_slot:
ep = min(endpoints_with_free_slot, key=current_usage)
return ep
return random.choice(endpoints_with_free_slot)
# 5⃣ All candidate endpoints are saturated – pick the one with the lowest usage count (will queue)
ep = min(candidate_endpoints, key=current_usage)