diff --git a/router.py b/router.py
index 5183e10..d356885 100644
--- a/router.py
+++ b/router.py
@@ -71,7 +71,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]:
     set is returned.
     """
     try:
-        async with httpx.AsyncClient(timeout=5.0) as client:
+        async with httpx.AsyncClient(timeout=2.5) as client:
             if "/v1" in endpoint:
                 resp = await client.get(f"{endpoint}/models")
             else:
@@ -85,8 +85,9 @@ async def fetch_available_models(endpoint: str) -> Set[str]:
             else:
                 models = {m.get("name") for m in data.get("models", []) if m.get("name")}
             return models
-    except Exception:
+    except Exception as e:
         # Treat any error as if the endpoint offers no models
+        print(e)
         return set()
 
 async def fetch_loaded_models(endpoint: str) -> Set[str]:
@@ -206,14 +207,19 @@ async def choose_endpoint(model: str) -> str:
     loaded_sets = await asyncio.gather(*load_tasks)
 
     async with usage_lock:
+        # Helper: get current usage count for (endpoint, model)
+        def current_usage(ep: str) -> int:
+            return usage_counts.get(ep, {}).get(model, 0)
+        
         # 3️⃣ Endpoints that have the model loaded *and* a free slot
         loaded_and_free = [
             ep for ep, models in zip(candidate_endpoints, loaded_sets)
             if model in models and usage_counts[ep].get(model, 0) < config.max_concurrent_connections
         ]
-
+        
         if loaded_and_free:
-            return random.choice(loaded_and_free)
+            ep = min(loaded_and_free, key=current_usage)
+            return ep
 
         # 4️⃣ Endpoints among the candidates that simply have a free slot
         endpoints_with_free_slot = [
@@ -222,10 +228,12 @@ async def choose_endpoint(model: str) -> str:
         ]
 
         if endpoints_with_free_slot:
-            return random.choice(endpoints_with_free_slot)
+            ep = min(endpoints_with_free_slot, key=current_usage)
+            return ep
 
         # 5️⃣ All candidate endpoints are saturated – pick any (will queue)
-        return random.choice(candidate_endpoints)
+        ep = min(candidate_endpoints, key=current_usage)
+        return ep
 
 # -------------------------------------------------------------
 # 6. API route – Generate