diff --git a/router.py b/router.py
index 3b8e330..9e7daeb 100644
--- a/router.py
+++ b/router.py
@@ -179,7 +179,7 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]:
     """
     client: aiohttp.ClientSession = app_state["session"]
     try:
-        async with client.get(f"/api/ps") as resp:
+        async with client.get(f"{endpoint}/api/ps") as resp:
             await _ensure_success(resp)
             data = await resp.json()
         # The response format is:
@@ -327,7 +327,7 @@ async def choose_endpoint(model: str) -> str:
     #      (concurrently, but only for the filtered list)
     load_tasks = [fetch_loaded_models(ep) for ep in candidate_endpoints]
     loaded_sets = await asyncio.gather(*load_tasks)
-
+    
     async with usage_lock:
         # Helper: get current usage count for (endpoint, model)
         def current_usage(ep: str) -> int:
@@ -336,7 +336,7 @@ async def choose_endpoint(model: str) -> str:
         # 3️⃣ Endpoints that have the model loaded *and* a free slot
         loaded_and_free = [
             ep for ep, models in zip(candidate_endpoints, loaded_sets)
-            if model in models and usage_counts[ep].get(model, 0) < config.max_concurrent_connections
+            if model in models and usage_counts.get(ep, {}).get(model, 0) < config.max_concurrent_connections
         ]
         
         if loaded_and_free:
@@ -346,7 +346,7 @@ async def choose_endpoint(model: str) -> str:
         # 4️⃣ Endpoints among the candidates that simply have a free slot
         endpoints_with_free_slot = [
             ep for ep in candidate_endpoints
-            if usage_counts[ep].get(model, 0) < config.max_concurrent_connections
+            if usage_counts.get(ep, {}).get(model, 0) < config.max_concurrent_connections
         ]
 
         if endpoints_with_free_slot: