diff --git a/router.py b/router.py
index d11c2ad..e2ce8d5 100644
--- a/router.py
+++ b/router.py
@@ -2796,7 +2796,7 @@ async def ps_details_proxy(request: Request):
     # Fetch /props for each llama-server model to get context length (n_ctx)
     # and unload sleeping models automatically
-    async def _fetch_llama_props(endpoint: str, model_id: str) -> tuple[int | None, bool]:
+    async def _fetch_llama_props(endpoint: str, model_id: str) -> tuple[int | None, bool, bool]:
         client: aiohttp.ClientSession = app_state["session"]
         base_url = endpoint.rstrip("/").removesuffix("/v1")
         props_url = f"{base_url}/props?model={model_id}"
@@ -2811,6 +2811,8 @@ async def ps_details_proxy(request: Request):
             dgs = data.get("default_generation_settings", {})
             n_ctx = dgs.get("n_ctx")
             is_sleeping = data.get("is_sleeping", False)
+            # Embedding models have no sampling params in default_generation_settings
+            is_generation = "temperature" in dgs
 
             if is_sleeping:
                 unload_url = f"{base_url}/models/unload"
@@ -2824,19 +2826,19 @@ async def ps_details_proxy(request: Request):
                 except Exception as ue:
                     print(f"[ps_details] Failed to unload sleeping model {model_id} from {endpoint}: {ue}")
 
-            return n_ctx, is_sleeping
+            return n_ctx, is_sleeping, is_generation
         except Exception as e:
             print(f"[ps_details] Failed to fetch props from {props_url}: {e}")
-            return None, False
+            return None, False, False
 
     props_results = await asyncio.gather(
         *[_fetch_llama_props(ep, mid) for ep, mid in props_requests]
     )
 
-    for (ep, raw_id), model_dict, (n_ctx, is_sleeping) in zip(props_requests, llama_models_pending, props_results):
+    for (ep, raw_id), model_dict, (n_ctx, is_sleeping, is_generation) in zip(props_requests, llama_models_pending, props_results):
         if n_ctx is not None:
             model_dict["context_length"] = n_ctx
-            if 0 < n_ctx <= _CTX_TRIM_SMALL_LIMIT:
+            if is_generation and 0 < n_ctx <= _CTX_TRIM_SMALL_LIMIT:
                 normalized = _normalize_llama_model_name(raw_id)
                 _endpoint_nctx[(ep, normalized)] = n_ctx
                 print(f"[ctx-cache/ps] cached n_ctx={n_ctx} for ({ep},{normalized})", flush=True)