diff --git a/.gitignore b/.gitignore
index 4bb65cd..702c855 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,10 +51,6 @@ cython_debug/
 
 # VS Code files for those working on multiple tools
 .vscode/*
-.vscode/settings.json
-!.vscode/tasks.json
-!.vscode/launch.json
-!.vscode/extensions.json
 *.code-workspace
 
 # Local History for Visual Studio Code
diff --git a/router.py b/router.py
index 217e38b..dff8aa9 100644
--- a/router.py
+++ b/router.py
@@ -25,6 +25,7 @@ from PIL import Image
 # ------------------------------------------------------------------
 # Successful results are cached for 300s
 _models_cache: dict[str, tuple[Set[str], float]] = {}
+_loaded_models_cache: dict[str, tuple[Set[str], float]] = {}
-# Transient errors are cached for 1s – the key stays until the
+# Transient errors are cached for 10s – the key stays until the
 # timeout expires, after which the endpoint will be queried again.
 _error_cache: dict[str, float] = {}
@@ -226,7 +227,7 @@ class fetch:
                 del _models_cache[endpoint]
 
         if endpoint in _error_cache:
-            if _is_fresh(_error_cache[endpoint], 1):
+            if _is_fresh(_error_cache[endpoint], 10):
                 # Still within the short error TTL – pretend nothing is available
                 return set()
             else:
@@ -269,9 +270,22 @@ class fetch:
         loaded on that endpoint. If the request fails (e.g. timeout,
         5xx), an empty set is returned.
         """
-        client: aiohttp.ClientSession = app_state["session"]
         if is_ext_openai_endpoint(endpoint):
             return set()
+        if endpoint in _loaded_models_cache:
+            models, cached_at = _loaded_models_cache[endpoint]
+            if _is_fresh(cached_at, 30):
+                return models
+            else:
+                # stale entry – drop it
+                del _loaded_models_cache[endpoint]
+
+        if endpoint in _error_cache:
+            if _is_fresh(_error_cache[endpoint], 10):
+                return set()
+            else:
+                del _error_cache[endpoint]
+        client: aiohttp.ClientSession = app_state["session"]
         try:
             async with client.get(f"{endpoint}/api/ps") as resp:
                 await _ensure_success(resp)
@@ -279,6 +293,7 @@ class fetch:
             # The response format is:
             # {"models": [{"name": "model1"}, {"name": "model2"}]}
             models = {m.get("name") for m in data.get("models", []) if m.get("name")}
+            _loaded_models_cache[endpoint] = (models, time.time())
             return models
         except Exception as e:
             # If anything goes wrong we simply assume the endpoint has no models
@@ -975,7 +990,7 @@ async def embed_proxy(request: Request):
 
     # 2. Endpoint logic
     endpoint = await choose_endpoint(model)
-    is_openai_endpoint = "/v1" in endpoint
+    is_openai_endpoint = is_ext_openai_endpoint(endpoint)  # "/v1" in endpoint
     if is_openai_endpoint:
         if ":latest" in model:
             model = model.split(":latest")
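
Note for reviewers: both caches lean on the same freshness helper. Its body sits outside the hunks shown above, but from the call sites (`_is_fresh(cached_at, 30)`, `_is_fresh(_error_cache[endpoint], 10)`) it is presumably a wall-clock TTL check along these lines — a minimal sketch, not the actual implementation:

```python
import time

def _is_fresh(cached_at: float, ttl_seconds: float) -> bool:
    # An entry is fresh while fewer than ttl_seconds have elapsed
    # since it was stored via time.time().
    return (time.time() - cached_at) < ttl_seconds
```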
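The new `/api/ps` path follows the same two-tier flow as the existing model lookup: a 30 s success cache, a 10 s error back-off, then a live query. A self-contained sketch of that flow, with a hypothetical `query` callable standing in for the aiohttp request (the real code presumably also records failures in `_error_cache` inside its `except` branch, which is outside the visible hunks):

```python
import time
from typing import Callable, Set

_success: dict[str, tuple[Set[str], float]] = {}  # endpoint -> (models, cached_at)
_errors: dict[str, float] = {}                    # endpoint -> failed_at

SUCCESS_TTL = 30  # seconds a good result is reused
ERROR_TTL = 10    # seconds a failing endpoint is skipped

def cached_models(endpoint: str, query: Callable[[str], Set[str]]) -> Set[str]:
    now = time.time()
    if endpoint in _success:
        models, cached_at = _success[endpoint]
        if now - cached_at < SUCCESS_TTL:
            return models
        del _success[endpoint]  # stale entry – drop it
    if endpoint in _errors:
        if now - _errors[endpoint] < ERROR_TTL:
            return set()        # still inside the error back-off window
        del _errors[endpoint]
    try:
        models = query(endpoint)
        _success[endpoint] = (models, now)
        return models
    except Exception:
        _errors[endpoint] = now  # remember the failure briefly
        return set()
```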