diff --git a/backends/probe.py b/backends/probe.py
index 2fd3a60..02c414e 100644
--- a/backends/probe.py
+++ b/backends/probe.py
@@ -192,12 +192,19 @@ class fetch:
         For llama-server endpoints: queries /v1/models and filters for status.value == "loaded"
         """
         client: aiohttp.ClientSession = get_session(endpoint)
+        cfg = get_config()
 
         # Check if this is a llama-server endpoint
-        if endpoint in get_config().llama_server_endpoints:
-            # Query /v1/models for llama-server
+        if endpoint in cfg.llama_server_endpoints:
+            # Query /v1/models for llama-server. Send the configured key as a
+            # Bearer token — current llama.cpp leaves /models public, but a
+            # build/config that protects it would otherwise 401 this probe.
+            headers = {"Referer": default_headers.get("HTTP-Referer", "https://nomyo.ai")}
+            api_key = cfg.api_keys.get(endpoint)
+            if api_key is not None:
+                headers["Authorization"] = "Bearer " + api_key
             try:
-                async with client.get(f"{endpoint}/models") as resp:
+                async with client.get(f"{endpoint}/models", headers=headers) as resp:
                     await _ensure_success(resp)
                     data = await resp.json()