diff --git a/router.py b/router.py
index 702aa50..be8c674 100644
--- a/router.py
+++ b/router.py
@@ -2684,7 +2684,16 @@ async def openai_chat_completions_proxy(request: Request):
 
     # 2. Endpoint logic
     endpoint = await choose_endpoint(model)
-    await increment_usage(endpoint, model)
+    # Normalize model name for tracking so it matches the PS table key:
+    # - Ollama: PS reports "model:latest" → append ":latest" when missing
+    # - llama-server: PS reports _normalize_llama_model_name(id) → strip HF prefix/quant
+    # - External OpenAI: not shown in PS, keep as-is
+    tracking_model = model
+    if endpoint in config.llama_server_endpoints:
+        tracking_model = _normalize_llama_model_name(model)
+    elif not is_ext_openai_endpoint(endpoint) and ":" not in model:
+        tracking_model = model + ":latest"
+    await increment_usage(endpoint, tracking_model)
     base_url = ep2base(endpoint)
     oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=config.api_keys.get(endpoint, "no-key"))
     # 3. Async generator that streams completions data and decrements the counter
@@ -2731,11 +2740,7 @@ async def openai_chat_completions_proxy(request: Request):
                     if llama_usage:
                         prompt_tok, comp_tok = llama_usage
                         if prompt_tok != 0 or comp_tok != 0:
-                            local_model = model
-                            if not is_ext_openai_endpoint(endpoint):
-                                if not ":" in model:
-                                    local_model = model if ":" in model else model + ":latest"
-                            await token_queue.put((endpoint, local_model, prompt_tok, comp_tok))
+                            await token_queue.put((endpoint, tracking_model, prompt_tok, comp_tok))
                 yield b"data: [DONE]\n\n"
             else:
                 prompt_tok = 0
@@ -2748,7 +2753,7 @@
                 if llama_usage:
                     prompt_tok, comp_tok = llama_usage
                     if prompt_tok != 0 or comp_tok != 0:
-                        await token_queue.put((endpoint, model, prompt_tok, comp_tok))
+                        await token_queue.put((endpoint, tracking_model, prompt_tok, comp_tok))
                 json_line = (
                     async_gen.model_dump_json()
                     if hasattr(async_gen, "model_dump_json")
@@ -2758,7 +2763,7 @@
 
         finally:
             # Ensure counter is decremented even if an exception occurs
-            await decrement_usage(endpoint, model)
+            await decrement_usage(endpoint, tracking_model)
 
     # 4. Return a StreamingResponse backed by the generator
     return StreamingResponse(
@@ -2832,7 +2837,16 @@ async def openai_completions_proxy(request: Request):
 
     # 2. Endpoint logic
     endpoint = await choose_endpoint(model)
-    await increment_usage(endpoint, model)
+    # Normalize model name for tracking so it matches the PS table key:
+    # - Ollama: PS reports "model:latest" → append ":latest" when missing
+    # - llama-server: PS reports _normalize_llama_model_name(id) → strip HF prefix/quant
+    # - External OpenAI: not shown in PS, keep as-is
+    tracking_model = model
+    if endpoint in config.llama_server_endpoints:
+        tracking_model = _normalize_llama_model_name(model)
+    elif not is_ext_openai_endpoint(endpoint) and ":" not in model:
+        tracking_model = model + ":latest"
+    await increment_usage(endpoint, tracking_model)
     base_url = ep2base(endpoint)
     oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=config.api_keys.get(endpoint, "no-key"))
 
@@ -2870,11 +2884,7 @@ async def openai_completions_proxy(request: Request):
                     if llama_usage:
                         prompt_tok, comp_tok = llama_usage
                         if prompt_tok != 0 or comp_tok != 0:
-                            local_model = model
-                            if not is_ext_openai_endpoint(endpoint):
-                                if not ":" in model:
-                                    local_model = model if ":" in model else model + ":latest"
-                            await token_queue.put((endpoint, local_model, prompt_tok, comp_tok))
+                            await token_queue.put((endpoint, tracking_model, prompt_tok, comp_tok))
                 # Final DONE event
                 yield b"data: [DONE]\n\n"
             else:
@@ -2888,7 +2898,7 @@
                 if llama_usage:
                     prompt_tok, comp_tok = llama_usage
                     if prompt_tok != 0 or comp_tok != 0:
-                        await token_queue.put((endpoint, model, prompt_tok, comp_tok))
+                        await token_queue.put((endpoint, tracking_model, prompt_tok, comp_tok))
                 json_line = (
                     async_gen.model_dump_json()
                     if hasattr(async_gen, "model_dump_json")
@@ -2898,7 +2908,7 @@
 
         finally:
            # Ensure counter is decremented even if an exception occurs
-            await decrement_usage(endpoint, model)
+            await decrement_usage(endpoint, tracking_model)
 
     # 4. Return a StreamingResponse backed by the generator
     return StreamingResponse(
diff --git a/static/index.html b/static/index.html
index 3d4d364..fe14ef5 100644
--- a/static/index.html
+++ b/static/index.html
@@ -928,7 +928,7 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
   const uniqueEndpoints = Array.from(new Set(endpoints));
   const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints));
   return `