diff --git a/.forgejo/workflows/docker-publish-semantic.yml b/.forgejo/workflows/docker-publish-semantic.yml
index 2fa59d5..c47a1e5 100644
--- a/.forgejo/workflows/docker-publish-semantic.yml
+++ b/.forgejo/workflows/docker-publish-semantic.yml
@@ -18,6 +18,7 @@ on:
 env:
   REGISTRY: bitfreedom.net
   IMAGE_NAME: ${{ github.repository }}
+  CACHE_IMAGE: ${{ github.repository }}-buildcache-semantic
   DOCKER_BUILD_SUMMARY: "false"
 
 jobs:
@@ -86,9 +87,9 @@ jobs:
           provenance: false
           build-args: |
             SEMANTIC_CACHE=true
-          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-${{ matrix.arch }}
-          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-semantic-${{ matrix.arch }}
-          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-semantic-${{ matrix.arch }},mode=max
+          tags: ${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-${{ matrix.arch }}
+          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:buildcache-semantic-${{ matrix.arch }}
+          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:buildcache-semantic-${{ matrix.arch }},mode=min
 
   merge:
     runs-on: docker-amd64
@@ -144,6 +145,6 @@ jobs:
         run: |
           docker buildx imagetools create \
             $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-amd64 \
-            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-arm64
+            ${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-amd64 \
+            ${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-arm64
 
diff --git a/.forgejo/workflows/docker-publish.yml b/.forgejo/workflows/docker-publish.yml
index 3979f62..e3dad4b 100644
--- a/.forgejo/workflows/docker-publish.yml
+++ b/.forgejo/workflows/docker-publish.yml
@@ -79,7 +79,7 @@ jobs:
           provenance: false
           tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }}
           cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }}
-          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=max
+          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=min
 
   merge:
     runs-on: docker-amd64
diff --git a/router.py b/router.py
index 2f27d94..c87c5ca 100644
--- a/router.py
+++ b/router.py
@@ -2110,11 +2110,7 @@ async def chat_proxy(request: Request):
                         # Only cache when no max_tokens limit was set — otherwise
                         # finish_reason=length might just mean max_tokens was hit,
                         # not that the context window was exhausted.
-                        _req_max_tok = (
-                            params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict")
-                            if use_openai else
-                            (options.get("num_predict") if options else None)
-                        )
+                        _req_max_tok = params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict")
                         if _dr == "length" and not _req_max_tok:
                             _pt = getattr(chunk, "prompt_eval_count", 0) or 0
                             _ct = getattr(chunk, "eval_count", 0) or 0