diff --git a/.forgejo/workflows/docker-publish-semantic.yml b/.forgejo/workflows/docker-publish-semantic.yml index 2fa59d5..c47a1e5 100644 --- a/.forgejo/workflows/docker-publish-semantic.yml +++ b/.forgejo/workflows/docker-publish-semantic.yml @@ -18,6 +18,7 @@ on: env: REGISTRY: bitfreedom.net IMAGE_NAME: ${{ github.repository }} + CACHE_IMAGE: ${{ github.repository }}-buildcache-semantic DOCKER_BUILD_SUMMARY: "false" jobs: @@ -86,9 +87,9 @@ jobs: provenance: false build-args: | SEMANTIC_CACHE=true - tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-${{ matrix.arch }} - cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-semantic-${{ matrix.arch }} - cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-semantic-${{ matrix.arch }},mode=max + tags: ${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-${{ matrix.arch }} + cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:buildcache-semantic-${{ matrix.arch }} + cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:buildcache-semantic-${{ matrix.arch }},mode=min merge: runs-on: docker-amd64 @@ -144,6 +145,6 @@ jobs: run: | docker buildx imagetools create \ $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-amd64 \ - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-arm64 + ${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-amd64 \ + ${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-arm64 diff --git a/.forgejo/workflows/docker-publish.yml b/.forgejo/workflows/docker-publish.yml index 3979f62..e3dad4b 100644 --- a/.forgejo/workflows/docker-publish.yml +++ b/.forgejo/workflows/docker-publish.yml @@ -79,7 +79,7 @@ jobs: provenance: false tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }} cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }} - cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=max + cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=min merge: runs-on: docker-amd64 diff --git a/router.py b/router.py index 2f27d94..c87c5ca 100644 --- a/router.py +++ b/router.py @@ -2110,11 +2110,7 @@ async def chat_proxy(request: Request): # Only cache when no max_tokens limit was set — otherwise # finish_reason=length might just mean max_tokens was hit, # not that the context window was exhausted. - _req_max_tok = ( - params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict") - if use_openai else - (options.get("num_predict") if options else None) - ) + _req_max_tok = params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict") if _dr == "length" and not _req_max_tok: _pt = getattr(chunk, "prompt_eval_count", 0) or 0 _ct = getattr(chunk, "eval_count", 0) or 0