Compare commits
No commits in common. "07b80e654fd3dbc1157a34cc099eb9dfe8f06152" and "0bf91a6dd011c56a9c973b1527a2c271db60f42c" have entirely different histories.
07b80e654f
...
0bf91a6dd0
3 changed files with 8 additions and 11 deletions
|
|
@ -18,6 +18,7 @@ on:
|
|||
env:
|
||||
REGISTRY: bitfreedom.net
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
CACHE_IMAGE: ${{ github.repository }}-buildcache-semantic
|
||||
DOCKER_BUILD_SUMMARY: "false"
|
||||
|
||||
jobs:
|
||||
|
|
@ -86,9 +87,9 @@ jobs:
|
|||
provenance: false
|
||||
build-args: |
|
||||
SEMANTIC_CACHE=true
|
||||
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-${{ matrix.arch }}
|
||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-semantic-${{ matrix.arch }}
|
||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-semantic-${{ matrix.arch }},mode=max
|
||||
tags: ${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-${{ matrix.arch }}
|
||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:buildcache-semantic-${{ matrix.arch }}
|
||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:buildcache-semantic-${{ matrix.arch }},mode=min
|
||||
|
||||
merge:
|
||||
runs-on: docker-amd64
|
||||
|
|
@ -144,6 +145,6 @@ jobs:
|
|||
run: |
|
||||
docker buildx imagetools create \
|
||||
$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-amd64 \
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-arm64
|
||||
${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-amd64 \
|
||||
${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-arm64
|
||||
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ jobs:
|
|||
provenance: false
|
||||
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }}
|
||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }}
|
||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=max
|
||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=min
|
||||
|
||||
merge:
|
||||
runs-on: docker-amd64
|
||||
|
|
|
|||
|
|
@ -2110,11 +2110,7 @@ async def chat_proxy(request: Request):
|
|||
# Only cache when no max_tokens limit was set — otherwise
|
||||
# finish_reason=length might just mean max_tokens was hit,
|
||||
# not that the context window was exhausted.
|
||||
_req_max_tok = (
|
||||
params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict")
|
||||
if use_openai else
|
||||
(options.get("num_predict") if options else None)
|
||||
)
|
||||
_req_max_tok = params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict")
|
||||
if _dr == "length" and not _req_max_tok:
|
||||
_pt = getattr(chunk, "prompt_eval_count", 0) or 0
|
||||
_ct = getattr(chunk, "eval_count", 0) or 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue