Merge pull request 'dev-v0.7.x' (#21) from dev-v0.7.x into main
All checks were successful
Build and Publish Docker Image (Semantic Cache) / build (amd64, linux/amd64, docker-amd64) (push) Successful in 35s
Build and Publish Docker Image / build (amd64, linux/amd64, docker-amd64) (push) Successful in 36s
Build and Publish Docker Image (Semantic Cache) / build (arm64, linux/arm64, docker-arm64) (push) Successful in 10m21s
Build and Publish Docker Image (Semantic Cache) / merge (push) Successful in 32s
Build and Publish Docker Image / build (arm64, linux/arm64, docker-arm64) (push) Successful in 10m3s
Build and Publish Docker Image / merge (push) Successful in 32s
All checks were successful
Build and Publish Docker Image (Semantic Cache) / build (amd64, linux/amd64, docker-amd64) (push) Successful in 35s
Build and Publish Docker Image / build (amd64, linux/amd64, docker-amd64) (push) Successful in 36s
Build and Publish Docker Image (Semantic Cache) / build (arm64, linux/arm64, docker-arm64) (push) Successful in 10m21s
Build and Publish Docker Image (Semantic Cache) / merge (push) Successful in 32s
Build and Publish Docker Image / build (arm64, linux/arm64, docker-arm64) (push) Successful in 10m3s
Build and Publish Docker Image / merge (push) Successful in 32s
Reviewed-on: https://bitfreedom.net/code/code/nomyo-ai/nomyo-router/pulls/21
This commit is contained in:
commit
07b80e654f
3 changed files with 11 additions and 8 deletions
|
|
@ -18,7 +18,6 @@ on:
|
||||||
env:
|
env:
|
||||||
REGISTRY: bitfreedom.net
|
REGISTRY: bitfreedom.net
|
||||||
IMAGE_NAME: ${{ github.repository }}
|
IMAGE_NAME: ${{ github.repository }}
|
||||||
CACHE_IMAGE: ${{ github.repository }}-buildcache-semantic
|
|
||||||
DOCKER_BUILD_SUMMARY: "false"
|
DOCKER_BUILD_SUMMARY: "false"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|
@ -87,9 +86,9 @@ jobs:
|
||||||
provenance: false
|
provenance: false
|
||||||
build-args: |
|
build-args: |
|
||||||
SEMANTIC_CACHE=true
|
SEMANTIC_CACHE=true
|
||||||
tags: ${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-${{ matrix.arch }}
|
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-${{ matrix.arch }}
|
||||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:buildcache-semantic-${{ matrix.arch }}
|
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-semantic-${{ matrix.arch }}
|
||||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:buildcache-semantic-${{ matrix.arch }},mode=min
|
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-semantic-${{ matrix.arch }},mode=max
|
||||||
|
|
||||||
merge:
|
merge:
|
||||||
runs-on: docker-amd64
|
runs-on: docker-amd64
|
||||||
|
|
@ -145,6 +144,6 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
docker buildx imagetools create \
|
docker buildx imagetools create \
|
||||||
$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
||||||
${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-amd64 \
|
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-amd64 \
|
||||||
${{ env.REGISTRY }}/${{ env.CACHE_IMAGE }}:platform-semantic-arm64
|
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-arm64
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -79,7 +79,7 @@ jobs:
|
||||||
provenance: false
|
provenance: false
|
||||||
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }}
|
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }}
|
||||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }}
|
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }}
|
||||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=min
|
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=max
|
||||||
|
|
||||||
merge:
|
merge:
|
||||||
runs-on: docker-amd64
|
runs-on: docker-amd64
|
||||||
|
|
|
||||||
|
|
@ -2110,7 +2110,11 @@ async def chat_proxy(request: Request):
|
||||||
# Only cache when no max_tokens limit was set — otherwise
|
# Only cache when no max_tokens limit was set — otherwise
|
||||||
# finish_reason=length might just mean max_tokens was hit,
|
# finish_reason=length might just mean max_tokens was hit,
|
||||||
# not that the context window was exhausted.
|
# not that the context window was exhausted.
|
||||||
_req_max_tok = params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict")
|
_req_max_tok = (
|
||||||
|
params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict")
|
||||||
|
if use_openai else
|
||||||
|
(options.get("num_predict") if options else None)
|
||||||
|
)
|
||||||
if _dr == "length" and not _req_max_tok:
|
if _dr == "length" and not _req_max_tok:
|
||||||
_pt = getattr(chunk, "prompt_eval_count", 0) or 0
|
_pt = getattr(chunk, "prompt_eval_count", 0) or 0
|
||||||
_ct = getattr(chunk, "eval_count", 0) or 0
|
_ct = getattr(chunk, "eval_count", 0) or 0
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue