From ed50d29ccfce5e8b0e6fef6a7440a52f11a9639e Mon Sep 17 00:00:00 2001
From: Co Tran
Date: Tue, 1 Oct 2024 19:20:28 -0700
Subject: [PATCH] fix fc integration (#110)

* fix fc integration

* fix integration

* remove file

* Update arch_fc.py
---
 demos/function_calling/docker-compose.yaml | 2 +-
 model_server/app/arch_fc/arch_fc.py        | 2 +-
 model_server/openai_params.yaml            | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/demos/function_calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml
index a0baf28a..715ff581 100644
--- a/demos/function_calling/docker-compose.yaml
+++ b/demos/function_calling/docker-compose.yaml
@@ -41,7 +41,7 @@ services:
       << : *common-vars
       environment:
         - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
-        - FC_URL=${FC_URL:-empty}
+        - FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
         - OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
         - MODE=${MODE:-cloud}
         # uncomment following line to use ollama endpoint that is hosted by docker
diff --git a/model_server/app/arch_fc/arch_fc.py b/model_server/app/arch_fc/arch_fc.py
index 2039ba8a..b60fdb5c 100644
--- a/model_server/app/arch_fc/arch_fc.py
+++ b/model_server/app/arch_fc/arch_fc.py
@@ -19,7 +19,7 @@
 fc_url = os.getenv("FC_URL", ollama_endpoint)
 mode = os.getenv("MODE", "cloud")
 if mode not in ["cloud", "local-gpu", "local-cpu"]:
     raise ValueError(f"Invalid mode: {mode}")
-arch_api_key = os.getenv("ARCH_API_KEY", "")
+arch_api_key = os.getenv("ARCH_API_KEY", "vllm")
 logger = logging.getLogger("uvicorn.error")
 handler = None
diff --git a/model_server/openai_params.yaml b/model_server/openai_params.yaml
index 0b8cccc8..342c3f41 100644
--- a/model_server/openai_params.yaml
+++ b/model_server/openai_params.yaml
@@ -1,8 +1,7 @@
 params:
-  temperature: 0.0001
+  temperature: 0.01
   top_p : 0.5
   repetition_penalty: 1.0
   top_k: 50
-  max_tokens: 128
-  stop: ["<|im_start|>", "<|im_end|>"]
+  max_tokens: 512
   stop_token_ids: [151645, 151643]