diff --git a/demos/function_calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml
index a0baf28a..715ff581 100644
--- a/demos/function_calling/docker-compose.yaml
+++ b/demos/function_calling/docker-compose.yaml
@@ -41,7 +41,7 @@ services:
     << : *common-vars
     environment:
       - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
-      - FC_URL=${FC_URL:-empty}
+      - FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
       - OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
       - MODE=${MODE:-cloud}
       # uncomment following line to use ollama endpoint that is hosted by docker
diff --git a/model_server/app/arch_fc/arch_fc.py b/model_server/app/arch_fc/arch_fc.py
index 2039ba8a..b60fdb5c 100644
--- a/model_server/app/arch_fc/arch_fc.py
+++ b/model_server/app/arch_fc/arch_fc.py
@@ -19,7 +19,7 @@ fc_url = os.getenv("FC_URL", ollama_endpoint)
 mode = os.getenv("MODE", "cloud")
 if mode not in ["cloud", "local-gpu", "local-cpu"]:
     raise ValueError(f"Invalid mode: {mode}")
-arch_api_key = os.getenv("ARCH_API_KEY", "")
+arch_api_key = os.getenv("ARCH_API_KEY", "vllm")
 logger = logging.getLogger("uvicorn.error")
 handler = None
 
diff --git a/model_server/openai_params.yaml b/model_server/openai_params.yaml
index 0b8cccc8..342c3f41 100644
--- a/model_server/openai_params.yaml
+++ b/model_server/openai_params.yaml
@@ -1,8 +1,7 @@
 params:
-  temperature: 0.0001
+  temperature: 0.01
   top_p : 0.5
   repetition_penalty: 1.0
   top_k: 50
-  max_tokens: 128
-  stop: ["<|im_start|>", "<|im_end|>"]
+  max_tokens: 512
   stop_token_ids: [151645, 151643]
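
Below is a minimal sketch (not part of the patch) of how the defaults changed above plausibly fit together at request time. It assumes model_server reads `openai_params.yaml` and calls `FC_URL` through the standard OpenAI-compatible client; the model name, file path, and prompt here are illustrative assumptions, not taken from the diff.

```python
import os

import yaml
from openai import OpenAI

# Defaults introduced by this change.
fc_url = os.getenv("FC_URL", "https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1")
arch_api_key = os.getenv("ARCH_API_KEY", "vllm")  # vLLM-backed servers accept a placeholder key

# Assumed location of the sampling params file (path is illustrative).
with open("model_server/openai_params.yaml") as f:
    params = yaml.safe_load(f)["params"]

client = OpenAI(base_url=fc_url, api_key=arch_api_key)
resp = client.chat.completions.create(
    model="Arch-Function-Calling-3B",  # illustrative model name
    messages=[{"role": "user", "content": "What is the weather in Seattle?"}],
    temperature=params["temperature"],  # 0.01 after this change
    top_p=params["top_p"],
    max_tokens=params["max_tokens"],    # 512 after this change
    # repetition_penalty, top_k, and stop_token_ids are vLLM extensions to the
    # OpenAI API, so they go through extra_body rather than named arguments.
    extra_body={
        "repetition_penalty": params["repetition_penalty"],
        "top_k": params["top_k"],
        "stop_token_ids": params["stop_token_ids"],
    },
)
print(resp.choices[0].message.content)
```

Note that dropping `stop: ["<|im_start|>", "<|im_end|>"]` in favor of `stop_token_ids: [151645, 151643]` stops generation on the token IDs themselves rather than on their string forms, which avoids partial-string matching issues at decode time.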