diff --git a/demos/function_calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml
index a0baf28a..715ff581 100644
--- a/demos/function_calling/docker-compose.yaml
+++ b/demos/function_calling/docker-compose.yaml
@@ -41,7 +41,7 @@ services:
     << : *common-vars
     environment:
       - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
-      - FC_URL=${FC_URL:-empty}
+      - FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
       - OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
       - MODE=${MODE:-cloud}
       # uncomment following line to use ollama endpoint that is hosted by docker
diff --git a/model_server/app/arch_fc/arch_fc.py b/model_server/app/arch_fc/arch_fc.py
index 2039ba8a..b60fdb5c 100644
--- a/model_server/app/arch_fc/arch_fc.py
+++ b/model_server/app/arch_fc/arch_fc.py
@@ -19,7 +19,7 @@ fc_url = os.getenv("FC_URL", ollama_endpoint)
 mode = os.getenv("MODE", "cloud")
 if mode not in ["cloud", "local-gpu", "local-cpu"]:
     raise ValueError(f"Invalid mode: {mode}")
-arch_api_key = os.getenv("ARCH_API_KEY", "")
+arch_api_key = os.getenv("ARCH_API_KEY", "vllm")
 logger = logging.getLogger("uvicorn.error")
 handler = None
 
diff --git a/model_server/openai_params.yaml b/model_server/openai_params.yaml
index 0b8cccc8..342c3f41 100644
--- a/model_server/openai_params.yaml
+++ b/model_server/openai_params.yaml
@@ -1,8 +1,7 @@
 params:
-  temperature: 0.0001
+  temperature: 0.01
   top_p : 0.5
   repetition_penalty: 1.0
   top_k: 50
-  max_tokens: 128
-  stop: ["<|im_start|>", "<|im_end|>"]
+  max_tokens: 512
   stop_token_ids: [151645, 151643]
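
Below is a minimal sketch (not part of the patch) of how the defaults changed above plausibly fit together at request time. It assumes model_server reads `openai_params.yaml` and calls `FC_URL` through the standard OpenAI-compatible client; the model name, file path, and prompt here are illustrative assumptions, not taken from the diff.

```python
import os

import yaml
from openai import OpenAI

# Defaults introduced by this change.
fc_url = os.getenv("FC_URL", "https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1")
arch_api_key = os.getenv("ARCH_API_KEY", "vllm")  # vLLM-backed servers accept a placeholder key

# Assumed location of the sampling params file (path is illustrative).
with open("model_server/openai_params.yaml") as f:
    params = yaml.safe_load(f)["params"]

client = OpenAI(base_url=fc_url, api_key=arch_api_key)
resp = client.chat.completions.create(
    model="Arch-Function-Calling-3B",  # illustrative model name
    messages=[{"role": "user", "content": "What is the weather in Seattle?"}],
    temperature=params["temperature"],  # 0.01 after this change
    top_p=params["top_p"],
    max_tokens=params["max_tokens"],    # 512 after this change
    # repetition_penalty, top_k, and stop_token_ids are vLLM extensions to the
    # OpenAI API, so they go through extra_body rather than named arguments.
    extra_body={
        "repetition_penalty": params["repetition_penalty"],
        "top_k": params["top_k"],
        "stop_token_ids": params["stop_token_ids"],
    },
)
print(resp.choices[0].message.content)
```

Note that dropping `stop: ["<|im_start|>", "<|im_end|>"]` in favor of `stop_token_ids: [151645, 151643]` stops generation on the token IDs themselves rather than on their string forms, which avoids partial-string matching issues at decode time.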