mirror of
https://github.com/katanemo/plano.git
synced 2026-04-27 09:46:28 +02:00
Integrate Arch-Function-Calling-1.5B model (#85)
* add arch support * add missing file * e2e tests * delete old files and fix response * fmt
This commit is contained in:
parent
9ea6bb0d73
commit
3511798fa8
12 changed files with 203 additions and 427 deletions
|
|
@ -0,0 +1,21 @@
|
|||
FROM Arch-Function-Calling-1.5B-Q4_K_M.gguf
|
||||
|
||||
# Set parameters for response generation
|
||||
PARAMETER num_predict 1024
|
||||
PARAMETER temperature 0.001
|
||||
PARAMETER top_p 1.0
|
||||
PARAMETER top_k 16000
|
||||
PARAMETER repeat_penalty 1.0
|
||||
PARAMETER stop "<|im_end|>"
|
||||
|
||||
# Set the random number seed to use for generation
|
||||
PARAMETER seed 42
|
||||
|
||||
# Set the prompt template to be passed into the model
|
||||
TEMPLATE """
|
||||
{{- if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
{{ end }}{{ if .Prompt }}<|im_start|>user
|
||||
{{ .Prompt }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ .Response }}<|im_end|>"""
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
FROM Bolt-Function-Calling-1B-Q3_K_L.gguf
|
||||
|
||||
# Set the size of the context window used to generate the next token
|
||||
# PARAMETER num_ctx 16384
|
||||
PARAMETER num_ctx 4096
|
||||
|
||||
# Set parameters for response generation
|
||||
PARAMETER num_predict 1024
|
||||
PARAMETER temperature 0.1
|
||||
PARAMETER top_p 0.5
|
||||
PARAMETER top_k 32022
|
||||
PARAMETER repeat_penalty 1.0
|
||||
PARAMETER stop "<|EOT|>"
|
||||
|
||||
# Set the random number seed to use for generation
|
||||
PARAMETER seed 42
|
||||
|
||||
# Set the prompt template to be passed into the model
|
||||
TEMPLATE """{{ if .System }}<|begin▁of▁sentence|>
|
||||
{{ .System }}
|
||||
{{ end }}{{ if .Prompt }}### Instruction:
|
||||
{{ .Prompt }}
|
||||
{{ end }}### Response:
|
||||
{{ .Response }}
|
||||
<|EOT|>"""
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
FROM Bolt-Function-Calling-1B-Q4_K_M.gguf
|
||||
|
||||
# Set the size of the context window used to generate the next token
|
||||
PARAMETER num_ctx 4096
|
||||
|
||||
# Set parameters for response generation
|
||||
PARAMETER num_predict 1024
|
||||
PARAMETER temperature 0.1
|
||||
PARAMETER top_p 0.5
|
||||
PARAMETER top_k 32022
|
||||
PARAMETER repeat_penalty 1.0
|
||||
PARAMETER stop "<|EOT|>"
|
||||
|
||||
# Set the random number seed to use for generation
|
||||
PARAMETER seed 42
|
||||
|
||||
# Set the prompt template to be passed into the model
|
||||
TEMPLATE """{{ if .System }}<|begin▁of▁sentence|>
|
||||
{{ .System }}
|
||||
{{ end }}{{ if .Prompt }}### Instruction:
|
||||
{{ .Prompt }}
|
||||
{{ end }}### Response:
|
||||
{{ .Response }}
|
||||
<|EOT|>"""
|
||||
|
|
@ -11,14 +11,14 @@ This demo shows how you can use intelligent prompt gateway to do function callin
|
|||
```sh
|
||||
docker compose up
|
||||
```
|
||||
1. Download Bolt-FC model. This demo assumes we have downloaded [Bolt-Function-Calling-1B:Q4_K_M](https://huggingface.co/katanemolabs/Bolt-Function-Calling-1B.gguf/blob/main/Bolt-Function-Calling-1B-Q4_K_M.gguf) to local folder.
|
||||
1. Download Arch-FC model. This demo assumes we have downloaded [Arch-Function-Calling-1.5B:Q4_K_M](https://huggingface.co/katanemolabs/Arch-Function-Calling-1.5B.gguf/blob/main/Arch-Function-Calling-1.5B-Q4_K_M.gguf) to local folder.
|
||||
1. If running ollama natively, run
|
||||
```sh
|
||||
ollama serve
|
||||
```
|
||||
2. Create the model file in the ollama repository
|
||||
```sh
|
||||
ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file
|
||||
ollama create Arch-Function-Calling-1.5B:Q4_K_M -f Arch-Function-Calling-1.5B-Q4_K_M.model_file
|
||||
```
|
||||
3. Navigate to http://localhost:18080/
|
||||
4. You can type in queries like "how is the weather in Seattle"
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ services:
|
|||
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
|
||||
# uncomment following line to use ollama endpoint that is hosted by docker
|
||||
# - OLLAMA_ENDPOINT=ollama
|
||||
- OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M
|
||||
|
||||
api_server:
|
||||
build:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue