mirror of
https://github.com/katanemo/plano.git
synced 2026-04-27 09:46:28 +02:00
Integrate Arch-Function-Calling-1.5B model (#85)
* add arch support * add missing file * e2e tests * delete old files and fix response * fmt
This commit is contained in:
parent
9ea6bb0d73
commit
3511798fa8
12 changed files with 203 additions and 427 deletions
|
|
@ -0,0 +1,21 @@
|
|||
FROM Arch-Function-Calling-1.5B-Q4_K_M.gguf
|
||||
|
||||
# Set parameters for response generation
|
||||
PARAMETER num_predict 1024
|
||||
PARAMETER temperature 0.001
|
||||
PARAMETER top_p 1.0
|
||||
PARAMETER top_k 16000
|
||||
PARAMETER repeat_penalty 1.0
|
||||
PARAMETER stop "<|im_end|>"
|
||||
|
||||
# Set the random number seed to use for generation
|
||||
PARAMETER seed 42
|
||||
|
||||
# Set the prompt template to be passed into the model
|
||||
TEMPLATE """
|
||||
{{- if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
{{ end }}{{ if .Prompt }}<|im_start|>user
|
||||
{{ .Prompt }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ .Response }}<|im_end|>"""
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
FROM Bolt-Function-Calling-1B-Q3_K_L.gguf
|
||||
|
||||
# Set the size of the context window used to generate the next token
|
||||
# PARAMETER num_ctx 16384
|
||||
PARAMETER num_ctx 4096
|
||||
|
||||
# Set parameters for response generation
|
||||
PARAMETER num_predict 1024
|
||||
PARAMETER temperature 0.1
|
||||
PARAMETER top_p 0.5
|
||||
PARAMETER top_k 32022
|
||||
PARAMETER repeat_penalty 1.0
|
||||
PARAMETER stop "<|EOT|>"
|
||||
|
||||
# Set the random number seed to use for generation
|
||||
PARAMETER seed 42
|
||||
|
||||
# Set the prompt template to be passed into the model
|
||||
TEMPLATE """{{ if .System }}<|begin▁of▁sentence|>
|
||||
{{ .System }}
|
||||
{{ end }}{{ if .Prompt }}### Instruction:
|
||||
{{ .Prompt }}
|
||||
{{ end }}### Response:
|
||||
{{ .Response }}
|
||||
<|EOT|>"""
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
FROM Bolt-Function-Calling-1B-Q4_K_M.gguf
|
||||
|
||||
# Set the size of the context window used to generate the next token
|
||||
PARAMETER num_ctx 4096
|
||||
|
||||
# Set parameters for response generation
|
||||
PARAMETER num_predict 1024
|
||||
PARAMETER temperature 0.1
|
||||
PARAMETER top_p 0.5
|
||||
PARAMETER top_k 32022
|
||||
PARAMETER repeat_penalty 1.0
|
||||
PARAMETER stop "<|EOT|>"
|
||||
|
||||
# Set the random number seed to use for generation
|
||||
PARAMETER seed 42
|
||||
|
||||
# Set the prompt template to be passed into the model
|
||||
TEMPLATE """{{ if .System }}<|begin▁of▁sentence|>
|
||||
{{ .System }}
|
||||
{{ end }}{{ if .Prompt }}### Instruction:
|
||||
{{ .Prompt }}
|
||||
{{ end }}### Response:
|
||||
{{ .Response }}
|
||||
<|EOT|>"""
|
||||
|
|
@ -11,14 +11,14 @@ This demo shows how you can use intelligent prompt gateway to do function callin
|
|||
```sh
|
||||
docker compose up
|
||||
```
|
||||
1. Download Bolt-FC model. This demo assumes we have downloaded [Bolt-Function-Calling-1B:Q4_K_M](https://huggingface.co/katanemolabs/Bolt-Function-Calling-1B.gguf/blob/main/Bolt-Function-Calling-1B-Q4_K_M.gguf) to local folder.
|
||||
1. Download Arch-FC model. This demo assumes we have downloaded [Arch-Function-Calling-1.5B:Q4_K_M](https://huggingface.co/katanemolabs/Arch-Function-Calling-1.5B.gguf/blob/main/Arch-Function-Calling-1.5B-Q4_K_M.gguf) to local folder.
|
||||
1. If running ollama natively, run
|
||||
```sh
|
||||
ollama serve
|
||||
```
|
||||
2. Create the model file in the ollama repository
|
||||
```sh
|
||||
ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file
|
||||
ollama create Arch-Function-Calling-1.5B:Q4_K_M -f Arch-Function-Calling-1.5B-Q4_K_M.model_file
|
||||
```
|
||||
3. Navigate to http://localhost:18080/
|
||||
4. You can type in queries like "how is the weather in Seattle"
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ services:
|
|||
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
|
||||
# uncomment following line to use ollama endpoint that is hosted by docker
|
||||
# - OLLAMA_ENDPOINT=ollama
|
||||
- OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M
|
||||
|
||||
api_server:
|
||||
build:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue