mirror of
https://github.com/katanemo/plano.git
synced 2026-04-28 18:36:34 +02:00
model server build (#127)
* first commit to have model_server not be dependent on Docker * making changes to fix the docker-compose file for archgw to set DNS_V4 and minor fixes with the build * additional fixes for model server to be separated out in the build * additional fixes for model server to be separated out in the build * fix to get model_server to be built as a separate python process. TODO: fix the embeddings logs after cli completes * fixing init to pull tempfile using the tempfile python package --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
parent
7d21359f5b
commit
b60ceb9168
21 changed files with 3390 additions and 154 deletions
|
|
@ -7,24 +7,5 @@ services:
|
|||
volumes:
|
||||
- ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
depends_on:
|
||||
model_server:
|
||||
condition: service_healthy
|
||||
env_file:
|
||||
- stage.env
|
||||
|
||||
model_server:
|
||||
image: model_server:latest
|
||||
ports:
|
||||
- "18081:80"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl" ,"http://localhost/healthz"]
|
||||
interval: 5s
|
||||
retries: 20
|
||||
volumes:
|
||||
- ~/.cache/huggingface:/root/.cache/huggingface
|
||||
environment:
|
||||
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
|
||||
- OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
|
||||
- MODE=${MODE:-cloud}
|
||||
- FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue