nomyo-router/doc/examples/docker-compose.yml

150 lines
3.6 KiB
YAML
Raw Normal View History

# Docker Compose example for NOMYO Router with multiple Ollama instances
#
# Two router profiles are provided:
#   nomyo-router          — lean image, exact-match cache only (~300 MB)
#   nomyo-router-semantic — semantic image, sentence-transformers baked in (~800 MB)
#
# Uncomment the redis service and set cache_backend: redis in config.yaml
# to share the LLM response cache across multiple router replicas.
# Compose file format version.
# NOTE(review): Docker Compose v2+ treats this key as obsolete and ignores it;
# presumably kept for older docker-compose 1.x clients — confirm before removing.
version: '3.8'
# Service definitions: one router, three Ollama backends, and optional
# monitoring. Every service joins the shared `nomyo-net` network.
services:
  # NOMYO Router — lean image (exact-match cache, default)
  nomyo-router:
    image: nomyo-router:latest
    build:
      context: .
      args:
        # Quoted so the build arg is passed as the string "false", not a boolean.
        SEMANTIC_CACHE: "false"
    ports:
      - "12434:12434"
    environment:
      - CONFIG_PATH=/app/config/config.yaml
      - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
    volumes:
      # Bind mount: config.yaml is read from ./config on the host.
      - ./config:/app/config
      # Named volume: holds the token-count database referenced above.
      - router-data:/app/data
    depends_on:
      - ollama1
      - ollama2
      - ollama3
    restart: unless-stopped
    networks:
      - nomyo-net

  # NOMYO Router — semantic image (cache_similarity < 1.0 support, ~800 MB)
  # Build:  docker compose build nomyo-router-semantic
  # Switch: comment out nomyo-router above, uncomment this block.
  # nomyo-router-semantic:
  #   image: nomyo-router:semantic
  #   build:
  #     context: .
  #     args:
  #       SEMANTIC_CACHE: "true"
  #   ports:
  #     - "12434:12434"
  #   environment:
  #     - CONFIG_PATH=/app/config/config.yaml
  #     - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
  #   volumes:
  #     - ./config:/app/config
  #     - router-data:/app/data
  #     - hf-cache:/app/data/hf_cache  # share HuggingFace model cache across builds
  #   depends_on:
  #     - ollama1
  #     - ollama2
  #     - ollama3
  #   restart: unless-stopped
  #   networks:
  #     - nomyo-net

  # Optional: Redis for shared LLM response cache across multiple router replicas.
  # Requires cache_backend: redis in config.yaml.
  # redis:
  #   image: redis:7-alpine
  #   ports:
  #     - "6379:6379"
  #   volumes:
  #     - redis-data:/data
  #   command: redis-server --save 60 1 --loglevel warning
  #   restart: unless-stopped
  #   networks:
  #     - nomyo-net

  # Ollama Instance 1
  ollama1:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ollama1-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Ollama Instance 2
  ollama2:
    image: ollama/ollama:latest
    ports:
      # Distinct host port; the container side stays on 11434.
      - "11435:11434"
    volumes:
      - ollama2-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Ollama Instance 3
  ollama3:
    image: ollama/ollama:latest
    ports:
      # Distinct host port; the container side stays on 11434.
      - "11436:11434"
    volumes:
      - ollama3-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Optional: Prometheus for monitoring
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    restart: unless-stopped
    networks:
      - nomyo-net

  # Optional: Grafana for visualization
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      - grafana-storage:/var/lib/grafana
    restart: unless-stopped
    networks:
      - nomyo-net
# Named volumes referenced by the services; each is created with the default
# driver. Uncomment the optional entries together with the matching service.
volumes:
  router-data:
  # hf-cache:     # uncomment when using nomyo-router-semantic
  # redis-data:   # uncomment when using Redis cache backend
  ollama1-data:
  ollama2-data:
  ollama3-data:
  grafana-storage:
# Network that every service in this file attaches to.
networks:
  nomyo-net:
    driver: bridge