# Docker Compose example for NOMYO Router with multiple Ollama instances
#
# Two router profiles are provided:
#   nomyo-router          — lean image, exact-match cache only (~300 MB)
#   nomyo-router-semantic — semantic image, sentence-transformers baked in (~800 MB)
#
# Uncomment the redis service and set cache_backend: redis in config.yaml
# to share the LLM response cache across multiple router replicas.

# Compose file format version. Current Docker Compose releases treat this
# top-level key as obsolete (it is ignored with a warning); it is kept here
# so the file keeps working with older docker-compose tooling.
version: '3.8'

services:
  # NOMYO Router — lean image (exact-match cache, default)
  # Built from the local build context with SEMANTIC_CACHE=false and
  # published on host port 12434.
  nomyo-router:
    image: nomyo-router:latest
    build:
      context: .
      args:
        SEMANTIC_CACHE: "false"
    ports:
      - "12434:12434"
    environment:
      - CONFIG_PATH=/app/config/config.yaml
      - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
    volumes:
      # Host ./config directory holds the config.yaml named by CONFIG_PATH.
      - ./config:/app/config
      # Named volume backing /app/data, where NOMYO_ROUTER_DB_PATH points.
      - router-data:/app/data
    # Short-form depends_on only orders container start-up; it does not wait
    # for the Ollama instances to be ready to serve requests.
    depends_on:
      - ollama1
      - ollama2
      - ollama3
    restart: unless-stopped
    networks:
      - nomyo-net

  # NOMYO Router — semantic image (cache_similarity < 1.0 support, ~800 MB)
  # Build:  docker compose build nomyo-router-semantic
  # Switch: comment out nomyo-router above, uncomment this block.
  # nomyo-router-semantic:
  #   image: nomyo-router:semantic
  #   build:
  #     context: .
  #     args:
  #       SEMANTIC_CACHE: "true"
  #   ports:
  #     - "12434:12434"
  #   environment:
  #     - CONFIG_PATH=/app/config/config.yaml
  #     - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
  #   volumes:
  #     - ./config:/app/config
  #     - router-data:/app/data
  #     - hf-cache:/app/data/hf_cache  # share HuggingFace model cache across builds
  #   depends_on:
  #     - ollama1
  #     - ollama2
  #     - ollama3
  #   restart: unless-stopped
  #   networks:
  #     - nomyo-net

  # Optional: Redis for shared LLM response cache across multiple router replicas.
  # Requires cache_backend: redis in config.yaml.
  # redis:
  #   image: redis:7-alpine
  #   ports:
  #     - "6379:6379"
  #   volumes:
  #     - redis-data:/data
  #   command: redis-server --save 60 1 --loglevel warning
  #   restart: unless-stopped
  #   networks:
  #     - nomyo-net

  # Ollama Instance 1 (container port 11434 published on host port 11434)
  ollama1:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ollama1-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Ollama Instance 2 (container port 11434 published on host port 11435)
  ollama2:
    image: ollama/ollama:latest
    ports:
      - "11435:11434"
    volumes:
      - ollama2-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Ollama Instance 3 (container port 11434 published on host port 11436)
  ollama3:
    image: ollama/ollama:latest
    ports:
      - "11436:11434"
    volumes:
      - ollama3-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Optional: Prometheus for monitoring
  # Reads its configuration from ./monitoring/prometheus.yml on the host.
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    restart: unless-stopped
    networks:
      - nomyo-net

  # Optional: Grafana for visualization
  # Dashboard state is kept in the grafana-storage named volume.
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      - grafana-storage:/var/lib/grafana
    restart: unless-stopped
    networks:
      - nomyo-net

# Named volumes referenced by the services above. An empty value means the
# volume is created with Compose's default settings.
volumes:
  router-data:
  # hf-cache:    # uncomment when using nomyo-router-semantic
  # redis-data:  # uncomment when using Redis cache backend
  ollama1-data:
  ollama2-data:
  ollama3-data:
  grafana-storage:

# Single user-defined bridge network that every service in this file joins.
networks:
  nomyo-net:
    driver: bridge