# nomyo-router/doc/examples/docker-compose.yml

# Docker Compose example for NOMYO Router with multiple Ollama instances
#
# Two router variants are provided (enable only one at a time — both publish
# host port 12434):
#   nomyo-router          — lean image, exact-match cache only (~300 MB)
#   nomyo-router-semantic — semantic image, sentence-transformers baked in (~800 MB)
#
# Uncomment the redis service and set cache_backend: redis in config.yaml
# to share the LLM response cache across multiple router replicas.

# NOTE(review): Docker Compose v2 reports the top-level `version` key as
# obsolete and ignores it; presumably kept for legacy docker-compose v1
# tooling — confirm before removing.
version: '3.8'

services:
  # NOMYO Router — lean image (exact-match cache, default).
  # Built from the local context with build arg SEMANTIC_CACHE set to "false"
  # and tagged nomyo-router:latest.
  nomyo-router:
    image: nomyo-router:latest
    build:
      context: .
      args:
        SEMANTIC_CACHE: "false"
    restart: unless-stopped
    # All three Ollama services are listed as dependencies of the router.
    depends_on:
      - ollama1
      - ollama2
      - ollama3
    networks:
      - nomyo-net
    # Container port 12434 is published on host port 12434.
    ports:
      - "12434:12434"
    environment:
      - CONFIG_PATH=/app/config/config.yaml
      - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
    volumes:
      # Host ./config directory; CONFIG_PATH above points into this mount.
      - ./config:/app/config
      # Named volume; NOMYO_ROUTER_DB_PATH above points into this mount.
      - router-data:/app/data

  # NOMYO Router — semantic image (cache_similarity < 1.0 support, ~800 MB)
  # Build:  docker compose build nomyo-router-semantic
  # Switch: comment out nomyo-router above, then uncomment this block and the
  #         hf-cache entry under the top-level volumes: section.
  # nomyo-router-semantic:
  #   image: nomyo-router:semantic
  #   build:
  #     context: .
  #     args:
  #       SEMANTIC_CACHE: "true"
  #   ports:
  #     - "12434:12434"
  #   environment:
  #     - CONFIG_PATH=/app/config/config.yaml
  #     - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
  #   volumes:
  #     - ./config:/app/config
  #     - router-data:/app/data
  #     - hf-cache:/app/data/hf_cache   # share HuggingFace model cache across builds
  #   depends_on:
  #     - ollama1
  #     - ollama2
  #     - ollama3
  #   restart: unless-stopped
  #   networks:
  #     - nomyo-net

  # Optional: Redis for shared LLM response cache across multiple router replicas.
  # Requires cache_backend: redis in config.yaml and the redis-data entry under
  # the top-level volumes: section to be uncommented.
  # redis:
  #   image: redis:7-alpine
  #   ports:
  #     - "6379:6379"
  #   volumes:
  #     - redis-data:/data
  #   command: redis-server --save 60 1 --loglevel warning
  #   restart: unless-stopped
  #   networks:
  #     - nomyo-net

  # Ollama instance 1 — container port 11434 published on host port 11434.
  ollama1:
    image: ollama/ollama:latest
    restart: unless-stopped
    networks:
      - nomyo-net
    environment:
      - OLLAMA_NUM_PARALLEL=4
    ports:
      - "11434:11434"
    volumes:
      # Dedicated named volume for this instance
      # (presumably Ollama's model/data directory — confirm).
      - ollama1-data:/root/.ollama

  # Ollama instance 2 — container port 11434 published on host port 11435.
  ollama2:
    image: ollama/ollama:latest
    restart: unless-stopped
    networks:
      - nomyo-net
    environment:
      - OLLAMA_NUM_PARALLEL=4
    ports:
      - "11435:11434"
    volumes:
      # Dedicated named volume for this instance
      # (presumably Ollama's model/data directory — confirm).
      - ollama2-data:/root/.ollama

  # Ollama instance 3 — container port 11434 published on host port 11436.
  ollama3:
    image: ollama/ollama:latest
    restart: unless-stopped
    networks:
      - nomyo-net
    environment:
      - OLLAMA_NUM_PARALLEL=4
    ports:
      - "11436:11434"
    volumes:
      # Dedicated named volume for this instance
      # (presumably Ollama's model/data directory — confirm).
      - ollama3-data:/root/.ollama

  # Optional: Prometheus for monitoring.
  # No other service in this file declares a dependency on it, so the service
  # can be removed or commented out when monitoring is not needed.
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      # Configuration is mounted read-only: the container is only pointed at
      # this file via --config.file below and has no reason to modify it.
      # NOTE: ./monitoring/prometheus.yml must exist on the host before
      # `docker compose up`; with this short bind syntax Docker creates a
      # directory at a missing source path, and Prometheus then fails to load
      # its configuration.
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    restart: unless-stopped
    networks:
      - nomyo-net

  # Optional: Grafana for visualization.
  # Container port 3000 is published on host port 3000.
  grafana:
    image: grafana/grafana:latest
    restart: unless-stopped
    networks:
      - nomyo-net
    ports:
      - "3000:3000"
    volumes:
      # Named volume mounted at /var/lib/grafana.
      - grafana-storage:/var/lib/grafana

# Named volumes referenced by the services above. Entries are left empty, so
# no driver or options are specified here.
volumes:
  # Mounted at /app/data by the nomyo-router service.
  router-data:
  # hf-cache:     # uncomment when using nomyo-router-semantic
  # redis-data:   # uncomment when using Redis cache backend
  # One volume per Ollama instance, each mounted at /root/.ollama.
  ollama1-data:
  ollama2-data:
  ollama3-data:
  # Mounted at /var/lib/grafana by the grafana service.
  grafana-storage:

# Single bridge network; every service defined in this file attaches to it.
networks:
  nomyo-net:
    driver: bridge
feat: added buffer_lock to prevent race condition in high concurrency scenarios added documentation 2026-01-05 17:16:31 +01:00			`# Docker Compose example for NOMYO Router with multiple Ollama instances`
feat: adding a semantic cache layer 2026-03-08 09:12:09 +01:00			`#`
			`# Two router profiles are provided:`
			`# nomyo-router — lean image, exact-match cache only (~300 MB)`
			`# nomyo-router-semantic — semantic image, sentence-transformers baked in (~800 MB)`
			`#`
			`# Uncomment the redis service and set cache_backend: redis in config.yaml`
			`# to share the LLM response cache across multiple router replicas.`
feat: added buffer_lock to prevent race condition in high concurrency scenarios added documentation 2026-01-05 17:16:31 +01:00
			`version: '3.8'`

			`services:`
feat: adding a semantic cache layer 2026-03-08 09:12:09 +01:00			`# NOMYO Router — lean image (exact-match cache, default)`
feat: added buffer_lock to prevent race condition in high concurrency scenarios added documentation 2026-01-05 17:16:31 +01:00			`nomyo-router:`
			`image: nomyo-router:latest`
feat: adding a semantic cache layer 2026-03-08 09:12:09 +01:00			`build:`
			`context: .`
			`args:`
			`SEMANTIC_CACHE: "false"`
feat: added buffer_lock to prevent race condition in high concurrency scenarios added documentation 2026-01-05 17:16:31 +01:00			`ports:`
			`- "12434:12434"`
			`environment:`
			`- CONFIG_PATH=/app/config/config.yaml`
feat: adding a semantic cache layer 2026-03-08 09:12:09 +01:00			`- NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db`
feat: added buffer_lock to prevent race condition in high concurrency scenarios added documentation 2026-01-05 17:16:31 +01:00			`volumes:`
			`- ./config:/app/config`
feat: adding a semantic cache layer 2026-03-08 09:12:09 +01:00			`- router-data:/app/data`
feat: added buffer_lock to prevent race condition in high concurrency scenarios added documentation 2026-01-05 17:16:31 +01:00			`depends_on:`
			`- ollama1`
			`- ollama2`
			`- ollama3`
			`restart: unless-stopped`
			`networks:`
			`- nomyo-net`

feat: adding a semantic cache layer 2026-03-08 09:12:09 +01:00			`# NOMYO Router — semantic image (cache_similarity < 1.0 support, ~800 MB)`
			`# Build: docker compose build nomyo-router-semantic`
			`# Switch: comment out nomyo-router above, uncomment this block.`
			`# nomyo-router-semantic:`
			`# image: nomyo-router:semantic`
			`# build:`
			`# context: .`
			`# args:`
			`# SEMANTIC_CACHE: "true"`
			`# ports:`
			`# - "12434:12434"`
			`# environment:`
			`# - CONFIG_PATH=/app/config/config.yaml`
			`# - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db`
			`# volumes:`
			`# - ./config:/app/config`
			`# - router-data:/app/data`
			`# - hf-cache:/app/data/hf_cache # share HuggingFace model cache across builds`
			`# depends_on:`
			`# - ollama1`
			`# - ollama2`
			`# - ollama3`
			`# restart: unless-stopped`
			`# networks:`
			`# - nomyo-net`

			`# Optional: Redis for shared LLM response cache across multiple router replicas.`
			`# Requires cache_backend: redis in config.yaml.`
			`# redis:`
			`# image: redis:7-alpine`
			`# ports:`
			`# - "6379:6379"`
			`# volumes:`
			`# - redis-data:/data`
			`# command: redis-server --save 60 1 --loglevel warning`
			`# restart: unless-stopped`
			`# networks:`
			`# - nomyo-net`

feat: added buffer_lock to prevent race condition in high concurrency scenarios added documentation 2026-01-05 17:16:31 +01:00			`# Ollama Instance 1`
			`ollama1:`
			`image: ollama/ollama:latest`
			`ports:`
			`- "11434:11434"`
			`volumes:`
			`- ollama1-data:/root/.ollama`
			`environment:`
			`- OLLAMA_NUM_PARALLEL=4`
			`restart: unless-stopped`
			`networks:`
			`- nomyo-net`

			`# Ollama Instance 2`
			`ollama2:`
			`image: ollama/ollama:latest`
			`ports:`
			`- "11435:11434"`
			`volumes:`
			`- ollama2-data:/root/.ollama`
			`environment:`
			`- OLLAMA_NUM_PARALLEL=4`
			`restart: unless-stopped`
			`networks:`
			`- nomyo-net`

			`# Ollama Instance 3`
			`ollama3:`
			`image: ollama/ollama:latest`
			`ports:`
			`- "11436:11434"`
			`volumes:`
			`- ollama3-data:/root/.ollama`
			`environment:`
			`- OLLAMA_NUM_PARALLEL=4`
			`restart: unless-stopped`
			`networks:`
			`- nomyo-net`

			`# Optional: Prometheus for monitoring`
			`prometheus:`
			`image: prom/prometheus:latest`
			`ports:`
			`- "9090:9090"`
			`volumes:`
			`- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml`
			`command:`
			`- '--config.file=/etc/prometheus/prometheus.yml'`
			`restart: unless-stopped`
			`networks:`
			`- nomyo-net`

			`# Optional: Grafana for visualization`
			`grafana:`
			`image: grafana/grafana:latest`
			`ports:`
			`- "3000:3000"`
			`volumes:`
			`- grafana-storage:/var/lib/grafana`
			`restart: unless-stopped`
			`networks:`
			`- nomyo-net`

			`volumes:`
feat: adding a semantic cache layer 2026-03-08 09:12:09 +01:00			`router-data:`
			`# hf-cache: # uncomment when using nomyo-router-semantic`
			`# redis-data: # uncomment when using Redis cache backend`
feat: added buffer_lock to prevent race condition in high concurrency scenarios added documentation 2026-01-05 17:16:31 +01:00			`ollama1-data:`
			`ollama2-data:`
			`ollama3-data:`
			`grafana-storage:`

			`networks:`
			`nomyo-net:`
			`driver: bridge`