# Docker Compose example for NOMYO Router with multiple Ollama instances
#
# Two alternative router services are provided (enable only one at a time;
# both publish host port 12434):
#   nomyo-router          — lean image, exact-match cache only (~300 MB)
#   nomyo-router-semantic — semantic image, sentence-transformers baked in (~800 MB)
#
# Uncomment the redis service and set cache_backend: redis in config.yaml
# to share the LLM response cache across multiple router replicas.

# NOTE: Docker Compose v2 (Compose Specification) treats the top-level
# `version` key as obsolete: it is ignored and only triggers a warning.
version: '3.8'

services:
  # NOMYO Router — lean image (exact-match cache, default)
  nomyo-router:
    # Image is built from the current directory with the semantic cache
    # build argument disabled, and tagged nomyo-router:latest.
    build:
      context: .
      args:
        SEMANTIC_CACHE: "false"
    image: nomyo-router:latest
    restart: unless-stopped
    # Start the Ollama backends before the router.
    depends_on:
      - ollama1
      - ollama2
      - ollama3
    networks:
      - nomyo-net
    # Host port 12434 -> container port 12434.
    ports:
      - "12434:12434"
    # Both paths point into the volumes mounted below.
    environment:
      CONFIG_PATH: /app/config/config.yaml
      NOMYO_ROUTER_DB_PATH: /app/data/token_counts.db
    volumes:
      # Host ./config directory provides /app/config (config.yaml lives here).
      - ./config:/app/config
      # Named volume backing /app/data (token_counts.db lives here).
      - router-data:/app/data

  # NOMYO Router — semantic image (cache_similarity < 1.0 support, ~800 MB)
  # Build:  docker compose build nomyo-router-semantic
  # Switch: comment out nomyo-router above, uncomment this block, and uncomment
  #         hf-cache under the top-level volumes: key.
  # nomyo-router-semantic:
  #   image: nomyo-router:semantic
  #   build:
  #     context: .
  #     args:
  #       SEMANTIC_CACHE: "true"
  #   ports:
  #     - "12434:12434"
  #   environment:
  #     - CONFIG_PATH=/app/config/config.yaml
  #     - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
  #   volumes:
  #     - ./config:/app/config
  #     - router-data:/app/data
  #     - hf-cache:/app/data/hf_cache   # share HuggingFace model cache across builds
  #   depends_on:
  #     - ollama1
  #     - ollama2
  #     - ollama3
  #   restart: unless-stopped
  #   networks:
  #     - nomyo-net

  # Optional: Redis for sharing the LLM response cache across multiple router replicas.
  # Requires cache_backend: redis in config.yaml and the redis-data volume
  # (uncomment it under the top-level volumes: key).
  # redis:
  #   image: redis:7-alpine
  #   ports:
  #     - "6379:6379"
  #   volumes:
  #     - redis-data:/data
  #   command: redis-server --save 60 1 --loglevel warning
  #   restart: unless-stopped
  #   networks:
  #     - nomyo-net

  # Ollama Instance 1
  ollama1:
    # First Ollama backend: container port 11434 is published on host port 11434.
    image: ollama/ollama:latest
    restart: unless-stopped
    environment:
      OLLAMA_NUM_PARALLEL: "4"
    volumes:
      # Dedicated named volume for this instance's /root/.ollama directory.
      - ollama1-data:/root/.ollama
    networks:
      - nomyo-net
    ports:
      - "11434:11434"

  # Ollama Instance 2
  ollama2:
    # Second Ollama backend: container port 11434 is published on host port 11435.
    image: ollama/ollama:latest
    restart: unless-stopped
    environment:
      OLLAMA_NUM_PARALLEL: "4"
    volumes:
      # Dedicated named volume for this instance's /root/.ollama directory.
      - ollama2-data:/root/.ollama
    networks:
      - nomyo-net
    ports:
      - "11435:11434"

  # Ollama Instance 3
  ollama3:
    # Third Ollama backend: container port 11434 is published on host port 11436.
    image: ollama/ollama:latest
    restart: unless-stopped
    environment:
      OLLAMA_NUM_PARALLEL: "4"
    volumes:
      # Dedicated named volume for this instance's /root/.ollama directory.
      - ollama3-data:/root/.ollama
    networks:
      - nomyo-net
    ports:
      - "11436:11434"

  # Optional: Prometheus for monitoring
  prometheus:
    # Prometheus server, published on host port 9090.
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      # Configuration is taken from ./monitoring/prometheus.yml on the host.
      # Mounted read-only (:ro) so the container cannot modify the host file.
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    command:
      # Point Prometheus at the mounted configuration file.
      - '--config.file=/etc/prometheus/prometheus.yml'
    restart: unless-stopped
    networks:
      - nomyo-net

  # Optional: Grafana for visualization
  grafana:
    # Grafana, published on host port 3000.
    image: grafana/grafana:latest
    restart: unless-stopped
    networks:
      - nomyo-net
    volumes:
      # Named volume keeps /var/lib/grafana across container recreation.
      - grafana-storage:/var/lib/grafana
    ports:
      - "3000:3000"

volumes:
  # Named volumes with default settings; {} means no extra options.
  router-data: {}
  # hf-cache: {}     # uncomment when using nomyo-router-semantic
  # redis-data: {}   # uncomment when using Redis cache backend
  ollama1-data: {}
  ollama2-data: {}
  ollama3-data: {}
  grafana-storage: {}

networks:
  # Bridge network that the services defined above attach to.
  nomyo-net:
    driver: bridge