feat:
added buffer_lock to prevent race conditions in high-concurrency scenarios; added documentation
This commit is contained in:
parent
434b6d4cca
commit
20a016269d
9 changed files with 2167 additions and 42 deletions
98
doc/examples/docker-compose.yml
Normal file
98
doc/examples/docker-compose.yml
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
# Docker Compose example for NOMYO Router with multiple Ollama instances
#
# All services join the user-defined bridge network "nomyo-net", so they can
# reach each other by service name (e.g. http://ollama1:11434). The host port
# mappings below are only needed for access from outside the Compose network.

version: '3.8'

services:
  # NOMYO Router
  nomyo-router:
    image: nomyo-router:latest
    # Relative build context: resolved against the directory of this file.
    build: .
    ports:
      - "12434:12434"
    environment:
      - CONFIG_PATH=/app/config/config.yaml
      # The database file lives inside the directory backed by the
      # "router-db" volume (see volumes below).
      - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
    volumes:
      - ./config:/app/config
      # A named volume is always mounted as a directory, so it must not be
      # mounted directly at the database file path.
      - router-db:/app/data
    depends_on:
      - ollama1
      - ollama2
      - ollama3
    restart: unless-stopped
    networks:
      - nomyo-net

  # Ollama Instance 1
  ollama1:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ollama1-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Ollama Instance 2 (host port 11435 -> container port 11434)
  ollama2:
    image: ollama/ollama:latest
    ports:
      - "11435:11434"
    volumes:
      - ollama2-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Ollama Instance 3 (host port 11436 -> container port 11434)
  ollama3:
    image: ollama/ollama:latest
    ports:
      - "11436:11434"
    volumes:
      - ollama3-data:/root/.ollama
    environment:
      - OLLAMA_NUM_PARALLEL=4
    restart: unless-stopped
    networks:
      - nomyo-net

  # Optional: Prometheus for monitoring
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    restart: unless-stopped
    networks:
      - nomyo-net

  # Optional: Grafana for visualization
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      - grafana-storage:/var/lib/grafana
    restart: unless-stopped
    networks:
      - nomyo-net

# Named volumes keep data across container re-creation.
volumes:
  router-db:
  ollama1-data:
  ollama2-data:
  ollama3-data:
  grafana-storage:

networks:
  nomyo-net:
    driver: bridge
|
||||
37
doc/examples/sample-config.yaml
Normal file
37
doc/examples/sample-config.yaml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Sample NOMYO Router Configuration
#
# Exactly one "endpoints:" list may be active at a time; the alternatives
# below are commented out. Uncomment one and comment out the others to switch.

# Basic single endpoint configuration
endpoints:
  - http://localhost:11434

max_concurrent_connections: 2

# Multi-endpoint configuration with local Ollama instances
# endpoints:
#   - http://ollama-worker1:11434
#   - http://ollama-worker2:11434
#   - http://ollama-worker3:11434

# Mixed configuration with Ollama and OpenAI endpoints
# endpoints:
#   - http://localhost:11434
#   - https://api.openai.com/v1

# API keys for remote endpoints
# Use ${VAR_NAME} syntax to reference environment variables
# Keys of this mapping are endpoint URLs as listed under "endpoints".
api_keys:
  # Local Ollama instances typically don't require authentication
  "http://localhost:11434": "ollama"

  # Remote Ollama instances
  # "http://remote-ollama:11434": "ollama"

  # OpenAI API
  # "https://api.openai.com/v1": "${OPENAI_KEY}"

  # Anthropic API
  # "https://api.anthropic.com/v1": "${ANTHROPIC_KEY}"

  # Other OpenAI-compatible endpoints
  # "https://api.mistral.ai/v1": "${MISTRAL_KEY}"
|
||||
Loading…
Add table
Add a link
Reference in a new issue