feat: adding a semantic cache layer

This commit is contained in:
Alpha Nerd 2026-03-08 09:12:09 +01:00
parent c3d47c7ffe
commit dd4b12da6a
13 changed files with 1138 additions and 22 deletions

View file

@ -1,20 +1,30 @@
# Docker Compose example for NOMYO Router with multiple Ollama instances
#
# Two router variants are provided (enable one by commenting/uncommenting its service block):
# nomyo-router — lean image, exact-match cache only (~300 MB)
# nomyo-router-semantic — semantic image, sentence-transformers baked in (~800 MB)
#
# Uncomment the redis service and set cache_backend: redis in config.yaml
# to share the LLM response cache across multiple router replicas.
version: '3.8'
services:
# NOMYO Router
# NOMYO Router — lean image (exact-match cache, default)
nomyo-router:
image: nomyo-router:latest
build: .
build:
context: .
args:
SEMANTIC_CACHE: "false"
ports:
- "12434:12434"
environment:
- CONFIG_PATH=/app/config/config.yaml
- NOMYO_ROUTER_DB_PATH=/app/token_counts.db
- NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
volumes:
- ./config:/app/config
- router-db:/app/token_counts.db
- router-data:/app/data
depends_on:
- ollama1
- ollama2
@ -23,6 +33,45 @@ services:
networks:
- nomyo-net
# NOMYO Router — semantic image (cache_similarity < 1.0 support, ~800 MB)
# Build: docker compose build nomyo-router-semantic
# Switch: comment out nomyo-router above, uncomment this block, and uncomment the hf-cache entry under the top-level volumes: key.
# nomyo-router-semantic:
# image: nomyo-router:semantic
# build:
# context: .
# args:
# SEMANTIC_CACHE: "true"
# ports:
# - "12434:12434"
# environment:
# - CONFIG_PATH=/app/config/config.yaml
# - NOMYO_ROUTER_DB_PATH=/app/data/token_counts.db
# volumes:
# - ./config:/app/config
# - router-data:/app/data
# - hf-cache:/app/data/hf_cache # share HuggingFace model cache across builds
# depends_on:
# - ollama1
# - ollama2
# - ollama3
# restart: unless-stopped
# networks:
# - nomyo-net
# Optional: Redis for shared LLM response cache across multiple router replicas.
# Requires cache_backend: redis in config.yaml; also uncomment the redis-data entry under the top-level volumes: key.
# redis:
# image: redis:7-alpine
# ports:
# - "6379:6379"
# volumes:
# - redis-data:/data
# command: redis-server --save 60 1 --loglevel warning
# restart: unless-stopped
# networks:
# - nomyo-net
# Ollama Instance 1
ollama1:
image: ollama/ollama:latest
@ -87,7 +136,9 @@ services:
- nomyo-net
volumes:
router-db:
router-data:
# hf-cache: # uncomment when using nomyo-router-semantic
# redis-data: # uncomment when using Redis cache backend
ollama1-data:
ollama2-data:
ollama3-data: