From eb16d684fc44dda938b8ff42a3dd455f22327946 Mon Sep 17 00:00:00 2001 From: Sam Valladares Date: Tue, 27 Jan 2026 02:32:23 -0600 Subject: [PATCH] fix: use platform cache dir for fastembed models Previously, fastembed created .fastembed_cache in the current working directory, polluting project folders with symlinks. Now uses platform-appropriate cache directories: - macOS: ~/Library/Caches/com.vestige.core/fastembed - Linux: ~/.cache/core/fastembed - Windows: %LOCALAPPDATA%\vestige\core\cache\fastembed Can still be overridden with FASTEMBED_CACHE_PATH env var. Fixes user feedback about .fastembed_cache appearing in random folders. Co-Authored-By: Claude Opus 4.5 --- README.md | 12 ++++--- crates/vestige-core/src/embeddings/local.rs | 37 +++++++++++++++++++-- docs/CONFIGURATION.md | 20 +++++------ 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index a208f2a..bd62eff 100644 --- a/README.md +++ b/README.md @@ -136,12 +136,14 @@ claude mcp add vestige /full/path/to/vestige-mcp -s user
-.fastembed_cache appearing in project folders +Model cache location -Run once from home directory to create cache there: -```bash -cd ~ && vestige health -``` +The embedding model (~130MB) is cached in platform-specific directories: +- **macOS**: `~/Library/Caches/com.vestige.core/fastembed` +- **Linux**: `~/.cache/core/fastembed` +- **Windows**: `%LOCALAPPDATA%\vestige\core\cache\fastembed` + +Override with: `export FASTEMBED_CACHE_PATH=/custom/path`
diff --git a/crates/vestige-core/src/embeddings/local.rs b/crates/vestige-core/src/embeddings/local.rs index 9fe20c7..30a5415 100644 --- a/crates/vestige-core/src/embeddings/local.rs +++ b/crates/vestige-core/src/embeddings/local.rs @@ -35,14 +35,47 @@ pub const BATCH_SIZE: usize = 32; /// Result type for model initialization static EMBEDDING_MODEL_RESULT: OnceLock, String>> = OnceLock::new(); +/// Get the default cache directory for fastembed models +/// Uses FASTEMBED_CACHE_PATH env var, or falls back to platform cache directory +fn get_cache_dir() -> std::path::PathBuf { + if let Ok(path) = std::env::var("FASTEMBED_CACHE_PATH") { + return std::path::PathBuf::from(path); + } + + // Use platform-appropriate cache directory via directories crate + // macOS: ~/Library/Caches/com.vestige.core/fastembed + // Linux: ~/.cache/core/fastembed (only the application name "core" is used; honors $XDG_CACHE_HOME) + // Windows: %LOCALAPPDATA%\vestige\core\cache\fastembed + if let Some(proj_dirs) = directories::ProjectDirs::from("com", "vestige", "core") { + return proj_dirs.cache_dir().join("fastembed"); + } + + // Fallback to ~/.cache/vestige/fastembed under the home directory + if let Some(base_dirs) = directories::BaseDirs::new() { + return base_dirs.home_dir().join(".cache/vestige/fastembed"); + } + + // Last resort fallback: relative .fastembed_cache path (shouldn't happen) + std::path::PathBuf::from(".fastembed_cache") +} + /// Initialize the global embedding model /// Using nomic-embed-text-v1.5 (768d) - 8192 token context, Matryoshka support fn get_model() -> Result, EmbeddingError> { let result = EMBEDDING_MODEL_RESULT.get_or_init(|| { + // Get cache directory (respects FASTEMBED_CACHE_PATH env var) + let cache_dir = get_cache_dir(); + + // Create cache directory if it doesn't exist + if let Err(e) = std::fs::create_dir_all(&cache_dir) { + tracing::warn!("Failed to create cache directory {:?}: {}", cache_dir, e); + } + // nomic-embed-text-v1.5: 768 dimensions, 8192 token context // Matryoshka representation learning, fully open source - let options = - 
InitOptions::new(EmbeddingModel::NomicEmbedTextV15).with_show_download_progress(true); + let options = InitOptions::new(EmbeddingModel::NomicEmbedTextV15) + .with_show_download_progress(true) + .with_cache_dir(cache_dir); TextEmbedding::try_new(options) .map(Mutex::new) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index f59dd6a..766715c 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -10,19 +10,19 @@ Vestige downloads the **Nomic Embed Text v1.5** model (~130MB) from Hugging Face **All subsequent runs are fully offline.** -Model cache location: -- Creates `.fastembed_cache/` in the current working directory on first run -- Contains symlinks to model files in `~/.cache/huggingface/` +### Model Cache Location -**Recommended**: Run your first Vestige command from your home directory: -```bash -cd ~ -vestige health # Creates ~/.fastembed_cache/ once -``` +The embedding model is cached in platform-specific directories: -Or set the environment variable: +| Platform | Cache Location | +|----------|----------------| +| macOS | `~/Library/Caches/com.vestige.core/fastembed` | +| Linux | `~/.cache/core/fastembed` | +| Windows | `%LOCALAPPDATA%\vestige\core\cache\fastembed` | + +Override with environment variable: ```bash -export FASTEMBED_CACHE_PATH="$HOME/.fastembed_cache" +export FASTEMBED_CACHE_PATH="/custom/path" ``` ---