mirror of
https://github.com/samvallad33/vestige.git
synced 2026-04-25 00:36:22 +02:00
fix: use platform cache dir for fastembed models
Previously, fastembed created .fastembed_cache in the current working directory, polluting project folders with symlinks. Now uses platform-appropriate cache directories: - macOS: ~/Library/Caches/com.vestige.core/fastembed - Linux: ~/.cache/vestige/fastembed - Windows: %LOCALAPPDATA%\vestige\cache\fastembed Can still be overridden with FASTEMBED_CACHE_PATH env var. Fixes user feedback about .fastembed_cache appearing in random folders. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
497f149b64
commit
eb16d684fc
3 changed files with 52 additions and 17 deletions
12
README.md
12
README.md
|
|
@ -136,12 +136,14 @@ claude mcp add vestige /full/path/to/vestige-mcp -s user
|
|||
</details>
|
||||
|
||||
<details>
|
||||
<summary>.fastembed_cache appearing in project folders</summary>
|
||||
<summary>Model cache location</summary>
|
||||
|
||||
Run once from home directory to create cache there:
|
||||
```bash
|
||||
cd ~ && vestige health
|
||||
```
|
||||
The embedding model (~130MB) is cached in platform-specific directories:
|
||||
- **macOS**: `~/Library/Caches/com.vestige.core/fastembed`
|
||||
- **Linux**: `~/.cache/core/fastembed` (or `$XDG_CACHE_HOME/core/fastembed` when `XDG_CACHE_HOME` is set)
|
||||
- **Windows**: `%LOCALAPPDATA%\vestige\core\cache\fastembed`
|
||||
|
||||
Override with: `export FASTEMBED_CACHE_PATH=/custom/path`
|
||||
</details>
|
||||
|
||||
<details>
|
||||
|
|
|
|||
|
|
@ -35,14 +35,47 @@ pub const BATCH_SIZE: usize = 32;
|
|||
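/// Process-wide, initialize-once slot holding the outcome of loading the embedding model:
/// the mutex-guarded model on success, or the error message on failure.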
|
||||
static EMBEDDING_MODEL_RESULT: OnceLock<Result<Mutex<TextEmbedding>, String>> = OnceLock::new();
|
||||
|
||||
/// Get the default cache directory for fastembed models
|
||||
/// Uses FASTEMBED_CACHE_PATH env var, or falls back to platform cache directory
|
||||
fn get_cache_dir() -> std::path::PathBuf {
|
||||
if let Ok(path) = std::env::var("FASTEMBED_CACHE_PATH") {
|
||||
return std::path::PathBuf::from(path);
|
||||
}
|
||||
|
||||
// Use platform-appropriate cache directory via directories crate
|
||||
// macOS: ~/Library/Caches/com.vestige.core/fastembed
|
||||
// Linux: ~/.cache/vestige/fastembed
|
||||
// Windows: %LOCALAPPDATA%\vestige\cache\fastembed
|
||||
if let Some(proj_dirs) = directories::ProjectDirs::from("com", "vestige", "core") {
|
||||
return proj_dirs.cache_dir().join("fastembed");
|
||||
}
|
||||
|
||||
// Fallback to home directory
|
||||
if let Some(base_dirs) = directories::BaseDirs::new() {
|
||||
return base_dirs.home_dir().join(".cache/vestige/fastembed");
|
||||
}
|
||||
|
||||
// Last resort fallback (shouldn't happen)
|
||||
std::path::PathBuf::from(".fastembed_cache")
|
||||
}
|
||||
|
||||
/// Initialize the global embedding model
|
||||
/// Using nomic-embed-text-v1.5 (768d) - 8192 token context, Matryoshka support
|
||||
fn get_model() -> Result<std::sync::MutexGuard<'static, TextEmbedding>, EmbeddingError> {
|
||||
let result = EMBEDDING_MODEL_RESULT.get_or_init(|| {
|
||||
// Get cache directory (respects FASTEMBED_CACHE_PATH env var)
|
||||
let cache_dir = get_cache_dir();
|
||||
|
||||
// Create cache directory if it doesn't exist
|
||||
if let Err(e) = std::fs::create_dir_all(&cache_dir) {
|
||||
tracing::warn!("Failed to create cache directory {:?}: {}", cache_dir, e);
|
||||
}
|
||||
|
||||
// nomic-embed-text-v1.5: 768 dimensions, 8192 token context
|
||||
// Matryoshka representation learning, fully open source
|
||||
let options =
|
||||
InitOptions::new(EmbeddingModel::NomicEmbedTextV15).with_show_download_progress(true);
|
||||
let options = InitOptions::new(EmbeddingModel::NomicEmbedTextV15)
|
||||
.with_show_download_progress(true)
|
||||
.with_cache_dir(cache_dir);
|
||||
|
||||
TextEmbedding::try_new(options)
|
||||
.map(Mutex::new)
|
||||
|
|
|
|||
|
|
@ -10,19 +10,19 @@ Vestige downloads the **Nomic Embed Text v1.5** model (~130MB) from Hugging Face
|
|||
|
||||
**All subsequent runs are fully offline.**
|
||||
|
||||
Model cache location:
|
||||
- Creates `.fastembed_cache/` in the current working directory on first run
|
||||
- Contains symlinks to model files in `~/.cache/huggingface/`
|
||||
### Model Cache Location
|
||||
|
||||
**Recommended**: Run your first Vestige command from your home directory:
|
||||
```bash
|
||||
cd ~
|
||||
vestige health # Creates ~/.fastembed_cache/ once
|
||||
```
|
||||
The embedding model is cached in platform-specific directories:
|
||||
|
||||
Or set the environment variable:
|
||||
| Platform | Cache Location |
|
||||
|----------|----------------|
|
||||
| macOS | `~/Library/Caches/com.vestige.core/fastembed` |
|
||||
| Linux | `~/.cache/core/fastembed` (or `$XDG_CACHE_HOME/core/fastembed`) |
|
||||
| Windows | `%LOCALAPPDATA%\vestige\core\cache\fastembed` |
|
||||
|
||||
Override with environment variable:
|
||||
```bash
|
||||
export FASTEMBED_CACHE_PATH="$HOME/.fastembed_cache"
|
||||
export FASTEMBED_CACHE_PATH="/custom/path"
|
||||
```
|
||||
|
||||
---
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue