From f10367ecd0286b288132a869c6b7d9371359899f Mon Sep 17 00:00:00 2001
From: Sam Valladares
Date: Tue, 27 Jan 2026 01:04:06 -0600
Subject: [PATCH] fix: add explicit embedding initialization with error logging

When embeddings fail to initialize in the MCP server context (e.g., due to
working directory issues), the error was silently swallowed and smart_ingest
would fall back to regular ingest without explanation.

Changes:
- Add init_embeddings() method to Storage for explicit initialization
- Initialize embeddings at MCP server startup with error logging
- Add check_ready() method to EmbeddingService for error access
- Log warning when is_ready() returns false

Now users will see clear error messages like:
"Failed to initialize embedding service: ..."
"Hint: Check FASTEMBED_CACHE_PATH or ensure ~/.fastembed_cache exists"

Co-Authored-By: Claude Opus 4.5
---
 crates/vestige-core/src/embeddings/local.rs | 13 ++++++++++++-
 crates/vestige-core/src/storage/sqlite.rs   | 14 ++++++++++++++
 crates/vestige-mcp/src/main.rs              | 15 ++++++++++++++-
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/crates/vestige-core/src/embeddings/local.rs b/crates/vestige-core/src/embeddings/local.rs
index 98e0985..9fe20c7 100644
--- a/crates/vestige-core/src/embeddings/local.rs
+++ b/crates/vestige-core/src/embeddings/local.rs
@@ -186,7 +186,18 @@ impl EmbeddingService {
 
     /// Check if the model is ready
     pub fn is_ready(&self) -> bool {
-        get_model().is_ok()
+        match get_model() {
+            Ok(_) => true,
+            Err(e) => {
+                tracing::warn!("Embedding model not ready: {}", e);
+                false
+            }
+        }
+    }
+
+    /// Check if the model is ready and return the error if not
+    pub fn check_ready(&self) -> Result<(), EmbeddingError> {
+        get_model().map(|_| ())
     }
 
     /// Initialize the model (downloads if necessary)
diff --git a/crates/vestige-core/src/storage/sqlite.rs b/crates/vestige-core/src/storage/sqlite.rs
index 32e457a..7843cc3 100644
--- a/crates/vestige-core/src/storage/sqlite.rs
+++ b/crates/vestige-core/src/storage/sqlite.rs
@@ -1072,6 +1072,20 @@ impl Storage {
         false
     }
 
+    /// Initialize the embedding service explicitly
+    /// Call this at startup to catch initialization errors early
+    #[cfg(feature = "embeddings")]
+    pub fn init_embeddings(&mut self) -> Result<()> {
+        self.embedding_service.init().map_err(|e| {
+            StorageError::Init(format!("Embedding service initialization failed: {}", e))
+        })
+    }
+
+    #[cfg(not(feature = "embeddings"))]
+    pub fn init_embeddings(&mut self) -> Result<()> {
+        Ok(()) // No-op when embeddings feature is disabled
+    }
+
     /// Get query embedding from cache or compute it
     #[cfg(feature = "embeddings")]
     fn get_query_embedding(&self, query: &str) -> Result> {
diff --git a/crates/vestige-mcp/src/main.rs b/crates/vestige-mcp/src/main.rs
index f490f24..ef16924 100644
--- a/crates/vestige-mcp/src/main.rs
+++ b/crates/vestige-mcp/src/main.rs
@@ -133,8 +133,21 @@ async fn main() {
 
     // Initialize storage with optional custom data directory
     let storage = match Storage::new(data_dir) {
-        Ok(s) => {
+        Ok(mut s) => {
             info!("Storage initialized successfully");
+
+            // Try to initialize embeddings early and log any issues
+            #[cfg(feature = "embeddings")]
+            {
+                if let Err(e) = s.init_embeddings() {
+                    error!("Failed to initialize embedding service: {}", e);
+                    error!("Smart ingest will fall back to regular ingest without deduplication");
+                    error!("Hint: Check FASTEMBED_CACHE_PATH or ensure ~/.fastembed_cache exists");
+                } else {
+                    info!("Embedding service initialized successfully");
+                }
+            }
+
             Arc::new(Mutex::new(s))
         }
         Err(e) => {