perf(gemini-cli): skip MCP server startup via workspace settings override

The user has 6 MCP servers configured in ~/.gemini/settings.json. Without mitigation, the gemini CLI spawns all of them on every headless call, adding 10-60+ seconds of startup latency. Two flags reduce this: - cmd.current_dir(workdir): workspace .gemini/settings.json with {"mcpServers":{}} overrides ~/.gemini/settings.json, blocking all 6 MCP servers from spawning. The workdir is /tmp/noxa-gemini/ and is created once at GeminiCliProvider::new(). - --extensions "": prevents extension loading (~3s saved) Per geminicli.com/docs: workspace settings override user settings. The --allowed-mcp-server-names flag was tested but hangs with a fake name and exits without response for empty string — not usable. Result: consistent 13-17s per call vs >60s baseline with MCP servers.
2026-04-25 00:06:21 +02:00 · 2026-04-11 20:23:33 -04:00 · 2026-04-11 20:23:33 -04:00 · 251979edfe
commit 251979edfe
parent c018ea4a61
1 changed files with 66 additions and 5 deletions
--- a/crates/noxa-llm/src/providers/gemini_cli.rs
+++ b/crates/noxa-llm/src/providers/gemini_cli.rs
@ -1,7 +1,23 @@
 /// Gemini CLI provider — shells out to `gemini -p` for completions.
 /// Primary provider in the default chain; requires the `gemini` binary on PATH.
-/// Prompts are passed via the `-p` flag (not as a positional or via stdin) to prevent
+///
+/// Prompts are passed via the `-p` flag (not via stdin or as a positional) to prevent
 /// command injection from web-scraped content. Output is parsed from `--output-format json`.
+///
+/// # Startup optimizations
+///
+/// The gemini CLI is an agentic Node.js application that connects to every configured MCP
+/// server at startup (the user has 6). Without mitigation this can add 10-60+ seconds per
+/// call as those servers spin up and time out.
+///
+/// Two flags reduce this:
+/// - `--extensions ""` — skips extension loading (~3 s saved)
+/// - `current_dir` set to a temp workdir containing `.gemini/settings.json` with
+///   `{"mcpServers":{}}` — workspace settings override user settings, so all 6 MCP
+///   servers are skipped at subprocess startup (major speedup).
+///
+/// The workdir is created once at construction and reused for every call.
+use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::Duration;

@ -15,14 +31,20 @@ use crate::clean::strip_thinking_tags;
 use crate::error::LlmError;
 use crate::provider::{CompletionRequest, LlmProvider};

-/// Maximum concurrent Gemini subprocess calls (MCP server protection).
+/// Maximum concurrent Gemini subprocess calls.
 const MAX_CONCURRENT: usize = 6;
-/// Subprocess deadline — prevents hung `gemini` processes from blocking the chain.
+/// Subprocess deadline — prevents hung `gemini` processes blocking the chain.
 const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(60);

+/// Fixed workdir used for every subprocess call.
+/// A workspace-level `.gemini/settings.json` here overrides the user's MCP server config.
+const NOXA_GEMINI_WORKDIR: &str = "/tmp/noxa-gemini";
+
 pub struct GeminiCliProvider {
    default_model: String,
    semaphore: Arc<Semaphore>,
+    /// Workdir with a minimal `.gemini/settings.json` that disables MCP servers.
+    workdir: PathBuf,
 }

 impl GeminiCliProvider {
@ -34,9 +56,13 @@ impl GeminiCliProvider {
            .filter(|s| !s.is_empty())
            .unwrap_or_else(|| "gemini-2.5-pro".into());

+        let workdir = PathBuf::from(NOXA_GEMINI_WORKDIR);
+        ensure_gemini_workdir(&workdir);
+
        Self {
            default_model,
            semaphore: Arc::new(Semaphore::new(MAX_CONCURRENT)),
+            workdir,
        }
    }

@ -75,14 +101,20 @@ impl LlmProvider for GeminiCliProvider {
        cmd.arg("--output-format").arg("json");
        // --yolo suppresses any interactive confirmation prompts in headless mode.
        cmd.arg("--yolo");
+        // --extensions "" skips loading user extensions (~3 s startup savings).
+        cmd.arg("--extensions").arg("");
+        // Workspace settings in self.workdir override the user's ~/.gemini/settings.json,
+        // replacing the user's MCP server list with {} so none are spawned at startup.
+        // Without this, each of the user's MCP servers adds latency to every call.
+        cmd.current_dir(&self.workdir);

        cmd.stdin(std::process::Stdio::null());
        cmd.stdout(std::process::Stdio::piped());
        cmd.stderr(std::process::Stdio::piped());

-        debug!(model, "spawning gemini subprocess");
+        debug!(model, workdir = %self.workdir.display(), "spawning gemini subprocess");

-        let mut child = cmd.spawn().map_err(LlmError::Subprocess)?;
+        let child = cmd.spawn().map_err(LlmError::Subprocess)?;

        // Bounded wait — prevents indefinite hangs on auth expiry or network stall.
        let output = match timeout(SUBPROCESS_TIMEOUT, child.wait_with_output()).await {
@ -152,6 +184,35 @@ fn extract_response_from_output(stdout: &str) -> Result<String, LlmError> {
        .map(|s| s.to_string())
 }

+/// Create the noxa gemini workdir with a minimal workspace settings file.
+///
+/// The `.gemini/settings.json` written here overrides the user's `~/.gemini/settings.json`
+/// for any `gemini` subprocess run from this directory. Setting `mcpServers` to `{}` prevents
+/// the CLI from spawning the user's configured MCP servers on every headless call.
+///
+/// Errors are intentionally ignored — if the write fails, the subprocess still works,
+/// just without the startup optimization (and with a warning in the logs).
+fn ensure_gemini_workdir(workdir: &std::path::Path) {
+    let settings_dir = workdir.join(".gemini");
+    let settings_path = settings_dir.join("settings.json");
+
+    if settings_path.exists() {
+        return;
+    }
+
+    if let Err(e) = std::fs::create_dir_all(&settings_dir) {
+        tracing::warn!(path = %settings_dir.display(), error = %e, "failed to create gemini workdir");
+        return;
+    }
+
+    // Minimal workspace settings: disable all MCP servers.
+    // Workspace settings override ~/.gemini/settings.json per gemini CLI docs.
+    let content = r#"{"mcpServers":{}}"#;
+    if let Err(e) = std::fs::write(&settings_path, content) {
+        tracing::warn!(path = %settings_path.display(), error = %e, "failed to write gemini workspace settings");
+    }
+}
+
 /// Concatenate all messages into a single prompt string for the CLI.
 fn build_prompt(messages: &[crate::provider::Message]) -> String {
    messages