diff --git a/crates/noxa-llm/src/providers/gemini_cli.rs b/crates/noxa-llm/src/providers/gemini_cli.rs index 1c9af68..8c7df61 100644 --- a/crates/noxa-llm/src/providers/gemini_cli.rs +++ b/crates/noxa-llm/src/providers/gemini_cli.rs @@ -1,12 +1,11 @@ /// Gemini CLI provider — shells out to `gemini -p` for completions. /// Primary provider in the default chain; requires the `gemini` binary on PATH. -/// Prompts are passed exclusively via stdin (never as CLI arguments) to prevent -/// command injection from web-scraped content. +/// Prompts are passed via the `-p` flag (not as a positional or via stdin) to prevent +/// command injection from web-scraped content. Output is parsed from `--output-format json`. use std::sync::Arc; use std::time::Duration; use async_trait::async_trait; -use tokio::io::AsyncWriteExt; use tokio::process::Command; use tokio::sync::Semaphore; use tokio::time::timeout; @@ -19,7 +18,7 @@ use crate::provider::{CompletionRequest, LlmProvider}; /// Maximum concurrent Gemini subprocess calls (MCP server protection). const MAX_CONCURRENT: usize = 6; /// Subprocess deadline — prevents hung `gemini` processes from blocking the chain. -const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(30); +const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(60); pub struct GeminiCliProvider { default_model: String, @@ -67,65 +66,43 @@ impl LlmProvider for GeminiCliProvider { .map_err(|_| LlmError::ProviderError("gemini semaphore closed".into()))?; let mut cmd = Command::new("gemini"); - cmd.arg("-p"); + // -p STRING: headless mode with prompt as the flag value (never positional arg). + // Passing via -p prevents command injection; the value is never interpreted as a shell command. + cmd.arg("-p").arg(&prompt); cmd.arg("--model").arg(model); + // Always request structured JSON output so we can extract the `response` field + // and skip any preceding noise lines (e.g. MCP status warnings). + cmd.arg("--output-format").arg("json"); + // --yolo suppresses any interactive confirmation prompts in headless mode. + cmd.arg("--yolo"); - if request.json_mode { - cmd.arg("--json"); - } - - if let Some(max) = request.max_tokens { - cmd.arg("--max-output-tokens").arg(max.to_string()); - } - - cmd.stdin(std::process::Stdio::piped()); + cmd.stdin(std::process::Stdio::null()); cmd.stdout(std::process::Stdio::piped()); cmd.stderr(std::process::Stdio::piped()); debug!(model, "spawning gemini subprocess"); - let mut child = cmd - .spawn() - .map_err(|e| LlmError::Subprocess(e))?; - - // Write prompt to stdin then close it. - if let Some(mut stdin) = child.stdin.take() { - stdin - .write_all(prompt.as_bytes()) - .await - .map_err(LlmError::Subprocess)?; - // drop closes the pipe, signalling EOF to gemini - } + let mut child = cmd.spawn().map_err(LlmError::Subprocess)?; // Bounded wait — prevents indefinite hangs on auth expiry or network stall. let output = match timeout(SUBPROCESS_TIMEOUT, child.wait_with_output()).await { Ok(Ok(out)) => out, - Ok(Err(e)) => { - return Err(LlmError::Subprocess(e)); - } - Err(_elapsed) => { - // Process is still running; kill it to avoid a zombie. - // We can't easily kill after wait_with_output, but the child handle - // is consumed. In the happy path this branch is never reached. - return Err(LlmError::Timeout); - } + Ok(Err(e)) => return Err(LlmError::Subprocess(e)), + Err(_elapsed) => return Err(LlmError::Timeout), }; if !output.status.success() { let stderr_preview = String::from_utf8_lossy(&output.stderr); - let preview = if stderr_preview.len() > 500 { - &stderr_preview[..500] - } else { - &stderr_preview - }; + let preview = &stderr_preview[..stderr_preview.len().min(500)]; return Err(LlmError::ProviderError(format!( "gemini exited with {}: {preview}", output.status ))); } - let raw = String::from_utf8_lossy(&output.stdout).into_owned(); - let cleaned = strip_code_fences(strip_thinking_tags(&raw).trim()); + let stdout = String::from_utf8_lossy(&output.stdout); + let response = extract_response_from_output(&stdout)?; + let cleaned = strip_code_fences(strip_thinking_tags(&response).trim()); Ok(cleaned) } @@ -147,6 +124,34 @@ impl LlmProvider for GeminiCliProvider { } } +/// Parse the `response` field from gemini's `--output-format json` output. +/// +/// The CLI emits lines before the JSON object (e.g. MCP status warnings). +/// We find the first `{` to locate the JSON, parse it, and extract `.response`. +fn extract_response_from_output(stdout: &str) -> Result { + let json_start = stdout.find('{').ok_or_else(|| { + let preview = &stdout[..stdout.len().min(300)]; + LlmError::ProviderError(format!("gemini produced no JSON output: {preview}")) + })?; + + let json_str = &stdout[json_start..]; + let outer: serde_json::Value = serde_json::from_str(json_str).map_err(|e| { + let preview = &json_str[..json_str.len().min(300)]; + LlmError::ProviderError(format!("failed to parse gemini JSON output: {e} — {preview}")) + })?; + + // `response` holds the model's actual text output. + outer["response"] + .as_str() + .ok_or_else(|| { + LlmError::ProviderError(format!( + "gemini JSON output missing 'response' field: {}", + &json_str[..json_str.len().min(300)] + )) + }) + .map(|s| s.to_string()) +} + /// Concatenate all messages into a single prompt string for the CLI. fn build_prompt(messages: &[crate::provider::Message]) -> String { messages @@ -242,6 +247,37 @@ mod tests { assert!(result.contains("Tell me something.")); } + // ── extract_response_from_output ────────────────────────────────────────── + + #[test] + fn extracts_response_from_clean_json() { + let stdout = r#"{"session_id":"abc","response":"Hello world","stats":{}}"#; + assert_eq!(extract_response_from_output(stdout).unwrap(), "Hello world"); + } + + #[test] + fn extracts_response_skipping_mcp_noise() { + // MCP warning line appears before the JSON object in real gemini output. + let stdout = "MCP issues detected. Run /mcp list for status.\n{\"session_id\":\"abc\",\"response\":\"the answer\",\"stats\":{}}"; + assert_eq!( + extract_response_from_output(stdout).unwrap(), + "the answer" + ); + } + + #[test] + fn error_when_no_json_in_output() { + let result = extract_response_from_output("MCP issues detected. No JSON follows."); + assert!(matches!(result, Err(LlmError::ProviderError(_)))); + } + + #[test] + fn error_when_response_field_missing() { + let stdout = r#"{"session_id":"abc","stats":{}}"#; + let result = extract_response_from_output(stdout); + assert!(matches!(result, Err(LlmError::ProviderError(_)))); + } + // ── strip_code_fences ───────────────────────────────────────────────────── #[test] @@ -266,9 +302,6 @@ mod tests { #[tokio::test] async fn unavailable_when_binary_missing() { - // Use a clearly nonexistent binary name to test the false path. - // We can't swap the binary name in GeminiCliProvider without extracting it, - // but we CAN verify the logic by calling Command directly the same way. let result = tokio::process::Command::new("__noxa_nonexistent_binary_xyz__") .arg("--version") .stdout(std::process::Stdio::null()) @@ -282,7 +315,6 @@ mod tests { #[test] fn strips_thinking_tags_from_output() { - // Verify the pipeline: strip_thinking_tags → strip_code_fences let raw = "internal reasoning{\"result\": true}"; let after_thinking = strip_thinking_tags(raw); let after_fences = strip_code_fences(after_thinking.trim());