mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
fix(gemini-cli): correct CLI invocation to match gemini v0.36 interface
The previous implementation used wrong flags (-p without value, --json,
--max-output-tokens) that don't exist in the real gemini CLI.
Correct invocation:
- Pass prompt as -p STRING value (not via stdin)
- Use --output-format json to get structured {response, stats} output
- Add --yolo to suppress interactive confirmation prompts
- Remove nonexistent --json and --max-output-tokens flags
- Parse `.response` field from JSON output, skipping MCP noise lines
- Extend timeout from 30s to 60s (agentic CLI is slower than raw API)
Smoke tested end-to-end: stdin HTML → summarize and --extract-json
both produce correct output via Gemini CLI.
This commit is contained in:
parent
cfe455b752
commit
cc1617a3a9
1 changed files with 78 additions and 46 deletions
|
|
@ -1,12 +1,11 @@
|
|||
/// Gemini CLI provider — shells out to `gemini -p` for completions.
|
||||
/// Primary provider in the default chain; requires the `gemini` binary on PATH.
|
||||
/// Prompts are passed exclusively via stdin (never as CLI arguments) to prevent
|
||||
/// command injection from web-scraped content.
|
||||
/// Prompts are passed via the `-p` flag (not as a positional or via stdin) to prevent
|
||||
/// command injection from web-scraped content. Output is parsed from `--output-format json`.
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::timeout;
|
||||
|
|
@ -19,7 +18,7 @@ use crate::provider::{CompletionRequest, LlmProvider};
|
|||
/// Maximum concurrent Gemini subprocess calls (MCP server protection).
|
||||
const MAX_CONCURRENT: usize = 6;
|
||||
/// Subprocess deadline — prevents hung `gemini` processes from blocking the chain.
|
||||
const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
pub struct GeminiCliProvider {
|
||||
default_model: String,
|
||||
|
|
@ -67,65 +66,43 @@ impl LlmProvider for GeminiCliProvider {
|
|||
.map_err(|_| LlmError::ProviderError("gemini semaphore closed".into()))?;
|
||||
|
||||
let mut cmd = Command::new("gemini");
|
||||
cmd.arg("-p");
|
||||
// -p STRING: headless mode with prompt as the flag value (never positional arg).
|
||||
// Passing via -p prevents command injection; the value is never interpreted as a shell command.
|
||||
cmd.arg("-p").arg(&prompt);
|
||||
cmd.arg("--model").arg(model);
|
||||
// Always request structured JSON output so we can extract the `response` field
|
||||
// and skip any preceding noise lines (e.g. MCP status warnings).
|
||||
cmd.arg("--output-format").arg("json");
|
||||
// --yolo suppresses any interactive confirmation prompts in headless mode.
|
||||
cmd.arg("--yolo");
|
||||
|
||||
if request.json_mode {
|
||||
cmd.arg("--json");
|
||||
}
|
||||
|
||||
if let Some(max) = request.max_tokens {
|
||||
cmd.arg("--max-output-tokens").arg(max.to_string());
|
||||
}
|
||||
|
||||
cmd.stdin(std::process::Stdio::piped());
|
||||
cmd.stdin(std::process::Stdio::null());
|
||||
cmd.stdout(std::process::Stdio::piped());
|
||||
cmd.stderr(std::process::Stdio::piped());
|
||||
|
||||
debug!(model, "spawning gemini subprocess");
|
||||
|
||||
let mut child = cmd
|
||||
.spawn()
|
||||
.map_err(|e| LlmError::Subprocess(e))?;
|
||||
|
||||
// Write prompt to stdin then close it.
|
||||
if let Some(mut stdin) = child.stdin.take() {
|
||||
stdin
|
||||
.write_all(prompt.as_bytes())
|
||||
.await
|
||||
.map_err(LlmError::Subprocess)?;
|
||||
// drop closes the pipe, signalling EOF to gemini
|
||||
}
|
||||
let mut child = cmd.spawn().map_err(LlmError::Subprocess)?;
|
||||
|
||||
// Bounded wait — prevents indefinite hangs on auth expiry or network stall.
|
||||
let output = match timeout(SUBPROCESS_TIMEOUT, child.wait_with_output()).await {
|
||||
Ok(Ok(out)) => out,
|
||||
Ok(Err(e)) => {
|
||||
return Err(LlmError::Subprocess(e));
|
||||
}
|
||||
Err(_elapsed) => {
|
||||
// Process is still running; kill it to avoid a zombie.
|
||||
// We can't easily kill after wait_with_output, but the child handle
|
||||
// is consumed. In the happy path this branch is never reached.
|
||||
return Err(LlmError::Timeout);
|
||||
}
|
||||
Ok(Err(e)) => return Err(LlmError::Subprocess(e)),
|
||||
Err(_elapsed) => return Err(LlmError::Timeout),
|
||||
};
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr_preview = String::from_utf8_lossy(&output.stderr);
|
||||
let preview = if stderr_preview.len() > 500 {
|
||||
&stderr_preview[..500]
|
||||
} else {
|
||||
&stderr_preview
|
||||
};
|
||||
let preview = &stderr_preview[..stderr_preview.len().min(500)];
|
||||
return Err(LlmError::ProviderError(format!(
|
||||
"gemini exited with {}: {preview}",
|
||||
output.status
|
||||
)));
|
||||
}
|
||||
|
||||
let raw = String::from_utf8_lossy(&output.stdout).into_owned();
|
||||
let cleaned = strip_code_fences(strip_thinking_tags(&raw).trim());
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let response = extract_response_from_output(&stdout)?;
|
||||
let cleaned = strip_code_fences(strip_thinking_tags(&response).trim());
|
||||
Ok(cleaned)
|
||||
}
|
||||
|
||||
|
|
@ -147,6 +124,34 @@ impl LlmProvider for GeminiCliProvider {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse the `response` field from gemini's `--output-format json` output.
|
||||
///
|
||||
/// The CLI emits lines before the JSON object (e.g. MCP status warnings).
|
||||
/// We find the first `{` to locate the JSON, parse it, and extract `.response`.
|
||||
fn extract_response_from_output(stdout: &str) -> Result<String, LlmError> {
|
||||
let json_start = stdout.find('{').ok_or_else(|| {
|
||||
let preview = &stdout[..stdout.len().min(300)];
|
||||
LlmError::ProviderError(format!("gemini produced no JSON output: {preview}"))
|
||||
})?;
|
||||
|
||||
let json_str = &stdout[json_start..];
|
||||
let outer: serde_json::Value = serde_json::from_str(json_str).map_err(|e| {
|
||||
let preview = &json_str[..json_str.len().min(300)];
|
||||
LlmError::ProviderError(format!("failed to parse gemini JSON output: {e} — {preview}"))
|
||||
})?;
|
||||
|
||||
// `response` holds the model's actual text output.
|
||||
outer["response"]
|
||||
.as_str()
|
||||
.ok_or_else(|| {
|
||||
LlmError::ProviderError(format!(
|
||||
"gemini JSON output missing 'response' field: {}",
|
||||
&json_str[..json_str.len().min(300)]
|
||||
))
|
||||
})
|
||||
.map(|s| s.to_string())
|
||||
}
|
||||
|
||||
/// Concatenate all messages into a single prompt string for the CLI.
|
||||
fn build_prompt(messages: &[crate::provider::Message]) -> String {
|
||||
messages
|
||||
|
|
@ -242,6 +247,37 @@ mod tests {
|
|||
assert!(result.contains("Tell me something."));
|
||||
}
|
||||
|
||||
// ── extract_response_from_output ──────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn extracts_response_from_clean_json() {
|
||||
let stdout = r#"{"session_id":"abc","response":"Hello world","stats":{}}"#;
|
||||
assert_eq!(extract_response_from_output(stdout).unwrap(), "Hello world");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extracts_response_skipping_mcp_noise() {
|
||||
// MCP warning line appears before the JSON object in real gemini output.
|
||||
let stdout = "MCP issues detected. Run /mcp list for status.\n{\"session_id\":\"abc\",\"response\":\"the answer\",\"stats\":{}}";
|
||||
assert_eq!(
|
||||
extract_response_from_output(stdout).unwrap(),
|
||||
"the answer"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_when_no_json_in_output() {
|
||||
let result = extract_response_from_output("MCP issues detected. No JSON follows.");
|
||||
assert!(matches!(result, Err(LlmError::ProviderError(_))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_when_response_field_missing() {
|
||||
let stdout = r#"{"session_id":"abc","stats":{}}"#;
|
||||
let result = extract_response_from_output(stdout);
|
||||
assert!(matches!(result, Err(LlmError::ProviderError(_))));
|
||||
}
|
||||
|
||||
// ── strip_code_fences ─────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
|
|
@ -266,9 +302,6 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn unavailable_when_binary_missing() {
|
||||
// Use a clearly nonexistent binary name to test the false path.
|
||||
// We can't swap the binary name in GeminiCliProvider without extracting it,
|
||||
// but we CAN verify the logic by calling Command directly the same way.
|
||||
let result = tokio::process::Command::new("__noxa_nonexistent_binary_xyz__")
|
||||
.arg("--version")
|
||||
.stdout(std::process::Stdio::null())
|
||||
|
|
@ -282,7 +315,6 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn strips_thinking_tags_from_output() {
|
||||
// Verify the pipeline: strip_thinking_tags → strip_code_fences
|
||||
let raw = "<think>internal reasoning</think>{\"result\": true}";
|
||||
let after_thinking = strip_thinking_tags(raw);
|
||||
let after_fences = strip_code_fences(after_thinking.trim());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue