mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-28 09:46:20 +02:00
- Add GeminiCliProvider: shells out to `gemini -p` with --output-format json, injection-safe prompt passing, MCP server suppression via temp workdir, 6-slot concurrency semaphore, 60s subprocess deadline
- Add --llm-provider, --llm-model, --llm-base-url CLI flags for per-call overrides
- Provider chain: Gemini CLI → OpenAI → Ollama → Anthropic
- Move LLM timing to dispatch layer (LLM: Xs on stderr)
- Default Ollama model: qwen3:8b → qwen3.5:9b (benchmark shows better schema extraction)
- Add noxa mcp subcommand
- Add docs/reports/llm-benchmark-2026-04-11.md (Gemini vs qwen3.5:4b vs qwen3.5:9b)
- Bump version 0.3.11 → 0.4.0

Co-authored-by: Claude <claude@anthropic.com>
140 lines
3.6 KiB
JSON
140 lines
3.6 KiB
JSON
{
|
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
"$id": "./config.schema.json",
|
|
"title": "Noxa config.json",
|
|
"description": "Optional non-secret defaults for the noxa CLI. Unknown fields are ignored by the binary, and secrets/URLs belong in .env.",
|
|
"type": "object",
|
|
"additionalProperties": true,
|
|
"properties": {
|
|
"$schema": {
|
|
"type": "string",
|
|
"description": "Editor hint pointing at this schema."
|
|
},
|
|
"_doc": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"description": "Human-readable notes. Ignored by noxa."
|
|
},
|
|
"format": {
|
|
"type": "string",
|
|
"enum": ["markdown", "json", "text", "llm", "html"],
|
|
"default": "markdown",
|
|
"description": "Default output format."
|
|
},
|
|
"browser": {
|
|
"type": "string",
|
|
"enum": ["chrome", "firefox", "random"],
|
|
"default": "chrome",
|
|
"description": "TLS/browser fingerprint profile."
|
|
},
|
|
"timeout": {
|
|
"type": "integer",
|
|
"minimum": 0,
|
|
"default": 30,
|
|
"description": "Request timeout in seconds."
|
|
},
|
|
"pdf_mode": {
|
|
"type": "string",
|
|
"enum": ["auto", "fast"],
|
|
"default": "auto",
|
|
"description": "PDF handling mode: auto or fast."
|
|
},
|
|
"metadata": {
|
|
"type": "boolean",
|
|
"default": false,
|
|
"description": "Include metadata in output."
|
|
},
|
|
"verbose": {
|
|
"type": "boolean",
|
|
"default": false,
|
|
"description": "Enable verbose logging."
|
|
},
|
|
"output_dir": {
|
|
"type": ["string", "null"],
|
|
"default": null,
|
|
"description": "Write outputs to files in this directory instead of stdout."
|
|
},
|
|
"only_main_content": {
|
|
"type": "boolean",
|
|
"default": false,
|
|
"description": "Strip nav/sidebar/footer noise automatically."
|
|
},
|
|
"include_selectors": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"default": [],
|
|
"description": "CSS selectors to force-include."
|
|
},
|
|
"exclude_selectors": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"default": [],
|
|
"description": "CSS selectors to exclude."
|
|
},
|
|
"depth": {
|
|
"type": "integer",
|
|
"minimum": 0,
|
|
"default": 1,
|
|
"description": "Maximum crawl depth."
|
|
},
|
|
"max_pages": {
|
|
"type": "integer",
|
|
"minimum": 0,
|
|
"default": 20,
|
|
"description": "Maximum number of pages to crawl."
|
|
},
|
|
"concurrency": {
|
|
"type": "integer",
|
|
"minimum": 0,
|
|
"default": 5,
|
|
"description": "Maximum concurrent requests."
|
|
},
|
|
"delay": {
|
|
"type": "integer",
|
|
"minimum": 0,
|
|
"default": 100,
|
|
"description": "Delay between requests in milliseconds."
|
|
},
|
|
"path_prefix": {
|
|
"type": ["string", "null"],
|
|
"default": null,
|
|
"description": "Only crawl paths with this prefix."
|
|
},
|
|
"include_paths": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"default": [],
|
|
"description": "Glob patterns for crawl paths to include."
|
|
},
|
|
"exclude_paths": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"default": [],
|
|
"description": "Glob patterns for crawl paths to exclude."
|
|
},
|
|
"use_sitemap": {
|
|
"type": "boolean",
|
|
"default": false,
|
|
"description": "Seed crawl traversal from sitemap discovery."
|
|
},
|
|
"llm_provider": {
|
|
"type": "string",
|
|
"enum": ["gemini", "ollama", "openai", "anthropic"],
|
|
"description": "Optional LLM provider name."
|
|
},
|
|
"llm_model": {
|
|
"type": "string",
|
|
"description": "Optional LLM model override."
|
|
}
|
|
}
|
|
}
|