diff --git a/surfsense_backend/alembic/versions/106_add_platform_web_search.py b/surfsense_backend/alembic/versions/106_add_platform_web_search.py deleted file mode 100644 index e4ba59cbd..000000000 --- a/surfsense_backend/alembic/versions/106_add_platform_web_search.py +++ /dev/null @@ -1,51 +0,0 @@ -"""106_add_platform_web_search - -Revision ID: 106 -Revises: 105 -Create Date: 2026-03-14 - -Adds web_search_enabled and web_search_config columns to searchspaces for -per-space control over the platform web search capability. - -Also removes legacy SEARXNG_API connector rows — web search is now a platform -service, not a per-user connector. -""" - -from __future__ import annotations - -from collections.abc import Sequence - -import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import JSONB - -from alembic import op - -revision: str = "106" -down_revision: str | None = "105" -branch_labels: str | Sequence[str] | None = None -depends_on: str | Sequence[str] | None = None - - -def upgrade() -> None: - op.add_column( - "searchspaces", - sa.Column( - "web_search_enabled", - sa.Boolean(), - nullable=False, - server_default=sa.text("true"), - ), - ) - op.add_column( - "searchspaces", - sa.Column("web_search_config", JSONB, nullable=True), - ) - - op.execute( - "DELETE FROM search_source_connectors WHERE connector_type = 'SEARXNG_API'" - ) - - -def downgrade() -> None: - op.drop_column("searchspaces", "web_search_config") - op.drop_column("searchspaces", "web_search_enabled") diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 1dcc1d393..0e0281a8c 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -41,7 +41,6 @@ _perf_log = get_perf_logger() _CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = { # Direct mappings (connector type == searchable type) "TAVILY_API": "TAVILY_API", - "SEARXNG_API": "SEARXNG_API", 
"LINKUP_API": "LINKUP_API", "BAIDU_SEARCH_API": "BAIDU_SEARCH_API", "SLACK_CONNECTOR": "SLACK_CONNECTOR", @@ -234,27 +233,6 @@ async def create_surfsense_deep_agent( search_space_id ) - # Platform web search: inject SEARXNG_API when the service is available - # and the search space hasn't disabled web search. - from app.db import SearchSpace - from app.services import web_search_service - - _LIVE_SEARCH_CONNECTORS = {"TAVILY_API", "SEARXNG_API", "LINKUP_API", "BAIDU_SEARCH_API"} - - space = await db_session.get(SearchSpace, search_space_id) - web_search_enabled = space.web_search_enabled if space else True - - if web_search_enabled and web_search_service.is_available(): - if available_connectors is None: - available_connectors = list(_ALWAYS_AVAILABLE_DOC_TYPES) - if "SEARXNG_API" not in available_connectors: - available_connectors.append("SEARXNG_API") - - if not web_search_enabled and available_connectors: - available_connectors = [ - c for c in available_connectors if c not in _LIVE_SEARCH_CONNECTORS - ] - except Exception as e: logging.warning(f"Failed to discover available connectors/document types: {e}") _perf_log.info( diff --git a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py index 4596d5efd..56a4defff 100644 --- a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py +++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py @@ -27,7 +27,6 @@ from app.utils.perf import get_perf_logger # These are never filtered by available_document_types. 
_LIVE_SEARCH_CONNECTORS: set[str] = { "TAVILY_API", - "SEARXNG_API", "LINKUP_API", "BAIDU_SEARCH_API", } @@ -191,7 +190,6 @@ _ALL_CONNECTORS: list[str] = [ "DISCORD_CONNECTOR", "AIRTABLE_CONNECTOR", "TAVILY_API", - "SEARXNG_API", "LINKUP_API", "BAIDU_SEARCH_API", "LUMA_CONNECTOR", @@ -228,7 +226,6 @@ CONNECTOR_DESCRIPTIONS: dict[str, str] = { "DISCORD_CONNECTOR": "Discord server conversations and shared content (personal community)", "AIRTABLE_CONNECTOR": "Airtable records, tables, and database content (personal data)", "TAVILY_API": "Tavily web search API results (real-time web search)", - "SEARXNG_API": "SearxNG search API results (privacy-focused web search)", "LINKUP_API": "Linkup search API results (web search)", "BAIDU_SEARCH_API": "Baidu search API results (Chinese web search)", "LUMA_CONNECTOR": "Luma events and meetings", @@ -479,7 +476,6 @@ def format_documents_for_context( # a numeric chunk_id (the numeric IDs are meaningless auto-incremented counters). live_search_connectors = { "TAVILY_API", - "SEARXNG_API", "LINKUP_API", "BAIDU_SEARCH_API", } @@ -705,7 +701,6 @@ async def search_knowledge_base_async( # Specs for live-search connectors (external APIs, no local DB/embedding). live_connector_specs: dict[str, tuple[str, bool, bool, dict[str, Any]]] = { "TAVILY_API": ("search_tavily", False, True, {}), - "SEARXNG_API": ("search_searxng", False, True, {}), "LINKUP_API": ("search_linkup", False, False, {"mode": "standard"}), "BAIDU_SEARCH_API": ("search_baidu", False, True, {}), } @@ -979,7 +974,7 @@ IMPORTANT: - Only connectors that are enabled/configured for this search space are available.{doc_types_info} - For real-time/public web queries (e.g., current exchange rates, stock prices, breaking news, weather), explicitly include live web connectors in `connectors_to_search`, prioritizing: - ["LINKUP_API", "TAVILY_API", "SEARXNG_API", "BAIDU_SEARCH_API"]. + ["LINKUP_API", "TAVILY_API", "BAIDU_SEARCH_API"]. 
## Available connector enums for `connectors_to_search` diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index 030cbf239..ab0d72194 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -68,6 +68,7 @@ from .podcast import create_generate_podcast_tool from .report import create_generate_report_tool from .scrape_webpage import create_scrape_webpage_tool from .search_surfsense_docs import create_search_surfsense_docs_tool +from .web_search import create_web_search_tool from .shared_memory import ( create_recall_shared_memory_tool, create_save_shared_memory_tool, @@ -186,7 +187,13 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ ), requires=[], # firecrawl_api_key is optional ), - # Note: write_todos is now provided by TodoListMiddleware from deepagents + # Web search tool — real-time web search via platform SearXNG + ToolDefinition( + name="web_search", + description="Search the web for real-time information using the platform SearXNG instance", + factory=lambda deps: create_web_search_tool(), + requires=[], + ), # Surfsense documentation search tool ToolDefinition( name="search_surfsense_docs", diff --git a/surfsense_backend/app/agents/new_chat/tools/web_search.py b/surfsense_backend/app/agents/new_chat/tools/web_search.py new file mode 100644 index 000000000..057ee8d07 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/web_search.py @@ -0,0 +1,115 @@ +""" +Web search tool backed by the platform SearXNG instance. + +Provides a standalone tool for real-time web searches, separate from the +knowledge base search which handles local/indexed documents. 
+""" + +import json +import time +from typing import Any + +from langchain_core.tools import StructuredTool +from pydantic import BaseModel, Field + +from app.utils.perf import get_perf_logger + + +class WebSearchInput(BaseModel): + """Input schema for the web_search tool.""" + + query: str = Field( + description="The search query to look up on the web. Use specific, descriptive terms.", + ) + top_k: int = Field( + default=10, + description="Number of results to retrieve (default: 10, max: 50).", + ) + + +def _format_web_results(documents: list[dict[str, Any]], *, max_chars: int = 50_000) -> str: + """Format SearXNG results into XML suitable for the LLM context.""" + if not documents: + return "No web search results found." + + parts: list[str] = [] + total_chars = 0 + + for doc in documents: + doc_info = doc.get("document") or {} + metadata = doc_info.get("metadata") or {} + title = doc_info.get("title") or "Web Result" + url = metadata.get("url") or "" + content = (doc.get("content") or "").strip() + if not content: + continue + + metadata_json = json.dumps(metadata, ensure_ascii=False) + doc_xml = "\n".join([ + "<document>", + "<metadata>", + f" <source>SEARXNG_API</source>", + f" <title><![CDATA[{title}]]></title>", + f" <url><![CDATA[{url}]]></url>", + f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>", + "</metadata>", + "<content>", + f" <![CDATA[{content}]]>", + "</content>", + "</document>", + "", + ]) + + if total_chars + len(doc_xml) > max_chars: + parts.append("<results_truncated/>") + break + + parts.append(doc_xml) + total_chars += len(doc_xml) + + return "\n".join(parts).strip() or "No web search results found." + + +def create_web_search_tool() -> StructuredTool: + """Factory for the ``web_search`` tool. + + The tool calls the platform SearXNG service (via ``web_search_service``) + which handles caching, circuit breaking, and retries internally. + """ + + async def _web_search_impl(query: str, top_k: int = 10) -> str: + from app.services import web_search_service + + perf = get_perf_logger() + t0 = time.perf_counter() + + if not web_search_service.is_available(): + return "Web search is not available — SearXNG is not configured on this server."
+ + _result_obj, documents = await web_search_service.search( + query=query, + top_k=min(max(1, top_k), 50), + ) + + formatted = _format_web_results(documents) + + perf.info( + "[web_search] query=%r results=%d chars=%d in %.3fs", + query[:60], + len(documents), + len(formatted), + time.perf_counter() - t0, + ) + return formatted + + return StructuredTool( + name="web_search", + description=( + "Search the web for real-time information using the platform SearXNG instance. " + "Use this for current events, news, prices, weather, public facts, or any " + "question that requires up-to-date information from the internet. " + "Results are privacy-focused — all queries are proxied through the server." + ), + coroutine=_web_search_impl, + args_schema=WebSearchInput, + ) diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 9bd266d39..e6db5670e 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -513,15 +513,6 @@ async def health_check(): return {"status": "ok"} -@app.get("/api/v1/platform/web-search/health", tags=["platform"]) -@limiter.exempt -async def web_search_health(user: User = Depends(current_active_user)): - """Return the health status of the platform web search (SearXNG) service.""" - from app.services import web_search_service - - return await web_search_service.health_check() - - @app.get("/verify-token") async def authenticated_route( user: User = Depends(current_active_user), diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index ac6ad549c..062b11b3a 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -1204,12 +1204,6 @@ class SearchSpace(BaseModel, TimestampMixin): Integer, nullable=True, default=0 ) # For image generation, defaults to Auto mode - # Platform web search capability (opt-out via toggle) - web_search_enabled = Column( - Boolean, nullable=False, default=True, server_default=text("true") - ) - web_search_config = Column(JSONB, nullable=True) - 
user_id = Column( UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False ) diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index 3eaa985f0..7f6638e2c 100644 --- a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -180,8 +180,6 @@ async def read_search_spaces( user_id=space.user_id, citations_enabled=space.citations_enabled, qna_custom_instructions=space.qna_custom_instructions, - web_search_enabled=space.web_search_enabled, - web_search_config=space.web_search_config, member_count=member_count, is_owner=is_owner, ) diff --git a/surfsense_backend/app/schemas/search_space.py b/surfsense_backend/app/schemas/search_space.py index 23640b2d8..054fe1465 100644 --- a/surfsense_backend/app/schemas/search_space.py +++ b/surfsense_backend/app/schemas/search_space.py @@ -1,6 +1,5 @@ import uuid from datetime import datetime -from typing import Any from pydantic import BaseModel, ConfigDict @@ -15,8 +14,6 @@ class SearchSpaceBase(BaseModel): class SearchSpaceCreate(SearchSpaceBase): citations_enabled: bool = True qna_custom_instructions: str | None = None - web_search_enabled: bool = True - web_search_config: dict[str, Any] | None = None class SearchSpaceUpdate(BaseModel): @@ -24,8 +21,6 @@ class SearchSpaceUpdate(BaseModel): description: str | None = None citations_enabled: bool | None = None qna_custom_instructions: str | None = None - web_search_enabled: bool | None = None - web_search_config: dict[str, Any] | None = None class SearchSpaceRead(SearchSpaceBase, IDModel, TimestampModel): @@ -34,8 +29,6 @@ class SearchSpaceRead(SearchSpaceBase, IDModel, TimestampModel): user_id: uuid.UUID citations_enabled: bool qna_custom_instructions: str | None = None - web_search_enabled: bool - web_search_config: dict[str, Any] | None = None model_config = ConfigDict(from_attributes=True) diff --git 
a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index f065a24eb..cb12ffd70 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -16,7 +16,6 @@ from app.db import ( Document, SearchSourceConnector, SearchSourceConnectorType, - SearchSpace, async_session_maker, ) from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever @@ -581,8 +580,8 @@ class ConnectorService: """Search using the platform SearXNG instance. Delegates to ``WebSearchService`` which handles caching, circuit - breaking, and retries. Per-search-space overrides are read from the - ``SearchSpace.web_search_config`` JSONB column. + breaking, and retries. SearXNG configuration comes from the + docker/searxng/settings.yml file. """ from app.services import web_search_service @@ -594,26 +593,9 @@ class ConnectorService: "sources": [], }, [] - # Fetch optional per-space overrides - engines: str | None = None - language: str | None = None - safesearch: int | None = None - - space = await self.session.get(SearchSpace, search_space_id) - if space and space.web_search_config: - cfg = space.web_search_config - engines = cfg.get("engines") - language = cfg.get("language") - raw_ss = cfg.get("safesearch") - if isinstance(raw_ss, int) and 0 <= raw_ss <= 2: - safesearch = raw_ss - return await web_search_service.search( query=user_query, top_k=top_k, - engines=engines, - language=language, - safesearch=safesearch, ) async def search_baidu( diff --git a/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx index ea82b29d7..971c82e4d 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx @@ -1,6 +1,6 @@ "use client"; -import { Bot, Brain, FileText, Globe, ImageIcon, MessageSquare, Search, Shield } from 
"lucide-react"; +import { Bot, Brain, FileText, Globe, ImageIcon, MessageSquare, Shield } from "lucide-react"; import { useParams, useRouter, useSearchParams } from "next/navigation"; import { useTranslations } from "next-intl"; import { useCallback, useEffect } from "react"; @@ -11,7 +11,6 @@ import { LLMRoleManager } from "@/components/settings/llm-role-manager"; import { ModelConfigManager } from "@/components/settings/model-config-manager"; import { PromptConfigManager } from "@/components/settings/prompt-config-manager"; import { RolesManager } from "@/components/settings/roles-manager"; -import { WebSearchSettingsManager } from "@/components/settings/web-search-settings-manager"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/animated-tabs"; import { trackSettingsViewed } from "@/lib/posthog/events"; @@ -21,7 +20,6 @@ const VALID_TABS = [ "roles", "image-models", "prompts", - "web-search", "public-links", "team-roles", ] as const; @@ -78,19 +76,15 @@ export default function SettingsPage() { {t("nav_team_roles")} - - - {t("nav_system_instructions")} - - - - {t("nav_web_search")} - - - - {t("nav_public_links")} - - + + + {t("nav_system_instructions")} + + + + {t("nav_public_links")} + + @@ -106,9 +100,6 @@ export default function SettingsPage() { - - - diff --git a/surfsense_web/components/settings/web-search-settings-manager.tsx b/surfsense_web/components/settings/web-search-settings-manager.tsx deleted file mode 100644 index d819eece9..000000000 --- a/surfsense_web/components/settings/web-search-settings-manager.tsx +++ /dev/null @@ -1,258 +0,0 @@ -"use client"; - -import { useQuery } from "@tanstack/react-query"; -import { useAtomValue } from "jotai"; -import { Globe, Loader2, Save } from "lucide-react"; -import { useTranslations } from "next-intl"; -import { useCallback, useEffect, useState } from "react"; -import { toast } from "sonner"; -import { updateSearchSpaceMutationAtom } from 
"@/atoms/search-spaces/search-space-mutation.atoms"; -import { Alert, AlertDescription } from "@/components/ui/alert"; -import { Button } from "@/components/ui/button"; -import { - Card, - CardContent, - CardDescription, - CardHeader, - CardTitle, -} from "@/components/ui/card"; -import { Input } from "@/components/ui/input"; -import { Label } from "@/components/ui/label"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from "@/components/ui/select"; -import { Skeleton } from "@/components/ui/skeleton"; -import { Switch } from "@/components/ui/switch"; -import { searchSpacesApiService } from "@/lib/apis/search-spaces-api.service"; -import { baseApiService } from "@/lib/apis/base-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; - -interface WebSearchSettingsManagerProps { - searchSpaceId: number; -} - -interface HealthStatus { - status: string; - response_time_ms?: number; - error?: string; - circuit_breaker?: string; -} - -export function WebSearchSettingsManager({ searchSpaceId }: WebSearchSettingsManagerProps) { - const t = useTranslations("searchSpaceSettings"); - const { - data: searchSpace, - isLoading, - refetch, - } = useQuery({ - queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()), - queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }), - enabled: !!searchSpaceId, - }); - - const { data: healthData } = useQuery({ - queryKey: ["web-search-health"], - queryFn: async () => { - const response = await baseApiService.get("/api/v1/platform/web-search/health"); - return response as HealthStatus; - }, - refetchInterval: 30000, - staleTime: 10000, - }); - - const { mutateAsync: updateSearchSpace } = useAtomValue(updateSearchSpaceMutationAtom); - - const [enabled, setEnabled] = useState(true); - const [engines, setEngines] = useState(""); - const [language, setLanguage] = useState(""); - const [safesearch, setSafesearch] = useState(""); - const [saving, setSaving] = 
useState(false); - - useEffect(() => { - if (searchSpace) { - setEnabled(searchSpace.web_search_enabled ?? true); - const cfg = searchSpace.web_search_config as Record | null; - setEngines((cfg?.engines as string) ?? ""); - setLanguage((cfg?.language as string) ?? ""); - const ss = cfg?.safesearch; - setSafesearch(ss !== null && ss !== undefined ? String(ss) : ""); - } - }, [searchSpace]); - - const handleSave = useCallback(async () => { - try { - setSaving(true); - - const webSearchConfig: Record = {}; - if (engines.trim()) webSearchConfig.engines = engines.trim(); - if (language.trim()) webSearchConfig.language = language.trim(); - if (safesearch !== "") webSearchConfig.safesearch = Number(safesearch); - - await updateSearchSpace({ - id: searchSpaceId, - data: { - web_search_enabled: enabled, - web_search_config: Object.keys(webSearchConfig).length > 0 ? webSearchConfig : null, - }, - }); - - toast.success(t("web_search_saved")); - await refetch(); - } catch (error: unknown) { - console.error("Error saving web search settings:", error); - const message = error instanceof Error ? error.message : "Failed to save web search settings"; - toast.error(message); - } finally { - setSaving(false); - } - }, [searchSpaceId, enabled, engines, language, safesearch, updateSearchSpace, refetch, t]); - - if (isLoading) { - return ( -
- - - - - - - - - - -
- ); - } - - const isHealthy = healthData?.status === "healthy"; - const isUnavailable = healthData?.status === "unavailable"; - - return ( -
- - - - {t("web_search_description")} - - - - {healthData && ( -
- - - {isHealthy - ? `${t("web_search_status_healthy")} (${healthData.response_time_ms}ms)` - : isUnavailable - ? t("web_search_status_not_configured") - : t("web_search_status_unhealthy")} - -
- )} - - - - {t("web_search_title")} - - {t("web_search_enabled_description")} - - - -
-
- -

- {t("web_search_enabled_description")} -

-
- -
- - {enabled && ( -
-
- - setEngines(e.target.value)} - className="text-sm md:text-base h-9 md:h-10" - /> -

- {t("web_search_engines_description")} -

-
- -
-
- - setLanguage(e.target.value)} - className="text-sm md:text-base h-9 md:h-10" - /> -

- {t("web_search_language_description")} -

-
- -
- - -

- {t("web_search_safesearch_description")} -

-
-
-
- )} -
-
- -
- -
-
- ); -} diff --git a/surfsense_web/contracts/types/search-space.types.ts b/surfsense_web/contracts/types/search-space.types.ts index a71c1bbbb..8a0a2fb4c 100644 --- a/surfsense_web/contracts/types/search-space.types.ts +++ b/surfsense_web/contracts/types/search-space.types.ts @@ -9,8 +9,6 @@ export const searchSpace = z.object({ user_id: z.string(), citations_enabled: z.boolean(), qna_custom_instructions: z.string().nullable(), - web_search_enabled: z.boolean(), - web_search_config: z.record(z.unknown()).nullable(), member_count: z.number(), is_owner: z.boolean(), }); @@ -56,8 +54,6 @@ export const updateSearchSpaceRequest = z.object({ description: true, citations_enabled: true, qna_custom_instructions: true, - web_search_enabled: true, - web_search_config: true, }) .partial(), });