diff --git a/surfsense_backend/alembic/versions/30_add_baidu_search_connector_enum.py b/surfsense_backend/alembic/versions/30_add_baidu_search_connector_enum.py
new file mode 100644
index 000000000..f1aa58d83
--- /dev/null
+++ b/surfsense_backend/alembic/versions/30_add_baidu_search_connector_enum.py
@@ -0,0 +1,61 @@
+"""Add BAIDU_SEARCH_API to searchsourceconnectortype enum
+
+Revision ID: 30
+Revises: 29
+
+Changes:
+1. Add BAIDU_SEARCH_API value to searchsourceconnectortype enum
+2. Add BAIDU_SEARCH_API value to documenttype enum for consistency
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "30"
+down_revision: str | None = "29"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+# Enum types that receive the new label, in the order they are altered.
+ENUM_TYPES = ("searchsourceconnectortype", "documenttype")
+NEW_VALUE = "BAIDU_SEARCH_API"
+
+
+def upgrade() -> None:
+    """Add BAIDU_SEARCH_API to searchsourceconnectortype and documenttype enums.
+
+    Each enum is altered inside a DO block that first looks the label up in
+    pg_enum, so the ALTER TYPE is skipped when the label is already present.
+    """
+    for enum_name in ENUM_TYPES:
+        op.execute(
+            f"""
+            DO $$
+            BEGIN
+                IF NOT EXISTS (
+                    SELECT 1 FROM pg_type t
+                    JOIN pg_enum e ON t.oid = e.enumtypid
+                    WHERE t.typname = '{enum_name}' AND e.enumlabel = '{NEW_VALUE}'
+                ) THEN
+                    ALTER TYPE {enum_name} ADD VALUE '{NEW_VALUE}';
+                END IF;
+            END
+            $$;
+            """
+        )
+
+
+def downgrade() -> None:
+    """
+    Downgrade is not supported for enum values in PostgreSQL.
+
+    Removing enum values can break existing data and is generally not safe.
+    To remove these values, you would need to:
+    1. Remove all references to BAIDU_SEARCH_API in the database
+    2. Recreate the enum type without BAIDU_SEARCH_API
+    3. Reapply all other enum values
+
+    This is intentionally left as a no-op for safety.
+    """
diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py
index 39ffc1218..7824e06ea 100644
--- a/surfsense_backend/app/agents/researcher/nodes.py
+++ b/surfsense_backend/app/agents/researcher/nodes.py
@@ -1057,6 +1057,32 @@ async def fetch_relevant_documents(
                             }
                         )
 
+                elif connector == "BAIDU_SEARCH_API":
+                    (
+                        source_object,
+                        baidu_chunks,
+                    ) = await connector_service.search_baidu(
+                        user_query=reformulated_query,
+                        user_id=user_id,
+                        search_space_id=search_space_id,
+                        top_k=top_k,
+                    )
+
+                    # Add to sources and raw documents
+                    if source_object:
+                        all_sources.append(source_object)
+                    all_raw_documents.extend(baidu_chunks)
+
+                    # Stream found document count
+                    if streaming_service and writer:
+                        writer(
+                            {
+                                "yield_value": streaming_service.format_terminal_info_delta(
+                                    f"🇨🇳 Found {len(baidu_chunks)} Baidu Search results related to your query"
+                                )
+                            }
+                        )
+
                 elif connector == "DISCORD_CONNECTOR":
                     (
                         source_object,
diff --git a/surfsense_backend/app/agents/researcher/utils.py b/surfsense_backend/app/agents/researcher/utils.py
index 6ce784c1f..01d9f8f95 100644
--- a/surfsense_backend/app/agents/researcher/utils.py
+++ b/surfsense_backend/app/agents/researcher/utils.py
@@ -48,6 +48,7 @@ def get_connector_emoji(connector_name: str) -> str:
         "DISCORD_CONNECTOR": "🗨️",
         "TAVILY_API": "🔍",
         "LINKUP_API": "🔗",
+        "BAIDU_SEARCH_API": "🇨🇳",
         "GOOGLE_CALENDAR_CONNECTOR": "📅",
         "AIRTABLE_CONNECTOR": "🗃️",
         "LUMA_CONNECTOR": "✨",
@@ -73,6 +74,7 @@ def get_connector_friendly_name(connector_name: str) -> str:
         "DISCORD_CONNECTOR": "Discord",
         "TAVILY_API": "Tavily Search",
         "LINKUP_API": "Linkup Search",
+        "BAIDU_SEARCH_API": "Baidu Search",
         "AIRTABLE_CONNECTOR": "Airtable",
         "LUMA_CONNECTOR": "Luma",
         "ELASTICSEARCH_CONNECTOR": "Elasticsearch",
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index ee3a3e079..db5ea73ea
100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -58,6 +58,7 @@ class SearchSourceConnectorType(str, Enum):
     TAVILY_API = "TAVILY_API"
     SEARXNG_API = "SEARXNG_API"
     LINKUP_API = "LINKUP_API"
+    BAIDU_SEARCH_API = "BAIDU_SEARCH_API"  # Baidu AI Search API for Chinese web search
     SLACK_CONNECTOR = "SLACK_CONNECTOR"
     NOTION_CONNECTOR = "NOTION_CONNECTOR"
     GITHUB_CONNECTOR = "GITHUB_CONNECTOR"
diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index 467354a17..1ab70276f 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -560,6 +560,218 @@ class ConnectorService:
 
         return result_object, documents
 
+    async def search_baidu(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+    ) -> tuple:
+        """
+        Search using Baidu AI Search API and return both sources and documents.
+
+        Baidu AI Search provides intelligent search with automatic summarization.
+        Only the raw search results (the "references" list) are extracted from
+        the API response.
+
+        Every failure path (no connector, missing API key, transport or HTTP
+        error, undecodable or empty response) is logged and reported as an empty
+        result instead of being raised.
+
+        Args:
+            user_query: User's search query
+            user_id: User ID
+            search_space_id: Search space ID
+            top_k: Maximum number of web results to request (clamped to 1-20)
+
+        Returns:
+            tuple: (sources_info_dict, documents_list)
+        """
+
+        def empty_result() -> tuple:
+            # Build fresh objects on every call so no caller shares state.
+            return {
+                "id": 12,
+                "name": "Baidu Search",
+                "type": "BAIDU_SEARCH_API",
+                "sources": [],
+            }, []
+
+        # Get Baidu connector configuration
+        baidu_connector = await self.get_connector_by_type(
+            user_id, SearchSourceConnectorType.BAIDU_SEARCH_API, search_space_id
+        )
+        if not baidu_connector:
+            return empty_result()
+
+        config = baidu_connector.config or {}
+        api_key = config.get("BAIDU_API_KEY")
+        if not api_key:
+            print("ERROR: Baidu connector is missing BAIDU_API_KEY configuration")
+            # Log key names only: config values may hold credentials.
+            print(f"Connector config keys: {list(config)}")
+            return empty_result()
+
+        # Optional configuration parameters
+        model = config.get("BAIDU_MODEL", "ernie-3.5-8k")
+        search_source = config.get("BAIDU_SEARCH_SOURCE", "baidu_search_v2")
+        enable_deep_search = config.get("BAIDU_ENABLE_DEEP_SEARCH", False)
+
+        # Baidu AI Search API endpoint
+        baidu_endpoint = "https://qianfan.baidubce.com/v2/ai_search/chat/completions"
+
+        # Note: Baidu uses X-Appbuilder-Authorization instead of the standard
+        # Authorization header
+        headers = {
+            "X-Appbuilder-Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        }
+
+        # Baidu v2 supports max 20 per type; the lower bound keeps a zero or
+        # negative top_k from producing an invalid request.
+        max_per_type = max(1, min(top_k, 20))
+
+        payload = {
+            "messages": [{"role": "user", "content": user_query}],
+            "model": model,
+            "search_source": search_source,
+            "resource_type_filter": [
+                {"type": "web", "top_k": max_per_type},
+                {"type": "video", "top_k": max(1, max_per_type // 4)},  # Fewer videos
+            ],
+            "stream": False,  # Non-streaming for simpler processing
+            "enable_deep_search": enable_deep_search,
+            "enable_corner_markers": True,  # Enable reference markers
+        }
+
+        try:
+            # Baidu AI Search may take longer as it performs search + summarization
+            # Increase timeout to 90 seconds
+            async with httpx.AsyncClient(timeout=90.0) as client:
+                response = await client.post(
+                    baidu_endpoint,
+                    headers=headers,
+                    json=payload,
+                )
+                response.raise_for_status()
+        except httpx.TimeoutException as exc:
+            print(f"ERROR: Baidu API request timeout after 90s: {exc!r}")
+            print(f"Endpoint: {baidu_endpoint}")
+            return empty_result()
+        except httpx.HTTPStatusError as exc:
+            print(f"ERROR: Baidu API HTTP Status Error: {exc.response.status_code}")
+            print(f"Response text: {exc.response.text[:500]}")
+            print(f"Request URL: {exc.request.url}")
+            return empty_result()
+        except httpx.RequestError as exc:
+            print(f"ERROR: Baidu API Request Error: {type(exc).__name__}: {exc!r}")
+            print(f"Endpoint: {baidu_endpoint}")
+            return empty_result()
+        except Exception as exc:
+            print(
+                f"ERROR: Unexpected error calling Baidu API: {type(exc).__name__}: {exc!r}"
+            )
+            print(f"Endpoint: {baidu_endpoint}")
+            print(f"Payload: {payload}")
+            return empty_result()
+
+        try:
+            data = response.json()
+        except ValueError as e:
+            print(f"ERROR: Failed to decode JSON response from Baidu AI Search: {e}")
+            print(f"Response status: {response.status_code}")
+            print(f"Response text: {response.text[:500]}")  # First 500 chars
+            return empty_result()
+
+        # Valid JSON that is not an object cannot carry a "references" list.
+        if not isinstance(data, dict):
+            print(
+                f"WARNING: Unexpected Baidu API response type: {type(data).__name__}"
+            )
+            return empty_result()
+
+        if "code" in data or "message" in data:
+            print(
+                f"WARNING: Baidu API returned error - Code: {data.get('code')}, "
+                f"Message: {data.get('message')}"
+            )
+
+        # Extract references (search results) from the response
+        baidu_references = data.get("references") or []
+        if not baidu_references:
+            print("WARNING: No references found in Baidu API response")
+            print(f"Response keys: {list(data.keys())}")
+            return empty_result()
+
+        sources_list: list[dict[str, Any]] = []
+        documents: list[dict[str, Any]] = []
+
+        # Allocate ids from source_id_counter while holding counter_lock.
+        async with self.counter_lock:
+            for reference in baidu_references:
+                # Skip malformed entries instead of failing the whole search.
+                if not isinstance(reference, dict):
+                    continue
+
+                # Extract basic fields; "or" also covers explicit nulls.
+                title = reference.get("title") or "Baidu Search Result"
+                url = reference.get("url") or ""
+                content = reference.get("content") or ""
+                date = reference.get("date") or ""
+                ref_type = reference.get("type") or "web"  # web, image, video
+                source_id = self.source_id_counter
+
+                # Create a source entry
+                sources_list.append(
+                    {
+                        "id": source_id,
+                        "title": title,
+                        "description": content[:300],  # Limit description length
+                        "url": url,
+                    }
+                )
+
+                # Prepare metadata
+                metadata = {
+                    "url": url,
+                    "date": date,
+                    "type": ref_type,
+                    "source": "BAIDU_SEARCH_API",
+                    "web_anchor": reference.get("web_anchor", ""),
+                    "website": reference.get("website", ""),
+                }
+
+                # Add type-specific metadata
+                if ref_type == "image" and reference.get("image"):
+                    metadata["image"] = reference["image"]
+                elif ref_type == "video" and reference.get("video"):
+                    metadata["video"] = reference["video"]
+
+                # Create a document entry
+                documents.append(
+                    {
+                        "chunk_id": source_id,
+                        "content": content,
+                        "score": 1.0,  # Baidu doesn't provide relevance scores
+                        "document": {
+                            "id": source_id,
+                            "title": title,
+                            "document_type": "BAIDU_SEARCH_API",
+                            "metadata": metadata,
+                        },
+                    }
+                )
+                self.source_id_counter += 1
+
+        result_object = {
+            "id": 12,
+            "name": "Baidu Search",
+            "type": "BAIDU_SEARCH_API",
+            "sources": sources_list,
+        }
+
+        return result_object, documents
+
     async def search_slack(
         self,
         user_query: str,
diff --git a/surfsense_backend/app/utils/validators.py b/surfsense_backend/app/utils/validators.py
index c23ca8543..53ba187d2 100644
--- a/surfsense_backend/app/utils/validators.py +++ b/surfsense_backend/app/utils/validators.py @@ -434,6 +434,15 @@ def validate_connector_config( }, }, "LINKUP_API": {"required": ["LINKUP_API_KEY"], "validators": {}}, + "BAIDU_SEARCH_API": { + "required": ["BAIDU_API_KEY"], + "optional": [ + "BAIDU_MODEL", + "BAIDU_SEARCH_SOURCE", + "BAIDU_ENABLE_DEEP_SEARCH", + ], + "validators": {}, + }, "SLACK_CONNECTOR": {"required": ["SLACK_BOT_TOKEN"], "validators": {}}, "NOTION_CONNECTOR": { "required": ["NOTION_INTEGRATION_TOKEN"], diff --git a/surfsense_web/.gitignore b/surfsense_web/.gitignore index c78e9de6c..8c6362124 100644 --- a/surfsense_web/.gitignore +++ b/surfsense_web/.gitignore @@ -32,6 +32,11 @@ yarn-error.log* # env files (can opt-in for committing if needed) .env +.env.local +.env*.local +.env.development.local +.env.test.local +.env.production.local # vercel .vercel diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx new file mode 100644 index 000000000..1eaee9676 --- /dev/null +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx @@ -0,0 +1,319 @@ +"use client"; + +import { zodResolver } from "@hookform/resolvers/zod"; +import { ArrowLeft, Check, Info, Loader2 } from "lucide-react"; +import { motion } from "motion/react"; +import { useParams, useRouter } from "next/navigation"; +import { useState } from "react"; +import { useForm } from "react-hook-form"; +import { toast } from "sonner"; +import * as z from "zod"; +import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"; +import { Button } from "@/components/ui/button"; +import { + Card, + CardContent, + CardDescription, + CardFooter, + CardHeader, + CardTitle, +} from "@/components/ui/card"; +import { + Form, + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + FormMessage, +} from 
"@/components/ui/form"; +import { Input } from "@/components/ui/input"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Switch } from "@/components/ui/switch"; +import { EnumConnectorName } from "@/contracts/enums/connector"; +import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; +import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; + +// Define the form schema with Zod +const baiduSearchApiFormSchema = z.object({ + name: z.string().min(3, { + message: "Connector name must be at least 3 characters.", + }), + api_key: z.string().min(10, { + message: "API key is required and must be valid.", + }), + model: z.string().optional(), + search_source: z.enum(["baidu_search_v1", "baidu_search_v2"]).optional(), + enable_deep_search: z.boolean().default(false), +}); + +// Define the type for the form values +type BaiduSearchApiFormValues = z.infer; + +export default function BaiduSearchApiPage() { + const router = useRouter(); + const params = useParams(); + const searchSpaceId = params.search_space_id as string; + const [isSubmitting, setIsSubmitting] = useState(false); + const { createConnector } = useSearchSourceConnectors(); + + // Initialize the form + const form = useForm({ + resolver: zodResolver(baiduSearchApiFormSchema), + defaultValues: { + name: "Baidu Search Connector", + api_key: "", + model: "ernie-3.5-8k", + search_source: "baidu_search_v2", + enable_deep_search: false, + }, + }); + + // Handle form submission + const onSubmit = async (values: BaiduSearchApiFormValues) => { + setIsSubmitting(true); + try { + // Build config object + const config: Record = { + BAIDU_API_KEY: values.api_key, + }; + + // Add optional parameters if provided + if (values.model) { + config.BAIDU_MODEL = values.model; + } + if (values.search_source) { + config.BAIDU_SEARCH_SOURCE = values.search_source; + } + if (values.enable_deep_search !== undefined) { + 
config.BAIDU_ENABLE_DEEP_SEARCH = values.enable_deep_search; + } + + await createConnector( + { + name: values.name, + connector_type: EnumConnectorName.BAIDU_SEARCH_API, + config, + is_indexable: false, + last_indexed_at: null, + }, + parseInt(searchSpaceId) + ); + + toast.success("Baidu Search connector created successfully!"); + + // Navigate back to connectors page + router.push(`/dashboard/${searchSpaceId}/connectors`); + } catch (error) { + console.error("Error creating connector:", error); + toast.error(error instanceof Error ? error.message : "Failed to create connector"); + } finally { + setIsSubmitting(false); + } + }; + + return ( +
+ + + {/* Header */} +
+
+
+ {getConnectorIcon(EnumConnectorName.BAIDU_SEARCH_API, "h-6 w-6")} +
+
+

Connect Baidu Search

+

+ Connect Baidu AI Search for intelligent Chinese web search capabilities. +

+
+
+
+ + + + + Connect Baidu Search + + Integrate with Baidu AI Search to enhance your search capabilities with + intelligent Chinese web search results. + + + + + + API Key Required + + You'll need a Baidu AppBuilder API key to use this connector. You can get one by + signing up at{" "} + + qianfan.cloud.baidu.com + + + + +
+ + ( + + Connector Name + + + + A friendly name to identify this connector. + + + )} + /> + + ( + + Baidu AppBuilder API Key + + + + + Your API key will be encrypted and stored securely. + + + + )} + /> + + ( + + Model (Optional) + + + The language model used for search summarization. Default: ERNIE 3.5 8K. + + + + )} + /> + + ( + + Search Source (Optional) + + + V2 provides better performance and richer content. Default: V2. + + + + )} + /> + + ( + +
+ Enable Deep Search + + Deep search retrieves up to 100 results per type (may incur additional + costs). + +
+ + + +
+ )} + /> + +
+ +
+ + +
+ +

What you get with Baidu Search:

+
    +
  • Intelligent search tailored for Chinese web content
  • +
  • Real-time information from Baidu's search index
  • +
  • AI-powered summarization with source references
  • +
  • Support for web, video, and image search results
  • +
+
+
+
+
+	);
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx
index 22b803f9d..4f34b9c22 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx
@@ -64,6 +64,13 @@ const connectorCategories: ConnectorCategory[] = [
 				title: "Elasticsearch",
 				description: "Connect to Elasticsearch to index and search documents, logs and metrics.",
 				icon: getConnectorIcon(EnumConnectorName.ELASTICSEARCH_CONNECTOR, "h-6 w-6"),
+				status: "available",
+			},
+			{
+				id: "baidu-search-api",
+				title: "Baidu Search",
+				description: "Search the Chinese web using Baidu AI Search API",
+				icon: getConnectorIcon(EnumConnectorName.BAIDU_SEARCH_API, "h-6 w-6"),
 				status: "available",
 			},
 		],
diff --git a/surfsense_web/components/chat/SourceDetailSheet.tsx b/surfsense_web/components/chat/SourceDetailSheet.tsx
index 138d004db..ecc8db4d2 100644
--- a/surfsense_web/components/chat/SourceDetailSheet.tsx
+++ b/surfsense_web/components/chat/SourceDetailSheet.tsx
@@ -53,7 +53,10 @@ export function SourceDetailSheet({
 
 	// Check if this is a source type that should render directly from node
 	const isDirectRenderSource =
-		sourceType === "TAVILY_API" || sourceType === "LINKUP_API" || sourceType === "SEARXNG_API";
+		sourceType === "TAVILY_API" ||
+		sourceType === "LINKUP_API" ||
+		sourceType === "SEARXNG_API" ||
+		sourceType === "BAIDU_SEARCH_API";
 
 	useEffect(() => {
 		if (open && chunkId && !isDirectRenderSource) {
diff --git a/surfsense_web/contracts/enums/connector.ts b/surfsense_web/contracts/enums/connector.ts
index 39117aad5..50486c92e 100644
--- a/surfsense_web/contracts/enums/connector.ts
+++ b/surfsense_web/contracts/enums/connector.ts
@@ -3,6 +3,7 @@ export enum EnumConnectorName {
 	TAVILY_API = "TAVILY_API",
 	SEARXNG_API = "SEARXNG_API",
 	LINKUP_API = "LINKUP_API",
+	BAIDU_SEARCH_API = "BAIDU_SEARCH_API",
 	SLACK_CONNECTOR =
"SLACK_CONNECTOR", NOTION_CONNECTOR = "NOTION_CONNECTOR", GITHUB_CONNECTOR = "GITHUB_CONNECTOR", diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx index e8831cdc0..66e9edfcc 100644 --- a/surfsense_web/contracts/enums/connectorIcons.tsx +++ b/surfsense_web/contracts/enums/connectorIcons.tsx @@ -35,6 +35,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas return ; case EnumConnectorName.SEARXNG_API: return ; + case EnumConnectorName.BAIDU_SEARCH_API: + return ; case EnumConnectorName.SLACK_CONNECTOR: return ; case EnumConnectorName.NOTION_CONNECTOR: