From 81dfc7102fa34ae756ab9b18b0fd246d31c25df8 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Fri, 20 Feb 2026 16:45:50 -0800 Subject: [PATCH] feat: fixed live connectors citations --- .../app/agents/new_chat/system_prompt.py | 28 +++- .../agents/new_chat/tools/knowledge_base.py | 16 ++- .../assistant-ui/connector-popup.tsx | 2 +- .../baidu-search-api-connect-form.tsx | 1 + .../components/circleback-connect-form.tsx | 1 + .../components/elasticsearch-connect-form.tsx | 1 + .../components/linkup-api-connect-form.tsx | 1 + .../components/luma-connect-form.tsx | 1 + .../components/searxng-connect-form.tsx | 1 + .../components/tavily-api-connect-form.tsx | 1 + .../views/connector-connect-view.tsx | 55 ++++---- .../hooks/use-connector-dialog.ts | 5 +- .../assistant-ui/inline-citation.tsx | 46 ++++++- .../components/assistant-ui/markdown-text.tsx | 122 ++++++++++-------- 14 files changed, 189 insertions(+), 92 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 32a484fce..3fd83b716 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -523,6 +523,7 @@ CRITICAL CITATION REQUIREMENTS: The documents you receive are structured like this: +**Knowledge base documents (numeric chunk IDs):** 42 @@ -538,7 +539,24 @@ The documents you receive are structured like this: -IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124, doc-45). Do NOT cite document_id. +**Live web search results (URL chunk IDs):** + + + TAVILY_API::Some Title::https://example.com/article + TAVILY_API + <![CDATA[Some web search result]]> + + + + + + + + +IMPORTANT: You MUST cite using the EXACT chunk ids from the `` tags. +- For knowledge base documents, chunk ids are numeric (e.g. 123, 124) or prefixed (e.g. doc-45). +- For live web search results, chunk ids are URLs (e.g. https://example.com/article). +Do NOT cite document_id. Always use the chunk id. @@ -550,13 +568,15 @@ IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124, doc-45). Do NOT cit - NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only - NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess - Copy the EXACT chunk id from the XML - if it says ``, use [citation:doc-123] +- If the chunk id is a URL like ``, use [citation:https://example.com/page] CORRECT citation formats: -- [citation:5] +- [citation:5] (numeric chunk ID from knowledge base) - [citation:doc-123] (for Surfsense documentation chunks) -- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] +- [citation:https://example.com/article] (URL chunk ID from web search results) +- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations) INCORRECT citation formats (DO NOT use): - Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense)) @@ -571,7 +591,7 @@ INCORRECT citation formats (DO NOT use): Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5]. -The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources. +According to web search results, the key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:https://docs.python.org/3/library/asyncio.html]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources. However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead. diff --git a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py index fefd80f04..931ea1657 100644 --- a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py +++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py @@ -210,6 +210,7 @@ def format_documents_for_context(documents: list[dict[str, Any]]) -> str: source = ( (doc.get("source") if isinstance(doc, dict) else None) + or document_info.get("document_type") or metadata.get("document_type") or "UNKNOWN" ) @@ -268,10 +269,20 @@ def format_documents_for_context(documents: list[dict[str, Any]]) -> str: continue grouped[doc_key]["chunks"].append({"chunk_id": chunk_id, "content": content}) + # Live search connectors whose results should be cited by URL rather than + # a numeric chunk_id (the numeric IDs are meaningless auto-incremented counters). + _LIVE_SEARCH_CONNECTORS = { + "TAVILY_API", + "SEARXNG_API", + "LINKUP_API", + "BAIDU_SEARCH_API", + } + # Render XML expected by citation instructions parts: list[str] = [] for g in grouped.values(): metadata_json = json.dumps(g["metadata"], ensure_ascii=False) + is_live_search = g["document_type"] in _LIVE_SEARCH_CONNECTORS parts.append("") parts.append("") @@ -286,7 +297,10 @@ def format_documents_for_context(documents: list[dict[str, Any]]) -> str: for ch in g["chunks"]: ch_content = ch["content"] - ch_id = ch["chunk_id"] + # For live search connectors, use the document URL as the chunk id + # so the LLM outputs [citation:https://...] which the frontend + # renders as a clickable link. + ch_id = g["url"] if (is_live_search and g["url"]) else ch["chunk_id"] if ch_id is None: parts.append(f" ") else: diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx index e597770ee..143508977 100644 --- a/surfsense_web/components/assistant-ui/connector-popup.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup.tsx @@ -418,7 +418,7 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger {/* Bottom fade shadow */} -
+
)} diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/baidu-search-api-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/baidu-search-api-connect-form.tsx index d8bd01209..044bb5565 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/baidu-search-api-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/baidu-search-api-connect-form.tsx @@ -57,6 +57,7 @@ export const BaiduSearchApiConnectForm: FC = ({ onSubmit, isSu BAIDU_API_KEY: values.api_key, }, is_indexable: false, + is_active: true, last_indexed_at: null, periodic_indexing_enabled: false, indexing_frequency_minutes: null, diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/circleback-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/circleback-connect-form.tsx index cd7f1a888..625856e46 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/circleback-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/circleback-connect-form.tsx @@ -51,6 +51,7 @@ export const CirclebackConnectForm: FC = ({ onSubmit, isSubmit connector_type: EnumConnectorName.CIRCLEBACK_CONNECTOR, config: {}, is_indexable: false, + is_active: true, last_indexed_at: null, periodic_indexing_enabled: false, indexing_frequency_minutes: null, diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/elasticsearch-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/elasticsearch-connect-form.tsx index 3ceca0930..95b93cd20 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/elasticsearch-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/elasticsearch-connect-form.tsx @@ -155,6 +155,7 @@ export const ElasticsearchConnectForm: FC = ({ onSubmit, isSub connector_type: EnumConnectorName.ELASTICSEARCH_CONNECTOR, config, is_indexable: true, + is_active: true, last_indexed_at: null, periodic_indexing_enabled: periodicEnabled, indexing_frequency_minutes: periodicEnabled ? parseInt(frequencyMinutes, 10) : null, diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/linkup-api-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/linkup-api-connect-form.tsx index c51ca1bca..7c4ff0a46 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/linkup-api-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/linkup-api-connect-form.tsx @@ -57,6 +57,7 @@ export const LinkupApiConnectForm: FC = ({ onSubmit, isSubmitt LINKUP_API_KEY: values.api_key, }, is_indexable: false, + is_active: true, last_indexed_at: null, periodic_indexing_enabled: false, indexing_frequency_minutes: null, diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/luma-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/luma-connect-form.tsx index 2804dbba8..63be3cc93 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/luma-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/luma-connect-form.tsx @@ -71,6 +71,7 @@ export const LumaConnectForm: FC = ({ onSubmit, isSubmitting } LUMA_API_KEY: values.api_key, }, is_indexable: true, + is_active: true, last_indexed_at: null, periodic_indexing_enabled: periodicEnabled, indexing_frequency_minutes: periodicEnabled ? parseInt(frequencyMinutes, 10) : null, diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/searxng-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/searxng-connect-form.tsx index 658d59d60..ecf219924 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/searxng-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/searxng-connect-form.tsx @@ -110,6 +110,7 @@ export const SearxngConnectForm: FC = ({ onSubmit, isSubmittin connector_type: EnumConnectorName.SEARXNG_API, config, is_indexable: false, + is_active: true, last_indexed_at: null, periodic_indexing_enabled: false, indexing_frequency_minutes: null, diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/tavily-api-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/tavily-api-connect-form.tsx index fc2533e6f..a8032e11a 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/tavily-api-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/tavily-api-connect-form.tsx @@ -57,6 +57,7 @@ export const TavilyApiConnectForm: FC = ({ onSubmit, isSubmitt TAVILY_API_KEY: values.api_key, }, is_indexable: false, + is_active: true, last_indexed_at: null, periodic_indexing_enabled: false, indexing_frequency_minutes: null, diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx index ec754da2e..e8d811d9f 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx @@ -1,7 +1,7 @@ "use client"; import { ArrowLeft } from "lucide-react"; -import { type FC, useMemo } from "react"; +import { type FC, useMemo, useRef } from "react"; import { Button } from "@/components/ui/button"; import { Spinner } from "@/components/ui/spinner"; import type { EnumConnectorName } from "@/contracts/enums/connector"; @@ -9,6 +9,20 @@ import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import { getConnectorTypeDisplay } from "@/lib/connectors/utils"; import { getConnectFormComponent } from "../../connect-forms"; +const FORM_ID_MAP: Record = { + TAVILY_API: "tavily-connect-form", + SEARXNG_API: "searxng-connect-form", + LINKUP_API: "linkup-api-connect-form", + BAIDU_SEARCH_API: "baidu-search-api-connect-form", + ELASTICSEARCH_CONNECTOR: "elasticsearch-connect-form", + BOOKSTACK_CONNECTOR: "bookstack-connect-form", + GITHUB_CONNECTOR: "github-connect-form", + LUMA_CONNECTOR: "luma-connect-form", + CIRCLEBACK_CONNECTOR: "circleback-connect-form", + MCP_CONNECTOR: "mcp-connect-form", + OBSIDIAN_CONNECTOR: "obsidian-connect-form", +}; + interface ConnectorConnectViewProps { connectorType: string; onSubmit: (data: { @@ -35,6 +49,7 @@ export const ConnectorConnectView: FC = ({ onBack, isSubmitting, }) => { + const formContainerRef = useRef(null); // Get connector-specific form component const ConnectFormComponent = useMemo( () => getConnectFormComponent(connectorType), @@ -46,26 +61,18 @@ export const ConnectorConnectView: FC = ({ if (isSubmitting) { return; } - // Map connector types to their form IDs - const formIdMap: Record = { - TAVILY_API: "tavily-connect-form", - SEARXNG_API: "searxng-connect-form", - LINKUP_API: "linkup-api-connect-form", - BAIDU_SEARCH_API: "baidu-search-api-connect-form", - ELASTICSEARCH_CONNECTOR: "elasticsearch-connect-form", - BOOKSTACK_CONNECTOR: "bookstack-connect-form", - GITHUB_CONNECTOR: "github-connect-form", - LUMA_CONNECTOR: "luma-connect-form", - CIRCLEBACK_CONNECTOR: "circleback-connect-form", - MCP_CONNECTOR: "mcp-connect-form", - OBSIDIAN_CONNECTOR: "obsidian-connect-form", - }; - const formId = formIdMap[connectorType]; - if (formId) { - const form = document.getElementById(formId) as HTMLFormElement; - if (form) { - form.requestSubmit(); - } + const formId = FORM_ID_MAP[connectorType]; + const root = formContainerRef.current; + const mappedForm = + root && formId + ? (root.querySelector(`[id="${formId}"]`) as HTMLFormElement | null) + : null; + // Fallback to currently rendered form to avoid silent no-op + // when a connector type or form id mapping drifts. + const fallbackForm = root?.querySelector("form") as HTMLFormElement | null; + const form = mappedForm ?? fallbackForm; + if (form) { + form.requestSubmit(); } }; @@ -114,7 +121,10 @@ export const ConnectorConnectView: FC = ({
{/* Form Content - Scrollable */} -
+
= ({ Cancel