feat: fixed live connectors citations

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-20 16:45:50 -08:00
parent ce46708064
commit 81dfc7102f
14 changed files with 189 additions and 92 deletions

View file

@ -418,7 +418,7 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
</div>
</div>
{/* Bottom fade shadow */}
<div className="absolute bottom-0 left-0 right-0 h-7 bg-gradient-to-t from-muted via-muted/80 to-transparent pointer-events-none z-10" />
<div className="absolute bottom-0 left-0 right-0 h-7 bg-linear-to-t from-muted via-muted/80 to-transparent pointer-events-none z-10" />
</div>
</Tabs>
)}

View file

@ -57,6 +57,7 @@ export const BaiduSearchApiConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSu
BAIDU_API_KEY: values.api_key,
},
is_indexable: false,
is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: false,
indexing_frequency_minutes: null,

View file

@ -51,6 +51,7 @@ export const CirclebackConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmit
connector_type: EnumConnectorName.CIRCLEBACK_CONNECTOR,
config: {},
is_indexable: false,
is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: false,
indexing_frequency_minutes: null,

View file

@ -155,6 +155,7 @@ export const ElasticsearchConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSub
connector_type: EnumConnectorName.ELASTICSEARCH_CONNECTOR,
config,
is_indexable: true,
is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: periodicEnabled,
indexing_frequency_minutes: periodicEnabled ? parseInt(frequencyMinutes, 10) : null,

View file

@ -57,6 +57,7 @@ export const LinkupApiConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitt
LINKUP_API_KEY: values.api_key,
},
is_indexable: false,
is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: false,
indexing_frequency_minutes: null,

View file

@ -71,6 +71,7 @@ export const LumaConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitting }
LUMA_API_KEY: values.api_key,
},
is_indexable: true,
is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: periodicEnabled,
indexing_frequency_minutes: periodicEnabled ? parseInt(frequencyMinutes, 10) : null,

View file

@ -110,6 +110,7 @@ export const SearxngConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmittin
connector_type: EnumConnectorName.SEARXNG_API,
config,
is_indexable: false,
is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: false,
indexing_frequency_minutes: null,

View file

@ -57,6 +57,7 @@ export const TavilyApiConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitt
TAVILY_API_KEY: values.api_key,
},
is_indexable: false,
is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: false,
indexing_frequency_minutes: null,

View file

@ -1,7 +1,7 @@
"use client";
import { ArrowLeft } from "lucide-react";
import { type FC, useMemo } from "react";
import { type FC, useMemo, useRef } from "react";
import { Button } from "@/components/ui/button";
import { Spinner } from "@/components/ui/spinner";
import type { EnumConnectorName } from "@/contracts/enums/connector";
@ -9,6 +9,20 @@ import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
import { getConnectorTypeDisplay } from "@/lib/connectors/utils";
import { getConnectFormComponent } from "../../connect-forms";
const FORM_ID_MAP: Record<string, string> = {
TAVILY_API: "tavily-connect-form",
SEARXNG_API: "searxng-connect-form",
LINKUP_API: "linkup-api-connect-form",
BAIDU_SEARCH_API: "baidu-search-api-connect-form",
ELASTICSEARCH_CONNECTOR: "elasticsearch-connect-form",
BOOKSTACK_CONNECTOR: "bookstack-connect-form",
GITHUB_CONNECTOR: "github-connect-form",
LUMA_CONNECTOR: "luma-connect-form",
CIRCLEBACK_CONNECTOR: "circleback-connect-form",
MCP_CONNECTOR: "mcp-connect-form",
OBSIDIAN_CONNECTOR: "obsidian-connect-form",
};
interface ConnectorConnectViewProps {
connectorType: string;
onSubmit: (data: {
@ -35,6 +49,7 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
onBack,
isSubmitting,
}) => {
const formContainerRef = useRef<HTMLDivElement | null>(null);
// Get connector-specific form component
const ConnectFormComponent = useMemo(
() => getConnectFormComponent(connectorType),
@ -46,26 +61,18 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
if (isSubmitting) {
return;
}
// Map connector types to their form IDs
const formIdMap: Record<string, string> = {
TAVILY_API: "tavily-connect-form",
SEARXNG_API: "searxng-connect-form",
LINKUP_API: "linkup-api-connect-form",
BAIDU_SEARCH_API: "baidu-search-api-connect-form",
ELASTICSEARCH_CONNECTOR: "elasticsearch-connect-form",
BOOKSTACK_CONNECTOR: "bookstack-connect-form",
GITHUB_CONNECTOR: "github-connect-form",
LUMA_CONNECTOR: "luma-connect-form",
CIRCLEBACK_CONNECTOR: "circleback-connect-form",
MCP_CONNECTOR: "mcp-connect-form",
OBSIDIAN_CONNECTOR: "obsidian-connect-form",
};
const formId = formIdMap[connectorType];
if (formId) {
const form = document.getElementById(formId) as HTMLFormElement;
if (form) {
form.requestSubmit();
}
const formId = FORM_ID_MAP[connectorType];
const root = formContainerRef.current;
const mappedForm =
root && formId
? (root.querySelector(`[id="${formId}"]`) as HTMLFormElement | null)
: null;
// Fallback to currently rendered form to avoid silent no-op
// when a connector type or form id mapping drifts.
const fallbackForm = root?.querySelector("form") as HTMLFormElement | null;
const form = mappedForm ?? fallbackForm;
if (form) {
form.requestSubmit();
}
};
@ -114,7 +121,10 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
</div>
{/* Form Content - Scrollable */}
<div className="flex-1 min-h-0 overflow-y-auto px-6 sm:px-12">
<div
ref={formContainerRef}
className="connector-connect-form-root flex-1 min-h-0 overflow-y-auto px-6 sm:px-12"
>
<ConnectFormComponent
onSubmit={onSubmit}
onBack={onBack}
@ -134,6 +144,7 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
Cancel
</Button>
<Button
type="button"
onClick={handleFormSubmit}
disabled={isSubmitting}
className="text-xs sm:text-sm min-w-[140px] disabled:opacity-50 disabled:cursor-not-allowed disabled:pointer-events-none"

View file

@ -558,7 +558,9 @@ export const useConnectorDialog = () => {
},
onIndexingStart?: (connectorId: number) => void
) => {
if (!searchSpaceId || !connectingConnectorType) return;
if (!searchSpaceId || !connectingConnectorType) {
return;
}
// Prevent multiple submissions using ref for immediate check
if (isCreatingConnectorRef.current) return;
@ -582,7 +584,6 @@ export const useConnectorDialog = () => {
search_space_id: searchSpaceId,
},
});
// Refetch connectors to get the new one
const result = await refetchAllConnectors();
if (result.data) {

View file

@ -2,22 +2,20 @@
import type { FC } from "react";
import { useState } from "react";
import { ExternalLink } from "lucide-react";
import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel";
interface InlineCitationProps {
chunkId: number;
citationNumber: number;
isDocsChunk?: boolean;
}
/**
* Inline citation component for the new chat.
* Renders a clickable numbered badge that opens the SourceDetailPanel with document chunk details.
* Supports both regular knowledge base chunks and Surfsense documentation chunks.
* Inline citation for knowledge-base chunks (numeric chunk IDs).
* Renders a clickable badge showing the actual chunk ID that opens the SourceDetailPanel.
*/
export const InlineCitation: FC<InlineCitationProps> = ({
chunkId,
citationNumber,
isDocsChunk = false,
}) => {
const [isOpen, setIsOpen] = useState(false);
@ -37,12 +35,46 @@ export const InlineCitation: FC<InlineCitationProps> = ({
onClick={() => setIsOpen(true)}
onKeyDown={(e) => e.key === "Enter" && setIsOpen(true)}
className="text-[10px] font-bold bg-primary/80 hover:bg-primary text-primary-foreground rounded-full min-w-4 h-4 px-1 inline-flex items-center justify-center align-super cursor-pointer transition-colors ml-0.5"
title={`View source #${citationNumber}`}
title={`View source chunk #${chunkId}`}
role="button"
tabIndex={0}
>
{citationNumber}
{chunkId}
</span>
</SourceDetailPanel>
);
};
function extractDomain(url: string): string {
try {
const hostname = new URL(url).hostname;
return hostname.replace(/^www\./, "");
} catch {
return url;
}
}
interface UrlCitationProps {
url: string;
}
/**
* Inline citation for live web search results (URL-based chunk IDs).
* Renders a clickable badge showing the source domain that opens the URL in a new tab.
*/
export const UrlCitation: FC<UrlCitationProps> = ({ url }) => {
const domain = extractDomain(url);
return (
<a
href={url}
target="_blank"
rel="noopener noreferrer"
className="text-[10px] font-bold bg-primary/80 hover:bg-primary text-primary-foreground rounded-full h-4 px-1.5 inline-flex items-center gap-0.5 align-super cursor-pointer transition-colors ml-0.5 no-underline"
title={url}
>
<ExternalLink className="size-2.5 shrink-0" />
{domain}
</a>
);
};

View file

@ -14,17 +14,36 @@ import rehypeKatex from "rehype-katex";
import remarkGfm from "remark-gfm";
import remarkMath from "remark-math";
import "katex/dist/katex.min.css";
import { InlineCitation } from "@/components/assistant-ui/inline-citation";
import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
import { cn } from "@/lib/utils";
// Storage for URL citations replaced during preprocess to avoid GFM autolink interference.
// Populated in preprocessMarkdown, consumed in parseTextWithCitations.
let _pendingUrlCitations = new Map<string, string>();
let _urlCiteIdx = 0;
/**
* Convert all LaTeX delimiter styles to the dollar-sign syntax
* that remark-math understands. LLMs use various delimiters
* (\(...\), \[...\], \begin{equation}, etc.) and we need to
* normalise them all to $ / $$ before the markdown parser runs.
* Preprocess raw markdown before it reaches the remark/rehype pipeline.
* - Replaces URL-based citations with safe placeholders (prevents GFM autolinks)
* - Normalises LaTeX delimiters to dollar-sign syntax for remark-math
*/
function convertLatexDelimiters(content: string): string {
function preprocessMarkdown(content: string): string {
// Replace URL-based citations with safe placeholders BEFORE markdown parsing.
// GFM autolinks would otherwise convert the https://... inside [citation:URL]
// into an <a> element, splitting the text and preventing our citation regex
// from matching the full pattern.
_pendingUrlCitations = new Map();
_urlCiteIdx = 0;
content = content.replace(
/[[【]\u200B?citation:\s*(https?:\/\/[^\]\】\u200B]+)\s*\u200B?[\]】]/g,
(_, url) => {
const key = `urlcite${_urlCiteIdx++}`;
_pendingUrlCitations.set(key, url.trim());
return `[citation:${key}]`;
}
);
// 1. Block math: \[...\] → $$...$$
content = content.replace(/\\\[([\s\S]*?)\\\]/g, (_, inner) => `$$${inner}$$`);
// 2. Inline math: \(...\) → $...$
@ -50,40 +69,19 @@ function convertLatexDelimiters(content: string): string {
return content;
}
// Citation pattern: [citation:CHUNK_ID] or [citation:doc-CHUNK_ID]
// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts
const CITATION_REGEX = /[[【]\u200B?citation:(doc-)?(\d+)\u200B?[\]】]/g;
// Track chunk IDs to citation numbers mapping for consistent numbering
// This map is reset when a new message starts rendering
// Uses string keys to differentiate between doc and regular chunks (e.g., "doc-123" vs "123")
let chunkIdToCitationNumber: Map<string, number> = new Map();
let nextCitationNumber = 1;
// Matches [citation:...] with numeric IDs (incl. doc- prefix, comma-separated),
// URL-based IDs from live web search, or urlciteN placeholders from preprocess.
// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts.
const CITATION_REGEX =
/[[【]\u200B?citation:\s*(https?:\/\/[^\]\】\u200B]+|urlcite\d+|(?:doc-)?\d+(?:\s*,\s*(?:doc-)?\d+)*)\s*\u200B?[\]】]/g;
/**
* Resets the citation counter - should be called at the start of each message
*/
export function resetCitationCounter() {
chunkIdToCitationNumber = new Map();
nextCitationNumber = 1;
}
/**
* Gets or assigns a citation number for a chunk ID
* Uses string key to differentiate between doc and regular chunks
*/
function getCitationNumber(chunkId: number, isDocsChunk: boolean): number {
const key = isDocsChunk ? `doc-${chunkId}` : String(chunkId);
const existingNumber = chunkIdToCitationNumber.get(key);
if (existingNumber === undefined) {
chunkIdToCitationNumber.set(key, nextCitationNumber++);
}
return chunkIdToCitationNumber.get(key)!;
}
/**
* Parses text and replaces [citation:XXX] patterns with InlineCitation components
* Supports both regular chunks [citation:123] and docs chunks [citation:doc-123]
* Parses text and replaces [citation:XXX] patterns with citation components.
* Supports:
* - Numeric chunk IDs: [citation:123]
* - Doc-prefixed IDs: [citation:doc-123]
* - Comma-separated IDs: [citation:4149, 4150, 4151]
* - URL-based citations from live search: [citation:https://example.com/page]
*/
function parseTextWithCitations(text: string): ReactNode[] {
const parts: ReactNode[] = [];
@ -91,35 +89,49 @@ function parseTextWithCitations(text: string): ReactNode[] {
let match: RegExpExecArray | null;
let instanceIndex = 0;
// Reset regex state
CITATION_REGEX.lastIndex = 0;
match = CITATION_REGEX.exec(text);
while (match !== null) {
// Add text before the citation
if (match.index > lastIndex) {
parts.push(text.substring(lastIndex, match.index));
}
// Check if this is a docs chunk (has "doc-" prefix)
const isDocsChunk = match[1] === "doc-";
const chunkId = Number.parseInt(match[2], 10);
const citationNumber = getCitationNumber(chunkId, isDocsChunk);
parts.push(
<InlineCitation
key={`citation-${isDocsChunk ? "doc-" : ""}${chunkId}-${instanceIndex}`}
chunkId={chunkId}
citationNumber={citationNumber}
isDocsChunk={isDocsChunk}
/>
);
const captured = match[1];
if (captured.startsWith("http://") || captured.startsWith("https://")) {
parts.push(
<UrlCitation key={`citation-url-${instanceIndex}`} url={captured.trim()} />
);
instanceIndex++;
} else if (captured.startsWith("urlcite")) {
const url = _pendingUrlCitations.get(captured);
if (url) {
parts.push(
<UrlCitation key={`citation-url-${instanceIndex}`} url={url} />
);
}
instanceIndex++;
} else {
const rawIds = captured.split(",").map((s) => s.trim());
for (const rawId of rawIds) {
const isDocsChunk = rawId.startsWith("doc-");
const chunkId = Number.parseInt(isDocsChunk ? rawId.slice(4) : rawId, 10);
parts.push(
<InlineCitation
key={`citation-${isDocsChunk ? "doc-" : ""}${chunkId}-${instanceIndex}`}
chunkId={chunkId}
isDocsChunk={isDocsChunk}
/>
);
instanceIndex++;
}
}
lastIndex = match.index + match[0].length;
instanceIndex++;
match = CITATION_REGEX.exec(text);
}
// Add any remaining text after the last citation
if (lastIndex < text.length) {
parts.push(text.substring(lastIndex));
}
@ -134,7 +146,7 @@ const MarkdownTextImpl = () => {
rehypePlugins={[rehypeKatex]}
className="aui-md"
components={defaultComponents}
preprocess={convertLatexDelimiters}
preprocess={preprocessMarkdown}
/>
);
};