mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-24 21:38:09 +02:00
feat: parse and render kb line citations
This commit is contained in:
parent
73dd4e8e3a
commit
5f341bdd2f
4 changed files with 101 additions and 10 deletions
|
|
@ -2,9 +2,11 @@
|
||||||
|
|
||||||
import { useSetAtom } from "jotai";
|
import { useSetAtom } from "jotai";
|
||||||
import { FileText } from "lucide-react";
|
import { FileText } from "lucide-react";
|
||||||
|
import { useParams } from "next/navigation";
|
||||||
import type { FC } from "react";
|
import type { FC } from "react";
|
||||||
import { useId, useState } from "react";
|
import { useId, useState } from "react";
|
||||||
import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom";
|
import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom";
|
||||||
|
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
|
||||||
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
|
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
|
||||||
import { CitationPanelContent } from "@/components/citation-panel/citation-panel";
|
import { CitationPanelContent } from "@/components/citation-panel/citation-panel";
|
||||||
import { Citation } from "@/components/tool-ui/citation";
|
import { Citation } from "@/components/tool-ui/citation";
|
||||||
|
|
@ -108,6 +110,50 @@ const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Props for {@link LineCitation}: a document id plus the cited line range. */
interface LineCitationProps {
	/** Numeric id of the cited document (the `<documentId>` part of `d<documentId>#L<start>-<end>`). */
	documentId: number;
	/** First cited line, as written in the citation token. */
	startLine: number;
	/** Last cited line, as written in the citation token. */
	endLine: number;
}
|
||||||
|
|
||||||
|
/**
 * Inline citation for a knowledge-base document line range
 * (`[citation:d<documentId>#L<start>-<end>]`).
 *
 * Clicking calls the editor-panel atom with `forceSourceView: true` and the
 * cited range as `highlightLines`, so the document opens in the editor's
 * read-only source view with the cited lines highlighted.
 *
 * The search space id is read from the `search_space_id` route param. When it
 * does not convert to a finite number the button is rendered disabled instead
 * of silently ignoring clicks.
 *
 * @param documentId - Id of the cited document.
 * @param startLine - First cited line as written in the citation token.
 * @param endLine - Last cited line as written in the citation token.
 */
export const LineCitation: FC<LineCitationProps> = ({ documentId, startLine, endLine }) => {
	const openEditorPanel = useSetAtom(openEditorPanelAtom);
	const params = useParams();
	const searchSpaceId = Number(params?.search_space_id);
	const canOpen = Number.isFinite(searchSpaceId);

	// Citation tokens come from generated text, so a reversed range such as
	// `L20-10` is possible. Normalise it so the highlight request and the
	// visible label always describe an ascending range.
	const firstLine = Math.min(startLine, endLine);
	const lastLine = Math.max(startLine, endLine);
	const isSingleLine = firstLine === lastLine;

	// Keep the label, tooltip and accessible name in agreement: a one-line
	// citation reads "line 5", never "lines 5–5".
	const label = isSingleLine ? `L${firstLine}` : `L${firstLine}-${lastLine}`;
	const title = isSingleLine
		? `View cited line ${firstLine}`
		: `View cited lines ${firstLine}–${lastLine}`;
	const ariaLabel = isSingleLine
		? `View cited document line ${firstLine}`
		: `View cited document lines ${firstLine} to ${lastLine}`;

	const handleClick = () => {
		if (!canOpen) return;
		openEditorPanel({
			documentId,
			searchSpaceId,
			highlightLines: { start: firstLine, end: lastLine },
			forceSourceView: true,
		});
	};

	return (
		<Button
			type="button"
			variant="ghost"
			onClick={handleClick}
			disabled={!canOpen}
			className="ml-0.5 inline-flex h-5 min-w-5 items-center justify-center gap-0.5 rounded-md bg-popover px-1.5 text-[11px] font-medium text-popover-foreground/80 align-baseline"
			title={title}
			aria-label={ariaLabel}
		>
			<FileText className="size-3" />
			{label}
		</Button>
	);
};
|
||||||
|
|
||||||
import { tryGetHostname } from "@/lib/url";
|
import { tryGetHostname } from "@/lib/url";
|
||||||
|
|
||||||
interface UrlCitationProps {
|
interface UrlCitationProps {
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import type { ReactNode } from "react";
|
import type { ReactNode } from "react";
|
||||||
import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
|
import { InlineCitation, LineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
|
||||||
import {
|
import {
|
||||||
type CitationToken,
|
type CitationToken,
|
||||||
type CitationUrlMap,
|
type CitationUrlMap,
|
||||||
|
|
@ -21,6 +21,16 @@ export function renderCitationToken(token: CitationToken, ordinalKey: number): R
|
||||||
if (token.kind === "url") {
|
if (token.kind === "url") {
|
||||||
return <UrlCitation key={`citation-url-${ordinalKey}`} url={token.url} />;
|
return <UrlCitation key={`citation-url-${ordinalKey}`} url={token.url} />;
|
||||||
}
|
}
|
||||||
|
if (token.kind === "line") {
|
||||||
|
return (
|
||||||
|
<LineCitation
|
||||||
|
key={`citation-line-${token.documentId}-${token.startLine}-${ordinalKey}`}
|
||||||
|
documentId={token.documentId}
|
||||||
|
startLine={token.startLine}
|
||||||
|
endLine={token.endLine}
|
||||||
|
/>
|
||||||
|
);
|
||||||
|
}
|
||||||
return (
|
return (
|
||||||
<InlineCitation
|
<InlineCitation
|
||||||
key={`citation-${token.isDocsChunk ? "doc-" : ""}${token.chunkId}-${ordinalKey}`}
|
key={`citation-${token.isDocsChunk ? "doc-" : ""}${token.chunkId}-${ordinalKey}`}
|
||||||
|
|
|
||||||
|
|
@ -3,9 +3,10 @@
|
||||||
import { type Descendant, KEYS } from "platejs";
|
import { type Descendant, KEYS } from "platejs";
|
||||||
import { createPlatePlugin, type PlateElementProps } from "platejs/react";
|
import { createPlatePlugin, type PlateElementProps } from "platejs/react";
|
||||||
import type { FC } from "react";
|
import type { FC } from "react";
|
||||||
import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
|
import { InlineCitation, LineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
|
||||||
import {
|
import {
|
||||||
CITATION_REGEX,
|
CITATION_REGEX,
|
||||||
|
type CitationToken,
|
||||||
type CitationUrlMap,
|
type CitationUrlMap,
|
||||||
parseTextWithCitations,
|
parseTextWithCitations,
|
||||||
} from "@/lib/citations/citation-parser";
|
} from "@/lib/citations/citation-parser";
|
||||||
|
|
@ -17,9 +18,12 @@ import {
|
||||||
*/
|
*/
|
||||||
export type CitationElementNode = {
|
export type CitationElementNode = {
|
||||||
type: "citation";
|
type: "citation";
|
||||||
kind: "chunk" | "doc" | "url";
|
kind: "chunk" | "doc" | "url" | "line";
|
||||||
chunkId?: number;
|
chunkId?: number;
|
||||||
url?: string;
|
url?: string;
|
||||||
|
documentId?: number;
|
||||||
|
startLine?: number;
|
||||||
|
endLine?: number;
|
||||||
/** Original literal token that produced this citation node. */
|
/** Original literal token that produced this citation node. */
|
||||||
rawText: string;
|
rawText: string;
|
||||||
children: [{ text: "" }];
|
children: [{ text: "" }];
|
||||||
|
|
@ -33,11 +37,22 @@ const CitationElement: FC<PlateElementProps<CitationElementNode>> = ({
|
||||||
element,
|
element,
|
||||||
}) => {
|
}) => {
|
||||||
const isUrl = element.kind === "url";
|
const isUrl = element.kind === "url";
|
||||||
|
const isLine =
|
||||||
|
element.kind === "line" &&
|
||||||
|
element.documentId !== undefined &&
|
||||||
|
element.startLine !== undefined &&
|
||||||
|
element.endLine !== undefined;
|
||||||
return (
|
return (
|
||||||
<span {...attributes} className="inline-flex align-baseline">
|
<span {...attributes} className="inline-flex align-baseline">
|
||||||
<span contentEditable={false}>
|
<span contentEditable={false}>
|
||||||
{isUrl && element.url ? (
|
{isUrl && element.url ? (
|
||||||
<UrlCitation url={element.url} />
|
<UrlCitation url={element.url} />
|
||||||
|
) : isLine ? (
|
||||||
|
<LineCitation
|
||||||
|
documentId={element.documentId as number}
|
||||||
|
startLine={element.startLine as number}
|
||||||
|
endLine={element.endLine as number}
|
||||||
|
/>
|
||||||
) : element.chunkId !== undefined ? (
|
) : element.chunkId !== undefined ? (
|
||||||
<InlineCitation chunkId={element.chunkId} isDocsChunk={element.kind === "doc"} />
|
<InlineCitation chunkId={element.chunkId} isDocsChunk={element.kind === "doc"} />
|
||||||
) : null}
|
) : null}
|
||||||
|
|
@ -97,10 +112,7 @@ function copyMarks(textNode: SlateText): Record<string, unknown> {
|
||||||
return marks;
|
return marks;
|
||||||
}
|
}
|
||||||
|
|
||||||
function makeCitationElement(
|
function makeCitationElement(rawText: string, segment: CitationToken): CitationElementNode {
|
||||||
rawText: string,
|
|
||||||
segment: { kind: "url"; url: string } | { kind: "chunk"; chunkId: number; isDocsChunk: boolean }
|
|
||||||
): CitationElementNode {
|
|
||||||
if (segment.kind === "url") {
|
if (segment.kind === "url") {
|
||||||
return {
|
return {
|
||||||
type: CITATION_TYPE,
|
type: CITATION_TYPE,
|
||||||
|
|
@ -110,6 +122,17 @@ function makeCitationElement(
|
||||||
children: [{ text: "" }],
|
children: [{ text: "" }],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
if (segment.kind === "line") {
|
||||||
|
return {
|
||||||
|
type: CITATION_TYPE,
|
||||||
|
kind: "line",
|
||||||
|
documentId: segment.documentId,
|
||||||
|
startLine: segment.startLine,
|
||||||
|
endLine: segment.endLine,
|
||||||
|
rawText,
|
||||||
|
children: [{ text: "" }],
|
||||||
|
};
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
type: CITATION_TYPE,
|
type: CITATION_TYPE,
|
||||||
kind: segment.isDocsChunk ? "doc" : "chunk",
|
kind: segment.isDocsChunk ? "doc" : "chunk",
|
||||||
|
|
|
||||||
|
|
@ -18,12 +18,16 @@ import { FENCED_OR_INLINE_CODE } from "@/lib/markdown/code-regions";
|
||||||
* sometimes emit.
|
* sometimes emit.
|
||||||
*/
|
*/
|
||||||
export const CITATION_REGEX =
|
export const CITATION_REGEX =
|
||||||
/[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g;
|
/[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|d\d+#L\d+-\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g;
|
||||||
|
|
||||||
|
/**
 * Matches the knowledge-base line-citation form `d<documentId>#L<start>-<end>`.
 *
 * Anchored at both ends, so it only accepts a string that is exactly one line
 * citation. Capture groups: 1 = document id, 2 = start line, 3 = end line
 * (all decimal digit runs). The `-<end>` part is mandatory, so a bare
 * `d12#L5` does not match.
 */
const LINE_CITATION_REGEX = /^d(\d+)#L(\d+)-(\d+)$/;
|
||||||
|
|
||||||
/** A single parsed citation reference. */
|
/** A single parsed citation reference. */
|
||||||
export type CitationToken =
|
export type CitationToken =
|
||||||
| { kind: "url"; url: string }
|
| { kind: "url"; url: string }
|
||||||
| { kind: "chunk"; chunkId: number; isDocsChunk: boolean };
|
| { kind: "chunk"; chunkId: number; isDocsChunk: boolean }
|
||||||
|
| { kind: "line"; documentId: number; startLine: number; endLine: number };
|
||||||
|
|
||||||
/** Output of `parseTextWithCitations` — interleaved text + citation tokens. */
|
/** Output of `parseTextWithCitations` — interleaved text + citation tokens. */
|
||||||
export type ParsedSegment = string | CitationToken;
|
export type ParsedSegment = string | CitationToken;
|
||||||
|
|
@ -95,7 +99,15 @@ export function parseTextWithCitations(text: string, urlMap: CitationUrlMap): Pa
|
||||||
|
|
||||||
const captured = match[1];
|
const captured = match[1];
|
||||||
|
|
||||||
if (captured.startsWith("http://") || captured.startsWith("https://")) {
|
const lineMatch = LINE_CITATION_REGEX.exec(captured);
|
||||||
|
if (lineMatch) {
|
||||||
|
segments.push({
|
||||||
|
kind: "line",
|
||||||
|
documentId: Number.parseInt(lineMatch[1], 10),
|
||||||
|
startLine: Number.parseInt(lineMatch[2], 10),
|
||||||
|
endLine: Number.parseInt(lineMatch[3], 10),
|
||||||
|
});
|
||||||
|
} else if (captured.startsWith("http://") || captured.startsWith("https://")) {
|
||||||
segments.push({ kind: "url", url: captured.trim() });
|
segments.push({ kind: "url", url: captured.trim() });
|
||||||
} else if (captured.startsWith("urlcite")) {
|
} else if (captured.startsWith("urlcite")) {
|
||||||
const url = urlMap.get(captured);
|
const url = urlMap.get(captured);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue