diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index 00f3acebf..1748f47a9 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -24,6 +24,8 @@ import dynamic from "next/dynamic"; import type { FC } from "react"; import { useEffect, useMemo, useRef, useState } from "react"; import { commentsEnabledAtom, targetCommentIdAtom } from "@/atoms/chat/current-thread.atom"; +import { tryGetHostname } from "@/lib/url"; + import { globalNewLLMConfigsAtom, newLLMConfigsAtom, @@ -99,20 +101,12 @@ const GenerateImageToolUI = dynamic( import("@/components/tool-ui/generate-image").then((m) => ({ default: m.GenerateImageToolUI })), { ssr: false } ); -function extractDomain(url: string): string | undefined { - try { - return new URL(url).hostname.replace(/^www\./, ""); - } catch { - return undefined; - } -} - function useCitationsFromMetadata(): SerializableCitation[] { const allCitations = useAllCitationMetadata(); return useMemo(() => { const result: SerializableCitation[] = []; for (const [url, meta] of allCitations) { - const domain = extractDomain(url); + const domain = tryGetHostname(url); result.push({ id: `url-cite-${url}`, href: url, diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index 32a29cfc9..f0d9013bf 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -193,14 +193,7 @@ const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => { ); }; -function extractDomain(url: string): string { - try { - const hostname = new URL(url).hostname; - return hostname.replace(/^www\./, ""); - } catch { - return url; - } -} +import { tryGetHostname } from "@/lib/url"; interface UrlCitationProps { url: string; @@ -212,7 +205,7 @@ interface UrlCitationProps { * page title and snippet (extracted deterministically from web_search tool results). */ export const UrlCitation: FC = ({ url }) => { - const domain = extractDomain(url); + const domain = tryGetHostname(url) ?? url; const meta = useCitationMetadata(url); return ( diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx index 1bb7cf2d9..43d435d20 100644 --- a/surfsense_web/components/assistant-ui/markdown-text.tsx +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -23,6 +23,8 @@ import "katex/dist/katex.min.css"; import { toast } from "sonner"; import { processChildrenWithCitations } from "@/components/citations/citation-renderer"; import { Skeleton } from "@/components/ui/skeleton"; +import { tryGetHostname } from "@/lib/url"; + import { Table, TableBody, @@ -139,15 +141,6 @@ const MarkdownTextImpl = () => { export const MarkdownText = memo(MarkdownTextImpl); -function extractDomain(url: string): string { - try { - const parsed = new URL(url); - return parsed.hostname.replace(/^www\./, ""); - } catch { - return ""; - } -} - // Canonical local-file virtual paths are mount-prefixed: // const LOCAL_FILE_PATH_REGEX = /^\/[a-z0-9_-]+\/[^\s`]+(?:\/[^\s`]+)*$/; @@ -288,7 +281,7 @@ function FilePathLink({ path, className }: { path: string; className?: string }) function MarkdownImage({ src, alt }: { src?: string; alt?: string }) { if (!src) return null; - const domain = extractDomain(src); + const domain = tryGetHostname(src) ?? ""; return (
diff --git a/surfsense_web/components/tool-ui/citation/citation.tsx b/surfsense_web/components/tool-ui/citation/citation.tsx index 05ce214f3..a7c82de8c 100644 --- a/surfsense_web/components/tool-ui/citation/citation.tsx +++ b/surfsense_web/components/tool-ui/citation/citation.tsx @@ -6,19 +6,11 @@ import * as React from "react"; import { openSafeNavigationHref, sanitizeHref } from "../shared/media"; import { cn, Popover, PopoverContent, PopoverTrigger } from "./_adapter"; import type { CitationVariant, SerializableCitation } from "./schema"; +import { tryGetHostname } from "@/lib/url"; import { TYPE_ICONS } from "./type-icons"; const FALLBACK_LOCALE = "en-US"; -function extractDomain(url: string): string | undefined { - try { - const urlObj = new URL(url); - return urlObj.hostname.replace(/^www\./, ""); - } catch { - return undefined; - } -} - function formatDate(isoString: string, locale: string): string { try { const date = new Date(isoString); @@ -78,7 +70,7 @@ export function Citation(props: CitationProps) { const locale = providedLocale ?? FALLBACK_LOCALE; const sanitizedHref = sanitizeHref(rawHref); - const domain = providedDomain ?? extractDomain(rawHref); + const domain = providedDomain ?? tryGetHostname(rawHref); const citationData: SerializableCitation = { ...serializable, diff --git a/surfsense_web/lib/url.ts b/surfsense_web/lib/url.ts new file mode 100644 index 000000000..0c9227581 --- /dev/null +++ b/surfsense_web/lib/url.ts @@ -0,0 +1,14 @@ +/** + * Extract a normalized hostname from a URL. Strips a leading `www.`. + * Returns `undefined` if the input is not a parseable URL. + * + * This is the canonical replacement for the four previously-duplicated + * `extractDomain` helpers that had subtly different error fallbacks. + */ +export function tryGetHostname(url: string): string | undefined { + try { + return new URL(url).hostname.replace(/^www\./, ""); + } catch { + return undefined; + } +}