From 5ae853e15cabb135c572c06b5a6a16d49876f8ad Mon Sep 17 00:00:00 2001 From: arkml <6592213+arkml@users.noreply.github.com> Date: Fri, 29 May 2026 10:58:57 +0530 Subject: [PATCH] fix thread boundary in email reply drafts (#588) --- .../renderer/src/components/email-view.tsx | 30 +++- .../core/src/knowledge/sync_gmail.test.ts | 42 ++++++ .../packages/core/src/knowledge/sync_gmail.ts | 129 +++++++++++++++++- 3 files changed, 192 insertions(+), 9 deletions(-) create mode 100644 apps/x/packages/core/src/knowledge/sync_gmail.test.ts diff --git a/apps/x/apps/renderer/src/components/email-view.tsx b/apps/x/apps/renderer/src/components/email-view.tsx index deed545c..dea0561e 100644 --- a/apps/x/apps/renderer/src/components/email-view.tsx +++ b/apps/x/apps/renderer/src/components/email-view.tsx @@ -69,6 +69,31 @@ function snippet(text?: string): string { return (text || '').replace(/\s+/g, ' ').trim().slice(0, 180) } +function isReplyQuoteBoundary(lines: string[], index: number): boolean { + const line = lines[index]?.trim() || '' + if (/^On\b.+\bwrote:\s*$/i.test(line)) return true + if (/^-{2,}\s*(Original Message|Forwarded message)\s*-{2,}$/i.test(line)) return true + if (/^From:\s+\S/i.test(line)) { + const next = lines.slice(index + 1, index + 6).map((value) => value.trim()) + return next.some((value) => /^(Sent|Date):\s+\S/i.test(value)) + && next.some((value) => /^To:\s+\S/i.test(value)) + && next.some((value) => /^Subject:\s+\S/i.test(value)) + } + return false +} + +function stripQuotedReplyText(text: string): string { + const lines = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n').split('\n') + const boundary = lines.findIndex((line, index) => { + if (isReplyQuoteBoundary(lines, index)) return true + return index > 0 + && line.trim().startsWith('>') + && (lines[index - 1]?.trim() === '' || lines[index - 1]?.trim().startsWith('>')) + }) + const visible = boundary >= 0 ? lines.slice(0, boundary) : lines + return visible.join('\n').replace(/[ \t]+\n/g, '\n').replace(/\n{3,}/g, '\n\n').trim() +} + function getInitial(from?: string): string { return (extractName(from)[0] || '?').toUpperCase() } @@ -692,7 +717,7 @@ function ComposeBox({ const initialContent = useMemo(() => { if (mode === 'forward') return buildForwardedContent(thread) // Gmail-side draft (user's own work) wins over the AI-generated draft. - const source = thread.gmail_draft || thread.draft_response + const source = stripQuotedReplyText(thread.gmail_draft || thread.draft_response || '') if (!source) return '' return source .split(/\n{2,}/) @@ -1048,8 +1073,7 @@ function ThreadDetail({ const MAX_KEPT_OPEN = 5 const PAGE_SIZE = 25 -const SECTIONS = ['important', 'other'] as const -type InboxSection = (typeof SECTIONS)[number] +type InboxSection = 'important' | 'other' interface SectionState { threads: GmailThread[] diff --git a/apps/x/packages/core/src/knowledge/sync_gmail.test.ts b/apps/x/packages/core/src/knowledge/sync_gmail.test.ts new file mode 100644 index 00000000..5da55bbc --- /dev/null +++ b/apps/x/packages/core/src/knowledge/sync_gmail.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from 'vitest'; +import { + sanitizeReplyBodyForGmailReply, + stripGmailQuotedReplyHtml, + stripGmailQuotedReplyText, +} from './sync_gmail.js'; + +describe('Gmail reply body sanitization', () => { + it('strips Gmail quote attribution and older quoted text from plain text replies', () => { + const body = [ + 'Sounds good, thanks. I will send it over today.', + '', + 'On Thu, 28 May 2026 at 23:45, PRAKHAR wrote:', + '> Can you share the final file?', + '> Thanks', + ].join('\n'); + + expect(stripGmailQuotedReplyText(body)).toBe('Sounds good, thanks. I will send it over today.'); + }); + + it('strips Gmail quote blocks from html replies', () => { + const html = [ + '

Sounds good, thanks.

', + '
', + '
On Thu, 28 May 2026 at 23:45, PRAKHAR wrote:
', + '
Older thread text
', + '
', + ].join(''); + + expect(stripGmailQuotedReplyHtml(html)).toBe('

Sounds good, thanks.

'); + }); + + it('regenerates html from clean text if only the text boundary is detected', () => { + const result = sanitizeReplyBodyForGmailReply( + '

Sounds good, thanks.

Older thread text

', + 'Sounds good, thanks.\n\nOn Thu, 28 May 2026 at 23:45, PRAKHAR wrote:\nOlder thread text', + ); + + expect(result.bodyText).toBe('Sounds good, thanks.'); + expect(result.bodyHtml).toBe('

Sounds good, thanks.

'); + }); +}); diff --git a/apps/x/packages/core/src/knowledge/sync_gmail.ts b/apps/x/packages/core/src/knowledge/sync_gmail.ts index 6b131a5d..5c32c7bf 100644 --- a/apps/x/packages/core/src/knowledge/sync_gmail.ts +++ b/apps/x/packages/core/src/knowledge/sync_gmail.ts @@ -35,7 +35,7 @@ const nhm = new NodeHtmlMarkdown(); // previously cached snapshots (e.g. attachment / recipient parsing fixes). The // short-circuit in buildAndCacheSnapshot only reuses a cache whose version matches, // so stale entries are transparently rebuilt on the next sync. -const SNAPSHOT_PARSER_VERSION = 2; +const SNAPSHOT_PARSER_VERSION = 3; interface SnapshotCacheEntry { historyId: string; @@ -405,6 +405,112 @@ function normalizeBody(body: string): string { return body.replace(/\r\n/g, '\n').replace(/\n{3,}/g, '\n\n').trim(); } +function isGmailQuoteAttribution(line: string): boolean { + const trimmed = line.trim(); + return /^On\b.+\bwrote:\s*$/i.test(trimmed); +} + +function isOriginalMessageBoundary(line: string): boolean { + return /^-{2,}\s*Original Message\s*-{2,}$/i.test(line.trim()); +} + +function isForwardedMessageBoundary(line: string): boolean { + return /^-{2,}\s*Forwarded message\s*-{2,}$/i.test(line.trim()); +} + +function isOutlookHeaderBoundary(lines: string[], index: number): boolean { + if (!/^From:\s+\S/i.test(lines[index]?.trim() || '')) return false; + const next = lines.slice(index + 1, index + 6).map((line) => line.trim()); + return next.some((line) => /^(Sent|Date):\s+\S/i.test(line)) + && next.some((line) => /^To:\s+\S/i.test(line)) + && next.some((line) => /^Subject:\s+\S/i.test(line)); +} + +function findQuotedReplyBoundary(lines: string[]): number { + for (let i = 0; i < lines.length; i += 1) { + const line = lines[i] || ''; + if ( + isGmailQuoteAttribution(line) + || isOriginalMessageBoundary(line) + || isForwardedMessageBoundary(line) + || isOutlookHeaderBoundary(lines, i) + ) { + return i; + } + + // Gmail plain text drafts often carry older messages as a quoted block. + // Treat a trailing blockquote as history, but avoid stripping an inline + // quote the user is actively writing at the top of the reply. + if (i > 0 && line.trim().startsWith('>') && (lines[i - 1]?.trim() === '' || lines[i - 1]?.trim().startsWith('>'))) { + return i; + } + } + return -1; +} + +export function stripGmailQuotedReplyText(text: string): string { + const normalized = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + const lines = normalized.split('\n'); + const boundary = findQuotedReplyBoundary(lines); + const visible = boundary >= 0 ? lines.slice(0, boundary) : lines; + return visible + .join('\n') + .replace(/[ \t]+\n/g, '\n') + .replace(/\n{3,}/g, '\n\n') + .trim(); +} + +function htmlQuoteBoundaryIndex(html: string): number { + const candidates: number[] = []; + const patterns = [ + /<[^>]+\bclass\s*=\s*["'][^"']*\bgmail_(?:quote|attr)\b[^"']*["'][^>]*>/i, + /]*(?:type\s*=\s*["']cite["']|class\s*=\s*["'][^"']*\bgmail_quote\b[^"']*["'])[^>]*>/i, + /<(p|div|li)\b[^>]*>\s*(?:<(?:span|b|strong|i|em)\b[^>]*>\s*)*On\b[\s\S]{0,800}?\bwrote:\s*(?:\s*)?(?:<\/(?:span|b|strong|i|em)>\s*)*<\/\1>/i, + /<(p|div|li)\b[^>]*>\s*-{2,}\s*(?:Original Message|Forwarded message)\s*-{2,}\s*<\/\1>/i, + ]; + + for (const pattern of patterns) { + const match = pattern.exec(html); + if (match?.index !== undefined) candidates.push(match.index); + } + + return candidates.length > 0 ? Math.min(...candidates) : -1; +} + +export function stripGmailQuotedReplyHtml(html: string): string { + const boundary = htmlQuoteBoundaryIndex(html); + const visible = boundary >= 0 ? html.slice(0, boundary) : html; + return visible.trim(); +} + +function textToHtml(text: string): string { + return text + .split(/\n{2,}/) + .map((para) => `

${escapeHtml(para).replace(/\n/g, '
')}

`) + .join(''); +} + +function escapeHtml(value: string): string { + return value + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +} + +export function sanitizeReplyBodyForGmailReply(bodyHtml: string, bodyText: string): { bodyHtml: string; bodyText: string } { + const cleanText = stripGmailQuotedReplyText(bodyText); + const cleanHtml = stripGmailQuotedReplyHtml(bodyHtml); + const textWasStripped = cleanText !== bodyText.replace(/\r\n/g, '\n').replace(/\r/g, '\n').trim(); + const htmlWasStripped = cleanHtml !== bodyHtml.trim(); + + return { + bodyText: cleanText, + bodyHtml: textWasStripped && !htmlWasStripped ? textToHtml(cleanText) : cleanHtml, + }; +} + function headerValue(headers: gmail.Schema$MessagePartHeader[] | undefined, name: string): string | undefined { return headers?.find(h => h.name?.toLowerCase() === name.toLowerCase())?.value || undefined; } @@ -636,9 +742,13 @@ async function buildAndCacheSnapshot( const sentMessages = parsed.filter((m) => !m.isDraft); const draftMessages = parsed.filter((m) => m.isDraft); - const visibleMessages = sentMessages.map(({ isDraft: _isDraft, ...rest }) => rest); + const visibleMessages = sentMessages.map((msg) => { + const rest: Partial = { ...msg }; + delete rest.isDraft; + return rest as Omit; + }); const latestDraftBody = draftMessages.length > 0 - ? draftMessages[draftMessages.length - 1]!.body.trim() + ? stripGmailQuotedReplyText(draftMessages[draftMessages.length - 1]!.body) : ''; if (visibleMessages.length === 0) return null; @@ -674,7 +784,10 @@ async function buildAndCacheSnapshot( const classification = await classifyThread(snapshot, userEmail, { skipDraft }); snapshot.importance = classification.importance; if (classification.summary) snapshot.summary = classification.summary; - if (classification.draftResponse) snapshot.draft_response = classification.draftResponse; + if (classification.draftResponse) { + const draftResponse = stripGmailQuotedReplyText(classification.draftResponse); + if (draftResponse) snapshot.draft_response = draftResponse; + } } catch (err) { console.warn(`[Gmail] classify failed for ${threadId}:`, err); } @@ -1330,6 +1443,10 @@ export async function sendThreadReply(opts: SendReplyOptions): Promise