From 8463c8ba57041da4a444368ea0adcd1cfd37decf Mon Sep 17 00:00:00 2001 From: Gagancreates Date: Mon, 1 Jun 2026 02:16:51 +0530 Subject: [PATCH] feat(google-docs): import and sync down as Markdown, record remote revision --- .../core/src/knowledge/google_docs.test.ts | 142 ++++++++++++++++++ .../core/src/knowledge/google_docs.ts | 41 +++-- 2 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 apps/x/packages/core/src/knowledge/google_docs.test.ts diff --git a/apps/x/packages/core/src/knowledge/google_docs.test.ts b/apps/x/packages/core/src/knowledge/google_docs.test.ts new file mode 100644 index 00000000..c5f037fe --- /dev/null +++ b/apps/x/packages/core/src/knowledge/google_docs.test.ts @@ -0,0 +1,142 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +/** + * Phase 1 — read-path fidelity. + * + * Google Docs are pulled in as Markdown (text/markdown export), not flattened + * to text/plain, so headings / bold / lists / links survive into the local + * note. Import and sync-down also record the Drive `modifiedTime` in + * frontmatter so a later sync-up can detect remote edits. + */ + +const MARKDOWN_SNAPSHOT = [ + '# Title', + '', + 'Some **bold** and a [link](https://example.com).', + '', + '- one', + '- two', +].join('\n'); + +// In-memory capture of the most recent writeFile. 
+let written: { path: string; content: string } | null = null; +let readFileContent = ''; +let exportCalls: Array<{ fileId: string; mimeType: string }> = []; + +const driveFile = { + id: 'doc-123', + name: 'My Doc', + webViewLink: 'https://docs.google.com/document/d/doc-123/edit', + modifiedTime: '2026-05-28T10:00:00.000Z', + owners: [{ displayName: 'Arjun', emailAddress: 'arjun@example.com' }], +}; + +beforeEach(() => { + vi.resetModules(); + written = null; + exportCalls = []; + + vi.doMock('node:fs/promises', () => ({ + default: { + readFile: vi.fn(async () => readFileContent), + writeFile: vi.fn(async (path: string, content: string) => { written = { path, content }; }), + mkdir: vi.fn(async () => undefined), + access: vi.fn(async () => { throw new Error('ENOENT'); }), + }, + })); + + vi.doMock('../config/config.js', () => ({ WorkDir: '/ws' })); + vi.doMock('../workspace/workspace.js', () => ({ + resolveWorkspacePath: (rel: string) => `/ws/${rel}`, + })); + + vi.doMock('./google-client-factory.js', () => ({ + GoogleClientFactory: { + getClient: vi.fn(async () => ({})), + getCredentialStatus: vi.fn(async () => ({ + connected: true, + hasRequiredScopes: true, + missingScopes: [], + })), + }, + })); + + const driveClient = { + files: { + get: vi.fn(async () => ({ data: driveFile })), + export: vi.fn(async (params: { fileId: string; mimeType: string }) => { + exportCalls.push({ fileId: params.fileId, mimeType: params.mimeType }); + return { data: MARKDOWN_SNAPSHOT }; + }), + list: vi.fn(async () => ({ data: { files: [driveFile] } })), + }, + }; + + vi.doMock('googleapis', () => ({ + google: { + drive: vi.fn(() => driveClient), + docs: vi.fn(() => ({ documents: { get: vi.fn(), batchUpdate: vi.fn() } })), + }, + })); +}); + +afterEach(() => { + vi.clearAllMocks(); +}); + +describe('importGoogleDoc', () => { + it('exports as Markdown (not plain text) and keeps the formatting in the note body', async () => { + const { importGoogleDoc } = await 
import('./google_docs.js'); + const result = await importGoogleDoc('doc-123', 'knowledge'); + + expect(exportCalls).toEqual([{ fileId: 'doc-123', mimeType: 'text/markdown' }]); + expect(result.path).toBe('knowledge/My Doc.md'); + expect(written).not.toBeNull(); + + const content = written!.content; + // Markdown structure survives the import. + expect(content).toContain('# Title'); + expect(content).toContain('**bold**'); + expect(content).toContain('[link](https://example.com)'); + expect(content).toContain('- one'); + }); + + it('records the Drive modifiedTime in frontmatter for conflict detection', async () => { + const { importGoogleDoc } = await import('./google_docs.js'); + await importGoogleDoc('doc-123', 'knowledge'); + + expect(written!.content).toContain('remoteModifiedTime: "2026-05-28T10:00:00.000Z"'); + expect(written!.content).toContain('id: "doc-123"'); + }); +}); + +describe('refreshGoogleDocSnapshot (sync down)', () => { + it('re-exports Markdown and refreshes remoteModifiedTime while preserving the link', async () => { + readFileContent = [ + '---', + 'source:', + ' - google-doc', + 'google_doc:', + ' id: "doc-123"', + ' url: "https://docs.google.com/document/d/doc-123/edit"', + ' title: "My Doc"', + ' syncedAt: "2026-05-20T00:00:00.000Z"', + ' remoteModifiedTime: "2026-05-20T00:00:00.000Z"', + '---', + '', + 'old body', + '', + ].join('\n'); + + const { refreshGoogleDocSnapshot } = await import('./google_docs.js'); + const result = await refreshGoogleDocSnapshot('knowledge/My Doc.md'); + + expect(result.ok).toBe(true); + expect(exportCalls).toEqual([{ fileId: 'doc-123', mimeType: 'text/markdown' }]); + // Body replaced with the fresh Markdown export. + expect(written!.content).toContain('# Title'); + expect(written!.content).not.toContain('old body'); + // modifiedTime advanced to the remote value. 
+ expect(written!.content).toContain('remoteModifiedTime: "2026-05-28T10:00:00.000Z"'); + }); +}); diff --git a/apps/x/packages/core/src/knowledge/google_docs.ts b/apps/x/packages/core/src/knowledge/google_docs.ts index 59210f49..9d29c8f1 100644 --- a/apps/x/packages/core/src/knowledge/google_docs.ts +++ b/apps/x/packages/core/src/knowledge/google_docs.ts @@ -23,10 +23,16 @@ type GoogleDocFrontmatter = { url: string; title: string; syncedAt?: string; + // Drive `modifiedTime` (RFC3339) captured at the last sync, used to detect + // remote edits before a sync-up would overwrite them. + remoteModifiedTime?: string; }; const GOOGLE_DOC_MIME = 'application/vnd.google-apps.document'; -const TEXT_MIME = 'text/plain'; +// Google Docs natively export to Markdown, which preserves headings, bold, +// lists, links and tables on the way into the local note — far better fidelity +// than the old text/plain export. +const MARKDOWN_MIME = 'text/markdown'; function yamlQuote(value: string): string { return JSON.stringify(value); @@ -56,7 +62,7 @@ function normalizeKnowledgeDir(targetFolder: string): string { function buildStubContent(doc: GoogleDocFrontmatter, snapshot: string): string { const syncedAt = doc.syncedAt ?? 
new Date().toISOString(); - return [ + const lines = [ '---', 'source:', ' - google-doc', @@ -65,11 +71,12 @@ function buildStubContent(doc: GoogleDocFrontmatter, snapshot: string): string { ` url: ${yamlQuote(doc.url)}`, ` title: ${yamlQuote(doc.title)}`, ` syncedAt: ${yamlQuote(syncedAt)}`, - '---', - '', - snapshot.trimEnd(), - '', - ].join('\n'); + ]; + if (doc.remoteModifiedTime) { + lines.push(` remoteModifiedTime: ${yamlQuote(doc.remoteModifiedTime)}`); + } + lines.push('---', '', snapshot.trimEnd(), ''); + return lines.join('\n'); } function parseLinkedGoogleDoc(markdown: string): GoogleDocFrontmatter | null { @@ -96,7 +103,7 @@ function parseLinkedGoogleDoc(markdown: string): GoogleDocFrontmatter | null { if (!nested) continue; const key = nested[1] as keyof GoogleDocFrontmatter; let value = nested[2].trim(); - if (!['id', 'url', 'title', 'syncedAt'].includes(key)) continue; + if (!['id', 'url', 'title', 'syncedAt', 'remoteModifiedTime'].includes(key)) continue; try { if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) { value = JSON.parse(value); @@ -143,10 +150,10 @@ async function getDocsClient() { return google.docs({ version: 'v1', auth }); } -async function exportDocText(fileId: string): Promise<string> { +async function exportDocMarkdown(fileId: string): Promise<string> { const driveClient = await getDriveClient(); const result = await driveClient.files.export( - { fileId, mimeType: TEXT_MIME }, + { fileId, mimeType: MARKDOWN_MIME }, { responseType: 'text' }, ); return typeof result.data === 'string' ? result.data : String(result.data ?? ''); @@ -227,7 +234,7 @@ export async function importGoogleDoc(fileId: string, targetFolder: string): Pro if (!status.hasRequiredScopes) throw new Error('Google is missing Drive/Docs scopes. 
Reconnect Google.'); const doc = await getDocMetadata(fileId); - const snapshot = await exportDocText(fileId); + const snapshot = await exportDocMarkdown(fileId); const relPath = await uniqueKnowledgePath(targetFolder, doc.name); const absPath = resolveWorkspacePath(relPath); await fs.mkdir(path.dirname(absPath), { recursive: true }); @@ -236,6 +243,7 @@ export async function importGoogleDoc(fileId: string, targetFolder: string): Pro url: doc.url, title: doc.name, syncedAt: new Date().toISOString(), + remoteModifiedTime: doc.modifiedTime ?? undefined, }, snapshot), 'utf8'); return { path: relPath, doc }; } @@ -246,9 +254,16 @@ export async function refreshGoogleDocSnapshot(relPath: string): Promise<{ ok: t const linked = parseLinkedGoogleDoc(markdown); if (!linked) throw new Error('This note is not linked to a Google Doc.'); - const snapshot = await exportDocText(linked.id); + const [snapshot, meta] = await Promise.all([ + exportDocMarkdown(linked.id), + getDocMetadata(linked.id), + ]); const syncedAt = new Date().toISOString(); - await fs.writeFile(absPath, buildStubContent({ ...linked, syncedAt }, snapshot), 'utf8'); + await fs.writeFile(absPath, buildStubContent({ + ...linked, + syncedAt, + remoteModifiedTime: meta.modifiedTime ?? linked.remoteModifiedTime, + }, snapshot), 'utf8'); return { ok: true, syncedAt }; }