feat(google-docs): structure-preserving sync up with remote-conflict guard

This commit is contained in:
Gagancreates 2026-06-01 02:27:33 +05:30
parent 8463c8ba57
commit 7e6ee040aa
6 changed files with 409 additions and 24 deletions

View file

@ -825,7 +825,7 @@ export function setupIpcHandlers() {
return refreshGoogleDocSnapshot(args.path);
},
'google-docs:sync': async (_event, args) => {
return syncLinkedGoogleDocFromMarkdown(args.path, args.markdown);
return syncLinkedGoogleDocFromMarkdown(args.path, args.markdown, { force: args.force });
},
// Search handler
'search:query': async (_event, args) => {

View file

@ -22,6 +22,7 @@ const MARKDOWN_SNAPSHOT = [
let written: { path: string; content: string } | null = null;
let readFileContent = '';
let exportCalls: Array<{ fileId: string; mimeType: string }> = [];
let batchUpdateCalls: Array<{ documentId: string; requests: unknown[] }> = [];
const driveFile = {
id: 'doc-123',
@ -35,6 +36,7 @@ beforeEach(() => {
vi.resetModules();
written = null;
exportCalls = [];
batchUpdateCalls = [];
vi.doMock('node:fs/promises', () => ({
default: {
@ -72,14 +74,42 @@ beforeEach(() => {
},
};
const docsClient = {
documents: {
get: vi.fn(async () => ({ data: { body: { content: [{ endIndex: 12 }] } } })),
batchUpdate: vi.fn(async (params: { documentId: string; requestBody: { requests: unknown[] } }) => {
batchUpdateCalls.push({ documentId: params.documentId, requests: params.requestBody.requests });
return { data: {} };
}),
},
};
vi.doMock('googleapis', () => ({
google: {
drive: vi.fn(() => driveClient),
docs: vi.fn(() => ({ documents: { get: vi.fn(), batchUpdate: vi.fn() } })),
docs: vi.fn(() => docsClient),
},
}));
});
/**
 * Builds the text of a knowledge note that is linked to the fixture Google Doc
 * (`doc-123`): YAML frontmatter, a blank line, the Markdown body, and a
 * trailing newline.
 *
 * @param remoteModifiedTime Value stored as the note's `remoteModifiedTime`
 *   baseline; it is emitted as a JSON-quoted string.
 * @param body Markdown placed after the frontmatter.
 * @returns The complete note content.
 */
function linkedMarkdown(remoteModifiedTime: string, body = '# Title\n\nhello **world**'): string {
  const frontmatterLines = [
    'source:',
    ' - google-doc',
    'google_doc:',
    ' id: "doc-123"',
    ' url: "https://docs.google.com/document/d/doc-123/edit"',
    ' title: "My Doc"',
    ' syncedAt: "2026-05-20T00:00:00.000Z"',
    ` remoteModifiedTime: ${JSON.stringify(remoteModifiedTime)}`,
  ];
  const frontmatter = frontmatterLines.join('\n');
  return `---\n${frontmatter}\n---\n\n${body}\n`;
}
afterEach(() => {
vi.clearAllMocks();
});
@ -140,3 +170,52 @@ describe('refreshGoogleDocSnapshot (sync down)', () => {
expect(written!.content).toContain('remoteModifiedTime: "2026-05-28T10:00:00.000Z"');
});
});
// Sync-up behaviour: the remote-conflict guard, the force override, and the
// structure-preserving push. The mocked Drive metadata is expected to report
// 2026-05-28T10:00:00.000Z as the doc's modifiedTime (the same value the
// sync-down test above sees written back) — NOTE(review): confirm against the
// driveFile fixture if that fixture changes.
describe('syncLinkedGoogleDocFromMarkdown (sync up)', () => {
  it('blocks the push when the doc changed remotely since the last sync', async () => {
    // Stored baseline is older than the doc's current modifiedTime (2026-05-28).
    const markdown = linkedMarkdown('2026-05-20T00:00:00.000Z');
    const { syncLinkedGoogleDocFromMarkdown } = await import('./google_docs.js');

    const result = await syncLinkedGoogleDocFromMarkdown('knowledge/My Doc.md', markdown);

    expect(result.synced).toBe(false);
    expect(result.conflict).toBe(true);
    expect(batchUpdateCalls).toHaveLength(0); // remote was not touched
    // A held-back push must not rewrite the local note (and so must not move
    // its baseline) either.
    expect(written).toBeNull();
  });

  it('overwrites on force even when the remote is ahead', async () => {
    const markdown = linkedMarkdown('2026-05-20T00:00:00.000Z');
    const { syncLinkedGoogleDocFromMarkdown } = await import('./google_docs.js');

    const result = await syncLinkedGoogleDocFromMarkdown('knowledge/My Doc.md', markdown, { force: true });

    expect(result.synced).toBe(true);
    expect(batchUpdateCalls).toHaveLength(1);
    // The note started from the stale 2026-05-20 baseline, so seeing the
    // remote's current revision here proves the baseline was really refreshed.
    expect(written?.content).toContain('remoteModifiedTime: "2026-05-28T10:00:00.000Z"');
  });

  it('pushes structure-preserving requests and refreshes the stored revision', async () => {
    // Baseline matches the remote, so there is no conflict.
    const markdown = linkedMarkdown('2026-05-28T10:00:00.000Z');
    const { syncLinkedGoogleDocFromMarkdown } = await import('./google_docs.js');

    const result = await syncLinkedGoogleDocFromMarkdown('knowledge/My Doc.md', markdown);

    expect(result.synced).toBe(true);
    expect(batchUpdateCalls).toHaveLength(1);
    const requests = batchUpdateCalls[0].requests as Array<Record<string, unknown>>;
    // Old content cleared, then a heading style applied (structure, not flat text).
    expect(requests.some((r) => 'deleteContentRange' in r)).toBe(true);
    expect(requests.some((r) => 'updateParagraphStyle' in r)).toBe(true);
    expect(requests.some((r) => 'updateTextStyle' in r)).toBe(true);
    // The rewritten note carries the remote's revision as its baseline.
    expect(written?.content).toContain('remoteModifiedTime: "2026-05-28T10:00:00.000Z"');
  });
});
// Unit tests for the pure timestamp comparison behind the conflict guard.
describe('isRemoteAhead', () => {
  it('detects a newer remote revision and tolerates missing baselines', async () => {
    const { isRemoteAhead } = await import('./google_docs.js');
    expect(isRemoteAhead('2026-05-28T10:00:00.000Z', '2026-05-20T00:00:00.000Z')).toBe(true);
    expect(isRemoteAhead('2026-05-20T00:00:00.000Z', '2026-05-28T10:00:00.000Z')).toBe(false);
    expect(isRemoteAhead('2026-05-28T10:00:00.000Z', undefined)).toBe(false);
    expect(isRemoteAhead(null, '2026-05-20T00:00:00.000Z')).toBe(false);
  });

  it('treats equal or unparseable timestamps as not ahead', async () => {
    const { isRemoteAhead } = await import('./google_docs.js');
    // Same revision on both sides: nothing to clobber, so the push may proceed.
    expect(isRemoteAhead('2026-05-28T10:00:00.000Z', '2026-05-28T10:00:00.000Z')).toBe(false);
    // A timestamp that Date.parse cannot read must not block pushes forever.
    expect(isRemoteAhead('not-a-date', '2026-05-20T00:00:00.000Z')).toBe(false);
    expect(isRemoteAhead('2026-05-28T10:00:00.000Z', 'not-a-date')).toBe(false);
  });
});

View file

@ -1,9 +1,10 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import { google, drive_v3 as drive } from 'googleapis';
import { google, drive_v3 as drive, docs_v1 } from 'googleapis';
import { WorkDir } from '../config/config.js';
import { resolveWorkspacePath } from '../workspace/workspace.js';
import { GoogleClientFactory } from './google-client-factory.js';
import { markdownToDocsRequests } from './markdown-to-docs.js';
export const GOOGLE_DOC_SCOPES = [
'https://www.googleapis.com/auth/drive.readonly',
@ -127,15 +128,21 @@ function bodyFromMarkdown(markdown: string): string {
return body;
}
function markdownSnapshotToPlainText(markdown: string): string {
return bodyFromMarkdown(markdown)
.replace(/^#{1,6}\s+/gm, '')
.replace(/^\s*[-*]\s+/gm, '- ')
.replace(/\*\*([^*]+)\*\*/g, '$1')
.replace(/\*([^*]+)\*/g, '$1')
.replace(/`([^`]+)`/g, '$1')
.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
.trimEnd();
/**
 * Reports whether the Google Doc carries a newer revision than the one we
 * recorded at our last sync — i.e. whether a sync-up would clobber remote
 * changes we never pulled.
 *
 * A missing or unparseable timestamp on either side (e.g. a legacy note with
 * no baseline) yields `false`, so the push is allowed rather than blocked
 * forever.
 *
 * @param remoteModifiedTime The doc's current modified time, if known.
 * @param lastKnownModifiedTime The modified time stored at the last sync.
 * @returns `true` only when both timestamps parse and the remote one is
 *   strictly later.
 */
export function isRemoteAhead(
  remoteModifiedTime: string | null | undefined,
  lastKnownModifiedTime: string | undefined,
): boolean {
  // Absent values are mapped to NaN, the same thing Date.parse returns for
  // text it cannot read.
  const remoteMs = remoteModifiedTime ? Date.parse(remoteModifiedTime) : Number.NaN;
  const knownMs = lastKnownModifiedTime ? Date.parse(lastKnownModifiedTime) : Number.NaN;
  // Every comparison involving NaN is false, so this single test also covers
  // the "missing" and "unparseable" cases.
  return remoteMs > knownMs;
}
async function getDriveClient() {
@ -267,21 +274,37 @@ export async function refreshGoogleDocSnapshot(relPath: string): Promise<{ ok: t
return { ok: true, syncedAt };
}
export async function syncLinkedGoogleDocFromMarkdown(relPath: string, markdown: string): Promise<{ synced: boolean; syncedAt?: string; error?: string }> {
export async function syncLinkedGoogleDocFromMarkdown(
relPath: string,
markdown: string,
opts: { force?: boolean } = {},
): Promise<{ synced: boolean; syncedAt?: string; conflict?: boolean; error?: string }> {
try {
const normalized = relPath.replace(/\\/g, '/');
if (!normalized.startsWith('knowledge/') || !normalized.endsWith('.md')) return { synced: false };
const linked = parseLinkedGoogleDoc(markdown);
if (!linked) return { synced: false };
const text = markdownSnapshotToPlainText(markdown);
// Conflict guard: don't silently overwrite remote edits we never pulled.
if (!opts.force) {
const meta = await getDocMetadata(linked.id);
if (isRemoteAhead(meta.modifiedTime, linked.remoteModifiedTime)) {
return {
synced: false,
conflict: true,
error: 'The Google Doc changed since your last sync. Pull the latest, or overwrite it.',
};
}
}
const body = bodyFromMarkdown(markdown);
const docsClient = await getDocsClient();
const current = await docsClient.documents.get({
documentId: linked.id,
fields: 'body(content(endIndex))',
});
const endIndex = current.data.body?.content?.at(-1)?.endIndex ?? 1;
const requests = [];
const requests: docs_v1.Schema$Request[] = [];
if (endIndex > 2) {
requests.push({
deleteContentRange: {
@ -289,14 +312,8 @@ export async function syncLinkedGoogleDocFromMarkdown(relPath: string, markdown:
},
});
}
if (text.trim()) {
requests.push({
insertText: {
location: { index: 1 },
text: `${text.trimEnd()}\n`,
},
});
}
// Recreate the body with structure preserved (headings, emphasis, lists, links).
requests.push(...markdownToDocsRequests(body, 1));
if (requests.length > 0) {
await docsClient.documents.batchUpdate({
documentId: linked.id,
@ -304,9 +321,16 @@ export async function syncLinkedGoogleDocFromMarkdown(relPath: string, markdown:
});
}
// Re-read the revision so our stored baseline reflects this push and the
// next sync-up won't see a phantom conflict.
const meta = await getDocMetadata(linked.id);
const absPath = path.join(WorkDir, normalized);
const syncedAt = new Date().toISOString();
await fs.writeFile(absPath, buildStubContent({ ...linked, syncedAt }, bodyFromMarkdown(markdown)), 'utf8');
await fs.writeFile(absPath, buildStubContent({
...linked,
syncedAt,
remoteModifiedTime: meta.modifiedTime ?? linked.remoteModifiedTime,
}, body), 'utf8');
return { synced: true, syncedAt };
} catch (error) {
console.error('[GoogleDocs] Failed to sync linked Google Doc:', error);

View file

@ -0,0 +1,83 @@
import { describe, expect, it } from 'vitest';
import { markdownToDocsRequests, parseInline } from './markdown-to-docs.js';
// Inline parsing: each case checks both the marker-free text and the style
// ranges, whose offsets index into that marker-free text.
describe('parseInline', () => {
  it('extracts bold, italic and link ranges with offsets relative to the plain text', () => {
    const boldResult = parseInline('a **b** c');
    expect(boldResult).toEqual({
      text: 'a b c',
      ranges: [{ start: 2, end: 3, bold: true }],
    });

    const linkResult = parseInline('see [docs](https://x.dev) now');
    expect(linkResult).toEqual({
      text: 'see docs now',
      ranges: [{ start: 4, end: 8, link: 'https://x.dev' }],
    });

    const italicResult = parseInline('_em_');
    expect(italicResult).toEqual({
      text: 'em',
      ranges: [{ start: 0, end: 2, italic: true }],
    });
  });

  it('keeps inline code text without styling', () => {
    const codeResult = parseInline('run `npm test`');
    expect(codeResult).toEqual({ text: 'run npm test', ranges: [] });
  });
});
// Request generation: the first request inserts all text, later requests
// style ranges whose indices are counted from the insertion index (1).
describe('markdownToDocsRequests', () => {
  it('returns no requests for an empty body', () => {
    const reqs = markdownToDocsRequests(' \n\n');
    expect(reqs).toEqual([]);
  });

  it('inserts the full text first, then layers styles at the right indices', () => {
    const reqs = markdownToDocsRequests('# Hello\n\nworld **bold**');

    // First request inserts all paragraph text at index 1.
    const expectedInsert = {
      insertText: { location: { index: 1 }, text: 'Hello\n\nworld bold\n' },
    };
    expect(reqs[0]).toEqual(expectedInsert);

    // Heading 1 applied to "Hello\n" → [1, 7).
    const expectedHeading = {
      updateParagraphStyle: {
        range: { startIndex: 1, endIndex: 7 },
        paragraphStyle: { namedStyleType: 'HEADING_1' },
        fields: 'namedStyleType',
      },
    };
    expect(reqs).toContainEqual(expectedHeading);

    // "bold" sits at [14, 18) in the inserted text.
    const expectedBold = {
      updateTextStyle: {
        range: { startIndex: 14, endIndex: 18 },
        textStyle: { bold: true },
        fields: 'bold',
      },
    };
    expect(reqs).toContainEqual(expectedBold);
  });

  it('maps bullet and numbered lists to the right bullet presets', () => {
    // Keep only the list-creating requests produced for a Markdown snippet.
    const bulletRequestsFor = (markdown: string) =>
      markdownToDocsRequests(markdown).filter((r) => 'createParagraphBullets' in r);

    const bulletReqs = bulletRequestsFor('- one\n- two');
    expect(bulletReqs).toHaveLength(2);
    expect(bulletReqs[0]).toMatchObject({
      createParagraphBullets: { bulletPreset: 'BULLET_DISC_CIRCLE_SQUARE' },
    });

    const numberedReqs = bulletRequestsFor('1. first\n2. second');
    expect(numberedReqs).toHaveLength(2);
    expect(numberedReqs[0]).toMatchObject({
      createParagraphBullets: { bulletPreset: 'NUMBERED_DECIMAL_ALPHA_ROMAN' },
    });
  });

  it('emits a link textStyle request', () => {
    const reqs = markdownToDocsRequests('see [docs](https://x.dev)');
    const expectedLink = {
      updateTextStyle: {
        range: { startIndex: 5, endIndex: 9 },
        textStyle: { link: { url: 'https://x.dev' } },
        fields: 'link',
      },
    };
    expect(reqs).toContainEqual(expectedLink);
  });
});

View file

@ -0,0 +1,195 @@
import type { docs_v1 } from 'googleapis';
/**
* Convert a Markdown note body into Google Docs API batchUpdate requests that
* recreate the content with structure preserved — headings, bold/italic,
* bullet & numbered lists, and links — instead of flattening everything to
* plain text.
*
* Strategy: the doc body is cleared first (see syncLinkedGoogleDocFromMarkdown),
* then we insert all paragraph text in one shot at `insertIndex` and layer
* paragraph/text styling on top using ranges computed against the inserted
* text. Style requests do not shift indices, so a single insertText followed by
* style updates stays index-stable within one batchUpdate.
*
* Out of scope (degrade to plain paragraphs): tables, images, code fences,
* blockquotes, nested lists.
*/
/**
 * One styled span inside a single line's plain text. At most one of `bold`,
 * `italic` or `link` is set by the parser for a given range.
 */
interface InlineRange {
  /** Offset of the first styled character, counted in the marker-free text. */
  start: number;
  /** Offset just past the last styled character (exclusive). */
  end: number;
  /** Present and `true` when the span came from a bold marker pair. */
  bold?: boolean;
  /** Present and `true` when the span came from an italic marker pair. */
  italic?: boolean;
  /** Target URL when the span came from a `[label](url)` link. */
  link?: string;
}
/** One Markdown line after parsing; each block becomes one Docs paragraph. */
interface Block {
  /** The line's visible text with block prefix and inline markers removed. */
  text: string;
  /** Styled spans inside `text`, with offsets relative to `text`. */
  ranges: InlineRange[];
  /** Whether the line is a heading or an ordinary paragraph. */
  paragraph: 'normal' | 'heading';
  /** Number of leading `#` characters (1–6); set only for headings. */
  headingLevel?: number;
  /** List flavour when the line is a bullet or numbered list item. */
  list?: 'bullet' | 'number';
}
/**
 * Maps a Markdown heading level (1–6) to the Docs named style applied to that
 * paragraph. The names are listed in level order, so position + 1 is the level.
 */
const HEADING_NAMED_STYLE: Record<number, string> = [
  'HEADING_1',
  'HEADING_2',
  'HEADING_3',
  'HEADING_4',
  'HEADING_5',
  'HEADING_6',
].reduce<Record<number, string>>((byLevel, styleName, position) => {
  byLevel[position + 1] = styleName;
  return byLevel;
}, {});
/**
 * Parse a single line's inline Markdown (bold, italic, code, links) into plain
 * text plus the style ranges that apply to it. Offsets are relative to the
 * returned text. Nested emphasis is not handled; inner markers are kept as-is.
 *
 * Emphasis markers are only honoured where Markdown renderers honour them:
 * - the emphasised text may not start or end with whitespace, so arithmetic
 *   such as "2 * 3 * 4" keeps its asterisks;
 * - underscore markers must sit at a word boundary, so identifiers such as
 *   "snake_case_name" keep their underscores.
 * Markers that fail these checks are copied through as ordinary characters.
 *
 * @param raw One line of Markdown without its block prefix (heading hashes,
 *   bullet, or list number).
 * @returns The line's visible text and the styled ranges inside it.
 */
export function parseInline(raw: string): { text: string; ranges: InlineRange[] } {
  // Compiled once per call rather than once per character.
  const linkPattern = /^\[([^\]]+)\]\(([^)\s]+)\)/;
  // Content is one non-space character, or a run that starts and ends with one.
  const boldPattern = /^(\*\*|__)(\S|\S.*?\S)\1/;
  const italicPattern = /^(\*|_)([^\s*_](?:[^*_]*[^\s*_])?)\1/;
  const codePattern = /^`([^`]+)`/;
  // Letters, digits and "_" count as word characters for the boundary check.
  const wordChar = /[\p{L}\p{N}_]/u;

  // True when an underscore marker pair touches a word character on either
  // side, i.e. it is part of an identifier rather than emphasis. charAt()
  // yields '' outside the string, which never matches.
  const splitsWord = (delimiter: string, at: number, matchLength: number): boolean =>
    delimiter.startsWith('_') &&
    (wordChar.test(raw.charAt(at - 1)) || wordChar.test(raw.charAt(at + matchLength)));

  let text = '';
  const ranges: InlineRange[] = [];
  let i = 0;

  while (i < raw.length) {
    const ch = raw.charAt(i);

    // Only these characters can open a construct; everything else is copied
    // straight through without slicing the line or running any pattern.
    if (ch !== '[' && ch !== '*' && ch !== '_' && ch !== '`') {
      text += ch;
      i += 1;
      continue;
    }

    const rest = raw.slice(i);

    // Link: [label](url)
    const link = linkPattern.exec(rest);
    if (link) {
      const start = text.length;
      text += link[1];
      ranges.push({ start, end: text.length, link: link[2] });
      i += link[0].length;
      continue;
    }

    // Bold: **text** or __text__
    const bold = boldPattern.exec(rest);
    if (bold && !splitsWord(bold[1], i, bold[0].length)) {
      const start = text.length;
      text += bold[2];
      ranges.push({ start, end: text.length, bold: true });
      i += bold[0].length;
      continue;
    }

    // Italic: *text* or _text_
    const italic = italicPattern.exec(rest);
    if (italic && !splitsWord(italic[1], i, italic[0].length)) {
      const start = text.length;
      text += italic[2];
      ranges.push({ start, end: text.length, italic: true });
      i += italic[0].length;
      continue;
    }

    // Inline code: `text` — kept as text, no monospace styling applied.
    const code = codePattern.exec(rest);
    if (code) {
      text += code[1];
      i += code[0].length;
      continue;
    }

    // A marker character that opened nothing is ordinary text.
    text += ch;
    i += 1;
  }

  return { text, ranges };
}
/**
 * Classify one Markdown line as a heading, a bullet item, a numbered item or a
 * plain paragraph, and parse the inline Markdown of its content.
 *
 * @param line A single line of the note body (no newline).
 * @returns The block's plain text, inline ranges and paragraph/list kind.
 */
function parseBlock(line: string): Block {
  // Headings: one to six "#" followed by whitespace.
  const headingMatch = /^(#{1,6})\s+(.*)$/.exec(line);
  if (headingMatch) {
    return {
      ...parseInline(headingMatch[2]),
      paragraph: 'heading',
      headingLevel: headingMatch[1].length,
    };
  }

  // List items, tried in the same order as before: bullets, then numbers.
  const listPatterns = [
    { list: 'bullet', pattern: /^\s*[-*+]\s+(.*)$/ },
    { list: 'number', pattern: /^\s*\d+\.\s+(.*)$/ },
  ] as const;
  for (const { list, pattern } of listPatterns) {
    const itemMatch = pattern.exec(line);
    if (itemMatch) {
      return { ...parseInline(itemMatch[1]), paragraph: 'normal', list };
    }
  }

  // Anything else is an ordinary paragraph (blank lines included).
  return { ...parseInline(line), paragraph: 'normal' };
}
/**
 * Build the batchUpdate requests for the given Markdown body. Each line becomes
 * one paragraph (blank lines included, to preserve spacing).
 *
 * Request order:
 * 1. one `insertText` carrying every paragraph's text;
 * 2. resets over the whole inserted range (bullets removed, paragraph style
 *    set to NORMAL_TEXT, bold/italic cleared). The Docs API copies the
 *    paragraph style and bullets of the paragraph at the insertion index onto
 *    newly inserted paragraphs, so without the reset a doc whose remaining
 *    paragraph was a heading or list item would turn every pushed line into one;
 * 3. per-paragraph heading/list requests and per-range text styles.
 * None of the requests after the insert add or remove characters here (list
 * item text never starts with a tab), so the computed indices stay valid.
 *
 * @param body Markdown note body. Both LF and CRLF line endings are accepted.
 * @param insertIndex Document index at which the text is inserted.
 * @returns The requests in the order they must be sent, or an empty array when
 *   the body contains only whitespace.
 */
export function markdownToDocsRequests(
  body: string,
  insertIndex = 1,
): docs_v1.Schema$Request[] {
  const trimmed = body.replace(/\s+$/, '');
  if (!trimmed) return [];

  // Split on CRLF as well as LF: a stray "\r" would otherwise be pushed into
  // the doc and would stop headings from being recognised.
  const lines = trimmed.split(/\r?\n/);

  // Concatenate every block's text, each terminated by the newline that ends
  // its paragraph, and remember where each block starts in the document.
  let fullText = '';
  const placed: Array<{ block: Block; start: number }> = [];
  for (const line of lines) {
    const block = parseBlock(line);
    placed.push({ block, start: insertIndex + fullText.length });
    fullText += `${block.text}\n`;
  }

  const insertedRange = { startIndex: insertIndex, endIndex: insertIndex + fullText.length };
  const requests: docs_v1.Schema$Request[] = [
    { insertText: { location: { index: insertIndex }, text: fullText } },
    // Start every inserted paragraph from a known baseline (see header note).
    { deleteParagraphBullets: { range: insertedRange } },
    {
      updateParagraphStyle: {
        range: insertedRange,
        paragraphStyle: { namedStyleType: 'NORMAL_TEXT' },
        fields: 'namedStyleType',
      },
    },
    // Fields named in the mask but left unset are reset to their defaults.
    { updateTextStyle: { range: insertedRange, textStyle: {}, fields: 'bold,italic' } },
  ];

  for (const { block, start } of placed) {
    const textEnd = start + block.text.length;
    const paraEnd = textEnd + 1; // include the trailing newline

    if (block.paragraph === 'heading' && block.headingLevel) {
      requests.push({
        updateParagraphStyle: {
          range: { startIndex: start, endIndex: paraEnd },
          paragraphStyle: { namedStyleType: HEADING_NAMED_STYLE[block.headingLevel] },
          fields: 'namedStyleType',
        },
      });
    }

    // An empty list item ("- ") is left as a plain blank paragraph.
    if (block.list && block.text.length > 0) {
      requests.push({
        createParagraphBullets: {
          range: { startIndex: start, endIndex: paraEnd },
          bulletPreset: block.list === 'number'
            ? 'NUMBERED_DECIMAL_ALPHA_ROMAN'
            : 'BULLET_DISC_CIRCLE_SQUARE',
        },
      });
    }

    for (const r of block.ranges) {
      if (r.end <= r.start) continue; // nothing to style
      const range = { startIndex: start + r.start, endIndex: start + r.end };
      if (r.bold) {
        requests.push({ updateTextStyle: { range, textStyle: { bold: true }, fields: 'bold' } });
      }
      if (r.italic) {
        requests.push({ updateTextStyle: { range, textStyle: { italic: true }, fields: 'italic' } });
      }
      if (r.link) {
        requests.push({
          updateTextStyle: { range, textStyle: { link: { url: r.link } }, fields: 'link' },
        });
      }
    }
  }

  return requests;
}

View file

@ -693,10 +693,14 @@ const ipcSchemas = {
req: z.object({
path: RelPath,
markdown: z.string(),
// Overwrite the Google Doc even if it changed remotely since last sync.
force: z.boolean().optional(),
}),
res: z.object({
synced: z.boolean(),
syncedAt: z.string().optional(),
// True when a remote edit was detected and the push was held back.
conflict: z.boolean().optional(),
error: z.string().optional(),
}),
},