mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-21 20:18:11 +02:00
feat(google-docs): import native Docs AND uploaded .docx files from Drive
This commit is contained in:
parent
112bf461c9
commit
8ecd2254b1
2 changed files with 78 additions and 23 deletions
|
|
@ -15,17 +15,24 @@ const REGISTRY_ABS = '/ws/knowledge/.assets/google-docs/links.json';
|
||||||
let vfs: Map<string, string | Buffer>;
|
let vfs: Map<string, string | Buffer>;
|
||||||
let exportCalls: Array<{ fileId: string; mimeType: string }>;
|
let exportCalls: Array<{ fileId: string; mimeType: string }>;
|
||||||
let updateCalls: Array<{ fileId: string }>;
|
let updateCalls: Array<{ fileId: string }>;
|
||||||
|
let getMediaCalls: number;
|
||||||
|
|
||||||
const driveFile = {
|
const GDOC_MIME = 'application/vnd.google-apps.document';
|
||||||
id: 'doc-123',
|
|
||||||
name: 'My Doc',
|
|
||||||
webViewLink: 'https://docs.google.com/document/d/doc-123/edit',
|
|
||||||
modifiedTime: '2026-05-28T10:00:00.000Z',
|
|
||||||
owners: [{ displayName: 'Arjun', emailAddress: 'arjun@example.com' }],
|
|
||||||
};
|
|
||||||
|
|
||||||
const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
||||||
|
|
||||||
|
function makeDriveFile() {
|
||||||
|
return {
|
||||||
|
id: 'doc-123',
|
||||||
|
name: 'My Doc',
|
||||||
|
webViewLink: 'https://docs.google.com/document/d/doc-123/edit',
|
||||||
|
modifiedTime: '2026-05-28T10:00:00.000Z',
|
||||||
|
mimeType: GDOC_MIME,
|
||||||
|
owners: [{ displayName: 'Arjun', emailAddress: 'arjun@example.com' }],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
let driveFile = makeDriveFile();
|
||||||
const docxBytes = () => new TextEncoder().encode('DOCX_BYTES').buffer;
|
const docxBytes = () => new TextEncoder().encode('DOCX_BYTES').buffer;
|
||||||
|
const downloadedBytes = () => new TextEncoder().encode('DOWNLOADED').buffer;
|
||||||
|
|
||||||
function seedRegistry(entries: Record<string, unknown>) {
|
function seedRegistry(entries: Record<string, unknown>) {
|
||||||
vfs.set(REGISTRY_ABS, JSON.stringify(entries));
|
vfs.set(REGISTRY_ABS, JSON.stringify(entries));
|
||||||
|
|
@ -41,6 +48,8 @@ beforeEach(() => {
|
||||||
vfs = new Map();
|
vfs = new Map();
|
||||||
exportCalls = [];
|
exportCalls = [];
|
||||||
updateCalls = [];
|
updateCalls = [];
|
||||||
|
getMediaCalls = 0;
|
||||||
|
driveFile = makeDriveFile();
|
||||||
|
|
||||||
vi.doMock('node:fs/promises', () => ({
|
vi.doMock('node:fs/promises', () => ({
|
||||||
default: {
|
default: {
|
||||||
|
|
@ -71,7 +80,10 @@ beforeEach(() => {
|
||||||
|
|
||||||
const driveClient = {
|
const driveClient = {
|
||||||
files: {
|
files: {
|
||||||
get: vi.fn(async () => ({ data: driveFile })),
|
get: vi.fn(async (params: { alt?: string }) => {
|
||||||
|
if (params.alt === 'media') { getMediaCalls += 1; return { data: downloadedBytes() }; }
|
||||||
|
return { data: driveFile };
|
||||||
|
}),
|
||||||
export: vi.fn(async (params: { fileId: string; mimeType: string }) => {
|
export: vi.fn(async (params: { fileId: string; mimeType: string }) => {
|
||||||
exportCalls.push({ fileId: params.fileId, mimeType: params.mimeType });
|
exportCalls.push({ fileId: params.fileId, mimeType: params.mimeType });
|
||||||
return { data: docxBytes() };
|
return { data: docxBytes() };
|
||||||
|
|
@ -108,9 +120,23 @@ describe('importGoogleDoc', () => {
|
||||||
expect(link).toMatchObject({
|
expect(link).toMatchObject({
|
||||||
id: 'doc-123',
|
id: 'doc-123',
|
||||||
title: 'My Doc',
|
title: 'My Doc',
|
||||||
|
mimeType: GDOC_MIME,
|
||||||
remoteModifiedTime: '2026-05-28T10:00:00.000Z',
|
remoteModifiedTime: '2026-05-28T10:00:00.000Z',
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('downloads an uploaded .docx file directly (no export, no double extension)', async () => {
|
||||||
|
driveFile = { ...makeDriveFile(), name: 'Report.docx', mimeType: DOCX_MIME };
|
||||||
|
const { importGoogleDoc } = await import('./google_docs.js');
|
||||||
|
const result = await importGoogleDoc('doc-123', 'knowledge');
|
||||||
|
|
||||||
|
// Uploaded Word file → files.get(alt=media), not files.export.
|
||||||
|
expect(exportCalls).toHaveLength(0);
|
||||||
|
expect(getMediaCalls).toBe(1);
|
||||||
|
// No "Report.docx.docx".
|
||||||
|
expect(result.path).toBe('knowledge/Report.docx');
|
||||||
|
expect(readRegistry()['knowledge/Report.docx'].mimeType).toBe(DOCX_MIME);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('getGoogleDocLink', () => {
|
describe('getGoogleDocLink', () => {
|
||||||
|
|
|
||||||
|
|
@ -17,9 +17,12 @@ export type GoogleDocListItem = {
|
||||||
url: string;
|
url: string;
|
||||||
modifiedTime: string | null;
|
modifiedTime: string | null;
|
||||||
owner: string | null;
|
owner: string | null;
|
||||||
|
// Drive mimeType — distinguishes a native Google Doc (needs export) from an
|
||||||
|
// uploaded Word file (download its bytes directly).
|
||||||
|
mimeType: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Metadata linking a local .docx file to its source Google Doc. Stored in a
|
// Metadata linking a local .docx file to its source Drive file. Stored in a
|
||||||
// registry (see LINKS_REL) because a .docx is binary and can't carry the
|
// registry (see LINKS_REL) because a .docx is binary and can't carry the
|
||||||
// frontmatter a markdown note would.
|
// frontmatter a markdown note would.
|
||||||
export type GoogleDocLink = {
|
export type GoogleDocLink = {
|
||||||
|
|
@ -27,14 +30,18 @@ export type GoogleDocLink = {
|
||||||
url: string;
|
url: string;
|
||||||
title: string;
|
title: string;
|
||||||
syncedAt: string;
|
syncedAt: string;
|
||||||
|
// Source Drive mimeType (native Google Doc vs uploaded .docx) — decides
|
||||||
|
// whether a pull exports or downloads.
|
||||||
|
mimeType?: string;
|
||||||
// Drive `modifiedTime` (RFC3339) at the last sync — used to detect remote
|
// Drive `modifiedTime` (RFC3339) at the last sync — used to detect remote
|
||||||
// edits before a sync-up would overwrite them.
|
// edits before a sync-up would overwrite them.
|
||||||
remoteModifiedTime?: string;
|
remoteModifiedTime?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
const GOOGLE_DOC_MIME = 'application/vnd.google-apps.document';
|
const GOOGLE_DOC_MIME = 'application/vnd.google-apps.document';
|
||||||
// The Google Doc is exported to / imported from a real Word document so the
|
// A native Google Doc is exported to / written back as a real Word document so
|
||||||
// in-app docx editor round-trips it with full fidelity (tables, images, styles).
|
// the in-app docx editor round-trips it with full fidelity. Uploaded .docx
|
||||||
|
// files already are Word documents and are downloaded/uploaded as-is.
|
||||||
const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
||||||
|
|
||||||
// Hidden registry mapping workspace-relative .docx paths → their Google Doc.
|
// Hidden registry mapping workspace-relative .docx paths → their Google Doc.
|
||||||
|
|
@ -123,10 +130,19 @@ async function getDriveClient() {
|
||||||
return google.drive({ version: 'v3', auth });
|
return google.drive({ version: 'v3', auth });
|
||||||
}
|
}
|
||||||
|
|
||||||
async function exportDocx(fileId: string): Promise<Buffer> {
|
// Get the file as .docx bytes: a native Google Doc is exported; an uploaded
|
||||||
|
// Word file is downloaded as-is.
|
||||||
|
async function fetchAsDocx(fileId: string, mimeType: string | undefined): Promise<Buffer> {
|
||||||
const driveClient = await getDriveClient();
|
const driveClient = await getDriveClient();
|
||||||
const result = await driveClient.files.export(
|
if (!mimeType || mimeType === GOOGLE_DOC_MIME) {
|
||||||
{ fileId, mimeType: DOCX_MIME },
|
const result = await driveClient.files.export(
|
||||||
|
{ fileId, mimeType: DOCX_MIME },
|
||||||
|
{ responseType: 'arraybuffer' },
|
||||||
|
);
|
||||||
|
return Buffer.from(result.data as ArrayBuffer);
|
||||||
|
}
|
||||||
|
const result = await driveClient.files.get(
|
||||||
|
{ fileId, alt: 'media', supportsAllDrives: true },
|
||||||
{ responseType: 'arraybuffer' },
|
{ responseType: 'arraybuffer' },
|
||||||
);
|
);
|
||||||
return Buffer.from(result.data as ArrayBuffer);
|
return Buffer.from(result.data as ArrayBuffer);
|
||||||
|
|
@ -136,7 +152,8 @@ async function getDocMetadata(fileId: string): Promise<GoogleDocListItem> {
|
||||||
const driveClient = await getDriveClient();
|
const driveClient = await getDriveClient();
|
||||||
const result = await driveClient.files.get({
|
const result = await driveClient.files.get({
|
||||||
fileId,
|
fileId,
|
||||||
fields: 'id,name,webViewLink,modifiedTime,owners(displayName,emailAddress)',
|
fields: 'id,name,webViewLink,modifiedTime,mimeType,owners(displayName,emailAddress)',
|
||||||
|
supportsAllDrives: true,
|
||||||
});
|
});
|
||||||
const file = result.data;
|
const file = result.data;
|
||||||
if (!file.id || !file.name) throw new Error('Selected Google Doc is missing metadata.');
|
if (!file.id || !file.name) throw new Error('Selected Google Doc is missing metadata.');
|
||||||
|
|
@ -150,12 +167,14 @@ function toGoogleDocListItem(file: drive.Schema$File): GoogleDocListItem {
|
||||||
url: file.webViewLink ?? `https://docs.google.com/document/d/${file.id}/edit`,
|
url: file.webViewLink ?? `https://docs.google.com/document/d/${file.id}/edit`,
|
||||||
modifiedTime: file.modifiedTime ?? null,
|
modifiedTime: file.modifiedTime ?? null,
|
||||||
owner: file.owners?.[0]?.displayName ?? file.owners?.[0]?.emailAddress ?? null,
|
owner: file.owners?.[0]?.displayName ?? file.owners?.[0]?.emailAddress ?? null,
|
||||||
|
mimeType: file.mimeType ?? GOOGLE_DOC_MIME,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function uniqueDocxPath(targetFolder: string, title: string): Promise<string> {
|
async function uniqueDocxPath(targetFolder: string, title: string): Promise<string> {
|
||||||
const folder = normalizeKnowledgeDir(targetFolder);
|
const folder = normalizeKnowledgeDir(targetFolder);
|
||||||
const base = sanitizeFilename(title);
|
// Strip an existing .docx so an uploaded "Report.docx" doesn't become "Report.docx.docx".
|
||||||
|
const base = sanitizeFilename(title.replace(/\.docx$/i, ''));
|
||||||
let candidate = `${folder}/${base}.docx`;
|
let candidate = `${folder}/${base}.docx`;
|
||||||
let index = 1;
|
let index = 1;
|
||||||
while (true) {
|
while (true) {
|
||||||
|
|
@ -185,7 +204,9 @@ export async function listGoogleDocs(query?: string): Promise<{ files: GoogleDoc
|
||||||
if (!status.hasRequiredScopes) throw new Error('Google is missing Drive access. Reconnect Google.');
|
if (!status.hasRequiredScopes) throw new Error('Google is missing Drive access. Reconnect Google.');
|
||||||
|
|
||||||
const driveClient = await getDriveClient();
|
const driveClient = await getDriveClient();
|
||||||
const clauses = [`mimeType='${GOOGLE_DOC_MIME}'`, 'trashed=false'];
|
// Native Google Docs (exportable) and uploaded Word files (downloadable).
|
||||||
|
const typeClause = `(mimeType='${GOOGLE_DOC_MIME}' or mimeType='${DOCX_MIME}')`;
|
||||||
|
const clauses = [typeClause, 'trashed=false'];
|
||||||
const trimmed = query?.trim();
|
const trimmed = query?.trim();
|
||||||
if (trimmed) {
|
if (trimmed) {
|
||||||
clauses.push(`name contains '${escapeDriveQueryValue(trimmed)}'`);
|
clauses.push(`name contains '${escapeDriveQueryValue(trimmed)}'`);
|
||||||
|
|
@ -195,7 +216,7 @@ export async function listGoogleDocs(query?: string): Promise<{ files: GoogleDoc
|
||||||
q,
|
q,
|
||||||
pageSize: 25,
|
pageSize: 25,
|
||||||
orderBy: 'modifiedTime desc',
|
orderBy: 'modifiedTime desc',
|
||||||
fields: 'files(id,name,webViewLink,modifiedTime,owners(displayName,emailAddress))',
|
fields: 'files(id,name,webViewLink,modifiedTime,mimeType,owners(displayName,emailAddress))',
|
||||||
// Also surface docs in shared drives and "Shared with me", not just My Drive.
|
// Also surface docs in shared drives and "Shared with me", not just My Drive.
|
||||||
corpora: 'allDrives',
|
corpora: 'allDrives',
|
||||||
includeItemsFromAllDrives: true,
|
includeItemsFromAllDrives: true,
|
||||||
|
|
@ -217,7 +238,7 @@ export async function importGoogleDoc(fileId: string, targetFolder: string): Pro
|
||||||
if (!status.hasRequiredScopes) throw new Error('Google is missing Drive access. Reconnect Google.');
|
if (!status.hasRequiredScopes) throw new Error('Google is missing Drive access. Reconnect Google.');
|
||||||
|
|
||||||
const doc = await getDocMetadata(fileId);
|
const doc = await getDocMetadata(fileId);
|
||||||
const bytes = await exportDocx(fileId);
|
const bytes = await fetchAsDocx(fileId, doc.mimeType);
|
||||||
const relPath = await uniqueDocxPath(targetFolder, doc.name);
|
const relPath = await uniqueDocxPath(targetFolder, doc.name);
|
||||||
const absPath = resolveWorkspacePath(relPath);
|
const absPath = resolveWorkspacePath(relPath);
|
||||||
await fs.mkdir(path.dirname(absPath), { recursive: true });
|
await fs.mkdir(path.dirname(absPath), { recursive: true });
|
||||||
|
|
@ -227,6 +248,7 @@ export async function importGoogleDoc(fileId: string, targetFolder: string): Pro
|
||||||
url: doc.url,
|
url: doc.url,
|
||||||
title: doc.name,
|
title: doc.name,
|
||||||
syncedAt: new Date().toISOString(),
|
syncedAt: new Date().toISOString(),
|
||||||
|
mimeType: doc.mimeType,
|
||||||
remoteModifiedTime: doc.modifiedTime ?? undefined,
|
remoteModifiedTime: doc.modifiedTime ?? undefined,
|
||||||
});
|
});
|
||||||
return { path: relPath, doc };
|
return { path: relPath, doc };
|
||||||
|
|
@ -237,7 +259,10 @@ export async function syncGoogleDocDown(relPath: string): Promise<{ ok: true; sy
|
||||||
const link = await getGoogleDocLink(relPath);
|
const link = await getGoogleDocLink(relPath);
|
||||||
if (!link) throw new Error('This file is not linked to a Google Doc.');
|
if (!link) throw new Error('This file is not linked to a Google Doc.');
|
||||||
|
|
||||||
const [bytes, meta] = await Promise.all([exportDocx(link.id), getDocMetadata(link.id)]);
|
const [bytes, meta] = await Promise.all([
|
||||||
|
fetchAsDocx(link.id, link.mimeType),
|
||||||
|
getDocMetadata(link.id),
|
||||||
|
]);
|
||||||
await fs.writeFile(resolveWorkspacePath(normalizeRel(relPath)), bytes);
|
await fs.writeFile(resolveWorkspacePath(normalizeRel(relPath)), bytes);
|
||||||
const syncedAt = new Date().toISOString();
|
const syncedAt = new Date().toISOString();
|
||||||
await setLink(relPath, {
|
await setLink(relPath, {
|
||||||
|
|
@ -245,6 +270,7 @@ export async function syncGoogleDocDown(relPath: string): Promise<{ ok: true; sy
|
||||||
url: link.url,
|
url: link.url,
|
||||||
title: link.title,
|
title: link.title,
|
||||||
syncedAt,
|
syncedAt,
|
||||||
|
mimeType: link.mimeType ?? meta.mimeType,
|
||||||
remoteModifiedTime: meta.modifiedTime ?? link.remoteModifiedTime,
|
remoteModifiedTime: meta.modifiedTime ?? link.remoteModifiedTime,
|
||||||
});
|
});
|
||||||
return { ok: true, syncedAt };
|
return { ok: true, syncedAt };
|
||||||
|
|
@ -273,11 +299,13 @@ export async function syncGoogleDocUp(
|
||||||
|
|
||||||
const bytes = await fs.readFile(resolveWorkspacePath(normalizeRel(relPath)));
|
const bytes = await fs.readFile(resolveWorkspacePath(normalizeRel(relPath)));
|
||||||
const driveClient = await getDriveClient();
|
const driveClient = await getDriveClient();
|
||||||
// Uploading .docx media to a Google Doc converts it back into the existing
|
// For a native Google Doc, uploading .docx media converts it back into the
|
||||||
// doc, keeping the file's id, URL and Google-Doc type intact.
|
// existing doc (id/URL/type preserved). For an uploaded .docx file, it just
|
||||||
|
// replaces the bytes.
|
||||||
await driveClient.files.update({
|
await driveClient.files.update({
|
||||||
fileId: link.id,
|
fileId: link.id,
|
||||||
media: { mimeType: DOCX_MIME, body: Readable.from(bytes) },
|
media: { mimeType: DOCX_MIME, body: Readable.from(bytes) },
|
||||||
|
supportsAllDrives: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
const meta = await getDocMetadata(link.id);
|
const meta = await getDocMetadata(link.id);
|
||||||
|
|
@ -287,6 +315,7 @@ export async function syncGoogleDocUp(
|
||||||
url: link.url,
|
url: link.url,
|
||||||
title: link.title,
|
title: link.title,
|
||||||
syncedAt,
|
syncedAt,
|
||||||
|
mimeType: link.mimeType ?? meta.mimeType,
|
||||||
remoteModifiedTime: meta.modifiedTime ?? link.remoteModifiedTime,
|
remoteModifiedTime: meta.modifiedTime ?? link.remoteModifiedTime,
|
||||||
});
|
});
|
||||||
return { synced: true, syncedAt };
|
return { synced: true, syncedAt };
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue