mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-09 19:45:17 +02:00
feat(google-docs): import native Docs AND uploaded .docx files from Drive
This commit is contained in:
parent
112bf461c9
commit
8ecd2254b1
2 changed files with 78 additions and 23 deletions
|
|
@ -15,17 +15,24 @@ const REGISTRY_ABS = '/ws/knowledge/.assets/google-docs/links.json';
|
|||
let vfs: Map<string, string | Buffer>;
|
||||
let exportCalls: Array<{ fileId: string; mimeType: string }>;
|
||||
let updateCalls: Array<{ fileId: string }>;
|
||||
let getMediaCalls: number;
|
||||
|
||||
const driveFile = {
|
||||
id: 'doc-123',
|
||||
name: 'My Doc',
|
||||
webViewLink: 'https://docs.google.com/document/d/doc-123/edit',
|
||||
modifiedTime: '2026-05-28T10:00:00.000Z',
|
||||
owners: [{ displayName: 'Arjun', emailAddress: 'arjun@example.com' }],
|
||||
};
|
||||
|
||||
const GDOC_MIME = 'application/vnd.google-apps.document';
|
||||
const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
||||
|
||||
function makeDriveFile() {
|
||||
return {
|
||||
id: 'doc-123',
|
||||
name: 'My Doc',
|
||||
webViewLink: 'https://docs.google.com/document/d/doc-123/edit',
|
||||
modifiedTime: '2026-05-28T10:00:00.000Z',
|
||||
mimeType: GDOC_MIME,
|
||||
owners: [{ displayName: 'Arjun', emailAddress: 'arjun@example.com' }],
|
||||
};
|
||||
}
|
||||
let driveFile = makeDriveFile();
|
||||
const docxBytes = () => new TextEncoder().encode('DOCX_BYTES').buffer;
|
||||
const downloadedBytes = () => new TextEncoder().encode('DOWNLOADED').buffer;
|
||||
|
||||
function seedRegistry(entries: Record<string, unknown>) {
|
||||
vfs.set(REGISTRY_ABS, JSON.stringify(entries));
|
||||
|
|
@ -41,6 +48,8 @@ beforeEach(() => {
|
|||
vfs = new Map();
|
||||
exportCalls = [];
|
||||
updateCalls = [];
|
||||
getMediaCalls = 0;
|
||||
driveFile = makeDriveFile();
|
||||
|
||||
vi.doMock('node:fs/promises', () => ({
|
||||
default: {
|
||||
|
|
@ -71,7 +80,10 @@ beforeEach(() => {
|
|||
|
||||
const driveClient = {
|
||||
files: {
|
||||
get: vi.fn(async () => ({ data: driveFile })),
|
||||
get: vi.fn(async (params: { alt?: string }) => {
|
||||
if (params.alt === 'media') { getMediaCalls += 1; return { data: downloadedBytes() }; }
|
||||
return { data: driveFile };
|
||||
}),
|
||||
export: vi.fn(async (params: { fileId: string; mimeType: string }) => {
|
||||
exportCalls.push({ fileId: params.fileId, mimeType: params.mimeType });
|
||||
return { data: docxBytes() };
|
||||
|
|
@ -108,9 +120,23 @@ describe('importGoogleDoc', () => {
|
|||
expect(link).toMatchObject({
|
||||
id: 'doc-123',
|
||||
title: 'My Doc',
|
||||
mimeType: GDOC_MIME,
|
||||
remoteModifiedTime: '2026-05-28T10:00:00.000Z',
|
||||
});
|
||||
});
|
||||
|
||||
it('downloads an uploaded .docx file directly (no export, no double extension)', async () => {
|
||||
driveFile = { ...makeDriveFile(), name: 'Report.docx', mimeType: DOCX_MIME };
|
||||
const { importGoogleDoc } = await import('./google_docs.js');
|
||||
const result = await importGoogleDoc('doc-123', 'knowledge');
|
||||
|
||||
// Uploaded Word file → files.get(alt=media), not files.export.
|
||||
expect(exportCalls).toHaveLength(0);
|
||||
expect(getMediaCalls).toBe(1);
|
||||
// No "Report.docx.docx".
|
||||
expect(result.path).toBe('knowledge/Report.docx');
|
||||
expect(readRegistry()['knowledge/Report.docx'].mimeType).toBe(DOCX_MIME);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getGoogleDocLink', () => {
|
||||
|
|
|
|||
|
|
@ -17,9 +17,12 @@ export type GoogleDocListItem = {
|
|||
url: string;
|
||||
modifiedTime: string | null;
|
||||
owner: string | null;
|
||||
// Drive mimeType — distinguishes a native Google Doc (needs export) from an
|
||||
// uploaded Word file (download its bytes directly).
|
||||
mimeType: string;
|
||||
};
|
||||
|
||||
// Metadata linking a local .docx file to its source Google Doc. Stored in a
|
||||
// Metadata linking a local .docx file to its source Drive file. Stored in a
|
||||
// registry (see LINKS_REL) because a .docx is binary and can't carry the
|
||||
// frontmatter a markdown note would.
|
||||
export type GoogleDocLink = {
|
||||
|
|
@ -27,14 +30,18 @@ export type GoogleDocLink = {
|
|||
url: string;
|
||||
title: string;
|
||||
syncedAt: string;
|
||||
// Source Drive mimeType (native Google Doc vs uploaded .docx) — decides
|
||||
// whether a pull exports or downloads.
|
||||
mimeType?: string;
|
||||
// Drive `modifiedTime` (RFC3339) at the last sync — used to detect remote
|
||||
// edits before a sync-up would overwrite them.
|
||||
remoteModifiedTime?: string;
|
||||
};
|
||||
|
||||
const GOOGLE_DOC_MIME = 'application/vnd.google-apps.document';
|
||||
// The Google Doc is exported to / imported from a real Word document so the
|
||||
// in-app docx editor round-trips it with full fidelity (tables, images, styles).
|
||||
// A native Google Doc is exported to / written back as a real Word document so
|
||||
// the in-app docx editor round-trips it with full fidelity. Uploaded .docx
|
||||
// files already are Word documents and are downloaded/uploaded as-is.
|
||||
const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
||||
|
||||
// Hidden registry mapping workspace-relative .docx paths → their Google Doc.
|
||||
|
|
@ -123,10 +130,19 @@ async function getDriveClient() {
|
|||
return google.drive({ version: 'v3', auth });
|
||||
}
|
||||
|
||||
async function exportDocx(fileId: string): Promise<Buffer> {
|
||||
// Get the file as .docx bytes: a native Google Doc is exported; an uploaded
|
||||
// Word file is downloaded as-is.
|
||||
async function fetchAsDocx(fileId: string, mimeType: string | undefined): Promise<Buffer> {
|
||||
const driveClient = await getDriveClient();
|
||||
const result = await driveClient.files.export(
|
||||
{ fileId, mimeType: DOCX_MIME },
|
||||
if (!mimeType || mimeType === GOOGLE_DOC_MIME) {
|
||||
const result = await driveClient.files.export(
|
||||
{ fileId, mimeType: DOCX_MIME },
|
||||
{ responseType: 'arraybuffer' },
|
||||
);
|
||||
return Buffer.from(result.data as ArrayBuffer);
|
||||
}
|
||||
const result = await driveClient.files.get(
|
||||
{ fileId, alt: 'media', supportsAllDrives: true },
|
||||
{ responseType: 'arraybuffer' },
|
||||
);
|
||||
return Buffer.from(result.data as ArrayBuffer);
|
||||
|
|
@ -136,7 +152,8 @@ async function getDocMetadata(fileId: string): Promise<GoogleDocListItem> {
|
|||
const driveClient = await getDriveClient();
|
||||
const result = await driveClient.files.get({
|
||||
fileId,
|
||||
fields: 'id,name,webViewLink,modifiedTime,owners(displayName,emailAddress)',
|
||||
fields: 'id,name,webViewLink,modifiedTime,mimeType,owners(displayName,emailAddress)',
|
||||
supportsAllDrives: true,
|
||||
});
|
||||
const file = result.data;
|
||||
if (!file.id || !file.name) throw new Error('Selected Google Doc is missing metadata.');
|
||||
|
|
@ -150,12 +167,14 @@ function toGoogleDocListItem(file: drive.Schema$File): GoogleDocListItem {
|
|||
url: file.webViewLink ?? `https://docs.google.com/document/d/${file.id}/edit`,
|
||||
modifiedTime: file.modifiedTime ?? null,
|
||||
owner: file.owners?.[0]?.displayName ?? file.owners?.[0]?.emailAddress ?? null,
|
||||
mimeType: file.mimeType ?? GOOGLE_DOC_MIME,
|
||||
};
|
||||
}
|
||||
|
||||
async function uniqueDocxPath(targetFolder: string, title: string): Promise<string> {
|
||||
const folder = normalizeKnowledgeDir(targetFolder);
|
||||
const base = sanitizeFilename(title);
|
||||
// Strip an existing .docx so an uploaded "Report.docx" doesn't become "Report.docx.docx".
|
||||
const base = sanitizeFilename(title.replace(/\.docx$/i, ''));
|
||||
let candidate = `${folder}/${base}.docx`;
|
||||
let index = 1;
|
||||
while (true) {
|
||||
|
|
@ -185,7 +204,9 @@ export async function listGoogleDocs(query?: string): Promise<{ files: GoogleDoc
|
|||
if (!status.hasRequiredScopes) throw new Error('Google is missing Drive access. Reconnect Google.');
|
||||
|
||||
const driveClient = await getDriveClient();
|
||||
const clauses = [`mimeType='${GOOGLE_DOC_MIME}'`, 'trashed=false'];
|
||||
// Native Google Docs (exportable) and uploaded Word files (downloadable).
|
||||
const typeClause = `(mimeType='${GOOGLE_DOC_MIME}' or mimeType='${DOCX_MIME}')`;
|
||||
const clauses = [typeClause, 'trashed=false'];
|
||||
const trimmed = query?.trim();
|
||||
if (trimmed) {
|
||||
clauses.push(`name contains '${escapeDriveQueryValue(trimmed)}'`);
|
||||
|
|
@ -195,7 +216,7 @@ export async function listGoogleDocs(query?: string): Promise<{ files: GoogleDoc
|
|||
q,
|
||||
pageSize: 25,
|
||||
orderBy: 'modifiedTime desc',
|
||||
fields: 'files(id,name,webViewLink,modifiedTime,owners(displayName,emailAddress))',
|
||||
fields: 'files(id,name,webViewLink,modifiedTime,mimeType,owners(displayName,emailAddress))',
|
||||
// Also surface docs in shared drives and "Shared with me", not just My Drive.
|
||||
corpora: 'allDrives',
|
||||
includeItemsFromAllDrives: true,
|
||||
|
|
@ -217,7 +238,7 @@ export async function importGoogleDoc(fileId: string, targetFolder: string): Pro
|
|||
if (!status.hasRequiredScopes) throw new Error('Google is missing Drive access. Reconnect Google.');
|
||||
|
||||
const doc = await getDocMetadata(fileId);
|
||||
const bytes = await exportDocx(fileId);
|
||||
const bytes = await fetchAsDocx(fileId, doc.mimeType);
|
||||
const relPath = await uniqueDocxPath(targetFolder, doc.name);
|
||||
const absPath = resolveWorkspacePath(relPath);
|
||||
await fs.mkdir(path.dirname(absPath), { recursive: true });
|
||||
|
|
@ -227,6 +248,7 @@ export async function importGoogleDoc(fileId: string, targetFolder: string): Pro
|
|||
url: doc.url,
|
||||
title: doc.name,
|
||||
syncedAt: new Date().toISOString(),
|
||||
mimeType: doc.mimeType,
|
||||
remoteModifiedTime: doc.modifiedTime ?? undefined,
|
||||
});
|
||||
return { path: relPath, doc };
|
||||
|
|
@ -237,7 +259,10 @@ export async function syncGoogleDocDown(relPath: string): Promise<{ ok: true; sy
|
|||
const link = await getGoogleDocLink(relPath);
|
||||
if (!link) throw new Error('This file is not linked to a Google Doc.');
|
||||
|
||||
const [bytes, meta] = await Promise.all([exportDocx(link.id), getDocMetadata(link.id)]);
|
||||
const [bytes, meta] = await Promise.all([
|
||||
fetchAsDocx(link.id, link.mimeType),
|
||||
getDocMetadata(link.id),
|
||||
]);
|
||||
await fs.writeFile(resolveWorkspacePath(normalizeRel(relPath)), bytes);
|
||||
const syncedAt = new Date().toISOString();
|
||||
await setLink(relPath, {
|
||||
|
|
@ -245,6 +270,7 @@ export async function syncGoogleDocDown(relPath: string): Promise<{ ok: true; sy
|
|||
url: link.url,
|
||||
title: link.title,
|
||||
syncedAt,
|
||||
mimeType: link.mimeType ?? meta.mimeType,
|
||||
remoteModifiedTime: meta.modifiedTime ?? link.remoteModifiedTime,
|
||||
});
|
||||
return { ok: true, syncedAt };
|
||||
|
|
@ -273,11 +299,13 @@ export async function syncGoogleDocUp(
|
|||
|
||||
const bytes = await fs.readFile(resolveWorkspacePath(normalizeRel(relPath)));
|
||||
const driveClient = await getDriveClient();
|
||||
// Uploading .docx media to a Google Doc converts it back into the existing
|
||||
// doc, keeping the file's id, URL and Google-Doc type intact.
|
||||
// For a native Google Doc, uploading .docx media converts it back into the
|
||||
// existing doc (id/URL/type preserved). For an uploaded .docx file, it just
|
||||
// replaces the bytes.
|
||||
await driveClient.files.update({
|
||||
fileId: link.id,
|
||||
media: { mimeType: DOCX_MIME, body: Readable.from(bytes) },
|
||||
supportsAllDrives: true,
|
||||
});
|
||||
|
||||
const meta = await getDocMetadata(link.id);
|
||||
|
|
@ -287,6 +315,7 @@ export async function syncGoogleDocUp(
|
|||
url: link.url,
|
||||
title: link.title,
|
||||
syncedAt,
|
||||
mimeType: link.mimeType ?? meta.mimeType,
|
||||
remoteModifiedTime: meta.modifiedTime ?? link.remoteModifiedTime,
|
||||
});
|
||||
return { synced: true, syncedAt };
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue