fix(gdrive): validate folder access, run config test, harden Drive API (#321)

* fix(gdrive): validate folder access, run config test, harden Drive API

Connection test and setup validation now verify folder_id resolves to an accessible Drive folder before counting Docs, via a shared verifyGdriveFolderAndCountDocs helper, so a wrong or unshared folder fails instead of passing with 0 docs.

Move gdrive-config.test.ts under test/ so Vitest's test/** glob actually runs it; escape folder_id in the Drive query; add retry/backoff on transient Google API responses; and record skipped non-Google-Doc files in the staged manifest.

* chore: sync uv.lock to ktx-daemon/ktx-sl 0.13.1
This commit is contained in:
Andrey Avtomonov 2026-06-28 01:02:37 +02:00 committed by GitHub
parent 5645dc4d28
commit ca231df5fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 346 additions and 65 deletions

View file

@ -22,7 +22,8 @@ const listFiles = vi.fn(async () => ({
nextPageToken: null,
}));
vi.mock('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js', () => ({
vi.mock('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js', async (importOriginal) => ({
...(await importOriginal<typeof import('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js')>()),
createGoogleDocsClients: vi.fn(() => ({
drive: { listFiles },
docs: { getDocument },
@ -81,4 +82,42 @@ describe('fetchGdriveSnapshot', () => {
readFile(join(stagedDir, 'docs', 'herness-and-enterprise-a-7913523027', 'page.md'), 'utf-8'),
).resolves.toContain('# Herness and Enterprise Agent Operating Framework for Connected Systems');
});
it('records skipped non-Google-Doc files in the manifest with a summary warning', async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'ktx-gdrive-fetch-'));
listFiles.mockResolvedValueOnce({
files: [
{
id: 'doc-1',
name: 'Doc',
mimeType: 'application/vnd.google-apps.document',
parents: ['folder-123'],
webViewLink: 'https://docs.google.com/document/d/doc-1',
modifiedTime: '2026-05-24T01:53:28.347Z',
},
{
id: 'sheet-1',
name: 'Sheet',
mimeType: 'application/vnd.google-apps.spreadsheet',
parents: ['folder-123'],
webViewLink: 'https://docs.google.com/spreadsheets/d/sheet-1',
modifiedTime: '2026-05-24T01:53:28.347Z',
},
],
nextPageToken: null,
});
const manifest = await fetchGdriveSnapshot({
key: { client_email: 'bot@example.com', private_key: 'secret' }, // pragma: allowlist secret
config: { serviceAccountKey: 'unused', folderId: 'folder-123', recursive: false }, // pragma: allowlist secret
stagedDir,
});
expect(manifest.fileCount).toBe(1);
expect(manifest.skipped).toEqual([
{ externalId: 'sheet-1', reason: 'unsupported mime type: application/vnd.google-apps.spreadsheet' },
]);
expect(manifest.warnings).toHaveLength(1);
expect(manifest.warnings[0]).toContain('Skipped 1');
});
});

View file

@ -0,0 +1,100 @@
import { describe, expect, it, vi } from 'vitest';
import {
driveFolderChildrenQuery,
fetchWithGoogleRetry,
verifyGdriveFolderAndCountDocs,
type GoogleDriveClient,
} from '../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js';
import {
GDRIVE_DOC_MIME_TYPE,
GDRIVE_FOLDER_MIME_TYPE,
type GdriveFileRecord,
} from '../../../../../src/context/ingest/adapters/gdrive/types.js';
function fileRecord(partial: Partial<GdriveFileRecord> & { id: string; mimeType: string }): GdriveFileRecord {
return {
name: partial.name ?? partial.id,
parents: [],
webViewLink: null,
modifiedTime: null,
...partial,
};
}
describe('driveFolderChildrenQuery', () => {
it('escapes single quotes and backslashes in the folder id', () => {
expect(driveFolderChildrenQuery('abc')).toBe("'abc' in parents and trashed = false");
expect(driveFolderChildrenQuery("a'b")).toBe("'a\\'b' in parents and trashed = false");
expect(driveFolderChildrenQuery('a\\b')).toBe("'a\\\\b' in parents and trashed = false");
});
});
describe('verifyGdriveFolderAndCountDocs', () => {
it('throws a caller-facing error when the folder is not accessible', async () => {
const drive: GoogleDriveClient = {
getFile: vi.fn(async () => null),
listFiles: vi.fn(),
};
await expect(verifyGdriveFolderAndCountDocs(drive, 'missing')).rejects.toThrow('is not accessible');
expect(drive.listFiles).not.toHaveBeenCalled();
});
it('throws when the id resolves to a non-folder', async () => {
const drive: GoogleDriveClient = {
getFile: vi.fn(async () => fileRecord({ id: 'doc-1', mimeType: GDRIVE_DOC_MIME_TYPE })),
listFiles: vi.fn(),
};
await expect(verifyGdriveFolderAndCountDocs(drive, 'doc-1')).rejects.toThrow('is not a folder');
expect(drive.listFiles).not.toHaveBeenCalled();
});
it('counts Google Docs across pages and ignores non-Docs', async () => {
const listFiles = vi
.fn()
.mockResolvedValueOnce({
files: [
fileRecord({ id: '1', mimeType: GDRIVE_DOC_MIME_TYPE }),
fileRecord({ id: '2', mimeType: 'application/vnd.google-apps.spreadsheet' }),
],
nextPageToken: 'page-2',
})
.mockResolvedValueOnce({
files: [fileRecord({ id: '3', mimeType: GDRIVE_DOC_MIME_TYPE })],
nextPageToken: null,
});
const drive: GoogleDriveClient = {
getFile: vi.fn(async () => fileRecord({ id: 'folder', mimeType: GDRIVE_FOLDER_MIME_TYPE })),
listFiles,
};
await expect(verifyGdriveFolderAndCountDocs(drive, 'folder')).resolves.toBe(2);
expect(listFiles).toHaveBeenCalledTimes(2);
});
});
describe('fetchWithGoogleRetry', () => {
const noopSleep = async () => {};
it('retries transient 5xx responses then returns success', async () => {
const doFetch = vi
.fn()
.mockResolvedValueOnce(new Response('busy', { status: 503 }))
.mockResolvedValueOnce(new Response('{}', { status: 200 }));
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep });
expect(response.status).toBe(200);
expect(doFetch).toHaveBeenCalledTimes(2);
});
it('does not retry non-retryable responses', async () => {
const doFetch = vi.fn().mockResolvedValue(new Response('nope', { status: 404 }));
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep });
expect(response.status).toBe(404);
expect(doFetch).toHaveBeenCalledTimes(1);
});
it('stops after maxAttempts when responses stay transient', async () => {
const doFetch = vi.fn().mockResolvedValue(new Response('rate', { status: 429 }));
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep, maxAttempts: 3 });
expect(response.status).toBe(429);
expect(doFetch).toHaveBeenCalledTimes(3);
});
});