fix(gdrive): validate folder access, run config test, harden Drive API (#321)

* fix(gdrive): validate folder access, run config test, harden Drive API

Connection test and setup validation now verify folder_id resolves to an accessible Drive folder before counting Docs, via a shared verifyGdriveFolderAndCountDocs helper, so a wrong or unshared folder fails instead of passing with 0 docs.

Move gdrive-config.test.ts under test/ so Vitest's test/** glob actually runs it; escape folder_id in the Drive query; add retry/backoff on transient Google API responses; and record skipped non-Google-Doc files in the staged manifest.

* chore: sync uv.lock to ktx-daemon/ktx-sl 0.13.1
This commit is contained in:
Andrey Avtomonov 2026-06-28 01:02:37 +02:00 committed by GitHub
parent 5645dc4d28
commit ca231df5fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 346 additions and 65 deletions

View file

@ -434,7 +434,15 @@ describe('runKtxConnection', () => {
],
nextPageToken: null,
}));
const createGdriveClient = vi.fn(async () => ({ listFiles }));
const getFile = vi.fn(async () => ({
id: 'folder-123',
name: 'Docs',
mimeType: 'application/vnd.google-apps.folder',
parents: [],
webViewLink: null,
modifiedTime: null,
}));
const createGdriveClient = vi.fn(async () => ({ listFiles, getFile }));
const io = makeIo();
await expect(
@ -442,12 +450,38 @@ describe('runKtxConnection', () => {
).resolves.toBe(0);
expect(createGdriveClient).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'docs_drive');
expect(listFiles).toHaveBeenCalledWith({ q: "'folder-123' in parents and trashed = false" });
expect(getFile).toHaveBeenCalledWith('folder-123');
expect(listFiles).toHaveBeenCalledWith({ q: "'folder-123' in parents and trashed = false", pageToken: undefined });
expect(io.stdout()).toContain('Connection test passed: docs_drive');
expect(io.stdout()).toContain('Driver: gdrive');
expect(io.stdout()).toContain('Docs: 1');
});
it('fails a Google Drive connection test when the folder is not accessible', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
docs_drive: {
driver: 'gdrive',
service_account_key_ref: 'file:/tmp/gdrive-key.json', // pragma: allowlist secret
folder_id: 'missing-folder',
recursive: false,
},
});
const listFiles = vi.fn();
const getFile = vi.fn(async () => null);
const createGdriveClient = vi.fn(async () => ({ listFiles, getFile }));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'docs_drive' }, io.io, { createGdriveClient }),
).resolves.toBe(1);
expect(getFile).toHaveBeenCalledWith('missing-folder');
expect(listFiles).not.toHaveBeenCalled();
expect(io.stderr()).toContain('is not accessible');
});
it('tests a dbt connection via testRepoConnection (success)', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
@ -593,6 +627,14 @@ describe('runKtxConnection', () => {
],
nextPageToken: null,
})),
getFile: vi.fn(async () => ({
id: 'folder-123',
name: 'Docs',
mimeType: 'application/vnd.google-apps.folder',
parents: [],
webViewLink: null,
modifiedTime: null,
})),
}));
const io = makeIo();

View file

@ -0,0 +1,71 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import {
gdriveConnectionToPullConfig,
parseGdriveConnectionConfig,
resolveGdriveServiceAccountKey,
} from '../../../src/context/connections/gdrive-config.js';
describe('standalone gdrive connection config', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-gdrive-config-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('parses config with safe defaults', () => {
const parsed = parseGdriveConnectionConfig({
driver: 'gdrive',
service_account_key_ref: 'file:/tmp/google-key.json', // pragma: allowlist secret
folder_id: 'folder-123',
});
expect(parsed).toEqual({
driver: 'gdrive',
service_account_key_ref: 'file:/tmp/google-key.json', // pragma: allowlist secret
folder_id: 'folder-123',
recursive: false,
});
});
it('requires file-based service account keys', () => {
expect(() =>
parseGdriveConnectionConfig({
driver: 'gdrive',
service_account_key_ref: 'env:GOOGLE_KEY', // pragma: allowlist secret
folder_id: 'folder-123',
}),
).toThrow('gdrive service_account_key_ref must use file:/path/to/key.json');
});
it('resolves service account key files', async () => {
const keyPath = join(tempDir, 'google-key.json');
await writeFile(keyPath, '{"client_email":"bot@example.com","private_key":"line-1"}\n', 'utf-8'); // pragma: allowlist secret
await expect(resolveGdriveServiceAccountKey(`file:${keyPath}`)).resolves.toContain('"client_email":"bot@example.com"');
});
it('converts config into adapter pull config', async () => {
const keyPath = join(tempDir, 'google-key.json');
await writeFile(keyPath, '{"client_email":"bot@example.com","private_key":"line-1"}\n', 'utf-8'); // pragma: allowlist secret
const pullConfig = await gdriveConnectionToPullConfig(
parseGdriveConnectionConfig({
driver: 'gdrive',
service_account_key_ref: `file:${keyPath}`, // pragma: allowlist secret
folder_id: 'folder-123',
recursive: true,
}),
);
expect(pullConfig).toEqual({
serviceAccountKey: '{"client_email":"bot@example.com","private_key":"line-1"}', // pragma: allowlist secret
folderId: 'folder-123',
recursive: true,
});
});
});

View file

@ -22,7 +22,8 @@ const listFiles = vi.fn(async () => ({
nextPageToken: null,
}));
vi.mock('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js', () => ({
vi.mock('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js', async (importOriginal) => ({
...(await importOriginal<typeof import('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js')>()),
createGoogleDocsClients: vi.fn(() => ({
drive: { listFiles },
docs: { getDocument },
@ -81,4 +82,42 @@ describe('fetchGdriveSnapshot', () => {
readFile(join(stagedDir, 'docs', 'herness-and-enterprise-a-7913523027', 'page.md'), 'utf-8'),
).resolves.toContain('# Herness and Enterprise Agent Operating Framework for Connected Systems');
});
it('records skipped non-Google-Doc files in the manifest with a summary warning', async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'ktx-gdrive-fetch-'));
listFiles.mockResolvedValueOnce({
files: [
{
id: 'doc-1',
name: 'Doc',
mimeType: 'application/vnd.google-apps.document',
parents: ['folder-123'],
webViewLink: 'https://docs.google.com/document/d/doc-1',
modifiedTime: '2026-05-24T01:53:28.347Z',
},
{
id: 'sheet-1',
name: 'Sheet',
mimeType: 'application/vnd.google-apps.spreadsheet',
parents: ['folder-123'],
webViewLink: 'https://docs.google.com/spreadsheets/d/sheet-1',
modifiedTime: '2026-05-24T01:53:28.347Z',
},
],
nextPageToken: null,
});
const manifest = await fetchGdriveSnapshot({
key: { client_email: 'bot@example.com', private_key: 'secret' }, // pragma: allowlist secret
config: { serviceAccountKey: 'unused', folderId: 'folder-123', recursive: false }, // pragma: allowlist secret
stagedDir,
});
expect(manifest.fileCount).toBe(1);
expect(manifest.skipped).toEqual([
{ externalId: 'sheet-1', reason: 'unsupported mime type: application/vnd.google-apps.spreadsheet' },
]);
expect(manifest.warnings).toHaveLength(1);
expect(manifest.warnings[0]).toContain('Skipped 1');
});
});

View file

@ -0,0 +1,100 @@
import { describe, expect, it, vi } from 'vitest';
import {
driveFolderChildrenQuery,
fetchWithGoogleRetry,
verifyGdriveFolderAndCountDocs,
type GoogleDriveClient,
} from '../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js';
import {
GDRIVE_DOC_MIME_TYPE,
GDRIVE_FOLDER_MIME_TYPE,
type GdriveFileRecord,
} from '../../../../../src/context/ingest/adapters/gdrive/types.js';
function fileRecord(partial: Partial<GdriveFileRecord> & { id: string; mimeType: string }): GdriveFileRecord {
return {
name: partial.name ?? partial.id,
parents: [],
webViewLink: null,
modifiedTime: null,
...partial,
};
}
describe('driveFolderChildrenQuery', () => {
it('escapes single quotes and backslashes in the folder id', () => {
expect(driveFolderChildrenQuery('abc')).toBe("'abc' in parents and trashed = false");
expect(driveFolderChildrenQuery("a'b")).toBe("'a\\'b' in parents and trashed = false");
expect(driveFolderChildrenQuery('a\\b')).toBe("'a\\\\b' in parents and trashed = false");
});
});
describe('verifyGdriveFolderAndCountDocs', () => {
it('throws a caller-facing error when the folder is not accessible', async () => {
const drive: GoogleDriveClient = {
getFile: vi.fn(async () => null),
listFiles: vi.fn(),
};
await expect(verifyGdriveFolderAndCountDocs(drive, 'missing')).rejects.toThrow('is not accessible');
expect(drive.listFiles).not.toHaveBeenCalled();
});
it('throws when the id resolves to a non-folder', async () => {
const drive: GoogleDriveClient = {
getFile: vi.fn(async () => fileRecord({ id: 'doc-1', mimeType: GDRIVE_DOC_MIME_TYPE })),
listFiles: vi.fn(),
};
await expect(verifyGdriveFolderAndCountDocs(drive, 'doc-1')).rejects.toThrow('is not a folder');
expect(drive.listFiles).not.toHaveBeenCalled();
});
it('counts Google Docs across pages and ignores non-Docs', async () => {
const listFiles = vi
.fn()
.mockResolvedValueOnce({
files: [
fileRecord({ id: '1', mimeType: GDRIVE_DOC_MIME_TYPE }),
fileRecord({ id: '2', mimeType: 'application/vnd.google-apps.spreadsheet' }),
],
nextPageToken: 'page-2',
})
.mockResolvedValueOnce({
files: [fileRecord({ id: '3', mimeType: GDRIVE_DOC_MIME_TYPE })],
nextPageToken: null,
});
const drive: GoogleDriveClient = {
getFile: vi.fn(async () => fileRecord({ id: 'folder', mimeType: GDRIVE_FOLDER_MIME_TYPE })),
listFiles,
};
await expect(verifyGdriveFolderAndCountDocs(drive, 'folder')).resolves.toBe(2);
expect(listFiles).toHaveBeenCalledTimes(2);
});
});
describe('fetchWithGoogleRetry', () => {
const noopSleep = async () => {};
it('retries transient 5xx responses then returns success', async () => {
const doFetch = vi
.fn()
.mockResolvedValueOnce(new Response('busy', { status: 503 }))
.mockResolvedValueOnce(new Response('{}', { status: 200 }));
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep });
expect(response.status).toBe(200);
expect(doFetch).toHaveBeenCalledTimes(2);
});
it('does not retry non-retryable responses', async () => {
const doFetch = vi.fn().mockResolvedValue(new Response('nope', { status: 404 }));
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep });
expect(response.status).toBe(404);
expect(doFetch).toHaveBeenCalledTimes(1);
});
it('stops after maxAttempts when responses stay transient', async () => {
const doFetch = vi.fn().mockResolvedValue(new Response('rate', { status: 429 }));
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep, maxAttempts: 3 });
expect(response.status).toBe(429);
expect(doFetch).toHaveBeenCalledTimes(3);
});
});