mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
fix(gdrive): validate folder access, run config test, harden Drive API (#321)
* fix(gdrive): validate folder access, run config test, harden Drive API Connection test and setup validation now verify folder_id resolves to an accessible Drive folder before counting Docs, via a shared verifyGdriveFolderAndCountDocs helper, so a wrong or unshared folder fails instead of passing with 0 docs. Move gdrive-config.test.ts under test/ so Vitest's test/** glob actually runs it; escape folder_id in the Drive query; add retry/backoff on transient Google API responses; and record skipped non-Google-Doc files in the staged manifest. * chore: sync uv.lock to ktx-daemon/ktx-sl 0.13.1
This commit is contained in:
parent
5645dc4d28
commit
ca231df5fe
11 changed files with 346 additions and 65 deletions
|
|
@ -438,7 +438,8 @@ connections:
|
|||
|
||||
- `gdrive` is knowledge-only in v1; it does not produce semantic layer sources
|
||||
- `ktx setup` supports Google Drive configuration, including the service-account key ref, folder id, and recursive crawl flag
|
||||
- `ktx connection test <connectionId>` supports `gdrive` and reports the number of Google Docs visible in the configured folder
|
||||
- `ktx connection test <connectionId>` supports `gdrive`: it verifies that `folder_id` resolves to a folder the service account can read, then reports the number of Google Docs visible in it. A wrong or unshared `folder_id` fails the test instead of reporting zero docs
|
||||
- Only Google Docs are ingested in v1; other file types (Sheets, Slides, PDFs) in the folder are skipped and recorded in the staged manifest
|
||||
- The service account must be granted access to the target folder explicitly
|
||||
|
||||
## Common errors
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ import { type NotionBotInfo, NotionClient } from './context/ingest/adapters/noti
|
|||
import { parseGdriveConnectionConfig, resolveGdriveServiceAccountKey } from './context/connections/gdrive-config.js';
|
||||
import { createLocalLookerCredentialResolver } from './context/ingest/adapters/looker/local-looker.adapter.js';
|
||||
import { metabaseRuntimeConfigFromLocalConnection } from './context/ingest/adapters/metabase/local-metabase.adapter.js';
|
||||
import { createGoogleDocsClients } from './context/ingest/adapters/gdrive/gdrive-client.js';
|
||||
import { GDRIVE_DOC_MIME_TYPE, gdriveServiceAccountKeySchema } from './context/ingest/adapters/gdrive/types.js';
|
||||
import { createGoogleDocsClients, verifyGdriveFolderAndCountDocs } from './context/ingest/adapters/gdrive/gdrive-client.js';
|
||||
import { gdriveServiceAccountKeySchema } from './context/ingest/adapters/gdrive/types.js';
|
||||
import { testRepoConnection } from './context/ingest/repo-fetch.js';
|
||||
import { federatedConnectionListing } from './context/connections/federation.js';
|
||||
import { getDriverRegistration } from './context/connections/drivers.js';
|
||||
|
|
@ -36,7 +36,7 @@ type LookerTestPort = Pick<LookerClient, 'testConnection'>;
|
|||
type NotionTestPort = Pick<NotionClient, 'retrieveBotUser'>;
|
||||
type GdriveTestPort = Pick<
|
||||
ReturnType<typeof createGoogleDocsClients>['drive'],
|
||||
'listFiles'
|
||||
'listFiles' | 'getFile'
|
||||
>;
|
||||
type TestRepoConnection = typeof testRepoConnection;
|
||||
|
||||
|
|
@ -217,12 +217,7 @@ async function testGdriveConnection(
|
|||
}
|
||||
const parsed = parseGdriveConnectionConfig(connection);
|
||||
const client = await createClient(project, connectionId);
|
||||
const result = await client.listFiles({
|
||||
q: `'${parsed.folder_id}' in parents and trashed = false`,
|
||||
});
|
||||
return {
|
||||
docs: result.files.filter((file) => file.mimeType === GDRIVE_DOC_MIME_TYPE).length,
|
||||
};
|
||||
return { docs: await verifyGdriveFolderAndCountDocs(client, parsed.folder_id) };
|
||||
}
|
||||
|
||||
interface GitConnectionFields {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { createGoogleDocsClients } from './gdrive-client.js';
|
||||
import { createGoogleDocsClients, driveFolderChildrenQuery } from './gdrive-client.js';
|
||||
import { normalizeGoogleDocToMarkdown } from './normalize.js';
|
||||
import type { GdriveFileRecord, GdriveManifest, GdrivePullConfig } from './types.js';
|
||||
import { GDRIVE_DOC_MIME_TYPE, GDRIVE_SOURCE_KEY } from './types.js';
|
||||
import { GDRIVE_DOC_MIME_TYPE, GDRIVE_FOLDER_MIME_TYPE, GDRIVE_SOURCE_KEY } from './types.js';
|
||||
|
||||
async function writeJson(path: string, value: unknown): Promise<void> {
|
||||
await mkdir(dirname(path), { recursive: true });
|
||||
|
|
@ -39,32 +39,52 @@ function gdriveDocDirName(title: string, fileId: string): string {
|
|||
return `${compactSegment(title)}-${shortHash(fileId)}`;
|
||||
}
|
||||
|
||||
/** A Google Doc discovered while listing a folder, plus where it was found. */
interface GdriveDocRecord {
  /** Drive metadata for the document itself. */
  file: GdriveFileRecord;
  /** Names of the sub-folders walked from the listing root down to the folder holding the doc. */
  drivePath: string[];
  /** Id of the folder whose listing returned this doc. */
  folderId: string;
}
|
||||
|
||||
/** A Drive file that was seen during listing but deliberately not ingested. */
interface GdriveSkippedFile {
  /** Drive file id of the skipped item. */
  externalId: string;
  /** Human-readable explanation of why the file was skipped. */
  reason: string;
}
|
||||
|
||||
/** Outcome of listing a folder: the docs to ingest and the files that were passed over. */
interface ListFolderResult {
  /** Google Docs found in the listing. */
  docs: GdriveDocRecord[];
  /** Files that were not Google Docs and were therefore skipped. */
  skipped: GdriveSkippedFile[];
}
|
||||
|
||||
async function listFolderFiles(
|
||||
drive: ReturnType<typeof createGoogleDocsClients>['drive'],
|
||||
folderId: string,
|
||||
recursive: boolean,
|
||||
parents: string[] = [],
|
||||
): Promise<Array<{ file: GdriveFileRecord; drivePath: string[]; folderId: string }>> {
|
||||
const q = `'${folderId}' in parents and trashed = false`;
|
||||
const records: Array<{ file: GdriveFileRecord; drivePath: string[]; folderId: string }> = [];
|
||||
): Promise<ListFolderResult> {
|
||||
const q = driveFolderChildrenQuery(folderId);
|
||||
const docs: GdriveDocRecord[] = [];
|
||||
const skipped: GdriveSkippedFile[] = [];
|
||||
let pageToken: string | undefined;
|
||||
do {
|
||||
const page = await drive.listFiles({ q, pageToken });
|
||||
for (const file of page.files) {
|
||||
if (file.mimeType === 'application/vnd.google-apps.folder') {
|
||||
if (file.mimeType === GDRIVE_FOLDER_MIME_TYPE) {
|
||||
if (recursive) {
|
||||
records.push(...(await listFolderFiles(drive, file.id, true, [...parents, file.name])));
|
||||
const nested = await listFolderFiles(drive, file.id, true, [...parents, file.name]);
|
||||
docs.push(...nested.docs);
|
||||
skipped.push(...nested.skipped);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (file.mimeType !== GDRIVE_DOC_MIME_TYPE) {
|
||||
skipped.push({ externalId: file.id, reason: `unsupported mime type: ${file.mimeType}` });
|
||||
continue;
|
||||
}
|
||||
records.push({ file, drivePath: parents, folderId });
|
||||
docs.push({ file, drivePath: parents, folderId });
|
||||
}
|
||||
pageToken = page.nextPageToken ?? undefined;
|
||||
} while (pageToken);
|
||||
return records;
|
||||
return { docs, skipped };
|
||||
}
|
||||
|
||||
export async function fetchGdriveSnapshot(params: {
|
||||
|
|
@ -74,7 +94,7 @@ export async function fetchGdriveSnapshot(params: {
|
|||
}): Promise<GdriveManifest> {
|
||||
await mkdir(params.stagedDir, { recursive: true });
|
||||
const clients = createGoogleDocsClients(params.key);
|
||||
const docs = await listFolderFiles(clients.drive, params.config.folderId, params.config.recursive);
|
||||
const { docs, skipped } = await listFolderFiles(clients.drive, params.config.folderId, params.config.recursive);
|
||||
|
||||
for (const { file, drivePath, folderId } of docs) {
|
||||
const document = await clients.docs.getDocument(file.id);
|
||||
|
|
@ -101,8 +121,11 @@ export async function fetchGdriveSnapshot(params: {
|
|||
recursive: params.config.recursive,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
fileCount: docs.length,
|
||||
skipped: [],
|
||||
warnings: [],
|
||||
skipped,
|
||||
warnings:
|
||||
skipped.length > 0
|
||||
? [`Skipped ${skipped.length} non-Google-Doc file(s); only Google Docs are ingested in v1.`]
|
||||
: [],
|
||||
};
|
||||
await writeJson(join(params.stagedDir, 'manifest.json'), manifest);
|
||||
return manifest;
|
||||
|
|
|
|||
|
|
@ -1,21 +1,13 @@
|
|||
import { JWT } from 'google-auth-library';
|
||||
import type { GdriveFileRecord, GdriveServiceAccountKey, GoogleDocsDocument } from './types.js';
|
||||
import { GDRIVE_SCOPES, gdriveServiceAccountKeySchema } from './types.js';
|
||||
import { GDRIVE_DOC_MIME_TYPE, GDRIVE_FOLDER_MIME_TYPE, GDRIVE_SCOPES, gdriveServiceAccountKeySchema } from './types.js';
|
||||
|
||||
const GOOGLE_DRIVE_BASE_URL = 'https://www.googleapis.com/drive/v3';
|
||||
const GOOGLE_DOCS_BASE_URL = 'https://docs.googleapis.com/v1';
|
||||
const GOOGLE_FILE_FIELDS = 'id,name,mimeType,parents,webViewLink,modifiedTime';
|
||||
|
||||
interface GoogleApiListResponse {
|
||||
files?: Array<{
|
||||
id?: string;
|
||||
name?: string;
|
||||
mimeType?: string;
|
||||
parents?: string[];
|
||||
webViewLink?: string;
|
||||
modifiedTime?: string;
|
||||
}>;
|
||||
nextPageToken?: string;
|
||||
}
|
||||
const RETRYABLE_STATUSES = new Set([408, 429, 500, 502, 503, 504]);
|
||||
const MAX_REQUEST_ATTEMPTS = 4;
|
||||
|
||||
interface GoogleApiFile {
|
||||
id?: string;
|
||||
|
|
@ -26,6 +18,50 @@ interface GoogleApiFile {
|
|||
modifiedTime?: string;
|
||||
}
|
||||
|
||||
interface GoogleApiListResponse {
|
||||
files?: GoogleApiFile[];
|
||||
nextPageToken?: string;
|
||||
}
|
||||
|
||||
/** Minimal read-only view of the Drive API used by the gdrive adapter. */
export interface GoogleDriveClient {
  /**
   * Lists one page of files matching the Drive query `q`.
   * `nextPageToken` is `null` when there are no further pages.
   */
  listFiles(args: {
    q: string;
    pageToken?: string;
  }): Promise<{ files: GdriveFileRecord[]; nextPageToken: string | null }>;

  /** Fetches metadata for a single file id; resolves to `null` when no usable record is available. */
  getFile(fileId: string): Promise<GdriveFileRecord | null>;
}
|
||||
|
||||
/** The pair of Google API clients returned by `createGoogleDocsClients`. */
export interface GoogleDocsClients {
  /** Drive client used for listing folders and looking up file metadata. */
  drive: GoogleDriveClient;
  /** Docs client used for fetching document content. */
  docs: {
    /** Retrieves the Google Docs document with the given id. */
    getDocument(documentId: string): Promise<GoogleDocsDocument>;
  };
}
|
||||
|
||||
/** Returns a promise that resolves once a `setTimeout` of `ms` milliseconds has fired. */
function defaultSleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Computes how long to wait before the next retry.
 *
 * A usable `Retry-After` header wins: a non-negative delay-seconds value is honored directly,
 * and an HTTP-date that lies in the future is converted to the time remaining until it. Both
 * forms are capped at 30 seconds. In every other case (no header, unparsable value, negative
 * seconds, date already in the past) the delay is exponential backoff: 500 ms doubled per
 * attempt, capped at 8 seconds.
 *
 * @param attempt Zero-based retry index; 0 yields the 500 ms base delay.
 * @param retryAfterHeader Raw `Retry-After` header value, or `null` when the header is absent.
 * @returns Delay in milliseconds.
 */
function retryDelayMs(attempt: number, retryAfterHeader: string | null): number {
  const maxRetryAfterMs = 30_000;
  const backoffMs = Math.min(500 * 2 ** attempt, 8_000);
  if (!retryAfterHeader) {
    return backoffMs;
  }

  const retryAfterSeconds = Number.parseInt(retryAfterHeader, 10);
  if (Number.isNaN(retryAfterSeconds)) {
    // Not delay-seconds: Retry-After may also carry an HTTP-date. An unparsable value yields
    // NaN here, which fails the `> 0` comparison and falls through to backoff.
    const untilDateMs = Date.parse(retryAfterHeader) - Date.now();
    if (untilDateMs > 0) {
      return Math.min(untilDateMs, maxRetryAfterMs);
    }
  } else if (Number.isFinite(retryAfterSeconds) && retryAfterSeconds >= 0) {
    return Math.min(retryAfterSeconds * 1000, maxRetryAfterMs);
  }
  return backoffMs;
}
|
||||
|
||||
/**
 * @internal Retries transient Google API responses (408/429/5xx), honoring Retry-After.
 *
 * Calls `doFetch` up to `maxAttempts` times in total. A response is retried only while it is
 * non-OK and its status is in `RETRYABLE_STATUSES`; any other response, or the last one
 * once attempts are exhausted, is returned to the caller untouched so it can be inspected.
 * Errors thrown by `doFetch` itself are not caught and propagate immediately.
 *
 * @param doFetch Performs one request; invoked afresh for every attempt.
 * @param options.maxAttempts Total attempts including the first (defaults to `MAX_REQUEST_ATTEMPTS`).
 * @param options.sleep Wait function, injectable so tests can skip real delays.
 * @returns The first non-retryable response, or the final response when attempts run out.
 */
export async function fetchWithGoogleRetry(
  doFetch: () => Promise<Response>,
  options: { maxAttempts?: number; sleep?: (ms: number) => Promise<void> } = {},
): Promise<Response> {
  const maxAttempts = options.maxAttempts ?? MAX_REQUEST_ATTEMPTS;
  const sleep = options.sleep ?? defaultSleep;
  let response = await doFetch();
  for (let attempt = 1; attempt < maxAttempts && !response.ok && RETRYABLE_STATUSES.has(response.status); attempt += 1) {
    // Read the header before discarding the response.
    const delayMs = retryDelayMs(attempt - 1, response.headers.get('retry-after'));
    // This response is being thrown away: cancel its unread body so the underlying connection
    // is released instead of lingering until garbage collection. Best-effort only — a failure
    // to cancel must not prevent the retry.
    await response.body?.cancel().catch(() => undefined);
    await sleep(delayMs);
    response = await doFetch();
  }
  return response;
}
|
||||
|
||||
async function parseGoogleResponse<T>(response: Response): Promise<T> {
|
||||
if (!response.ok) {
|
||||
const body = await response.text();
|
||||
|
|
@ -35,8 +71,10 @@ async function parseGoogleResponse<T>(response: Response): Promise<T> {
|
|||
}
|
||||
|
||||
async function authorizedFetch(client: JWT, url: string): Promise<Response> {
|
||||
const headers = await client.getRequestHeaders(url);
|
||||
return fetch(url, { headers });
|
||||
return fetchWithGoogleRetry(async () => {
|
||||
const headers = await client.getRequestHeaders(url);
|
||||
return fetch(url, { headers });
|
||||
});
|
||||
}
|
||||
|
||||
function isGoogleApiFileRecord(file: GoogleApiFile): file is GoogleApiFile & {
|
||||
|
|
@ -47,14 +85,55 @@ function isGoogleApiFileRecord(file: GoogleApiFile): file is GoogleApiFile & {
|
|||
return typeof file.id === 'string' && typeof file.name === 'string' && typeof file.mimeType === 'string';
|
||||
}
|
||||
|
||||
export function createGoogleDocsClients(rawKey: unknown): {
|
||||
drive: {
|
||||
listFiles(args: { q: string; pageToken?: string }): Promise<{ files: GdriveFileRecord[]; nextPageToken: string | null }>;
|
||||
function toFileRecord(file: GoogleApiFile & { id: string; name: string; mimeType: string }): GdriveFileRecord {
|
||||
return {
|
||||
id: file.id,
|
||||
name: file.name,
|
||||
mimeType: file.mimeType,
|
||||
parents: Array.isArray(file.parents) ? file.parents.filter((parent): parent is string => typeof parent === 'string') : [],
|
||||
webViewLink: typeof file.webViewLink === 'string' ? file.webViewLink : null,
|
||||
modifiedTime: typeof file.modifiedTime === 'string' ? file.modifiedTime : null,
|
||||
};
|
||||
docs: {
|
||||
getDocument(documentId: string): Promise<GoogleDocsDocument>;
|
||||
};
|
||||
} {
|
||||
}
|
||||
|
||||
/** Prefixes every backslash and single quote in `value` with a backslash, for use inside a single-quoted Drive query literal. */
function escapeDriveQueryValue(value: string): string {
  // One pass over both special characters; each match is emitted with a leading backslash.
  return value.replace(/[\\']/g, (special) => `\\${special}`);
}
|
||||
|
||||
/**
 * Returns the Drive search query matching the non-trashed direct children of `folderId`.
 * The id is escaped before being placed inside the single-quoted literal.
 */
export function driveFolderChildrenQuery(folderId: string): string {
  const quotedId = `'${escapeDriveQueryValue(folderId)}'`;
  return `${quotedId} in parents and trashed = false`;
}
|
||||
|
||||
/**
 * Checks that `folderId` points at a readable Drive folder and returns how many Google Docs
 * sit directly inside it, following pagination until the listing is exhausted.
 *
 * @param drive Drive client used for the metadata lookup and the child listing.
 * @param folderId Id expected to identify a Drive folder.
 * @returns Number of direct children whose mime type is the Google Docs type.
 * @throws Error when the lookup yields no file, or when the file is not a folder. Nothing is
 *   listed in either case.
 */
export async function verifyGdriveFolderAndCountDocs(
  drive: GoogleDriveClient,
  folderId: string,
): Promise<number> {
  const target = await drive.getFile(folderId);
  if (!target) {
    throw new Error(
      `Google Drive folder "${folderId}" is not accessible. Share it with the service account email and verify folder_id.`,
    );
  }
  if (target.mimeType !== GDRIVE_FOLDER_MIME_TYPE) {
    throw new Error(`Google Drive id "${folderId}" is not a folder (mimeType: ${target.mimeType}).`);
  }

  const childrenQuery = driveFolderChildrenQuery(folderId);
  let docCount = 0;
  let cursor: string | undefined;
  for (;;) {
    const { files, nextPageToken } = await drive.listFiles({ q: childrenQuery, pageToken: cursor });
    for (const entry of files) {
      if (entry.mimeType === GDRIVE_DOC_MIME_TYPE) {
        docCount += 1;
      }
    }
    // A missing (or empty) token means this was the last page.
    if (!nextPageToken) {
      return docCount;
    }
    cursor = nextPageToken;
  }
}
|
||||
|
||||
export function createGoogleDocsClients(rawKey: unknown): GoogleDocsClients {
|
||||
const key = gdriveServiceAccountKeySchema.parse(rawKey) satisfies GdriveServiceAccountKey;
|
||||
const client = new JWT({
|
||||
email: key.client_email,
|
||||
|
|
@ -70,7 +149,7 @@ export function createGoogleDocsClients(rawKey: unknown): {
|
|||
supportsAllDrives: 'true',
|
||||
includeItemsFromAllDrives: 'true',
|
||||
pageSize: '1000',
|
||||
fields: 'nextPageToken,files(id,name,mimeType,parents,webViewLink,modifiedTime)',
|
||||
fields: `nextPageToken,files(${GOOGLE_FILE_FIELDS})`,
|
||||
});
|
||||
if (args.pageToken) {
|
||||
params.set('pageToken', args.pageToken);
|
||||
|
|
@ -78,19 +157,22 @@ export function createGoogleDocsClients(rawKey: unknown): {
|
|||
const response = await authorizedFetch(client, `${GOOGLE_DRIVE_BASE_URL}/files?${params.toString()}`);
|
||||
const parsed = await parseGoogleResponse<GoogleApiListResponse>(response);
|
||||
return {
|
||||
files: (parsed.files ?? [])
|
||||
.filter(isGoogleApiFileRecord)
|
||||
.map((file) => ({
|
||||
id: file.id,
|
||||
name: file.name,
|
||||
mimeType: file.mimeType,
|
||||
parents: Array.isArray(file.parents) ? file.parents.filter((parent): parent is string => typeof parent === 'string') : [],
|
||||
webViewLink: typeof file.webViewLink === 'string' ? file.webViewLink : null,
|
||||
modifiedTime: typeof file.modifiedTime === 'string' ? file.modifiedTime : null,
|
||||
})),
|
||||
files: (parsed.files ?? []).filter(isGoogleApiFileRecord).map(toFileRecord),
|
||||
nextPageToken: typeof parsed.nextPageToken === 'string' ? parsed.nextPageToken : null,
|
||||
};
|
||||
},
|
||||
/**
 * Looks up metadata for a single Drive file by id.
 * Resolves to `null` when Drive answers 404, or when the returned payload lacks a string
 * id, name, or mimeType; other non-OK responses are handled by `parseGoogleResponse`.
 */
async getFile(fileId: string) {
  const query = new URLSearchParams({ supportsAllDrives: 'true', fields: GOOGLE_FILE_FIELDS });
  const url = `${GOOGLE_DRIVE_BASE_URL}/files/${encodeURIComponent(fileId)}?${query.toString()}`;
  const response = await authorizedFetch(client, url);
  if (response.status === 404) {
    return null;
  }
  const payload = await parseGoogleResponse<GoogleApiFile>(response);
  if (!isGoogleApiFileRecord(payload)) {
    return null;
  }
  return toFileRecord(payload);
},
|
||||
},
|
||||
docs: {
|
||||
async getDocument(documentId: string) {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ const GDRIVE_DRIVE_SCOPE = 'https://www.googleapis.com/auth/drive.readonly';
|
|||
export const GDRIVE_SCOPES = [GDRIVE_DRIVE_SCOPE, GDRIVE_DOCS_SCOPE] as const;
|
||||
export const GDRIVE_SOURCE_KEY = 'gdrive';
|
||||
export const GDRIVE_DOC_MIME_TYPE = 'application/vnd.google-apps.document';
|
||||
export const GDRIVE_FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder';
|
||||
|
||||
export const gdrivePullConfigSchema = z.object({
|
||||
serviceAccountKey: z.string().min(1),
|
||||
|
|
|
|||
|
|
@ -11,8 +11,9 @@ import { resolveNotionConnectionAuthToken } from './context/connections/notion-c
|
|||
import { resolveKtxConfigReference } from './context/core/config-reference.js';
|
||||
import {
|
||||
createGoogleDocsClients,
|
||||
verifyGdriveFolderAndCountDocs,
|
||||
} from './context/ingest/adapters/gdrive/gdrive-client.js';
|
||||
import { GDRIVE_DOC_MIME_TYPE, gdriveServiceAccountKeySchema } from './context/ingest/adapters/gdrive/types.js';
|
||||
import { gdriveServiceAccountKeySchema } from './context/ingest/adapters/gdrive/types.js';
|
||||
import { cloneOrPull, testRepoConnection } from './context/ingest/repo-fetch.js';
|
||||
import { DEFAULT_METABASE_CLIENT_CONFIG, MetabaseClient } from './context/ingest/adapters/metabase/client.js';
|
||||
import { discoverMetabaseDatabases, type DiscoveredMetabaseDatabase } from './context/ingest/adapters/metabase/mapping.js';
|
||||
|
|
@ -716,10 +717,7 @@ async function defaultValidateGdrive(connection: KtxProjectConnectionConfig): Pr
|
|||
const config = parseGdriveConnectionConfig(connection);
|
||||
const keyText = await resolveGdriveServiceAccountKey(config.service_account_key_ref);
|
||||
const clients = createGoogleDocsClients(gdriveServiceAccountKeySchema.parse(JSON.parse(keyText)));
|
||||
const result = await clients.drive.listFiles({
|
||||
q: `'${config.folder_id}' in parents and trashed = false`,
|
||||
});
|
||||
const docs = result.files.filter((file) => file.mimeType === GDRIVE_DOC_MIME_TYPE).length;
|
||||
const docs = await verifyGdriveFolderAndCountDocs(clients.drive, config.folder_id);
|
||||
return { ok: true, detail: `docs=${docs}` };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -434,7 +434,15 @@ describe('runKtxConnection', () => {
|
|||
],
|
||||
nextPageToken: null,
|
||||
}));
|
||||
const createGdriveClient = vi.fn(async () => ({ listFiles }));
|
||||
const getFile = vi.fn(async () => ({
|
||||
id: 'folder-123',
|
||||
name: 'Docs',
|
||||
mimeType: 'application/vnd.google-apps.folder',
|
||||
parents: [],
|
||||
webViewLink: null,
|
||||
modifiedTime: null,
|
||||
}));
|
||||
const createGdriveClient = vi.fn(async () => ({ listFiles, getFile }));
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
|
|
@ -442,12 +450,38 @@ describe('runKtxConnection', () => {
|
|||
).resolves.toBe(0);
|
||||
|
||||
expect(createGdriveClient).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'docs_drive');
|
||||
expect(listFiles).toHaveBeenCalledWith({ q: "'folder-123' in parents and trashed = false" });
|
||||
expect(getFile).toHaveBeenCalledWith('folder-123');
|
||||
expect(listFiles).toHaveBeenCalledWith({ q: "'folder-123' in parents and trashed = false", pageToken: undefined });
|
||||
expect(io.stdout()).toContain('Connection test passed: docs_drive');
|
||||
expect(io.stdout()).toContain('Driver: gdrive');
|
||||
expect(io.stdout()).toContain('Docs: 1');
|
||||
});
|
||||
|
||||
it('fails a Google Drive connection test when the folder is not accessible', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir });
|
||||
await writeConnections(projectDir, {
|
||||
docs_drive: {
|
||||
driver: 'gdrive',
|
||||
service_account_key_ref: 'file:/tmp/gdrive-key.json', // pragma: allowlist secret
|
||||
folder_id: 'missing-folder',
|
||||
recursive: false,
|
||||
},
|
||||
});
|
||||
const listFiles = vi.fn();
|
||||
const getFile = vi.fn(async () => null);
|
||||
const createGdriveClient = vi.fn(async () => ({ listFiles, getFile }));
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxConnection({ command: 'test', projectDir, connectionId: 'docs_drive' }, io.io, { createGdriveClient }),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(getFile).toHaveBeenCalledWith('missing-folder');
|
||||
expect(listFiles).not.toHaveBeenCalled();
|
||||
expect(io.stderr()).toContain('is not accessible');
|
||||
});
|
||||
|
||||
it('tests a dbt connection via testRepoConnection (success)', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir });
|
||||
|
|
@ -593,6 +627,14 @@ describe('runKtxConnection', () => {
|
|||
],
|
||||
nextPageToken: null,
|
||||
})),
|
||||
getFile: vi.fn(async () => ({
|
||||
id: 'folder-123',
|
||||
name: 'Docs',
|
||||
mimeType: 'application/vnd.google-apps.folder',
|
||||
parents: [],
|
||||
webViewLink: null,
|
||||
modifiedTime: null,
|
||||
})),
|
||||
}));
|
||||
const io = makeIo();
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import {
|
|||
gdriveConnectionToPullConfig,
|
||||
parseGdriveConnectionConfig,
|
||||
resolveGdriveServiceAccountKey,
|
||||
} from './gdrive-config.js';
|
||||
} from '../../../src/context/connections/gdrive-config.js';
|
||||
|
||||
describe('standalone gdrive connection config', () => {
|
||||
let tempDir: string;
|
||||
|
|
@ -22,7 +22,8 @@ const listFiles = vi.fn(async () => ({
|
|||
nextPageToken: null,
|
||||
}));
|
||||
|
||||
vi.mock('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js', () => ({
|
||||
vi.mock('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js', async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import('../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js')>()),
|
||||
createGoogleDocsClients: vi.fn(() => ({
|
||||
drive: { listFiles },
|
||||
docs: { getDocument },
|
||||
|
|
@ -81,4 +82,42 @@ describe('fetchGdriveSnapshot', () => {
|
|||
readFile(join(stagedDir, 'docs', 'herness-and-enterprise-a-7913523027', 'page.md'), 'utf-8'),
|
||||
).resolves.toContain('# Herness and Enterprise Agent Operating Framework for Connected Systems');
|
||||
});
|
||||
|
||||
it('records skipped non-Google-Doc files in the manifest with a summary warning', async () => {
|
||||
stagedDir = await mkdtemp(join(tmpdir(), 'ktx-gdrive-fetch-'));
|
||||
listFiles.mockResolvedValueOnce({
|
||||
files: [
|
||||
{
|
||||
id: 'doc-1',
|
||||
name: 'Doc',
|
||||
mimeType: 'application/vnd.google-apps.document',
|
||||
parents: ['folder-123'],
|
||||
webViewLink: 'https://docs.google.com/document/d/doc-1',
|
||||
modifiedTime: '2026-05-24T01:53:28.347Z',
|
||||
},
|
||||
{
|
||||
id: 'sheet-1',
|
||||
name: 'Sheet',
|
||||
mimeType: 'application/vnd.google-apps.spreadsheet',
|
||||
parents: ['folder-123'],
|
||||
webViewLink: 'https://docs.google.com/spreadsheets/d/sheet-1',
|
||||
modifiedTime: '2026-05-24T01:53:28.347Z',
|
||||
},
|
||||
],
|
||||
nextPageToken: null,
|
||||
});
|
||||
|
||||
const manifest = await fetchGdriveSnapshot({
|
||||
key: { client_email: 'bot@example.com', private_key: 'secret' }, // pragma: allowlist secret
|
||||
config: { serviceAccountKey: 'unused', folderId: 'folder-123', recursive: false }, // pragma: allowlist secret
|
||||
stagedDir,
|
||||
});
|
||||
|
||||
expect(manifest.fileCount).toBe(1);
|
||||
expect(manifest.skipped).toEqual([
|
||||
{ externalId: 'sheet-1', reason: 'unsupported mime type: application/vnd.google-apps.spreadsheet' },
|
||||
]);
|
||||
expect(manifest.warnings).toHaveLength(1);
|
||||
expect(manifest.warnings[0]).toContain('Skipped 1');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,100 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
driveFolderChildrenQuery,
|
||||
fetchWithGoogleRetry,
|
||||
verifyGdriveFolderAndCountDocs,
|
||||
type GoogleDriveClient,
|
||||
} from '../../../../../src/context/ingest/adapters/gdrive/gdrive-client.js';
|
||||
import {
|
||||
GDRIVE_DOC_MIME_TYPE,
|
||||
GDRIVE_FOLDER_MIME_TYPE,
|
||||
type GdriveFileRecord,
|
||||
} from '../../../../../src/context/ingest/adapters/gdrive/types.js';
|
||||
|
||||
function fileRecord(partial: Partial<GdriveFileRecord> & { id: string; mimeType: string }): GdriveFileRecord {
|
||||
return {
|
||||
name: partial.name ?? partial.id,
|
||||
parents: [],
|
||||
webViewLink: null,
|
||||
modifiedTime: null,
|
||||
...partial,
|
||||
};
|
||||
}
|
||||
|
||||
describe('driveFolderChildrenQuery', () => {
|
||||
it('escapes single quotes and backslashes in the folder id', () => {
|
||||
expect(driveFolderChildrenQuery('abc')).toBe("'abc' in parents and trashed = false");
|
||||
expect(driveFolderChildrenQuery("a'b")).toBe("'a\\'b' in parents and trashed = false");
|
||||
expect(driveFolderChildrenQuery('a\\b')).toBe("'a\\\\b' in parents and trashed = false");
|
||||
});
|
||||
});
|
||||
|
||||
describe('verifyGdriveFolderAndCountDocs', () => {
|
||||
it('throws a caller-facing error when the folder is not accessible', async () => {
|
||||
const drive: GoogleDriveClient = {
|
||||
getFile: vi.fn(async () => null),
|
||||
listFiles: vi.fn(),
|
||||
};
|
||||
await expect(verifyGdriveFolderAndCountDocs(drive, 'missing')).rejects.toThrow('is not accessible');
|
||||
expect(drive.listFiles).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('throws when the id resolves to a non-folder', async () => {
|
||||
const drive: GoogleDriveClient = {
|
||||
getFile: vi.fn(async () => fileRecord({ id: 'doc-1', mimeType: GDRIVE_DOC_MIME_TYPE })),
|
||||
listFiles: vi.fn(),
|
||||
};
|
||||
await expect(verifyGdriveFolderAndCountDocs(drive, 'doc-1')).rejects.toThrow('is not a folder');
|
||||
expect(drive.listFiles).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('counts Google Docs across pages and ignores non-Docs', async () => {
|
||||
const listFiles = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({
|
||||
files: [
|
||||
fileRecord({ id: '1', mimeType: GDRIVE_DOC_MIME_TYPE }),
|
||||
fileRecord({ id: '2', mimeType: 'application/vnd.google-apps.spreadsheet' }),
|
||||
],
|
||||
nextPageToken: 'page-2',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
files: [fileRecord({ id: '3', mimeType: GDRIVE_DOC_MIME_TYPE })],
|
||||
nextPageToken: null,
|
||||
});
|
||||
const drive: GoogleDriveClient = {
|
||||
getFile: vi.fn(async () => fileRecord({ id: 'folder', mimeType: GDRIVE_FOLDER_MIME_TYPE })),
|
||||
listFiles,
|
||||
};
|
||||
await expect(verifyGdriveFolderAndCountDocs(drive, 'folder')).resolves.toBe(2);
|
||||
expect(listFiles).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('fetchWithGoogleRetry', () => {
|
||||
const noopSleep = async () => {};
|
||||
|
||||
it('retries transient 5xx responses then returns success', async () => {
|
||||
const doFetch = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce(new Response('busy', { status: 503 }))
|
||||
.mockResolvedValueOnce(new Response('{}', { status: 200 }));
|
||||
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep });
|
||||
expect(response.status).toBe(200);
|
||||
expect(doFetch).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('does not retry non-retryable responses', async () => {
|
||||
const doFetch = vi.fn().mockResolvedValue(new Response('nope', { status: 404 }));
|
||||
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep });
|
||||
expect(response.status).toBe(404);
|
||||
expect(doFetch).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('stops after maxAttempts when responses stay transient', async () => {
|
||||
const doFetch = vi.fn().mockResolvedValue(new Response('rate', { status: 429 }));
|
||||
const response = await fetchWithGoogleRetry(doFetch, { sleep: noopSleep, maxAttempts: 3 });
|
||||
expect(response.status).toBe(429);
|
||||
expect(doFetch).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
});
|
||||
4
uv.lock
generated
4
uv.lock
generated
|
|
@ -466,7 +466,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "ktx-daemon"
|
||||
version = "0.13.0"
|
||||
version = "0.13.1"
|
||||
source = { editable = "python/ktx-daemon" }
|
||||
dependencies = [
|
||||
{ name = "fastapi" },
|
||||
|
|
@ -523,7 +523,7 @@ dev = [
|
|||
|
||||
[[package]]
|
||||
name = "ktx-sl"
|
||||
version = "0.13.0"
|
||||
version = "0.13.1"
|
||||
source = { editable = "python/ktx-sl" }
|
||||
dependencies = [
|
||||
{ name = "pydantic" },
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue