ktx/packages/cli/test/context/scan/credentials.test.ts

import { describe, expect, it } from 'vitest';
import { REDACTED_KTX_CREDENTIAL_VALUE } from '../../../src/context/core/redaction.js';
import {
  redactKtxCredentialEnvelope,
  redactKtxCredentialValue,
  redactKtxScanMetadata,
  redactKtxScanReport,
  redactKtxScanWarning,
} from '../../../src/context/scan/credentials.js';
import type { KtxCredentialEnvelope, KtxScanReport, KtxScanWarning } from '../../../src/context/scan/types.js';

describe('KTX scan credential redaction', () => {
  // Envelopes of kind 'env' and 'file' only point at where a credential lives,
  // so the redactor is expected to hand them back equal to the input.
  it('keeps credential references inspectable', () => {
    const references: KtxCredentialEnvelope[] = [
      { kind: 'env', name: 'DATABASE_URL' },
      { kind: 'file', path: '~/.config/ktx/warehouse' },
    ];

    for (const reference of references) {
      expect(redactKtxCredentialEnvelope(reference)).toEqual(reference);
    }
  });

  // A 'resolved' envelope carries concrete values. The expectation below pins
  // that password / api_key / authorizationToken entries are replaced at every
  // nesting level (objects and arrays), that the other entries are left as-is,
  // and that the result carries `redacted: true`.
  it('redacts resolved credential envelope values recursively', () => {
    // The literal stays inline so it is checked against the helper's parameter type.
    const actualEnvelope = redactKtxCredentialEnvelope({
      kind: 'resolved',
      source: 'host',
      values: {
        username: 'readonly',
        password: 'secret-password', // pragma: allowlist secret
        nested: {
          api_key: 'phx_123', // pragma: allowlist secret
          warehouse: 'compute_wh',
        },
        headers: [{ authorizationToken: 'token-value' }, { label: 'safe' }],
      },
    });

    const expectedEnvelope = {
      kind: 'resolved',
      source: 'host',
      redacted: true,
      values: {
        username: 'readonly',
        password: REDACTED_KTX_CREDENTIAL_VALUE,
        nested: {
          api_key: REDACTED_KTX_CREDENTIAL_VALUE,
          warehouse: 'compute_wh',
        },
        headers: [{ authorizationToken: REDACTED_KTX_CREDENTIAL_VALUE }, { label: 'safe' }],
      },
    };

    expect(actualEnvelope).toEqual(expectedEnvelope);
  });

  // Scan metadata: the `url` entry and the nested `private_key` entry must be
  // replaced with the redaction marker, while the driver name, client e-mail
  // and numeric counter pass through unchanged.
  it('redacts scan metadata fields that commonly contain secrets', () => {
    const actualMetadata = redactKtxScanMetadata({
      driver: 'postgres',
      url: 'postgres://user:pass@example.test/db', // pragma: allowlist secret
      serviceAccountJson: {
        client_email: 'reader@example.test',
        private_key: 'pem-value', // pragma: allowlist secret
      },
      safeCount: 3,
    });

    const expectedMetadata = {
      driver: 'postgres',
      url: REDACTED_KTX_CREDENTIAL_VALUE,
      serviceAccountJson: {
        client_email: 'reader@example.test',
        private_key: REDACTED_KTX_CREDENTIAL_VALUE,
      },
      safeCount: 3,
    };

    expect(actualMetadata).toEqual(expectedMetadata);
  });

  // Warnings are scrubbed in two places: the password embedded in the message's
  // connection URL becomes `<redacted>`, and the `url` / `api_key` metadata
  // entries become the redaction marker. Code, recoverable flag, table and
  // schema stay as they were.
  it('redacts scan warning messages and metadata without hiding safe context', () => {
    const rawWarning: KtxScanWarning = {
      code: 'sampling_failed',
      message: 'sample failed for postgres://reader:secret@example.test/db', // pragma: allowlist secret
      recoverable: true,
      metadata: {
        table: 'orders',
        url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret
        nested: {
          api_key: 'sk_test_123', // pragma: allowlist secret
          schema: 'public',
        },
      },
    };
    const expectedWarning = {
      code: 'sampling_failed',
      message: 'sample failed for postgres://reader:<redacted>@example.test/db',
      recoverable: true,
      metadata: {
        table: 'orders',
        url: REDACTED_KTX_CREDENTIAL_VALUE,
        nested: {
          api_key: REDACTED_KTX_CREDENTIAL_VALUE,
          schema: 'public',
        },
      },
    };

    const sanitizedWarning = redactKtxScanWarning(rawWarning);

    expect(sanitizedWarning).toEqual(expectedWarning);
  });

  // Builds an otherwise empty scan report with a single warning whose metadata
  // holds a `credentials_json` entry, then checks two things:
  //   1. the redacted report's warning metadata has that entry replaced, and
  //   2. the report passed in still holds the original, unredacted metadata.
  it('redacts scan report warning metadata recursively', () => {
    // All-zero counters; only the warnings matter to this test.
    const emptyDiffSummary = {
      tablesAdded: 0,
      tablesModified: 0,
      tablesDeleted: 0,
      tablesUnchanged: 0,
      columnsAdded: 0,
      columnsModified: 0,
      columnsDeleted: 0,
    };
    const emptySyncStats = {
      tablesCreated: 0,
      tablesUpdated: 0,
      tablesDeleted: 0,
      columnsCreated: 0,
      columnsUpdated: 0,
      columnsDeleted: 0,
    };
    const emptyRelationships = { accepted: 0, review: 0, rejected: 0, skipped: 0 };

    const originalReport: KtxScanReport = {
      connectionId: 'warehouse',
      driver: 'postgres',
      syncId: 'sync-1',
      runId: 'run-1',
      trigger: 'cli',
      mode: 'structural',
      dryRun: false,
      artifactPaths: {
        rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
        reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
        manifestShards: [],
        enrichmentArtifacts: [],
      },
      diffSummary: emptyDiffSummary,
      manifestShardsWritten: 0,
      structuralSyncStats: emptySyncStats,
      enrichment: {
        dataDictionary: 'skipped',
        tableDescriptions: 'skipped',
        columnDescriptions: 'skipped',
        embeddings: 'skipped',
        deterministicRelationships: 'skipped',
        llmRelationshipValidation: 'skipped',
        statisticalValidation: 'skipped',
      },
      capabilityGaps: [],
      warnings: [
        {
          code: 'credential_redacted',
          message: 'metadata redacted',
          recoverable: true,
          metadata: {
            credentials_json: '{"private_key":"pem-value"}', // pragma: allowlist secret
            safeCount: 2,
          },
        },
      ],
      relationships: emptyRelationships,
      enrichmentState: {
        resumedStages: [],
        completedStages: [],
        failedStages: [],
      },
      createdAt: '2026-04-29T00:00:00.000Z',
    };

    const sanitizedReport = redactKtxScanReport(originalReport);
    const sanitizedMetadata = sanitizedReport.warnings[0]?.metadata;
    const originalMetadata = originalReport.warnings[0]?.metadata;

    // The output carries the marker in place of the credential payload.
    expect(sanitizedMetadata).toEqual({
      credentials_json: REDACTED_KTX_CREDENTIAL_VALUE,
      safeCount: 2,
    });
    // The input is compared against a fresh literal so an in-place mutation would be caught.
    expect(originalMetadata).toEqual({
      credentials_json: '{"private_key":"pem-value"}', // pragma: allowlist secret
      safeCount: 2,
    });
  });

  // Single key/value redaction: a value under the key 'password' is replaced by
  // the marker, while a value under the key 'schema' is returned unchanged.
  it('redacts standalone primitive credential values only when the field key is sensitive', () => {
    const cases = [
      { key: 'password', value: 'abc', expected: REDACTED_KTX_CREDENTIAL_VALUE },
      { key: 'schema', value: 'public', expected: 'public' },
    ];

    for (const { key, value, expected } of cases) {
      expect(redactKtxCredentialValue(key, value)).toBe(expected);
    }
  });
});
Initial open-source release 2026-05-10 23:12:26 +02:00			`import { describe, expect, it } from 'vitest';`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`import { REDACTED_KTX_CREDENTIAL_VALUE } from '../../../src/context/core/redaction.js';`
Initial open-source release 2026-05-10 23:12:26 +02:00			`import {`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`redactKtxCredentialEnvelope,`
			`redactKtxCredentialValue,`
			`redactKtxScanMetadata,`
			`redactKtxScanReport,`
			`redactKtxScanWarning,`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`} from '../../../src/context/scan/credentials.js';`
			`import type { KtxCredentialEnvelope, KtxScanReport, KtxScanWarning } from '../../../src/context/scan/types.js';`
Initial open-source release 2026-05-10 23:12:26 +02:00
rename klo to ktx 2026-05-10 23:51:24 +02:00			`describe('KTX scan credential redaction', () => {`
Initial open-source release 2026-05-10 23:12:26 +02:00			`it('keeps credential references inspectable', () => {`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`const envReference: KtxCredentialEnvelope = { kind: 'env', name: 'DATABASE_URL' };`
			`const fileReference: KtxCredentialEnvelope = { kind: 'file', path: '~/.config/ktx/warehouse' };`
Initial open-source release 2026-05-10 23:12:26 +02:00
rename klo to ktx 2026-05-10 23:51:24 +02:00			`expect(redactKtxCredentialEnvelope(envReference)).toEqual(envReference);`
			`expect(redactKtxCredentialEnvelope(fileReference)).toEqual(fileReference);`
Initial open-source release 2026-05-10 23:12:26 +02:00			`});`

			`it('redacts resolved credential envelope values recursively', () => {`
			`expect(`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`redactKtxCredentialEnvelope({`
Initial open-source release 2026-05-10 23:12:26 +02:00			`kind: 'resolved',`
			`source: 'host',`
			`values: {`
			`username: 'readonly',`
			`password: 'secret-password', // pragma: allowlist secret`
			`nested: {`
			`api_key: 'phx_123', // pragma: allowlist secret`
			`warehouse: 'compute_wh',`
			`},`
			`headers: [{ authorizationToken: 'token-value' }, { label: 'safe' }],`
			`},`
			`}),`
			`).toEqual({`
			`kind: 'resolved',`
			`source: 'host',`
			`redacted: true,`
			`values: {`
			`username: 'readonly',`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`password: REDACTED_KTX_CREDENTIAL_VALUE,`
Initial open-source release 2026-05-10 23:12:26 +02:00			`nested: {`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`api_key: REDACTED_KTX_CREDENTIAL_VALUE,`
Initial open-source release 2026-05-10 23:12:26 +02:00			`warehouse: 'compute_wh',`
			`},`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`headers: [{ authorizationToken: REDACTED_KTX_CREDENTIAL_VALUE }, { label: 'safe' }],`
Initial open-source release 2026-05-10 23:12:26 +02:00			`},`
			`});`
			`});`

			`it('redacts scan metadata fields that commonly contain secrets', () => {`
			`expect(`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`redactKtxScanMetadata({`
Initial open-source release 2026-05-10 23:12:26 +02:00			`driver: 'postgres',`
			`url: 'postgres://user:pass@example.test/db', // pragma: allowlist secret`
			`serviceAccountJson: {`
			`client_email: 'reader@example.test',`
			`private_key: 'pem-value', // pragma: allowlist secret`
			`},`
			`safeCount: 3,`
			`}),`
			`).toEqual({`
			`driver: 'postgres',`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`url: REDACTED_KTX_CREDENTIAL_VALUE,`
Initial open-source release 2026-05-10 23:12:26 +02:00			`serviceAccountJson: {`
			`client_email: 'reader@example.test',`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`private_key: REDACTED_KTX_CREDENTIAL_VALUE,`
Initial open-source release 2026-05-10 23:12:26 +02:00			`},`
			`safeCount: 3,`
			`});`
			`});`

			`it('redacts scan warning messages and metadata without hiding safe context', () => {`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`const warning: KtxScanWarning = {`
Initial open-source release 2026-05-10 23:12:26 +02:00			`code: 'sampling_failed',`
			`message: 'sample failed for postgres://reader:secret@example.test/db', // pragma: allowlist secret`
			`recoverable: true,`
			`metadata: {`
			`table: 'orders',`
			`url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret`
			`nested: {`
			`api_key: 'sk_test_123', // pragma: allowlist secret`
			`schema: 'public',`
			`},`
			`},`
			`};`

rename klo to ktx 2026-05-10 23:51:24 +02:00			`expect(redactKtxScanWarning(warning)).toEqual({`
Initial open-source release 2026-05-10 23:12:26 +02:00			`code: 'sampling_failed',`
			`message: 'sample failed for postgres://reader:<redacted>@example.test/db',`
			`recoverable: true,`
			`metadata: {`
			`table: 'orders',`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`url: REDACTED_KTX_CREDENTIAL_VALUE,`
Initial open-source release 2026-05-10 23:12:26 +02:00			`nested: {`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`api_key: REDACTED_KTX_CREDENTIAL_VALUE,`
Initial open-source release 2026-05-10 23:12:26 +02:00			`schema: 'public',`
			`},`
			`},`
			`});`
			`});`

			`it('redacts scan report warning metadata recursively', () => {`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`const report: KtxScanReport = {`
Initial open-source release 2026-05-10 23:12:26 +02:00			`connectionId: 'warehouse',`
			`driver: 'postgres',`
			`syncId: 'sync-1',`
			`runId: 'run-1',`
			`trigger: 'cli',`
			`mode: 'structural',`
			`dryRun: false,`
			`artifactPaths: {`
			`rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',`
			`reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',`
			`manifestShards: [],`
			`enrichmentArtifacts: [],`
			`},`
			`diffSummary: {`
			`tablesAdded: 0,`
			`tablesModified: 0,`
			`tablesDeleted: 0,`
			`tablesUnchanged: 0,`
			`columnsAdded: 0,`
			`columnsModified: 0,`
			`columnsDeleted: 0,`
			`},`
			`manifestShardsWritten: 0,`
			`structuralSyncStats: {`
			`tablesCreated: 0,`
			`tablesUpdated: 0,`
			`tablesDeleted: 0,`
			`columnsCreated: 0,`
			`columnsUpdated: 0,`
			`columnsDeleted: 0,`
			`},`
			`enrichment: {`
			`dataDictionary: 'skipped',`
			`tableDescriptions: 'skipped',`
			`columnDescriptions: 'skipped',`
			`embeddings: 'skipped',`
			`deterministicRelationships: 'skipped',`
			`llmRelationshipValidation: 'skipped',`
			`statisticalValidation: 'skipped',`
			`},`
			`capabilityGaps: [],`
			`warnings: [`
			`{`
			`code: 'credential_redacted',`
			`message: 'metadata redacted',`
			`recoverable: true,`
			`metadata: {`
			`credentials_json: '{"private_key":"pem-value"}', // pragma: allowlist secret`
			`safeCount: 2,`
			`},`
			`},`
			`],`
			`relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },`
			`enrichmentState: {`
			`resumedStages: [],`
			`completedStages: [],`
			`failedStages: [],`
			`},`
			`createdAt: '2026-04-29T00:00:00.000Z',`
			`};`

rename klo to ktx 2026-05-10 23:51:24 +02:00			`const redacted = redactKtxScanReport(report);`
Initial open-source release 2026-05-10 23:12:26 +02:00
			`expect(redacted.warnings[0]?.metadata).toEqual({`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`credentials_json: REDACTED_KTX_CREDENTIAL_VALUE,`
Initial open-source release 2026-05-10 23:12:26 +02:00			`safeCount: 2,`
			`});`
			`expect(report.warnings[0]?.metadata).toEqual({`
			`credentials_json: '{"private_key":"pem-value"}', // pragma: allowlist secret`
			`safeCount: 2,`
			`});`
			`});`

			`it('redacts standalone primitive credential values only when the field key is sensitive', () => {`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`expect(redactKtxCredentialValue('password', 'abc')).toBe(REDACTED_KTX_CREDENTIAL_VALUE);`
			`expect(redactKtxCredentialValue('schema', 'public')).toBe('public');`
Initial open-source release 2026-05-10 23:12:26 +02:00			`});`
			`});`