Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,391 @@
import { describe, expect, it } from 'vitest';
import { buildDefaultKloProjectConfig, parseKloProjectConfig, serializeKloProjectConfig } from './config.js';
describe('KLO project config', () => {
it('builds the default standalone project config', () => {
  // Complete expected shape of a freshly built config; toEqual compares it recursively.
  const expectedDefaults = {
    project: 'warehouse',
    connections: {},
    storage: {
      state: 'sqlite',
      search: 'sqlite-fts5',
      git: {
        auto_commit: true,
        author: 'klo <klo@example.com>',
      },
    },
    llm: {
      provider: {
        backend: 'none',
      },
      models: {},
    },
    ingest: {
      adapters: ['live-database', 'lookml', 'metabase', 'metricflow', 'notion'],
      embeddings: {
        backend: 'deterministic',
        model: 'deterministic',
        dimensions: 8,
      },
      workUnits: {
        stepBudget: 40,
        maxConcurrency: 1,
        failureMode: 'continue',
      },
    },
    agent: {
      run_research: {
        enabled: false,
        max_iterations: 20,
        default_toolset: ['sl_query', 'knowledge_search', 'sl_read_source'],
      },
    },
    memory: {
      auto_commit: true,
    },
    scan: {
      enrichment: {
        mode: 'none',
      },
      relationships: {
        enabled: true,
        llmProposals: true,
        validationRequiredForManifest: true,
        acceptThreshold: 0.85,
        reviewThreshold: 0.55,
        maxLlmTablesPerBatch: 40,
        maxCandidatesPerColumn: 25,
        profileSampleRows: 10000,
        validationConcurrency: 4,
      },
    },
  };

  const built = buildDefaultKloProjectConfig('warehouse');

  expect(built).toEqual(expectedDefaults);
});
it('round-trips through YAML with stable defaults', () => {
  const expectedAdapters = ['live-database', 'lookml', 'metabase', 'metricflow', 'notion'];
  const expectedEmbeddings = {
    backend: 'deterministic',
    model: 'deterministic',
    dimensions: 8,
  };
  // Fragments that must appear verbatim in the serialized YAML text.
  const expectedFragments = [
    'project: warehouse',
    'live-database',
    'notion',
    ' embeddings:\n backend: deterministic\n model: deterministic\n dimensions: 8',
  ];

  const yamlText = serializeKloProjectConfig(buildDefaultKloProjectConfig('warehouse'));
  const reparsed = parseKloProjectConfig(yamlText);

  for (const fragment of expectedFragments) {
    expect(yamlText).toContain(fragment);
  }
  expect(reparsed.project).toBe('warehouse');
  expect(reparsed.ingest.adapters).toEqual(expectedAdapters);
  expect(reparsed.ingest.embeddings).toEqual(expectedEmbeddings);
});
// Checks that the optional `setup` section is parsed into its two string arrays
// and that both keys show up again in the serialized output.
it('parses and serializes setup wizard metadata', () => {
const config = parseKloProjectConfig(`
project: revenue
setup:
database_connection_ids:
- warehouse
- analytics
completed_steps:
- project
- llm
connections:
warehouse:
driver: postgres
url: env:WAREHOUSE_URL
`);
expect(config.setup).toEqual({
database_connection_ids: ['warehouse', 'analytics'],
completed_steps: ['project', 'llm'],
});
const serialized = serializeKloProjectConfig(config);
expect(serialized).toContain('setup:');
expect(serialized).toContain('database_connection_ids:');
expect(serialized).toContain('completed_steps:');
});
// Checks the top-level `llm` section with the Anthropic backend (provider settings,
// per-role models, prompt caching) together with explicit `ingest.workUnits` values.
it('parses global direct Anthropic LLM config', () => {
const config = parseKloProjectConfig(`
project: demo
llm:
provider:
backend: anthropic
anthropic:
api_key: env:ANTHROPIC_API_KEY
models:
default: claude-sonnet-4-6
triage: claude-haiku-4-5
repair: claude-opus-4-7
promptCaching:
enabled: false
ingest:
workUnits:
stepBudget: 30
maxConcurrency: 2
failureMode: abort
`);
// toMatchObject: only the listed keys are compared, extra keys are tolerated.
expect(config.llm).toMatchObject({
provider: {
backend: 'anthropic',
anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret
},
models: {
default: 'claude-sonnet-4-6',
triage: 'claude-haiku-4-5',
repair: 'claude-opus-4-7',
},
promptCaching: { enabled: false },
});
expect(config.ingest.workUnits).toEqual({
stepBudget: 30,
maxConcurrency: 2,
failureMode: 'abort',
});
});
// Checks the top-level `llm` section with the Vertex backend: the `vertex`
// project/location pair and the configured model roles.
it('parses global Vertex LLM config', () => {
const config = parseKloProjectConfig(`
project: demo
llm:
provider:
backend: vertex
vertex:
project: local-gcp-project
location: us-east5
models:
default: claude-sonnet-4-6
triage: claude-haiku-4-5
`);
expect(config.llm.provider.backend).toBe('vertex');
expect(config.llm.provider.vertex).toEqual({ project: 'local-gcp-project', location: 'us-east5' });
expect(config.llm.models).toEqual({
default: 'claude-sonnet-4-6',
triage: 'claude-haiku-4-5',
});
});
// Checks a config that combines a gateway LLM provider, sentence-transformers
// embeddings for ingest, and OpenAI embeddings nested under scan enrichment.
it('parses gateway LLM, OpenAI scan embeddings, and sentence-transformers ingest embeddings', () => {
const config = parseKloProjectConfig(`
project: demo
llm:
provider:
backend: gateway
gateway:
api_key: env:AI_GATEWAY_API_KEY
base_url: https://gateway.example/v1
models:
default: anthropic/claude-sonnet-4-6
ingest:
embeddings:
backend: sentence-transformers
model: all-MiniLM-L6-v2
dimensions: 384
sentenceTransformers:
base_url: http://127.0.0.1:18081
pathPrefix: ""
batchSize: 16
scan:
enrichment:
mode: llm
embeddings:
backend: openai
model: text-embedding-3-small
dimensions: 1536
openai:
api_key: env:OPENAI_API_KEY
batchSize: 32
`);
// An empty-string pathPrefix is expected to be kept, not dropped.
expect(config.ingest.embeddings).toMatchObject({
backend: 'sentence-transformers',
model: 'all-MiniLM-L6-v2',
dimensions: 384,
sentenceTransformers: { base_url: 'http://127.0.0.1:18081', pathPrefix: '' },
batchSize: 16,
});
expect(config.llm.models.default).toBe('anthropic/claude-sonnet-4-6');
expect(config.scan.enrichment.mode).toBe('llm');
expect(config.scan.enrichment.embeddings?.dimensions).toBe(1536);
});
// Checks that snake_case relationship settings in YAML are exposed as camelCase
// fields on the parsed config, and that serialization emits the camelCase names.
it('parses scan relationship settings', () => {
const config = parseKloProjectConfig(`
project: demo
scan:
relationships:
enabled: false
llm_proposals: false
validation_required_for_manifest: true
accept_threshold: 0.91
review_threshold: 0.61
max_llm_tables_per_batch: 12
max_candidates_per_column: 7
profile_sample_rows: 500
validation_concurrency: 2
validation_budget: 0
`);
// A validation budget of 0 is a valid value and must be preserved.
expect(config.scan.relationships).toEqual({
enabled: false,
llmProposals: false,
validationRequiredForManifest: true,
acceptThreshold: 0.91,
reviewThreshold: 0.61,
maxLlmTablesPerBatch: 12,
maxCandidatesPerColumn: 7,
profileSampleRows: 500,
validationConcurrency: 2,
validationBudget: 0,
});
expect(serializeKloProjectConfig(config)).toContain('enabled: false');
expect(serializeKloProjectConfig(config)).toContain('llmProposals: false');
expect(serializeKloProjectConfig(config)).toContain('validationRequiredForManifest: true');
expect(serializeKloProjectConfig(config)).toContain('acceptThreshold: 0.91');
expect(serializeKloProjectConfig(config)).toContain('reviewThreshold: 0.61');
expect(serializeKloProjectConfig(config)).toContain('maxLlmTablesPerBatch: 12');
expect(serializeKloProjectConfig(config)).toContain('maxCandidatesPerColumn: 7');
expect(serializeKloProjectConfig(config)).toContain('profileSampleRows: 500');
expect(serializeKloProjectConfig(config)).toContain('validationConcurrency: 2');
expect(serializeKloProjectConfig(config)).toContain('validationBudget: 0');
});
// Checks that the string `all` is accepted as a validation budget and survives serialization.
it('parses the scan relationship validation budget sentinel', () => {
const config = parseKloProjectConfig(`
project: demo
scan:
relationships:
validation_budget: all
`);
expect(config.scan.relationships.validationBudget).toBe('all');
expect(serializeKloProjectConfig(config)).toContain('validationBudget: all');
});
// Checks that out-of-range or non-integer relationship settings are replaced by the
// defaults, and that an invalid validation budget leaves the property absent.
it('falls back to safe scan relationship defaults for invalid numeric settings', () => {
const config = parseKloProjectConfig(`
project: demo
scan:
relationships:
accept_threshold: 2
review_threshold: -1
max_llm_tables_per_batch: 0
max_candidates_per_column: -4
profile_sample_rows: 0
validation_concurrency: 0
validation_budget: 1.5
`);
expect(config.scan.relationships).toMatchObject({
acceptThreshold: 0.85,
reviewThreshold: 0.55,
maxLlmTablesPerBatch: 40,
maxCandidatesPerColumn: 25,
profileSampleRows: 10000,
validationConcurrency: 4,
});
expect(config.scan.relationships).not.toHaveProperty('validationBudget');
});
// Checks that a validation budget string other than `all` is ignored (property stays absent).
it('falls back for invalid scan relationship validation budget strings', () => {
const config = parseKloProjectConfig(`
project: demo
scan:
relationships:
validation_budget: infinite
`);
expect(config.scan.relationships).not.toHaveProperty('validationBudget');
});
// Checks that each legacy config shape makes the parser throw, with a message
// that names the unsupported field.
it('rejects legacy local LLM and embedding fields', () => {
// Legacy: LLM settings nested under `ingest`.
expect(() =>
parseKloProjectConfig(`
project: demo
ingest:
llm:
backend: anthropic
`),
).toThrow('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits');
// Legacy: `backend` directly on scan enrichment.
expect(() =>
parseKloProjectConfig(`
project: demo
scan:
enrichment:
backend: gateway
`),
).toThrow('Unsupported scan.enrichment.backend: use scan.enrichment.mode');
// Legacy: LLM settings nested under scan enrichment.
expect(() =>
parseKloProjectConfig(`
project: demo
scan:
enrichment:
mode: llm
llm:
backend: gateway
`),
).toThrow('Unsupported scan.enrichment.llm: use top-level llm.provider and llm.models');
// Legacy: `provider` key on an embeddings section.
expect(() =>
parseKloProjectConfig(`
project: demo
ingest:
embeddings:
provider: gateway
max_batch_size: 32
`),
).toThrow('Unsupported ingest.embeddings.provider');
});
// Checks that `gateway` is not accepted as an embedding backend, for both the
// ingest embeddings and the scan-enrichment embeddings sections.
it('rejects gateway embedding configs', () => {
expect(() =>
parseKloProjectConfig(`
project: demo
ingest:
embeddings:
backend: gateway
model: provider/text-embedding
dimensions: 1536
`),
).toThrow('Unsupported ingest.embeddings.backend: gateway');
expect(() =>
parseKloProjectConfig(`
project: demo
scan:
enrichment:
mode: llm
embeddings:
backend: gateway
model: provider/text-embedding
dimensions: 1536
`),
).toThrow('Unsupported scan.enrichment.embeddings.backend: gateway');
});
it('fills optional sections when a minimal config is loaded', () => {
  // Only the required `project` key is present; everything else must come from defaults.
  const minimalYaml = 'project: local\n';
  const expectedEmbeddings = {
    backend: 'deterministic',
    model: 'deterministic',
    dimensions: 8,
  };

  const loaded = parseKloProjectConfig(minimalYaml);

  expect(loaded).toEqual(buildDefaultKloProjectConfig('local'));
  expect(loaded.ingest.embeddings).toEqual(expectedEmbeddings);
});
it('rejects configs without an object root', () => {
  // A top-level YAML sequence is not an object root.
  const parseSequenceRoot = () => parseKloProjectConfig('- nope\n');

  expect(parseSequenceRoot).toThrow('klo.yaml must contain a YAML object');
});
it('rejects configs with a missing project name', () => {
  // An object root that lacks the `project` key.
  const parseWithoutProject = () => parseKloProjectConfig('connections: {}\n');

  expect(parseWithoutProject).toThrow('klo.yaml field "project" is required');
});
});

View file

@ -0,0 +1,551 @@
import type { KloEmbeddingBackend, KloLlmBackend, KloModelRole, KloPromptCacheTtl } from '@klo/llm';
import YAML from 'yaml';
/** Storage engines that can be named for project state. */
export type KloStorageState = 'postgres' | 'sqlite';
/** Search backends that can be named for project search. */
export type KloSearchBackend = 'postgres-hybrid' | 'sqlite-fts5';
/** LLM backend selection: any `@klo/llm` backend, or `'none'`. */
type KloLocalLlmBackend = KloLlmBackend | 'none';
/** Embedding backend selection: any `@klo/llm` embedding backend, or `'none'`. */
type KloLocalEmbeddingBackend = KloEmbeddingBackend | 'none';
/** Allowed values of `scan.enrichment.mode`. */
type KloScanEnrichmentMode = 'none' | 'deterministic' | 'llm';
/**
 * Prompt-caching settings stored under `llm.promptCaching`.
 * Every field is optional; the parser only copies fields that have a valid value.
 */
interface KloProjectPromptCachingConfig {
enabled?: boolean;
systemTtl?: KloPromptCacheTtl;
toolsTtl?: KloPromptCacheTtl;
historyTtl?: KloPromptCacheTtl;
vertexFallbackTo5m?: boolean;
}
/**
 * Provider selection stored under `llm.provider`: the chosen backend plus
 * optional per-backend settings blocks.
 */
export interface KloProjectLlmProviderConfig {
backend: KloLocalLlmBackend;
vertex?: { project?: string; location: string };
anthropic?: { api_key?: string; base_url?: string };
gateway?: { api_key?: string; base_url?: string };
}
/** Top-level `llm` section: provider, model name per role, and optional prompt caching. */
export interface KloProjectLlmConfig {
provider: KloProjectLlmProviderConfig;
// Model name keyed by role; `default` is listed explicitly in addition to the role keys.
models: Partial<Record<KloModelRole, string>> & { default?: string };
promptCaching?: KloProjectPromptCachingConfig;
}
/**
 * Embedding settings, used for `ingest.embeddings` and, optionally,
 * `scan.enrichment.embeddings`.
 */
export interface KloProjectEmbeddingConfig {
backend: KloLocalEmbeddingBackend;
model?: string;
dimensions: number;
openai?: { api_key?: string; base_url?: string };
sentenceTransformers?: { base_url: string; pathPrefix?: string };
batchSize?: number;
}
/** `scan.enrichment` section: the enrichment mode and optional embedding settings. */
export interface KloScanEnrichmentConfig {
mode: KloScanEnrichmentMode;
embeddings?: KloProjectEmbeddingConfig;
}
/** `ingest.workUnits` section. The parser accepts only positive integers for the numeric fields. */
export interface KloIngestWorkUnitsConfig {
stepBudget: number;
maxConcurrency: number;
failureMode: 'abort' | 'continue';
}
/**
 * `scan.relationships` section in its parsed (camelCase) form.
 * The parser reads each multi-word setting from either its snake_case or camelCase key.
 */
export interface KloScanRelationshipConfig {
enabled: boolean;
llmProposals: boolean;
validationRequiredForManifest: boolean;
// Thresholds are parsed as ratios in the closed range [0, 1].
acceptThreshold: number;
reviewThreshold: number;
// The four limits below are parsed as positive integers.
maxLlmTablesPerBatch: number;
maxCandidatesPerColumn: number;
profileSampleRows: number;
validationConcurrency: number;
// Non-negative integer or the string 'all'; omitted when not configured or invalid.
validationBudget?: number | 'all';
}
/** Top-level `scan` section: enrichment and relationship settings. */
export interface KloProjectScanConfig {
enrichment: KloScanEnrichmentConfig;
relationships: KloScanRelationshipConfig;
}
/**
 * One entry of the `connections` map. Only `driver`, `url` and `readonly` are typed;
 * any additional keys are allowed through the index signature.
 */
export interface KloProjectConnectionConfig {
driver: string;
url?: string;
readonly?: boolean;
[key: string]: unknown;
}
/** Optional `setup` section: two lists of strings recorded alongside the project config. */
export interface KloProjectSetupConfig {
database_connection_ids: string[];
completed_steps: string[];
}
/**
 * Fully resolved project configuration as returned by `parseKloProjectConfig`
 * and `buildDefaultKloProjectConfig`. Every section except `setup` is always present.
 */
export interface KloProjectConfig {
project: string;
// Only present when the source document contains a `setup` object.
setup?: KloProjectSetupConfig;
// Connection settings keyed by connection id.
connections: Record<string, KloProjectConnectionConfig>;
storage: {
state: KloStorageState;
search: KloSearchBackend;
git: {
auto_commit: boolean;
author: string;
};
};
llm: KloProjectLlmConfig;
ingest: {
adapters: string[];
embeddings: KloProjectEmbeddingConfig;
workUnits: KloIngestWorkUnitsConfig;
};
agent: {
run_research: {
enabled: boolean;
max_iterations: number;
default_toolset: string[];
};
};
memory: {
auto_commit: boolean;
};
scan: KloProjectScanConfig;
}
/**
 * Type guard for plain key/value containers.
 *
 * @returns `true` for any non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return !Array.isArray(value);
}
/**
 * Reads a list of strings from an untyped value.
 *
 * @param value - Candidate value from the parsed document.
 * @param fallback - Returned as-is (same reference) when `value` is not an array.
 * @returns A new array holding only the non-empty string items of `value`.
 */
function stringArray(value: unknown, fallback: string[]): string[] {
  if (Array.isArray(value)) {
    const strings: string[] = [];
    for (const item of value) {
      // Non-strings and empty strings are dropped silently.
      if (typeof item === 'string' && item.length > 0) {
        strings.push(item);
      }
    }
    return strings;
  }
  return fallback;
}
/** Returns `value` when it is a boolean primitive, otherwise `fallback`. */
function booleanValue(value: unknown, fallback: boolean): boolean {
  if (typeof value !== 'boolean') {
    return fallback;
  }
  return value;
}
/** Returns `value` when it is a finite number, otherwise `fallback`. */
function numberValue(value: unknown, fallback: number): number {
  if (typeof value !== 'number') {
    return fallback;
  }
  // NaN and +/-Infinity are rejected.
  return Number.isFinite(value) ? value : fallback;
}
/**
 * Returns `value` when it is a string with at least one non-whitespace character,
 * otherwise `fallback`. The accepted string is returned untrimmed.
 */
function stringValue(value: unknown, fallback: string): string {
  if (typeof value !== 'string') {
    return fallback;
  }
  return value.trim() === '' ? fallback : value;
}
/**
 * Returns the trimmed string when `value` is a string with non-whitespace content;
 * returns `undefined` for anything else.
 */
function optionalNonEmptyString(value: unknown): string | undefined {
  const text = typeof value === 'string' ? value.trim() : '';
  return text === '' ? undefined : text;
}
/** Returns `value` when it is an integer greater than zero, otherwise `fallback`. */
function positiveIntegerConfigValue(value: unknown, fallback: number): number {
  if (typeof value === 'number' && Number.isInteger(value) && value > 0) {
    return value;
  }
  return fallback;
}
/**
 * Reads a validation budget.
 *
 * @returns `value` when it is the string `'all'` or a non-negative integer
 * (zero included); otherwise `fallback`.
 */
function validationBudgetConfigValue(value: unknown, fallback: number | 'all' | undefined): number | 'all' | undefined {
  if (typeof value === 'number') {
    return Number.isInteger(value) && value >= 0 ? value : fallback;
  }
  return value === 'all' ? 'all' : fallback;
}
/** Returns `value` when it is a finite number within the closed range [0, 1], otherwise `fallback`. */
function ratioConfigValue(value: unknown, fallback: number): number {
  if (typeof value === 'number' && Number.isFinite(value) && value >= 0 && value <= 1) {
    return value;
  }
  return fallback;
}
/**
 * Renders a rejected config value for an error message.
 *
 * Strings are shown verbatim. Objects and arrays are shown as JSON so the message
 * does not degrade to `[object Object]`; anything else uses `String()`.
 */
function describeConfigValue(value: unknown): string {
  if (typeof value === 'string') {
    return value;
  }
  if (typeof value === 'object' && value !== null) {
    try {
      return JSON.stringify(value);
    } catch {
      // Unserializable structures (e.g. circular references) fall through to String().
    }
  }
  return String(value);
}

/**
 * Validates an LLM backend name.
 *
 * @param value - Raw `backend` value from the parsed document.
 * @param fallback - Returned when `value` is `null` or `undefined`.
 * @param section - Config path used as the prefix of the error message.
 * @returns One of `'none'`, `'anthropic'`, `'vertex'`, `'gateway'`, or `fallback`.
 * @throws Error when `value` is present but not one of the accepted names.
 */
function localLlmBackend(value: unknown, fallback: KloLocalLlmBackend, section = 'llm.provider'): KloLocalLlmBackend {
  if (value == null) {
    return fallback;
  }
  if (value === 'none' || value === 'anthropic' || value === 'vertex' || value === 'gateway') {
    return value;
  }
  throw new Error(`Unsupported ${section}.backend: ${describeConfigValue(value)}`);
}

/**
 * Validates an embedding backend name.
 *
 * @param value - Raw `backend` value from the parsed document.
 * @param fallback - Returned when `value` is `null` or `undefined`.
 * @param section - Config path used as the prefix of the error message.
 * @returns One of `'none'`, `'deterministic'`, `'openai'`, `'sentence-transformers'`, or `fallback`.
 * @throws Error when `value` is present but not one of the accepted names.
 */
function localEmbeddingBackend(
  value: unknown,
  fallback: KloLocalEmbeddingBackend,
  section = 'ingest.embeddings',
): KloLocalEmbeddingBackend {
  if (value == null) {
    return fallback;
  }
  if (
    value === 'none' ||
    value === 'deterministic' ||
    value === 'openai' ||
    value === 'sentence-transformers'
  ) {
    return value;
  }
  throw new Error(`Unsupported ${section}.backend: ${describeConfigValue(value)}`);
}

/**
 * Validates a scan enrichment mode.
 *
 * @param value - Raw `mode` value from the parsed document.
 * @param fallback - Returned when `value` is `null` or `undefined`.
 * @returns One of `'none'`, `'deterministic'`, `'llm'`, or `fallback`.
 * @throws Error when `value` is present but not one of the accepted modes.
 */
function scanEnrichmentMode(value: unknown, fallback: KloScanEnrichmentMode): KloScanEnrichmentMode {
  if (value == null) {
    return fallback;
  }
  if (value === 'none' || value === 'deterministic' || value === 'llm') {
    return value;
  }
  throw new Error(`Unsupported scan.enrichment.mode: ${describeConfigValue(value)}`);
}
/**
 * Guards against the legacy `provider` key inside a config section.
 *
 * @param section - Config path used in the error message.
 * @param value - The section's `provider` value; anything other than `undefined` is rejected.
 * @throws Error when `value` is not `undefined` (note that `null` is rejected too).
 */
function rejectLegacyProvider(section: string, value: unknown): void {
  if (value === undefined) {
    return;
  }
  throw new Error(`Unsupported ${section}.provider: use ${section}.backend`);
}
/** Returns `value` itself when it is a non-array object, otherwise a fresh empty object. */
function optionalStringRecord(value: unknown): Record<string, unknown> {
  if (isRecord(value)) {
    return value;
  }
  return {};
}
/**
 * Reads an `{ api_key, base_url }` settings block.
 *
 * Each field is kept only when it is a non-blank string (stored trimmed).
 *
 * @returns The settings object, or `undefined` when `value` is not an object or
 * neither field has a usable value.
 */
function optionalProviderConfig(value: unknown): { api_key?: string; base_url?: string } | undefined {
  if (!isRecord(value)) {
    return undefined;
  }
  const settings: { api_key?: string; base_url?: string } = {};
  const apiKey = optionalNonEmptyString(value.api_key);
  if (apiKey) {
    settings.api_key = apiKey;
  }
  const baseUrl = optionalNonEmptyString(value.base_url);
  if (baseUrl) {
    settings.base_url = baseUrl;
  }
  return apiKey || baseUrl ? settings : undefined;
}
/**
 * Reads the role-to-model map of the `llm.models` section.
 *
 * Entries whose model name is not a non-blank string are skipped; names are stored trimmed.
 * Role keys are not validated here.
 *
 * @returns A new map; empty when `value` is not an object.
 */
function parseModels(value: unknown): KloProjectLlmConfig['models'] {
  const models: KloProjectLlmConfig['models'] = {};
  if (isRecord(value)) {
    Object.entries(value).forEach(([role, candidate]) => {
      const modelName = optionalNonEmptyString(candidate);
      if (modelName !== undefined) {
        models[role as KloModelRole] = modelName;
      }
    });
  }
  return models;
}
/** Returns `value` when it is exactly `'5m'` or `'1h'`, otherwise `undefined`. */
function promptCacheTtl(value: unknown): KloPromptCacheTtl | undefined {
  if (value === '5m') {
    return value;
  }
  if (value === '1h') {
    return value;
  }
  return undefined;
}
/**
 * Reads the `llm.promptCaching` section.
 *
 * Only fields holding a valid value are copied: booleans for `enabled` and
 * `vertexFallbackTo5m`, accepted TTL strings for the three `*Ttl` fields.
 *
 * @returns The parsed settings (possibly an empty object), or `undefined` when
 * `value` is not an object.
 */
function parsePromptCaching(value: unknown): KloProjectPromptCachingConfig | undefined {
  if (!isRecord(value)) {
    return undefined;
  }
  // Fields are assigned in a fixed order so the key order of the result stays stable.
  const caching: KloProjectPromptCachingConfig = {};
  if (typeof value.enabled === 'boolean') {
    caching.enabled = value.enabled;
  }
  const systemTtl = promptCacheTtl(value.systemTtl);
  if (systemTtl) {
    caching.systemTtl = systemTtl;
  }
  const toolsTtl = promptCacheTtl(value.toolsTtl);
  if (toolsTtl) {
    caching.toolsTtl = toolsTtl;
  }
  const historyTtl = promptCacheTtl(value.historyTtl);
  if (historyTtl) {
    caching.historyTtl = historyTtl;
  }
  if (typeof value.vertexFallbackTo5m === 'boolean') {
    caching.vertexFallbackTo5m = value.vertexFallbackTo5m;
  }
  return caching;
}
/**
 * Reads the `llm.provider` section.
 *
 * @param raw - The raw provider object from the parsed document.
 * @param defaults - Supplies the backend used when `raw.backend` is absent.
 * @param section - Config path used in error messages.
 * @returns The backend plus whichever of `vertex`, `anthropic`, `gateway` were configured.
 * @throws Error when a legacy `provider` key is present or the backend is not supported.
 */
function parseProjectLlmProviderConfig(
  raw: Record<string, unknown>,
  defaults: KloProjectLlmProviderConfig,
  section: string,
): KloProjectLlmProviderConfig {
  // The legacy key is rejected before anything else is inspected.
  rejectLegacyProvider(section, raw.provider);

  let vertex: KloProjectLlmProviderConfig['vertex'];
  if (isRecord(raw.vertex)) {
    const gcpProject = optionalNonEmptyString(raw.vertex.project);
    vertex = {
      ...(gcpProject ? { project: gcpProject } : {}),
      // A missing or blank location becomes an empty string rather than an error.
      location: stringValue(raw.vertex.location, ''),
    };
  }
  const anthropic = optionalProviderConfig(raw.anthropic);
  const gateway = optionalProviderConfig(raw.gateway);

  const provider: KloProjectLlmProviderConfig = {
    backend: localLlmBackend(raw.backend, defaults.backend, section),
  };
  if (vertex) {
    provider.vertex = vertex;
  }
  if (anthropic) {
    provider.anthropic = anthropic;
  }
  if (gateway) {
    provider.gateway = gateway;
  }
  return provider;
}
/**
 * Reads the top-level `llm` section.
 *
 * @param raw - The raw `llm` object from the parsed document.
 * @param defaults - Supplies the default provider backend and the models used when
 * `raw.models` is `null`/`undefined`.
 * @returns Provider and models, plus `promptCaching` when that subsection is an object.
 */
function parseProjectLlmConfig(raw: Record<string, unknown>, defaults: KloProjectLlmConfig): KloProjectLlmConfig {
  const rawProvider = isRecord(raw.provider) ? raw.provider : {};
  const provider = parseProjectLlmProviderConfig(rawProvider, defaults.provider, 'llm.provider');
  const models = parseModels(raw.models ?? defaults.models);
  const promptCaching = parsePromptCaching(raw.promptCaching);
  if (promptCaching) {
    return { provider, models, promptCaching };
  }
  return { provider, models };
}
/**
 * Reads an embeddings section.
 *
 * @param raw - The raw embeddings object from the parsed document.
 * @param defaults - Supplies backend, model and dimensions when not configured.
 * @param section - Config path used in error messages.
 * @returns The resolved embedding settings.
 * @throws Error when a legacy `provider` key is present or the backend is not supported.
 */
function parseProjectEmbeddingConfig(
  raw: Record<string, unknown>,
  defaults: KloProjectEmbeddingConfig,
  section: string,
): KloProjectEmbeddingConfig {
  rejectLegacyProvider(section, raw.provider);

  const openai = optionalProviderConfig(raw.openai);

  let sentenceTransformers: KloProjectEmbeddingConfig['sentenceTransformers'];
  if (isRecord(raw.sentenceTransformers)) {
    const rawPrefix = raw.sentenceTransformers.pathPrefix;
    sentenceTransformers = {
      base_url: stringValue(raw.sentenceTransformers.base_url, ''),
      // Any string is kept for pathPrefix, including the empty string.
      ...(typeof rawPrefix === 'string' ? { pathPrefix: rawPrefix } : {}),
    };
  }

  const backend = localEmbeddingBackend(raw.backend, defaults.backend, section);

  // The default model only applies when the backend itself was defaulted and is not 'none'.
  let model = optionalNonEmptyString(raw.model);
  if (model === undefined && raw.backend == null && backend !== 'none') {
    model = defaults.model;
  }

  const dimensions = positiveIntegerConfigValue(raw.dimensions, defaults.dimensions);
  // 0 acts as the "not configured" marker for batchSize.
  const batchSize = positiveIntegerConfigValue(raw.batchSize, 0);

  const embeddings: KloProjectEmbeddingConfig = model ? { backend, model, dimensions } : { backend, dimensions };
  if (openai) {
    embeddings.openai = openai;
  }
  if (sentenceTransformers) {
    embeddings.sentenceTransformers = sentenceTransformers;
  }
  if (batchSize > 0) {
    embeddings.batchSize = batchSize;
  }
  return embeddings;
}
/**
 * Reads the `scan.relationships` section.
 *
 * Multi-word settings are looked up under their snake_case key first and their
 * camelCase key second. Invalid values are replaced by the matching default.
 *
 * @param raw - The raw relationships object from the parsed document.
 * @param defaults - Fallback value for every setting.
 * @returns The resolved settings; `validationBudget` is present only when it resolves
 * to a defined value.
 */
function parseScanRelationshipConfig(
  raw: Record<string, unknown>,
  defaults: KloScanRelationshipConfig,
): KloScanRelationshipConfig {
  // snake_case wins; camelCase is consulted only when the snake_case key is null/undefined.
  const setting = (snakeKey: string, camelKey: string): unknown => raw[snakeKey] ?? raw[camelKey];

  const relationships: KloScanRelationshipConfig = {
    enabled: booleanValue(raw.enabled, defaults.enabled),
    llmProposals: booleanValue(setting('llm_proposals', 'llmProposals'), defaults.llmProposals),
    validationRequiredForManifest: booleanValue(
      setting('validation_required_for_manifest', 'validationRequiredForManifest'),
      defaults.validationRequiredForManifest,
    ),
    acceptThreshold: ratioConfigValue(setting('accept_threshold', 'acceptThreshold'), defaults.acceptThreshold),
    reviewThreshold: ratioConfigValue(setting('review_threshold', 'reviewThreshold'), defaults.reviewThreshold),
    maxLlmTablesPerBatch: positiveIntegerConfigValue(
      setting('max_llm_tables_per_batch', 'maxLlmTablesPerBatch'),
      defaults.maxLlmTablesPerBatch,
    ),
    maxCandidatesPerColumn: positiveIntegerConfigValue(
      setting('max_candidates_per_column', 'maxCandidatesPerColumn'),
      defaults.maxCandidatesPerColumn,
    ),
    profileSampleRows: positiveIntegerConfigValue(
      setting('profile_sample_rows', 'profileSampleRows'),
      defaults.profileSampleRows,
    ),
    validationConcurrency: positiveIntegerConfigValue(
      setting('validation_concurrency', 'validationConcurrency'),
      defaults.validationConcurrency,
    ),
  };

  const budget = validationBudgetConfigValue(
    setting('validation_budget', 'validationBudget'),
    defaults.validationBudget,
  );
  if (budget !== undefined) {
    relationships.validationBudget = budget;
  }
  return relationships;
}
/** Returns `value` when it is exactly `'abort'` or `'continue'`, otherwise `fallback`. */
function workUnitFailureMode(value: unknown, fallback: 'abort' | 'continue'): 'abort' | 'continue' {
  if (value === 'abort') {
    return value;
  }
  if (value === 'continue') {
    return value;
  }
  return fallback;
}
/**
 * Reads the `ingest.workUnits` section.
 *
 * @param raw - The raw work-units object from the parsed document.
 * @param defaults - Fallback for each setting that is missing or invalid.
 * @returns The resolved step budget, concurrency limit and failure mode.
 */
function parseIngestWorkUnitsConfig(
  raw: Record<string, unknown>,
  defaults: KloIngestWorkUnitsConfig,
): KloIngestWorkUnitsConfig {
  const { stepBudget, maxConcurrency, failureMode } = raw;
  return {
    stepBudget: positiveIntegerConfigValue(stepBudget, defaults.stepBudget),
    maxConcurrency: positiveIntegerConfigValue(maxConcurrency, defaults.maxConcurrency),
    failureMode: workUnitFailureMode(failureMode, defaults.failureMode),
  };
}
/**
 * Builds the default project configuration.
 *
 * Every call returns freshly allocated objects and arrays, so a caller may mutate
 * the result without affecting later calls.
 *
 * @param projectName - Value of the `project` field; defaults to `'klo-project'`.
 */
export function buildDefaultKloProjectConfig(projectName = 'klo-project'): KloProjectConfig {
  const storage: KloProjectConfig['storage'] = {
    state: 'sqlite',
    search: 'sqlite-fts5',
    git: { auto_commit: true, author: 'klo <klo@example.com>' },
  };

  const llm: KloProjectConfig['llm'] = {
    provider: { backend: 'none' },
    models: {},
  };

  const ingest: KloProjectConfig['ingest'] = {
    adapters: ['live-database', 'lookml', 'metabase', 'metricflow', 'notion'],
    embeddings: { backend: 'deterministic', model: 'deterministic', dimensions: 8 },
    workUnits: { stepBudget: 40, maxConcurrency: 1, failureMode: 'continue' },
  };

  const agent: KloProjectConfig['agent'] = {
    run_research: {
      enabled: false,
      max_iterations: 20,
      default_toolset: ['sl_query', 'knowledge_search', 'sl_read_source'],
    },
  };

  const scan: KloProjectConfig['scan'] = {
    enrichment: { mode: 'none' },
    relationships: {
      enabled: true,
      llmProposals: true,
      validationRequiredForManifest: true,
      acceptThreshold: 0.85,
      reviewThreshold: 0.55,
      maxLlmTablesPerBatch: 40,
      maxCandidatesPerColumn: 25,
      profileSampleRows: 10000,
      validationConcurrency: 4,
    },
  };

  // Property order here determines the key order of the serialized YAML.
  return {
    project: projectName,
    connections: {},
    storage,
    llm,
    ingest,
    agent,
    memory: { auto_commit: true },
    scan,
  };
}
/**
 * Parses the text of a `klo.yaml` document into a fully populated project config.
 *
 * Missing sections and invalid scalar values are filled from
 * `buildDefaultKloProjectConfig`; legacy field layouts are rejected.
 *
 * @param raw - YAML source text.
 * @returns The resolved configuration.
 * @throws Error when the root is not an object, `project` is missing or blank,
 * a legacy field (`ingest.llm`, `scan.enrichment.backend`, `scan.enrichment.llm`,
 * a section-level `provider`) is present, or a backend/mode value is unsupported.
 */
export function parseKloProjectConfig(raw: string): KloProjectConfig {
  const root: unknown = YAML.parse(raw);
  if (!isRecord(root)) {
    throw new Error('klo.yaml must contain a YAML object');
  }

  const projectName = typeof root.project === 'string' ? root.project.trim() : '';
  if (projectName.length === 0) {
    throw new Error('klo.yaml field "project" is required');
  }

  const defaults = buildDefaultKloProjectConfig(projectName);

  // A section that is absent or not an object is treated as empty.
  const sectionOf = (value: unknown): Record<string, unknown> => (isRecord(value) ? value : {});

  const llmSection = sectionOf(root.llm);
  const storageSection = sectionOf(root.storage);
  const storageGitSection = sectionOf(storageSection.git);
  const setupSection = isRecord(root.setup) ? root.setup : undefined;
  const ingestSection = sectionOf(root.ingest);
  const ingestEmbeddingsSection = sectionOf(ingestSection.embeddings);
  const ingestWorkUnitsSection = sectionOf(ingestSection.workUnits);
  const agentSection = sectionOf(root.agent);
  const runResearchSection = sectionOf(agentSection.run_research);
  const memorySection = sectionOf(root.memory);
  const scanSection = sectionOf(root.scan);
  const scanEnrichmentSection = sectionOf(scanSection.enrichment);
  const scanRelationshipsSection = sectionOf(scanSection.relationships);

  // Legacy layouts are rejected before any section is parsed.
  if (isRecord(ingestSection.llm)) {
    throw new Error('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits');
  }
  if (scanEnrichmentSection.backend !== undefined) {
    throw new Error('Unsupported scan.enrichment.backend: use scan.enrichment.mode');
  }
  if (isRecord(scanEnrichmentSection.llm)) {
    throw new Error('Unsupported scan.enrichment.llm: use top-level llm.provider and llm.models');
  }

  const llm = parseProjectLlmConfig(llmSection, defaults.llm);
  const ingestEmbeddings = parseProjectEmbeddingConfig(
    ingestEmbeddingsSection,
    defaults.ingest.embeddings,
    'ingest.embeddings',
  );
  const ingestWorkUnits = parseIngestWorkUnitsConfig(ingestWorkUnitsSection, defaults.ingest.workUnits);

  // Scan embeddings reuse the ingest embedding defaults; the result is attached only
  // when the document actually has a `scan.enrichment.embeddings` object.
  const scanEmbeddings = parseProjectEmbeddingConfig(
    optionalStringRecord(scanEnrichmentSection.embeddings),
    defaults.ingest.embeddings,
    'scan.enrichment.embeddings',
  );
  const enrichment: KloScanEnrichmentConfig = {
    mode: scanEnrichmentMode(scanEnrichmentSection.mode, defaults.scan.enrichment.mode),
  };
  if (isRecord(scanEnrichmentSection.embeddings)) {
    enrichment.embeddings = scanEmbeddings;
  }

  const relationships = parseScanRelationshipConfig(scanRelationshipsSection, defaults.scan.relationships);

  const setupPart = setupSection
    ? {
        setup: {
          database_connection_ids: stringArray(setupSection.database_connection_ids, []),
          completed_steps: stringArray(setupSection.completed_steps, []),
        },
      }
    : {};

  // Connection entries are passed through without per-field validation.
  const connections = isRecord(root.connections)
    ? (root.connections as Record<string, KloProjectConnectionConfig>)
    : defaults.connections;

  const researchDefaults = defaults.agent.run_research;

  return {
    project: projectName,
    ...setupPart,
    connections,
    storage: {
      // Only 'sqlite' / 'sqlite-fts5' are taken from the document; anything else uses the default.
      state: storageSection.state === 'sqlite' ? 'sqlite' : defaults.storage.state,
      search: storageSection.search === 'sqlite-fts5' ? 'sqlite-fts5' : defaults.storage.search,
      git: {
        auto_commit: booleanValue(storageGitSection.auto_commit, defaults.storage.git.auto_commit),
        author: stringValue(storageGitSection.author, defaults.storage.git.author),
      },
    },
    llm,
    ingest: {
      adapters: stringArray(ingestSection.adapters, defaults.ingest.adapters),
      embeddings: ingestEmbeddings,
      workUnits: ingestWorkUnits,
    },
    agent: {
      run_research: {
        enabled: booleanValue(runResearchSection.enabled, researchDefaults.enabled),
        max_iterations: numberValue(runResearchSection.max_iterations, researchDefaults.max_iterations),
        default_toolset: stringArray(runResearchSection.default_toolset, researchDefaults.default_toolset),
      },
    },
    memory: {
      auto_commit: booleanValue(memorySection.auto_commit, defaults.memory.auto_commit),
    },
    scan: {
      enrichment,
      relationships,
    },
  };
}
/**
 * Serializes a project config to YAML text.
 *
 * The output uses a 2-space indent, passes `lineWidth: 0` to the YAML stringifier,
 * and always ends with exactly one newline.
 */
export function serializeKloProjectConfig(config: KloProjectConfig): string {
  const yamlText = YAML.stringify(config, { indent: 2, lineWidth: 0 });
  return `${yamlText.trimEnd()}\n`;
}

View file

@ -0,0 +1,33 @@
export type {
KloProjectConfig,
KloProjectConnectionConfig,
KloProjectEmbeddingConfig,
KloProjectLlmConfig,
KloSearchBackend,
KloStorageState,
} from './config.js';
export { buildDefaultKloProjectConfig, parseKloProjectConfig, serializeKloProjectConfig } from './config.js';
export type { LocalGitFileStoreDeps } from './local-git-file-store.js';
export { LocalGitFileStore } from './local-git-file-store.js';
export { kloLocalStateDbPath } from './local-state-db.js';
export type {
ConnectionMappingBootstrap,
LookerMappingBootstrap,
LookmlMappingBootstrap,
MetabaseMappingBootstrap,
} from './mappings-yaml-schema.js';
export {
parseConnectionMappingBootstrap,
parseLookerMappingBootstrap,
parseLookmlMappingBootstrap,
parseMetabaseMappingBootstrap,
} from './mappings-yaml-schema.js';
export type { InitKloProjectOptions, InitKloProjectResult, KloLocalProject, LoadKloProjectOptions } from './project.js';
export { initKloProject, loadKloProject } from './project.js';
export type { KloSetupStep } from './setup-config.js';
export {
KLO_SETUP_STEPS,
markKloSetupStepComplete,
mergeKloSetupGitignoreEntries,
setKloSetupDatabaseConnectionIds,
} from './setup-config.js';

View file

@ -0,0 +1,101 @@
import { mkdtemp, readFile, rm, stat } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { GitService, type KloCoreConfig } from '../core/index.js';
import { LocalGitFileStore } from './local-git-file-store.js';
describe('LocalGitFileStore', () => {
  let tempDir: string;
  let store: LocalGitFileStore;

  /** Writes `content` to `path` through the store under test, authored by the test agent. */
  const writeAsAgent = (path: string, content: string, message: string) =>
    store.writeFile(path, content, 'Agent', 'agent@example.com', message);

  // Each spec gets its own temporary directory with a GitService initialized on it.
  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'klo-local-store-'));
    const coreConfig: KloCoreConfig = {
      storage: { configDir: tempDir, homeDir: tempDir },
      git: {
        userName: 'klo',
        userEmail: 'klo@example.com',
        bootstrapMessage: 'Initialize test project',
        bootstrapAuthor: 'klo',
        bootstrapAuthorEmail: 'klo@example.com',
      },
    };
    const git = new GitService(coreConfig);
    await git.onModuleInit();
    store = new LocalGitFileStore({ rootDir: tempDir, git });
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('writes, commits, and reads a project file', async () => {
    const pagePath = 'knowledge/global/revenue.md';
    const pageBody = '# Revenue\n';

    const result = await writeAsAgent(pagePath, pageBody, 'Add revenue page');

    // A full 40-character lowercase hex commit hash is expected.
    expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/);
    await expect(readFile(join(tempDir, pagePath), 'utf-8')).resolves.toBe(pageBody);
    await expect(store.readFile(pagePath)).resolves.toMatchObject({
      content: pageBody,
    });
  });

  it('lists files recursively and can strip the requested prefix', async () => {
    await writeAsAgent('knowledge/global/a.md', 'a', 'Add a');
    await writeAsAgent('knowledge/global/nested/b.md', 'b', 'Add b');

    const fullPaths = store.listFiles('knowledge');
    await expect(fullPaths).resolves.toEqual({
      files: ['knowledge/global/a.md', 'knowledge/global/nested/b.md'],
    });

    const strippedPaths = store.listFiles('knowledge/global', true);
    await expect(strippedPaths).resolves.toEqual({
      files: ['a.md', 'nested/b.md'],
    });
  });

  it('deletes and commits an existing file', async () => {
    const modelPath = 'semantic-layer/conn/orders.yaml';
    await writeAsAgent(modelPath, 'name: orders\n', 'Add SL');

    const removal = await store.deleteFile(modelPath, 'Agent', 'agent@example.com', 'Delete SL');

    expect(removal?.commitHash).toMatch(/^[0-9a-f]{40}$/);
    // stat must reject once the file is gone from the working tree.
    await expect(stat(join(tempDir, modelPath))).rejects.toThrow();
  });

  it('returns null when deleting a missing file', async () => {
    const removal = store.deleteFile('missing.md', 'Agent', 'agent@example.com', 'Delete missing');

    await expect(removal).resolves.toBeNull();
  });

  it('exposes Git history for a file', async () => {
    const historyPath = 'knowledge/global/history.md';
    await writeAsAgent(historyPath, 'v1', 'Add history');
    await writeAsAgent(historyPath, 'v2', 'Update history');

    const history = await store.getFileHistory(historyPath);

    expect(Array.isArray(history)).toBe(true);
    // Newest commit is expected first.
    expect(history[0]).toMatchObject({ message: 'Update history' });
    expect(history[1]).toMatchObject({ message: 'Add history' });
  });

  it('rejects absolute paths and parent-directory traversal', async () => {
    const absoluteWrite = writeAsAgent('/tmp/outside.md', 'bad', 'Bad write');
    await expect(absoluteWrite).rejects.toThrow('Path must be relative');

    const traversalRead = store.readFile('../outside.md');
    await expect(traversalRead).rejects.toThrow('Path escapes the project directory');
  });

  it('rejects direct .git access', async () => {
    const gitRead = store.readFile('.git/config');

    await expect(gitRead).rejects.toThrow('Path cannot access .git');
  });
});

View file

@ -0,0 +1,190 @@
import { promises as fs } from 'node:fs';
import { dirname, isAbsolute, join, relative, resolve, sep } from 'node:path';
import type {
GitCommitInfo,
GitService,
KloFileHistoryEntry,
KloFileListResult,
KloFileReadResult,
KloFileStorePort,
KloFileWriteResult,
} from '../core/index.js';
/** Constructor dependencies of `LocalGitFileStore`. */
export interface LocalGitFileStoreDeps {
// Directory the store operates on; the constructor resolves it to an absolute path.
rootDir: string;
// Git service used to commit writes and deletions and to derive worktree-scoped services.
git: GitService;
}
/**
 * Normalizes a path string: every backslash becomes a forward slash, then a
 * single leading `./` (with any extra slashes directly after it) is removed.
 */
function normalizeRelativePath(filePath: string): string {
  const forwardSlashed = filePath.split('\\').join('/');
  return forwardSlashed.replace(/^\.\/+/, '');
}
/**
 * Maps commit information onto the fields of a write result.
 *
 * `success` is always `true`; the commit's `message` is exposed as `commitMessage`.
 * `path` and `operation` are not set here.
 */
function gitInfoToWriteResult(info: GitCommitInfo): KloFileWriteResult {
  const { commitHash, message, author, authorEmail, timestamp, created } = info;
  return {
    success: true,
    commitHash,
    commitMessage: message,
    author,
    authorEmail,
    timestamp,
    created,
  };
}
export class LocalGitFileStore implements KloFileStorePort<LocalGitFileStore> {
private readonly rootDir: string;
private readonly git: GitService;
/** Stores the git service and the root directory, resolved to an absolute path. */
constructor(deps: LocalGitFileStoreDeps) {
  const { rootDir, git } = deps;
  this.git = git;
  this.rootDir = resolve(rootDir);
}
/**
 * Creates a store rooted at `workdir`, backed by the git service's
 * worktree-scoped counterpart for that directory.
 */
forWorktree(workdir: string): LocalGitFileStore {
  const worktreeGit = this.git.forWorktree(workdir);
  return new LocalGitFileStore({ rootDir: workdir, git: worktreeGit });
}
async writeFile(
path: string,
content: string,
author: string,
authorEmail: string,
commitMessage: string,
options?: { skipLock?: boolean },
): Promise<KloFileWriteResult> {
const relativePath = this.safeRelativePath(path);
const absolutePath = this.absolutePath(relativePath);
await fs.mkdir(dirname(absolutePath), { recursive: true });
await fs.writeFile(absolutePath, content, 'utf-8');
if (options?.skipLock) {
return { success: true, commitHash: null, path: relativePath, operation: 'write' };
}
const info = await this.git.commitFile(relativePath, commitMessage, author, authorEmail);
return { ...gitInfoToWriteResult(info), path: relativePath, operation: 'write' };
}
async readFile(path: string): Promise<KloFileReadResult> {
const relativePath = this.safeRelativePath(path);
const absolutePath = this.absolutePath(relativePath);
const content = await fs.readFile(absolutePath, 'utf-8');
const stats = await fs.stat(absolutePath);
return {
path: relativePath,
content,
size: stats.size,
modifiedAt: stats.mtime.toISOString(),
};
}
async deleteFile(
path: string,
author: string,
authorEmail: string,
commitMessage: string,
options?: { skipLock?: boolean },
): Promise<KloFileWriteResult | null> {
const relativePath = this.safeRelativePath(path);
const absolutePath = this.absolutePath(relativePath);
try {
await fs.access(absolutePath);
} catch {
return null;
}
await fs.unlink(absolutePath);
if (options?.skipLock) {
return { success: true, commitHash: null, path: relativePath, operation: 'delete' };
}
const info = await this.git.deleteFile(relativePath, commitMessage, author, authorEmail);
return { ...gitInfoToWriteResult(info), path: relativePath, operation: 'delete' };
}
async listFiles(path = '', stripPrefix = false): Promise<KloFileListResult> {
const relativePath = path ? this.safeRelativePath(path) : '';
const searchRoot = relativePath ? this.absolutePath(relativePath) : this.rootDir;
let files: string[];
try {
files = await this.walk(searchRoot);
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
return { files: [] };
}
throw error;
}
const prefix = relativePath ? `${relativePath}/` : '';
const relativeFiles = files
.map((file) => normalizeRelativePath(relative(this.rootDir, file)))
.filter((file) => !file.startsWith('.git/') && !file.includes('/.git/'))
.filter((file) => !file.startsWith('.klo/cache/'))
.map((file) => (stripPrefix && prefix && file.startsWith(prefix) ? file.slice(prefix.length) : file))
.sort();
return { files: relativeFiles };
}
async getFileHistory(path: string): Promise<KloFileHistoryEntry[]> {
const relativePath = this.safeRelativePath(path);
const history = await this.git.getFileHistory(relativePath);
return history.map((entry) => ({
sha: entry.commitHash,
commitHash: entry.commitHash,
shortHash: entry.shortHash,
message: entry.message,
author: entry.author,
authorEmail: entry.authorEmail,
timestamp: entry.timestamp,
committedDate: entry.committedDate,
created: entry.created,
enhancedMessage: entry.enhancedMessage,
}));
}
private safeRelativePath(path: string): string {
if (path.length === 0) {
return '';
}
if (isAbsolute(path)) {
throw new Error('Path must be relative');
}
const normalized = normalizeRelativePath(path);
if (normalized === '.git' || normalized.startsWith('.git/')) {
throw new Error('Path cannot access .git');
}
const absolute = resolve(this.rootDir, normalized);
if (absolute !== this.rootDir && !absolute.startsWith(`${this.rootDir}${sep}`)) {
throw new Error('Path escapes the project directory');
}
return normalized;
}
private absolutePath(path: string): string {
return path ? join(this.rootDir, path) : this.rootDir;
}
private async walk(dir: string): Promise<string[]> {
const entries = await fs.readdir(dir, { withFileTypes: true });
const files: string[] = [];
for (const entry of entries) {
const absolute = join(dir, entry.name);
if (entry.isDirectory()) {
if (entry.name !== '.git') {
files.push(...(await this.walk(absolute)));
}
} else if (entry.isFile()) {
files.push(absolute);
}
}
return files;
}
}

View file

@ -0,0 +1,6 @@
import { join } from 'node:path';
import type { KloLocalProject } from './project.js';
/** Returns the location of the project's SQLite state database: `<projectDir>/.klo/db.sqlite`. */
export function kloLocalStateDbPath(project: Pick<KloLocalProject, 'projectDir'>): string {
  const { projectDir } = project;
  return join(projectDir, '.klo', 'db.sqlite');
}

View file

@ -0,0 +1,85 @@
import { describe, expect, it } from 'vitest';
import {
parseConnectionMappingBootstrap,
parseLookmlMappingBootstrap,
parseLookerMappingBootstrap,
parseMetabaseMappingBootstrap,
} from './mappings-yaml-schema.js';
// Covers the per-adapter mapping parsers and the driver-based dispatcher.
describe('klo.yaml mapping bootstrap schema', () => {
  it('parses Metabase mapping intent with CLI syncMode default ALL', () => {
    // The input omits syncMode, so the parsed value must fall back to 'ALL'.
    const expected = {
      adapter: 'metabase',
      connectionId: 'prod-metabase',
      databaseMappings: { '1': 'prod-warehouse', '2': null },
      syncEnabled: { '1': true, '2': false },
      syncMode: 'ALL',
      selections: { collections: [12], items: [345] },
      defaultTagNames: ['klo', 'prod'],
    };

    const actual = parseMetabaseMappingBootstrap('prod-metabase', {
      driver: 'metabase',
      mappings: {
        databaseMappings: { '1': 'prod-warehouse', '2': null },
        syncEnabled: { '1': true, '2': false },
        selections: { collections: [12], items: [345] },
        defaultTagNames: ['klo', 'prod'],
      },
    });

    expect(actual).toEqual(expected);
  });

  it('rejects Metabase non-integer mapping keys', () => {
    const parseWithBadKey = (): unknown =>
      parseMetabaseMappingBootstrap('prod-metabase', {
        driver: 'metabase',
        mappings: { databaseMappings: { abc: 'warehouse' } },
      });

    expect(parseWithBadKey).toThrow(/databaseMappings key "abc" must be a positive integer string/);
  });

  it('parses Looker connection mapping intent', () => {
    const expected = {
      adapter: 'looker',
      connectionId: 'prod-looker',
      connectionMappings: {
        bigquery_prod: 'prod-warehouse',
        snowflake_dev: null,
      },
    };

    const actual = parseLookerMappingBootstrap('prod-looker', {
      driver: 'looker',
      mappings: {
        connectionMappings: {
          bigquery_prod: 'prod-warehouse',
          snowflake_dev: null,
        },
      },
    });

    expect(actual).toEqual(expected);
  });

  it('parses LookML expected connection from mappings block', () => {
    const actual = parseLookmlMappingBootstrap('prod-lookml', {
      driver: 'lookml',
      repo_url: 'https://github.com/acme/looker.git',
      mappings: { expectedLookerConnectionName: 'bigquery_prod' },
    });

    expect(actual).toEqual({
      adapter: 'lookml',
      connectionId: 'prod-lookml',
      expectedLookerConnectionName: 'bigquery_prod',
    });
  });

  it('dispatches by flat driver and returns null for connections with no mappings block', () => {
    const withoutMappings = parseConnectionMappingBootstrap('warehouse', {
      driver: 'postgres',
      url: 'env:DATABASE_URL',
    });
    expect(withoutMappings).toBeNull();

    const lookerBootstrap = parseConnectionMappingBootstrap('prod-looker', {
      driver: 'looker',
      mappings: { connectionMappings: { analytics: 'prod-warehouse' } },
    });
    expect(lookerBootstrap).toMatchObject({ adapter: 'looker', connectionId: 'prod-looker' });
  });
});

View file

@ -0,0 +1,135 @@
import * as z from 'zod';
import type { KloProjectConnectionConfig } from './config.js';
// Accepted values for a Metabase connection's `syncMode`.
const metabaseSyncModeSchema = z.enum(['ALL', 'ONLY', 'EXCEPT']);
// A positive integer; used for the numeric ids listed under `selections`.
const positiveIntegerValueSchema = z.number().int().positive();
// A mapping target: either a non-empty string or an explicit null.
const stringTargetSchema = z.string().min(1).nullable();
// Metabase `selections` block; both lists default to empty when omitted.
const metabaseSelectionsSchema = z
.object({
collections: z.array(positiveIntegerValueSchema).default([]),
items: z.array(positiveIntegerValueSchema).default([]),
});
// Metabase `mappings` block. Every field has a default, so an empty object parses.
// Record keys are only typed as strings here; parseMetabaseMappingBootstrap checks
// them with assertPositiveIntegerKeys before this schema runs.
const metabaseMappingsSchema = z
.object({
databaseMappings: z.record(z.string(), stringTargetSchema).default({}),
syncEnabled: z.record(z.string(), z.boolean()).default({}),
syncMode: metabaseSyncModeSchema.default('ALL'),
selections: metabaseSelectionsSchema.default({ collections: [], items: [] }),
defaultTagNames: z.array(z.string().min(1)).default([]),
});
// Looker `mappings` block: non-empty connection names mapped to a target or null.
const lookerMappingsSchema = z
.object({
connectionMappings: z.record(z.string().min(1), stringTargetSchema).default({}),
});
// LookML `mappings` block: the expected Looker connection name, null when not given.
const lookmlMappingsSchema = z
.object({
expectedLookerConnectionName: z.string().min(1).nullable().default(null),
});
/** Parsed mapping settings of a Metabase connection, as returned by parseMetabaseMappingBootstrap. */
export type MetabaseMappingBootstrap = {
adapter: 'metabase';
/** Id of the connection the mappings were read from. */
connectionId: string;
/** Keys are positive-integer strings; a null value is an explicit "no target". */
databaseMappings: Record<string, string | null>;
/** Keys are positive-integer strings. */
syncEnabled: Record<string, boolean>;
/** Defaults to 'ALL' when the mappings block does not set it. */
syncMode: z.infer<typeof metabaseSyncModeSchema>;
selections: { collections: number[]; items: number[] };
defaultTagNames: string[];
};
/** Parsed mapping settings of a Looker connection, as returned by parseLookerMappingBootstrap. */
export type LookerMappingBootstrap = {
adapter: 'looker';
/** Id of the connection the mappings were read from. */
connectionId: string;
connectionMappings: Record<string, string | null>;
};
/** Parsed mapping settings of a LookML connection, as returned by parseLookmlMappingBootstrap. */
export type LookmlMappingBootstrap = {
adapter: 'lookml';
/** Id of the connection the mappings were read from. */
connectionId: string;
/** Null when the mappings block does not name a connection. */
expectedLookerConnectionName: string | null;
};
/** Any adapter-specific bootstrap; the `adapter` field discriminates the variants. */
export type ConnectionMappingBootstrap = MetabaseMappingBootstrap | LookerMappingBootstrap | LookmlMappingBootstrap;
/**
 * Views `value` as a string-keyed record.
 *
 * @returns `value` itself when it is a non-null, non-array object; otherwise a fresh empty object.
 */
function recordValue(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
/**
 * Ensures every key of `record` is a positive integer written without leading zeros.
 *
 * @param field Name of the field being checked; used in the error message.
 * @throws Error naming the first offending key.
 */
function assertPositiveIntegerKeys(field: string, record: Record<string, unknown>): void {
  const offendingKey = Object.keys(record).find((candidate) => !/^[1-9]\d*$/.test(candidate));
  if (offendingKey !== undefined) {
    throw new Error(`${field} key "${offendingKey}" must be a positive integer string`);
  }
}
/** Returns the connection's `driver` as a lower-cased string; a missing driver yields `''`. */
function driverOf(connection: KloProjectConnectionConfig): string {
  const rawDriver = connection.driver ?? '';
  return String(rawDriver).toLowerCase();
}
/**
 * Parses the `mappings` block of a Metabase connection.
 *
 * The keys of `databaseMappings` and `syncEnabled` are checked to be positive-integer
 * strings before the block is validated against the Metabase mappings schema.
 *
 * @throws Error when a key is not a positive integer string, or when schema validation fails.
 */
export function parseMetabaseMappingBootstrap(
  connectionId: string,
  connection: KloProjectConnectionConfig,
): MetabaseMappingBootstrap {
  const mappingsBlock = recordValue(connection.mappings);
  for (const field of ['databaseMappings', 'syncEnabled']) {
    assertPositiveIntegerKeys(field, recordValue(mappingsBlock[field]));
  }
  const { databaseMappings, syncEnabled, syncMode, selections, defaultTagNames } =
    metabaseMappingsSchema.parse(mappingsBlock);
  return {
    adapter: 'metabase',
    connectionId,
    databaseMappings,
    syncEnabled,
    syncMode,
    selections,
    defaultTagNames,
  };
}
/**
 * Parses the `mappings` block of a Looker connection.
 *
 * @throws When the block does not satisfy the Looker mappings schema.
 */
export function parseLookerMappingBootstrap(
  connectionId: string,
  connection: KloProjectConnectionConfig,
): LookerMappingBootstrap {
  const mappingsBlock = recordValue(connection.mappings);
  const { connectionMappings } = lookerMappingsSchema.parse(mappingsBlock);
  return { adapter: 'looker', connectionId, connectionMappings };
}
/**
 * Parses the `mappings` block of a LookML connection.
 *
 * @throws When the block does not satisfy the LookML mappings schema.
 */
export function parseLookmlMappingBootstrap(
  connectionId: string,
  connection: KloProjectConnectionConfig,
): LookmlMappingBootstrap {
  const mappingsBlock = recordValue(connection.mappings);
  const { expectedLookerConnectionName } = lookmlMappingsSchema.parse(mappingsBlock);
  return { adapter: 'lookml', connectionId, expectedLookerConnectionName };
}
/**
 * Selects the mapping parser that matches the connection's driver.
 *
 * @returns `null` when the connection has no object-valued `mappings` block, or when its
 *   driver is not one of `metabase`, `looker` or `lookml`; otherwise the parsed bootstrap.
 */
export function parseConnectionMappingBootstrap(
  connectionId: string,
  connection: KloProjectConnectionConfig,
): ConnectionMappingBootstrap | null {
  const { mappings } = connection;
  const hasMappingsBlock = typeof mappings === 'object' && mappings !== null && !Array.isArray(mappings);
  if (!hasMappingsBlock) {
    return null;
  }
  switch (driverOf(connection)) {
    case 'metabase':
      return parseMetabaseMappingBootstrap(connectionId, connection);
    case 'looker':
      return parseLookerMappingBootstrap(connectionId, connection);
    case 'lookml':
      return parseLookmlMappingBootstrap(connectionId, connection);
    default:
      return null;
  }
}

View file

@ -0,0 +1,78 @@
import { mkdtemp, readFile, rm, stat } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKloProject, loadKloProject } from './project.js';
// Exercises project initialization and loading against a throwaway temp directory.
describe('KLO local project runtime', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'klo-project-runtime-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('initializes the standalone project layout and commits it', async () => {
    const projectDir = join(tempDir, 'warehouse');
    const inProject = (relativePath: string): string => join(projectDir, relativePath);

    const initialized = await initKloProject({
      projectDir,
      projectName: 'warehouse',
      authorName: 'Agent',
      authorEmail: 'agent@example.com',
    });

    expect(initialized.projectDir).toBe(projectDir);
    expect(initialized.config.project).toBe('warehouse');
    // A full 40-character lowercase hex hash.
    expect(initialized.commitHash).toMatch(/^[0-9a-f]{40}$/);

    await expect(readFile(inProject('klo.yaml'), 'utf-8')).resolves.toContain('project: warehouse');

    const gitignore = await readFile(inProject('.klo/.gitignore'), 'utf-8');
    for (const ignored of ['cache/', 'db.sqlite', 'secrets/', 'setup/', 'agents/']) {
      expect(gitignore).toContain(ignored);
    }

    for (const scaffolded of ['knowledge/global/.gitkeep', 'semantic-layer/.gitkeep']) {
      await expect(stat(inProject(scaffolded))).resolves.toBeDefined();
    }
    // `_schema` must not be part of the scaffold.
    await expect(stat(inProject('_schema/.gitkeep'))).rejects.toMatchObject({ code: 'ENOENT' });
    for (const scaffolded of ['raw-sources/.gitkeep', '.git']) {
      await expect(stat(inProject(scaffolded))).resolves.toBeDefined();
    }
  });

  it('loads an initialized project with a working file store', async () => {
    const projectDir = join(tempDir, 'warehouse');
    const pagePath = 'knowledge/global/revenue.md';
    const pageBody = '# Revenue\n';
    await initKloProject({ projectDir, projectName: 'warehouse' });

    const loaded = await loadKloProject({ projectDir });
    await loaded.fileStore.writeFile(pagePath, pageBody, 'Agent', 'agent@example.com', 'Add revenue page');

    expect(loaded.config.project).toBe('warehouse');
    await expect(loaded.fileStore.readFile(pagePath)).resolves.toMatchObject({ content: pageBody });
  });

  it('rejects reinitializing an existing project unless force is set', async () => {
    const projectDir = join(tempDir, 'warehouse');
    await initKloProject({ projectDir, projectName: 'warehouse' });

    const secondInit = initKloProject({ projectDir, projectName: 'warehouse' });
    await expect(secondInit).rejects.toThrow('Project already contains klo.yaml');

    const forcedInit = initKloProject({ projectDir, projectName: 'warehouse-v2', force: true });
    await expect(forcedInit).resolves.toMatchObject({
      config: {
        project: 'warehouse-v2',
      },
    });
  });
});

View file

@ -0,0 +1,143 @@
import { promises as fs } from 'node:fs';
import { basename, dirname, join, resolve } from 'node:path';
import { GitService, type KloCoreConfig, type KloLogger, noopLogger } from '../core/index.js';
import type { KloProjectConfig } from './config.js';
import { buildDefaultKloProjectConfig, parseKloProjectConfig, serializeKloProjectConfig } from './config.js';
import { LocalGitFileStore } from './local-git-file-store.js';
/** Options accepted by initKloProject. */
export interface InitKloProjectOptions {
/** Directory to initialize; resolved to an absolute path and created if missing. */
projectDir: string;
/** Project name; when blank or omitted, the directory's base name (or 'klo-project') is used. */
projectName?: string;
/** When true, initialization proceeds even if `klo.yaml` already exists. */
force?: boolean;
/** Commit author name; defaults to 'klo'. */
authorName?: string;
/** Commit author email; defaults to 'klo@example.com'. */
authorEmail?: string;
/** Logger handed to the git service; defaults to `noopLogger`. */
logger?: KloLogger;
}
/** Options accepted by loadKloProject. */
export interface LoadKloProjectOptions {
/** Directory containing `klo.yaml`; resolved to an absolute path. */
projectDir: string;
/** Git author name for the runtime; defaults to 'klo'. */
authorName?: string;
/** Git author email for the runtime; defaults to 'klo@example.com'. */
authorEmail?: string;
/** Logger handed to the git service; defaults to `noopLogger`. */
logger?: KloLogger;
}
/** Runtime handles for a local project, as assembled by createRuntime. */
export interface KloLocalProject {
/** Absolute project directory. */
projectDir: string;
/** Path of `klo.yaml` inside the project directory. */
configPath: string;
/** Project configuration (the defaults on init, the parsed `klo.yaml` on load). */
config: KloProjectConfig;
/** Core configuration derived from the project directory and author identity. */
coreConfig: KloCoreConfig;
/** Git service built from `coreConfig`; its `onModuleInit` has already been awaited. */
git: GitService;
/** File store rooted at the project directory and backed by `git`. */
fileStore: LocalGitFileStore;
}
/** Result of initKloProject: the project runtime plus the scaffold commit. */
export interface InitKloProjectResult extends KloLocalProject {
/** Hash reported by the commit of `klo.yaml` and the scaffold files. */
commitHash: string | null;
}
// Files that initKloProject writes into a new project and includes in its initial commit
// (alongside klo.yaml). Paths are relative to the project directory.
const TRACKED_SCAFFOLD_FILES: Array<{ path: string; content: string }> = [
// Ignore rules for local-only content under .klo/.
{ path: '.klo/.gitignore', content: 'cache/\ndb.sqlite\nsecrets/\nsetup/\nagents/\n' },
// Empty .gitkeep placeholders so the otherwise-empty directories can be committed.
{ path: '.klo/prompts/.gitkeep', content: '' },
{ path: '.klo/skills/.gitkeep', content: '' },
{ path: 'knowledge/global/.gitkeep', content: '' },
{ path: 'semantic-layer/.gitkeep', content: '' },
{ path: 'raw-sources/.gitkeep', content: '' },
];
/**
 * Derives the core configuration for a project directory.
 *
 * Storage paths are computed from `projectDir` (its parent becomes `homeDir`, and
 * worktrees live under `.klo/worktrees`); the author identity is used both as the git
 * user and as the bootstrap commit author.
 */
function createCoreConfig(projectDir: string, authorName: string, authorEmail: string): KloCoreConfig {
  const storage = {
    configDir: projectDir,
    homeDir: dirname(projectDir),
    worktreesDir: join(projectDir, '.klo/worktrees'),
  };
  const git = {
    userName: authorName,
    userEmail: authorEmail,
    bootstrapMessage: 'Initialize klo project repository',
    bootstrapAuthor: authorName,
    bootstrapAuthorEmail: authorEmail,
  };
  return { storage, git };
}
/** Resolves to true when `fs.access` succeeds for `path`, and to false when it fails for any reason. */
async function fileExists(path: string): Promise<boolean> {
  return fs.access(path).then(
    () => true,
    () => false,
  );
}
/**
 * Writes `content` as UTF-8 to `relativePath` inside `projectDir`, creating any
 * missing parent directories first.
 */
async function writeProjectFile(projectDir: string, relativePath: string, content: string): Promise<void> {
  const target = join(projectDir, relativePath);
  const parentDir = dirname(target);
  await fs.mkdir(parentDir, { recursive: true });
  await fs.writeFile(target, content, { encoding: 'utf-8' });
}
/**
 * Assembles the runtime handles for a project directory.
 *
 * Builds the core config, constructs the git service and awaits its `onModuleInit`,
 * then creates a file store rooted at the project directory.
 */
async function createRuntime(
  projectDir: string,
  config: KloProjectConfig,
  authorName: string,
  authorEmail: string,
  logger: KloLogger,
): Promise<KloLocalProject> {
  const coreConfig = createCoreConfig(projectDir, authorName, authorEmail);

  const git = new GitService(coreConfig, logger);
  await git.onModuleInit();

  const fileStore = new LocalGitFileStore({ rootDir: projectDir, git });
  const configPath = join(projectDir, 'klo.yaml');
  return { projectDir, configPath, config, coreConfig, git, fileStore };
}
/**
 * Creates a new project in `options.projectDir`.
 *
 * Steps, in order: create the directory, refuse to continue when `klo.yaml` already
 * exists (unless `force` is set), build the runtime, write `klo.yaml` with the default
 * config, create `.klo/cache`, write the scaffold files, and commit `klo.yaml` together
 * with the scaffold files.
 *
 * @returns The project runtime plus the hash of the scaffold commit.
 * @throws Error 'Project already contains klo.yaml: <path>' when the config exists and
 *   `force` is not set.
 */
export async function initKloProject(options: InitKloProjectOptions): Promise<InitKloProjectResult> {
  const projectDir = resolve(options.projectDir);
  const configPath = join(projectDir, 'klo.yaml');

  // `||` on purpose: a blank or whitespace-only name falls back to the directory name.
  const requestedName = options.projectName?.trim();
  const projectName = requestedName || basename(projectDir) || 'klo-project';
  const authorName = options.authorName ?? 'klo';
  const authorEmail = options.authorEmail ?? 'klo@example.com';
  const logger = options.logger ?? noopLogger;

  await fs.mkdir(projectDir, { recursive: true });
  if (!options.force) {
    const alreadyInitialized = await fileExists(configPath);
    if (alreadyInitialized) {
      throw new Error(`Project already contains klo.yaml: ${configPath}`);
    }
  }

  const config = buildDefaultKloProjectConfig(projectName);
  const runtime = await createRuntime(projectDir, config, authorName, authorEmail, logger);

  await writeProjectFile(projectDir, 'klo.yaml', serializeKloProjectConfig(config));
  await fs.mkdir(join(projectDir, '.klo/cache'), { recursive: true });

  const scaffoldPaths: string[] = [];
  for (const { path, content } of TRACKED_SCAFFOLD_FILES) {
    await writeProjectFile(projectDir, path, content);
    scaffoldPaths.push(path);
  }

  const { commitHash } = await runtime.git.commitFiles(
    ['klo.yaml', ...scaffoldPaths],
    `Initialize KLO project: ${projectName}`,
    authorName,
    authorEmail,
  );
  return { ...runtime, commitHash };
}
/**
 * Opens an existing project: reads and parses `<projectDir>/klo.yaml`, then builds the
 * runtime for it.
 *
 * Errors from reading or parsing the config file propagate to the caller.
 */
export async function loadKloProject(options: LoadKloProjectOptions): Promise<KloLocalProject> {
  const projectDir = resolve(options.projectDir);
  const rawConfig = await fs.readFile(join(projectDir, 'klo.yaml'), 'utf-8');
  const config = parseKloProjectConfig(rawConfig);

  return createRuntime(
    projectDir,
    config,
    options.authorName ?? 'klo',
    options.authorEmail ?? 'klo@example.com',
    options.logger ?? noopLogger,
  );
}

View file

@ -0,0 +1,76 @@
import { describe, expect, it } from 'vitest';
import { buildDefaultKloProjectConfig } from './config.js';
import {
markKloSetupStepComplete,
mergeKloSetupGitignoreEntries,
setKloSetupDatabaseConnectionIds,
} from './setup-config.js';
// Covers the immutable setup-state helpers and the .gitignore merge helper.
describe('KLO setup config helpers', () => {
  it('marks setup steps complete without duplicating existing state', () => {
    const initial = buildDefaultKloProjectConfig('warehouse');

    const afterProject = markKloSetupStepComplete(initial, 'project');
    const afterProjectTwice = markKloSetupStepComplete(afterProject, 'project');
    const afterLlm = markKloSetupStepComplete(afterProjectTwice, 'llm');
    const afterContext = markKloSetupStepComplete(afterLlm, 'context');

    expect(afterProject.setup).toEqual({
      database_connection_ids: [],
      completed_steps: ['project'],
    });
    expect(afterProjectTwice.setup?.completed_steps).toEqual(['project']);
    expect(afterLlm.setup?.completed_steps).toEqual(['project', 'llm']);
    expect(afterContext.setup?.completed_steps).toEqual(['project', 'llm', 'context']);
    // The input config must be left untouched.
    expect(initial.setup).toBeUndefined();
  });

  it('preserves database connection ids while marking a step complete', () => {
    const withExistingSetup = {
      ...buildDefaultKloProjectConfig('warehouse'),
      setup: {
        database_connection_ids: ['warehouse'],
        completed_steps: ['databases'],
      },
    };

    const updated = markKloSetupStepComplete(withExistingSetup, 'project');

    expect(updated.setup).toEqual({
      database_connection_ids: ['warehouse'],
      completed_steps: ['databases', 'project'],
    });
  });

  it('sets setup database connection ids without duplicates', () => {
    const initial = buildDefaultKloProjectConfig('warehouse');

    const updated = setKloSetupDatabaseConnectionIds(initial, ['warehouse', 'analytics', 'warehouse']);

    expect(updated.setup).toEqual({
      database_connection_ids: ['warehouse', 'analytics'],
      completed_steps: [],
    });
    expect(initial.setup).toBeUndefined();
  });

  it('marks databases complete only when requested', () => {
    const projectDone = markKloSetupStepComplete(buildDefaultKloProjectConfig('warehouse'), 'project');

    const firstPass = setKloSetupDatabaseConnectionIds(projectDone, ['warehouse'], { complete: true });
    const secondPass = setKloSetupDatabaseConnectionIds(firstPass, ['warehouse'], { complete: true });

    expect(firstPass.setup).toEqual({
      database_connection_ids: ['warehouse'],
      completed_steps: ['project', 'databases'],
    });
    expect(secondPass.setup).toEqual(firstPass.setup);
  });

  it('merges setup-local gitignore entries without removing existing lines', () => {
    const mergedFromDefaults = mergeKloSetupGitignoreEntries('cache/\ndb.sqlite\n');
    expect(mergedFromDefaults).toBe(['cache/', 'db.sqlite', 'secrets/', 'setup/', 'agents/', ''].join('\n'));

    const mergedWithExistingEntry = mergeKloSetupGitignoreEntries('cache/\nsecrets/\n');
    expect(mergedWithExistingEntry).toBe(['cache/', 'secrets/', 'setup/', 'agents/', ''].join('\n'));
  });
});

View file

@ -0,0 +1,55 @@
import type { KloProjectConfig } from './config.js';
/** Names of the setup steps that can be recorded in `setup.completed_steps`. */
export const KLO_SETUP_STEPS = ['project', 'llm', 'embeddings', 'databases', 'sources', 'context', 'agents'] as const;
/** Union of the step names listed in KLO_SETUP_STEPS. */
export type KloSetupStep = (typeof KLO_SETUP_STEPS)[number];
// Ignore entries that mergeKloSetupGitignoreEntries appends when they are missing.
const SETUP_GITIGNORE_ENTRIES = ['secrets/', 'setup/', 'agents/'] as const;
/**
 * Returns a copy of `config` in which `step` is recorded as completed.
 *
 * The step is appended only when it is not already listed. Existing database
 * connection ids are carried over, and the input config is not modified.
 */
export function markKloSetupStepComplete(config: KloProjectConfig, step: KloSetupStep): KloProjectConfig {
  const previousSetup = config.setup;
  const completedSteps = [...(previousSetup?.completed_steps ?? [])];
  if (!completedSteps.includes(step)) {
    completedSteps.push(step);
  }
  return {
    ...config,
    setup: {
      database_connection_ids: [...(previousSetup?.database_connection_ids ?? [])],
      completed_steps: completedSteps,
    },
  };
}
/**
 * Returns a copy of `config` whose setup state lists the given database connections.
 *
 * Blank (empty or whitespace-only) ids are dropped and duplicates are removed, keeping
 * first-seen order. When `options.complete` is true, the 'databases' step is recorded
 * as completed if it is not already. The input config is not modified.
 */
export function setKloSetupDatabaseConnectionIds(
  config: KloProjectConfig,
  connectionIds: string[],
  options: { complete?: boolean } = {},
): KloProjectConfig {
  const distinctIds = new Set<string>();
  for (const candidate of connectionIds) {
    if (candidate.trim().length > 0) {
      distinctIds.add(candidate);
    }
  }

  const completedSteps = [...(config.setup?.completed_steps ?? [])];
  if (options.complete === true && !completedSteps.includes('databases')) {
    completedSteps.push('databases');
  }

  return {
    ...config,
    setup: {
      database_connection_ids: [...distinctIds],
      completed_steps: completedSteps,
    },
  };
}
/**
 * Appends the setup-local ignore entries to `.gitignore` content when they are missing.
 *
 * Existing lines are kept in order with trailing whitespace trimmed; only a single
 * empty final line is dropped before appending. The result uses `\n` line endings and
 * ends with a newline.
 */
export function mergeKloSetupGitignoreEntries(content: string): string {
  const lines = content.split(/\r?\n/).map((line) => line.trimEnd());
  // Drop only the final element when it is empty (the remainder after a trailing newline).
  if (lines[lines.length - 1] === '') {
    lines.pop();
  }

  const present = new Set(lines);
  for (const entry of SETUP_GITIGNORE_ENTRIES) {
    if (present.has(entry)) {
      continue;
    }
    present.add(entry);
    lines.push(entry);
  }
  return `${lines.join('\n')}\n`;
}