mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. 
- Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. 
* refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. 
Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
189 lines
5.8 KiB
TypeScript
189 lines
5.8 KiB
TypeScript
import * as z from 'zod';
|
|
import type { MemoryFlowReplayInput } from './types.js';
|
|
|
|
/** The status literals accepted for a memory-flow run. */
const memoryFlowRunStatuses = ['running', 'done', 'error'] as const;

/** Accepts exactly one of the literals listed in `memoryFlowRunStatuses`. */
const memoryFlowRunStatusSchema = z.enum(memoryFlowRunStatuses);
|
|
|
|
/**
 * Shape fragment holding the `emittedAt` field. The field may be omitted;
 * when present it must be a string that passes zod's `datetime()` check.
 */
const memoryFlowEventTimestampShape = {
  emittedAt: z.optional(z.string().datetime()),
};
|
|
|
|
/**
 * Builds an object schema from `shape` plus the fields of
 * `memoryFlowEventTimestampShape`.
 *
 * The timestamp fields are spread last, so they win over any same-named key
 * supplied in `shape`.
 *
 * @param shape - Raw zod shape describing the event-specific fields.
 * @returns A zod object schema over the combined shape.
 */
function eventSchema<T extends z.ZodRawShape>(shape: T): z.ZodObject<T & typeof memoryFlowEventTimestampShape> {
  const shapeWithTimestamp = { ...shape, ...memoryFlowEventTimestampShape };
  return z.object(shapeWithTimestamp);
}
|
|
|
|
/**
 * Schema for the replay metadata object.
 *
 * `schemaVersion` must be the literal `1`. `mode`, `origin` and `timing` are
 * closed enums. The remaining four fields are required keys whose value may be
 * `null`; a non-null `capturedAt` must pass zod's `datetime()` check and the
 * other non-null strings must be non-empty.
 */
const memoryFlowReplayMetadataSchema = z.object({
  schemaVersion: z.literal(1),
  mode: z.enum(['full', 'deterministic', 'replay', 'seeded']),
  origin: z.enum(['captured', 'packaged', 'synthetic-report']),
  timing: z.enum(['captured', 'synthetic', 'not-captured', 'prebuilt']),
  capturedAt: z.nullable(z.string().datetime()),
  sourceReportId: z.nullable(z.string().min(1)),
  sourceReportPath: z.nullable(z.string().min(1)),
  fallbackReason: z.nullable(z.string().min(1)),
});
|
|
|
|
/** String with at least one character; shared by the event payload fields below. */
const memoryFlowEventText = z.string().min(1);

/** Integer greater than or equal to zero; shared by the event payload fields below. */
const memoryFlowEventCount = z.number().int().min(0);

/**
 * Union of all memory-flow event payloads, discriminated on the `type` literal.
 *
 * Every member is produced by `eventSchema`, so each one also carries the
 * fields of the shared timestamp shape in addition to the fields listed here.
 */
const memoryFlowEventSchema = z.discriminatedUnion('type', [
  eventSchema({
    type: z.literal('source_acquired'),
    adapter: memoryFlowEventText,
    trigger: memoryFlowEventText,
    fileCount: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('scope_detected'),
    fingerprint: z.string().nullable(),
  }),
  eventSchema({
    type: z.literal('raw_snapshot_written'),
    syncId: memoryFlowEventText,
    rawFileCount: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('diff_computed'),
    added: memoryFlowEventCount,
    modified: memoryFlowEventCount,
    deleted: memoryFlowEventCount,
    unchanged: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('chunks_planned'),
    chunkCount: memoryFlowEventCount,
    workUnitCount: memoryFlowEventCount,
    evictionCount: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('stage_skipped'),
    stage: z.enum(['source', 'chunks', 'workUnits', 'actions', 'gates', 'saved']),
    reason: memoryFlowEventText,
  }),
  eventSchema({
    type: z.literal('stage_progress'),
    stage: z.enum([
      'source',
      'integration',
      'reconciliation',
      'finalization',
      'wiki_sl_ref_repair',
      'final_gates',
      'save',
      'provenance',
      'report',
    ]),
    // Unlike the count fields, percent is not restricted to integers.
    percent: z.number().min(0).max(100),
    message: memoryFlowEventText,
    transient: z.boolean().optional(),
  }),
  eventSchema({
    type: z.literal('work_unit_started'),
    unitKey: memoryFlowEventText,
    skills: z.array(memoryFlowEventText),
    stepBudget: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('work_unit_step'),
    unitKey: memoryFlowEventText,
    stepIndex: memoryFlowEventCount,
    stepBudget: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('candidate_action'),
    unitKey: memoryFlowEventText,
    target: z.enum(['wiki', 'sl']),
    action: memoryFlowEventText,
    key: memoryFlowEventText,
  }),
  eventSchema({
    type: z.literal('work_unit_finished'),
    unitKey: memoryFlowEventText,
    status: z.enum(['success', 'failed']),
    // The reason is optional and, when given, may be an empty string.
    reason: z.string().optional(),
  }),
  eventSchema({
    type: z.literal('reconciliation_finished'),
    conflictCount: memoryFlowEventCount,
    fallbackCount: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('saved'),
    commitSha: z.string().nullable(),
    wikiCount: memoryFlowEventCount,
    slCount: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('provenance_recorded'),
    rowCount: memoryFlowEventCount,
  }),
  eventSchema({
    type: z.literal('report_created'),
    runId: memoryFlowEventText,
    reportPath: memoryFlowEventText.optional(),
  }),
]);
|
|
|
|
/**
 * Schema for one planned work unit: a non-empty `unitKey`, a list of
 * `rawFiles` strings, and two non-negative integer counts.
 */
const memoryFlowPlannedWorkUnitSchema = z.object({
  unitKey: z.string().min(1),
  rawFiles: z.string().array(),
  peerFileCount: z.number().int().nonnegative(),
  dependencyCount: z.number().int().nonnegative(),
});
|
|
|
|
/**
 * Schema for one entry of the `actions` detail section.
 *
 * `target`, `action` and `status` are closed enums; `unitKey` and `key` must
 * be non-empty, while `summary` and the `rawFiles` entries may be any string.
 */
const memoryFlowActionDetailSchema = z.object({
  unitKey: z.string().min(1),
  target: z.enum(['wiki', 'sl']),
  action: z.enum(['created', 'updated', 'removed']),
  key: z.string().min(1),
  summary: z.string(),
  rawFiles: z.string().array(),
  status: z.enum(['success', 'failed']),
});
|
|
|
|
/**
 * Schema for one entry of the `provenance` detail section.
 *
 * `artifactKind` and `artifactKey` are required keys that accept `null`;
 * `actionType` must be a non-empty string.
 */
const memoryFlowProvenanceDetailSchema = z.object({
  rawPath: z.string(),
  artifactKind: z.nullable(z.enum(['sl', 'wiki'])),
  artifactKey: z.nullable(z.string()),
  actionType: z.string().min(1),
});
|
|
|
|
/**
 * Schema for one entry of the `transcripts` detail section: non-empty
 * `unitKey` and `path`, two non-negative integer counts, and a list of
 * `toolNames` strings.
 */
const memoryFlowTranscriptDetailSchema = z.object({
  unitKey: z.string().min(1),
  path: z.string().min(1),
  toolCallCount: z.number().int().nonnegative(),
  errorCount: z.number().int().nonnegative(),
  toolNames: z.string().array(),
});
|
|
|
|
/**
 * Schema grouping the three detail lists; each key is required and holds an
 * array validated by the corresponding per-entry schema.
 */
const memoryFlowDetailSectionsSchema = z.object({
  actions: memoryFlowActionDetailSchema.array(),
  provenance: memoryFlowProvenanceDetailSchema.array(),
  transcripts: memoryFlowTranscriptDetailSchema.array(),
});
|
|
|
|
/**
 * Top-level schema for a memory-flow replay input, typed against
 * `MemoryFlowReplayInput`.
 *
 * `metadata`, `reportId` and `reportPath` may be omitted; `sourceDir` is a
 * required key that accepts `null`. All list fields are required.
 */
export const memoryFlowReplayInputSchema: z.ZodType<MemoryFlowReplayInput> = z.object({
  metadata: z.optional(memoryFlowReplayMetadataSchema),
  runId: z.string().min(1),
  connectionId: z.string().min(1),
  adapter: z.string().min(1),
  status: memoryFlowRunStatusSchema,
  sourceDir: z.nullable(z.string()),
  syncId: z.string().min(1),
  reportId: z.optional(z.string().min(1)),
  reportPath: z.optional(z.string().min(1)),
  errors: z.string().array(),
  events: memoryFlowEventSchema.array(),
  plannedWorkUnits: memoryFlowPlannedWorkUnitSchema.array(),
  details: memoryFlowDetailSectionsSchema,
});
|
|
|
|
/** Stream member carrying a full replay input under `snapshot`. */
const memoryFlowSnapshotStreamEventSchema = z.object({
  type: z.literal('snapshot'),
  snapshot: memoryFlowReplayInputSchema,
});

/** Stream member carrying a run status and a list of error strings. */
const memoryFlowClosedStreamEventSchema = z.object({
  type: z.literal('closed'),
  status: memoryFlowRunStatusSchema,
  errors: z.string().array(),
});

/**
 * Union of the stream event payloads, discriminated on `type`
 * (`snapshot` or `closed`).
 *
 * @internal
 */
export const memoryFlowStreamEventSchema = z.discriminatedUnion('type', [
  memoryFlowSnapshotStreamEventSchema,
  memoryFlowClosedStreamEventSchema,
]);
|
|
|
|
|
|
/**
 * Validates `value` against `memoryFlowReplayInputSchema`.
 *
 * @param value - Input of unknown shape to validate.
 * @returns The parsed data when validation succeeds.
 * @throws Error whose message starts with `Invalid memory-flow replay input: `
 *   followed by the `z.prettifyError` rendering of the validation error.
 * @internal
 */
export function parseMemoryFlowReplayInput(value: unknown): MemoryFlowReplayInput {
  const parsed = memoryFlowReplayInputSchema.safeParse(value);
  if (parsed.success) {
    return parsed.data;
  }
  throw new Error(`Invalid memory-flow replay input: ${z.prettifyError(parsed.error)}`);
}
|