diff --git a/apps/x/packages/core/src/agent-loop/agent-loop.test.ts b/apps/x/packages/core/src/agent-loop/agent-loop.test.ts index fca16bb4..ba3f22de 100644 --- a/apps/x/packages/core/src/agent-loop/agent-loop.test.ts +++ b/apps/x/packages/core/src/agent-loop/agent-loop.test.ts @@ -56,11 +56,13 @@ type ModelStep = class FakeModelAdapter implements ModelAdapter { calls = 0; + lastSystem: string | null = null; constructor(private steps: ModelStep[]) {} stream(req: ModelStreamRequest): EventStream { this.calls++; + this.lastSystem = req.system; const out = new EventStream(); const step = this.steps.shift(); void (async () => { @@ -110,7 +112,7 @@ class FakeToolRunner implements ToolRunner { private behaviors: Record) => ToolRunResult> = {}, ) {} - definitions() { + async definitions() { return []; } @@ -180,6 +182,7 @@ function makeLoop(opts: { runner?: FakeToolRunner; gate?: FakePermissionGate; store?: TurnStore; + systemComposer?: { system(): Promise }; maxIterations?: number; } = {}) { const store = opts.store ?? new InMemoryTurnStore(); @@ -191,6 +194,7 @@ function makeLoop(opts: { modelAdapter: adapter, toolRunner: runner, permissionGate: gate, + ...(opts.systemComposer ? { systemComposer: opts.systemComposer } : {}), ...(opts.maxIterations !== undefined ? 
{ maxIterations: opts.maxIterations } : {}), }); return { loop, store, adapter, runner, gate }; @@ -207,8 +211,11 @@ function emptyTurn( provider: null, model: null, permissionMode: "manual", + useCase: null, + subUseCase: null, sessionId: null, sessionSeq: null, + composeContext: null, messages: [], permissionRequests: [], permissionDecisions: [], @@ -795,6 +802,23 @@ describe("AgentLoopImpl", () => { expect(totalUsage(turn).totalTokens).toBeNull(); }); + it("passes the composed system prompt to the model adapter", async () => { + const { loop, adapter } = makeLoop({ + steps: [{ kind: "message", message: assistantText("ok") }], + systemComposer: { async system() { return "SYSTEM PROMPT HERE"; } }, + }); + await (await loop.createTurn({ messages: [userMsg("go")] })).result; + expect(adapter.lastSystem).toBe("SYSTEM PROMPT HERE"); + }); + + it("defaults to no system prompt when no composer is configured", async () => { + const { loop, adapter } = makeLoop({ + steps: [{ kind: "message", message: assistantText("ok") }], + }); + await (await loop.createTurn({ messages: [userMsg("go")] })).result; + expect(adapter.lastSystem).toBeNull(); + }); + it("getTurn returns the persisted turn; unknown ids reject", async () => { const { loop } = makeLoop({ steps: [{ kind: "message", message: assistantText("hi") }] }); const created = await (await loop.createTurn({ messages: [userMsg("hello")] })).result; diff --git a/apps/x/packages/core/src/agent-loop/agent-loop.ts b/apps/x/packages/core/src/agent-loop/agent-loop.ts index 283aec23..2815bd53 100644 --- a/apps/x/packages/core/src/agent-loop/agent-loop.ts +++ b/apps/x/packages/core/src/agent-loop/agent-loop.ts @@ -1,10 +1,13 @@ import crypto from "node:crypto"; import { z } from "zod"; import { ToolCallPart, ToolMessage } from "@x/shared/dist/message.js"; +import { enterUseCase, type UseCase } from "../analytics/use_case.js"; import { EventStream } from "./event-stream.js"; import type { ModelAdapter } from "./model-adapter.js"; 
import { KeyedMutex } from "./mutex.js"; import type { PermissionGate } from "./permission-gate.js"; +import { NullSystemComposer, type SystemComposer } from "./system-composer.js"; +import { NullTurnObserver, type TurnEventMeta, type TurnObserver } from "./turn-observer.js"; import type { ToolRunner, ToolRunResult } from "./tool-runner.js"; import type { TurnStore } from "./turn-store.js"; import { @@ -72,6 +75,8 @@ export class AgentLoopImpl implements AgentLoop { private modelAdapter: ModelAdapter; private toolRunner: ToolRunner; private permissionGate: PermissionGate; + private systemComposer: SystemComposer; + private observer: TurnObserver; private maxIterations: number; private mutex = new KeyedMutex(); // All not-yet-finished entries per turn (running AND queued behind the @@ -83,12 +88,16 @@ export class AgentLoopImpl implements AgentLoop { modelAdapter: ModelAdapter; toolRunner: ToolRunner; permissionGate: PermissionGate; + systemComposer?: SystemComposer; + observer?: TurnObserver; maxIterations?: number; }) { this.store = deps.store; this.modelAdapter = deps.modelAdapter; this.toolRunner = deps.toolRunner; this.permissionGate = deps.permissionGate; + this.systemComposer = deps.systemComposer ?? new NullSystemComposer(); + this.observer = deps.observer ?? new NullTurnObserver(); this.maxIterations = deps.maxIterations ?? DEFAULT_MAX_ITERATIONS; } @@ -102,14 +111,17 @@ export class AgentLoopImpl implements AgentLoop { // Between this write and enter() below the turn is store-visible but // not yet stoppable; acceptable while turn ids only reach callers via // the returned handle, not via store polling. - await this.store.create({ + const turn: z.infer = { id: turnId, agentId: parsed.agentId ?? null, provider: parsed.provider ?? null, model: parsed.model ?? null, permissionMode: parsed.permissionMode ?? "manual", + useCase: parsed.useCase ?? null, + subUseCase: parsed.subUseCase ?? null, sessionId: parsed.sessionId ?? null, sessionSeq: parsed.sessionSeq ?? 
null, + composeContext: parsed.composeContext ?? null, messages: parsed.messages, permissionRequests: [], permissionDecisions: [], @@ -120,7 +132,13 @@ export class AgentLoopImpl implements AgentLoop { completedAt: null, createdAt: now, updatedAt: now, - }); + }; + await this.store.create(turn); + // The created turn is a committed fact — surface it to observers (the + // bus) immediately so the session UI shows the new turn and its user + // message right away, and a per-turn consumer can attach before the + // first model step streams. + this.observer.onState(turn); return this.enter(turnId, async () => {}); } @@ -243,9 +261,22 @@ export class AgentLoopImpl implements AgentLoop { if (signal.aborted) return; const turn = await this.mustGet(turnId); + // Install the turn's use-case for this async chain so nested LLM + // calls (the permission classifier, builtin tools that call a model) + // inherit it for analytics — parity with the old runtime's + // enterUseCase. Set once per advance() entry. + if (iteration === 0 && turn.useCase) { + enterUseCase({ + useCase: turn.useCase as UseCase, + ...(turn.subUseCase ? { subUseCase: turn.subUseCase } : {}), + ...(turn.agentId ? { agentName: turn.agentId } : {}), + }); + } + // 1. 
terminal states if (turn.error !== null || turn.completedAt !== null) return; + const meta: TurnEventMeta = { turnId, sessionId: turn.sessionId }; const unresolved = unresolvedToolCalls(turn); const stateOf = new Map(unresolved.map((call) => [ call.toolCallId, @@ -267,7 +298,7 @@ export class AgentLoopImpl implements AgentLoop { for (const call of needsClassifier) { const request = turn.permissionRequests .find((r) => r.toolCallId === call.toolCallId)?.request; - const verdict = await this.permissionGate.classify(call, request); + const verdict = await this.permissionGate.classify(call, request, turn); turn.permissionDecisions.push({ toolCallId: call.toolCallId, decidedBy: "classifier", @@ -298,7 +329,7 @@ export class AgentLoopImpl implements AgentLoop { if (unevaluated.length > 0) { const requested: string[] = []; for (const call of unevaluated) { - const check = await this.permissionGate.check(call); + const check = await this.permissionGate.check(call, turn); if (check.required) { turn.permissionRequests.push({ toolCallId: call.toolCallId, @@ -313,7 +344,7 @@ export class AgentLoopImpl implements AgentLoop { if (requested.length > 0) { await this.persist(turn); for (const toolCallId of requested) { - stream.push({ type: "permission-requested", toolCallId }); + this.emit(stream, meta, { type: "permission-requested", toolCallId }); } continue; // re-derive: waiting (manual) or classifier (auto) } @@ -346,13 +377,19 @@ export class AgentLoopImpl implements AgentLoop { startedAt: nowIso(), }); await this.persist(turn); - stream.push({ type: "tool-execution-start", toolCallId: call.toolCallId }); + this.emit(stream, meta, { type: "tool-execution-start", toolCallId: call.toolCallId }); // Tool failures are conversational: a throwing runner // becomes an error ToolMessage the model can react to, // never a terminal turn error. Aborts still propagate. 
const outcome = await this.toolRunner - .run(call, { turnId, signal }) + .run(call, { + turnId, + agentId: turn.agentId, + codeMode: turn.composeContext?.codeMode ?? null, + signal, + emit: (event) => this.emit(stream, meta, event), + }) .catch((error: unknown): ToolRunResult => { signal.throwIfAborted(); return { @@ -370,7 +407,7 @@ export class AgentLoopImpl implements AgentLoop { } await this.persist(turn); if (outcome.type !== "pending") { - stream.push({ type: "tool-result", toolCallId: call.toolCallId }); + this.emit(stream, meta, { type: "tool-result", toolCallId: call.toolCallId }); } } continue; @@ -389,15 +426,23 @@ export class AgentLoopImpl implements AgentLoop { // The stream's result promise is the single source of failure: // it rejects on model error AND on abort, and the catch below // tells those apart via signal.aborted. + const [tools, system] = await Promise.all([ + this.toolRunner.definitions(turn.agentId), + this.systemComposer.system(turn), + ]); const modelStream = this.modelAdapter.stream({ provider: turn.provider, model: turn.model, + system, messages: turn.messages, - tools: this.toolRunner.definitions(), + tools, signal, + useCase: turn.useCase, + subUseCase: turn.subUseCase, + agentId: turn.agentId, }); for await (const event of modelStream) { - stream.push(event); + this.emit(stream, meta, event); } const step = await modelStream.result; turn.messages.push(step.message); @@ -437,6 +482,18 @@ export class AgentLoopImpl implements AgentLoop { private async persist(turn: z.infer): Promise { turn.updatedAt = nowIso(); await this.store.update(turn); + // Every committed fact is a state snapshot for observers (the bus). + this.observer.onState(turn); + } + + // Push a live event to the turn's handle AND the observer (the bus). 
+ private emit( + stream: EventStream>, + meta: TurnEventMeta, + event: TurnEvent, + ): void { + stream.push(event); + this.observer.onEvent(meta, event); } private async mustGet(turnId: string): Promise> { diff --git a/apps/x/packages/core/src/agent-loop/in-memory-turn-store.ts b/apps/x/packages/core/src/agent-loop/in-memory-turn-store.ts index f88e3f3a..0920bac6 100644 --- a/apps/x/packages/core/src/agent-loop/in-memory-turn-store.ts +++ b/apps/x/packages/core/src/agent-loop/in-memory-turn-store.ts @@ -56,6 +56,12 @@ export class InMemoryTurnStore implements TurnStore { }); } + async deleteBySession(sessionId: string): Promise { + for (const [id, { turn }] of this.rows) { + if (turn.sessionId === sessionId) this.rows.delete(id); + } + } + async latestForSession(sessionId: string): Promise | null> { const turns = await this.listBySession(sessionId); return turns.length > 0 ? turns[turns.length - 1] : null; diff --git a/apps/x/packages/core/src/agent-loop/model-adapter.ts b/apps/x/packages/core/src/agent-loop/model-adapter.ts index 733b9c82..e59bbc70 100644 --- a/apps/x/packages/core/src/agent-loop/model-adapter.ts +++ b/apps/x/packages/core/src/agent-loop/model-adapter.ts @@ -8,16 +8,25 @@ import { } from "@x/shared/dist/message.js"; import { convertFromMessages } from "../agents/runtime.js"; import { createProvider } from "../models/models.js"; -import { resolveProviderConfig } from "../models/defaults.js"; +import { getDefaultModelAndProvider, resolveProviderConfig } from "../models/defaults.js"; +import { captureLlmUsage } from "../analytics/usage.js"; +import type { UseCase } from "../analytics/use_case.js"; import { EventStream } from "./event-stream.js"; import type { ModelStreamEvent, ModelUsage, ToolDefinition } from "./types.js"; export type ModelStreamRequest = { provider: string | null; model: string | null; + // The system prompt for this call, composed fresh per step (agent + // instructions + context). null = no system prompt. 
+ system: string | null; messages: z.infer; tools: ToolDefinition[]; signal: AbortSignal; + // Analytics attribution for the `llm_usage` event; null = don't capture. + useCase: string | null; + subUseCase: string | null; + agentId: string | null; }; // Usage as reported by the provider for one model step; null when the @@ -58,12 +67,18 @@ export class VercelModelAdapter implements ModelAdapter { req: ModelStreamRequest, out: EventStream, ): Promise { - if (!req.provider || !req.model) { - throw new Error("Agent loop turn has no provider/model configured"); + // A turn may leave provider/model unset (null = "use the configured + // default"), exactly like the old runtime resolved them at run creation. + let providerName = req.provider; + let modelId = req.model; + if (!providerName || !modelId) { + const def = await getDefaultModelAndProvider(); + providerName = providerName ?? def.provider; + modelId = modelId ?? def.model; } - const providerConfig = await resolveProviderConfig(req.provider); + const providerConfig = await resolveProviderConfig(providerName); const provider = createProvider(providerConfig); - const model = provider.languageModel(req.model); + const model = provider.languageModel(modelId); const tools: ToolSet = {}; for (const def of req.tools) { @@ -77,6 +92,7 @@ export class VercelModelAdapter implements ModelAdapter { const result = streamText({ model, + ...(req.system ? { system: req.system } : {}), messages: convertFromMessages(req.messages), tools, stopWhen: stepCountIs(1), @@ -147,6 +163,27 @@ export class VercelModelAdapter implements ModelAdapter { }), () => null, ); + // Tag this model step's usage for analytics — parity with the old + // runtime, which wrapped each streamText in withUseCase + captureLlmUsage. + if (req.useCase) { + captureLlmUsage({ + useCase: req.useCase as UseCase, + ...(req.subUseCase ? { subUseCase: req.subUseCase } : {}), + ...(req.agentId ? 
{ agentName: req.agentId } : {}), + model: modelId, + provider: providerName, + usage: usage + ? { + ...(usage.inputTokens !== null ? { inputTokens: usage.inputTokens } : {}), + ...(usage.outputTokens !== null ? { outputTokens: usage.outputTokens } : {}), + ...(usage.totalTokens !== null ? { totalTokens: usage.totalTokens } : {}), + ...(usage.reasoningTokens !== null ? { reasoningTokens: usage.reasoningTokens } : {}), + ...(usage.cachedInputTokens !== null ? { cachedInputTokens: usage.cachedInputTokens } : {}), + } + : undefined, + }); + } + out.push({ type: "finish", message }); out.end({ message, usage }); } diff --git a/apps/x/packages/core/src/agent-loop/permission-gate.ts b/apps/x/packages/core/src/agent-loop/permission-gate.ts index 184084f1..79a597c1 100644 --- a/apps/x/packages/core/src/agent-loop/permission-gate.ts +++ b/apps/x/packages/core/src/agent-loop/permission-gate.ts @@ -1,5 +1,6 @@ import { z } from "zod"; import { ToolCallPart } from "@x/shared/dist/message.js"; +import type { AgentLoopTurn } from "./types.js"; export type PermissionCheckResult = | { required: false } @@ -11,12 +12,19 @@ export type PermissionClassification = { }; // Decides whether a tool call needs user approval, and (in auto mode) -// classifies it. The real implementation (bridging getToolPermissionMetadata / +// classifies it. Both methods receive the current turn snapshot: the real +// implementation needs turn.sessionId (to consult session-scoped grants) and +// turn.messages (the classifier judges intent against the conversation). The +// real implementation (bridging getToolPermissionMetadata / // classifyToolPermissions) is integration-phase work; v1 uses fakes in tests. 
export interface PermissionGate { - check(toolCall: z.infer): Promise; + check( + toolCall: z.infer, + turn: z.infer, + ): Promise; classify( toolCall: z.infer, request: unknown, + turn: z.infer, ): Promise; } diff --git a/apps/x/packages/core/src/agent-loop/sqlite-turn-store.test.ts b/apps/x/packages/core/src/agent-loop/sqlite-turn-store.test.ts index 6c8e281c..19ce3c8d 100644 --- a/apps/x/packages/core/src/agent-loop/sqlite-turn-store.test.ts +++ b/apps/x/packages/core/src/agent-loop/sqlite-turn-store.test.ts @@ -54,8 +54,11 @@ function sampleTurn( provider: "openai", model: "gpt-x", permissionMode: "auto", + useCase: null, + subUseCase: null, sessionId: null, sessionSeq: null, + composeContext: null, messages: [ { role: "user", content: "hello" }, { @@ -134,6 +137,20 @@ describe("SqliteTurnStore", () => { expect(await store.get("t1")).toEqual(updated); }); + it("round-trips a populated compose context", async () => { + const { store } = await loadStore(); + const turn = sampleTurn("t1", { + composeContext: { voiceInput: true, voiceOutput: "summary", searchEnabled: false, codeMode: "claude" }, + }); + await store.create(turn); + expect((await store.get("t1"))?.composeContext).toEqual({ + voiceInput: true, + voiceOutput: "summary", + searchEnabled: false, + codeMode: "claude", + }); + }); + it("returns null for unknown ids and rejects updates to missing turns", async () => { const { store } = await loadStore(); expect(await store.get("missing")).toBeNull(); diff --git a/apps/x/packages/core/src/agent-loop/sqlite-turn-store.ts b/apps/x/packages/core/src/agent-loop/sqlite-turn-store.ts index 57974ff3..41c39def 100644 --- a/apps/x/packages/core/src/agent-loop/sqlite-turn-store.ts +++ b/apps/x/packages/core/src/agent-loop/sqlite-turn-store.ts @@ -7,6 +7,7 @@ import type { TurnStore } from "./turn-store.js"; import { AgentLoopError, AgentLoopTurn, + ComposeContext, DispatchedTool, ModelUsage, PermissionDecision, @@ -81,6 +82,13 @@ export class SqliteTurnStore 
implements TurnStore { } } + async deleteBySession(sessionId: string): Promise { + await this.db + .deleteFrom("agent_loop_turns") + .where("session_id", "=", sessionId) + .execute(); + } + async latestForSession(sessionId: string): Promise | null> { const turns = await this.foldSession(sessionId, null); return turns.length > 0 ? turns[turns.length - 1] : null; @@ -140,8 +148,11 @@ function toRow( provider: turn.provider, model: turn.model, permission_mode: turn.permissionMode, + use_case: turn.useCase, + sub_use_case: turn.subUseCase, session_id: turn.sessionId, session_seq: turn.sessionSeq, + compose_context: turn.composeContext === null ? null : JSON.stringify(turn.composeContext), messages: JSON.stringify(delta), prefix_length: prefixLength, permission_requests: JSON.stringify(turn.permissionRequests), @@ -165,8 +176,13 @@ function fromRow(row: Selectable): z.infer): Promise; +} + +// Default: no system prompt. Keeps the loop usable without a composer (tests, +// agent-less turns). +export class NullSystemComposer implements SystemComposer { + async system(): Promise { + return null; + } +} diff --git a/apps/x/packages/core/src/agent-loop/tool-runner.ts b/apps/x/packages/core/src/agent-loop/tool-runner.ts index 900a5677..baf95873 100644 --- a/apps/x/packages/core/src/agent-loop/tool-runner.ts +++ b/apps/x/packages/core/src/agent-loop/tool-runner.ts @@ -1,6 +1,6 @@ import { z } from "zod"; -import { ToolCallPart } from "@x/shared/dist/message.js"; -import type { ToolDefinition } from "./types.js"; +import { CodeMode, ToolCallPart } from "@x/shared/dist/message.js"; +import type { ToolDefinition, TurnEvent } from "./types.js"; export type ToolRunResult = | { type: "result"; value: unknown } // → ToolMessage @@ -9,14 +9,26 @@ export type ToolRunResult = export type ToolRunContext = { turnId: string; + // The turn's agent — the runner resolves the tool name to that agent's + // attachment (builtin vs MCP) to know how to execute it. null = no agent. 
+ agentId: string | null; + // The turn's code-mode chip (null = off). The code_agent_run tool honors + // this over the model's argument so toggling the chip switches agents. + codeMode: z.infer | null; signal: AbortSignal; + // Forward a live event onto the turn's stream while the tool runs (e.g. a + // `tool-output` chunk). Best-effort and never persisted — drop it and the + // tool still produces the same final result. This is the new home of the + // old runtime's `ctx.publish`. + emit: (event: TurnEvent) => void; }; // Executes tool calls. The real implementation (bridging exec-tool.ts / MCP) // is integration-phase work; v1 uses fakes in tests. export interface ToolRunner { - // Tool definitions advertised to the model. Environment, not turn state: - // resume works because the loop is reconstructed with the same runner. - definitions(): ToolDefinition[]; + // Tool definitions advertised to the model for a given agent. Environment, + // not turn state: resume works because the loop reconstructs them from the + // same (immutable) agent config. agentId is null for an agent-less turn. + definitions(agentId: string | null): Promise; run(toolCall: z.infer, ctx: ToolRunContext): Promise; } diff --git a/apps/x/packages/core/src/agent-loop/turn-observer.ts b/apps/x/packages/core/src/agent-loop/turn-observer.ts new file mode 100644 index 00000000..b4deeac8 --- /dev/null +++ b/apps/x/packages/core/src/agent-loop/turn-observer.ts @@ -0,0 +1,19 @@ +import { z } from "zod"; +import type { AgentLoopTurn, TurnEvent } from "./types.js"; + +export type TurnEventMeta = { turnId: string; sessionId: string | null }; + +// Side-channel for everything the loop does, so an integration layer can fan it +// onto a bus without consuming each turn's handle. onEvent fires for every live +// event (deltas, tool/permission lifecycle); onState fires on every committed +// fact (each persist) carrying the full turn snapshot. Both are best-effort and +// never affect loop control flow. 
+export interface TurnObserver { + onEvent(meta: TurnEventMeta, event: TurnEvent): void; + onState(turn: z.infer): void; +} + +export class NullTurnObserver implements TurnObserver { + onEvent(): void {} + onState(): void {} +} diff --git a/apps/x/packages/core/src/agent-loop/turn-store.ts b/apps/x/packages/core/src/agent-loop/turn-store.ts index 51af7e56..873b1e93 100644 --- a/apps/x/packages/core/src/agent-loop/turn-store.ts +++ b/apps/x/packages/core/src/agent-loop/turn-store.ts @@ -10,4 +10,6 @@ export interface TurnStore { // Session linkage queries (used by the sessions layer); ordered by sessionSeq. latestForSession(sessionId: string): Promise | null>; listBySession(sessionId: string): Promise[]>; + // Remove every turn belonging to a session (used when a session is deleted). + deleteBySession(sessionId: string): Promise; } diff --git a/apps/x/packages/core/src/agent-loop/types.ts b/apps/x/packages/core/src/agent-loop/types.ts index 44997e1f..6d07e180 100644 --- a/apps/x/packages/core/src/agent-loop/types.ts +++ b/apps/x/packages/core/src/agent-loop/types.ts @@ -1,269 +1,5 @@ -import { z } from "zod"; -import { - AssistantMessage, - Message, - MessageList, - ToolCallPart, -} from "@x/shared/dist/message.js"; - -// ─── Persisted fact schemas ───────────────────────────────────────────────── -// -// A turn is five append-only fact logs + set-once scalars. Records are never -// mutated or deleted; every field records exactly one non-derivable fact. -// Everything else (status, per-call lifecycle) is derived. - -export const PermissionRequest = z.object({ - toolCallId: z.string(), - // What the user is approving (file access, command, ...). Computed from - // tool args by the PermissionGate, so it must be persisted to pin down - // exactly what was asked. 
- request: z.unknown(), - requestedAt: z.string(), -}); - -export const PermissionDecision = z.discriminatedUnion("decidedBy", [ - z.object({ - toolCallId: z.string(), - decidedBy: z.literal("user"), - decision: z.enum(["granted", "denied"]), - reason: z.string().nullable(), - decidedAt: z.string(), - }), - z.object({ - toolCallId: z.string(), - decidedBy: z.literal("classifier"), - decision: z.enum(["granted", "denied", "abstained"]), - reason: z.string(), - decidedAt: z.string(), - }), -]); - -export const StartedTool = z.object({ - toolCallId: z.string(), - startedAt: z.string(), -}); - -export const DispatchedTool = z.object({ - toolCallId: z.string(), - dispatchedAt: z.string(), -}); - -// One entry per model call. Token counts are as reported by the provider — -// null when the provider did not report that field. Aggregate via totalUsage. -export const ModelUsage = z.object({ - inputTokens: z.number().nullable(), - outputTokens: z.number().nullable(), - totalTokens: z.number().nullable(), - reasoningTokens: z.number().nullable(), - cachedInputTokens: z.number().nullable(), - at: z.string(), -}); - -export const AgentLoopError = z.object({ - message: z.string(), - code: z.string().optional(), - details: z.unknown().optional(), - at: z.string(), -}); - -export const PermissionMode = z.enum(["manual", "auto"]); - -export const AgentLoopTurn = z.object({ - id: z.string(), - agentId: z.string().nullable(), - provider: z.string().nullable(), - model: z.string().nullable(), - permissionMode: PermissionMode, - - // Session linkage — opaque to the loop (the sessions layer owns the - // meaning). seq is the turn's 1-based position within its session. 
- sessionId: z.string().nullable(), - sessionSeq: z.number().int().positive().nullable(), - - // append-only fact logs - messages: MessageList, - permissionRequests: z.array(PermissionRequest), - permissionDecisions: z.array(PermissionDecision), - startedTools: z.array(StartedTool), - dispatchedTools: z.array(DispatchedTool), - modelUsage: z.array(ModelUsage), - - // set-once scalars - error: AgentLoopError.nullable(), - completedAt: z.string().nullable(), - - createdAt: z.string(), - updatedAt: z.string(), -}); - -export const AgentLoopInput = z.object({ - agentId: z.string().nullable().optional(), - provider: z.string().nullable().optional(), - model: z.string().nullable().optional(), - permissionMode: PermissionMode.optional(), - sessionId: z.string().nullable().optional(), - sessionSeq: z.number().int().positive().nullable().optional(), - // May include prior-conversation history; turns are self-contained by design. - messages: MessageList.min(1), -}).refine( - (input) => (input.sessionId == null) === (input.sessionSeq == null), - { message: "sessionId and sessionSeq must be set together" }, -); - -// ─── Tool definitions (environment, not turn state) ──────────────────────── - -export type ToolDefinition = { - name: string; - description?: string; - // JSON Schema for the tool input - inputSchema?: unknown; -}; - -// ─── Live (never persisted) event types ───────────────────────────────────── - -export type ModelStreamEvent = - | { type: "text-delta"; delta: string } - | { type: "reasoning-delta"; delta: string } - | { type: "tool-call"; toolCall: z.infer } - | { type: "finish"; message: z.infer } - | { type: "error"; error: unknown }; - -export type TurnEvent = - | ModelStreamEvent - | { type: "tool-execution-start"; toolCallId: string } - | { type: "tool-result"; toolCallId: string } - | { type: "permission-requested"; toolCallId: string }; - -// ─── Derived state ────────────────────────────────────────────────────────── - -export type TurnStatus = 
"waiting" | "completed" | "error" | "idle"; - -export type ToolCallState = - | "resolved" // matching ToolMessage exists — terminal - | "dispatched" // delegated; result arrives via setToolResult - | "interrupted" // started but never resolved nor dispatched (crash/abort) - | "needs-classifier" // open request, auto mode, classifier has not spoken - | "awaiting-user" // open request, waiting on a user decision - | "cleared" // terminal `granted` decision; ready to execute - | "unevaluated"; // no facts yet; permission gate has not been consulted - -export function toolCallParts( - turn: z.infer, -): z.infer[] { - const parts: z.infer[] = []; - for (const msg of turn.messages) { - if (msg.role !== "assistant" || typeof msg.content === "string") continue; - for (const part of msg.content) { - if (part.type === "tool-call") parts.push(part); - } - } - return parts; -} - -export function resolvedToolCallIds(turn: z.infer): Set { - const ids = new Set(); - for (const msg of turn.messages) { - if (msg.role === "tool") ids.add(msg.toolCallId); - } - return ids; -} - -export function unresolvedToolCalls( - turn: z.infer, -): z.infer[] { - const resolved = resolvedToolCallIds(turn); - return toolCallParts(turn).filter((part) => !resolved.has(part.toolCallId)); -} - -export function deriveToolCallState( - turn: z.infer, - toolCallId: string, -): ToolCallState { - if (resolvedToolCallIds(turn).has(toolCallId)) return "resolved"; - if (turn.dispatchedTools.some((t) => t.toolCallId === toolCallId)) return "dispatched"; - if (turn.startedTools.some((t) => t.toolCallId === toolCallId)) return "interrupted"; - - const request = turn.permissionRequests.find((r) => r.toolCallId === toolCallId); - if (request) { - const decisions = turn.permissionDecisions.filter((d) => d.toolCallId === toolCallId); - const terminal = decisions.find((d) => d.decision === "granted" || d.decision === "denied"); - if (terminal) { - // A denied call always has its denial ToolMessage appended atomically - 
// with the decision, so an unresolved terminal decision should be - // `granted` — but check explicitly: an unpaired denial (a buggy - // future writer) must never derive as executable. It falls back to - // awaiting-user, which self-heals via a fresh decision. - return terminal.decision === "granted" ? "cleared" : "awaiting-user"; - } - if (turn.permissionMode === "auto" && !decisions.some((d) => d.decidedBy === "classifier")) { - return "needs-classifier"; - } - return "awaiting-user"; - } - - return "unevaluated"; -} - -export function deriveTurnStatus(turn: z.infer): TurnStatus { - if (turn.error !== null) return "error"; - if (turn.completedAt !== null) return "completed"; - for (const call of unresolvedToolCalls(turn)) { - const state = deriveToolCallState(turn, call.toolCallId); - if (state === "awaiting-user" || state === "dispatched") return "waiting"; - } - return "idle"; -} - -// The transcript as a successor turn would see it: a terminal turn's dangling -// tool calls are closed out with synthetic ToolMessages so a follow-up never -// re-executes — or hangs on — stale calls. Pure and deterministic over an -// immutable (terminal) turn, which is what lets the sessions layer build the -// next turn's input from it AND lets stores reproduce it byte-for-byte. -export function closedTranscript( - turn: z.infer, -): z.infer[] { - const messages = [...turn.messages]; - for (const call of unresolvedToolCalls(turn)) { - messages.push({ - role: "tool", - content: closureContent(deriveToolCallState(turn, call.toolCallId)), - toolCallId: call.toolCallId, - toolName: call.toolName, - }); - } - return messages; -} - -// Honest per-state wording for a dangling call: how far did it actually get? -function closureContent(state: ToolCallState): string { - switch (state) { - case "interrupted": - // execution began in-process; the side effect may have landed - return "Tool execution was interrupted before completing. 
It may or may not have taken effect; do not assume it ran."; - case "dispatched": - // delegated to an external runner; it may still finish out there - return "Tool was dispatched but its result never arrived; it may have completed externally. Do not assume it ran or that it failed."; - default: - // never reached execution (unevaluated / awaiting permission / cleared-but-not-started) - return "Tool was not executed: the turn was stopped before this call ran."; - } -} - -// Sum of all model calls in the turn. A field is null only if no call -// reported it; otherwise unreported entries count as 0 toward the sum. -export function totalUsage( - turn: z.infer, -): Omit, "at"> { - const sum = (field: "inputTokens" | "outputTokens" | "totalTokens" | "reasoningTokens" | "cachedInputTokens") => { - const reported = turn.modelUsage.map((u) => u[field]).filter((v) => v !== null); - if (reported.length === 0) return null; - return reported.reduce((a, b) => a + b, 0); - }; - return { - inputTokens: sum("inputTokens"), - outputTokens: sum("outputTokens"), - totalTokens: sum("totalTokens"), - reasoningTokens: sum("reasoningTokens"), - cachedInputTokens: sum("cachedInputTokens"), - }; -} +// The turn contract + pure derivations now live in @x/shared so the IPC layer +// and the renderer can share them (like runs.ts for the old runtime). This +// re-export keeps the many `./types.js` / `../agent-loop/types.js` imports +// across core working unchanged. 
+export * from "@x/shared/dist/agent-turn.js"; diff --git a/apps/x/packages/core/src/agent-runtime/agent-tools.ts b/apps/x/packages/core/src/agent-runtime/agent-tools.ts new file mode 100644 index 00000000..d1f4273e --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/agent-tools.ts @@ -0,0 +1,120 @@ +import { z } from "zod"; +import { Agent, ToolAttachment } from "@x/shared/dist/agent.js"; +import type { ToolDefinition } from "../agent-loop/types.js"; +import { BuiltinTools } from "../application/lib/builtin-tools.js"; +import { loadAgent } from "../agents/runtime.js"; + +// "ask-human" is a builtin attachment but has no entry in the BuiltinTools +// catalog: it never executes through execTool. The loop dispatches it (run → +// pending) and the user's answer arrives via setToolResult. Its advertised +// schema mirrors the old runtime's mapAgentTool special case. +export const ASK_HUMAN_TOOL = "ask-human"; +const ASK_HUMAN_DEFINITION: ToolDefinition = { + name: ASK_HUMAN_TOOL, + description: + "Ask a human before proceeding. Optionally pass `options` (an array of short button labels) to render the question as a one-click choice; the user's response will be the chosen label verbatim.", + inputSchema: { + type: "object", + properties: { + question: { type: "string", description: "The question to ask the human" }, + options: { + type: "array", + items: { type: "string" }, + description: + "Optional short button labels (2-4 recommended). If provided, the user picks one with a single click instead of typing. 
The response you receive will be the chosen label.", + }, + }, + required: ["question"], + additionalProperties: false, + }, +}; + +type ResolvedAgentTools = { + agent: z.infer | null; + definitions: ToolDefinition[]; + attachments: Map>; +}; + +// Loads an agent's tool set once and serves both bridges: the ToolRunner reads +// `definitions` (advertised to the model) and resolves an `attachment` per call +// (to know how to execute it); the PermissionGate reads `attachment` (to decide +// whether a call needs approval). Cached by agentId — agent config is immutable +// for the life of a turn, so re-reading the file each model iteration is waste. +export class AgentTools { + private cache = new Map(); + + constructor(private load: (id: string) => Promise> = loadAgent) {} + + async resolve(agentId: string | null): Promise { + if (agentId === null) return { agent: null, definitions: [], attachments: new Map() }; + const cached = this.cache.get(agentId); + if (cached) return cached; + const resolved = await this.build(agentId); + this.cache.set(agentId, resolved); + return resolved; + } + + async agent(agentId: string | null): Promise | null> { + return (await this.resolve(agentId)).agent; + } + + async definitions(agentId: string | null): Promise { + return (await this.resolve(agentId)).definitions; + } + + async attachment( + agentId: string | null, + toolName: string, + ): Promise | null> { + return (await this.resolve(agentId)).attachments.get(toolName) ?? null; + } + + private async build(agentId: string): Promise { + const agent = await this.load(agentId); + const definitions: ToolDefinition[] = []; + const attachments = new Map>(); + + for (const [name, attachment] of Object.entries(agent.tools ?? 
{})) { + attachments.set(name, attachment); + + if (attachment.type === "mcp") { + definitions.push({ + name, + description: attachment.description, + inputSchema: attachment.inputSchema, + }); + continue; + } + if (attachment.type === "agent") { + // agent-as-tool is unused in shipped agents and unsupported here. + continue; + } + // builtin + if (name === ASK_HUMAN_TOOL) { + definitions.push(ASK_HUMAN_DEFINITION); + continue; + } + const builtin = BuiltinTools[name]; + if (!builtin) continue; + if (builtin.isAvailable && !(await builtin.isAvailable())) continue; + definitions.push({ + name, + description: builtin.description, + inputSchema: toJsonSchema(builtin.inputSchema), + }); + } + + return { agent, definitions, attachments }; + } +} + +// Builtin schemas are zod; the model adapter expects JSON Schema. Convert +// defensively — a tool with an exotic schema that won't convert still gets +// advertised (with an open object schema) rather than breaking the whole turn. +function toJsonSchema(schema: unknown): unknown { + try { + return z.toJSONSchema(schema as z.ZodType); + } catch { + return { type: "object", properties: {} }; + } +} diff --git a/apps/x/packages/core/src/agent-runtime/copilot-system-composer.test.ts b/apps/x/packages/core/src/agent-runtime/copilot-system-composer.test.ts new file mode 100644 index 00000000..367c1372 --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/copilot-system-composer.test.ts @@ -0,0 +1,83 @@ +import { describe, expect, it } from "vitest"; +import { z } from "zod"; +import { Agent } from "@x/shared/dist/agent.js"; +import { AgentLoopTurn } from "../agent-loop/types.js"; +import { AgentTools } from "./agent-tools.js"; +import { CopilotSystemComposer } from "./copilot-system-composer.js"; + +function turn(overrides: Partial> = {}): z.infer { + const now = "2026-06-14T00:00:00Z"; + return { + id: "turn-1", + agentId: "my-agent", // non-copilot → agent-notes/workdir disk reads are skipped + provider: null, + model: 
null, + permissionMode: "manual", + useCase: null, + subUseCase: null, + sessionId: "sess-1", + sessionSeq: 1, + composeContext: null, + messages: [{ role: "user", content: "hi" }], + permissionRequests: [], + permissionDecisions: [], + startedTools: [], + dispatchedTools: [], + modelUsage: [], + error: null, + completedAt: null, + createdAt: now, + updatedAt: now, + ...overrides, + }; +} + +function composerFor(instructions: string): CopilotSystemComposer { + const agent: z.infer = { name: "my-agent", instructions, tools: {} }; + return new CopilotSystemComposer(new AgentTools(async () => agent)); +} + +describe("CopilotSystemComposer", () => { + it("returns null for an agent-less turn", async () => { + const composer = composerFor("be helpful"); + expect(await composer.system(turn({ agentId: null }))).toBeNull(); + }); + + it("always includes the agent instructions and the hidden-user-context explainer", async () => { + const composer = composerFor("YOU ARE A TEST AGENT"); + const system = await composer.system(turn()); + expect(system).toContain("YOU ARE A TEST AGENT"); + expect(system).toContain("# Hidden User Context"); + }); + + it("omits voice / search / code-mode blocks when no compose context is set", async () => { + const composer = composerFor("be helpful"); + const system = (await composer.system(turn())) ?? ""; + expect(system).not.toContain("# Voice Input"); + expect(system).not.toContain("# Voice Output"); + expect(system).not.toContain("# Search"); + expect(system).not.toContain("# Code Mode"); + }); + + it("injects each block when its compose flag is set", async () => { + const composer = composerFor("be helpful"); + const system = (await composer.system(turn({ + composeContext: { + voiceInput: true, + voiceOutput: "summary", + searchEnabled: true, + codeMode: "claude", + }, + }))) ?? 
""; + expect(system).toContain("# Voice Input"); + expect(system).toContain("# Voice Output (MANDATORY"); + expect(system).toContain("# Search"); + expect(system).toContain("# Code Mode (Active) — Agent: Claude Code"); + }); + + it("uses the full read-aloud block for voiceOutput=full", async () => { + const composer = composerFor("be helpful"); + const system = (await composer.system(turn({ composeContext: { voiceOutput: "full" } }))) ?? ""; + expect(system).toContain("# Voice Output — Full Read-Aloud"); + }); +}); diff --git a/apps/x/packages/core/src/agent-runtime/copilot-system-composer.ts b/apps/x/packages/core/src/agent-runtime/copilot-system-composer.ts new file mode 100644 index 00000000..23238332 --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/copilot-system-composer.ts @@ -0,0 +1,30 @@ +import { z } from "zod"; +import type { SystemComposer } from "../agent-loop/system-composer.js"; +import type { AgentLoopTurn } from "../agent-loop/types.js"; +import { buildSystemInstructions } from "../agents/compose/system-prompt.js"; +import { AgentTools } from "./agent-tools.js"; + +// Real SystemComposer: builds the system prompt fresh per model call from the +// turn's agent (instructions) + its compose context (voice / search / code-mode) +// via the shared assembly used by the old runtime. Agent-less turns get no +// system prompt. +export class CopilotSystemComposer implements SystemComposer { + constructor(private agentTools: AgentTools) {} + + async system(turn: z.infer): Promise { + if (turn.agentId === null) return null; + const agent = await this.agentTools.agent(turn.agentId); + if (!agent) return null; + const compose = turn.composeContext; + return buildSystemInstructions({ + instructions: agent.instructions, + agentName: turn.agentId, + // Work directory is scoped per chat → keyed by session. + workDirId: turn.sessionId, + voiceInput: compose?.voiceInput, + voiceOutput: compose?.voiceOutput ?? 
null, + searchEnabled: compose?.searchEnabled, + codeMode: compose?.codeMode ?? null, + }); + } +} diff --git a/apps/x/packages/core/src/agent-runtime/copilot-user-message-context.ts b/apps/x/packages/core/src/agent-runtime/copilot-user-message-context.ts new file mode 100644 index 00000000..bcb374b6 --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/copilot-user-message-context.ts @@ -0,0 +1,29 @@ +import { z } from "zod"; +import { MessageList, MiddlePaneContext } from "@x/shared/dist/message.js"; +import type { UserMessageContextComposer } from "../sessions/user-message-context-composer.js"; +import { buildUserMessageContext } from "../agents/compose/user-context.js"; + +// Real UserMessageContextComposer: stamps each new user message with a fresh +// datetime and (for copilot-like agents) the current middle-pane context, the +// same way the old runtime did at message-dequeue time. +export class CopilotUserMessageContextComposer implements UserMessageContextComposer { + attach( + messages: z.infer, + ctx: { + agentId: string | null; + middlePaneContext: z.infer | null; + }, + ): z.infer { + return messages.map((message) => + message.role === "user" && message.userMessageContext === undefined + ? { + ...message, + userMessageContext: buildUserMessageContext({ + agentName: ctx.agentId, + middlePaneContext: ctx.middlePaneContext, + }), + } + : message, + ); + } +} diff --git a/apps/x/packages/core/src/agent-runtime/headless.ts b/apps/x/packages/core/src/agent-runtime/headless.ts new file mode 100644 index 00000000..ba352dce --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/headless.ts @@ -0,0 +1,96 @@ +import type { z } from "zod"; +import type { AgentLoopTurn } from "@x/shared/dist/agent-turn.js"; +import { getAgentRuntime } from "./index.js"; + +type Turn = z.infer; + +// Headless agent runs (live-note, background-task, knowledge pipelines, +// scheduled agents) are one-shot: a single user message → a single agent +// response. 
They are NOT conversations, so each maps to one standalone turn +// (sessionId null) rather than a session. The durable memory is the agent's +// own state file (note / task / config), never a copy-forward transcript — so +// turns stay isolated and never grow unbounded. +// +// Headless has no human to approve tool calls, so runs use auto permission +// mode: the classifier decides. (Manual mode would block forever waiting on a +// UI prompt that no one can answer.) + +export interface HeadlessRun { + agentId: string; + // Already-rendered prompt for this run (trigger block + objective + state). + message: string; + model?: string | null; + provider?: string | null; + // Analytics attribution for this run's LLM usage (PostHog `llm_usage`). + useCase: string; + subUseCase?: string; + // Called with the turn id the instant the turn is created (before it runs), + // so callers can record it / publish a "start" event. + onStart?: (turnId: string) => void | Promise; +} + +export interface HeadlessResult { + turnId: string; + turn: Turn; + // Final assistant text, or null on error / no assistant message. + summary: string | null; + // Terminal turn error message, or null on success. + error: string | null; +} + +// Run a headless agent to completion as a single standalone turn. +export async function runHeadlessAgent(run: HeadlessRun): Promise { + const { agentLoop } = await getAgentRuntime(); + const handle = await agentLoop.createTurn({ + agentId: run.agentId, + permissionMode: "auto", + useCase: run.useCase, + ...(run.subUseCase ? { subUseCase: run.subUseCase } : {}), + ...(run.provider ? { provider: run.provider } : {}), + ...(run.model ? { model: run.model } : {}), + messages: [{ role: "user", content: run.message }], + }); + if (run.onStart) await run.onStart(handle.id); + const turn = await handle.result; + const error = turn.error?.message ?? null; + return { + turnId: handle.id, + turn, + summary: error ? 
null : finalAssistantText(turn), + error, + }; +} + +// The last assistant message's text content, trimmed; null if there is none. +export function finalAssistantText(turn: Turn): string | null { + for (let i = turn.messages.length - 1; i >= 0; i--) { + const message = turn.messages[i]; + if (message.role !== "assistant") continue; + const text = typeof message.content === "string" + ? message.content + : message.content + .filter((part) => part.type === "text") + .map((part) => (part as { text: string }).text) + .join(""); + const trimmed = text.trim(); + return trimmed.length > 0 ? trimmed : null; + } + return null; +} + +// Paths touched by tool calls of the given tool names in a completed turn. +// Used by the knowledge pipelines that previously watched the run bus for +// file-editText / file-writeText invocations — the new bus carries only tool +// call ids, so paths are read from the turn's tool-call parts instead. +export function editedPaths(turn: Turn, toolNames: readonly string[]): string[] { + const paths = new Set(); + for (const message of turn.messages) { + if (message.role !== "assistant" || typeof message.content === "string") continue; + for (const part of message.content) { + if (part.type !== "tool-call" || !toolNames.includes(part.toolName)) continue; + const path = (part.arguments as { path?: unknown } | undefined)?.path; + if (typeof path === "string") paths.add(path); + } + } + return [...paths]; +} diff --git a/apps/x/packages/core/src/agent-runtime/index.ts b/apps/x/packages/core/src/agent-runtime/index.ts new file mode 100644 index 00000000..6ed6a190 --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/index.ts @@ -0,0 +1,80 @@ +import { AgentLoopImpl, type AgentLoop } from "../agent-loop/agent-loop.js"; +import { VercelModelAdapter } from "../agent-loop/model-adapter.js"; +import { SqliteTurnStore } from "../agent-loop/sqlite-turn-store.js"; +import { getDb, initStorage } from "../storage/database.js"; +import { SessionsImpl, type 
Sessions } from "../sessions/sessions.js"; +import { SqliteSessionStore } from "../sessions/sqlite-session-store.js"; +import { AgentTools } from "./agent-tools.js"; +import { CopilotSystemComposer } from "./copilot-system-composer.js"; +import { CopilotUserMessageContextComposer } from "./copilot-user-message-context.js"; +import { RealPermissionGate, type SessionGrants } from "./real-permission-gate.js"; +import { RealToolRunner } from "./real-tool-runner.js"; +import { TurnEventBus } from "./turn-event-bus.js"; + +export * from "./agent-tools.js"; +export * from "./real-tool-runner.js"; +export * from "./real-permission-gate.js"; +export * from "./copilot-system-composer.js"; +export * from "./copilot-user-message-context.js"; +export * from "./turn-event-bus.js"; +export * from "./headless.js"; + +export type AgentRuntime = { + sessions: Sessions; + agentLoop: AgentLoop; + // The session/turn event feed (live deltas + state snapshots). The main + // process subscribes and forwards it to renderer windows. + bus: TurnEventBus; +}; + +// The single assembly point for the new runtime. Wires the SQLite stores, the +// Vercel model adapter, and the two real bridges (tool runner + permission +// gate) into an AgentLoop, and layers Sessions on top. This is what the main +// process will instantiate once and hand to the IPC layer. +export async function createAgentRuntime(deps: { + sessionGrants?: SessionGrants; +} = {}): Promise { + await initStorage(); + const db = getDb(); + + const turnStore = new SqliteTurnStore(db); + const sessionStore = new SqliteSessionStore(db); + + // One AgentTools instance shared by both bridges so an agent's config is + // loaded and cached once, not once per bridge. 
+ const agentTools = new AgentTools(); + + const bus = new TurnEventBus(); + const agentLoop = new AgentLoopImpl({ + store: turnStore, + modelAdapter: new VercelModelAdapter(), + toolRunner: new RealToolRunner({ agentTools }), + permissionGate: new RealPermissionGate({ + agentTools, + ...(deps.sessionGrants ? { sessionGrants: deps.sessionGrants } : {}), + }), + systemComposer: new CopilotSystemComposer(agentTools), + observer: bus, + }); + + const sessions = new SessionsImpl({ + sessionStore, + turnStore, + agentLoop, + userMessageContext: new CopilotUserMessageContextComposer(), + }); + return { sessions, agentLoop, bus }; +} + +// The process-wide runtime singleton. The main process creates it once at +// startup (passing any deps), and headless callers (schedulers, knowledge +// pipelines, live-note / background-task runners) reach the SAME instance — so +// their turns share the one agent loop, store, and event bus. +let runtimeSingleton: Promise | null = null; + +export function getAgentRuntime(deps: { sessionGrants?: SessionGrants } = {}): Promise { + if (!runtimeSingleton) { + runtimeSingleton = createAgentRuntime(deps); + } + return runtimeSingleton; +} diff --git a/apps/x/packages/core/src/agent-runtime/real-permission-gate.test.ts b/apps/x/packages/core/src/agent-runtime/real-permission-gate.test.ts new file mode 100644 index 00000000..cefd59fc --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/real-permission-gate.test.ts @@ -0,0 +1,164 @@ +import { describe, expect, it } from "vitest"; +import { z } from "zod"; +import { Agent, ToolAttachment } from "@x/shared/dist/agent.js"; +import { Message, ToolCallPart } from "@x/shared/dist/message.js"; +import type { AutoPermissionDecision } from "../security/auto-permission-classifier.js"; +import { AgentLoopTurn } from "../agent-loop/types.js"; +import type { getToolPermissionMetadata } from "../security/permission-metadata.js"; +import { AgentTools } from "./agent-tools.js"; +import { 
RealPermissionGate, type SessionGrants } from "./real-permission-gate.js"; + +// ─── helpers ──────────────────────────────────────────────────────────────── + +const builtin = (name: string): z.infer => ({ type: "builtin", name }); + +function agentToolsFor(tools: Record>): AgentTools { + const agent: z.infer = { name: "test-agent", instructions: "", tools }; + return new AgentTools(async () => agent); +} + +function call(toolName: string, args: Record = {}): z.infer { + return { type: "tool-call", toolCallId: `tc-${toolName}`, toolName, arguments: args }; +} + +function turn(overrides: Partial> = {}): z.infer { + const now = "2026-06-14T00:00:00Z"; + return { + id: "turn-1", + agentId: "test-agent", + provider: null, + model: null, + permissionMode: "auto", + useCase: null, + subUseCase: null, + sessionId: "sess-1", + sessionSeq: 1, + composeContext: null, + messages: [{ role: "user", content: "do it" } satisfies z.infer], + permissionRequests: [], + permissionDecisions: [], + startedTools: [], + dispatchedTools: [], + modelUsage: [], + error: null, + completedAt: null, + createdAt: now, + updatedAt: now, + ...overrides, + }; +} + +type MetadataFn = typeof getToolPermissionMetadata; + +// ─── check ─────────────────────────────────────────────────────────────────── + +describe("RealPermissionGate.check", () => { + it("requires no approval for a tool the agent does not have", async () => { + const gate = new RealPermissionGate({ + agentTools: agentToolsFor({}), + getMetadata: (async () => { + throw new Error("should not be consulted for an unknown tool"); + }) as unknown as MetadataFn, + }); + expect(await gate.check(call("nope"), turn())).toEqual({ required: false }); + }); + + it("returns the metadata as the request when approval is required", async () => { + const request = { kind: "command", commandNames: ["rm"] }; + const gate = new RealPermissionGate({ + agentTools: agentToolsFor({ executeCommand: builtin("executeCommand") }), + getMetadata: (async () => 
request) as unknown as MetadataFn, + }); + expect(await gate.check(call("executeCommand"), turn())).toEqual({ + required: true, + request, + }); + }); + + it("treats null metadata as no approval needed", async () => { + const gate = new RealPermissionGate({ + agentTools: agentToolsFor({ "file-readText": builtin("file-readText") }), + getMetadata: (async () => null) as unknown as MetadataFn, + }); + expect(await gate.check(call("file-readText"), turn())).toEqual({ required: false }); + }); + + it("consults session grants and passes them with the resolved attachment", async () => { + const seen: { attachment: unknown; commands: Set; grants: unknown }[] = []; + const sessionGrants: SessionGrants = { + commands: async (sessionId) => + sessionId === "sess-1" ? new Set(["ls"]) : new Set(), + fileAccess: async (sessionId) => + sessionId === "sess-1" ? [{ operation: "read", pathPrefix: "/tmp" }] : [], + }; + const gate = new RealPermissionGate({ + agentTools: agentToolsFor({ executeCommand: builtin("executeCommand") }), + sessionGrants, + getMetadata: (async (_tc, attachment, commands, grants) => { + seen.push({ attachment, commands, grants }); + return null; + }) as unknown as MetadataFn, + }); + + await gate.check(call("executeCommand"), turn()); + expect(seen).toHaveLength(1); + expect(seen[0].attachment).toMatchObject({ type: "builtin", name: "executeCommand" }); + expect(seen[0].commands).toEqual(new Set(["ls"])); + expect(seen[0].grants).toEqual([{ operation: "read", pathPrefix: "/tmp" }]); + }); +}); + +// ─── classify ────────────────────────────────────────────────────────────── + +describe("RealPermissionGate.classify", () => { + const request = { kind: "command" as const, commandNames: ["rm"] }; + + function gateWithClassifier(decisions: AutoPermissionDecision[], spy?: (input: unknown) => void) { + return new RealPermissionGate({ + agentTools: agentToolsFor({ executeCommand: builtin("executeCommand") }), + classifier: async (input) => { + spy?.(input); + return 
decisions; + }, + }); + } + + it("maps an allow decision to granted", async () => { + const gate = gateWithClassifier([ + { toolCallId: "tc-executeCommand", decision: "allow", reason: "safe" }, + ]); + expect(await gate.classify(call("executeCommand"), request, turn())).toEqual({ + decision: "granted", + reason: "safe", + }); + }); + + it("maps a deny decision to denied", async () => { + const gate = gateWithClassifier([ + { toolCallId: "tc-executeCommand", decision: "deny", reason: "destructive" }, + ]); + expect(await gate.classify(call("executeCommand"), request, turn())).toEqual({ + decision: "denied", + reason: "destructive", + }); + }); + + it("abstains when the classifier returns no decision for the call", async () => { + const gate = gateWithClassifier([]); + const result = await gate.classify(call("executeCommand"), request, turn()); + expect(result.decision).toBe("abstained"); + }); + + it("passes the parsed permission, run id, agent, and converted messages to the classifier", async () => { + let input: { runId: string; agentName: string | null; messages: unknown[]; candidates: { permission: unknown }[] } | undefined; + const gate = gateWithClassifier( + [{ toolCallId: "tc-executeCommand", decision: "allow", reason: "ok" }], + (i) => { input = i as typeof input; }, + ); + await gate.classify(call("executeCommand"), request, turn()); + expect(input?.runId).toBe("turn-1"); + expect(input?.agentName).toBe("test-agent"); + expect(input?.messages.length).toBeGreaterThan(0); + expect(input?.candidates[0].permission).toEqual(request); + }); +}); diff --git a/apps/x/packages/core/src/agent-runtime/real-permission-gate.ts b/apps/x/packages/core/src/agent-runtime/real-permission-gate.ts new file mode 100644 index 00000000..a894a094 --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/real-permission-gate.ts @@ -0,0 +1,104 @@ +import { z } from "zod"; +import { ToolCallPart } from "@x/shared/dist/message.js"; +import { ToolPermissionMetadata } from 
"@x/shared/dist/runs.js"; +import type { + PermissionCheckResult, + PermissionClassification, + PermissionGate, +} from "../agent-loop/permission-gate.js"; +import type { AgentLoopTurn } from "../agent-loop/types.js"; +import type { UseCase } from "../analytics/use_case.js"; +import { convertFromMessages } from "../agents/runtime.js"; +import { type FileAccessGrant } from "../config/security.js"; +import { classifyToolPermissions } from "../security/auto-permission-classifier.js"; +import { getToolPermissionMetadata } from "../security/permission-metadata.js"; +import { AgentTools } from "./agent-tools.js"; + +// Session-scoped grants ("allow for this chat") the gate must honor on top of +// the persistent allow-list. The integration layer populates these when a user +// approves with session scope; v1 ships an empty store, so until that layer +// exists every applicable call simply prompts. +export interface SessionGrants { + commands(sessionId: string | null): Promise>; + fileAccess(sessionId: string | null): Promise; +} + +export class EmptySessionGrants implements SessionGrants { + async commands(): Promise> { + return new Set(); + } + async fileAccess(): Promise { + return []; + } +} + +type MetadataFn = typeof getToolPermissionMetadata; +type ClassifierFn = typeof classifyToolPermissions; + +// Real PermissionGate: deterministic check() via getToolPermissionMetadata, and +// auto-mode classify() via the LLM classifier. The loop owns when to call these +// and what to do with the answer; this only adapts shapes. 
+export class RealPermissionGate implements PermissionGate { + private agentTools: AgentTools; + private grants: SessionGrants; + private getMetadata: MetadataFn; + private classifier: ClassifierFn; + private useCase: UseCase; + + constructor(deps: { + agentTools: AgentTools; + sessionGrants?: SessionGrants; + getMetadata?: MetadataFn; + classifier?: ClassifierFn; + useCase?: UseCase; + }) { + this.agentTools = deps.agentTools; + this.grants = deps.sessionGrants ?? new EmptySessionGrants(); + this.getMetadata = deps.getMetadata ?? getToolPermissionMetadata; + this.classifier = deps.classifier ?? classifyToolPermissions; + this.useCase = deps.useCase ?? "copilot_chat"; + } + + async check( + toolCall: z.infer, + turn: z.infer, + ): Promise { + const attachment = await this.agentTools.attachment(turn.agentId, toolCall.toolName); + // An unknown tool needs no approval — the runner turns it into an error + // ToolMessage, and there is nothing meaningful to approve. + if (!attachment) return { required: false }; + + const [commands, fileAccess] = await Promise.all([ + this.grants.commands(turn.sessionId), + this.grants.fileAccess(turn.sessionId), + ]); + const metadata = await this.getMetadata(toolCall, attachment, commands, fileAccess); + return metadata ? { required: true, request: metadata } : { required: false }; + } + + async classify( + toolCall: z.infer, + request: unknown, + turn: z.infer, + ): Promise { + // request is what check() persisted — our own metadata; parse to be safe. + const permission = ToolPermissionMetadata.parse(request); + const decisions = await this.classifier({ + runId: turn.id, + agentName: turn.agentId, + messages: convertFromMessages(turn.messages), + candidates: [{ toolCall, permission }], + // Per-turn attribution if the turn carries one; else the gate default. + useCase: (turn.useCase as UseCase | null) ?? 
this.useCase, + }); + const decision = decisions.find((d) => d.toolCallId === toolCall.toolCallId); + if (!decision) { + // The classifier declined to rule on this call — fall back to the user. + return { decision: "abstained", reason: "Classifier returned no decision for this tool call." }; + } + return { + decision: decision.decision === "allow" ? "granted" : "denied", + reason: decision.reason, + }; + } +} diff --git a/apps/x/packages/core/src/agent-runtime/real-tool-runner.test.ts b/apps/x/packages/core/src/agent-runtime/real-tool-runner.test.ts new file mode 100644 index 00000000..4f82cf5a --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/real-tool-runner.test.ts @@ -0,0 +1,236 @@ +import { mkdtemp, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, expect, it } from "vitest"; +import { z } from "zod"; +import { Agent, ToolAttachment } from "@x/shared/dist/agent.js"; +import { ToolCallPart } from "@x/shared/dist/message.js"; +import type { ToolRunContext } from "../agent-loop/tool-runner.js"; +import type { TurnEvent } from "../agent-loop/types.js"; +import type { ToolContext } from "../application/lib/exec-tool.js"; +import { IAbortRegistry } from "../runs/abort-registry.js"; +import { AgentTools } from "./agent-tools.js"; +import { RealToolRunner } from "./real-tool-runner.js"; + +// ─── helpers ──────────────────────────────────────────────────────────────── + +function makeAgent(tools: Record>): z.infer { + return { name: "test-agent", instructions: "", tools }; +} + +function agentToolsFor(tools: Record>): AgentTools { + return new AgentTools(async () => makeAgent(tools)); +} + +function toolCall(toolName: string, args: Record = {}): z.infer { + return { type: "tool-call", toolCallId: `tc-${toolName}`, toolName, arguments: args }; +} + +function makeCtx(): { + ctx: ToolRunContext; + emitted: TurnEvent[]; + controller: AbortController; +} { + const controller = new 
/**
 * Recording test double for the abort registry. It remembers, in call order,
 * which run ids were passed to createForRun / abort / cleanup so a test can
 * assert how the runner brackets a tool call. The process-tracking and
 * force-abort hooks are deliberate no-ops.
 */
class FakeAbortRegistry implements IAbortRegistry {
    // Run ids observed by each lifecycle call.
    createdFor: string[] = [];
    aborted: string[] = [];
    cleaned: string[] = [];

    /** Records the run id and hands back a fresh signal that this fake never aborts. */
    createForRun(runId: string): AbortSignal {
        this.createdFor.push(runId);
        const { signal } = new AbortController();
        return signal;
    }

    /** Records the abort request; the signal returned earlier is not touched. */
    abort(runId: string): void {
        this.aborted.push(runId);
    }

    /** Records the cleanup request. */
    cleanup(runId: string): void {
        this.cleaned.push(runId);
    }

    /** Always reports "not aborted". */
    isAborted(): boolean {
        return false;
    }

    // Hooks the tests do not exercise.
    registerProcess(): void {}
    unregisterProcess(): void {}
    forceAbort(): void {}
}
passes through unchanged + expect(byName.get("search")!.inputSchema).toBe(schema); + // ask-human gets its synthesized schema + expect((byName.get("ask-human")!.inputSchema as { required?: string[] }).required).toEqual(["question"]); + }); + + it("returns nothing for an agent-less turn and skips agent-as-tool attachments", async () => { + const runner = new RealToolRunner({ + agentTools: agentToolsFor({ sub: { type: "agent", name: "sub" } }), + }); + expect(await runner.definitions(null)).toEqual([]); + expect(await runner.definitions("test-agent")).toEqual([]); + }); + + it("caches the agent config across calls (one load)", async () => { + let loads = 0; + const agentTools = new AgentTools(async () => { + loads++; + return makeAgent({ "file-exists": builtin("file-exists") }); + }); + const runner = new RealToolRunner({ agentTools }); + await runner.definitions("test-agent"); + await runner.definitions("test-agent"); + expect(loads).toBe(1); + }); +}); + +// ─── run ─────────────────────────────────────────────────────────────────── + +describe("RealToolRunner.run", () => { + it("dispatches a real builtin (file-exists) against an absolute path", async () => { + const dir = await mkdtemp(join(tmpdir(), "tool-runner-")); + const file = join(dir, "present.txt"); + await writeFile(file, "hi"); + + const runner = new RealToolRunner({ + agentTools: agentToolsFor({ "file-exists": builtin("file-exists") }), + }); + const { ctx } = makeCtx(); + + const present = await runner.run(toolCall("file-exists", { path: file }), ctx); + const absent = await runner.run(toolCall("file-exists", { path: join(dir, "nope.txt") }), ctx); + + expect(present).toMatchObject({ type: "result", value: { exists: true } }); + expect(absent).toMatchObject({ type: "result", value: { exists: false } }); + }); + + it("returns pending for ask-human without executing anything", async () => { + let called = false; + const runner = new RealToolRunner({ + agentTools: agentToolsFor({ "ask-human": 
builtin("ask-human") }), + execTool: async () => { + called = true; + return null; + }, + }); + const { ctx } = makeCtx(); + expect(await runner.run(toolCall("ask-human", { question: "ok?" }), ctx)).toEqual({ type: "pending" }); + expect(called).toBe(false); + }); + + it("returns an error outcome for a tool the agent does not have", async () => { + const runner = new RealToolRunner({ agentTools: agentToolsFor({}) }); + const { ctx } = makeCtx(); + expect(await runner.run(toolCall("nonexistent"), ctx)).toEqual({ + type: "error", + value: "Unknown tool: nonexistent", + }); + }); + + it("dispatches MCP calls through execTool with the attachment and args", async () => { + const seen: { attachment: unknown; args: unknown }[] = []; + const runner = new RealToolRunner({ + agentTools: agentToolsFor({ search: mcp("search", {}) }), + execTool: async (attachment, args) => { + seen.push({ attachment, args }); + return { hits: 3 }; + }, + }); + const { ctx } = makeCtx(); + const out = await runner.run(toolCall("search", { q: "term" }), ctx); + expect(out).toEqual({ type: "result", value: { hits: 3 } }); + expect(seen).toHaveLength(1); + expect(seen[0].attachment).toMatchObject({ type: "mcp", name: "search" }); + expect(seen[0].args).toEqual({ q: "term" }); + }); + + it("lets a thrown tool error propagate (the loop records it, not the bridge)", async () => { + const runner = new RealToolRunner({ + agentTools: agentToolsFor({ "file-exists": builtin("file-exists") }), + execTool: async () => { + throw new Error("ECONNRESET"); + }, + }); + const { ctx } = makeCtx(); + await expect(runner.run(toolCall("file-exists"), ctx)).rejects.toThrow("ECONNRESET"); + }); + + it("translates a tool-output-stream publish into a tool-output emit", async () => { + const runner = new RealToolRunner({ + agentTools: agentToolsFor({ executeCommand: builtin("executeCommand") }), + execTool: async (_attachment, _args, toolCtx?: ToolContext) => { + await toolCtx!.publish({ + runId: toolCtx!.runId, + type: 
"tool-output-stream", + toolCallId: toolCtx!.toolCallId, + toolName: "executeCommand", + output: "line of stdout", + subflow: [], + }); + return { ok: true }; + }, + }); + const { ctx, emitted } = makeCtx(); + await runner.run(toolCall("executeCommand"), ctx); + expect(emitted).toEqual([ + { type: "tool-output", toolCallId: "tc-executeCommand", chunk: "line of stdout" }, + ]); + }); + + it("brackets the run with the abort registry and forwards aborts to it", async () => { + const registry = new FakeAbortRegistry(); + const { ctx, controller } = makeCtx(); + const runner = new RealToolRunner({ + agentTools: agentToolsFor({ executeCommand: builtin("executeCommand") }), + abortRegistry: registry, + // The turn is stopped while the tool is in flight. + execTool: async () => { + controller.abort(); + return null; + }, + }); + await runner.run(toolCall("executeCommand"), ctx); + + expect(registry.createdFor).toEqual(["turn-1"]); + expect(registry.aborted).toEqual(["turn-1"]); // signal abort → registry.abort + expect(registry.cleaned).toEqual(["turn-1"]); + }); +}); diff --git a/apps/x/packages/core/src/agent-runtime/real-tool-runner.ts b/apps/x/packages/core/src/agent-runtime/real-tool-runner.ts new file mode 100644 index 00000000..7feda1cd --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/real-tool-runner.ts @@ -0,0 +1,91 @@ +import { z } from "zod"; +import { ToolCallPart } from "@x/shared/dist/message.js"; +import type { ToolDefinition } from "../agent-loop/types.js"; +import type { ToolRunContext, ToolRunner, ToolRunResult } from "../agent-loop/tool-runner.js"; +import { execTool, type ToolContext } from "../application/lib/exec-tool.js"; +import { IAbortRegistry, InMemoryAbortRegistry } from "../runs/abort-registry.js"; +import { AgentTools, ASK_HUMAN_TOOL } from "./agent-tools.js"; + +type ExecTool = typeof execTool; + +// Real ToolRunner: bridges the agent loop to the existing execTool dispatcher +// (builtins + MCP). 
/**
 * Real ToolRunner: bridges the agent loop to the existing execTool dispatcher
 * (builtins + MCP). The loop owns the lifecycle (start fact, ToolMessage,
 * dispatched fact); this class only resolves the attachment and runs it.
 */
export class RealToolRunner implements ToolRunner {
    private agentTools: AgentTools;
    private abortRegistry: IAbortRegistry;
    private execTool: ExecTool;

    /**
     * @param deps.agentTools Resolves tool definitions and attachments per agent.
     * @param deps.abortRegistry Optional registry; defaults to a new InMemoryAbortRegistry.
     * @param deps.execTool Optional dispatcher override; defaults to the real execTool.
     */
    constructor(deps: {
        agentTools: AgentTools;
        abortRegistry?: IAbortRegistry;
        execTool?: ExecTool;
    }) {
        this.agentTools = deps.agentTools;
        this.abortRegistry = deps.abortRegistry ?? new InMemoryAbortRegistry();
        this.execTool = deps.execTool ?? execTool;
    }

    /** Tool definitions for the given agent; delegates to AgentTools. */
    definitions(agentId: string | null): Promise<ToolDefinition[]> {
        return this.agentTools.definitions(agentId);
    }

    /**
     * Resolves the tool call's attachment for the turn's agent and executes it.
     *
     * @returns `{ type: "error" }` for a tool the agent does not have,
     *   `{ type: "pending" }` for ask-human, otherwise `{ type: "result" }`
     *   carrying the dispatcher's value (`undefined` is normalised to `null`).
     * @throws Error for agent-as-tool attachments, and whatever the dispatcher
     *   throws — those propagate to the caller.
     */
    async run(
        toolCall: z.infer<typeof ToolCallPart>,
        ctx: ToolRunContext,
    ): Promise<ToolRunResult> {
        const attachment = await this.agentTools.attachment(ctx.agentId, toolCall.toolName);
        if (!attachment) {
            // The model named a tool this agent doesn't have — conversational
            // error, not a turn error: the loop turns it into a ToolMessage.
            return { type: "error", value: `Unknown tool: ${toolCall.toolName}` };
        }
        if (attachment.type === "agent") {
            throw new Error(`agent-as-tool is not supported: ${toolCall.toolName}`);
        }
        // ask-human never executes — it is delegated and answered out of band.
        if (attachment.type === "builtin" && attachment.name === ASK_HUMAN_TOOL) {
            return { type: "pending" };
        }

        // The signal is the primary kill path (executeCommandAbortable tears
        // down its process tree off it). The registry is the secondary
        // force-kill the old runtime used; wire it to the same signal so both
        // mechanisms fire. createForRun/cleanup bracket this single call.
        this.abortRegistry.createForRun(ctx.turnId);
        const onAbort = () => this.abortRegistry.abort(ctx.turnId);
        try {
            if (ctx.signal.aborted) {
                // An "abort" listener added to an already-aborted signal never
                // fires, so forward the abort to the registry right away.
                onAbort();
            } else {
                ctx.signal.addEventListener("abort", onAbort, { once: true });
            }
            const toolContext: ToolContext = {
                runId: ctx.turnId,
                toolCallId: toolCall.toolCallId,
                signal: ctx.signal,
                abortRegistry: this.abortRegistry,
                publish: (event) => {
                    if (event.type === "tool-output-stream") {
                        ctx.emit({
                            type: "tool-output",
                            toolCallId: event.toolCallId,
                            chunk: event.output,
                        });
                    }
                    // Other run events (code-run-*) are deferred — the channel
                    // exists; deeper plumbing lands with code_agent_run.
                    return Promise.resolve();
                },
                codeMode: ctx.codeMode,
            };
            // A thrown error propagates: the loop catches it (re-checking abort)
            // and records it as an error ToolMessage, never a turn error.
            const value = await this.execTool(attachment, asArgs(toolCall.arguments), toolContext);
            return { type: "result", value: value === undefined ? null : value };
        } finally {
            // Removing a listener that was never added is a no-op, so this is
            // safe on the already-aborted path too.
            ctx.signal.removeEventListener("abort", onAbort);
            this.abortRegistry.cleanup(ctx.turnId);
        }
    }
}
(args as Record) : {}; +} diff --git a/apps/x/packages/core/src/agent-runtime/smoke.test.ts b/apps/x/packages/core/src/agent-runtime/smoke.test.ts new file mode 100644 index 00000000..8055f6d8 --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/smoke.test.ts @@ -0,0 +1,140 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { z } from "zod"; +import { Agent, ToolAttachment } from "@x/shared/dist/agent.js"; +import { AssistantMessage, ToolCallPart } from "@x/shared/dist/message.js"; + +// End-to-end smoke test: REAL SQLite stores + REAL RealToolRunner (real +// execTool + real builtin) + REAL RealPermissionGate, driven by a FAKE model +// adapter, proving the bridges compose into a working session round-trip. + +let tmpDir: string; +let workspaceDir: string; +let storageModule: typeof import("../storage/index.js") | null = null; + +beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "rowboat-agent-runtime-smoke-")); + workspaceDir = path.join(tmpDir, "workspace"); + await fs.mkdir(workspaceDir, { recursive: true }); + process.env.ROWBOAT_WORKDIR = workspaceDir; + vi.resetModules(); + // config.ts kicks off knowledge-repo init as an import side effect; mock it + // out so the test doesn't touch git (same pattern as the store tests). 
+ vi.doMock("../knowledge/version_history.js", () => ({ + initRepo: vi.fn(async () => undefined), + })); + vi.doMock("../knowledge/deprecate_today_note.js", () => ({ + deprecateTodayNote: vi.fn(async () => undefined), + })); +}); + +afterEach(async () => { + if (storageModule) { + await storageModule.shutdownStorage().catch(() => undefined); + storageModule = null; + } + delete process.env.ROWBOAT_WORKDIR; + vi.doUnmock("../knowledge/version_history.js"); + vi.doUnmock("../knowledge/deprecate_today_note.js"); + vi.resetModules(); + await fs.rm(tmpDir, { recursive: true, force: true }); +}); + +const TEST_AGENT: z.infer = { + name: "smoke-agent", + instructions: "", + tools: { "file-exists": { type: "builtin", name: "file-exists" } satisfies z.infer }, +}; + +function toolCallMessage(toolCallId: string, args: Record): z.infer { + const part: z.infer = { type: "tool-call", toolCallId, toolName: "file-exists", arguments: args }; + return { role: "assistant", content: [part] }; +} + +describe("agent runtime (smoke)", () => { + it("runs a session turn that calls a real builtin and completes, persisted in SQLite", async () => { + const storage = await import("../storage/index.js"); + await storage.initStorage(); + storageModule = storage; + const db = storage.getDb(); + + const { SqliteTurnStore } = await import("../agent-loop/sqlite-turn-store.js"); + const { AgentLoopImpl } = await import("../agent-loop/agent-loop.js"); + const { EventStream } = await import("../agent-loop/event-stream.js"); + const { SqliteSessionStore } = await import("../sessions/sqlite-session-store.js"); + const { SessionsImpl } = await import("../sessions/sessions.js"); + const { AgentTools } = await import("./agent-tools.js"); + const { RealToolRunner } = await import("./real-tool-runner.js"); + const { RealPermissionGate } = await import("./real-permission-gate.js"); + const { TurnEventBus } = await import("./turn-event-bus.js"); + + // A real file inside the workspace → file-exists needs no 
permission. + const probe = path.join(workspaceDir, "probe.txt"); + await fs.writeFile(probe, "hi"); + + // Fake model: first step calls file-exists, second step ends the turn. + const steps: z.infer[] = [ + toolCallMessage("tc1", { path: probe }), + { role: "assistant", content: "done" }, + ]; + const modelAdapter = { + stream(): InstanceType { + const out = new EventStream(); + const message = steps.shift()!; + void (async () => { + await Promise.resolve(); + if (typeof message.content !== "string") { + for (const part of message.content) { + if (part.type === "tool-call") out.push({ type: "tool-call", toolCall: part }); + } + } + out.push({ type: "finish", message }); + out.end({ message, usage: null }); + })(); + return out; + }, + }; + + const agentTools = new AgentTools(async () => TEST_AGENT); + const turnStore = new SqliteTurnStore(db); + const bus = new TurnEventBus(); + const busEvents: { kind: string; turnId: string }[] = []; + bus.subscribe((e) => busEvents.push({ kind: e.kind, turnId: e.turnId })); + const agentLoop = new AgentLoopImpl({ + store: turnStore, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + modelAdapter: modelAdapter as any, + toolRunner: new RealToolRunner({ agentTools }), + permissionGate: new RealPermissionGate({ agentTools }), + observer: bus, + }); + const sessions = new SessionsImpl({ + sessionStore: new SqliteSessionStore(db), + turnStore, + agentLoop, + }); + + const session = await sessions.createSession({ agentId: "smoke-agent" }); + const turn = await (await sessions.sendMessage(session.id, [{ role: "user", content: "is it there?" }])).result; + + // The turn completed, the real builtin ran, and its result is recorded. 
+ const { deriveTurnStatus } = await import("../agent-loop/types.js"); + expect(deriveTurnStatus(turn)).toBe("completed"); + expect(turn.sessionSeq).toBe(1); + const toolResult = turn.messages.find((m) => m.role === "tool"); + expect(toolResult).toBeDefined(); + expect(String((toolResult as { content: string }).content)).toContain('"exists":true'); + + // It is durably persisted: a fresh read of the session sees the turn. + const reread = await sessions.listTurns(session.id); + expect(reread).toHaveLength(1); + expect(reread[0].id).toBe(turn.id); + expect(deriveTurnStatus(reread[0])).toBe("completed"); + + // The bus saw live events and state snapshots for this turn. + expect(busEvents.some((e) => e.kind === "event" && e.turnId === turn.id)).toBe(true); + expect(busEvents.some((e) => e.kind === "state" && e.turnId === turn.id)).toBe(true); + }); +}); diff --git a/apps/x/packages/core/src/agent-runtime/turn-event-bus.test.ts b/apps/x/packages/core/src/agent-runtime/turn-event-bus.test.ts new file mode 100644 index 00000000..622d84cc --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/turn-event-bus.test.ts @@ -0,0 +1,72 @@ +import { describe, expect, it } from "vitest"; +import { z } from "zod"; +import { AgentLoopTurn } from "../agent-loop/types.js"; +import { SessionBusEvent, TurnEventBus } from "./turn-event-bus.js"; + +function turn(overrides: Partial> = {}): z.infer { + const now = "2026-06-14T00:00:00Z"; + return { + id: "t1", + agentId: null, + provider: null, + model: null, + permissionMode: "manual", + useCase: null, + subUseCase: null, + sessionId: "s1", + sessionSeq: 1, + composeContext: null, + messages: [], + permissionRequests: [], + permissionDecisions: [], + startedTools: [], + dispatchedTools: [], + modelUsage: [], + error: null, + completedAt: null, + createdAt: now, + updatedAt: now, + ...overrides, + }; +} + +describe("TurnEventBus", () => { + it("publishes onEvent as a tagged live event and onState as a snapshot", () => { + const bus = 
new TurnEventBus(); + const seen: SessionBusEvent[] = []; + bus.subscribe((e) => seen.push(e)); + + bus.onEvent({ turnId: "t1", sessionId: "s1" }, { type: "text-delta", delta: "hi" }); + bus.onState(turn()); + + expect(seen).toEqual([ + { kind: "event", turnId: "t1", sessionId: "s1", event: { type: "text-delta", delta: "hi" } }, + { kind: "state", turnId: "t1", sessionId: "s1", turn: turn() }, + ]); + }); + + it("fans out to every subscriber and stops after unsubscribe", () => { + const bus = new TurnEventBus(); + const a: SessionBusEvent[] = []; + const b: SessionBusEvent[] = []; + const offA = bus.subscribe((e) => a.push(e)); + bus.subscribe((e) => b.push(e)); + + bus.onEvent({ turnId: "t1", sessionId: "s1" }, { type: "tool-result", toolCallId: "x" }); + offA(); + bus.onEvent({ turnId: "t1", sessionId: "s1" }, { type: "tool-result", toolCallId: "y" }); + + expect(a).toHaveLength(1); + expect(b).toHaveLength(2); + }); + + it("isolates a throwing subscriber from the rest", () => { + const bus = new TurnEventBus(); + const ok: SessionBusEvent[] = []; + bus.subscribe(() => { throw new Error("boom"); }); + bus.subscribe((e) => ok.push(e)); + + expect(() => bus.onState(turn())).not.toThrow(); + expect(ok).toHaveLength(1); + }); +}); diff --git a/apps/x/packages/core/src/agent-runtime/turn-event-bus.ts b/apps/x/packages/core/src/agent-runtime/turn-event-bus.ts new file mode 100644 index 00000000..8834b807 --- /dev/null +++ b/apps/x/packages/core/src/agent-runtime/turn-event-bus.ts @@ -0,0 +1,39 @@ +import { z } from "zod"; +import type { SessionBusEvent } from "@x/shared/dist/sessions.js"; +import type { TurnEventMeta, TurnObserver } from "../agent-loop/turn-observer.js"; +import type { AgentLoopTurn, TurnEvent } from "../agent-loop/types.js"; + +export type { SessionBusEvent }; +export type SessionBusListener = (event: SessionBusEvent) => void; + +// Fan-out bus that doubles as the loop's TurnObserver. 
/**
 * Fan-out bus that doubles as the loop's TurnObserver. Fire-and-forget, same
 * philosophy as the old runtime's IBus: a listener that throws or joins late
 * never affects the loop — the durable truth is the persisted turn.
 */
export class TurnEventBus implements TurnObserver {
    private listeners = new Set<SessionBusListener>();

    /**
     * Registers a listener for every subsequent bus event.
     *
     * @returns a function that removes this listener again.
     */
    subscribe(listener: SessionBusListener): () => void {
        this.listeners.add(listener);
        return () => {
            this.listeners.delete(listener);
        };
    }

    /** TurnObserver hook: republishes a live turn event tagged with its turn/session ids. */
    onEvent(meta: TurnEventMeta, event: TurnEvent): void {
        this.publish({ kind: "event", turnId: meta.turnId, sessionId: meta.sessionId, event });
    }

    /** TurnObserver hook: republishes a turn snapshot tagged with its turn/session ids. */
    onState(turn: z.infer<typeof AgentLoopTurn>): void {
        this.publish({ kind: "state", turnId: turn.id, sessionId: turn.sessionId, turn });
    }

    /** Delivers one event to every listener that was subscribed when the pass began. */
    private publish(event: SessionBusEvent): void {
        // Iterate a snapshot: a live Set iterator also visits entries added
        // during the pass, so a listener that (re-)subscribes from inside its
        // callback would receive the current event again and could spin.
        for (const listener of [...this.listeners]) {
            // Honour an unsubscribe performed by an earlier listener in this pass.
            if (!this.listeners.has(listener)) continue;
            try {
                listener(event);
            } catch {
                // A misbehaving subscriber must never break the loop or siblings.
            }
        }
    }
}
-147,10 +143,7 @@ function shouldRunNow( async function runAgent( agentName: string, entry: z.infer, - stateRepo: IAgentScheduleStateRepo, - runsRepo: IRunsRepo, - agentRuntime: IAgentRuntime, - idGenerator: IMonotonicallyIncreasingIdGenerator + stateRepo: IAgentScheduleStateRepo ): Promise { console.log(`[AgentRunner] Starting agent: ${agentName}`); @@ -163,31 +156,17 @@ async function runAgent( }); try { - // Create a new run via core (resolves agent + default model+provider). - const run = await createRun({ + // One standalone turn per run (default model+provider resolved by the + // adapter): a scheduled agent run is one-shot, not a conversation. + const startingMessage = entry.startingMessage ?? DEFAULT_STARTING_MESSAGE; + const result = await runHeadlessAgent({ agentId: agentName, + message: startingMessage, useCase: 'copilot_chat', subUseCase: 'scheduled', }); - console.log(`[AgentRunner] Created run ${run.id} for agent ${agentName}`); - - // Add the starting message as a user message - const startingMessage = entry.startingMessage ?? 
DEFAULT_STARTING_MESSAGE; - const messageEvent: z.infer = { - runId: run.id, - type: "message", - messageId: await idGenerator.next(), - message: { - role: "user", - content: startingMessage, - }, - subflow: [], - }; - await runsRepo.appendEvents(run.id, [messageEvent]); - console.log(`[AgentRunner] Sent starting message to agent ${agentName}: "${startingMessage}"`); - - // Trigger the run - await agentRuntime.trigger(run.id); + if (result.error) throw new Error(result.error); + console.log(`[AgentRunner] Run ${result.turnId} completed for agent ${agentName}`); // Calculate next run time const nextRunAt = calculateNextRunAt(entry.schedule); @@ -264,9 +243,6 @@ async function checkForTimeouts( async function pollAndRun(): Promise { const scheduleRepo = container.resolve("agentScheduleRepo"); const stateRepo = container.resolve("agentScheduleStateRepo"); - const runsRepo = container.resolve("runsRepo"); - const agentRuntime = container.resolve("agentRuntime"); - const idGenerator = container.resolve("idGenerator"); // Load config and state let config: z.infer; @@ -314,7 +290,7 @@ async function pollAndRun(): Promise { if (shouldRunNow(entry, agentState)) { // Run agent (don't await - let it run in background) - runAgent(agentName, entry, stateRepo, runsRepo, agentRuntime, idGenerator).catch((error) => { + runAgent(agentName, entry, stateRepo).catch((error) => { console.error(`[AgentRunner] Unhandled error in runAgent for ${agentName}:`, error); }); } diff --git a/apps/x/packages/core/src/agents/compose/system-prompt.ts b/apps/x/packages/core/src/agents/compose/system-prompt.ts new file mode 100644 index 00000000..dda99c63 --- /dev/null +++ b/apps/x/packages/core/src/agents/compose/system-prompt.ts @@ -0,0 +1,176 @@ +import fs from "fs"; +import path from "path"; +import { z } from "zod"; +import { CodeMode, VoiceOutputMode } from "@x/shared/dist/message.js"; +import { WorkDir } from "../../config/config.js"; +import { isCopilotLikeAgent } from "./user-context.js"; + 
/**
 * Location of the per-chat work-directory sidecar file.
 *
 * The work directory is scoped per chat: every chat id maps to its own
 * `workdir-<id>.json` under the `config` folder of the workspace, so setting
 * it in one chat does not leak into others.
 *
 * @param id Chat identifier embedded in the file name.
 * @returns Path of the sidecar config file for that chat.
 */
export function workDirConfigFile(id: string): string {
    const fileName = `workdir-${id}.json`;
    return path.join(WorkDir, "config", fileName);
}
/**
 * Assembles the "# Agent Memory" block from the Agent Notes directory.
 *
 * Up to three sections are produced, in this order:
 *   1. the trimmed content of `user.md`,
 *   2. the trimmed content of `preferences.md`,
 *   3. a bullet list of every other `.md` file found recursively, for
 *      on-demand reading (top-level `user.md`, `preferences.md` and
 *      `inbox.md` are left out of the list).
 *
 * Every filesystem step is best-effort: a failure drops that piece instead of
 * throwing.
 *
 * @returns The memory block, or `null` when no section has any content.
 */
export function loadAgentNotesContext(): string | null {
    const sections: string[] = [];

    // Reads one note; missing, unreadable, or blank files contribute nothing.
    const addNote = (fileName: string, render: (content: string) => string): void => {
        const file = path.join(AGENT_NOTES_DIR, fileName);
        try {
            if (!fs.existsSync(file)) return;
            const content = fs.readFileSync(file, "utf-8").trim();
            if (content) sections.push(render(content));
        } catch { /* ignore */ }
    };

    addNote(
        "user.md",
        (content) => `## About the User\nThese are notes you took about the user in previous chats.\n\n${content}`,
    );
    addNote(
        "preferences.md",
        (content) => `## User Preferences\nThese are notes you took on their general preferences.\n\n${content}`,
    );

    // Remaining notes are only listed, by path relative to the notes directory.
    const skipFiles = new Set(["user.md", "preferences.md", "inbox.md"]);
    const otherFiles: string[] = [];
    const collect = (dir: string, prefix: string): void => {
        fs.readdirSync(dir).forEach((entry) => {
            const relative = `${prefix}${entry}`;
            const fullPath = path.join(dir, entry);
            if (fs.statSync(fullPath).isDirectory()) {
                collect(fullPath, `${relative}/`);
                return;
            }
            if (entry.endsWith(".md") && !skipFiles.has(relative)) {
                otherFiles.push(relative);
            }
        });
    };
    try {
        // An error part-way through keeps whatever was collected before it.
        if (fs.existsSync(AGENT_NOTES_DIR)) collect(AGENT_NOTES_DIR, "");
    } catch { /* ignore */ }

    if (otherFiles.length > 0) {
        const listing = otherFiles.map(f => `- knowledge/Agent Notes/${f}`).join("\n");
        sections.push(`## More Specific Preferences\nFor more specific preferences, you can read these files using file-readText. Only read them when relevant to the current task.\n\n${listing}`);
    }

    return sections.length === 0 ? null : `# Agent Memory\n\n${sections.join("\n\n")}`;
}
Only reference or act on this page when the user's message clearly relates to it, such as "this page", "this article", "what I'm looking at", "this site", or "summarize this". For unrelated questions, ignore this page entirely and answer normally. Do not mention that you can see the browser unless it is relevant to the answer.`; + +function workDirBlock(userWorkDir: string): string { + return `\n\n# User Work Directory +The user has chosen the following directory as their current **work directory**: + +\`${userWorkDir}\` + +Treat this as the **default location** for file operations whenever the user refers to files generically: +- "list the files", "show me what's in here", "what's the latest report" — list or look in the work directory. +- "save this", "export it", "write that to a file" — write the output into the work directory unless the user names another location. +- "open the file I was just working on", "the doc from earlier" — assume the work directory first. + +Use absolute paths rooted at this directory with the \`file-*\` tools. For example, list with \`file-list({ path: "${userWorkDir}" })\`, read text with \`file-readText\`, and write text with \`file-writeText\`. For PDFs, Office docs, images, scanned docs, and other non-text files, use \`parseFile\` or \`LLMParse\` with the absolute path; you do NOT need to copy the file into the workspace first. + +**Exceptions — these ALWAYS take precedence over the work directory default:** +1. **Knowledge base questions.** If the user asks about anything in the knowledge graph (notes, people, organizations, projects, topics) or paths starting with \`knowledge/\`, use file tools against \`knowledge/\` as documented above. Do NOT redirect those into the work directory. +2. **Explicit paths.** If the user names a different directory or gives an absolute/relative path (e.g. "in ~/Downloads", "from /tmp/foo", "the Desktop"), honor that path exactly and ignore the work-directory default for that request. +3. 
**Workspace-specific operations.** Anything that obviously belongs in the Rowboat workspace (config files, MCP servers, agent schedules, etc.) stays in the workspace, not the work directory. + +Do not announce the work directory unless it's relevant. Just use it.`; +} + +const VOICE_INPUT_BLOCK = `\n\n# Voice Input\nThe user's message was transcribed from speech. Be aware that:\n- There may be transcription errors. Silently correct obvious ones (e.g. homophones, misheard words). If an error is genuinely ambiguous, briefly mention your interpretation (e.g. "I'm assuming you meant X").\n- Spoken messages are often long-winded. The user may ramble, repeat themselves, or correct something they said earlier in the same message. Focus on their final intent, not every word verbatim.`; + +const VOICE_OUTPUT_SUMMARY_BLOCK = `\n\n# Voice Output (MANDATORY — READ THIS FIRST)\nThe user has voice output enabled. THIS IS YOUR #1 PRIORITY: you MUST start your response with tags. If your response does not begin with tags, the user will hear nothing — which is a broken experience. NEVER skip this.\n\nRules:\n1. YOUR VERY FIRST OUTPUT MUST BE A TAG. No exceptions. Do not start with markdown, headings, or any other text. The literal first characters of your response must be "".\n2. Place ALL tags at the BEGINNING of your response, before any detailed content. Do NOT intersperse tags throughout the response.\n3. Wrap EACH spoken sentence in its own separate tag so it can be spoken incrementally. Do NOT wrap everything in a single block.\n4. Use voice as a TL;DR and navigation aid — do NOT read the entire response aloud.\n5. After all tags, you may include detailed written content (markdown, tables, code, etc.) 
that will be shown visually but not spoken.\n\n## Examples\n\nExample 1 — User asks: "what happened in my meeting with Alex yesterday?"\n\nYour meeting with Alex covered three main things: the Q2 roadmap timeline, hiring for the backend role, and the client demo next week.\nI've pulled out the key details and action items below — the demo prep notes are at the end.\n\n## Meeting with Alex — March 11\n### Roadmap\n- Agreed to push Q2 launch to April 15...\n(detailed written content continues)\n\nExample 2 — User asks: "summarize my emails"\n\nYou have five new emails since this morning.\nTwo are from your team — Jordan sent the RFC you requested and Taylor flagged a contract issue.\nThere's also a warm intro from a VC partner connecting you with someone at a prospective customer.\nI've drafted responses for three of them. The details and drafts are below.\n\n(email blocks, tables, and detailed content follow)\n\nExample 3 — User asks: "what's on my calendar today?"\n\nYou've got a pretty packed day — seven meetings starting with standup at 9.\nThe big ones are your investor call at 11, lunch with a partner from your lead VC at 12:30, and a customer call at 4.\nYour only free block for deep work is 2:30 to 4.\n\n(calendar block with full event details follows)\n\nExample 4 — User asks: "draft an email to Sam with our metrics"\n\nDone — I've drafted the email to Sam with your latest WAU and churn numbers.\nTake a look at the draft below and send it when you're ready.\n\n(email block with draft follows)\n\nREMEMBER: If you do not start with tags, the user hears silence. Always speak first, then write.`; + +const VOICE_OUTPUT_FULL_BLOCK = `\n\n# Voice Output — Full Read-Aloud (MANDATORY — READ THIS FIRST)\nThe user wants your ENTIRE response spoken aloud. THIS IS YOUR #1 PRIORITY: every single sentence must be wrapped in tags. If you write anything outside tags, the user will not hear it — which is a broken experience. NEVER skip this.\n\nRules:\n1. 
YOUR VERY FIRST OUTPUT MUST BE A TAG. No exceptions. The literal first characters of your response must be "".\n2. Wrap EACH sentence in its own separate tag so it can be spoken incrementally.\n3. Write your response in a natural, conversational style suitable for listening — no markdown headings, bullet points, or formatting symbols. Use plain spoken language.\n4. Structure the content as if you are speaking to the user directly. Use transitions like "first", "also", "one more thing" instead of visual formatting.\n5. EVERY sentence MUST be inside a tag. Do not leave ANY content outside tags. If it's not in a tag, the user cannot hear it.\n\n## Examples\n\nExample 1 — User asks: "what happened in my meeting with Alex yesterday?"\n\nYour meeting with Alex covered three main things.\nFirst, you discussed the Q2 roadmap timeline and agreed to push the launch to April.\nSecond, you talked about hiring for the backend role — Alex will send over two candidates by Friday.\nAnd lastly, the client demo is next week on Thursday at 2pm, and you're handling the intro slides.\n\nExample 2 — User asks: "summarize my emails"\n\nYou've got five new emails since this morning.\nTwo are from your team — Jordan sent the RFC you asked for, and Taylor flagged a contract issue that needs your sign-off.\nThere's a warm intro from a VC partner connecting you with an engineering lead at a potential customer.\nAnd someone from a prospective client wants to confirm your API tier before your call this afternoon.\nI've drafted replies for three of them — the metrics update, the intro, and the API question.\nThe only one I left for you is Taylor's contract redline, since that needs your judgment on the liability cap.\n\nExample 3 — User asks: "what's on my calendar today?"\n\nYou've got a packed day — seven meetings starting with standup at 9.\nThe highlights are your investor call at 11, lunch with a VC partner at 12:30, and a customer call at 4.\nYour only open block for deep work is 2:30 to 
4, so plan accordingly.\nOh, and your 1-on-1 with your co-founder is at 5:30 — that's a walking meeting.\n\nExample 4 — User asks: "how are our metrics looking?"\n\nMetrics are looking strong this week.\nYou hit 2,573 weekly active users, which is up 12% week over week.\nThat means you've crossed the 2,500 milestone — worth calling out in your next investor update.\nChurn is down to 4.1%, improving month over month.\nThe trailing 8-week compound growth rate is about 10%.\n\nREMEMBER: Start with immediately. No preamble, no markdown before it. Speak first.`; + +const SEARCH_BLOCK = `\n\n# Search\nThe user has requested a search. Use the web-search tool to answer their query.`; + +function codeModeBlock(codeMode: z.infer): string { + const agentDisplay = codeMode === "claude" ? "Claude Code" : "Codex"; + return `\n\n# Code Mode (Active) — Agent: ${agentDisplay} +The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). For EVERY coding task this turn, use **${agentDisplay}**, and narrate that agent ("Using ${agentDisplay} to …"). + +The chip is the single source of truth for which agent runs: +- Do NOT carry over a different agent from earlier in this thread — even if a previous run used the other agent, use **${agentDisplay}** now. +- Do NOT switch agents based on an in-chat text request ("use codex", "switch to claude"). The agent only changes when the user toggles the chip; if they ask in chat, tell them to toggle the chip. + +**How to run coding work — call the \`code_agent_run\` tool** with: +- \`agent\`: \`${codeMode}\` (always — match the chip). +- \`cwd\`: the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once). +- \`prompt\`: a clear, self-contained coding instruction. 
+ +The tool runs the agent on-device and streams its tool calls, file diffs, and plan into the chat; any action needing approval surfaces as an inline permission card, so you do NOT pre-confirm with an in-chat "reply yes". This chat keeps ONE persistent agent session, so follow-up coding requests automatically resume with full context — just call \`code_agent_run\` again. Do NOT shell out to \`acpx\` or \`executeCommand\` for coding, and do NOT fall back to your own file tools. + +If the user's message is clearly NOT a coding request (small talk, an unrelated question), answer directly without invoking the coding agent. Code mode signals readiness, not that every message must route through the agent.`; +} + +// Assembles the full system prompt. workDirId is the chat scope used to look up +// a per-chat work directory (runId in the old runtime, sessionId in the new); +// null skips the work-dir block. +export function buildSystemInstructions(opts: { + instructions: string; + agentName: string | null | undefined; + workDirId: string | null; + voiceInput?: boolean; + voiceOutput?: z.infer | null; + searchEnabled?: boolean; + codeMode?: z.infer | null; +}): string { + let out = `${opts.instructions}\n\n${USER_CONTEXT_SYSTEM_INSTRUCTIONS}`; + + if (isCopilotLikeAgent(opts.agentName)) { + const notes = loadAgentNotesContext(); + if (notes) out += `\n\n${notes}`; + const userWorkDir = opts.workDirId ? 
loadUserWorkDir(opts.workDirId) : null; + if (userWorkDir) out += workDirBlock(userWorkDir); + } + + if (opts.voiceInput) out += VOICE_INPUT_BLOCK; + if (opts.voiceOutput === "summary") out += VOICE_OUTPUT_SUMMARY_BLOCK; + else if (opts.voiceOutput === "full") out += VOICE_OUTPUT_FULL_BLOCK; + if (opts.searchEnabled) out += SEARCH_BLOCK; + if (opts.codeMode) out += codeModeBlock(opts.codeMode); + + return out; +} diff --git a/apps/x/packages/core/src/agents/compose/user-context.ts b/apps/x/packages/core/src/agents/compose/user-context.ts new file mode 100644 index 00000000..6444b2d3 --- /dev/null +++ b/apps/x/packages/core/src/agents/compose/user-context.ts @@ -0,0 +1,88 @@ +import { z } from "zod"; +import { MiddlePaneContext, UserMessageContext } from "@x/shared/dist/message.js"; + +// Per-message context helpers shared by the old runtime and the new composer. +// Datetime + middle-pane ride on the user message (not the system prompt): +// captured fresh when the message is sent, then prepended at model-call time by +// convertFromMessages. Extracted verbatim from agents/runtime.ts. 
+ +export function isCopilotLikeAgent(agentName: string | null | undefined): boolean { + return agentName === "copilot" || agentName === "rowboatx"; +} + +export function formatCurrentDateTime(now: Date): string { + return now.toLocaleString("en-US", { + weekday: "long", + year: "numeric", + month: "long", + day: "numeric", + hour: "numeric", + minute: "2-digit", + timeZoneName: "short", + }); +} + +export function toUserMessageContextMiddlePane( + middlePaneContext: z.infer | null, +): z.infer["middlePane"] { + if (!middlePaneContext) { + return { kind: "empty" }; + } + if (middlePaneContext.kind === "note") { + return { + kind: "note", + path: middlePaneContext.path, + content: middlePaneContext.content, + }; + } + return { + kind: "browser", + url: middlePaneContext.url, + title: middlePaneContext.title, + }; +} + +export function buildUserMessageContext({ + agentName, + middlePaneContext, +}: { + agentName: string | null | undefined; + middlePaneContext: z.infer | null; +}): z.infer { + return { + currentDateTime: formatCurrentDateTime(new Date()), + ...(isCopilotLikeAgent(agentName) + ? 
{ middlePane: toUserMessageContextMiddlePane(middlePaneContext) } + : {}), + }; +} + +export function formatUserMessageContextForLlm( + userMessageContext: z.infer, +): string { + const sections: string[] = []; + + if (userMessageContext.currentDateTime) { + sections.push(`Current date and time: ${userMessageContext.currentDateTime}`); + } + + if (userMessageContext.middlePane) { + if (userMessageContext.middlePane.kind === "empty") { + sections.push(`Middle pane:\nState: empty`); + } else if (userMessageContext.middlePane.kind === "note") { + sections.push(`Middle pane:\nState: note\nPath: ${userMessageContext.middlePane.path}\n\nContent:\n\`\`\`\n${userMessageContext.middlePane.content}\n\`\`\``); + } else { + sections.push(`Middle pane:\nState: browser\nURL: ${userMessageContext.middlePane.url}\nTitle: ${userMessageContext.middlePane.title}`); + } + } + + if (sections.length === 0) { + return ""; + } + + return `# User Context +${sections.join("\n\n")} + +# User Message +`; +} diff --git a/apps/x/packages/core/src/agents/runtime.ts b/apps/x/packages/core/src/agents/runtime.ts index 0f2c22d5..0ec84dc3 100644 --- a/apps/x/packages/core/src/agents/runtime.ts +++ b/apps/x/packages/core/src/agents/runtime.ts @@ -1,290 +1,26 @@ -import { jsonSchema, ModelMessage } from "ai"; -import fs from "fs"; -import path from "path"; -import { WorkDir } from "../config/config.js"; -import { Agent, ToolAttachment } from "@x/shared/dist/agent.js"; -import { AssistantContentPart, AssistantMessage, Message, MessageList, ProviderOptions, ToolCallPart, ToolMessage, UserMessageContext } from "@x/shared/dist/message.js"; -import { LanguageModel, stepCountIs, streamText, tool, Tool, ToolSet } from "ai"; +import { ModelMessage } from "ai"; +import { Agent } from "@x/shared/dist/agent.js"; +import { Message, UserMessageContext } from "@x/shared/dist/message.js"; import { z } from "zod"; -import { LlmStepStreamEvent } from "@x/shared/dist/llm-step-events.js"; -import { execTool } from 
"../application/lib/exec-tool.js"; -import { AskHumanRequestEvent, RunEvent, ToolPermissionMetadata, ToolPermissionRequestEvent } from "@x/shared/dist/runs.js"; -import { BuiltinTools } from "../application/lib/builtin-tools.js"; +import { parse } from "yaml"; import { buildCopilotAgent } from "../application/assistant/agent.js"; import { buildLiveNoteAgent } from "../knowledge/live-note/agent.js"; import { buildBackgroundTaskAgent } from "../background-tasks/agent.js"; -import { isBlocked, extractCommandNames } from "../application/lib/command-executor.js"; -import { getFileAccessAllowList, type FileAccessGrant, type FileAccessOperation } from "../config/security.js"; -import { resolveFilePathForPermission } from "../filesystem/files.js"; import container from "../di/container.js"; -import { notifyIfEnabled } from "../application/notification/notifier.js"; -import { IModelConfigRepo } from "../models/repo.js"; -import { createProvider } from "../models/models.js"; -import { resolveProviderConfig } from "../models/defaults.js"; import { IAgentsRepo } from "./repo.js"; -import { IMonotonicallyIncreasingIdGenerator } from "../application/lib/id-gen.js"; -import { IBus } from "../application/lib/bus.js"; -import { IMessageQueue, type MiddlePaneContext } from "../application/lib/message-queue.js"; -import { IRunsRepo } from "../runs/repo.js"; -import { IRunsLock } from "../runs/lock.js"; -import { IAbortRegistry } from "../runs/abort-registry.js"; -import { PrefixLogger } from "@x/shared"; -import { parse } from "yaml"; -import { captureLlmUsage } from "../analytics/usage.js"; -import { enterUseCase, withUseCase, type UseCase } from "../analytics/use_case.js"; import { getRaw as getNoteCreationRaw } from "../knowledge/note_creation.js"; import { getRaw as getLabelingAgentRaw } from "../knowledge/labeling_agent.js"; import { getRaw as getNoteTaggingAgentRaw } from "../knowledge/note_tagging_agent.js"; import { getRaw as getInlineTaskAgentRaw } from 
"../knowledge/inline_task_agent.js"; import { getRaw as getAgentNotesAgentRaw } from "../knowledge/agent_notes_agent.js"; -import { classifyToolPermissions, type AutoPermissionCandidate } from "../security/auto-permission-classifier.js"; -const AGENT_NOTES_DIR = path.join(WorkDir, 'knowledge', 'Agent Notes'); - -// Work directory is scoped per run (per chat). Each run gets its own sidecar -// config file so setting it in one chat does not leak into others. -function workDirConfigFile(runId: string): string { - return path.join(WorkDir, 'config', `workdir-${runId}.json`); -} - -type ToolPermissionMetadataValue = z.infer; - -function isPathInside(parent: string, child: string): boolean { - const relative = path.relative(parent, child); - return relative === '' || (!!relative && !relative.startsWith('..') && !path.isAbsolute(relative)); -} - -function fileGrantCoversPath(grant: FileAccessGrant, operation: FileAccessOperation, resolvedPath: string): boolean { - return grant.operation === operation && isPathInside(path.resolve(grant.pathPrefix), path.resolve(resolvedPath)); -} - -function commonPathPrefix(paths: string[]): string { - if (!paths.length) return path.resolve(WorkDir); - const split = paths.map(p => path.resolve(p).split(path.sep).filter(Boolean)); - const first = split[0]; - const common: string[] = []; - for (let i = 0; i < first.length; i++) { - if (split.every(parts => parts[i] === first[i])) { - common.push(first[i]); - } else { - break; - } - } - const prefix = `${path.sep}${common.join(path.sep)}`; - return prefix === path.sep ? 
prefix : path.resolve(prefix); -} - -function grantPrefixForTool(toolName: string, resolvedPaths: string[]): string { - if (toolName === 'file-list' || toolName === 'file-glob' || toolName === 'file-grep' || toolName === 'file-mkdir') { - return commonPathPrefix(resolvedPaths); - } - const parentPaths = resolvedPaths.map(p => path.dirname(p)); - return commonPathPrefix(parentPaths); -} - -function filePermissionTargets(toolName: string, args: Record): { operation: FileAccessOperation; paths: string[] } | null { - const pathArg = typeof args.path === 'string' ? args.path : undefined; - switch (toolName) { - case 'file-readText': - case 'parseFile': - case 'LLMParse': - case 'file-exists': - case 'file-stat': - return pathArg ? { operation: 'read', paths: [pathArg] } : null; - case 'file-list': - return pathArg ? { operation: 'list', paths: [pathArg || '.'] } : null; - case 'file-glob': - return { operation: 'search', paths: [typeof args.cwd === 'string' && args.cwd ? args.cwd : '.'] }; - case 'file-grep': - return { operation: 'search', paths: [typeof args.searchPath === 'string' && args.searchPath ? args.searchPath : '.'] }; - case 'file-writeText': - case 'file-editText': - case 'file-mkdir': - return pathArg ? { operation: 'write', paths: [pathArg] } : null; - case 'file-copy': - case 'file-rename': { - const from = typeof args.from === 'string' ? args.from : undefined; - const to = typeof args.to === 'string' ? args.to : undefined; - return from && to ? { operation: 'write', paths: [from, to] } : null; - } - case 'file-remove': - return pathArg ? 
{ operation: 'delete', paths: [pathArg] } : null; - default: - return null; - } -} - -async function getToolPermissionMetadata( - toolCall: z.infer, - underlyingTool: z.infer, - sessionAllowedCommands: Set, - sessionAllowedFileAccess: FileAccessGrant[], -): Promise { - if (underlyingTool.type !== 'builtin') { - return null; - } - - if (underlyingTool.name === 'executeCommand') { - const args = toolCall.arguments; - if (!args || typeof args !== 'object' || !('command' in args)) { - return null; - } - const command = String((args as { command: unknown }).command); - if (!isBlocked(command, sessionAllowedCommands)) { - return null; - } - return { - kind: 'command', - commandNames: extractCommandNames(command), - }; - } - - const args = toolCall.arguments && typeof toolCall.arguments === 'object' - ? toolCall.arguments as Record - : {}; - const targets = filePermissionTargets(underlyingTool.name, args); - if (!targets) { - return null; - } - - const resolvedTargets = await Promise.all(targets.paths.map(p => resolveFilePathForPermission(p))); - const outsideWorkspacePaths = resolvedTargets - .filter(target => !target.isInsideWorkspace) - .map(target => target.canonicalPath); - if (!outsideWorkspacePaths.length) { - return null; - } - - const persistentGrants = getFileAccessAllowList(); - const allGrants = [...persistentGrants, ...sessionAllowedFileAccess]; - const uncovered = outsideWorkspacePaths.filter(resolvedPath => - !allGrants.some(grant => fileGrantCoversPath(grant, targets.operation, resolvedPath)) - ); - if (!uncovered.length) { - return null; - } - - return { - kind: 'file', - operation: targets.operation, - paths: uncovered, - pathPrefix: grantPrefixForTool(underlyingTool.name, uncovered), - }; -} - -function loadUserWorkDir(runId: string): string | null { - try { - const file = workDirConfigFile(runId); - if (!fs.existsSync(file)) return null; - const raw = fs.readFileSync(file, 'utf-8'); - const parsed = JSON.parse(raw) as { path?: unknown }; - const value 
= typeof parsed.path === 'string' ? parsed.path.trim() : ''; - return value || null; - } catch { - return null; - } -} - -function loadAgentNotesContext(): string | null { - const sections: string[] = []; - - const userFile = path.join(AGENT_NOTES_DIR, 'user.md'); - const prefsFile = path.join(AGENT_NOTES_DIR, 'preferences.md'); - - try { - if (fs.existsSync(userFile)) { - const content = fs.readFileSync(userFile, 'utf-8').trim(); - if (content) { - sections.push(`## About the User\nThese are notes you took about the user in previous chats.\n\n${content}`); - } - } - } catch { /* ignore */ } - - try { - if (fs.existsSync(prefsFile)) { - const content = fs.readFileSync(prefsFile, 'utf-8').trim(); - if (content) { - sections.push(`## User Preferences\nThese are notes you took on their general preferences.\n\n${content}`); - } - } - } catch { /* ignore */ } - - // List other Agent Notes files for on-demand access - const otherFiles: string[] = []; - const skipFiles = new Set(['user.md', 'preferences.md', 'inbox.md']); - try { - if (fs.existsSync(AGENT_NOTES_DIR)) { - function listMdFiles(dir: string, prefix: string) { - for (const entry of fs.readdirSync(dir)) { - const fullPath = path.join(dir, entry); - const stat = fs.statSync(fullPath); - if (stat.isDirectory()) { - listMdFiles(fullPath, `${prefix}${entry}/`); - } else if (entry.endsWith('.md') && !skipFiles.has(`${prefix}${entry}`)) { - otherFiles.push(`${prefix}${entry}`); - } - } - } - listMdFiles(AGENT_NOTES_DIR, ''); - } - } catch { /* ignore */ } - - if (otherFiles.length > 0) { - sections.push(`## More Specific Preferences\nFor more specific preferences, you can read these files using file-readText. 
Only read them when relevant to the current task.\n\n${otherFiles.map(f => `- knowledge/Agent Notes/${f}`).join('\n')}`); - } - - if (sections.length === 0) return null; - return `# Agent Memory\n\n${sections.join('\n\n')}`; -} - -function isCopilotLikeAgent(agentName: string | null | undefined): boolean { - return agentName === 'copilot' || agentName === 'rowboatx'; -} - -function formatCurrentDateTime(now: Date): string { - return now.toLocaleString('en-US', { - weekday: 'long', - year: 'numeric', - month: 'long', - day: 'numeric', - hour: 'numeric', - minute: '2-digit', - timeZoneName: 'short', - }); -} - -function toUserMessageContextMiddlePane(middlePaneContext: MiddlePaneContext | null): z.infer['middlePane'] { - if (!middlePaneContext) { - return { kind: 'empty' }; - } - if (middlePaneContext.kind === 'note') { - return { - kind: 'note', - path: middlePaneContext.path, - content: middlePaneContext.content, - }; - } - return { - kind: 'browser', - url: middlePaneContext.url, - title: middlePaneContext.title, - }; -} - -function buildUserMessageContext({ - agentName, - middlePaneContext, -}: { - agentName: string | null | undefined; - middlePaneContext: MiddlePaneContext | null; -}): z.infer { - return { - currentDateTime: formatCurrentDateTime(new Date()), - ...(isCopilotLikeAgent(agentName) - ? { middlePane: toUserMessageContextMiddlePane(middlePaneContext) } - : {}), - }; -} +// Runtime-agnostic agent helpers shared across the app after the old agent +// runtime was retired. 
The new runtime (agent-loop + agent-runtime bridges) +// owns execution; this module keeps only: +// - loadAgent: resolve an agent's config by id (built-ins + repo fallback) +// - convertFromMessages: our Message[] -> Vercel AI SDK ModelMessage[] +// - formatUserMessageContextForLlm: render UserMessageContext as a text prefix +// (private; used by convertFromMessages) function formatUserMessageContextForLlm(userMessageContext: z.infer): string { const sections: string[] = []; @@ -314,338 +50,6 @@ ${sections.join('\n\n')} `; } -const USER_CONTEXT_SYSTEM_INSTRUCTIONS = `# Hidden User Context -User messages may include a hidden "# User Context" section before "# User Message". Treat it as runtime metadata captured when that specific user message was sent. The actual user-authored text starts under "# User Message". - -Use "Current date and time" for temporal reasoning. - -If Middle pane context is present, it reflects what the user had open at the time of that specific message and overrides earlier middle-pane references. If the conversation history references a different note or browser page, the user had since closed or navigated away from it. Do not treat earlier context as current. - -If Middle pane state is empty, the user was not looking at any relevant note or web page at that point. Answer the user's message on its own merits. - -If Middle pane state is note, the supplied path and content are available so you can reference the note when relevant. The user may or may not be talking about this note. Do NOT assume every message is about it. Only reference or act on this note when the user's message clearly relates to it, such as "this note", "what I'm looking at", "here", "above", "below", or questions whose subject is plainly the note's content. For unrelated questions, ignore this note entirely and answer normally. Do not mention that you can see this note unless it is relevant to the answer. 
- -If Middle pane state is browser, only the URL and page title are supplied; the page content itself is NOT included. If you need the page content to answer, use the browser tools available to you to read the page. The user may or may not be talking about this page. Only reference or act on this page when the user's message clearly relates to it, such as "this page", "this article", "what I'm looking at", "this site", or "summarize this". For unrelated questions, ignore this page entirely and answer normally. Do not mention that you can see the browser unless it is relevant to the answer.`; - -export interface IAgentRuntime { - trigger(runId: string): Promise; -} - -export class AgentRuntime implements IAgentRuntime { - private runsRepo: IRunsRepo; - private idGenerator: IMonotonicallyIncreasingIdGenerator; - private bus: IBus; - private messageQueue: IMessageQueue; - private modelConfigRepo: IModelConfigRepo; - private runsLock: IRunsLock; - private abortRegistry: IAbortRegistry; - - constructor({ - runsRepo, - idGenerator, - bus, - messageQueue, - modelConfigRepo, - runsLock, - abortRegistry, - }: { - runsRepo: IRunsRepo; - idGenerator: IMonotonicallyIncreasingIdGenerator; - bus: IBus; - messageQueue: IMessageQueue; - modelConfigRepo: IModelConfigRepo; - runsLock: IRunsLock; - abortRegistry: IAbortRegistry; - }) { - this.runsRepo = runsRepo; - this.idGenerator = idGenerator; - this.bus = bus; - this.messageQueue = messageQueue; - this.modelConfigRepo = modelConfigRepo; - this.runsLock = runsLock; - this.abortRegistry = abortRegistry; - } - - async trigger(runId: string): Promise { - if (!await this.runsLock.lock(runId)) { - console.log(`unable to acquire lock on run ${runId}`); - return; - } - const signal = this.abortRegistry.createForRun(runId); - try { - await this.bus.publish({ - runId, - type: "run-processing-start", - subflow: [], - }); - let totalEvents = 0; - while (true) { - // Check for abort before each iteration - if (signal.aborted) { - break; - } - 
- let eventCount = 0; - const run = await this.runsRepo.fetch(runId); - if (!run) { - throw new Error(`Run ${runId} not found`); - } - const state = new AgentState(); - for (const event of run.log) { - state.ingest(event); - } - try { - for await (const event of streamAgent({ - state, - idGenerator: this.idGenerator, - runId, - messageQueue: this.messageQueue, - modelConfigRepo: this.modelConfigRepo, - signal, - abortRegistry: this.abortRegistry, - bus: this.bus, - })) { - eventCount++; - if (event.type !== "llm-stream-event") { - await this.runsRepo.appendEvents(runId, [event]); - } - await this.bus.publish(event); - } - } catch (error) { - if (error instanceof Error && error.name === "AbortError") { - // Abort detected — exit cleanly - break; - } - throw error; - } - - totalEvents += eventCount; - // if no events, break - if (!eventCount) { - break; - } - } - - // Emit run-stopped event if aborted - if (signal.aborted) { - const stoppedEvent: z.infer = { - runId, - type: "run-stopped", - reason: "user-requested", - subflow: [], - }; - await this.runsRepo.appendEvents(runId, [stoppedEvent]); - await this.bus.publish(stoppedEvent); - } else if (totalEvents > 0) { - // The run reached a natural stopping point and actually did - // something this cycle. Notify "chat completion" — unless it - // paused on a permission request, which surfaces its own - // notification (distinguish by inspecting the final state). 
- const finalRun = await this.runsRepo.fetch(runId); - if (finalRun) { - const finalState = new AgentState(); - for (const event of finalRun.log) { - finalState.ingest(event); - } - if (finalState.getPendingPermissions().length === 0) { - void notifyIfEnabled("chat_completion", { - title: "Response ready", - message: "Your agent finished responding.", - link: `rowboat://open?type=chat&runId=${runId}`, - actionLabel: "Open", - onlyWhenBackground: true, - }); - } - } - } - } catch (error) { - console.error(`Run ${runId} failed:`, error); - const message = error instanceof Error - ? (error.stack || error.message || error.name) - : typeof error === "string" ? error : JSON.stringify(error); - const errorEvent: z.infer = { - runId, - type: "error", - error: message, - subflow: [], - }; - await this.runsRepo.appendEvents(runId, [errorEvent]); - await this.bus.publish(errorEvent); - } finally { - this.abortRegistry.cleanup(runId); - await this.runsLock.release(runId); - await this.bus.publish({ - runId, - type: "run-processing-end", - subflow: [], - }); - } - } -} - -export async function mapAgentTool(t: z.infer): Promise { - switch (t.type) { - case "mcp": - return tool({ - name: t.name, - description: t.description, - inputSchema: jsonSchema(t.inputSchema), - }); - case "agent": { - const agent = await loadAgent(t.name); - if (!agent) { - throw new Error(`Agent ${t.name} not found`); - } - return tool({ - name: t.name, - description: agent.description, - inputSchema: z.object({ - message: z.string().describe("The message to send to the workflow"), - }), - }); - } - case "builtin": { - if (t.name === "ask-human") { - return tool({ - description: "Ask a human before proceeding. 
Optionally pass `options` (an array of short button labels) to render the question as a one-click choice; the user's response will be the chosen label verbatim.", - inputSchema: z.object({ - question: z.string().describe("The question to ask the human"), - options: z.array(z.string()).optional().describe("Optional short button labels (2-4 recommended). If provided, the user picks one with a single click instead of typing. The response you receive will be the chosen label."), - }), - }); - } - const match = BuiltinTools[t.name]; - if (!match) { - throw new Error(`Unknown builtin tool: ${t.name}`); - } - return tool({ - description: match.description, - inputSchema: match.inputSchema, - }); - } - } -} - -export class RunLogger { - private logFile: string; - private fileHandle: fs.WriteStream; - - ensureRunsDir() { - const runsDir = path.join(WorkDir, "runs"); - if (!fs.existsSync(runsDir)) { - fs.mkdirSync(runsDir, { recursive: true }); - } - } - - constructor(runId: string) { - this.ensureRunsDir(); - this.logFile = path.join(WorkDir, "runs", `${runId}.jsonl`); - this.fileHandle = fs.createWriteStream(this.logFile, { - flags: "a", - encoding: "utf8", - }); - } - - log(event: z.infer) { - if (event.type !== "llm-stream-event") { - this.fileHandle.write(JSON.stringify(event) + "\n"); - } - } - - close() { - this.fileHandle.close(); - } -} - -export class StreamStepMessageBuilder { - private parts: z.infer[] = []; - private textBuffer: string = ""; - private reasoningBuffer: string = ""; - private providerOptions: z.infer | undefined = undefined; - private reasoningProviderOptions: z.infer | undefined = undefined; - - flushBuffers() { - if (this.reasoningBuffer || this.reasoningProviderOptions) { - this.parts.push({ type: "reasoning", text: this.reasoningBuffer, providerOptions: this.reasoningProviderOptions }); - this.reasoningBuffer = ""; - this.reasoningProviderOptions = undefined; - } - if (this.textBuffer) { - this.parts.push({ type: "text", text: this.textBuffer 
}); - this.textBuffer = ""; - } - } - - ingest(event: z.infer) { - switch (event.type) { - case "reasoning-start": - break; - case "reasoning-end": - this.reasoningProviderOptions = event.providerOptions; - this.flushBuffers(); - break; - case "text-start": - case "text-end": - this.flushBuffers(); - break; - case "reasoning-delta": - this.reasoningBuffer += event.delta; - break; - case "text-delta": - this.textBuffer += event.delta; - break; - case "tool-call": - this.parts.push({ - type: "tool-call", - toolCallId: event.toolCallId, - toolName: event.toolName, - arguments: event.input, - providerOptions: event.providerOptions, - }); - break; - case "finish-step": - this.providerOptions = event.providerOptions; - break; - case "error": - this.flushBuffers(); - break; - } - } - - get(): z.infer { - this.flushBuffers(); - return { - role: "assistant", - content: this.parts, - providerOptions: this.providerOptions, - }; - } -} - -function formatLlmStreamError(rawError: unknown): string { - let name: string | undefined; - let responseBody: string | undefined; - if (rawError && typeof rawError === "object") { - const err = rawError as Record; - const nested = (err.error && typeof err.error === "object") ? err.error as Record : null; - const nameValue = err.name ?? nested?.name; - const responseBodyValue = err.responseBody ?? nested?.responseBody; - if (nameValue !== undefined) { - name = String(nameValue); - } - if (responseBodyValue !== undefined) { - responseBody = String(responseBodyValue); - } - } else if (typeof rawError === "string") { - responseBody = rawError; - } - - const lines: string[] = []; - if (name) lines.push(`name: ${name}`); - if (responseBody) lines.push(`responseBody: ${responseBody}`); - return lines.length ? 
lines.join("\n") : "Model stream error"; -} - export async function loadAgent(id: string): Promise> { if (id === "copilot" || id === "rowboatx") { return buildCopilotAgent(); @@ -899,901 +303,3 @@ export function convertFromMessages(messages: z.infer[]): ModelM // doing this because: https://github.com/OpenRouterTeam/ai-sdk-provider/issues/262 return JSON.parse(JSON.stringify(result)); } - -async function buildTools(agent: z.infer): Promise { - const tools: ToolSet = {}; - for (const [name, tool] of Object.entries(agent.tools ?? {})) { - try { - // Skip builtin tools that declare themselves unavailable - if (tool.type === 'builtin') { - const builtin = BuiltinTools[tool.name]; - if (builtin?.isAvailable && !(await builtin.isAvailable())) { - continue; - } - } - tools[name] = await mapAgentTool(tool); - } catch (error) { - console.error(`Error mapping tool ${name}:`, error); - continue; - } - } - return tools; -} - -export class AgentState { - runId: string | null = null; - agent: z.infer | null = null; - agentName: string | null = null; - runModel: string | null = null; - runProvider: string | null = null; - permissionMode: "manual" | "auto" = "manual"; - runUseCase: UseCase | null = null; - runSubUseCase: string | null = null; - messages: z.infer = []; - lastAssistantMsg: z.infer | null = null; - subflowStates: Record = {}; - toolCallIdMap: Record> = {}; - pendingToolCalls: Record = {}; - pendingToolPermissionRequests: Record> = {}; - pendingAskHumanRequests: Record> = {}; - allowedToolCallIds: Record = {}; - deniedToolCallIds: Record = {}; - autoAllowedToolCalls: Record = {}; - autoDeniedToolCalls: Record = {}; - sessionAllowedCommands: Set = new Set(); - sessionAllowedFileAccess: FileAccessGrant[] = []; - - getPendingPermissions(): z.infer[] { - const response: z.infer[] = []; - for (const [id, subflowState] of Object.entries(this.subflowStates)) { - for (const perm of subflowState.getPendingPermissions()) { - response.push({ - ...perm, - subflow: [id, 
...perm.subflow], - }); - } - } - for (const perm of Object.values(this.pendingToolPermissionRequests)) { - response.push({ - ...perm, - subflow: [], - }); - } - return response; - } - - getPendingAskHumans(): z.infer[] { - const response: z.infer[] = []; - for (const [id, subflowState] of Object.entries(this.subflowStates)) { - for (const ask of subflowState.getPendingAskHumans()) { - response.push({ - ...ask, - subflow: [id, ...ask.subflow], - }); - } - } - for (const ask of Object.values(this.pendingAskHumanRequests)) { - response.push({ - ...ask, - subflow: [], - }); - } - return response; - } - - /** - * Returns tool-result messages for all pending tool calls, marking them as aborted. - * This is called when a run is stopped so the LLM knows what happened to its tool requests. - */ - getAbortedToolResults(): z.infer[] { - const results: z.infer[] = []; - for (const toolCallId of Object.keys(this.pendingToolCalls)) { - const toolCall = this.toolCallIdMap[toolCallId]; - if (toolCall) { - results.push({ - role: "tool", - content: JSON.stringify({ error: "Tool execution aborted" }), - toolCallId, - toolName: toolCall.toolName, - }); - } - } - return results; - } - - /** - * Clear all pending state (permissions, ask-human, tool calls). - * Used when a run is stopped. 
- */ - clearAllPending(): void { - this.pendingToolPermissionRequests = {}; - this.pendingAskHumanRequests = {}; - // Recursively clear subflows - for (const subflow of Object.values(this.subflowStates)) { - subflow.clearAllPending(); - } - } - - finalResponse(): string { - if (!this.lastAssistantMsg) { - return ''; - } - if (typeof this.lastAssistantMsg.content === "string") { - return this.lastAssistantMsg.content; - } - return this.lastAssistantMsg.content.reduce((acc, part) => { - if (part.type === "text") { - return acc + part.text; - } - return acc; - }, ""); - } - - ingest(event: z.infer) { - if (event.subflow.length > 0) { - const { subflow, ...rest } = event; - if (!this.subflowStates[subflow[0]]) { - this.subflowStates[subflow[0]] = new AgentState(); - } - this.subflowStates[subflow[0]].ingest({ - ...rest, - subflow: subflow.slice(1), - }); - return; - } - switch (event.type) { - case "start": - this.runId = event.runId; - this.agentName = event.agentName; - this.runModel = event.model; - this.runProvider = event.provider; - this.permissionMode = event.permissionMode ?? "manual"; - this.runUseCase = event.useCase ?? null; - this.runSubUseCase = event.subUseCase ?? null; - break; - case "spawn-subflow": - // Seed the subflow state with its agent so downstream loadAgent works. - // Subflows inherit the parent run's model+provider — there's one pair per run. 
- if (!this.subflowStates[event.toolCallId]) { - this.subflowStates[event.toolCallId] = new AgentState(); - } - this.subflowStates[event.toolCallId].agentName = event.agentName; - this.subflowStates[event.toolCallId].runModel = this.runModel; - this.subflowStates[event.toolCallId].runProvider = this.runProvider; - this.subflowStates[event.toolCallId].permissionMode = this.permissionMode; - this.subflowStates[event.toolCallId].runUseCase = this.runUseCase; - this.subflowStates[event.toolCallId].runSubUseCase = this.runSubUseCase; - break; - case "message": - this.messages.push(event.message); - if (event.message.content instanceof Array) { - for (const part of event.message.content) { - if (part.type === "tool-call") { - this.toolCallIdMap[part.toolCallId] = part; - this.pendingToolCalls[part.toolCallId] = true; - } - } - } - if (event.message.role === "tool") { - const message = event.message as z.infer; - delete this.pendingToolCalls[message.toolCallId]; - } - if (event.message.role === "assistant") { - this.lastAssistantMsg = event.message; - } - break; - case "tool-permission-request": - this.pendingToolPermissionRequests[event.toolCall.toolCallId] = event; - break; - case "tool-permission-response": - switch (event.response) { - case "approve": - this.allowedToolCallIds[event.toolCallId] = true; - { - const permissionRequest = this.pendingToolPermissionRequests[event.toolCallId]; - if (event.scope === "session" && permissionRequest?.permission?.kind === "file") { - this.sessionAllowedFileAccess.push({ - operation: permissionRequest.permission.operation, - pathPrefix: permissionRequest.permission.pathPrefix, - }); - } - } - // For session scope, extract command names and add to session allowlist - if (event.scope === "session") { - const toolCall = this.toolCallIdMap[event.toolCallId]; - if (toolCall && typeof toolCall.arguments === 'object' && toolCall.arguments !== null && 'command' in toolCall.arguments) { - const names = 
extractCommandNames(String(toolCall.arguments.command)); - for (const name of names) { - this.sessionAllowedCommands.add(name); - } - } - } - break; - case "deny": - this.deniedToolCallIds[event.toolCallId] = true; - delete this.autoDeniedToolCalls[event.toolCallId]; - break; - } - delete this.pendingToolPermissionRequests[event.toolCallId]; - break; - case "tool-permission-auto-decision": - switch (event.decision) { - case "allow": - this.allowedToolCallIds[event.toolCallId] = true; - this.autoAllowedToolCalls[event.toolCallId] = { reason: event.reason }; - break; - case "deny": - this.autoDeniedToolCalls[event.toolCallId] = { reason: event.reason }; - break; - } - break; - case "ask-human-request": - this.pendingAskHumanRequests[event.toolCallId] = event; - break; - case "ask-human-response": { - // console.error('im here', this.agentName, this.runId, event.subflow); - const ogEvent = this.pendingAskHumanRequests[event.toolCallId]; - this.messages.push({ - role: "tool", - content: JSON.stringify({ - userResponse: event.response, - }), - toolCallId: ogEvent.toolCallId, - toolName: this.toolCallIdMap[ogEvent.toolCallId]!.toolName, - }); - delete this.pendingAskHumanRequests[ogEvent.toolCallId]; - break; - } - } - } -} - -export async function* streamAgent({ - state, - idGenerator, - runId, - messageQueue, - modelConfigRepo, - signal, - abortRegistry, - bus, -}: { - state: AgentState, - idGenerator: IMonotonicallyIncreasingIdGenerator; - runId: string; - messageQueue: IMessageQueue; - modelConfigRepo: IModelConfigRepo; - signal: AbortSignal; - abortRegistry: IAbortRegistry; - bus: IBus; -}): AsyncGenerator, void, unknown> { - const logger = new PrefixLogger(`run-${runId}-${state.agentName}`); - - async function* processEvent(event: z.infer): AsyncGenerator, void, unknown> { - state.ingest(event); - yield event; - } - - // set up agent - const agent = await loadAgent(state.agentName!); - - // set up tools - const tools = await buildTools(agent); - - // model+provider 
were resolved and frozen on the run at runs:create time. - // Look up the named provider's current credentials from models.json and - // instantiate the LLM client. No selection happens here. - if (!state.runModel || !state.runProvider) { - throw new Error(`Run ${runId} is missing model/provider on its start event`); - } - const modelId = state.runModel; - const providerConfig = await resolveProviderConfig(state.runProvider); - const provider = createProvider(providerConfig); - const model = provider.languageModel(modelId); - logger.log(`using model: ${modelId} (provider: ${state.runProvider})`); - - // Install use-case context for tool-internal LLM calls (e.g. parseFile) - // so they can tag their `llm_usage` events with the parent run's category. - enterUseCase({ - useCase: state.runUseCase ?? "copilot_chat", - ...(state.runSubUseCase ? { subUseCase: state.runSubUseCase } : {}), - ...(state.agentName ? { agentName: state.agentName } : {}), - }); - - let loopCounter = 0; - let voiceInput = false; - let voiceOutput: 'summary' | 'full' | null = null; - let searchEnabled = false; - let codeMode: 'claude' | 'codex' | null = null; - let codeCwd: string | null = null; - let codePolicy: 'ask' | 'auto-approve-reads' | 'yolo' | null = null; - let middlePaneContext: - | { kind: 'note'; path: string; content: string } - | { kind: 'browser'; url: string; title: string } - | null = null; - while (true) { - // Check abort at the top of each iteration - signal.throwIfAborted(); - - loopCounter++; - const loopLogger = logger.child(`iter-${loopCounter}`); - loopLogger.log('starting loop iteration'); - - // execute any pending tool calls - for (const toolCallId of Object.keys(state.pendingToolCalls)) { - const toolCall = state.toolCallIdMap[toolCallId]; - const _logger = loopLogger.child(`tc-${toolCallId}-${toolCall.toolName}`); - _logger.log('processing'); - - // if ask-human, skip - if (toolCall.toolName === "ask-human") { - _logger.log('skipping, reason: ask-human'); - continue; 
- } - - // if tool has been denied, deny - if (state.deniedToolCallIds[toolCallId]) { - _logger.log('returning denied tool message, reason: tool has been denied'); - const autoDenied = state.autoDeniedToolCalls[toolCallId]; - yield* processEvent({ - runId, - messageId: await idGenerator.next(), - type: "message", - message: { - role: "tool", - content: autoDenied - ? JSON.stringify({ - success: false, - error: `Auto-permission denied: ${autoDenied.reason}`, - }) - : "Unable to execute this tool: Permission was denied.", - toolCallId: toolCallId, - toolName: toolCall.toolName, - }, - subflow: [], - }); - continue; - } - - // if permission is pending on this tool call, skip execution - if (state.pendingToolPermissionRequests[toolCallId]) { - _logger.log('skipping, reason: permission is pending'); - continue; - } - - // execute approved tool - // Check abort before starting tool execution - if (signal.aborted) { - _logger.log('skipping, reason: aborted'); - break; - } - _logger.log('executing tool'); - yield* processEvent({ - runId, - type: "tool-invocation", - toolCallId, - toolName: toolCall.toolName, - input: JSON.stringify(toolCall.arguments ?? 
{}), - subflow: [], - }); - let result: unknown = null; - try { - if (agent.tools![toolCall.toolName].type === "agent") { - const subflowState = state.subflowStates[toolCallId]; - for await (const event of streamAgent({ - state: subflowState, - idGenerator, - runId, - messageQueue, - modelConfigRepo, - signal, - abortRegistry, - bus, - })) { - yield* processEvent({ - ...event, - subflow: [toolCallId, ...event.subflow], - }); - } - if (!subflowState.getPendingAskHumans().length && !subflowState.getPendingPermissions().length) { - result = subflowState.finalResponse(); - } - } else { - result = await execTool(agent.tools![toolCall.toolName], toolCall.arguments, { - runId, - toolCallId, - signal, - abortRegistry, - publish: (event) => bus.publish(event), - codeMode, - codeCwd, - codePolicy, - }); - } - } catch (error) { - if ((error instanceof Error && error.name === "AbortError") || signal.aborted) { - throw error; - } - const message = error instanceof Error ? (error.message || error.name) : String(error); - _logger.log('tool failed', message); - result = { - success: false, - error: message, - toolName: toolCall.toolName, - }; - } - const resultPayload = result === undefined ? 
null : result; - const resultMsg: z.infer = { - role: "tool", - content: JSON.stringify(resultPayload), - toolCallId: toolCall.toolCallId, - toolName: toolCall.toolName, - }; - yield* processEvent({ - runId, - type: "tool-result", - toolCallId: toolCall.toolCallId, - toolName: toolCall.toolName, - result: resultPayload, - subflow: [], - }); - yield* processEvent({ - runId, - messageId: await idGenerator.next(), - type: "message", - message: resultMsg, - subflow: [], - }); - } - - // if waiting on user permission or ask-human, exit - if (state.getPendingAskHumans().length || state.getPendingPermissions().length) { - loopLogger.log('exiting loop, reason: pending asks or permissions'); - return; - } - - // get any queued user messages - while (true) { - const msg = await messageQueue.dequeue(runId); - if (!msg) { - break; - } - if (msg.voiceInput) { - voiceInput = true; - } - if (msg.searchEnabled) { - searchEnabled = true; - } - // Code mode is per-message: latest message decides whether the assistant - // should route coding work through the code-with-agents skill / chosen agent. - codeMode = msg.codeMode ?? null; - codeCwd = msg.codeCwd ?? null; - codePolicy = msg.codePolicy ?? null; - if (msg.voiceOutput) { - voiceOutput = msg.voiceOutput; - } - // Middle pane is NOT sticky — it should reflect the state at the moment of the - // latest user message. If the user closed the pane between messages, clear it. - middlePaneContext = msg.middlePaneContext ?? 
null; - loopLogger.log('dequeued user message', msg.messageId); - const userMessageContext = buildUserMessageContext({ - agentName: state.agentName, - middlePaneContext, - }); - yield* processEvent({ - runId, - type: "message", - messageId: msg.messageId, - message: { - role: "user", - content: msg.message, - userMessageContext, - }, - subflow: [], - }); - } - - // if last response is from assistant and text, exit - const lastMessage = state.messages[state.messages.length - 1]; - if (lastMessage - && lastMessage.role === "assistant" - && (typeof lastMessage.content === "string" - || !lastMessage.content.some(part => part.type === "tool-call") - ) - ) { - loopLogger.log('exiting loop, reason: last message is from assistant and text'); - return; - } - - // run one LLM turn. - loopLogger.log('running llm turn'); - // stream agent response and build message - const messageBuilder = new StreamStepMessageBuilder(); - let instructionsWithDateTime = `${agent.instructions}\n\n${USER_CONTEXT_SYSTEM_INSTRUCTIONS}`; - // Inject Agent Notes context for copilot - if (state.agentName === 'copilot' || state.agentName === 'rowboatx') { - const agentNotesContext = loadAgentNotesContext(); - if (agentNotesContext) { - instructionsWithDateTime += `\n\n${agentNotesContext}`; - } - const userWorkDir = loadUserWorkDir(runId); - if (userWorkDir) { - loopLogger.log('injecting user work directory', userWorkDir); - instructionsWithDateTime += `\n\n# User Work Directory -The user has chosen the following directory as their current **work directory**: - -\`${userWorkDir}\` - -Treat this as the **default location** for file operations whenever the user refers to files generically: -- "list the files", "show me what's in here", "what's the latest report" — list or look in the work directory. -- "save this", "export it", "write that to a file" — write the output into the work directory unless the user names another location. 
-- "open the file I was just working on", "the doc from earlier" — assume the work directory first. - -Use absolute paths rooted at this directory with the \`file-*\` tools. For example, list with \`file-list({ path: "${userWorkDir}" })\`, read text with \`file-readText\`, and write text with \`file-writeText\`. For PDFs, Office docs, images, scanned docs, and other non-text files, use \`parseFile\` or \`LLMParse\` with the absolute path; you do NOT need to copy the file into the workspace first. - -**Exceptions — these ALWAYS take precedence over the work directory default:** -1. **Knowledge base questions.** If the user asks about anything in the knowledge graph (notes, people, organizations, projects, topics) or paths starting with \`knowledge/\`, use file tools against \`knowledge/\` as documented above. Do NOT redirect those into the work directory. -2. **Explicit paths.** If the user names a different directory or gives an absolute/relative path (e.g. "in ~/Downloads", "from /tmp/foo", "the Desktop"), honor that path exactly and ignore the work-directory default for that request. -3. **Workspace-specific operations.** Anything that obviously belongs in the Rowboat workspace (config files, MCP servers, agent schedules, etc.) stays in the workspace, not the work directory. - -Do not announce the work directory unless it's relevant. Just use it.`; - } - } - if (voiceInput) { - loopLogger.log('voice input enabled, injecting voice input prompt'); - instructionsWithDateTime += `\n\n# Voice Input\nThe user's message was transcribed from speech. Be aware that:\n- There may be transcription errors. Silently correct obvious ones (e.g. homophones, misheard words). If an error is genuinely ambiguous, briefly mention your interpretation (e.g. "I'm assuming you meant X").\n- Spoken messages are often long-winded. The user may ramble, repeat themselves, or correct something they said earlier in the same message. 
Focus on their final intent, not every word verbatim.`; - } - if (voiceOutput === 'summary') { - loopLogger.log('voice output enabled (summary mode), injecting voice output prompt'); - instructionsWithDateTime += `\n\n# Voice Output (MANDATORY — READ THIS FIRST)\nThe user has voice output enabled. THIS IS YOUR #1 PRIORITY: you MUST start your response with tags. If your response does not begin with tags, the user will hear nothing — which is a broken experience. NEVER skip this.\n\nRules:\n1. YOUR VERY FIRST OUTPUT MUST BE A TAG. No exceptions. Do not start with markdown, headings, or any other text. The literal first characters of your response must be "".\n2. Place ALL tags at the BEGINNING of your response, before any detailed content. Do NOT intersperse tags throughout the response.\n3. Wrap EACH spoken sentence in its own separate tag so it can be spoken incrementally. Do NOT wrap everything in a single block.\n4. Use voice as a TL;DR and navigation aid — do NOT read the entire response aloud.\n5. After all tags, you may include detailed written content (markdown, tables, code, etc.) that will be shown visually but not spoken.\n\n## Examples\n\nExample 1 — User asks: "what happened in my meeting with Alex yesterday?"\n\nYour meeting with Alex covered three main things: the Q2 roadmap timeline, hiring for the backend role, and the client demo next week.\nI've pulled out the key details and action items below — the demo prep notes are at the end.\n\n## Meeting with Alex — March 11\n### Roadmap\n- Agreed to push Q2 launch to April 15...\n(detailed written content continues)\n\nExample 2 — User asks: "summarize my emails"\n\nYou have five new emails since this morning.\nTwo are from your team — Jordan sent the RFC you requested and Taylor flagged a contract issue.\nThere's also a warm intro from a VC partner connecting you with someone at a prospective customer.\nI've drafted responses for three of them. 
The details and drafts are below.\n\n(email blocks, tables, and detailed content follow)\n\nExample 3 — User asks: "what's on my calendar today?"\n\nYou've got a pretty packed day — seven meetings starting with standup at 9.\nThe big ones are your investor call at 11, lunch with a partner from your lead VC at 12:30, and a customer call at 4.\nYour only free block for deep work is 2:30 to 4.\n\n(calendar block with full event details follows)\n\nExample 4 — User asks: "draft an email to Sam with our metrics"\n\nDone — I've drafted the email to Sam with your latest WAU and churn numbers.\nTake a look at the draft below and send it when you're ready.\n\n(email block with draft follows)\n\nREMEMBER: If you do not start with tags, the user hears silence. Always speak first, then write.`; - } else if (voiceOutput === 'full') { - loopLogger.log('voice output enabled (full mode), injecting voice output prompt'); - instructionsWithDateTime += `\n\n# Voice Output — Full Read-Aloud (MANDATORY — READ THIS FIRST)\nThe user wants your ENTIRE response spoken aloud. THIS IS YOUR #1 PRIORITY: every single sentence must be wrapped in tags. If you write anything outside tags, the user will not hear it — which is a broken experience. NEVER skip this.\n\nRules:\n1. YOUR VERY FIRST OUTPUT MUST BE A TAG. No exceptions. The literal first characters of your response must be "".\n2. Wrap EACH sentence in its own separate tag so it can be spoken incrementally.\n3. Write your response in a natural, conversational style suitable for listening — no markdown headings, bullet points, or formatting symbols. Use plain spoken language.\n4. Structure the content as if you are speaking to the user directly. Use transitions like "first", "also", "one more thing" instead of visual formatting.\n5. EVERY sentence MUST be inside a tag. Do not leave ANY content outside tags. 
If it's not in a tag, the user cannot hear it.\n\n## Examples\n\nExample 1 — User asks: "what happened in my meeting with Alex yesterday?"\n\nYour meeting with Alex covered three main things.\nFirst, you discussed the Q2 roadmap timeline and agreed to push the launch to April.\nSecond, you talked about hiring for the backend role — Alex will send over two candidates by Friday.\nAnd lastly, the client demo is next week on Thursday at 2pm, and you're handling the intro slides.\n\nExample 2 — User asks: "summarize my emails"\n\nYou've got five new emails since this morning.\nTwo are from your team — Jordan sent the RFC you asked for, and Taylor flagged a contract issue that needs your sign-off.\nThere's a warm intro from a VC partner connecting you with an engineering lead at a potential customer.\nAnd someone from a prospective client wants to confirm your API tier before your call this afternoon.\nI've drafted replies for three of them — the metrics update, the intro, and the API question.\nThe only one I left for you is Taylor's contract redline, since that needs your judgment on the liability cap.\n\nExample 3 — User asks: "what's on my calendar today?"\n\nYou've got a packed day — seven meetings starting with standup at 9.\nThe highlights are your investor call at 11, lunch with a VC partner at 12:30, and a customer call at 4.\nYour only open block for deep work is 2:30 to 4, so plan accordingly.\nOh, and your 1-on-1 with your co-founder is at 5:30 — that's a walking meeting.\n\nExample 4 — User asks: "how are our metrics looking?"\n\nMetrics are looking strong this week.\nYou hit 2,573 weekly active users, which is up 12% week over week.\nThat means you've crossed the 2,500 milestone — worth calling out in your next investor update.\nChurn is down to 4.1%, improving month over month.\nThe trailing 8-week compound growth rate is about 10%.\n\nREMEMBER: Start with immediately. No preamble, no markdown before it. 
Speak first.`; - } - if (searchEnabled) { - loopLogger.log('search enabled, injecting search prompt'); - instructionsWithDateTime += `\n\n# Search\nThe user has requested a search. Use the web-search tool to answer their query.`; - } - if (codeMode) { - loopLogger.log('code mode enabled, injecting coding-agent context', codeMode); - const agentDisplay = codeMode === 'claude' ? 'Claude Code' : 'Codex'; - instructionsWithDateTime += `\n\n# Code Mode (Active) — Agent: ${agentDisplay} -The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). For EVERY coding task this turn, use **${agentDisplay}**, and narrate that agent ("Using ${agentDisplay} to …"). - -The chip is the single source of truth for which agent runs: -- Do NOT carry over a different agent from earlier in this thread — even if a previous run used the other agent, use **${agentDisplay}** now. -- Do NOT switch agents based on an in-chat text request ("use codex", "switch to claude"). The agent only changes when the user toggles the chip; if they ask in chat, tell them to toggle the chip. - -**How to run coding work — call the \`code_agent_run\` tool** with: -- \`agent\`: \`${codeMode}\` (always — match the chip). -- \`cwd\`: ${codeCwd ? `\`${codeCwd}\` (always — this coding session is pinned to that directory; never use another path)` : `the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once)`}. -- \`prompt\`: a clear, self-contained coding instruction. - -The tool runs the agent on-device and streams its tool calls, file diffs, and plan into the chat; any action needing approval surfaces as an inline permission card, so you do NOT pre-confirm with an in-chat "reply yes". This chat keeps ONE persistent agent session, so follow-up coding requests automatically resume with full context — just call \`code_agent_run\` again. 
Do NOT shell out to \`acpx\` or \`executeCommand\` for coding, and do NOT fall back to your own file tools. - -If the user's message is clearly NOT a coding request (small talk, an unrelated question), answer directly without invoking the coding agent. Code mode signals readiness, not that every message must route through the agent.`; - } - let streamError: string | null = null; - for await (const event of streamLlm( - model, - state.messages, - instructionsWithDateTime, - tools, - signal, - { - useCase: state.runUseCase ?? "copilot_chat", - ...(state.runSubUseCase ? { subUseCase: state.runSubUseCase } : {}), - agentName: state.agentName ?? undefined, - modelId, - providerName: state.runProvider!, - }, - )) { - messageBuilder.ingest(event); - yield* processEvent({ - runId, - type: "llm-stream-event", - event: event, - subflow: [], - }); - if (event.type === "error") { - streamError = event.error; - yield* processEvent({ - runId, - type: "error", - error: streamError, - subflow: [], - }); - break; - } - } - - // build and emit final message from agent response - const message = messageBuilder.get(); - yield* processEvent({ - runId, - messageId: await idGenerator.next(), - type: "message", - message, - subflow: [], - }); - - if (streamError) { - return; - } - - // if there were any ask-human calls, emit those events - if (message.content instanceof Array) { - const permissionCandidates: AutoPermissionCandidate[] = []; - for (const part of message.content) { - if (part.type === "tool-call") { - const underlyingTool = agent.tools![part.toolName]; - if (underlyingTool.type === "builtin" && underlyingTool.name === "ask-human") { - loopLogger.log('emitting ask-human-request, toolCallId:', part.toolCallId); - const rawOptions = (part.arguments as { options?: unknown }).options; - const options = Array.isArray(rawOptions) - ? 
rawOptions.filter((o): o is string => typeof o === 'string' && o.trim().length > 0) - : undefined; - yield* processEvent({ - runId, - type: "ask-human-request", - toolCallId: part.toolCallId, - query: part.arguments.question, - ...(options && options.length > 0 ? { options } : {}), - subflow: [], - }); - } - const permission = await getToolPermissionMetadata( - part, - underlyingTool, - state.sessionAllowedCommands, - state.sessionAllowedFileAccess, - ); - if (permission) { - permissionCandidates.push({ toolCall: part, permission }); - } - if (underlyingTool.type === "agent" && underlyingTool.name) { - loopLogger.log('emitting spawn-subflow, toolCallId:', part.toolCallId); - yield* processEvent({ - runId, - type: "spawn-subflow", - agentName: underlyingTool.name, - toolCallId: part.toolCallId, - subflow: [], - }); - yield* processEvent({ - runId, - messageId: await idGenerator.next(), - type: "message", - message: { - role: "user", - content: part.arguments.message, - }, - subflow: [part.toolCallId], - }); - } - } - } - - if (permissionCandidates.length > 0) { - // Permission prompts block the run, so they surface even when the - // app is focused (no onlyWhenBackground gate). - const notifyPermissionPrompt = (toolCall: typeof permissionCandidates[number]["toolCall"]) => { - void notifyIfEnabled("agent_permission", { - title: "Permission needed", - message: `${agent.name} wants to run "${toolCall.toolName}". Review to continue.`, - link: `rowboat://open?type=chat&runId=${runId}`, - actionLabel: "Review", - }); - }; - if (state.permissionMode === "auto") { - let decisionsByToolCallId = new Map(); - try { - const decisions = await classifyToolPermissions({ - runId, - agentName: state.agentName, - messages: convertFromMessages(state.messages), - candidates: permissionCandidates, - useCase: state.runUseCase ?? 
"copilot_chat", - subUseCase: state.runSubUseCase, - }); - decisionsByToolCallId = new Map(decisions.map((decision) => [ - decision.toolCallId, - { decision: decision.decision, reason: decision.reason }, - ])); - } catch (error) { - loopLogger.log( - 'auto-permission classifier failed:', - error instanceof Error ? error.message : String(error), - ); - } - - for (const candidate of permissionCandidates) { - const decision = decisionsByToolCallId.get(candidate.toolCall.toolCallId); - if (!decision) { - loopLogger.log('auto-permission missing decision, falling back to prompt:', candidate.toolCall.toolCallId); - yield* processEvent({ - runId, - type: "tool-permission-request", - toolCall: candidate.toolCall, - permission: candidate.permission, - subflow: [], - }); - notifyPermissionPrompt(candidate.toolCall); - continue; - } - - loopLogger.log( - 'emitting tool-permission-auto-decision, toolCallId:', - candidate.toolCall.toolCallId, - 'decision:', - decision.decision, - ); - yield* processEvent({ - runId, - type: "tool-permission-auto-decision", - toolCallId: candidate.toolCall.toolCallId, - toolCall: candidate.toolCall, - permission: candidate.permission, - decision: decision.decision, - reason: decision.reason, - subflow: [], - }); - if (decision.decision === "deny") { - loopLogger.log( - 'auto-permission denied, falling back to prompt:', - candidate.toolCall.toolCallId, - ); - yield* processEvent({ - runId, - type: "tool-permission-request", - toolCall: candidate.toolCall, - permission: candidate.permission, - subflow: [], - }); - notifyPermissionPrompt(candidate.toolCall); - } - } - } else { - for (const candidate of permissionCandidates) { - loopLogger.log('emitting tool-permission-request, toolCallId:', candidate.toolCall.toolCallId); - yield* processEvent({ - runId, - type: "tool-permission-request", - toolCall: candidate.toolCall, - permission: candidate.permission, - subflow: [], - }); - notifyPermissionPrompt(candidate.toolCall); - } - } - } - } - } -} - 
-interface StreamLlmAnalytics { - useCase: UseCase; - subUseCase?: string; - agentName?: string; - modelId: string; - providerName: string; -} - -async function* streamLlm( - model: LanguageModel, - messages: z.infer, - instructions: string, - tools: ToolSet, - signal?: AbortSignal, - analytics?: StreamLlmAnalytics, -): AsyncGenerator, void, unknown> { - const converted = convertFromMessages(messages); - console.log(`! SENDING payload to model: `, JSON.stringify(converted)) - const streamResult = analytics - ? withUseCase({ - useCase: analytics.useCase, - ...(analytics.subUseCase ? { subUseCase: analytics.subUseCase } : {}), - ...(analytics.agentName ? { agentName: analytics.agentName } : {}), - }, () => streamText({ - model, - messages: converted, - system: instructions, - tools, - stopWhen: stepCountIs(1), - abortSignal: signal, - })) - : streamText({ - model, - messages: converted, - system: instructions, - tools, - stopWhen: stepCountIs(1), - abortSignal: signal, - }); - const { fullStream } = streamResult; - for await (const event of fullStream) { - // Check abort on every chunk for responsiveness - signal?.throwIfAborted(); - console.log("-> \t\tstream event", JSON.stringify(event)); - switch (event.type) { - case "error": - yield { - type: "error", - error: formatLlmStreamError((event as { error?: unknown }).error ?? 
event), - }; - return; - case "reasoning-start": - yield { - type: "reasoning-start", - providerOptions: event.providerMetadata, - }; - break; - case "reasoning-delta": - yield { - type: "reasoning-delta", - delta: event.text, - providerOptions: event.providerMetadata, - }; - break; - case "reasoning-end": - yield { - type: "reasoning-end", - providerOptions: event.providerMetadata, - }; - break; - case "text-start": - yield { - type: "text-start", - providerOptions: event.providerMetadata, - }; - break; - case "text-end": - yield { - type: "text-end", - providerOptions: event.providerMetadata, - }; - break; - case "text-delta": - yield { - type: "text-delta", - delta: event.text, - providerOptions: event.providerMetadata, - }; - break; - case "tool-call": - yield { - type: "tool-call", - toolCallId: event.toolCallId, - toolName: event.toolName, - input: event.input, - providerOptions: event.providerMetadata, - }; - break; - case "finish-step": - if (analytics) { - captureLlmUsage({ - useCase: analytics.useCase, - ...(analytics.subUseCase ? { subUseCase: analytics.subUseCase } : {}), - ...(analytics.agentName ? 
{ agentName: analytics.agentName } : {}), - model: analytics.modelId, - provider: analytics.providerName, - usage: event.usage, - }); - } - yield { - type: "finish-step", - usage: event.usage, - finishReason: event.finishReason, - providerOptions: event.providerMetadata, - }; - break; - default: - console.log('unknown stream event:', JSON.stringify(event)); - continue; - } - } -} -export const MappedToolCall = z.object({ - toolCall: ToolCallPart, - agentTool: ToolAttachment, -}); diff --git a/apps/x/packages/core/src/agents/utils.ts b/apps/x/packages/core/src/agents/utils.ts index cfc899d6..02d146f2 100644 --- a/apps/x/packages/core/src/agents/utils.ts +++ b/apps/x/packages/core/src/agents/utils.ts @@ -1,11 +1,7 @@ -import { bus } from "../runs/bus.js"; -import { fetchRun } from "../runs/runs.js"; - -type RunRecord = Awaited>; - -function extractRunErrors(run: RunRecord): string[] { - return run.log.flatMap((event) => event.type === "error" ? [event.error] : []); -} +// Error formatting shared across the knowledge pipelines. The run-based +// helpers that once lived here (waitForRunCompletion / extractAgentResponse) +// were retired with the old agent runtime; headless callers now await a turn +// via `agent-runtime/headless.ts` instead. export class RunFailedError extends Error { readonly runId: string; @@ -29,56 +25,3 @@ export function getErrorDetails(error: unknown): string { } return String(error); } - -/** - * Extract the assistant's final text response from a run's log. - * @param runId - * @returns The assistant's final text response or null if not found. 
- */ -export async function extractAgentResponse(runId: string): Promise { - const run = await fetchRun(runId); - for (let i = run.log.length - 1; i >= 0; i--) { - const event = run.log[i]; - if (event.type === 'message' && event.message.role === 'assistant') { - const content = event.message.content; - if (typeof content === 'string') return content; - if (Array.isArray(content)) { - const text = content - .filter((p) => p.type === 'text') - .map((p) => 'text' in p ? p.text : '') - .join(''); - return text || null; - } - } - } - return null; -} - -/** - * Wait for a run to complete by listening for run-processing-end event - */ -export async function waitForRunCompletion( - runId: string, - opts: { throwOnError?: boolean } = {}, -): Promise { - return new Promise((resolve, reject) => { - void (async () => { - const unsubscribe = await bus.subscribe('*', async (event) => { - if (event.type === 'run-processing-end' && event.runId === runId) { - unsubscribe(); - try { - const run = await fetchRun(runId); - const errors = extractRunErrors(run); - if (opts.throwOnError && errors.length > 0) { - reject(new RunFailedError(runId, errors)); - return; - } - resolve(run); - } catch (error) { - reject(error); - } - } - }); - })().catch(reject); - }); -} diff --git a/apps/x/packages/core/src/application/lib/message-queue.ts b/apps/x/packages/core/src/application/lib/message-queue.ts deleted file mode 100644 index 3e8c27f3..00000000 --- a/apps/x/packages/core/src/application/lib/message-queue.ts +++ /dev/null @@ -1,71 +0,0 @@ -import { IMonotonicallyIncreasingIdGenerator } from "./id-gen.js"; -import { UserMessageContent } from "@x/shared/dist/message.js"; -import z from "zod"; - -export type UserMessageContentType = z.infer; -export type VoiceOutputMode = 'summary' | 'full'; -export type MiddlePaneContext = - | { kind: 'note'; path: string; content: string } - | { kind: 'browser'; url: string; title: string }; - -export type CodeMode = 'claude' | 'codex'; -export type 
CodePolicy = 'ask' | 'auto-approve-reads' | 'yolo'; - -type EnqueuedMessage = { - messageId: string; - message: UserMessageContentType; - voiceInput?: boolean; - voiceOutput?: VoiceOutputMode; - searchEnabled?: boolean; - codeMode?: CodeMode; - // Code-section sessions pin the coding agent's working directory and - // approval policy for the turn (code_agent_run honors these over its - // model-provided arguments / the global policy). - codeCwd?: string; - codePolicy?: CodePolicy; - middlePaneContext?: MiddlePaneContext; -}; - -export interface IMessageQueue { - enqueue(runId: string, message: UserMessageContentType, voiceInput?: boolean, voiceOutput?: VoiceOutputMode, searchEnabled?: boolean, middlePaneContext?: MiddlePaneContext, codeMode?: CodeMode, codeCwd?: string, codePolicy?: CodePolicy): Promise; - dequeue(runId: string): Promise; -} - -export class InMemoryMessageQueue implements IMessageQueue { - private store: Record = {}; - private idGenerator: IMonotonicallyIncreasingIdGenerator; - - constructor({ - idGenerator, - }: { - idGenerator: IMonotonicallyIncreasingIdGenerator; - }) { - this.idGenerator = idGenerator; - } - - async enqueue(runId: string, message: UserMessageContentType, voiceInput?: boolean, voiceOutput?: VoiceOutputMode, searchEnabled?: boolean, middlePaneContext?: MiddlePaneContext, codeMode?: CodeMode, codeCwd?: string, codePolicy?: CodePolicy): Promise { - if (!this.store[runId]) { - this.store[runId] = []; - } - const id = await this.idGenerator.next(); - this.store[runId].push({ - messageId: id, - message, - voiceInput, - voiceOutput, - searchEnabled, - codeMode, - codeCwd, - codePolicy, - middlePaneContext, - }); - return id; - } - - async dequeue(runId: string): Promise { - if (!this.store[runId]) { - return null; - } - return this.store[runId].shift() ?? 
null; - } -} diff --git a/apps/x/packages/core/src/background-tasks/fileops.ts b/apps/x/packages/core/src/background-tasks/fileops.ts index d282a529..4bd1a4e2 100644 --- a/apps/x/packages/core/src/background-tasks/fileops.ts +++ b/apps/x/packages/core/src/background-tasks/fileops.ts @@ -219,10 +219,10 @@ export async function listTasks(opts: ListTasksOptions = {}): Promise.jsonl`; readers fetch via the standard -// runs:fetch IPC. Read concurrency is unconstrained; write is serialized via +// One line per run, run id only. Prepended on each start so the newest is at +// the top — no sorting needed on read. Each id is a standalone turn id; readers +// load the transcript via the `sessions:getTurn` IPC. Read concurrency is +// unconstrained; write is serialized via // `withFileLock` on the task.yaml path (same lock as patches, so a run-start // patch and a prepend don't race). // --------------------------------------------------------------------------- diff --git a/apps/x/packages/core/src/background-tasks/runner.ts b/apps/x/packages/core/src/background-tasks/runner.ts index 7fb73bdb..764008c2 100644 --- a/apps/x/packages/core/src/background-tasks/runner.ts +++ b/apps/x/packages/core/src/background-tasks/runner.ts @@ -1,9 +1,8 @@ import type { BackgroundTask, BackgroundTaskTriggerType } from '@x/shared/dist/background-task.js'; import { PrefixLogger } from '@x/shared/dist/prefix-logger.js'; import { fetchTask, patchTask, prependRunId } from './fileops.js'; -import { createRun, createMessage } from '../runs/runs.js'; +import { runHeadlessAgent } from '../agent-runtime/headless.js'; import { getBackgroundTaskAgentModel } from '../models/defaults.js'; -import { extractAgentResponse, waitForRunCompletion } from '../agents/utils.js'; import { buildTriggerBlock } from '../agents/build-trigger-block.js'; import { backgroundTaskBus } from './bus.js'; @@ -104,89 +103,85 @@ export async function runBackgroundTask( // by an LLM call to create-background-task) should fall 
through to the // default just like undefined does. const model = task.model || await getBackgroundTaskAgentModel(); - const agentRun = await createRun({ + + // One standalone turn per run (sessionId null): a background-task run is + // one-shot, not a conversation — the task's index.md is the memory. + const result = await runHeadlessAgent({ agentId: 'background-task-agent', + message: buildMessage(slug, task, trigger, context), model, - ...(task.provider ? { provider: task.provider } : {}), + // Granular trigger as the analytics sub-use-case (cron / window / + // event / manual) — matches live-note's attribution. useCase: 'background_task_agent', - // Granular trigger as analytics sub-use-case — matches live-note's - // pattern at runner.ts:149. subUseCase: trigger, + ...(task.provider ? { provider: task.provider } : {}), + onStart: async (turnId) => { + // Record this run in the task's runs.log pointer file (newest + // first) — an index tying run ids to this task. + await prependRunId(slug, turnId); + log.log(`${slug} — start trigger=${trigger} runId=${turnId}`); + // Bump `lastAttemptAt` + `lastRunId` immediately (before the + // agent executes). `lastAttemptAt` is the scheduler's backoff + // anchor and the disk-persistent in-flight signal (lastAttemptAt + // > lastRunAt). Crucially we leave `lastRunAt` / `lastRunSummary` + // / `lastRunError` untouched — the previous successful run stays + // visible in the UI even while this new run is in-flight or fails. + await patchTask(slug, { + lastAttemptAt: new Date().toISOString(), + lastRunId: turnId, + }); + backgroundTaskBus.publish({ + type: 'background_task_agent_start', + slug, + trigger, + runId: turnId, + }); + }, }); + const runId = result.turnId; - const runId = agentRun.id; - // Record this run in the task's runs.log pointer file (newest first). - // The transcript itself lives at the global $WorkDir/runs/.jsonl - // — runs.log is just an index that ties runIds to this task. 
- await prependRunId(slug, runId); - const startedAt = new Date().toISOString(); - - log.log(`${slug} — start trigger=${trigger} runId=${runId}`); - - // Bump `lastAttemptAt` + `lastRunId` immediately (before the agent - // executes). `lastAttemptAt` is the scheduler's backoff anchor and the - // disk-persistent in-flight signal (lastAttemptAt > lastRunAt). Crucially - // we leave `lastRunAt` / `lastRunSummary` / `lastRunError` untouched — - // the previous successful run stays visible in the UI even while this - // new run is in-flight or fails. - await patchTask(slug, { - lastAttemptAt: startedAt, - lastRunId: runId, - }); - - backgroundTaskBus.publish({ - type: 'background_task_agent_start', - slug, - trigger, - runId, - }); - - try { - await createMessage(runId, buildMessage(slug, task, trigger, context)); - await waitForRunCompletion(runId, { throwOnError: true }); - const summary = await extractAgentResponse(runId); - - // Success — bump cycle anchor, refresh summary, clear any prior error. - await patchTask(slug, { - lastRunAt: new Date().toISOString(), - lastRunSummary: summary ?? undefined, - lastRunError: undefined, - }); - - log.log(`${slug} — done summary="${truncate(summary)}"`); - - backgroundTaskBus.publish({ - type: 'background_task_agent_complete', - slug, - runId, - ...(summary ? { summary } : {}), - }); - - return { slug, runId, summary }; - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - + if (result.error) { // Failure — only record the error. `lastRunAt` and `lastRunSummary` // are deliberately untouched so the user keeps seeing the last good // state; the scheduler's backoff (lastAttemptAt + 5min) prevents // retry-storming. 
try { - await patchTask(slug, { lastRunError: msg }); + await patchTask(slug, { lastRunError: result.error }); } catch { // don't mask the original error } - log.log(`${slug} — failed: ${truncate(msg)}`); + log.log(`${slug} — failed: ${truncate(result.error)}`); backgroundTaskBus.publish({ type: 'background_task_agent_complete', slug, runId, - error: msg, + error: result.error, }); - return { slug, runId, summary: null, error: msg }; + return { slug, runId, summary: null, error: result.error }; } + + const summary = result.summary; + + // Success — bump cycle anchor, refresh summary, clear any prior error. + await patchTask(slug, { + lastRunAt: new Date().toISOString(), + lastRunSummary: summary ?? undefined, + lastRunError: undefined, + }); + + log.log(`${slug} — done summary="${truncate(summary)}"`); + + backgroundTaskBus.publish({ + type: 'background_task_agent_complete', + slug, + runId, + ...(summary ? { summary } : {}), + }); + + return { slug, runId, summary }; } finally { runningTasks.delete(slug); } diff --git a/apps/x/packages/core/src/di/container.ts b/apps/x/packages/core/src/di/container.ts index 4bfc92ed..03648b11 100644 --- a/apps/x/packages/core/src/di/container.ts +++ b/apps/x/packages/core/src/di/container.ts @@ -2,12 +2,9 @@ import { asClass, asValue, createContainer, InjectionMode } from "awilix"; import { FSModelConfigRepo, IModelConfigRepo } from "../models/repo.js"; import { FSMcpConfigRepo, IMcpConfigRepo } from "../mcp/repo.js"; import { FSAgentsRepo, IAgentsRepo } from "../agents/repo.js"; -import { FSRunsRepo, IRunsRepo } from "../runs/repo.js"; import { IMonotonicallyIncreasingIdGenerator, IdGen } from "../application/lib/id-gen.js"; -import { IMessageQueue, InMemoryMessageQueue } from "../application/lib/message-queue.js"; import { IBus, InMemoryBus } from "../application/lib/bus.js"; import { IRunsLock, InMemoryRunsLock } from "../runs/lock.js"; -import { IAgentRuntime, AgentRuntime } from "../agents/runtime.js"; import { FSOAuthRepo, 
IOAuthRepo } from "../auth/repo.js"; import { FSClientRegistrationRepo, IClientRegistrationRepo } from "../auth/client-repo.js"; import { FSGranolaConfigRepo, IGranolaConfigRepo } from "../knowledge/granola/repo.js"; @@ -32,16 +29,13 @@ const container = createContainer({ container.register({ idGenerator: asClass(IdGen).singleton(), - messageQueue: asClass(InMemoryMessageQueue).singleton(), bus: asClass(InMemoryBus).singleton(), runsLock: asClass(InMemoryRunsLock).singleton(), abortRegistry: asClass(InMemoryAbortRegistry).singleton(), - agentRuntime: asClass(AgentRuntime).singleton(), mcpConfigRepo: asClass(FSMcpConfigRepo).singleton(), modelConfigRepo: asClass(FSModelConfigRepo).singleton(), agentsRepo: asClass(FSAgentsRepo).singleton(), - runsRepo: asClass(FSRunsRepo).singleton(), oauthRepo: asClass(FSOAuthRepo).singleton(), clientRegistrationRepo: asClass(FSClientRegistrationRepo).singleton(), granolaConfigRepo: asClass(FSGranolaConfigRepo).singleton(), diff --git a/apps/x/packages/core/src/knowledge/agent_notes.ts b/apps/x/packages/core/src/knowledge/agent_notes.ts index f1380c4a..793a7eae 100644 --- a/apps/x/packages/core/src/knowledge/agent_notes.ts +++ b/apps/x/packages/core/src/knowledge/agent_notes.ts @@ -2,9 +2,9 @@ import fs from 'fs'; import path from 'path'; import { google } from 'googleapis'; import { WorkDir } from '../config/config.js'; -import { createRun, createMessage } from '../runs/runs.js'; +import { runHeadlessAgent } from '../agent-runtime/headless.js'; import { getKgModel } from '../models/defaults.js'; -import { getErrorDetails, waitForRunCompletion } from '../agents/utils.js'; +import { getErrorDetails } from '../agents/utils.js'; import { serviceLogger } from '../services/service_logger.js'; import { loadUserConfig, updateUserEmail } from '../config/user_config.js'; import { GoogleClientFactory } from './google-client-factory.js'; @@ -281,14 +281,14 @@ async function processAgentNotes(): Promise { const timestamp = new 
Date().toISOString(); const message = `Current timestamp: ${timestamp}\n\nProcess the following source material and update the Agent Notes folder accordingly.\n\n${messageParts.join('\n\n')}`; - const agentRun = await createRun({ + const result = await runHeadlessAgent({ agentId: AGENT_ID, + message, model: await getKgModel(), useCase: 'knowledge_sync', subUseCase: 'agent_notes', }); - await createMessage(agentRun.id, message); - await waitForRunCompletion(agentRun.id, { throwOnError: true }); + if (result.error) throw new Error(result.error); // Mark everything as processed for (const p of emailPaths) { diff --git a/apps/x/packages/core/src/knowledge/build_graph.ts b/apps/x/packages/core/src/knowledge/build_graph.ts index 3e05e611..2e232206 100644 --- a/apps/x/packages/core/src/knowledge/build_graph.ts +++ b/apps/x/packages/core/src/knowledge/build_graph.ts @@ -2,9 +2,8 @@ import fs from 'fs'; import path from 'path'; import { WorkDir } from '../config/config.js'; import { getKgModel } from '../models/defaults.js'; -import { createRun, createMessage } from '../runs/runs.js'; -import { bus } from '../runs/bus.js'; -import { getErrorDetails, waitForRunCompletion } from '../agents/utils.js'; +import { editedPaths, runHeadlessAgent } from '../agent-runtime/headless.js'; +import { getErrorDetails } from '../agents/utils.js'; import { serviceLogger, type ServiceRunContext } from '../services/service_logger.js'; import { loadState, @@ -87,15 +86,6 @@ function hasNoiseLabels(content: string): boolean { return false; } -function extractPathFromToolInput(input: string): string | null { - try { - const parsed = JSON.parse(input) as { path?: string }; - return typeof parsed.path === 'string' ? 
parsed.path : null; - } catch { - return null; - } -} - function ensureSuggestedTopicsFileLocation(): string { if (fs.existsSync(SUGGESTED_TOPICS_PATH)) { return SUGGESTED_TOPICS_PATH; @@ -250,13 +240,6 @@ async function createNotesFromBatch( fs.mkdirSync(NOTES_OUTPUT_DIR, { recursive: true }); } - // Create a run for the note creation agent - const run = await createRun({ - agentId: NOTE_CREATION_AGENT, - model: await getKgModel(), - useCase: 'knowledge_sync', - subUseCase: 'build_graph', - }); const suggestedTopicsContent = readSuggestedTopicsFile(); // Build message with index and all files in the batch @@ -291,37 +274,21 @@ async function createNotesFromBatch( message += `\n\n---\n\n`; }); - const notesCreated = new Set(); - const notesModified = new Set(); - - const unsubscribe = await bus.subscribe(run.id, async (event) => { - if (event.type !== "tool-invocation") { - return; - } - if (event.toolName !== "file-writeText" && event.toolName !== "file-editText") { - return; - } - const toolPath = extractPathFromToolInput(event.input); - if (!toolPath) { - return; - } - if (event.toolName === "file-writeText") { - notesCreated.add(toolPath); - } else if (event.toolName === "file-editText") { - notesModified.add(toolPath); - } + const result = await runHeadlessAgent({ + agentId: NOTE_CREATION_AGENT, + message, + model: await getKgModel(), + useCase: 'knowledge_sync', + subUseCase: 'build_graph', }); + if (result.error) throw new Error(result.error); - await createMessage(run.id, message); + // Created vs modified, read from the completed turn's tool calls: a + // file-writeText is a new note, a file-editText is an update. 
+ const notesCreated = new Set(editedPaths(result.turn, ['file-writeText'])); + const notesModified = new Set(editedPaths(result.turn, ['file-editText'])); - // Wait for the run to complete - try { - await waitForRunCompletion(run.id, { throwOnError: true }); - } finally { - unsubscribe(); - } - - return { runId: run.id, notesCreated, notesModified }; + return { runId: result.turnId, notesCreated, notesModified }; } /** diff --git a/apps/x/packages/core/src/knowledge/inline_tasks.ts b/apps/x/packages/core/src/knowledge/inline_tasks.ts index cd2f23c0..ed8b7db1 100644 --- a/apps/x/packages/core/src/knowledge/inline_tasks.ts +++ b/apps/x/packages/core/src/knowledge/inline_tasks.ts @@ -3,13 +3,12 @@ import path from 'path'; import { CronExpressionParser } from 'cron-parser'; import { generateText } from 'ai'; import { WorkDir } from '../config/config.js'; -import { createRun, createMessage, fetchRun } from '../runs/runs.js'; +import { runHeadlessAgent } from '../agent-runtime/headless.js'; import { getKgModel } from '../models/defaults.js'; import container from '../di/container.js'; import type { IModelConfigRepo } from '../models/repo.js'; import { createProvider } from '../models/models.js'; import { inlineTask } from '@x/shared'; -import { extractAgentResponse, waitForRunCompletion } from '../agents/utils.js'; import { captureLlmUsage } from '../analytics/usage.js'; import { withUseCase } from '../analytics/use_case.js'; @@ -470,13 +469,6 @@ async function processInlineTasks(): Promise { console.log(`[InlineTasks] Running task: "${task.instruction.slice(0, 80)}..."`); try { - const run = await createRun({ - agentId: INLINE_TASK_AGENT, - model: await getKgModel(), - useCase: 'knowledge_sync', - subUseCase: 'inline_task_run', - }); - const message = [ `Execute the following instruction from the note "${relativePath}":`, '', @@ -488,10 +480,14 @@ async function processInlineTasks(): Promise { '```', ].join('\n'); - await createMessage(run.id, message); - await 
waitForRunCompletion(run.id); - - const result = await extractAgentResponse(run.id); + const agentResult = await runHeadlessAgent({ + agentId: INLINE_TASK_AGENT, + message, + model: await getKgModel(), + useCase: 'knowledge_sync', + subUseCase: 'inline_task_run', + }); + const result = agentResult.summary; if (result) { if (task.targetId) { // Recurring task with target region — replace content inside the region @@ -555,13 +551,6 @@ export async function processRowboatInstruction( scheduleLabel: string | null; response: string | null; }> { - const run = await createRun({ - agentId: INLINE_TASK_AGENT, - model: await getKgModel(), - useCase: 'knowledge_sync', - subUseCase: 'inline_task_run', - }); - const message = [ `Process the following @rowboat instruction from the note "${notePath}":`, '', @@ -573,10 +562,14 @@ export async function processRowboatInstruction( '```', ].join('\n'); - await createMessage(run.id, message); - await waitForRunCompletion(run.id); - - const rawResponse = await extractAgentResponse(run.id); + const agentResult = await runHeadlessAgent({ + agentId: INLINE_TASK_AGENT, + message, + model: await getKgModel(), + useCase: 'knowledge_sync', + subUseCase: 'inline_task_run', + }); + const rawResponse = agentResult.summary; if (!rawResponse) { return { instruction, schedule: null, scheduleLabel: null, response: null }; } diff --git a/apps/x/packages/core/src/knowledge/label_emails.ts b/apps/x/packages/core/src/knowledge/label_emails.ts index ebb940c6..b71d43b0 100644 --- a/apps/x/packages/core/src/knowledge/label_emails.ts +++ b/apps/x/packages/core/src/knowledge/label_emails.ts @@ -1,10 +1,9 @@ import fs from 'fs'; import path from 'path'; import { WorkDir } from '../config/config.js'; -import { createRun, createMessage } from '../runs/runs.js'; +import { editedPaths, runHeadlessAgent } from '../agent-runtime/headless.js'; import { getKgModel } from '../models/defaults.js'; -import { bus } from '../runs/bus.js'; -import { getErrorDetails, 
waitForRunCompletion } from '../agents/utils.js'; +import { getErrorDetails } from '../agents/utils.js'; import { serviceLogger } from '../services/service_logger.js'; import { limitEventItems } from './limit_event_items.js'; import { @@ -70,13 +69,6 @@ function getUnlabeledEmails(state: LabelingState): string[] { async function labelEmailBatch( files: { path: string; content: string }[] ): Promise<{ runId: string; filesEdited: Set }> { - const run = await createRun({ - agentId: LABELING_AGENT, - model: await getKgModel(), - useCase: 'knowledge_sync', - subUseCase: 'label_emails', - }); - let message = `Label the following ${files.length} email files by prepending YAML frontmatter.\n\n`; message += `**Important:** Use workspace-relative paths with file-editText (e.g. "gmail_sync/email.md", NOT absolute paths).\n\n`; @@ -92,33 +84,19 @@ async function labelEmailBatch( message += `\n\n---\n\n`; } - const filesEdited = new Set(); - - const unsubscribe = await bus.subscribe(run.id, async (event) => { - if (event.type !== 'tool-invocation') { - return; - } - if (event.toolName !== 'file-editText') { - return; - } - try { - const parsed = JSON.parse(event.input) as { path?: string }; - if (typeof parsed.path === 'string') { - filesEdited.add(parsed.path); - } - } catch { - // ignore parse errors - } + const result = await runHeadlessAgent({ + agentId: LABELING_AGENT, + message, + model: await getKgModel(), + useCase: 'knowledge_sync', + subUseCase: 'label_emails', }); + if (result.error) throw new Error(result.error); - await createMessage(run.id, message); - try { - await waitForRunCompletion(run.id, { throwOnError: true }); - } finally { - unsubscribe(); - } + // Files the agent edited, read from the completed turn's tool calls. 
+ const filesEdited = new Set(editedPaths(result.turn, ['file-editText'])); - return { runId: run.id, filesEdited }; + return { runId: result.turnId, filesEdited }; } /** diff --git a/apps/x/packages/core/src/knowledge/live-note/runner.ts b/apps/x/packages/core/src/knowledge/live-note/runner.ts index b263c3e5..af36c8ac 100644 --- a/apps/x/packages/core/src/knowledge/live-note/runner.ts +++ b/apps/x/packages/core/src/knowledge/live-note/runner.ts @@ -1,8 +1,7 @@ import type { LiveNote, LiveNoteTriggerType } from '@x/shared/dist/live-note.js'; import { fetchLiveNote, patchLiveNote, readNoteBody } from './fileops.js'; -import { createRun, createMessage } from '../../runs/runs.js'; +import { runHeadlessAgent } from '../../agent-runtime/headless.js'; import { getLiveNoteAgentModel } from '../../models/defaults.js'; -import { extractAgentResponse, waitForRunCompletion } from '../../agents/utils.js'; import { buildTriggerBlock } from '../../agents/build-trigger-block.js'; import { liveNoteBus } from './bus.js'; import { PrefixLogger } from '@x/shared/dist/prefix-logger.js'; @@ -109,98 +108,90 @@ export async function runLiveNoteAgent( const bodyBefore = await readNoteBody(filePath); const model = live.model ?? await getLiveNoteAgentModel(); - const agentRun = await createRun({ + + // One standalone turn per run (sessionId null): a live-note run is + // one-shot, not a conversation — the note file is the durable memory. + const result = await runHeadlessAgent({ agentId: 'live-note-agent', + message: buildMessage(filePath, live, trigger, context), model, - ...(live.provider ? { provider: live.provider } : {}), + // Granular trigger as the analytics sub-use-case (manual / cron / + // window / event) so dashboards can break runs down by what woke them. useCase: 'live_note_agent', - // Use the granular trigger as the analytics sub-use-case so - // dashboards can break down agent runs by what woke them up - // (manual / cron / window / event). 
Pass 1 routing emits the - // separate `routing` sub-use-case from routing.ts. subUseCase: trigger, + ...(live.provider ? { provider: live.provider } : {}), + onStart: async (turnId) => { + log.log(`${filePath} — start trigger=${trigger} runId=${turnId}`); + // Bump `lastAttemptAt` immediately (before the agent executes) + // so the scheduler's next poll suppresses duplicate firings + // during a slow run and applies a backoff after a failure. + // `lastRunAt` is only bumped on *success* below — that way + // failures don't lock the cycle anchor for cron / window. + await patchLiveNote(filePath, { + lastAttemptAt: new Date().toISOString(), + lastRunId: turnId, + }); + await liveNoteBus.publish({ + type: 'live_note_agent_start', + filePath, + trigger, + runId: turnId, + }); + }, }); + const runId = result.turnId; - log.log(`${filePath} — start trigger=${trigger} runId=${agentRun.id}`); - - // Bump `lastAttemptAt` immediately (before the agent executes) so the - // scheduler's next poll suppresses duplicate firings during a slow run - // and applies a backoff after a failure. `lastRunAt` is only bumped on - // *success* below — that way failures don't lock the cycle anchor for - // cron / window triggers. - await patchLiveNote(filePath, { - lastAttemptAt: new Date().toISOString(), - lastRunId: agentRun.id, - }); - - await liveNoteBus.publish({ - type: 'live_note_agent_start', - filePath, - trigger, - runId: agentRun.id, - }); - - try { - await createMessage(agentRun.id, buildMessage(filePath, live, trigger, context)); - // throwOnError: surface any error event in the run's log (LLM API - // failures, tool errors, billing/credit issues) as a rejection so - // the failure branch records lastRunError. Without this the run - // can "complete" with errors silently and we'd hit the success - // branch with an empty summary, clobbering any prior lastRunError. 
- await waitForRunCompletion(agentRun.id, { throwOnError: true }); - const summary = await extractAgentResponse(agentRun.id); - - const bodyAfter = await readNoteBody(filePath); - const didUpdate = bodyAfter !== bodyBefore; - - // Success — bump the cycle anchor, refresh the summary, clear any - // prior error. - await patchLiveNote(filePath, { - lastRunAt: new Date().toISOString(), - lastRunSummary: summary ?? undefined, - lastRunError: undefined, - }); - - log.log(`${filePath} — done action=${didUpdate ? 'replace' : 'no_update'} summary="${truncate(summary)}"`); - - await liveNoteBus.publish({ - type: 'live_note_agent_complete', - filePath, - runId: agentRun.id, - summary: summary ?? undefined, - }); - - return { - filePath, - runId: agentRun.id, - action: didUpdate ? 'replace' : 'no_update', - contentBefore: bodyBefore, - contentAfter: bodyAfter, - summary, - }; - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - + if (result.error) { // Failure — keep `lastRunAt` and `lastRunSummary` intact so the // user keeps seeing the last good state. Just record the error; // the scheduler's backoff (lastAttemptAt + 5min) prevents storming. try { - await patchLiveNote(filePath, { lastRunError: msg }); + await patchLiveNote(filePath, { lastRunError: result.error }); } catch { // Don't mask the original error if the patch itself fails. 
} - log.log(`${filePath} — failed: ${truncate(msg)}`); + log.log(`${filePath} — failed: ${truncate(result.error)}`); await liveNoteBus.publish({ type: 'live_note_agent_complete', filePath, - runId: agentRun.id, - error: msg, + runId, + error: result.error, }); - return { filePath, runId: agentRun.id, action: 'no_update', contentBefore: bodyBefore, contentAfter: null, summary: null, error: msg }; + return { filePath, runId, action: 'no_update', contentBefore: bodyBefore, contentAfter: null, summary: null, error: result.error }; } + + const summary = result.summary; + const bodyAfter = await readNoteBody(filePath); + const didUpdate = bodyAfter !== bodyBefore; + + // Success — bump the cycle anchor, refresh the summary, clear any + // prior error. + await patchLiveNote(filePath, { + lastRunAt: new Date().toISOString(), + lastRunSummary: summary ?? undefined, + lastRunError: undefined, + }); + + log.log(`${filePath} — done action=${didUpdate ? 'replace' : 'no_update'} summary="${truncate(summary)}"`); + + await liveNoteBus.publish({ + type: 'live_note_agent_complete', + filePath, + runId, + summary: summary ?? undefined, + }); + + return { + filePath, + runId, + action: didUpdate ? 
'replace' : 'no_update', + contentBefore: bodyBefore, + contentAfter: bodyAfter, + summary, + }; } finally { runningLiveNotes.delete(filePath); } diff --git a/apps/x/packages/core/src/knowledge/tag_notes.ts b/apps/x/packages/core/src/knowledge/tag_notes.ts index 7cc7b426..0a5c9957 100644 --- a/apps/x/packages/core/src/knowledge/tag_notes.ts +++ b/apps/x/packages/core/src/knowledge/tag_notes.ts @@ -1,10 +1,9 @@ import fs from 'fs'; import path from 'path'; import { WorkDir } from '../config/config.js'; -import { createRun, createMessage } from '../runs/runs.js'; +import { editedPaths, runHeadlessAgent } from '../agent-runtime/headless.js'; import { getKgModel } from '../models/defaults.js'; -import { bus } from '../runs/bus.js'; -import { getErrorDetails, waitForRunCompletion } from '../agents/utils.js'; +import { getErrorDetails } from '../agents/utils.js'; import { serviceLogger } from '../services/service_logger.js'; import { limitEventItems } from './limit_event_items.js'; import { @@ -83,13 +82,6 @@ function getUntaggedNotes(state: NoteTaggingState): string[] { async function tagNoteBatch( files: { path: string; content: string }[] ): Promise<{ runId: string; filesEdited: Set }> { - const run = await createRun({ - agentId: NOTE_TAGGING_AGENT, - model: await getKgModel(), - useCase: 'knowledge_sync', - subUseCase: 'tag_notes', - }); - let message = `Tag the following ${files.length} knowledge notes by prepending YAML frontmatter with appropriate tags.\n\n`; message += `**Important:** Use workspace-relative paths with file-editText (e.g. 
"knowledge/People/Sarah Chen.md", NOT absolute paths).\n\n`; @@ -105,33 +97,20 @@ async function tagNoteBatch( message += `\n\n---\n\n`; } - const filesEdited = new Set(); - - const unsubscribe = await bus.subscribe(run.id, async (event) => { - if (event.type !== 'tool-invocation') { - return; - } - if (event.toolName !== 'file-editText') { - return; - } - try { - const parsed = JSON.parse(event.input) as { path?: string }; - if (typeof parsed.path === 'string') { - filesEdited.add(parsed.path); - } - } catch { - // ignore parse errors - } + const result = await runHeadlessAgent({ + agentId: NOTE_TAGGING_AGENT, + message, + model: await getKgModel(), + useCase: 'knowledge_sync', + subUseCase: 'tag_notes', }); + if (result.error) throw new Error(result.error); - await createMessage(run.id, message); - try { - await waitForRunCompletion(run.id, { throwOnError: true }); - } finally { - unsubscribe(); - } + // Files the agent edited, read from the completed turn's tool calls (the + // new runtime's event bus carries only tool-call ids, not args). 
+ const filesEdited = new Set(editedPaths(result.turn, ['file-editText'])); - return { runId: run.id, filesEdited }; + return { runId: result.turnId, filesEdited }; } /** diff --git a/apps/x/packages/core/src/pre_built/runner.ts b/apps/x/packages/core/src/pre_built/runner.ts index 0596372f..d3d1a160 100644 --- a/apps/x/packages/core/src/pre_built/runner.ts +++ b/apps/x/packages/core/src/pre_built/runner.ts @@ -1,9 +1,8 @@ import fs from 'fs'; import path from 'path'; import { WorkDir } from '../config/config.js'; -import { createRun, createMessage } from '../runs/runs.js'; +import { runHeadlessAgent } from '../agent-runtime/headless.js'; import { getKgModel } from '../models/defaults.js'; -import { waitForRunCompletion } from '../agents/utils.js'; import { loadConfig, loadState, @@ -38,16 +37,8 @@ async function runAgent(agentName: string): Promise { } try { - // Create a run for the agent - // The agent file is expected to be in the agents directory with the same name - const run = await createRun({ - agentId: agentName, - model: await getKgModel(), - useCase: 'knowledge_sync', - subUseCase: 'pre_built', - }); - - // Build trigger message with user context + // Build trigger message with user context. The agent file is expected + // to be in the agents directory with the same name as `agentName`. const message = `Run your scheduled task. 
**Current time:** ${new Date().toISOString()} @@ -59,10 +50,14 @@ async function runAgent(agentName: string): Promise { Process new items and use the user context above to identify yourself when drafting responses.`; - await createMessage(run.id, message); - - // Wait for completion - await waitForRunCompletion(run.id); + const result = await runHeadlessAgent({ + agentId: agentName, + message, + model: await getKgModel(), + useCase: 'knowledge_sync', + subUseCase: 'pre_built', + }); + if (result.error) throw new Error(result.error); // Update last run time setLastRunTime(agentName, new Date()); @@ -79,8 +74,6 @@ Process new items and use the user context above to identify yourself when draft * Check all agents and run those that are due */ async function checkAndRunAgents(): Promise { - const config = loadConfig(); - for (const agentName of PREBUILT_AGENTS) { try { if (shouldRunAgent(agentName)) { diff --git a/apps/x/packages/core/src/runs/bus.ts b/apps/x/packages/core/src/runs/bus.ts deleted file mode 100644 index 7ddb8577..00000000 --- a/apps/x/packages/core/src/runs/bus.ts +++ /dev/null @@ -1,4 +0,0 @@ -import container from "../di/container.js"; -import { IBus } from "../application/lib/bus.js"; - -export const bus = container.resolve('bus'); \ No newline at end of file diff --git a/apps/x/packages/core/src/security/permission-metadata.test.ts b/apps/x/packages/core/src/security/permission-metadata.test.ts new file mode 100644 index 00000000..a1c293dd --- /dev/null +++ b/apps/x/packages/core/src/security/permission-metadata.test.ts @@ -0,0 +1,90 @@ +import { mkdtemp, realpath, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, expect, it } from "vitest"; +import { z } from "zod"; +import { ToolAttachment } from "@x/shared/dist/agent.js"; +import { ToolCallPart } from "@x/shared/dist/message.js"; +import { WorkDir } from "../config/config.js"; +import type { FileAccessGrant } from 
"../config/security.js"; +import { getToolPermissionMetadata } from "./permission-metadata.js"; + +function call(toolName: string, args: Record): z.infer { + return { type: "tool-call", toolCallId: "tc1", toolName, arguments: args }; +} +const builtin = (name: string): z.infer => ({ type: "builtin", name }); + +// `zzbinary` is not in any plausible allow-list, so executeCommand on it is +// always "blocked" → requires permission, unless the session explicitly allows +// it. This keeps the command-branch tests independent of the dev's config. +describe("getToolPermissionMetadata", () => { + it("flags a blocked command and lists its command names", async () => { + const meta = await getToolPermissionMetadata( + call("executeCommand", { command: "zzbinary --flag" }), + builtin("executeCommand"), + new Set(), + [], + ); + expect(meta).toEqual({ kind: "command", commandNames: ["zzbinary"] }); + }); + + it("short-circuits a command the session already allows", async () => { + const meta = await getToolPermissionMetadata( + call("executeCommand", { command: "zzbinary --flag" }), + builtin("executeCommand"), + new Set(["zzbinary"]), + [], + ); + expect(meta).toBeNull(); + }); + + it("requires permission for a write outside the workspace", async () => { + const dir = await mkdtemp(join(tmpdir(), "perm-meta-")); + const target = join(dir, "out.txt"); + const meta = await getToolPermissionMetadata( + call("file-writeText", { path: target }), + builtin("file-writeText"), + new Set(), + [], + ); + expect(meta).toMatchObject({ kind: "file", operation: "write" }); + expect((meta as { paths: string[] }).paths.length).toBe(1); + }); + + it("short-circuits when a session file grant covers the path", async () => { + const dir = await mkdtemp(join(tmpdir(), "perm-meta-")); + const target = join(dir, "out.txt"); + // Stored grants hold canonical paths (the gate compares against the + // realpath'd target); on macOS /tmp resolves under /private. 
+ const grant: FileAccessGrant = { operation: "write", pathPrefix: await realpath(dir) }; + const meta = await getToolPermissionMetadata( + call("file-writeText", { path: target }), + builtin("file-writeText"), + new Set(), + [grant], + ); + expect(meta).toBeNull(); + }); + + it("requires no permission for a path inside the workspace", async () => { + const inside = join(WorkDir, "knowledge"); + await writeFile(join(WorkDir, "knowledge", ".perm-meta-probe"), "x").catch(() => undefined); + const meta = await getToolPermissionMetadata( + call("file-readText", { path: inside }), + builtin("file-readText"), + new Set(), + [], + ); + expect(meta).toBeNull(); + }); + + it("never requires permission for non-builtin (MCP) tools", async () => { + const meta = await getToolPermissionMetadata( + call("search", { q: "x" }), + { type: "mcp", name: "search", description: "", inputSchema: {}, mcpServerName: "srv" }, + new Set(), + [], + ); + expect(meta).toBeNull(); + }); +}); diff --git a/apps/x/packages/core/src/security/permission-metadata.ts b/apps/x/packages/core/src/security/permission-metadata.ts new file mode 100644 index 00000000..e74248bd --- /dev/null +++ b/apps/x/packages/core/src/security/permission-metadata.ts @@ -0,0 +1,143 @@ +import path from "path"; +import { z } from "zod"; +import { ToolAttachment } from "@x/shared/dist/agent.js"; +import { ToolCallPart } from "@x/shared/dist/message.js"; +import { ToolPermissionMetadata } from "@x/shared/dist/runs.js"; +import { WorkDir } from "../config/config.js"; +import { isBlocked, extractCommandNames } from "../application/lib/command-executor.js"; +import { getFileAccessAllowList, type FileAccessGrant, type FileAccessOperation } from "../config/security.js"; +import { resolveFilePathForPermission } from "../filesystem/files.js"; + +// Deterministic permission metadata for a tool call: given a tool call and its +// underlying attachment, returns what the user would be approving (a set of +// command names, or a file 
/**
 * Reports whether `child` is `parent` itself or lies somewhere beneath it.
 *
 * Both arguments are compared lexically via `path.relative`; callers are
 * expected to pass already-resolved paths (no symlink resolution happens here).
 *
 * @param parent Directory that is supposed to contain `child`.
 * @param child  Path being tested.
 * @returns `true` when `child` equals `parent` or is a descendant of it.
 */
function isPathInside(parent: string, child: string): boolean {
    const relative = path.relative(parent, child);
    // Identical paths: a grant on a directory covers the directory itself.
    if (relative === '') {
        return true;
    }
    // `path.relative` returns an absolute path when no relative route exists
    // (e.g. a different drive on Windows) — that is never "inside".
    if (path.isAbsolute(relative)) {
        return false;
    }
    // Only a real parent-directory segment escapes `parent`. A plain
    // `startsWith('..')` would also reject legitimate children whose first
    // segment merely begins with two dots, such as `..cache/file.txt`.
    const escapesParent = relative === '..' || relative.startsWith(`..${path.sep}`);
    return !escapesParent;
}
// Built-in tools that take a single `path` argument, keyed to the file
// operation the user would be approving. A Map (not an object literal) so that
// arbitrary tool names can never hit an inherited prototype property.
const SINGLE_PATH_OPERATIONS: ReadonlyMap<string, FileAccessOperation> = new Map<string, FileAccessOperation>([
    ['file-readText', 'read'],
    ['parseFile', 'read'],
    ['LLMParse', 'read'],
    ['file-exists', 'read'],
    ['file-stat', 'read'],
    ['file-list', 'list'],
    ['file-writeText', 'write'],
    ['file-editText', 'write'],
    ['file-mkdir', 'write'],
    ['file-remove', 'delete'],
]);

/**
 * Maps a built-in tool call onto the file operation and raw path arguments it
 * touches.
 *
 * @param toolName Name of the built-in tool.
 * @param args     The tool call's argument object.
 * @returns The operation plus the (unresolved) paths it targets, or `null`
 *          when the tool is not a file tool or its required path arguments are
 *          missing, empty, or not strings.
 */
function filePermissionTargets(toolName: string, args: Record<string, unknown>): { operation: FileAccessOperation; paths: string[] } | null {
    // Path arguments only count when they are non-empty strings.
    const nonEmptyString = (value: unknown): string | undefined =>
        typeof value === 'string' && value ? value : undefined;

    const singlePathOperation = SINGLE_PATH_OPERATIONS.get(toolName);
    if (singlePathOperation !== undefined) {
        const target = nonEmptyString(args.path);
        return target === undefined ? null : { operation: singlePathOperation, paths: [target] };
    }

    // Search tools fall back to '.' when no explicit root is given.
    if (toolName === 'file-glob') {
        return { operation: 'search', paths: [nonEmptyString(args.cwd) ?? '.'] };
    }
    if (toolName === 'file-grep') {
        return { operation: 'search', paths: [nonEmptyString(args.searchPath) ?? '.'] };
    }

    // Copy and rename touch both endpoints; both must be present.
    if (toolName === 'file-copy' || toolName === 'file-rename') {
        const from = nonEmptyString(args.from);
        const to = nonEmptyString(args.to);
        return from !== undefined && to !== undefined
            ? { operation: 'write', paths: [from, to] }
            : null;
    }

    return null;
}

/**
 * Computes what the user would be approving for a tool call.
 *
 * @param toolCall                 The tool call being evaluated.
 * @param underlyingTool           The attachment backing the call; only
 *                                 `builtin` attachments can require approval.
 * @param sessionAllowedCommands   Command names already allowed this session.
 * @param sessionAllowedFileAccess File grants already given this session.
 * @returns Command metadata for a blocked `executeCommand`, file metadata for
 *          file targets outside the workspace that no persistent or session
 *          grant covers, or `null` when no approval is needed.
 */
export async function getToolPermissionMetadata(
    toolCall: z.infer<typeof ToolCallPart>,
    underlyingTool: z.infer<typeof ToolAttachment>,
    sessionAllowedCommands: Set<string>,
    sessionAllowedFileAccess: FileAccessGrant[],
): Promise<ToolPermissionMetadataValue | null> {
    if (underlyingTool.type !== 'builtin') {
        return null;
    }

    const toolName = underlyingTool.name;
    const rawArgs = toolCall.arguments;

    if (toolName === 'executeCommand') {
        if (!rawArgs || typeof rawArgs !== 'object' || !('command' in rawArgs)) {
            return null;
        }
        const command = String((rawArgs as { command: unknown }).command);
        return isBlocked(command, sessionAllowedCommands)
            ? { kind: 'command', commandNames: extractCommandNames(command) }
            : null;
    }

    const fileArgs: Record<string, unknown> = rawArgs && typeof rawArgs === 'object'
        ? rawArgs as Record<string, unknown>
        : {};
    const targets = filePermissionTargets(toolName, fileArgs);
    if (targets === null) {
        return null;
    }

    // Resolve every target, then keep the canonical form of those that fall
    // outside the workspace.
    const resolved = await Promise.all(targets.paths.map(raw => resolveFilePathForPermission(raw)));
    const outsideWorkspace: string[] = [];
    for (const entry of resolved) {
        if (!entry.isInsideWorkspace) {
            outsideWorkspace.push(entry.canonicalPath);
        }
    }
    if (outsideWorkspace.length === 0) {
        return null;
    }

    // The persistent allow-list is only read once we know it matters.
    const grants = [...getFileAccessAllowList(), ...sessionAllowedFileAccess];
    const needsApproval = outsideWorkspace.filter(candidate =>
        grants.every(grant => !fileGrantCoversPath(grant, targets.operation, candidate))
    );
    if (needsApproval.length === 0) {
        return null;
    }

    return {
        kind: 'file',
        operation: targets.operation,
        paths: needsApproval,
        pathPrefix: grantPrefixForTool(toolName, needsApproval),
    };
}
list(filter?: { agentId?: string }): Promise[]>; update(session: z.infer): Promise; + delete(id: string): Promise; } diff --git a/apps/x/packages/core/src/sessions/sessions.test.ts b/apps/x/packages/core/src/sessions/sessions.test.ts index e0c0f01c..1b897075 100644 --- a/apps/x/packages/core/src/sessions/sessions.test.ts +++ b/apps/x/packages/core/src/sessions/sessions.test.ts @@ -79,7 +79,7 @@ class FakeModelAdapter implements ModelAdapter { class FakeToolRunner implements ToolRunner { ran: string[] = []; - definitions() { + async definitions() { return []; } @@ -129,8 +129,11 @@ function turnFixture( provider: null, model: null, permissionMode: "manual", + useCase: null, + subUseCase: null, sessionId: null, sessionSeq: null, + composeContext: null, messages: [], permissionRequests: [], permissionDecisions: [], diff --git a/apps/x/packages/core/src/sessions/sessions.ts b/apps/x/packages/core/src/sessions/sessions.ts index da9b781e..9e30b3bd 100644 --- a/apps/x/packages/core/src/sessions/sessions.ts +++ b/apps/x/packages/core/src/sessions/sessions.ts @@ -6,9 +6,14 @@ import { KeyedMutex } from "../agent-loop/mutex.js"; import type { TurnStore } from "../agent-loop/turn-store.js"; import { AgentLoopTurn, + ComposeContext, closedTranscript, deriveTurnStatus, } from "../agent-loop/types.js"; +import { + NoopUserMessageContextComposer, + type UserMessageContextComposer, +} from "./user-message-context-composer.js"; import type { SessionStore } from "./session-store.js"; import { CreateSessionInput, SendMessageOptions, Session } from "./types.js"; @@ -27,26 +32,44 @@ export interface Sessions { ): Promise; getHistory(sessionId: string): Promise>; listTurns(sessionId: string): Promise[]>; + // Permanently remove a session and all of its turns. 
/**
 * Extracts the compose chips (voice input/output, search, code mode) from a
 * send's options into the shape stored on the turn.
 *
 * Only chips that were explicitly provided (i.e. not `undefined`) are copied.
 *
 * @param options Parsed send-message options.
 * @returns The chips that were set, or `null` when none were — so a plain
 *          message stores nothing.
 */
function composeContextFromOptions(
    options: z.infer<typeof SendMessageOptions>,
): z.infer<typeof ComposeContext> | null {
    const { voiceInput, voiceOutput, searchEnabled, codeMode } = options;
    const chips: z.infer<typeof ComposeContext> = {
        ...(voiceInput !== undefined ? { voiceInput } : {}),
        ...(voiceOutput !== undefined ? { voiceOutput } : {}),
        ...(searchEnabled !== undefined ? { searchEnabled } : {}),
        ...(codeMode !== undefined ? { codeMode } : {}),
    };
    if (Object.keys(chips).length === 0) {
        return null;
    }
    return chips;
}
+ await this.mutex.run(sessionId, async () => { + await this.turnStore.deleteBySession(sessionId); + await this.sessionStore.delete(sessionId); + }); + } + async sendMessage( sessionId: string, messages: z.infer, @@ -99,6 +132,15 @@ export class SessionsImpl implements Sessions { ); } } + // Attach per-message context (fresh datetime + middle pane) to the + // new user messages only — history already carries its own. Delegated + // to the injected composer (no-op by default; copilot-aware in the + // real runtime), keeping this layer agent-agnostic. + const withContext = this.userMessageContext.attach(newMessages, { + agentId: session.agentId, + middlePaneContext: parsedOptions.middlePaneContext ?? null, + }); + const composeContext = composeContextFromOptions(parsedOptions); // Bump recency BEFORE creating the turn: if this write fails, no // orphan turn is left running with its handle lost to the caller. session.updatedAt = nowIso(); @@ -110,9 +152,13 @@ export class SessionsImpl implements Sessions { ...(parsedOptions.permissionMode !== undefined ? { permissionMode: parsedOptions.permissionMode } : {}), + // Sessions are the chat surface — default attribution to chat. + useCase: parsedOptions.useCase ?? "copilot_chat", + subUseCase: parsedOptions.subUseCase ?? null, + composeContext, sessionId, sessionSeq: (latest?.sessionSeq ?? 0) + 1, - messages: [...(latest ? closedTranscript(latest) : []), ...newMessages], + messages: [...(latest ? 
closedTranscript(latest) : []), ...withContext], }); }); } diff --git a/apps/x/packages/core/src/sessions/sqlite-session-store.ts b/apps/x/packages/core/src/sessions/sqlite-session-store.ts index e8857eae..850a35cb 100644 --- a/apps/x/packages/core/src/sessions/sqlite-session-store.ts +++ b/apps/x/packages/core/src/sessions/sqlite-session-store.ts @@ -48,6 +48,13 @@ export class SqliteSessionStore implements SessionStore { throw new Error(`Session not found: ${id}`); } } + + async delete(id: string): Promise { + await this.db + .deleteFrom("sessions") + .where("id", "=", id) + .execute(); + } } function toRow(session: z.infer): Insertable { diff --git a/apps/x/packages/core/src/sessions/types.ts b/apps/x/packages/core/src/sessions/types.ts index 96b4c7f9..3b3d3b01 100644 --- a/apps/x/packages/core/src/sessions/types.ts +++ b/apps/x/packages/core/src/sessions/types.ts @@ -1,27 +1,3 @@ -import { z } from "zod"; -import { PermissionMode } from "../agent-loop/types.js"; - -// A session is a grouping label plus a title — an ordered chain of turns, -// linked via the turn's sessionId/sessionSeq. All configuration (provider, -// model, permission mode) flows through sendMessage at the moment it is used -// and lands on the turn row as the durable record; the session deliberately -// stores none of it. agentId is the exception: a session is a conversation -// WITH an agent, and "list sessions for agent X" is a session-level query. 
-export const Session = z.object({ - id: z.string(), - agentId: z.string().nullable(), - title: z.string().nullable(), - createdAt: z.string(), - updatedAt: z.string(), -}); - -export const CreateSessionInput = z.object({ - agentId: z.string().nullable().optional(), - title: z.string().nullable().optional(), -}); - -export const SendMessageOptions = z.object({ - provider: z.string().nullable().optional(), - model: z.string().nullable().optional(), - permissionMode: PermissionMode.optional(), -}); +// Session contracts now live in @x/shared so the IPC layer and renderer share +// them. This re-export keeps core's `./types.js` imports working unchanged. +export * from "@x/shared/dist/sessions.js"; diff --git a/apps/x/packages/core/src/sessions/user-message-context-composer.ts b/apps/x/packages/core/src/sessions/user-message-context-composer.ts new file mode 100644 index 00000000..4ac6a5e3 --- /dev/null +++ b/apps/x/packages/core/src/sessions/user-message-context-composer.ts @@ -0,0 +1,24 @@ +import { z } from "zod"; +import { MessageList, MiddlePaneContext } from "@x/shared/dist/message.js"; + +// Attaches per-message context (fresh datetime, middle pane) to the new user +// messages of a send. Injected into Sessions so the generic session layer stays +// agent-agnostic and deterministic; the real (copilot) implementation lives in +// agent-runtime, symmetric with the loop's SystemComposer. +export interface UserMessageContextComposer { + attach( + messages: z.infer, + ctx: { + agentId: string | null; + middlePaneContext: z.infer | null; + }, + ): z.infer; +} + +// Default: attaches nothing. Keeps Sessions usable (and unit tests +// deterministic) without a composer. 
+export class NoopUserMessageContextComposer implements UserMessageContextComposer { + attach(messages: z.infer): z.infer { + return messages; + } +} diff --git a/apps/x/packages/core/src/storage/migrations.ts b/apps/x/packages/core/src/storage/migrations.ts index e605b516..e88cbaa7 100644 --- a/apps/x/packages/core/src/storage/migrations.ts +++ b/apps/x/packages/core/src/storage/migrations.ts @@ -125,6 +125,37 @@ const migrations: Record = { await db.schema.alterTable("agent_loop_turns").dropColumn("prefix_length").execute(); }, }, + "2026-06-14_0006_turn_compose_context": { + async up(db: MigrationDb): Promise { + // Per-turn compose chips (voice / search / code-mode) as JSON, or + // null when the turn had none. Existing rows default to null. + await db.schema + .alterTable("agent_loop_turns") + .addColumn("compose_context", "text") + .execute(); + }, + async down(db: MigrationDb): Promise { + await db.schema.alterTable("agent_loop_turns").dropColumn("compose_context").execute(); + }, + }, + "2026-06-14_0007_turn_use_case": { + async up(db: MigrationDb): Promise { + // Analytics attribution (use case / sub use case) for the turn's LLM + // usage. Existing rows default to null (untagged). 
+ await db.schema + .alterTable("agent_loop_turns") + .addColumn("use_case", "text") + .execute(); + await db.schema + .alterTable("agent_loop_turns") + .addColumn("sub_use_case", "text") + .execute(); + }, + async down(db: MigrationDb): Promise { + await db.schema.alterTable("agent_loop_turns").dropColumn("sub_use_case").execute(); + await db.schema.alterTable("agent_loop_turns").dropColumn("use_case").execute(); + }, + }, }; class InCodeMigrationProvider implements MigrationProvider { diff --git a/apps/x/packages/core/src/storage/schema.ts b/apps/x/packages/core/src/storage/schema.ts index dfc53c11..1e8879e5 100644 --- a/apps/x/packages/core/src/storage/schema.ts +++ b/apps/x/packages/core/src/storage/schema.ts @@ -14,8 +14,11 @@ export interface AgentLoopTurnsTable { provider: string | null; model: string | null; permission_mode: string; + use_case: string | null; + sub_use_case: string | null; session_id: string | null; session_seq: number | null; + compose_context: string | null; // JSON: ComposeContext | null messages: string; // JSON: MessageList (delta past prefix_length) prefix_length: number; // copy-forward prefix deduped at rest; 0 = stored whole permission_requests: string; // JSON: PermissionRequest[] diff --git a/apps/x/packages/shared/src/agent-turn.ts b/apps/x/packages/shared/src/agent-turn.ts new file mode 100644 index 00000000..b86e52ef --- /dev/null +++ b/apps/x/packages/shared/src/agent-turn.ts @@ -0,0 +1,305 @@ +import { z } from "zod"; +import { + AssistantMessage, + CodeMode, + Message, + MessageList, + ToolCallPart, + VoiceOutputMode, +} from "./message.js"; + +// ─── Persisted fact schemas ───────────────────────────────────────────────── +// +// A turn is five append-only fact logs + set-once scalars. Records are never +// mutated or deleted; every field records exactly one non-derivable fact. +// Everything else (status, per-call lifecycle) is derived. 
+// +// This is the cross-boundary contract for the new runtime (like runs.ts was for +// the old one): core persists/derives it, the IPC layer ships it, the renderer +// renders it. Pure (zod + message schemas) so it is safe to import in the +// browser-side renderer. + +export const PermissionRequest = z.object({ + toolCallId: z.string(), + // What the user is approving (file access, command, ...). Computed from + // tool args by the PermissionGate, so it must be persisted to pin down + // exactly what was asked. + request: z.unknown(), + requestedAt: z.string(), +}); + +export const PermissionDecision = z.discriminatedUnion("decidedBy", [ + z.object({ + toolCallId: z.string(), + decidedBy: z.literal("user"), + decision: z.enum(["granted", "denied"]), + reason: z.string().nullable(), + decidedAt: z.string(), + }), + z.object({ + toolCallId: z.string(), + decidedBy: z.literal("classifier"), + decision: z.enum(["granted", "denied", "abstained"]), + reason: z.string(), + decidedAt: z.string(), + }), +]); + +export const StartedTool = z.object({ + toolCallId: z.string(), + startedAt: z.string(), +}); + +export const DispatchedTool = z.object({ + toolCallId: z.string(), + dispatchedAt: z.string(), +}); + +// One entry per model call. Token counts are as reported by the provider — +// null when the provider did not report that field. Aggregate via totalUsage. +export const ModelUsage = z.object({ + inputTokens: z.number().nullable(), + outputTokens: z.number().nullable(), + totalTokens: z.number().nullable(), + reasoningTokens: z.number().nullable(), + cachedInputTokens: z.number().nullable(), + at: z.string(), +}); + +export const AgentLoopError = z.object({ + message: z.string(), + code: z.string().optional(), + details: z.unknown().optional(), + at: z.string(), +}); + +export const PermissionMode = z.enum(["manual", "auto"]); + +// Per-turn compose chips that shape the system prompt and tool routing. 
+// Middle-pane context is NOT here — it rides on the user message +// (UserMessage.userMessageContext), captured fresh at send time. +export const ComposeContext = z.object({ + voiceInput: z.boolean().optional(), + voiceOutput: VoiceOutputMode.optional(), + searchEnabled: z.boolean().optional(), + codeMode: CodeMode.optional(), +}); + +export const AgentLoopTurn = z.object({ + id: z.string(), + agentId: z.string().nullable(), + provider: z.string().nullable(), + model: z.string().nullable(), + permissionMode: PermissionMode, + + // Analytics attribution for this turn's LLM usage (the PostHog `llm_usage` + // event). Opaque strings here — values come from core's UseCase taxonomy + // (e.g. "copilot_chat", "live_note_agent", "knowledge_sync"). null when the + // turn isn't attributed. Also installed into the async-local use-case context + // so nested LLM calls (permission classifier, builtin tools) inherit it. + useCase: z.string().nullable(), + subUseCase: z.string().nullable(), + + // Session linkage — opaque to the loop (the sessions layer owns the + // meaning). seq is the turn's 1-based position within its session. + sessionId: z.string().nullable(), + sessionSeq: z.number().int().positive().nullable(), + + // Per-turn compose chips (voice / search / code-mode); null when none. + // Read by the SystemComposer and (codeMode) the tool runner. 
+ composeContext: ComposeContext.nullable(), + + // append-only fact logs + messages: MessageList, + permissionRequests: z.array(PermissionRequest), + permissionDecisions: z.array(PermissionDecision), + startedTools: z.array(StartedTool), + dispatchedTools: z.array(DispatchedTool), + modelUsage: z.array(ModelUsage), + + // set-once scalars + error: AgentLoopError.nullable(), + completedAt: z.string().nullable(), + + createdAt: z.string(), + updatedAt: z.string(), +}); + +export const AgentLoopInput = z.object({ + agentId: z.string().nullable().optional(), + provider: z.string().nullable().optional(), + model: z.string().nullable().optional(), + permissionMode: PermissionMode.optional(), + useCase: z.string().nullable().optional(), + subUseCase: z.string().nullable().optional(), + sessionId: z.string().nullable().optional(), + sessionSeq: z.number().int().positive().nullable().optional(), + composeContext: ComposeContext.nullable().optional(), + // May include prior-conversation history; turns are self-contained by design. + messages: MessageList.min(1), +}).refine( + (input) => (input.sessionId == null) === (input.sessionSeq == null), + { message: "sessionId and sessionSeq must be set together" }, +); + +// ─── Tool definitions (environment, not turn state) ──────────────────────── + +export type ToolDefinition = { + name: string; + description?: string; + // JSON Schema for the tool input + inputSchema?: unknown; +}; + +// ─── Live (never persisted) event types ───────────────────────────────────── + +export type ModelStreamEvent = + | { type: "text-delta"; delta: string } + | { type: "reasoning-delta"; delta: string } + | { type: "tool-call"; toolCall: z.infer } + | { type: "finish"; message: z.infer } + | { type: "error"; error: unknown }; + +export type TurnEvent = + | ModelStreamEvent + | { type: "tool-execution-start"; toolCallId: string } + // Incremental output streamed by a tool while it runs (e.g. command stdout, + // code-agent progress). 
// ─── Derived state ──────────────────────────────────────────────────────────

export type TurnStatus = "waiting" | "completed" | "error" | "idle";

export type ToolCallState =
    | "resolved"          // a matching ToolMessage exists — terminal
    | "dispatched"        // delegated; the result arrives via setToolResult
    | "interrupted"       // started but neither resolved nor dispatched (crash/abort)
    | "needs-classifier"  // open request in auto mode; classifier has not spoken
    | "awaiting-user"     // open request waiting on a user decision
    | "cleared"           // terminal `granted` decision; ready to execute
    | "unevaluated";      // no facts yet; the permission gate was never consulted

/**
 * Collects every tool-call part emitted by assistant messages, in transcript
 * order. Assistant messages with plain-string content carry no parts.
 */
export function toolCallParts(
    turn: z.infer<typeof AgentLoopTurn>,
): z.infer<typeof ToolCallPart>[] {
    return turn.messages.flatMap((message): z.infer<typeof ToolCallPart>[] => {
        if (message.role !== "assistant" || typeof message.content === "string") {
            return [];
        }
        return message.content.filter(
            (part): part is z.infer<typeof ToolCallPart> => part.type === "tool-call",
        );
    });
}

/** Ids of tool calls that already have a ToolMessage in the transcript. */
export function resolvedToolCallIds(turn: z.infer<typeof AgentLoopTurn>): Set<string> {
    const resolved = new Set<string>();
    turn.messages.forEach((message) => {
        if (message.role === "tool") {
            resolved.add(message.toolCallId);
        }
    });
    return resolved;
}

/** Tool-call parts that have no ToolMessage yet, in transcript order. */
export function unresolvedToolCalls(
    turn: z.infer<typeof AgentLoopTurn>,
): z.infer<typeof ToolCallPart>[] {
    const done = resolvedToolCallIds(turn);
    return toolCallParts(turn).filter((call) => !done.has(call.toolCallId));
}

/**
 * Derives the lifecycle state of one tool call from the turn's fact logs.
 *
 * Precedence: resolved → dispatched → interrupted → permission facts →
 * unevaluated.
 */
export function deriveToolCallState(
    turn: z.infer<typeof AgentLoopTurn>,
    toolCallId: string,
): ToolCallState {
    const matches = (fact: { toolCallId: string }): boolean => fact.toolCallId === toolCallId;

    if (resolvedToolCallIds(turn).has(toolCallId)) return "resolved";
    if (turn.dispatchedTools.some(matches)) return "dispatched";
    if (turn.startedTools.some(matches)) return "interrupted";

    // Without a permission request there are no facts about this call at all.
    if (turn.permissionRequests.find(matches) === undefined) {
        return "unevaluated";
    }

    const decisions = turn.permissionDecisions.filter(matches);
    const terminal = decisions.find(
        (entry) => entry.decision === "granted" || entry.decision === "denied",
    );
    if (terminal) {
        // A denial is written together with its denial ToolMessage, so an
        // unresolved terminal decision should be a grant. Check anyway: an
        // unpaired denial must never derive as executable, so it falls back
        // to awaiting-user, where a fresh decision repairs it.
        if (terminal.decision === "granted") {
            return "cleared";
        }
        return "awaiting-user";
    }

    const classifierSpoke = decisions.some((entry) => entry.decidedBy === "classifier");
    if (turn.permissionMode === "auto" && !classifierSpoke) {
        return "needs-classifier";
    }
    return "awaiting-user";
}

/**
 * Derives the turn's overall status: `error` and `completed` come from the
 * set-once scalars; otherwise the turn is `waiting` when any unresolved call
 * is awaiting a user decision or a dispatched result, and `idle` if not.
 */
export function deriveTurnStatus(turn: z.infer<typeof AgentLoopTurn>): TurnStatus {
    if (turn.error !== null) return "error";
    if (turn.completedAt !== null) return "completed";

    const isBlocked = unresolvedToolCalls(turn).some((call) => {
        const state = deriveToolCallState(turn, call.toolCallId);
        return state === "awaiting-user" || state === "dispatched";
    });
    return isBlocked ? "waiting" : "idle";
}
/**
 * Builds the message list a follow-up turn should start from.
 *
 * Copies `turn.messages` and appends one synthetic tool message for every
 * tool call that `unresolvedToolCalls` still reports, so the returned list
 * contains no dangling calls. The turn passed in is not modified.
 *
 * @param turn - The turn whose transcript is being closed out.
 * @returns A new array: the turn's messages followed by one closure message
 *   per unresolved call, in the order `unresolvedToolCalls` yields them.
 */
export function closedTranscript(
  turn: z.infer<typeof AgentLoopTurn>,
): z.infer<typeof MessageList> {
  // Work on a copy; the closure messages must never leak into the turn itself.
  const messages: z.infer<typeof MessageList> = [...turn.messages];
  for (const call of unresolvedToolCalls(turn)) {
    messages.push({
      role: "tool",
      // The wording depends on how far the call got before the turn stopped.
      content: closureContent(deriveToolCallState(turn, call.toolCallId)),
      toolCallId: call.toolCallId,
      toolName: call.toolName,
    });
  }
  return messages;
}

/**
 * Honest per-state wording for a dangling call: how far did it actually get?
 *
 * @param state - The derived state of the unresolved tool call.
 * @returns The text placed in the synthetic tool message for that call.
 */
function closureContent(state: ToolCallState): string {
  switch (state) {
    case "interrupted":
      // execution began in-process; the side effect may have landed
      return "Tool execution was interrupted before completing. It may or may not have taken effect; do not assume it ran.";
    case "dispatched":
      // delegated to an external runner; it may still finish out there
      return "Tool was dispatched but its result never arrived; it may have completed externally. Do not assume it ran or that it failed.";
    default:
      // never reached execution (unevaluated / awaiting permission / cleared-but-not-started)
      return "Tool was not executed: the turn was stopped before this call ran.";
  }
}

// Sum of all model calls in the turn. A field is null only if no call
// reported it; otherwise unreported entries count as 0 toward the sum.
+export function totalUsage( + turn: z.infer, +): Omit, "at"> { + const sum = (field: "inputTokens" | "outputTokens" | "totalTokens" | "reasoningTokens" | "cachedInputTokens") => { + const reported = turn.modelUsage.map((u) => u[field]).filter((v) => v !== null); + if (reported.length === 0) return null; + return reported.reduce((a, b) => a + b, 0); + }; + return { + inputTokens: sum("inputTokens"), + outputTokens: sum("outputTokens"), + totalTokens: sum("totalTokens"), + reasoningTokens: sum("reasoningTokens"), + cachedInputTokens: sum("cachedInputTokens"), + }; +} diff --git a/apps/x/packages/shared/src/message.ts b/apps/x/packages/shared/src/message.ts index cdf5d983..6e32ba5e 100644 --- a/apps/x/packages/shared/src/message.ts +++ b/apps/x/packages/shared/src/message.ts @@ -69,6 +69,17 @@ export const UserMessageContext = z.object({ ]).optional(), }); +// What the user had open in the middle pane when a message was sent. Captured +// per message and surfaced to the agent via UserMessageContext.middlePane. +export const MiddlePaneContext = z.discriminatedUnion("kind", [ + z.object({ kind: z.literal("note"), path: z.string(), content: z.string() }), + z.object({ kind: z.literal("browser"), url: z.string(), title: z.string() }), +]); + +// Per-message compose chips (voice transcription / read-aloud, coding agent). 
+export const VoiceOutputMode = z.enum(["summary", "full"]); +export const CodeMode = z.enum(["claude", "codex"]); + export const UserMessage = z.object({ role: z.literal("user"), content: UserMessageContent, diff --git a/apps/x/packages/shared/src/sessions.ts b/apps/x/packages/shared/src/sessions.ts new file mode 100644 index 00000000..82a12f79 --- /dev/null +++ b/apps/x/packages/shared/src/sessions.ts @@ -0,0 +1,47 @@ +import { z } from "zod"; +import { CodeMode, MiddlePaneContext, VoiceOutputMode } from "./message.js"; +import { AgentLoopTurn, PermissionMode, type TurnEvent } from "./agent-turn.js"; + +// A session is a grouping label plus a title — an ordered chain of turns, +// linked via the turn's sessionId/sessionSeq. All configuration (provider, +// model, permission mode) flows through sendMessage at the moment it is used +// and lands on the turn row as the durable record; the session deliberately +// stores none of it. agentId is the exception: a session is a conversation +// WITH an agent, and "list sessions for agent X" is a session-level query. +export const Session = z.object({ + id: z.string(), + agentId: z.string().nullable(), + title: z.string().nullable(), + createdAt: z.string(), + updatedAt: z.string(), +}); + +export const CreateSessionInput = z.object({ + agentId: z.string().nullable().optional(), + title: z.string().nullable().optional(), +}); + +export const SendMessageOptions = z.object({ + provider: z.string().nullable().optional(), + model: z.string().nullable().optional(), + permissionMode: PermissionMode.optional(), + // Analytics attribution. Defaults to "copilot_chat" in the sessions layer + // (sessions are the chat surface) when omitted. + useCase: z.string().optional(), + subUseCase: z.string().optional(), + // Per-message compose chips. voice/search/codeMode shape the turn's system + // prompt (stored as the turn's composeContext); middlePaneContext + a fresh + // datetime ride on the user message itself. 
+ voiceInput: z.boolean().optional(), + voiceOutput: VoiceOutputMode.optional(), + searchEnabled: z.boolean().optional(), + codeMode: CodeMode.optional(), + middlePaneContext: MiddlePaneContext.optional(), +}); + +// What the renderer's single feed consumer receives: live deltas (`event`) and +// committed state snapshots (`state`). Both carry turnId + sessionId so the +// useAgentTurn / useAgentSession hooks can filter. +export type SessionBusEvent = + | { kind: "event"; turnId: string; sessionId: string | null; event: TurnEvent } + | { kind: "state"; turnId: string; sessionId: string | null; turn: z.infer };