diff --git a/apps/x/packages/core/src/agents/runtime.ts b/apps/x/packages/core/src/agents/runtime.ts
index 014f67fc..b74151fa 100644
--- a/apps/x/packages/core/src/agents/runtime.ts
+++ b/apps/x/packages/core/src/agents/runtime.ts
@@ -1068,35 +1068,34 @@ export async function* streamAgent({
   // Local-provider prioritization: background agents yield to active chat
   if (isBackgroundAgent) {
     await waitIfChatActive(providerFlavor, signal);
-  } else {
-    markChatActive();
   }
   try {
-    for await (const event of streamLlm(
-      model,
-      state.messages,
-      instructionsWithDateTime,
-      tools,
-      signal,
-    )) {
-      messageBuilder.ingest(event);
-      yield* processEvent({
-        runId,
-        type: "llm-stream-event",
-        event: event,
-        subflow: [],
-      });
-      if (event.type === "error") {
-        streamError = event.error;
+    if (!isBackgroundAgent) markChatActive();
+    for await (const event of streamLlm(
+      model,
+      state.messages,
+      instructionsWithDateTime,
+      tools,
+      signal,
+    )) {
+      messageBuilder.ingest(event);
       yield* processEvent({
         runId,
-        type: "error",
-        error: streamError,
+        type: "llm-stream-event",
+        event: event,
         subflow: [],
       });
-      break;
+      if (event.type === "error") {
+        streamError = event.error;
+        yield* processEvent({
+          runId,
+          type: "error",
+          error: streamError,
+          subflow: [],
+        });
+        break;
+      }
     }
-  }
   } finally {
     if (!isBackgroundAgent) {
       markChatIdle();
diff --git a/apps/x/packages/core/src/models/llm-queue.ts b/apps/x/packages/core/src/models/llm-queue.ts
index 2b48a128..805ce339 100644
--- a/apps/x/packages/core/src/models/llm-queue.ts
+++ b/apps/x/packages/core/src/models/llm-queue.ts
@@ -9,23 +9,24 @@
  * Cloud providers bypass this entirely — they handle concurrency fine.
  */
 
+import type { z } from "zod";
+import type { LlmProvider } from "@x/shared/dist/models.js";
+
+type ProviderFlavor = z.infer<typeof LlmProvider>["flavor"];
+
 let chatActiveCount = 0;
 let chatIdleResolvers: Array<() => void> = [];
 
-/**
- * Call when an interactive chat LLM stream starts.
- * Nestable — supports concurrent interactive streams.
- */
 export function markChatActive(): void {
   chatActiveCount++;
 }
 
-/**
- * Call when an interactive chat LLM stream ends.
- * When all interactive streams finish, waiting background tasks resume.
- */
 export function markChatIdle(): void {
-  chatActiveCount = Math.max(0, chatActiveCount - 1);
+  if (chatActiveCount <= 0) {
+    console.warn("[llm-queue] markChatIdle called with no active chat — possible mismatched calls");
+    return;
+  }
+  chatActiveCount--;
   if (chatActiveCount === 0) {
     const resolvers = chatIdleResolvers;
     chatIdleResolvers = [];
@@ -35,23 +36,16 @@ export function markChatIdle(): void {
   }
 }
 
-/**
- * Returns true if the provider flavor represents a local inference server.
- */
-export function isLocalProvider(flavor: string): boolean {
+export function isLocalProvider(flavor: ProviderFlavor): boolean {
   return flavor === "ollama" || flavor === "openai-compatible";
 }
 
 /**
  * Background services call this before each LLM request.
- * - If the provider is cloud-based, returns immediately.
- * - If the provider is local and no chat is active, returns immediately.
- * - If the provider is local and chat IS active, waits until chat finishes.
- *
- * @param providerFlavor - The provider flavor string (e.g. "ollama", "openai", "anthropic")
- * @param signal - Optional AbortSignal to cancel the wait
+ * Returns immediately for cloud providers or when no chat is active.
+ * For local providers with active chat, waits until chat finishes.
  */
-export function waitIfChatActive(providerFlavor: string, signal?: AbortSignal): Promise<void> {
+export function waitIfChatActive(providerFlavor: ProviderFlavor, signal?: AbortSignal): Promise<void> {
   if (!isLocalProvider(providerFlavor)) {
     return Promise.resolve();
   }
diff --git a/apps/x/packages/core/src/models/models.ts b/apps/x/packages/core/src/models/models.ts
index 38b6801f..71202f5f 100644
--- a/apps/x/packages/core/src/models/models.ts
+++ b/apps/x/packages/core/src/models/models.ts
@@ -7,6 +7,7 @@ import { createOllama } from "ollama-ai-provider-v2";
 import { createOpenRouter } from '@openrouter/ai-sdk-provider';
 import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
 import { LlmModelConfig, LlmProvider } from "@x/shared/dist/models.js";
+import { isLocalProvider } from "./llm-queue.js";
 import z from "zod";
 import { isSignedIn } from "../account/account.js";
 import { getGatewayProvider } from "./gateway.js";
@@ -75,8 +76,7 @@ export async function testModelConnection(
   model: string,
   timeoutMs?: number,
 ): Promise<{ success: boolean; error?: string }> {
-  const isLocal = providerConfig.flavor === "ollama" || providerConfig.flavor === "openai-compatible";
-  const effectiveTimeout = timeoutMs ?? (isLocal ? 60000 : 8000);
+  const effectiveTimeout = timeoutMs ?? (isLocalProvider(providerConfig.flavor) ? 60000 : 8000);
   const controller = new AbortController();
   const timeout = setTimeout(() => controller.abort(), effectiveTimeout);
   try {