diff --git a/apps/x/packages/core/src/agents/runtime.ts b/apps/x/packages/core/src/agents/runtime.ts
index 014f67fc..b74151fa 100644
--- a/apps/x/packages/core/src/agents/runtime.ts
+++ b/apps/x/packages/core/src/agents/runtime.ts
@@ -1068,35 +1068,34 @@ export async function* streamAgent({
   // Local-provider prioritization: background agents yield to active chat
   if (isBackgroundAgent) {
     await waitIfChatActive(providerFlavor, signal);
-  } else {
-    markChatActive();
   }
   try {
-    for await (const event of streamLlm(
-      model,
-      state.messages,
-      instructionsWithDateTime,
-      tools,
-      signal,
-    )) {
-      messageBuilder.ingest(event);
-      yield* processEvent({
-        runId,
-        type: "llm-stream-event",
-        event: event,
-        subflow: [],
-      });
-      if (event.type === "error") {
-        streamError = event.error;
+    if (!isBackgroundAgent) markChatActive();
+    for await (const event of streamLlm(
+      model,
+      state.messages,
+      instructionsWithDateTime,
+      tools,
+      signal,
+    )) {
+      messageBuilder.ingest(event);
       yield* processEvent({
         runId,
-        type: "error",
-        error: streamError,
+        type: "llm-stream-event",
+        event: event,
         subflow: [],
       });
-      break;
+      if (event.type === "error") {
+        streamError = event.error;
+        yield* processEvent({
+          runId,
+          type: "error",
+          error: streamError,
+          subflow: [],
+        });
+        break;
+      }
     }
-  }
   } finally {
     if (!isBackgroundAgent) {
       markChatIdle();
diff --git a/apps/x/packages/core/src/models/llm-queue.ts b/apps/x/packages/core/src/models/llm-queue.ts
index 2b48a128..805ce339 100644
--- a/apps/x/packages/core/src/models/llm-queue.ts
+++ b/apps/x/packages/core/src/models/llm-queue.ts
@@ -9,23 +9,24 @@
  * Cloud providers bypass this entirely — they handle concurrency fine.
  */
 
+import type { z } from "zod";
+import type { LlmProvider } from "@x/shared/dist/models.js";
+
+type ProviderFlavor = z.infer<typeof LlmProvider>["flavor"];
+
 let chatActiveCount = 0;
 let chatIdleResolvers: Array<() => void> = [];
 
-/**
- * Call when an interactive chat LLM stream starts.
- * Nestable — supports concurrent interactive streams.
- */
 export function markChatActive(): void {
   chatActiveCount++;
 }
 
-/**
- * Call when an interactive chat LLM stream ends.
- * When all interactive streams finish, waiting background tasks resume.
- */
 export function markChatIdle(): void {
-  chatActiveCount = Math.max(0, chatActiveCount - 1);
+  if (chatActiveCount <= 0) {
+    console.warn("[llm-queue] markChatIdle called with no active chat — possible mismatched calls");
+    return;
+  }
+  chatActiveCount--;
   if (chatActiveCount === 0) {
     const resolvers = chatIdleResolvers;
     chatIdleResolvers = [];
@@ -35,23 +36,16 @@ export function markChatIdle(): void {
   }
 }
 
-/**
- * Returns true if the provider flavor represents a local inference server.
- */
-export function isLocalProvider(flavor: string): boolean {
+export function isLocalProvider(flavor: ProviderFlavor): boolean {
   return flavor === "ollama" || flavor === "openai-compatible";
 }
 
 /**
  * Background services call this before each LLM request.
- * - If the provider is cloud-based, returns immediately.
- * - If the provider is local and no chat is active, returns immediately.
- * - If the provider is local and chat IS active, waits until chat finishes.
- *
- * @param providerFlavor - The provider flavor string (e.g. "ollama", "openai", "anthropic")
- * @param signal - Optional AbortSignal to cancel the wait
+ * Returns immediately for cloud providers or when no chat is active.
+ * For local providers with active chat, waits until chat finishes.
  */
-export function waitIfChatActive(providerFlavor: string, signal?: AbortSignal): Promise<void> {
+export function waitIfChatActive(providerFlavor: ProviderFlavor, signal?: AbortSignal): Promise<void> {
   if (!isLocalProvider(providerFlavor)) {
     return Promise.resolve();
   }
diff --git a/apps/x/packages/core/src/models/models.ts b/apps/x/packages/core/src/models/models.ts
index 38b6801f..71202f5f 100644
--- a/apps/x/packages/core/src/models/models.ts
+++ b/apps/x/packages/core/src/models/models.ts
@@ -7,6 +7,7 @@ import { createOllama } from "ollama-ai-provider-v2";
 import { createOpenRouter } from '@openrouter/ai-sdk-provider';
 import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
 import { LlmModelConfig, LlmProvider } from "@x/shared/dist/models.js";
+import { isLocalProvider } from "./llm-queue.js";
 import z from "zod";
 import { isSignedIn } from "../account/account.js";
 import { getGatewayProvider } from "./gateway.js";
@@ -75,8 +76,7 @@ export async function testModelConnection(
   model: string,
   timeoutMs?: number,
 ): Promise<{ success: boolean; error?: string }> {
-  const isLocal = providerConfig.flavor === "ollama" || providerConfig.flavor === "openai-compatible";
-  const effectiveTimeout = timeoutMs ?? (isLocal ? 60000 : 8000);
+  const effectiveTimeout = timeoutMs ?? (isLocalProvider(providerConfig.flavor) ? 60000 : 8000);
   const controller = new AbortController();
   const timeout = setTimeout(() => controller.abort(), effectiveTimeout);
   try {