feat(chat_deepagent): optimize agent creation by offloading middleware stack compilation to a separate thread

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-28 05:03:52 -07:00
parent 05ca4c0b9f
commit 9ec9b64348

View file

@ -367,7 +367,74 @@ async def create_surfsense_deep_agent(
"[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0 "[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
) )
# -- Build the middleware stack (mirrors create_deep_agent internals) ------ # Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent)
final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
# The middleware stack — and especially ``SubAgentMiddleware`` — is *not*
# cheap to build. ``SubAgentMiddleware.__init__`` calls ``create_agent``
# synchronously to compile the general-purpose subagent's full state graph
# (every tool + every middleware → pydantic schemas + langgraph compile).
# On gpt-5.x agents that's roughly 1.5-2s of pure CPU work. If we run it
# directly here it blocks the asyncio event loop for the whole streaming
# task (and any other coroutine sharing this loop), which is why
# "agent creation" wall-clock time used to stretch to ~3-4s. Move the
# entire middleware build + main-graph compile into a single
# ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the
# event loop stays responsive.
_t0 = time.perf_counter()
agent = await asyncio.to_thread(
_build_compiled_agent_blocking,
llm=llm,
tools=tools,
final_system_prompt=final_system_prompt,
backend_resolver=backend_resolver,
filesystem_mode=filesystem_selection.mode,
search_space_id=search_space_id,
user_id=user_id,
thread_id=thread_id,
visibility=visibility,
anon_session_id=anon_session_id,
available_connectors=available_connectors,
available_document_types=available_document_types,
mentioned_document_ids=mentioned_document_ids,
checkpointer=checkpointer,
)
_perf_log.info(
"[create_agent] Middleware stack + graph compiled in %.3fs",
time.perf_counter() - _t0,
)
_perf_log.info(
"[create_agent] Total agent creation in %.3fs",
time.perf_counter() - _t_agent_total,
)
return agent
def _build_compiled_agent_blocking(
*,
llm: BaseChatModel,
tools: Sequence[BaseTool],
final_system_prompt: str,
backend_resolver: Any,
filesystem_mode: FilesystemMode,
search_space_id: int,
user_id: str | None,
thread_id: int | None,
visibility: ChatVisibility,
anon_session_id: str | None,
available_connectors: list[str] | None,
available_document_types: list[str] | None,
mentioned_document_ids: list[int] | None,
checkpointer: Checkpointer,
):
"""Build the middleware stack and compile the agent graph synchronously.
Runs in a worker thread (see ``asyncio.to_thread`` call site) so the heavy
CPU work — most notably ``SubAgentMiddleware.__init__`` eagerly calling
``create_agent`` to compile the general-purpose subagent — does not block
the event loop.
"""
_memory_middleware = MemoryInjectionMiddleware( _memory_middleware = MemoryInjectionMiddleware(
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
@ -386,7 +453,7 @@ async def create_surfsense_deep_agent(
FileIntentMiddleware(llm=llm), FileIntentMiddleware(llm=llm),
SurfSenseFilesystemMiddleware( SurfSenseFilesystemMiddleware(
backend=backend_resolver, backend=backend_resolver,
filesystem_mode=filesystem_selection.mode, filesystem_mode=filesystem_mode,
search_space_id=search_space_id, search_space_id=search_space_id,
created_by_id=user_id, created_by_id=user_id,
thread_id=thread_id, thread_id=thread_id,
@ -415,19 +482,19 @@ async def create_surfsense_deep_agent(
AnonymousDocumentMiddleware( AnonymousDocumentMiddleware(
anon_session_id=anon_session_id, anon_session_id=anon_session_id,
) )
if filesystem_selection.mode == FilesystemMode.CLOUD if filesystem_mode == FilesystemMode.CLOUD
else None, else None,
KnowledgeTreeMiddleware( KnowledgeTreeMiddleware(
search_space_id=search_space_id, search_space_id=search_space_id,
filesystem_mode=filesystem_selection.mode, filesystem_mode=filesystem_mode,
llm=llm, llm=llm,
) )
if filesystem_selection.mode == FilesystemMode.CLOUD if filesystem_mode == FilesystemMode.CLOUD
else None, else None,
KnowledgePriorityMiddleware( KnowledgePriorityMiddleware(
llm=llm, llm=llm,
search_space_id=search_space_id, search_space_id=search_space_id,
filesystem_mode=filesystem_selection.mode, filesystem_mode=filesystem_mode,
available_connectors=available_connectors, available_connectors=available_connectors,
available_document_types=available_document_types, available_document_types=available_document_types,
mentioned_document_ids=mentioned_document_ids, mentioned_document_ids=mentioned_document_ids,
@ -435,7 +502,7 @@ async def create_surfsense_deep_agent(
FileIntentMiddleware(llm=llm), FileIntentMiddleware(llm=llm),
SurfSenseFilesystemMiddleware( SurfSenseFilesystemMiddleware(
backend=backend_resolver, backend=backend_resolver,
filesystem_mode=filesystem_selection.mode, filesystem_mode=filesystem_mode,
search_space_id=search_space_id, search_space_id=search_space_id,
created_by_id=user_id, created_by_id=user_id,
thread_id=thread_id, thread_id=thread_id,
@ -443,32 +510,27 @@ async def create_surfsense_deep_agent(
KnowledgeBasePersistenceMiddleware( KnowledgeBasePersistenceMiddleware(
search_space_id=search_space_id, search_space_id=search_space_id,
created_by_id=user_id, created_by_id=user_id,
filesystem_mode=filesystem_selection.mode, filesystem_mode=filesystem_mode,
) )
if filesystem_selection.mode == FilesystemMode.CLOUD if filesystem_mode == FilesystemMode.CLOUD
else None, else None,
SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]), SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
create_safe_summarization_middleware(llm, StateBackend), create_safe_summarization_middleware(llm, StateBackend),
PatchToolCallsMiddleware(), PatchToolCallsMiddleware(),
DedupHITLToolCallsMiddleware(agent_tools=tools), DedupHITLToolCallsMiddleware(agent_tools=list(tools)),
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"), AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
] ]
deepagent_middleware = [m for m in deepagent_middleware if m is not None] deepagent_middleware = [m for m in deepagent_middleware if m is not None]
# Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent) agent = create_agent(
final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
_t0 = time.perf_counter()
agent = await asyncio.to_thread(
create_agent,
llm, llm,
system_prompt=final_system_prompt, system_prompt=final_system_prompt,
tools=tools, tools=list(tools),
middleware=deepagent_middleware, middleware=deepagent_middleware,
context_schema=SurfSenseContextSchema, context_schema=SurfSenseContextSchema,
checkpointer=checkpointer, checkpointer=checkpointer,
) )
agent = agent.with_config( return agent.with_config(
{ {
"recursion_limit": 10_000, "recursion_limit": 10_000,
"metadata": { "metadata": {
@ -477,13 +539,3 @@ async def create_surfsense_deep_agent(
}, },
} }
) )
_perf_log.info(
"[create_agent] Graph compiled (create_agent) in %.3fs",
time.perf_counter() - _t0,
)
_perf_log.info(
"[create_agent] Total agent creation in %.3fs",
time.perf_counter() - _t_agent_total,
)
return agent