mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
feat(chat_deepagent): optimize agent creation by offloading middleware stack compilation to a separate thread
This commit is contained in:
parent
05ca4c0b9f
commit
9ec9b64348
1 changed file with 80 additions and 28 deletions
|
|
@ -367,7 +367,74 @@ async def create_surfsense_deep_agent(
|
||||||
"[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
|
"[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
|
||||||
)
|
)
|
||||||
|
|
||||||
# -- Build the middleware stack (mirrors create_deep_agent internals) ------
|
# Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent)
|
||||||
|
final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
|
||||||
|
|
||||||
|
# The middleware stack — and especially ``SubAgentMiddleware`` — is *not*
|
||||||
|
# cheap to build. ``SubAgentMiddleware.__init__`` calls ``create_agent``
|
||||||
|
# synchronously to compile the general-purpose subagent's full state graph
|
||||||
|
# (every tool + every middleware → pydantic schemas + langgraph compile).
|
||||||
|
# On gpt-5.x agents that's roughly 1.5–2s of pure CPU work. If we run it
|
||||||
|
# directly here it blocks the asyncio event loop for the whole streaming
|
||||||
|
# task (and any other coroutine sharing this loop), which is why
|
||||||
|
# "agent creation" wall-clock time used to stretch to ~3–4s. Move the
|
||||||
|
# entire middleware build + main-graph compile into a single
|
||||||
|
# ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the
|
||||||
|
# event loop stays responsive.
|
||||||
|
_t0 = time.perf_counter()
|
||||||
|
agent = await asyncio.to_thread(
|
||||||
|
_build_compiled_agent_blocking,
|
||||||
|
llm=llm,
|
||||||
|
tools=tools,
|
||||||
|
final_system_prompt=final_system_prompt,
|
||||||
|
backend_resolver=backend_resolver,
|
||||||
|
filesystem_mode=filesystem_selection.mode,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
user_id=user_id,
|
||||||
|
thread_id=thread_id,
|
||||||
|
visibility=visibility,
|
||||||
|
anon_session_id=anon_session_id,
|
||||||
|
available_connectors=available_connectors,
|
||||||
|
available_document_types=available_document_types,
|
||||||
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
|
checkpointer=checkpointer,
|
||||||
|
)
|
||||||
|
_perf_log.info(
|
||||||
|
"[create_agent] Middleware stack + graph compiled in %.3fs",
|
||||||
|
time.perf_counter() - _t0,
|
||||||
|
)
|
||||||
|
|
||||||
|
_perf_log.info(
|
||||||
|
"[create_agent] Total agent creation in %.3fs",
|
||||||
|
time.perf_counter() - _t_agent_total,
|
||||||
|
)
|
||||||
|
return agent
|
||||||
|
|
||||||
|
|
||||||
|
def _build_compiled_agent_blocking(
|
||||||
|
*,
|
||||||
|
llm: BaseChatModel,
|
||||||
|
tools: Sequence[BaseTool],
|
||||||
|
final_system_prompt: str,
|
||||||
|
backend_resolver: Any,
|
||||||
|
filesystem_mode: FilesystemMode,
|
||||||
|
search_space_id: int,
|
||||||
|
user_id: str | None,
|
||||||
|
thread_id: int | None,
|
||||||
|
visibility: ChatVisibility,
|
||||||
|
anon_session_id: str | None,
|
||||||
|
available_connectors: list[str] | None,
|
||||||
|
available_document_types: list[str] | None,
|
||||||
|
mentioned_document_ids: list[int] | None,
|
||||||
|
checkpointer: Checkpointer,
|
||||||
|
):
|
||||||
|
"""Build the middleware stack and compile the agent graph synchronously.
|
||||||
|
|
||||||
|
Runs in a worker thread (see ``asyncio.to_thread`` call site) so the heavy
|
||||||
|
CPU work — most notably ``SubAgentMiddleware.__init__`` eagerly calling
|
||||||
|
``create_agent`` to compile the general-purpose subagent — does not block
|
||||||
|
the event loop.
|
||||||
|
"""
|
||||||
_memory_middleware = MemoryInjectionMiddleware(
|
_memory_middleware = MemoryInjectionMiddleware(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
|
|
@ -386,7 +453,7 @@ async def create_surfsense_deep_agent(
|
||||||
FileIntentMiddleware(llm=llm),
|
FileIntentMiddleware(llm=llm),
|
||||||
SurfSenseFilesystemMiddleware(
|
SurfSenseFilesystemMiddleware(
|
||||||
backend=backend_resolver,
|
backend=backend_resolver,
|
||||||
filesystem_mode=filesystem_selection.mode,
|
filesystem_mode=filesystem_mode,
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
created_by_id=user_id,
|
created_by_id=user_id,
|
||||||
thread_id=thread_id,
|
thread_id=thread_id,
|
||||||
|
|
@ -415,19 +482,19 @@ async def create_surfsense_deep_agent(
|
||||||
AnonymousDocumentMiddleware(
|
AnonymousDocumentMiddleware(
|
||||||
anon_session_id=anon_session_id,
|
anon_session_id=anon_session_id,
|
||||||
)
|
)
|
||||||
if filesystem_selection.mode == FilesystemMode.CLOUD
|
if filesystem_mode == FilesystemMode.CLOUD
|
||||||
else None,
|
else None,
|
||||||
KnowledgeTreeMiddleware(
|
KnowledgeTreeMiddleware(
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
filesystem_mode=filesystem_selection.mode,
|
filesystem_mode=filesystem_mode,
|
||||||
llm=llm,
|
llm=llm,
|
||||||
)
|
)
|
||||||
if filesystem_selection.mode == FilesystemMode.CLOUD
|
if filesystem_mode == FilesystemMode.CLOUD
|
||||||
else None,
|
else None,
|
||||||
KnowledgePriorityMiddleware(
|
KnowledgePriorityMiddleware(
|
||||||
llm=llm,
|
llm=llm,
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
filesystem_mode=filesystem_selection.mode,
|
filesystem_mode=filesystem_mode,
|
||||||
available_connectors=available_connectors,
|
available_connectors=available_connectors,
|
||||||
available_document_types=available_document_types,
|
available_document_types=available_document_types,
|
||||||
mentioned_document_ids=mentioned_document_ids,
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
|
|
@ -435,7 +502,7 @@ async def create_surfsense_deep_agent(
|
||||||
FileIntentMiddleware(llm=llm),
|
FileIntentMiddleware(llm=llm),
|
||||||
SurfSenseFilesystemMiddleware(
|
SurfSenseFilesystemMiddleware(
|
||||||
backend=backend_resolver,
|
backend=backend_resolver,
|
||||||
filesystem_mode=filesystem_selection.mode,
|
filesystem_mode=filesystem_mode,
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
created_by_id=user_id,
|
created_by_id=user_id,
|
||||||
thread_id=thread_id,
|
thread_id=thread_id,
|
||||||
|
|
@ -443,32 +510,27 @@ async def create_surfsense_deep_agent(
|
||||||
KnowledgeBasePersistenceMiddleware(
|
KnowledgeBasePersistenceMiddleware(
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
created_by_id=user_id,
|
created_by_id=user_id,
|
||||||
filesystem_mode=filesystem_selection.mode,
|
filesystem_mode=filesystem_mode,
|
||||||
)
|
)
|
||||||
if filesystem_selection.mode == FilesystemMode.CLOUD
|
if filesystem_mode == FilesystemMode.CLOUD
|
||||||
else None,
|
else None,
|
||||||
SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
|
SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
|
||||||
create_safe_summarization_middleware(llm, StateBackend),
|
create_safe_summarization_middleware(llm, StateBackend),
|
||||||
PatchToolCallsMiddleware(),
|
PatchToolCallsMiddleware(),
|
||||||
DedupHITLToolCallsMiddleware(agent_tools=tools),
|
DedupHITLToolCallsMiddleware(agent_tools=list(tools)),
|
||||||
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
||||||
]
|
]
|
||||||
deepagent_middleware = [m for m in deepagent_middleware if m is not None]
|
deepagent_middleware = [m for m in deepagent_middleware if m is not None]
|
||||||
|
|
||||||
# Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent)
|
agent = create_agent(
|
||||||
final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
|
|
||||||
|
|
||||||
_t0 = time.perf_counter()
|
|
||||||
agent = await asyncio.to_thread(
|
|
||||||
create_agent,
|
|
||||||
llm,
|
llm,
|
||||||
system_prompt=final_system_prompt,
|
system_prompt=final_system_prompt,
|
||||||
tools=tools,
|
tools=list(tools),
|
||||||
middleware=deepagent_middleware,
|
middleware=deepagent_middleware,
|
||||||
context_schema=SurfSenseContextSchema,
|
context_schema=SurfSenseContextSchema,
|
||||||
checkpointer=checkpointer,
|
checkpointer=checkpointer,
|
||||||
)
|
)
|
||||||
agent = agent.with_config(
|
return agent.with_config(
|
||||||
{
|
{
|
||||||
"recursion_limit": 10_000,
|
"recursion_limit": 10_000,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|
@ -477,13 +539,3 @@ async def create_surfsense_deep_agent(
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
_perf_log.info(
|
|
||||||
"[create_agent] Graph compiled (create_agent) in %.3fs",
|
|
||||||
time.perf_counter() - _t0,
|
|
||||||
)
|
|
||||||
|
|
||||||
_perf_log.info(
|
|
||||||
"[create_agent] Total agent creation in %.3fs",
|
|
||||||
time.perf_counter() - _t_agent_total,
|
|
||||||
)
|
|
||||||
return agent
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue