multi_agent_chat: real-graph regressions for unified HITL paths + format pass

This commit is contained in:
CREDO23 2026-05-14 17:41:24 +02:00
parent adb52fb575
commit 0723702320
34 changed files with 920 additions and 90 deletions

View file

@ -14,7 +14,7 @@ from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.middleware.stack import (
build_main_agent_deepagent_middleware,
)
from app.agents.multi_agent_chat.subagents.shared.permissions import (
from app.agents.multi_agent_chat.subagents.shared.tool_kinds import (
ToolsPermissions,
)
from app.agents.new_chat.context import SurfSenseContextSchema

View file

@ -10,7 +10,7 @@ from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.subagents.shared.permissions import ToolsPermissions
from app.agents.multi_agent_chat.subagents.shared.tool_kinds import ToolsPermissions
from app.agents.new_chat.agent_cache import (
flags_signature,
get_cache,

View file

@ -49,7 +49,7 @@ def build_main_agent_system_prompt(
custom_system_instructions: str | None = None,
use_default_system_instructions: bool = True,
citations_enabled: bool = True,
model_name: str | None = None, # noqa: ARG001 — kept for caller compatibility
model_name: str | None = None,
) -> str:
resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
visibility = thread_visibility or ChatVisibility.PRIVATE
@ -62,7 +62,9 @@ def build_main_agent_system_prompt(
if custom_system_instructions and custom_system_instructions.strip():
parts.append(
"\n" + custom_system_instructions.format(resolved_today=resolved_today) + "\n"
"\n"
+ custom_system_instructions.format(resolved_today=resolved_today)
+ "\n"
)
if use_default_system_instructions:

View file

@ -61,9 +61,7 @@ def slice_decisions_by_tool_call(
routed: dict[str, dict[str, Any]] = {}
cursor = 0
for tool_call_id, action_count in pending_list:
routed[tool_call_id] = {
"decisions": decisions[cursor : cursor + action_count]
}
routed[tool_call_id] = {"decisions": decisions[cursor : cursor + action_count]}
cursor += action_count
return routed

View file

@ -22,7 +22,7 @@ if TYPE_CHECKING:
def check_cloud_write_namespace(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
path: str,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> str | None:

View file

@ -25,7 +25,7 @@ if TYPE_CHECKING:
def current_cwd(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> str:
cwd = runtime.state.get("cwd") if hasattr(runtime, "state") else None
@ -35,7 +35,7 @@ def current_cwd(
def get_contract_suggested_path(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> str:
"""Read the planner's suggested write path; otherwise default to ``notes.md``."""
@ -47,7 +47,7 @@ def get_contract_suggested_path(
def resolve_relative(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
path: str,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> str:
@ -63,7 +63,7 @@ def resolve_relative(
def resolve_write_target_path(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
file_path: str,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> str:
@ -77,7 +77,7 @@ def resolve_write_target_path(
def resolve_move_target_path(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
file_path: str,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> str:
@ -91,7 +91,7 @@ def resolve_move_target_path(
def resolve_list_target_path(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
path: str,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> str:
@ -105,7 +105,7 @@ def resolve_list_target_path(
def normalize_local_mount_path(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
candidate: str,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> str:

View file

@ -9,9 +9,7 @@ from .common import HEADER, SANDBOX_ADDENDUM
from .desktop import BODY as DESKTOP_BODY
def build_system_prompt(
mode: FilesystemMode, *, sandbox_available: bool
) -> str:
def build_system_prompt(mode: FilesystemMode, *, sandbox_available: bool) -> str:
"""Assemble the FS prompt: common header + mode body + optional sandbox section."""
body = CLOUD_BODY if mode == FilesystemMode.CLOUD else DESKTOP_BODY
base = HEADER + body

View file

@ -21,7 +21,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_cd_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_cd_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_cd(

View file

@ -24,7 +24,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_edit_file_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_edit_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_edit_file(

View file

@ -36,7 +36,7 @@ def wrap_as_python(code: str) -> str:
async def execute_in_sandbox(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
command: str,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
timeout: int | None,
@ -59,14 +59,12 @@ async def execute_in_sandbox(
try:
return await _try_sandbox_execute(mw, command, runtime, timeout)
except Exception:
logger.exception(
"Sandbox retry also failed for thread %s", mw._thread_id
)
logger.exception("Sandbox retry also failed for thread %s", mw._thread_id)
return "Error: Code execution is temporarily unavailable. Please try again."
async def _try_sandbox_execute(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
command: str,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
timeout: int | None,

View file

@ -17,13 +17,11 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_execute_code_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_execute_code_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
def sync_execute_code(
command: Annotated[
str, "Python code to execute. Use print() to see output."
],
command: Annotated[str, "Python code to execute. Use print() to see output."],
runtime: ToolRuntime[None, SurfSenseFilesystemState],
timeout: Annotated[
int | None,
@ -35,14 +33,10 @@ def create_execute_code_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
return f"Error: timeout must be non-negative, got {timeout}."
if timeout > MAX_EXECUTE_TIMEOUT:
return f"Error: timeout {timeout}s exceeds maximum ({MAX_EXECUTE_TIMEOUT}s)."
return run_async_blocking(
execute_in_sandbox(mw, command, runtime, timeout)
)
return run_async_blocking(execute_in_sandbox(mw, command, runtime, timeout))
async def async_execute_code(
command: Annotated[
str, "Python code to execute. Use print() to see output."
],
command: Annotated[str, "Python code to execute. Use print() to see output."],
runtime: ToolRuntime[None, SurfSenseFilesystemState],
timeout: Annotated[
int | None,

View file

@ -20,7 +20,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_list_tree_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_list_tree_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_list_tree(

View file

@ -19,7 +19,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_ls_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_ls_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_ls(

View file

@ -23,7 +23,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_mkdir_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_mkdir_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_mkdir(

View file

@ -18,7 +18,7 @@ if TYPE_CHECKING:
async def cloud_move_file(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
source: str,
dest: str,
@ -39,8 +39,7 @@ async def cloud_move_file(
)
if not source.startswith(DOCUMENTS_ROOT + "/"):
return (
"Error: cloud move_file source must be under /documents/ (got "
f"'{source}')."
f"Error: cloud move_file source must be under /documents/ (got '{source}')."
)
if not dest.startswith(DOCUMENTS_ROOT + "/"):
return (
@ -89,9 +88,7 @@ async def cloud_move_file(
],
"messages": [
ToolMessage(
content=(
f"Moved '{source}' to '{dest}' (will commit at end of turn)."
),
content=(f"Moved '{source}' to '{dest}' (will commit at end of turn)."),
tool_call_id=runtime.tool_call_id,
)
],

View file

@ -23,7 +23,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_move_file_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_move_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_move_file(
@ -85,9 +85,7 @@ def create_move_file_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
] = False,
) -> Command | str:
return run_async_blocking(
async_move_file(
source_path, destination_path, runtime, overwrite=overwrite
)
async_move_file(source_path, destination_path, runtime, overwrite=overwrite)
)
return StructuredTool.from_function(

View file

@ -16,7 +16,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_pwd_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_pwd_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
def sync_pwd(

View file

@ -21,7 +21,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_read_file_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_read_file(
@ -90,9 +90,7 @@ def create_read_file_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
"Maximum number of lines to read.",
] = 100,
) -> Command | str:
return run_async_blocking(
async_read_file(file_path, runtime, offset, limit)
)
return run_async_blocking(async_read_file(file_path, runtime, offset, limit))
return StructuredTool.from_function(
name="read_file",

View file

@ -22,7 +22,7 @@ if TYPE_CHECKING:
async def cloud_rm(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
validated: str,
) -> Command | str:
@ -31,8 +31,7 @@ async def cloud_rm(
return f"Error: refusing to rm '{validated}'."
if not validated.startswith(DOCUMENTS_ROOT + "/"):
return (
"Error: cloud rm must target a path under /documents/ "
f"(got '{validated}')."
f"Error: cloud rm must target a path under /documents/ (got '{validated}')."
)
anon = runtime.state.get("kb_anon_doc") or {}
@ -41,14 +40,10 @@ async def cloud_rm(
staged_dirs = list(runtime.state.get("staged_dirs") or [])
if validated in staged_dirs:
return (
f"Error: '{validated}' is a directory. Use rmdir for "
"empty directories."
)
return f"Error: '{validated}' is a directory. Use rmdir for empty directories."
pending_dir_deletes = list(runtime.state.get("pending_dir_deletes") or [])
if any(
isinstance(d, dict) and d.get("path") == validated
for d in pending_dir_deletes
isinstance(d, dict) and d.get("path") == validated for d in pending_dir_deletes
):
return f"Error: '{validated}' is already queued for rmdir."
@ -57,14 +52,11 @@ async def cloud_rm(
children = await backend.als_info(validated)
if children:
return (
f"Error: '{validated}' is a directory. Use rmdir for "
"empty directories."
f"Error: '{validated}' is a directory. Use rmdir for empty directories."
)
pending_deletes = list(runtime.state.get("pending_deletes") or [])
if any(
isinstance(d, dict) and d.get("path") == validated for d in pending_deletes
):
if any(isinstance(d, dict) and d.get("path") == validated for d in pending_deletes):
return f"'{validated}' is already queued for deletion."
files_state = runtime.state.get("files") or {}
@ -93,8 +85,7 @@ async def cloud_rm(
"messages": [
ToolMessage(
content=(
f"Staged delete of '{validated}' (will commit at "
"end of turn)."
f"Staged delete of '{validated}' (will commit at end of turn)."
),
tool_call_id=runtime.tool_call_id,
)
@ -114,7 +105,7 @@ async def cloud_rm(
async def desktop_rm(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
validated: str,
) -> Command | str:

View file

@ -21,7 +21,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_rm_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_rm_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_rm(

View file

@ -26,7 +26,7 @@ if TYPE_CHECKING:
async def cloud_rmdir(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
validated: str,
) -> Command | str:
@ -49,8 +49,7 @@ async def cloud_rmdir(
staged_dirs = list(runtime.state.get("staged_dirs") or [])
pending_dir_deletes = list(runtime.state.get("pending_dir_deletes") or [])
if any(
isinstance(d, dict) and d.get("path") == validated
for d in pending_dir_deletes
isinstance(d, dict) and d.get("path") == validated for d in pending_dir_deletes
):
return f"'{validated}' is already queued for deletion."
@ -61,11 +60,7 @@ async def cloud_rmdir(
if isinstance(backend, KBPostgresBackend):
children = list(await backend.als_info(validated))
if (
isinstance(backend, KBPostgresBackend)
and not children
and not exists_in_staged
):
if isinstance(backend, KBPostgresBackend) and not children and not exists_in_staged:
loaded = await backend._load_file_data(validated)
if loaded is not None:
return f"Error: '{validated}' is a file. Use rm to delete files."
@ -79,9 +74,7 @@ async def cloud_rmdir(
return f"Error: directory '{validated}' not found."
if children:
return (
f"Error: directory '{validated}' is not empty. Remove contents first."
)
return f"Error: directory '{validated}' is not empty. Remove contents first."
if exists_in_staged:
rest = [d for d in staged_dirs if d != validated]
@ -109,8 +102,7 @@ async def cloud_rmdir(
"messages": [
ToolMessage(
content=(
f"Staged rmdir of '{validated}' (will commit "
"at end of turn)."
f"Staged rmdir of '{validated}' (will commit at end of turn)."
),
tool_call_id=runtime.tool_call_id,
)
@ -120,7 +112,7 @@ async def cloud_rmdir(
async def desktop_rmdir(
mw: "SurfSenseFilesystemMiddleware",
mw: SurfSenseFilesystemMiddleware,
runtime: ToolRuntime[None, SurfSenseFilesystemState],
validated: str,
) -> Command | str:

View file

@ -21,7 +21,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_rmdir_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_rmdir_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_rmdir(

View file

@ -23,7 +23,7 @@ if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware
def create_write_file_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
def create_write_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
description = select_description(mw._filesystem_mode)
async def async_write_file(
@ -73,9 +73,7 @@ def create_write_file_tool(mw: "SurfSenseFilesystemMiddleware") -> BaseTool:
content: Annotated[str, "Text content to write to the file."],
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> Command | str:
return run_async_blocking(
async_write_file(file_path, content, runtime)
)
return run_async_blocking(async_write_file(file_path, content, runtime))
return StructuredTool.from_function(
name="write_file",