Merge remote-tracking branch 'upstream/dev' into feature/multi-agent

This commit is contained in:
CREDO23 2026-05-01 00:05:20 +02:00
commit 5d3b8b9ca9
83 changed files with 10514 additions and 638 deletions

View file

@ -584,13 +584,33 @@ class VercelStreamingService:
# Tool Parts
# =========================================================================
def format_tool_input_start(self, tool_call_id: str, tool_name: str) -> str:
def format_tool_input_start(
self,
tool_call_id: str,
tool_name: str,
*,
langchain_tool_call_id: str | None = None,
) -> str:
"""
Format the start of tool input streaming.
Args:
tool_call_id: The unique tool call identifier
tool_name: The name of the tool being called
tool_call_id: The unique tool call identifier. May be EITHER the
synthetic ``call_<run_id>`` id derived from LangGraph
``run_id`` (legacy / ``SURFSENSE_ENABLE_STREAM_PARITY_V2``
OFF, or the unmatched-fallback path under parity_v2) OR
the authoritative LangChain ``tool_call.id`` (parity_v2
path: when the provider streams ``tool_call_chunks`` we
register the ``index`` and reuse the lc-id as the card
id so live ``tool-input-delta`` events can be routed
without a downstream join). Either way, the same id is
preserved across ``tool-input-start`` / ``-delta`` /
``-available`` / ``tool-output-available`` for one call.
tool_name: The name of the tool being called.
langchain_tool_call_id: Optional authoritative LangChain
``tool_call.id``. When set, surfaces as
``langchainToolCallId`` so the frontend can join this card
to the action-log row written by ``ActionLogMiddleware``.
Returns:
str: SSE formatted tool input start part
@ -598,13 +618,14 @@ class VercelStreamingService:
Example output:
data: {"type":"tool-input-start","toolCallId":"call_abc123","toolName":"getWeather"}
"""
return self._format_sse(
{
"type": "tool-input-start",
"toolCallId": tool_call_id,
"toolName": tool_name,
}
)
payload: dict[str, Any] = {
"type": "tool-input-start",
"toolCallId": tool_call_id,
"toolName": tool_name,
}
if langchain_tool_call_id:
payload["langchainToolCallId"] = langchain_tool_call_id
return self._format_sse(payload)
def format_tool_input_delta(self, tool_call_id: str, input_text_delta: str) -> str:
"""
@ -629,7 +650,12 @@ class VercelStreamingService:
)
def format_tool_input_available(
self, tool_call_id: str, tool_name: str, input_data: dict[str, Any]
self,
tool_call_id: str,
tool_name: str,
input_data: dict[str, Any],
*,
langchain_tool_call_id: str | None = None,
) -> str:
"""
Format the completion of tool input.
@ -638,6 +664,8 @@ class VercelStreamingService:
tool_call_id: The tool call identifier
tool_name: The name of the tool
input_data: The complete tool input parameters
langchain_tool_call_id: Optional authoritative LangChain
``tool_call.id`` (see ``format_tool_input_start``).
Returns:
str: SSE formatted tool input available part
@ -645,22 +673,34 @@ class VercelStreamingService:
Example output:
data: {"type":"tool-input-available","toolCallId":"call_abc123","toolName":"getWeather","input":{"city":"SF"}}
"""
return self._format_sse(
{
"type": "tool-input-available",
"toolCallId": tool_call_id,
"toolName": tool_name,
"input": input_data,
}
)
payload: dict[str, Any] = {
"type": "tool-input-available",
"toolCallId": tool_call_id,
"toolName": tool_name,
"input": input_data,
}
if langchain_tool_call_id:
payload["langchainToolCallId"] = langchain_tool_call_id
return self._format_sse(payload)
def format_tool_output_available(self, tool_call_id: str, output: Any) -> str:
def format_tool_output_available(
self,
tool_call_id: str,
output: Any,
*,
langchain_tool_call_id: str | None = None,
) -> str:
"""
Format tool execution output.
Args:
tool_call_id: The tool call identifier
output: The tool execution result
langchain_tool_call_id: Optional authoritative LangChain
``tool_call.id`` extracted from ``ToolMessage.tool_call_id``.
When set, the frontend can backfill any card whose
``langchainToolCallId`` was not yet known at
``tool-input-start`` time.
Returns:
str: SSE formatted tool output available part
@ -668,13 +708,14 @@ class VercelStreamingService:
Example output:
data: {"type":"tool-output-available","toolCallId":"call_abc123","output":{"weather":"sunny"}}
"""
return self._format_sse(
{
"type": "tool-output-available",
"toolCallId": tool_call_id,
"output": output,
}
)
payload: dict[str, Any] = {
"type": "tool-output-available",
"toolCallId": tool_call_id,
"output": output,
}
if langchain_tool_call_id:
payload["langchainToolCallId"] = langchain_tool_call_id
return self._format_sse(payload)
# =========================================================================
# Step Parts

View file

@ -8,7 +8,9 @@ Operation outcomes mirror the plan:
* **KB-owned actions** (NOTE / FILE / FOLDER mutations): restore from
:class:`app.db.DocumentRevision` / :class:`app.db.FolderRevision` rows
written before the original mutation.
written before the original mutation. ``rm``/``rmdir`` re-INSERT a fresh
row from the snapshot; ``write_file`` create / ``mkdir`` DELETE the row
that was created; everything else is an in-place restore.
* **Connector-owned actions with a declared ``reverse_descriptor``**: invoke
the inverse tool through the agent's normal permission stack (NOT
bypassed). Out of scope for this PR; the revert returns ``REVERSE_NOT_IMPLEMENTED``.
@ -18,6 +20,11 @@ Operation outcomes mirror the plan:
A successful revert appends a NEW row to ``agent_action_log`` with
``reverse_of=<original_action_id>`` and the requesting user's
``user_id``, preserving an auditable chain.
Dispatch must be exact-match (``tool_name == name``), NOT prefix matching.
``"rmdir".startswith("rm")`` would otherwise mis-route directory revert
to the document branch (and ``delete_note`` vs ``delete_folder`` is the
same trap waiting to happen).
"""
from __future__ import annotations
@ -25,17 +32,31 @@ from __future__ import annotations
import logging
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import Literal
from typing import Any, Literal
from sqlalchemy import select
from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.path_resolver import (
DOCUMENTS_ROOT,
safe_filename,
safe_folder_segment,
)
from app.db import (
AgentActionLog,
Chunk,
Document,
DocumentRevision,
DocumentType,
Folder,
FolderRevision,
NewChatThread,
)
from app.utils.document_converters import (
embed_texts,
generate_content_hash,
generate_unique_identifier_hash,
)
logger = logging.getLogger(__name__)
@ -110,14 +131,244 @@ def can_revert(
# ---------------------------------------------------------------------------
# Revert paths
# Helper: reconstruct virtual path from a snapshot
# ---------------------------------------------------------------------------
async def _virtual_path_from_snapshot(
    session: AsyncSession,
    revision: DocumentRevision,
) -> str | None:
    """Reconstruct the virtual path the document had before mutation.

    Preference order:

    1. ``metadata_before["virtual_path"]`` when it is a string that starts
       with ``DOCUMENTS_ROOT``.
    2. ``"<folder_path>/<title_before>"`` composed from ``folder_id_before``
       and ``title_before`` by walking the folder chain via ``parent_id``.
       A ``folder_id_before`` of ``None`` places the file directly under
       ``DOCUMENTS_ROOT``.

    Args:
        session: Async session used to load :class:`Folder` rows.
        revision: Snapshot row describing the pre-mutation document.

    Returns:
        The reconstructed path, or ``None`` when ``title_before`` is missing
        or empty, when a folder in the chain no longer exists, or when the
        ``parent_id`` chain is cyclic (no trustworthy path can be derived).
    """
    metadata = revision.metadata_before
    if isinstance(metadata, dict):
        candidate = metadata.get("virtual_path")
        if isinstance(candidate, str) and candidate.startswith(DOCUMENTS_ROOT):
            return candidate

    title = revision.title_before
    if not isinstance(title, str) or not title:
        return None

    # Collected leaf-to-root, reversed once the walk is complete.
    segments: list[str] = []
    seen: set[int] = set()
    cursor: int | None = revision.folder_id_before
    while cursor is not None:
        if cursor in seen:
            # A cycle means the folder tree is corrupt; a truncated path
            # would produce a wrong identifier hash downstream, so give up.
            return None
        seen.add(cursor)
        folder = await session.get(Folder, cursor)
        if folder is None:
            return None
        segments.append(safe_folder_segment(str(folder.name or "")))
        cursor = folder.parent_id

    segments.reverse()
    if segments:
        base = f"{DOCUMENTS_ROOT}/" + "/".join(segments)
    else:
        base = DOCUMENTS_ROOT
    return f"{base}/{safe_filename(title)}"
# ---------------------------------------------------------------------------
# Document revision restore (write/edit/move/rm)
# ---------------------------------------------------------------------------
def _set_field(target: Any, field: str, value: Any) -> None:
if value is not None:
setattr(target, field, value)
async def _restore_in_place_document(
    session: AsyncSession,
    *,
    revision: DocumentRevision,
) -> RevertOutcome:
    """Apply an in-place restore to an existing :class:`Document`.

    Restores ``content`` / ``source_markdown``, ``title``, ``folder_id`` and
    ``document_metadata`` from the snapshot, recomputes ``content_hash`` and
    (when the pre-mutation path can be resolved) ``unique_identifier_hash``,
    replaces the document's chunks when ``chunks_before`` is a list, and
    re-embeds the restored content.

    Args:
        session: Async session the restore is performed in. Nothing is
            committed here.
        revision: Snapshot row holding the pre-mutation state.

    Returns:
        ``tool_unavailable`` when the live row is gone or when another
        document already owns the identifier hash of the restored path;
        ``ok`` otherwise.
    """
    if revision.document_id is None:
        return RevertOutcome(
            status="tool_unavailable",
            message=(
                "Original document was hard-deleted; in-place restore is not possible."
            ),
        )
    doc = await session.get(Document, revision.document_id)
    if doc is None:
        return RevertOutcome(
            status="tool_unavailable",
            message="Original document has been deleted; revert cannot proceed.",
        )

    # Resolve the target identity BEFORE touching the row, so a collision is
    # reported as a clean outcome and leaves the document unmodified. This
    # mirrors the guard in ``_reinsert_document_from_revision``.
    virtual_path = await _virtual_path_from_snapshot(session, revision)
    restored_hash = None
    if virtual_path:
        restored_hash = generate_unique_identifier_hash(
            DocumentType.NOTE,
            virtual_path,
            doc.search_space_id,
        )
        collision = await session.execute(
            select(Document.id)
            .where(
                Document.search_space_id == doc.search_space_id,
                Document.unique_identifier_hash == restored_hash,
                Document.id != doc.id,
            )
            .limit(1)
        )
        if collision.scalar_one_or_none() is not None:
            return RevertOutcome(
                status="tool_unavailable",
                message=(
                    f"A document already exists at '{virtual_path}'; revert would "
                    "collide. Move the live doc out of the way first."
                ),
            )

    # NOTE(review): ``_set_field`` skips ``None``, so a snapshot whose
    # ``folder_id_before`` is ``None`` does not move the document back to the
    # root. Confirm whether ``None`` can mean "was at root" for in-place
    # restores (e.g. ``move_file``) before relying on this.
    _set_field(doc, "content", revision.content_before)
    _set_field(doc, "source_markdown", revision.content_before)
    _set_field(doc, "title", revision.title_before)
    _set_field(doc, "folder_id", revision.folder_id_before)

    metadata_before = revision.metadata_before or {}
    if isinstance(metadata_before, dict) and metadata_before:
        # Copy so the live row never aliases the snapshot's dict.
        doc.document_metadata = dict(metadata_before)

    if isinstance(revision.content_before, str):
        doc.content_hash = generate_content_hash(
            revision.content_before, doc.search_space_id
        )

    if restored_hash is not None:
        doc.unique_identifier_hash = restored_hash

    chunks_before = revision.chunks_before
    if isinstance(chunks_before, list):
        # Replace, not merge: drop every current chunk, then re-create the
        # snapshotted ones with fresh embeddings.
        await session.execute(delete(Chunk).where(Chunk.document_id == doc.id))
        chunk_texts = [
            c["content"]
            for c in chunks_before
            if isinstance(c, dict) and isinstance(c.get("content"), str)
        ]
        if chunk_texts:
            chunk_embeddings = embed_texts(chunk_texts)
            session.add_all(
                [
                    Chunk(document_id=doc.id, content=text, embedding=embedding)
                    for text, embedding in zip(
                        chunk_texts, chunk_embeddings, strict=True
                    )
                ]
            )

    if isinstance(revision.content_before, str):
        doc.embedding = embed_texts([revision.content_before])[0]

    doc.updated_at = datetime.now(UTC)
    return RevertOutcome(status="ok", message="Document restored from snapshot.")
async def _reinsert_document_from_revision(
    session: AsyncSession,
    *,
    revision: DocumentRevision,
) -> RevertOutcome:
    """Re-INSERT a deleted :class:`Document` from a snapshot row (``rm`` revert).

    Args:
        session: Async session the insert is performed in. The session is
            flushed to obtain the new primary key; nothing is committed here.
        revision: Snapshot row holding the pre-deletion state. On success its
            ``document_id`` is repointed at the recreated row.

    Returns:
        ``not_reversible`` when the snapshot lacks a title, content, or a
        resolvable path; ``tool_unavailable`` when the original parent folder
        is gone or another document already occupies the path; ``ok`` once the
        document (and any snapshotted chunks) have been added to the session.
    """
    if not isinstance(revision.title_before, str) or not revision.title_before:
        return RevertOutcome(
            status="not_reversible",
            message="Snapshot lacks title_before; cannot recreate document.",
        )
    if not isinstance(revision.content_before, str):
        return RevertOutcome(
            status="not_reversible",
            message="Snapshot lacks content_before; cannot recreate document.",
        )

    virtual_path = await _virtual_path_from_snapshot(session, revision)
    if not virtual_path:
        return RevertOutcome(
            status="not_reversible",
            message=(
                "Snapshot is missing both metadata_before['virtual_path'] AND "
                "a resolvable (folder_id_before, title_before) pair."
            ),
        )

    # The path may have come straight from metadata, in which case the folder
    # chain was never loaded. Verify the parent still exists so the insert
    # cannot reference a folder row that is gone.
    folder_id = revision.folder_id_before
    if folder_id is not None and await session.get(Folder, folder_id) is None:
        return RevertOutcome(
            status="tool_unavailable",
            message=(
                "Original parent folder no longer exists; restore the folder "
                "before reverting this action."
            ),
        )

    search_space_id = revision.search_space_id
    unique_identifier_hash = generate_unique_identifier_hash(
        DocumentType.NOTE,
        virtual_path,
        search_space_id,
    )

    collision = await session.execute(
        select(Document.id).where(
            Document.search_space_id == search_space_id,
            Document.unique_identifier_hash == unique_identifier_hash,
        )
    )
    if collision.scalar_one_or_none() is not None:
        return RevertOutcome(
            status="tool_unavailable",
            message=(
                f"A document already exists at '{virtual_path}'; revert would "
                "collide. Move the live doc out of the way first."
            ),
        )

    # Copy the snapshot metadata (never alias it) and pin the resolved path.
    metadata = revision.metadata_before or {}
    if not isinstance(metadata, dict):
        metadata = {}
    metadata = dict(metadata)
    metadata["virtual_path"] = virtual_path

    content = revision.content_before
    new_doc = Document(
        title=revision.title_before,
        document_type=DocumentType.NOTE,
        document_metadata=metadata,
        content=content,
        content_hash=generate_content_hash(content, search_space_id),
        unique_identifier_hash=unique_identifier_hash,
        source_markdown=content,
        search_space_id=search_space_id,
        folder_id=folder_id,
        updated_at=datetime.now(UTC),
    )
    session.add(new_doc)
    # Flush so ``new_doc.id`` is populated for the chunk rows below.
    await session.flush()

    new_doc.embedding = embed_texts([content])[0]

    chunk_texts: list[str] = []
    chunks_before = revision.chunks_before
    if isinstance(chunks_before, list):
        chunk_texts = [
            c["content"]
            for c in chunks_before
            if isinstance(c, dict) and isinstance(c.get("content"), str)
        ]
    if chunk_texts:
        chunk_embeddings = embed_texts(chunk_texts)
        session.add_all(
            [
                Chunk(document_id=new_doc.id, content=text, embedding=embedding)
                for text, embedding in zip(chunk_texts, chunk_embeddings, strict=True)
            ]
        )

    # Repoint the snapshot at the recreated row so a follow-up revert of
    # the same row works as expected.
    revision.document_id = new_doc.id

    return RevertOutcome(
        status="ok",
        message=f"Re-inserted document '{revision.title_before}' from snapshot.",
    )
async def _delete_created_document(
    session: AsyncSession,
    *,
    revision: DocumentRevision,
) -> RevertOutcome:
    """Undo a creating ``write_file`` by deleting the row it produced.

    The caller selects this path when the snapshot's ``content_before`` is
    ``None``. A snapshot with no ``document_id`` is treated as already
    reverted and reported as ``ok`` without issuing a DELETE.
    """
    created_id = revision.document_id
    if created_id is not None:
        # Statement-level DELETE; the row is not loaded into the session.
        # NOTE(review): presumably dependent rows (chunks) are removed by a
        # database-level cascade; confirm against the schema.
        await session.execute(delete(Document).where(Document.id == created_id))
        return RevertOutcome(
            status="ok",
            message="Deleted the document that was created by this action.",
        )
    return RevertOutcome(
        status="ok",
        message="No live row to delete (already removed elsewhere).",
    )
async def _restore_document_revision(
session: AsyncSession, *, action: AgentActionLog
) -> RevertOutcome:
"""Restore the most recent :class:`DocumentRevision` for ``action``."""
"""Dispatch document-level revert based on ``action.tool_name``."""
stmt = (
select(DocumentRevision)
.where(DocumentRevision.agent_action_id == action.id)
@ -132,23 +383,111 @@ async def _restore_document_revision(
message="No document_revisions row tied to this action.",
)
from app.db import Document # late import to avoid cycles at module load
tool_name = (action.tool_name or "").lower()
doc = await session.get(Document, revision.document_id)
if doc is None:
if tool_name == "rm":
return await _reinsert_document_from_revision(session, revision=revision)
if tool_name == "write_file" and revision.content_before is None:
return await _delete_created_document(session, revision=revision)
return await _restore_in_place_document(session, revision=revision)
# ---------------------------------------------------------------------------
# Folder revision restore (mkdir/rmdir/rename/move)
# ---------------------------------------------------------------------------
async def _restore_in_place_folder(
    session: AsyncSession,
    *,
    revision: FolderRevision,
) -> RevertOutcome:
    """Apply an in-place restore to an existing :class:`Folder`.

    Restores ``name``, ``parent_id`` and ``position`` from the snapshot and
    bumps ``updated_at``.

    Args:
        session: Async session the restore is performed in. Nothing is
            committed here.
        revision: Snapshot row holding the pre-mutation folder state.

    Returns:
        ``tool_unavailable`` when the snapshot has no ``folder_id`` or the
        live folder row no longer exists; ``ok`` otherwise.
    """
    if revision.folder_id is None:
        return RevertOutcome(
            status="tool_unavailable",
            message="Original folder was hard-deleted; in-place restore is impossible.",
        )
    folder = await session.get(Folder, revision.folder_id)
    if folder is None:
        return RevertOutcome(
            status="tool_unavailable",
            message="Original folder has been deleted; revert cannot proceed.",
        )

    # NOTE(review): ``_set_field`` skips ``None``, so a snapshot whose
    # ``parent_id_before`` is ``None`` does not move the folder back to the
    # top level. Confirm whether that case can occur for in-place restores.
    _set_field(folder, "name", revision.name_before)
    _set_field(folder, "parent_id", revision.parent_id_before)
    _set_field(folder, "position", revision.position_before)
    folder.updated_at = datetime.now(UTC)
    return RevertOutcome(status="ok", message="Folder restored from snapshot.")
async def _reinsert_folder_from_revision(
    session: AsyncSession,
    *,
    revision: FolderRevision,
) -> RevertOutcome:
    """Re-INSERT a deleted :class:`Folder` from a snapshot row (``rmdir`` revert).

    Args:
        session: Async session the insert is performed in. The session is
            flushed to obtain the new primary key; nothing is committed here.
        revision: Snapshot row holding the pre-deletion folder state. On
            success its ``folder_id`` is repointed at the recreated row.

    Returns:
        ``not_reversible`` when the snapshot has no usable ``name_before``;
        ``tool_unavailable`` when the original parent folder is gone;
        ``ok`` once the folder has been flushed.
    """
    if not isinstance(revision.name_before, str) or not revision.name_before:
        return RevertOutcome(
            status="not_reversible",
            message="Snapshot lacks name_before; cannot recreate folder.",
        )

    # Verify the parent still exists so the insert cannot reference a folder
    # row that has itself been removed since the snapshot was taken.
    parent_id = revision.parent_id_before
    if parent_id is not None and await session.get(Folder, parent_id) is None:
        return RevertOutcome(
            status="tool_unavailable",
            message=(
                "Original parent folder no longer exists; restore the parent "
                "folder before reverting this action."
            ),
        )

    new_folder = Folder(
        name=revision.name_before,
        parent_id=parent_id,
        position=revision.position_before,
        search_space_id=revision.search_space_id,
        updated_at=datetime.now(UTC),
    )
    session.add(new_folder)
    # Flush so ``new_folder.id`` is populated before repointing the snapshot.
    await session.flush()

    revision.folder_id = new_folder.id
    return RevertOutcome(
        status="ok",
        message=f"Re-inserted folder '{revision.name_before}' from snapshot.",
    )
async def _delete_created_folder(
    session: AsyncSession,
    *,
    revision: FolderRevision,
) -> RevertOutcome:
    """Undo a ``mkdir`` by deleting the folder it created, if still empty.

    Args:
        session: Async session the delete is performed in. Nothing is
            committed here.
        revision: Snapshot row whose ``folder_id`` identifies the created
            folder.

    Returns:
        ``ok`` when the folder was deleted or the snapshot has no
        ``folder_id`` (nothing left to delete); ``tool_unavailable`` when the
        folder now contains at least one document or sub-folder.
    """
    if revision.folder_id is None:
        return RevertOutcome(
            status="ok",
            message="No live folder row to delete (already removed elsewhere).",
        )

    folder_id = revision.folder_id

    # Refuse to delete a folder that has gained contents since ``mkdir``;
    # a single matching row is enough, hence ``limit(1)``.
    has_doc = await session.execute(
        select(Document.id).where(Document.folder_id == folder_id).limit(1)
    )
    if has_doc.scalar_one_or_none() is not None:
        return RevertOutcome(
            status="tool_unavailable",
            message=(
                "Folder is no longer empty (documents have been added since "
                "mkdir); cannot revert."
            ),
        )

    has_child = await session.execute(
        select(Folder.id).where(Folder.parent_id == folder_id).limit(1)
    )
    if has_child.scalar_one_or_none() is not None:
        return RevertOutcome(
            status="tool_unavailable",
            message=(
                "Folder is no longer empty (sub-folders have been added "
                "since mkdir); cannot revert."
            ),
        )

    await session.execute(delete(Folder).where(Folder.id == folder_id))
    return RevertOutcome(
        status="ok",
        message="Deleted the folder that was created by this action.",
    )
async def _restore_folder_revision(
@ -168,41 +507,44 @@ async def _restore_folder_revision(
message="No folder_revisions row tied to this action.",
)
from app.db import Folder
tool_name = (action.tool_name or "").lower()
folder = await session.get(Folder, revision.folder_id)
if folder is None:
return RevertOutcome(
status="tool_unavailable",
message="Original folder has been deleted; revert cannot proceed.",
)
if tool_name == "rmdir":
return await _reinsert_folder_from_revision(session, revision=revision)
if revision.name_before is not None:
folder.name = revision.name_before
if revision.parent_id_before is not None:
folder.parent_id = revision.parent_id_before
if revision.position_before is not None:
folder.position = revision.position_before
folder.updated_at = datetime.now(UTC)
return RevertOutcome(status="ok", message="Folder restored from snapshot.")
if tool_name == "mkdir":
return await _delete_created_folder(session, revision=revision)
return await _restore_in_place_folder(session, revision=revision)
# Tool-name prefixes that route to KB document / folder revert paths. Kept
# as data so a future PR adding new KB-owned tools doesn't have to touch
# this module's control flow.
_DOC_TOOL_PREFIXES: tuple[str, ...] = (
"edit_file",
"write_file",
"update_memory",
"create_note",
"update_note",
"delete_note",
# ---------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------
#
# Exact-name dispatch: ``tool_name == name``, NOT ``startswith(...)``.
# Prefix-matching mis-routes pairs like ``rm``/``rmdir`` and
# ``delete_note``/``delete_folder``.
_DOC_TOOLS: frozenset[str] = frozenset(
{
"edit_file",
"write_file",
"move_file",
"rm",
"update_memory",
"create_note",
"update_note",
"delete_note",
}
)
_FOLDER_TOOL_PREFIXES: tuple[str, ...] = (
"mkdir",
"move_file",
"rename_folder",
"delete_folder",
_FOLDER_TOOLS: frozenset[str] = frozenset(
{
"mkdir",
"rmdir",
"rename_folder",
"delete_folder",
}
)
@ -220,9 +562,9 @@ async def revert_action(
"""
tool_name = (action.tool_name or "").lower()
if tool_name.startswith(_DOC_TOOL_PREFIXES):
if tool_name in _DOC_TOOLS:
outcome = await _restore_document_revision(session, action=action)
elif tool_name.startswith(_FOLDER_TOOL_PREFIXES):
elif tool_name in _FOLDER_TOOLS:
outcome = await _restore_folder_revision(session, action=action)
elif action.reverse_descriptor:
# Connector-owned reversibles run through the normal permission