mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
refactor: remove search_surfsense_docs tool and related references
- Deleted the `search_surfsense_docs` tool and its associated files, streamlining the agent's toolset. - Updated various components and prompts to remove references to the now-removed tool, ensuring consistency across the codebase. - Adjusted documentation to direct users to the SurfSense documentation link for product-related queries instead.
This commit is contained in:
parent
9b9e6828c7
commit
40ca9e6ed2
71 changed files with 232 additions and 1676 deletions
|
|
@ -1,8 +1,8 @@
|
|||
"""Build and emit the first ``thinking-1`` step for a new-chat turn.
|
||||
|
||||
The step title and "Processing X" items are derived from what the user sent
|
||||
(text snippet, image count, mentioned doc titles) so the FE can render a
|
||||
meaningful placeholder while the agent stream warms up.
|
||||
(text snippet, image count) so the FE can render a meaningful placeholder
|
||||
while the agent stream warms up.
|
||||
|
||||
``thinking-1`` is the canonical id for this step — every subsequent
|
||||
``thinking-N`` produced by ``stream_agent_events`` folds into the same
|
||||
|
|
@ -15,7 +15,6 @@ from collections.abc import Iterator
|
|||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from app.db import SurfsenseDocsDocument
|
||||
from app.services.new_streaming_service import VercelStreamingService
|
||||
|
||||
|
||||
|
|
@ -37,14 +36,9 @@ def build_initial_thinking_step(
|
|||
*,
|
||||
user_query: str,
|
||||
user_image_data_urls: list[str] | None,
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument],
|
||||
) -> InitialThinkingStep:
|
||||
if mentioned_surfsense_docs:
|
||||
title = "Analyzing referenced content"
|
||||
action_verb = "Analyzing"
|
||||
else:
|
||||
title = "Understanding your request"
|
||||
action_verb = "Processing"
|
||||
title = "Understanding your request"
|
||||
action_verb = "Processing"
|
||||
|
||||
processing_parts: list[str] = []
|
||||
if user_query.strip():
|
||||
|
|
@ -55,18 +49,6 @@ def build_initial_thinking_step(
|
|||
else:
|
||||
processing_parts.append("(message)")
|
||||
|
||||
if mentioned_surfsense_docs:
|
||||
doc_names: list[str] = []
|
||||
for doc in mentioned_surfsense_docs:
|
||||
t = doc.title
|
||||
if len(t) > 30:
|
||||
t = t[:27] + "..."
|
||||
doc_names.append(t)
|
||||
if len(doc_names) == 1:
|
||||
processing_parts.append(f"[{doc_names[0]}]")
|
||||
else:
|
||||
processing_parts.append(f"[{len(doc_names)} docs]")
|
||||
|
||||
items = [f"{action_verb}: {' '.join(processing_parts)}"]
|
||||
return InitialThinkingStep(step_id="thinking-1", title=title, items=items)
|
||||
|
||||
|
|
|
|||
|
|
@ -5,20 +5,17 @@ Pipeline:
|
|||
1. **History bootstrap** — only for cloned chats with no LangGraph checkpoint
|
||||
yet; flips the per-thread ``needs_history_bootstrap`` flag back to False
|
||||
once the rows are loaded.
|
||||
2. **Mentioned SurfSense docs** — eager-load chunks so the formatter has the
|
||||
full content without a second roundtrip.
|
||||
3. **Recent reports** — top 3 by id desc with non-null content, so the LLM
|
||||
2. **Recent reports** — top 3 by id desc with non-null content, so the LLM
|
||||
can resolve ``report_id`` for versioning without spelunking history.
|
||||
4. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the
|
||||
3. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the
|
||||
query with canonical ``\`/documents/...\``` paths the LLM expects.
|
||||
5. **Context block render** — XML-wrap surfsense docs + reports, prepend to
|
||||
the rewritten query, optionally prefix with display name for SEARCH_SPACE
|
||||
4. **Context block render** — XML-wrap recent reports, prepend to the
|
||||
rewritten query, optionally prefix with display name for SEARCH_SPACE
|
||||
visibility.
|
||||
6. **HumanMessage** — multimodal content if images are attached.
|
||||
5. **HumanMessage** — multimodal content if images are attached.
|
||||
|
||||
Returns the assembled ``input_state`` dict plus side-channel data the
|
||||
orchestrator needs downstream (``accepted_folder_ids`` for runtime context;
|
||||
``mentioned_surfsense_docs`` for the initial thinking step).
|
||||
orchestrator needs downstream (``accepted_folder_ids`` for runtime context).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -30,7 +27,6 @@ from typing import Any
|
|||
from langchain_core.messages import HumanMessage
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
||||
from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
|
||||
|
|
@ -38,10 +34,6 @@ from app.db import (
|
|||
ChatVisibility,
|
||||
NewChatThread,
|
||||
Report,
|
||||
SurfsenseDocsDocument,
|
||||
)
|
||||
from app.tasks.chat.streaming.context.mentioned_docs import (
|
||||
format_mentioned_surfsense_docs_as_context,
|
||||
)
|
||||
from app.utils.content_utils import bootstrap_history_from_db
|
||||
from app.utils.user_message_multimodal import build_human_message_content
|
||||
|
|
@ -55,13 +47,10 @@ class NewChatInputState:
|
|||
|
||||
``input_state`` is fed straight to the agent. ``accepted_folder_ids``
|
||||
feeds the runtime context (the resolver may have dropped some chips).
|
||||
``mentioned_surfsense_docs`` is consumed by the initial thinking-step
|
||||
builder for the FE placeholder before the agent stream starts.
|
||||
"""
|
||||
|
||||
input_state: dict[str, Any]
|
||||
accepted_folder_ids: list[int]
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument]
|
||||
|
||||
|
||||
async def build_new_chat_input_state(
|
||||
|
|
@ -72,7 +61,6 @@ async def build_new_chat_input_state(
|
|||
user_query: str,
|
||||
user_image_data_urls: list[str] | None,
|
||||
mentioned_document_ids: list[int] | None,
|
||||
mentioned_surfsense_doc_ids: list[int] | None,
|
||||
mentioned_folder_ids: list[int] | None,
|
||||
mentioned_documents: list[dict[str, Any]] | None,
|
||||
needs_history_bootstrap: bool,
|
||||
|
|
@ -96,15 +84,6 @@ async def build_new_chat_input_state(
|
|||
thread.needs_history_bootstrap = False
|
||||
await session.commit()
|
||||
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument] = []
|
||||
if mentioned_surfsense_doc_ids:
|
||||
result = await session.execute(
|
||||
select(SurfsenseDocsDocument)
|
||||
.options(selectinload(SurfsenseDocsDocument.chunks))
|
||||
.filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids))
|
||||
)
|
||||
mentioned_surfsense_docs = list(result.scalars().all())
|
||||
|
||||
# Top 3 reports keyed by id desc (newest first) with content present,
|
||||
# surfaced inline so the LLM resolves ``report_id`` for versioning without
|
||||
# digging through conversation history.
|
||||
|
|
@ -125,14 +104,12 @@ async def build_new_chat_input_state(
|
|||
user_query=user_query,
|
||||
filesystem_mode=filesystem_mode,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=mentioned_folder_ids,
|
||||
mentioned_documents=mentioned_documents,
|
||||
)
|
||||
|
||||
final_query = _render_query_with_context(
|
||||
agent_user_query=agent_user_query,
|
||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
||||
recent_reports=recent_reports,
|
||||
)
|
||||
|
||||
|
|
@ -154,7 +131,6 @@ async def build_new_chat_input_state(
|
|||
return NewChatInputState(
|
||||
input_state=input_state,
|
||||
accepted_folder_ids=accepted_folder_ids,
|
||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -165,7 +141,6 @@ async def _resolve_mentions_for_query(
|
|||
user_query: str,
|
||||
filesystem_mode: str,
|
||||
mentioned_document_ids: list[int] | None,
|
||||
mentioned_surfsense_doc_ids: list[int] | None,
|
||||
mentioned_folder_ids: list[int] | None,
|
||||
mentioned_documents: list[dict[str, Any]] | None,
|
||||
) -> tuple[str, list[int]]:
|
||||
|
|
@ -187,10 +162,7 @@ async def _resolve_mentions_for_query(
|
|||
accepted_folder_ids: list[int] = []
|
||||
|
||||
has_any_mention = bool(
|
||||
mentioned_document_ids
|
||||
or mentioned_surfsense_doc_ids
|
||||
or mentioned_folder_ids
|
||||
or mentioned_documents
|
||||
mentioned_document_ids or mentioned_folder_ids or mentioned_documents
|
||||
)
|
||||
if filesystem_mode != FilesystemMode.CLOUD.value or not has_any_mention:
|
||||
return agent_user_query, accepted_folder_ids
|
||||
|
|
@ -214,7 +186,6 @@ async def _resolve_mentions_for_query(
|
|||
search_space_id=search_space_id,
|
||||
mentioned_documents=chip_objs,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=mentioned_folder_ids,
|
||||
)
|
||||
agent_user_query = substitute_in_text(user_query, resolved.token_to_path)
|
||||
|
|
@ -225,17 +196,11 @@ async def _resolve_mentions_for_query(
|
|||
def _render_query_with_context(
|
||||
*,
|
||||
agent_user_query: str,
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument],
|
||||
recent_reports: list[Report],
|
||||
) -> str:
|
||||
"""Prepend surfsense-docs + recent-reports XML blocks to the user query."""
|
||||
"""Prepend recent-reports XML block to the user query."""
|
||||
context_parts: list[str] = []
|
||||
|
||||
if mentioned_surfsense_docs:
|
||||
context_parts.append(
|
||||
format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs)
|
||||
)
|
||||
|
||||
if recent_reports:
|
||||
report_lines: list[str] = []
|
||||
for r in recent_reports:
|
||||
|
|
|
|||
|
|
@ -123,7 +123,6 @@ async def stream_new_chat(
|
|||
user_id: str | None = None,
|
||||
llm_config_id: int = -1,
|
||||
mentioned_document_ids: list[int] | None = None,
|
||||
mentioned_surfsense_doc_ids: list[int] | None = None,
|
||||
mentioned_folder_ids: list[int] | None = None,
|
||||
mentioned_documents: list[dict[str, Any]] | None = None,
|
||||
checkpoint_id: str | None = None,
|
||||
|
|
@ -435,7 +434,6 @@ async def stream_new_chat(
|
|||
user_query=user_query,
|
||||
user_image_data_urls=user_image_data_urls,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=mentioned_folder_ids,
|
||||
mentioned_documents=mentioned_documents,
|
||||
needs_history_bootstrap=needs_history_bootstrap,
|
||||
|
|
@ -447,7 +445,6 @@ async def stream_new_chat(
|
|||
)
|
||||
input_state = assembled.input_state
|
||||
accepted_folder_ids = assembled.accepted_folder_ids
|
||||
mentioned_surfsense_docs = assembled.mentioned_surfsense_docs
|
||||
_perf_log.info(
|
||||
"[stream_new_chat] History bootstrap + doc/report queries in %.3fs",
|
||||
time.perf_counter() - _t0,
|
||||
|
|
@ -560,7 +557,6 @@ async def stream_new_chat(
|
|||
initial_step = build_initial_thinking_step(
|
||||
user_query=user_query,
|
||||
user_image_data_urls=user_image_data_urls,
|
||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
||||
)
|
||||
for sse in iter_initial_thinking_step_frame(
|
||||
initial_step,
|
||||
|
|
@ -575,7 +571,7 @@ async def stream_new_chat(
|
|||
# Drop the heavy ORM objects + the container that holds them so they
|
||||
# aren't retained for the entire streaming duration. ``input_state``
|
||||
# already carries the langchain_messages list independently.
|
||||
del assembled, mentioned_surfsense_docs
|
||||
del assembled
|
||||
|
||||
title_task = spawn_title_task(
|
||||
chat_id=chat_id,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue