From 40ca9e6ed2d17b7a112fb9424c31fbedc9ae1e28 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Thu, 28 May 2026 22:35:14 -0700 Subject: [PATCH] refactor: remove `search_surfsense_docs` tool and related references - Deleted the `search_surfsense_docs` tool and its associated files, streamlining the agent's toolset. - Updated various components and prompts to remove references to the now-removed tool, ensuring consistency across the codebase. - Adjusted documentation to direct users to the SurfSense documentation link for product-related queries instead. --- .../146_drop_surfsense_docs_tables.py | 129 +++++++++ .../system_prompt/prompts/citations/on.md | 4 +- .../prompts/dynamic_context/private.md | 4 +- .../prompts/dynamic_context/team.md | 4 +- .../system_prompt/prompts/kb_first.md | 8 +- .../prompts/providers/anthropic.md | 2 +- .../prompts/providers/deepseek.md | 2 +- .../system_prompt/prompts/providers/google.md | 2 +- .../prompts/providers/openai_classic.md | 5 +- .../system_prompt/prompts/routing.md | 9 +- .../tools/search_surfsense_docs/__init__.py | 1 - .../search_surfsense_docs/description.md | 10 - .../tools/search_surfsense_docs/example.md | 15 -- .../main_agent/tools/index.py | 1 - .../builtins/research/system_prompt.md | 1 - .../builtins/research/tools/__init__.py | 4 +- .../builtins/research/tools/index.py | 2 - .../research/tools/search_surfsense_docs.py | 145 ---------- .../app/agents/new_chat/feature_flags.py | 2 +- .../app/agents/new_chat/mention_resolver.py | 10 +- .../new_chat/prompts/base/citations_on.md | 3 +- .../prompts/base/kb_only_policy_private.md | 2 +- .../prompts/base/kb_only_policy_team.md | 2 +- .../prompts/base/tool_routing_private.md | 1 + .../prompts/base/tool_routing_team.md | 1 + .../app/agents/new_chat/prompts/composer.py | 1 - .../prompts/examples/search_surfsense_docs.md | 9 - .../prompts/tools/search_surfsense_docs.md | 7 - .../skills/builtin/email-drafting/SKILL.md | 1 - .../skills/builtin/kb-research/SKILL.md | 
2 +- .../skills/builtin/meeting-prep/SKILL.md | 2 +- .../skills/builtin/report-writing/SKILL.md | 2 +- .../skills/builtin/slack-summary/SKILL.md | 1 - .../app/agents/new_chat/subagents/config.py | 5 +- .../app/agents/new_chat/tools/__init__.py | 3 - .../app/agents/new_chat/tools/registry.py | 10 - .../new_chat/tools/search_surfsense_docs.py | 174 ------------ surfsense_backend/app/app.py | 8 - surfsense_backend/app/db.py | 45 ---- surfsense_backend/app/routes/__init__.py | 2 - .../app/routes/new_chat_routes.py | 2 - .../app/routes/surfsense_docs_routes.py | 172 ------------ surfsense_backend/app/schemas/new_chat.py | 4 - .../app/schemas/surfsense_docs.py | 43 --- .../app/tasks/chat/stream_new_chat.py | 112 +------- .../tasks/chat/streaming/context/__init__.py | 6 +- .../chat/streaming/context/mentioned_docs.py | 58 ---- .../flows/new_chat/initial_thinking_step.py | 26 +- .../streaming/flows/new_chat/input_state.py | 51 +--- .../streaming/flows/new_chat/orchestrator.py | 6 +- .../app/tasks/surfsense_docs_indexer.py | 249 ------------------ surfsense_backend/app/utils/surfsense_docs.py | 13 - .../scripts/seed_surfsense_docs.py | 40 --- .../test_default_permissions_layering.py | 1 - .../new_chat/test_specialized_subagents.py | 14 +- .../test_parallel_refactor_parity.py | 35 +-- .../components/builder/mention-task-input.tsx | 9 +- .../new-chat/[[...chat_id]]/page.tsx | 36 +-- .../atoms/chat/mentioned-documents.atom.ts | 5 +- .../assistant-ui/inline-citation.tsx | 159 +---------- .../components/assistant-ui/thread.tsx | 2 +- .../layout/ui/sidebar/DocumentsSidebar.tsx | 1 - .../new-chat/document-mention-picker.tsx | 113 ++------ .../contracts/enums/connectorIcons.tsx | 3 - surfsense_web/contracts/enums/toolIcons.tsx | 3 - .../contracts/types/document.types.ts | 55 ---- .../lib/apis/documents-api.service.ts | 46 ---- surfsense_web/lib/chat/thread-persistence.ts | 1 - .../lib/documents/document-type-labels.ts | 1 - surfsense_web/lib/query-client/cache-keys.ts | 1 - 
surfsense_web/tsc_out.txt | Bin 0 -> 32582 bytes 71 files changed, 232 insertions(+), 1676 deletions(-) create mode 100644 surfsense_backend/alembic/versions/146_drop_surfsense_docs_tables.py delete mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/__init__.py delete mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/description.md delete mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/example.md delete mode 100644 surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py delete mode 100644 surfsense_backend/app/agents/new_chat/prompts/examples/search_surfsense_docs.md delete mode 100644 surfsense_backend/app/agents/new_chat/prompts/tools/search_surfsense_docs.md delete mode 100644 surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py delete mode 100644 surfsense_backend/app/routes/surfsense_docs_routes.py delete mode 100644 surfsense_backend/app/schemas/surfsense_docs.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py delete mode 100644 surfsense_backend/app/tasks/surfsense_docs_indexer.py delete mode 100644 surfsense_backend/app/utils/surfsense_docs.py delete mode 100644 surfsense_backend/scripts/seed_surfsense_docs.py create mode 100644 surfsense_web/tsc_out.txt diff --git a/surfsense_backend/alembic/versions/146_drop_surfsense_docs_tables.py b/surfsense_backend/alembic/versions/146_drop_surfsense_docs_tables.py new file mode 100644 index 000000000..725405834 --- /dev/null +++ b/surfsense_backend/alembic/versions/146_drop_surfsense_docs_tables.py @@ -0,0 +1,129 @@ +"""Drop Surfsense docs tables (feature removed end to end) + +Revision ID: 146 +Revises: 145 +Create Date: 2026-05-28 + +Removes the SurfSense product-documentation feature: the 
+``surfsense_docs_documents`` and ``surfsense_docs_chunks`` tables (created +in revision 60) and the GIN trigram index on the title column (added in +revision 67). The docs were seeded at startup from local MDX files, so no +user data is lost. Downgrade recreates the tables and indexes. +""" + +from collections.abc import Sequence + +from alembic import op +from app.config import config + +# revision identifiers, used by Alembic. +revision: str = "146" +down_revision: str | None = "145" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +# Embedding dimension is required to recreate the vector columns on downgrade. +EMBEDDING_DIM = config.embedding_model_instance.dimension + + +def upgrade() -> None: + """Drop surfsense docs tables and all their indexes.""" + # Trigram index from revision 67 + op.execute("DROP INDEX IF EXISTS idx_surfsense_docs_title_trgm") + + # Full-text search indexes + op.execute("DROP INDEX IF EXISTS surfsense_docs_chunks_search_index") + op.execute("DROP INDEX IF EXISTS surfsense_docs_documents_search_index") + + # Vector indexes + op.execute("DROP INDEX IF EXISTS surfsense_docs_chunks_vector_index") + op.execute("DROP INDEX IF EXISTS surfsense_docs_documents_vector_index") + + # B-tree indexes + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_chunks_document_id") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_updated_at") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_content_hash") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_source") + + # Tables (chunks first due to FK) + op.execute("DROP TABLE IF EXISTS surfsense_docs_chunks") + op.execute("DROP TABLE IF EXISTS surfsense_docs_documents") + + +def downgrade() -> None: + """Recreate surfsense docs tables and indexes (reverses revisions 60 + 67).""" + op.execute( + f""" + CREATE TABLE IF NOT EXISTS surfsense_docs_documents ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT 
NULL DEFAULT NOW(), + source VARCHAR NOT NULL UNIQUE, + title VARCHAR NOT NULL, + content TEXT NOT NULL, + content_hash VARCHAR NOT NULL, + embedding vector({EMBEDDING_DIM}), + updated_at TIMESTAMP WITH TIME ZONE + ); + """ + ) + op.execute( + f""" + CREATE TABLE IF NOT EXISTS surfsense_docs_chunks ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + content TEXT NOT NULL, + embedding vector({EMBEDDING_DIM}), + document_id INTEGER NOT NULL REFERENCES surfsense_docs_documents(id) ON DELETE CASCADE + ); + """ + ) + + # B-tree indexes + op.execute( + "CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_source ON surfsense_docs_documents(source)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_content_hash ON surfsense_docs_documents(content_hash)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_updated_at ON surfsense_docs_documents(updated_at)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_surfsense_docs_chunks_document_id ON surfsense_docs_chunks(document_id)" + ) + + # Vector indexes + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_documents_vector_index + ON surfsense_docs_documents USING hnsw (embedding public.vector_cosine_ops); + """ + ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_chunks_vector_index + ON surfsense_docs_chunks USING hnsw (embedding public.vector_cosine_ops); + """ + ) + + # Full-text search indexes + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_documents_search_index + ON surfsense_docs_documents USING gin (to_tsvector('english', content)); + """ + ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_chunks_search_index + ON surfsense_docs_chunks USING gin (to_tsvector('english', content)); + """ + ) + + # Trigram index from revision 67 + op.execute( + """ + CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm + ON surfsense_docs_documents USING gin (title gin_trgm_ops); + """ 
+ ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md index e61a0bffb..2abd95d5a 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md @@ -4,8 +4,8 @@ never invent ids you didn't see. Citation ids are resolved by exact-match lookup; a wrong id silently breaks the link, so when in doubt, omit. ### Channel A — chunk blocks injected this turn -When `search_surfsense_docs` or `web_search` returns `` / -`` blocks in this turn: +When `web_search` returns `` / `` blocks in this +turn: 1. For each factual statement taken from those chunks, add `[citation:chunk_id]` using the **exact** id from a visible diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md index 71c86be40..8f2bfca4e 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md @@ -20,8 +20,8 @@ it to resolve paths the user describes in natural language ("my Q2 roadmap", delegating to a specialist. `` and `` blocks are chunked indexed content returned -by KB search (from `search_surfsense_docs`, or backing ``). -Each chunk carries a stable `id` attribute. +by KB search (backing ``). Each chunk carries a stable +`id` attribute. If a block doesn't appear this turn, work from the conversation alone. 
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md index 592c2ed9c..a5892c23a 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md @@ -20,8 +20,8 @@ week's planning notes") into concrete document references before delegating to a specialist. `` and `` blocks are chunked indexed content returned -by KB search (from `search_surfsense_docs`, or backing ``). -Each chunk carries a stable `id` attribute. +by KB search (backing ``). Each chunk carries a stable +`id` attribute. If a block doesn't appear this turn, work from the conversation alone. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md index f06a52c1d..80fa4bf8f 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md @@ -1,19 +1,21 @@ CRITICAL — ground factual answers in what you actually receive this turn: - injected workspace context (see ``), -- results from your own tool calls (`search_surfsense_docs`, `web_search`, - `scrape_webpage`), +- results from your own tool calls (`web_search`, `scrape_webpage`), - or substantive summaries returned by a `task` specialist you invoked. Do **not** answer factual or informational questions from general knowledge unless the user explicitly authorises it after you say you couldn't find enough in those sources. The flow when nothing is found: -1. Say you couldn't find enough in their workspace, docs, or tool output. +1. 
Say you couldn't find enough in their workspace or tool output. 2. Ask: *"Would you like me to answer from my general knowledge instead?"* 3. Only answer from general knowledge after a clear yes. This rule does NOT apply to: casual conversation · meta-questions about SurfSense ("what can you do?") · formatting or analysis of content already in chat · clear rewrite/edit instructions · lightweight web research. + +For "how do I use SurfSense" / product-documentation questions, point the +user to https://www.surfsense.com/docs. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/anthropic.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/anthropic.md index 89154c443..d852f5955 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/anthropic.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/anthropic.md @@ -5,7 +5,7 @@ Structured reasoning: - For non-trivial work, `` / short `` before tool calls is fine. Professional objectivity: -- Accuracy over flattery; verify with **search_surfsense_docs**, **web_search**, **scrape_webpage**, or **task** when unsure — don’t invent connector access. +- Accuracy over flattery; verify with **web_search**, **scrape_webpage**, or **task** when unsure — don’t invent connector access. Task management: - For 3+ steps, use todo tooling; update statuses promptly. 
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/deepseek.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/deepseek.md index 4254e9ed5..01d56999f 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/deepseek.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/deepseek.md @@ -13,6 +13,6 @@ Attribution: Tool calls: - Parallelise independent calls. -- Prefer **search_surfsense_docs** for SurfSense docs/product questions before **web_search** when that fits the ask. +- For SurfSense docs/product questions, point the user to https://www.surfsense.com/docs. - Don’t invent paths, chunk ids, or URLs — only values from tools or the user. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md index dc5073538..32ed959c1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md @@ -7,7 +7,7 @@ Output style: - GitHub-flavoured Markdown; monospace-friendly. Workflow (Understand → Plan → Act → Verify): -1. **Understand:** parse the ask; use **search_surfsense_docs** / injected workspace context before guessing. +1. **Understand:** parse the ask; use injected workspace context before guessing. 2. **Plan:** for multi-step work, a short plan first. 3. **Act:** only with tools you actually have on this agent (see `` and ``). Connector work → **task**. 4. **Verify:** re-read or re-search only when it materially reduces risk. 
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_classic.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_classic.md index 7ff3ec912..8596c42cd 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_classic.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_classic.md @@ -15,6 +15,7 @@ Output style: Tool calls: - Parallelise independent calls in one turn. -- Prefer **search_surfsense_docs** for SurfSense-product questions, **web_search** / **scrape_webpage** - for fresh public facts; integrations and heavy workflows → **task**. +- For SurfSense-product questions, point the user to https://www.surfsense.com/docs; + use **web_search** / **scrape_webpage** for fresh public facts; integrations and + heavy workflows → **task**. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md index 1038dde3d..28cf0ac63 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md @@ -3,10 +3,7 @@ You have two execution channels. Pick the one that owns the work — never simulate one with the other. ### 1. Direct tools (you call them yourself) -- `search_surfsense_docs` — SurfSense product docs (setup, configuration, - connector docs, feature behavior). -- `web_search` — search the public web (anything outside SurfSense docs and - the workspace KB). +- `web_search` — search the public web (anything outside the workspace KB). - `scrape_webpage` — fetch the body of a specific public URL. - `update_memory` — curate persistent memory (see ``). 
- `write_todos` — maintain a structured plan when the turn series spans @@ -14,6 +11,10 @@ simulate one with the other. `in_progress` **before** the `task` call that handles it, `completed` once the call returns. Skip for single-step requests. +**Questions about how to use SurfSense itself** (setup, configuration, +connectors, feature behavior) — point the user to the documentation: +https://www.surfsense.com/docs. There is no docs-search tool; give the link. + **You have NO filesystem tools.** Any read, write, edit, move, rename, or search inside the user's workspace goes through `task(knowledge_base, …)` — never via `write_file`, `ls`, or any direct file operation. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/__init__.py deleted file mode 100644 index c2cda318e..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""``search_surfsense_docs`` — description + few-shot examples.""" diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/description.md deleted file mode 100644 index 256d3f3a4..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/description.md +++ /dev/null @@ -1,10 +0,0 @@ -- `search_surfsense_docs` — Search official SurfSense documentation (product - help). - - Use when the user asks how SurfSense itself works — setup, configuration, - connector documentation, feature behavior, anything covered in the - product docs. 
- - Not a substitute for `task` when the user wants actions inside a - connected service (Gmail, Slack, Jira, Notion, etc.). - - Args: `query`, `top_k` (default 10). - - Returns doc excerpts; chunk ids may appear for attribution — see - `` for the contract. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/example.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/example.md deleted file mode 100644 index d53ad8c91..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/example.md +++ /dev/null @@ -1,15 +0,0 @@ - -user: "How do I install SurfSense?" -→ search_surfsense_docs(query="installation setup") - - - -user: "What connectors does SurfSense support?" -→ search_surfsense_docs(query="available connectors integrations") - - - -user: "How do I set up the Notion connector?" -→ search_surfsense_docs(query="Notion connector setup configuration") -(Changing data inside Notion itself → `task(notion, …)`, not this tool.) - diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py index 88509eda7..70fb42c0d 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py @@ -6,7 +6,6 @@ Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subag from __future__ import annotations MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] 
= ( - "search_surfsense_docs", "web_search", "scrape_webpage", "update_memory", diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md index 3eabd8ee0..1b9ccaefa 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md @@ -8,7 +8,6 @@ Gather and synthesize evidence using SurfSense research tools with clear citatio - `web_search` - `scrape_webpage` -- `search_surfsense_docs` diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/__init__.py index 414cc96f4..7234942b6 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/__init__.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/__init__.py @@ -1,11 +1,9 @@ -"""Research-stage tools: web search, scrape, and in-product doc search.""" +"""Research-stage tools: web search and scrape.""" from .scrape_webpage import create_scrape_webpage_tool -from .search_surfsense_docs import create_search_surfsense_docs_tool from .web_search import create_web_search_tool __all__ = [ "create_scrape_webpage_tool", - "create_search_surfsense_docs_tool", "create_web_search_tool", ] diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/index.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/index.py index ea544a8da..d8abce46c 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/index.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/index.py @@ -9,7 +9,6 @@ from langchain_core.tools import BaseTool from 
app.agents.new_chat.permissions import Ruleset from .scrape_webpage import create_scrape_webpage_tool -from .search_surfsense_docs import create_search_surfsense_docs_tool from .web_search import create_web_search_tool NAME = "research" @@ -27,5 +26,4 @@ def load_tools( available_connectors=d.get("available_connectors"), ), create_scrape_webpage_tool(firecrawl_api_key=d.get("firecrawl_api_key")), - create_search_surfsense_docs_tool(db_session=d["db_session"]), ] diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py deleted file mode 100644 index ccc5c49e2..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py +++ /dev/null @@ -1,145 +0,0 @@ -"""Semantic search over pre-indexed in-app documentation chunks for user how-to questions.""" - -import asyncio -import json - -from langchain_core.tools import tool -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument -from app.utils.document_converters import embed_text -from app.utils.surfsense_docs import surfsense_docs_public_url - - -def format_surfsense_docs_results(results: list[tuple]) -> str: - """Format (chunk, document) rows as XML with ``doc-`` chunk IDs for citations and UI routing.""" - if not results: - return "No relevant Surfsense documentation found for your query." 
- - # Group chunks by document - grouped: dict[int, dict] = {} - for chunk, doc in results: - public_url = surfsense_docs_public_url(doc.source) - if doc.id not in grouped: - grouped[doc.id] = { - "document_id": f"doc-{doc.id}", - "document_type": "SURFSENSE_DOCS", - "title": doc.title, - "url": public_url, - "metadata": {"source": doc.source, "public_url": public_url}, - "chunks": [], - } - grouped[doc.id]["chunks"].append( - { - "chunk_id": f"doc-{chunk.id}", - "content": chunk.content, - } - ) - - # Render XML matching format_documents_for_context structure - parts: list[str] = [] - for g in grouped.values(): - metadata_json = json.dumps(g["metadata"], ensure_ascii=False) - - parts.append("") - parts.append("") - parts.append(f" {g['document_id']}") - parts.append(f" {g['document_type']}") - parts.append(f" <![CDATA[{g['title']}]]>") - parts.append(f" ") - parts.append(f" ") - parts.append("") - parts.append("") - parts.append("") - - for ch in g["chunks"]: - parts.append( - f" " - ) - - parts.append("") - parts.append("") - parts.append("") - - return "\n".join(parts).strip() - - -async def search_surfsense_docs_async( - query: str, - db_session: AsyncSession, - top_k: int = 10, -) -> str: - """ - Search Surfsense documentation using vector similarity. 
- - Args: - query: The search query about Surfsense usage - db_session: Database session for executing queries - top_k: Number of results to return - - Returns: - Formatted string with relevant documentation content - """ - # Get embedding for the query - query_embedding = await asyncio.to_thread(embed_text, query) - - # Vector similarity search on chunks, joining with documents - stmt = ( - select(SurfsenseDocsChunk, SurfsenseDocsDocument) - .join( - SurfsenseDocsDocument, - SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id, - ) - .order_by(SurfsenseDocsChunk.embedding.op("<=>")(query_embedding)) - .limit(top_k) - ) - - result = await db_session.execute(stmt) - rows = result.all() - - return format_surfsense_docs_results(rows) - - -def create_search_surfsense_docs_tool(db_session: AsyncSession): - """ - Factory function to create the search_surfsense_docs tool. - - Args: - db_session: Database session for executing queries - - Returns: - A configured tool function for searching Surfsense documentation - """ - - @tool - async def search_surfsense_docs(query: str, top_k: int = 10) -> str: - """ - Search Surfsense documentation for help with using the application. - - Use this tool when the user asks questions about: - - How to use Surfsense features - - Installation and setup instructions - - Configuration options and settings - - Troubleshooting common issues - - Available connectors and integrations - - Browser extension usage - - API documentation - - This searches the official Surfsense documentation that was indexed - at deployment time. It does NOT search the user's personal knowledge base. 
- - Args: - query: The search query about Surfsense usage or features - top_k: Number of documentation chunks to retrieve (default: 10) - - Returns: - Relevant documentation content formatted with chunk IDs for citations - """ - return await search_surfsense_docs_async( - query=query, - db_session=db_session, - top_k=top_k, - ) - - return search_surfsense_docs diff --git a/surfsense_backend/app/agents/new_chat/feature_flags.py b/surfsense_backend/app/agents/new_chat/feature_flags.py index 3cea051ef..27188fac3 100644 --- a/surfsense_backend/app/agents/new_chat/feature_flags.py +++ b/surfsense_backend/app/agents/new_chat/feature_flags.py @@ -104,7 +104,7 @@ class AgentFeatureFlags: # ``tools/google_drive``, ``tools/dropbox``, ``tools/onedrive``, # ``tools/google_calendar``, ``tools/confluence``, ``tools/discord``, # ``tools/teams``, ``tools/luma``, ``connected_accounts``, - # ``update_memory``, ``search_surfsense_docs``) now acquire fresh + # ``update_memory``) now acquire fresh # short-lived ``AsyncSession`` instances per call via # :data:`async_session_maker`. The factory still accepts ``db_session`` # for registry compatibility but ``del``'s it immediately — see any diff --git a/surfsense_backend/app/agents/new_chat/mention_resolver.py b/surfsense_backend/app/agents/new_chat/mention_resolver.py index 6a025b947..f13dbc6ae 100644 --- a/surfsense_backend/app/agents/new_chat/mention_resolver.py +++ b/surfsense_backend/app/agents/new_chat/mention_resolver.py @@ -73,9 +73,8 @@ class ResolvedMentionSet: ``@Project Roadmap`` is never shadowed by a shorter prefix ``@Project``). - ``mentioned_document_ids`` collapses doc + surfsense_doc chips into - a single ordered, deduped list because the priority middleware - treats them uniformly downstream — see + ``mentioned_document_ids`` is an ordered, deduped list consumed by + the priority middleware downstream — see ``KnowledgePriorityMiddleware._compute_priority_paths``. 
""" @@ -103,7 +102,6 @@ async def resolve_mentions( search_space_id: int, mentioned_documents: list[MentionedDocumentInfo] | None, mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, mentioned_folder_ids: list[int] | None = None, ) -> ResolvedMentionSet: """Resolve every @-mention chip on a turn into virtual paths. @@ -111,8 +109,7 @@ async def resolve_mentions( The function takes both the ``mentioned_documents`` discriminated list (chip metadata used for substitution + persistence) and the parallel id arrays (``mentioned_document_ids``, - ``mentioned_surfsense_doc_ids``, ``mentioned_folder_ids``) for two - reasons: + ``mentioned_folder_ids``) for two reasons: * Legacy clients that haven't migrated to the unified chip list still send the id arrays — we treat the union as authoritative. @@ -142,7 +139,6 @@ async def resolve_mentions( dict.fromkeys( [ *(mentioned_document_ids or []), - *(mentioned_surfsense_doc_ids or []), *chip_doc_ids, ] ) diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md b/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md index 56291bf3e..3562ce66e 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md @@ -59,14 +59,13 @@ Do NOT cite document_id. Always use the chunk id. - NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format - NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only - NEVER make up chunk IDs if you are unsure about the chunk_id. 
It is better to omit the citation than to guess -- Copy the EXACT chunk id from the XML - if it says `<chunk id='doc-123'>`, use [citation:doc-123] +- Copy the EXACT chunk id from the XML - if it says `<chunk id='5'>`, use [citation:5] - If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page] CORRECT citation formats: - [citation:5] (numeric chunk ID from knowledge base) -- [citation:doc-123] (for Surfsense documentation chunks) - [citation:https://example.com/article] (URL chunk ID from web search results) - [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations) diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md index 9cc767e7e..073b75fa5 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md @@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE: 2. Ask the user: "Would you like me to answer from my general knowledge instead?" 3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes. - This policy does NOT apply to: - * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?") + * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs. 
* Formatting, summarization, or analysis of content already present in the conversation * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points") * Tool-usage actions like generating reports, podcasts, images, or scraping webpages diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md index 1d806dbae..1a43ed490 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md @@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE: 2. Ask: "Would you like me to answer from my general knowledge instead?" 3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes. - This policy does NOT apply to: - * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?") + * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs. 
* Formatting, summarization, or analysis of content already present in the conversation * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points") * Tool-usage actions like generating reports, podcasts, images, or scraping webpages diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md index b8bb069e2..9121de879 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md @@ -13,6 +13,7 @@ When to use which tool: - Knowledge base content (Notion, GitHub, files, notes) → automatically searched - Real-time public web data → call web_search - Reading a specific webpage → call scrape_webpage +- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs **`task` subagents (when to delegate):** - **`linear_specialist`** — Linear-only investigations and tool use. diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md index b081a2123..c5383be77 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md @@ -13,6 +13,7 @@ When to use which tool: - Knowledge base content (Notion, GitHub, files, notes) → automatically searched - Real-time public web data → call web_search - Reading a specific webpage → call scrape_webpage +- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs **`task` subagents (when to delegate):** - **`linear_specialist`** — Linear-only investigations and tool use. 
diff --git a/surfsense_backend/app/agents/new_chat/prompts/composer.py b/surfsense_backend/app/agents/new_chat/prompts/composer.py index 42f8303e6..412665813 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/composer.py +++ b/surfsense_backend/app/agents/new_chat/prompts/composer.py @@ -151,7 +151,6 @@ def _read_fragment(subpath: str) -> str: # Ordered for reading flow: fundamentals first, then artifact generators, # then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``). ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = ( - "search_surfsense_docs", "web_search", "generate_podcast", "generate_video_presentation", diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/search_surfsense_docs.md b/surfsense_backend/app/agents/new_chat/prompts/examples/search_surfsense_docs.md deleted file mode 100644 index b90f2b7a7..000000000 --- a/surfsense_backend/app/agents/new_chat/prompts/examples/search_surfsense_docs.md +++ /dev/null @@ -1,9 +0,0 @@ - -- User: "How do I install SurfSense?" - - Call: `search_surfsense_docs(query="installation setup")` -- User: "What connectors does SurfSense support?" - - Call: `search_surfsense_docs(query="available connectors integrations")` -- User: "How do I set up the Notion connector?" - - Call: `search_surfsense_docs(query="Notion connector setup configuration")` -- User: "How do I use Docker to run SurfSense?" - - Call: `search_surfsense_docs(query="Docker installation setup")` diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/search_surfsense_docs.md b/surfsense_backend/app/agents/new_chat/prompts/tools/search_surfsense_docs.md deleted file mode 100644 index 133717fec..000000000 --- a/surfsense_backend/app/agents/new_chat/prompts/tools/search_surfsense_docs.md +++ /dev/null @@ -1,7 +0,0 @@ - -- search_surfsense_docs: Search the official SurfSense documentation. - - Use this tool when the user asks anything about SurfSense itself (the application they are using). 
- - Args: - - query: The search query about SurfSense - - top_k: Number of documentation chunks to retrieve (default: 10) - - Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123]) diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/email-drafting/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/email-drafting/SKILL.md index 32e599e98..2dbc8ec43 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/email-drafting/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/email-drafting/SKILL.md @@ -1,7 +1,6 @@ --- name: email-drafting description: Draft an email matching the user's voice, with structured intent and CTA -allowed-tools: search_surfsense_docs --- # Email drafting diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/kb-research/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/kb-research/SKILL.md index c268278ab..0f0b5ffbb 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/kb-research/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/kb-research/SKILL.md @@ -1,7 +1,7 @@ --- name: kb-research description: Structured approach to finding and synthesizing information from the user's knowledge base -allowed-tools: search_surfsense_docs, scrape_webpage, read_file, ls_tree, grep, web_search +allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search --- # Knowledge-base research diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/meeting-prep/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/meeting-prep/SKILL.md index 9657eb078..5a375fbde 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/meeting-prep/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/meeting-prep/SKILL.md @@ -1,7 +1,7 @@ --- name: meeting-prep description: Pull together briefing materials before a scheduled meeting -allowed-tools: search_surfsense_docs, 
web_search, scrape_webpage, read_file +allowed-tools: web_search, scrape_webpage, read_file --- # Meeting preparation diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/report-writing/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/report-writing/SKILL.md index 17ac2f391..cfea9593f 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/report-writing/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/report-writing/SKILL.md @@ -1,7 +1,7 @@ --- name: report-writing description: How to scope, draft, and revise a Markdown report artifact via generate_report -allowed-tools: generate_report, search_surfsense_docs, read_file +allowed-tools: generate_report, read_file --- # Report writing diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/slack-summary/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/slack-summary/SKILL.md index 33b9e72a2..1a4c3da9f 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/slack-summary/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/slack-summary/SKILL.md @@ -1,7 +1,6 @@ --- name: slack-summary description: Distill a Slack channel or thread into actionable summary -allowed-tools: search_surfsense_docs --- # Slack summarization diff --git a/surfsense_backend/app/agents/new_chat/subagents/config.py b/surfsense_backend/app/agents/new_chat/subagents/config.py index b993d2b06..2cfd47441 100644 --- a/surfsense_backend/app/agents/new_chat/subagents/config.py +++ b/surfsense_backend/app/agents/new_chat/subagents/config.py @@ -46,7 +46,6 @@ logger = logging.getLogger(__name__) # ``glob``, ``grep``) plus the SurfSense-side read tools. EXPLORE_READ_TOOLS: frozenset[str] = frozenset( { - "search_surfsense_docs", "web_search", "scrape_webpage", "read_file", @@ -61,7 +60,6 @@ EXPLORE_READ_TOOLS: frozenset[str] = frozenset( # is needed, the parent should hand off to ``explore`` first. 
REPORT_WRITER_TOOLS: frozenset[str] = frozenset( { - "search_surfsense_docs", "read_file", "generate_report", } @@ -222,7 +220,6 @@ EXPLORE_SYSTEM_PROMPT = """You are the **explore** subagent for SurfSense. Conduct read-only research across the user's knowledge base, the web, and any documents the parent agent has surfaced. Return a synthesized answer with explicit citations — never speculate beyond the sources you have actually inspected. ## Tools available -- `search_surfsense_docs` — fast hybrid search over the user's knowledge base. - `web_search` — only when the user's KB clearly does not contain the answer. - `scrape_webpage` — to read a URL the user or the search results provided. - `read_file`, `ls`, `glob`, `grep` — to inspect specific documents or trees the parent has flagged. @@ -242,7 +239,7 @@ Produce a single high-quality report deliverable using `generate_report`. The pa ## Workflow 1. **Outline first.** Before calling `generate_report`, write a one-paragraph outline of the sections you plan to produce. Confirm the outline reflects the parent's instructions. -2. **Source resolution.** Decide whether to call `search_surfsense_docs` and `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set. +2. **Source resolution.** Decide whether to call `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set. 3. **One report.** Call `generate_report` exactly once with `source_strategy` chosen per the topic and chat history (see the `report-writing` skill). 4. **Confirm.** End with a one-sentence summary in your final message — never paste the report back into chat; the artifact card renders itself. 
""" diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py index bc444b0c0..4b5ae3706 100644 --- a/surfsense_backend/app/agents/new_chat/tools/__init__.py +++ b/surfsense_backend/app/agents/new_chat/tools/__init__.py @@ -5,7 +5,6 @@ This module contains all the tools available to the SurfSense agent. To add a new tool, see the documentation in registry.py. Available tools: -- search_surfsense_docs: Search Surfsense documentation for usage help - generate_podcast: Generate audio podcasts from content - generate_video_presentation: Generate video presentations with slides and narration - generate_image: Generate images from text descriptions using AI models @@ -31,7 +30,6 @@ from .registry import ( get_tool_by_name, ) from .scrape_webpage import create_scrape_webpage_tool -from .search_surfsense_docs import create_search_surfsense_docs_tool from .update_memory import create_update_memory_tool, create_update_team_memory_tool from .video_presentation import create_generate_video_presentation_tool @@ -47,7 +45,6 @@ __all__ = [ "create_generate_podcast_tool", "create_generate_video_presentation_tool", "create_scrape_webpage_tool", - "create_search_surfsense_docs_tool", "create_update_memory_tool", "create_update_team_memory_tool", "format_documents_for_context", diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index 8c263ca20..6f011e372 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -101,7 +101,6 @@ from .podcast import create_generate_podcast_tool from .report import create_generate_report_tool from .resume import create_generate_resume_tool from .scrape_webpage import create_scrape_webpage_tool -from .search_surfsense_docs import create_search_surfsense_docs_tool from .teams import ( create_list_teams_channels_tool, 
create_read_teams_messages_tool, @@ -258,15 +257,6 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ ), requires=[], ), - # Surfsense documentation search tool - ToolDefinition( - name="search_surfsense_docs", - description="Search Surfsense documentation for help with using the application", - factory=lambda deps: create_search_surfsense_docs_tool( - db_session=deps["db_session"], - ), - requires=["db_session"], - ), # ========================================================================= # SERVICE ACCOUNT DISCOVERY # Generic tool for the LLM to discover connected accounts and resolve diff --git a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py deleted file mode 100644 index d8a0efac7..000000000 --- a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py +++ /dev/null @@ -1,174 +0,0 @@ -""" -Surfsense documentation search tool. - -This tool allows the agent to search the pre-indexed Surfsense documentation -to help users with questions about how to use the application. - -The documentation is indexed at deployment time from MDX files and stored -in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks). -""" - -import asyncio -import json - -from langchain_core.tools import tool -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker -from app.utils.document_converters import embed_text -from app.utils.surfsense_docs import surfsense_docs_public_url - - -def format_surfsense_docs_results(results: list[tuple]) -> str: - """ - Format search results into XML structure for the LLM context. - - Uses the same XML structure as format_documents_for_context from knowledge_base.py - but with 'doc-' prefix on chunk IDs. 
This allows: - - LLM to use consistent [citation:doc-XXX] format - - Frontend to detect 'doc-' prefix and route to surfsense docs endpoint - - Args: - results: List of (chunk, document) tuples from the database query - - Returns: - Formatted XML string with documentation content and citation-ready chunks - """ - if not results: - return "No relevant Surfsense documentation found for your query." - - # Group chunks by document - grouped: dict[int, dict] = {} - for chunk, doc in results: - public_url = surfsense_docs_public_url(doc.source) - if doc.id not in grouped: - grouped[doc.id] = { - "document_id": f"doc-{doc.id}", - "document_type": "SURFSENSE_DOCS", - "title": doc.title, - "url": public_url, - "metadata": {"source": doc.source, "public_url": public_url}, - "chunks": [], - } - grouped[doc.id]["chunks"].append( - { - "chunk_id": f"doc-{chunk.id}", - "content": chunk.content, - } - ) - - # Render XML matching format_documents_for_context structure - parts: list[str] = [] - for g in grouped.values(): - metadata_json = json.dumps(g["metadata"], ensure_ascii=False) - - parts.append("") - parts.append("") - parts.append(f" {g['document_id']}") - parts.append(f" {g['document_type']}") - parts.append(f" <![CDATA[{g['title']}]]>") - parts.append(f" ") - parts.append(f" ") - parts.append("") - parts.append("") - parts.append("") - - for ch in g["chunks"]: - parts.append( - f" " - ) - - parts.append("") - parts.append("") - parts.append("") - - return "\n".join(parts).strip() - - -async def search_surfsense_docs_async( - query: str, - db_session: AsyncSession, - top_k: int = 10, -) -> str: - """ - Search Surfsense documentation using vector similarity. 
- - Args: - query: The search query about Surfsense usage - db_session: Database session for executing queries - top_k: Number of results to return - - Returns: - Formatted string with relevant documentation content - """ - # Get embedding for the query - query_embedding = await asyncio.to_thread(embed_text, query) - - # Vector similarity search on chunks, joining with documents - stmt = ( - select(SurfsenseDocsChunk, SurfsenseDocsDocument) - .join( - SurfsenseDocsDocument, - SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id, - ) - .order_by(SurfsenseDocsChunk.embedding.op("<=>")(query_embedding)) - .limit(top_k) - ) - - result = await db_session.execute(stmt) - rows = result.all() - - return format_surfsense_docs_results(rows) - - -def create_search_surfsense_docs_tool(db_session: AsyncSession): - """ - Factory function to create the search_surfsense_docs tool. - - The tool acquires its own short-lived ``AsyncSession`` per call via - :data:`async_session_maker` so the closure is safe to share across - HTTP requests by the compiled-agent cache. Capturing a per-request - session here would surface stale/closed sessions on cache hits. - - Args: - db_session: Reserved for registry compatibility. Per-call sessions - are opened via :data:`async_session_maker` inside the tool body. - - Returns: - A configured tool function for searching Surfsense documentation - """ - del db_session # per-call session — see docstring - - @tool - async def search_surfsense_docs(query: str, top_k: int = 10) -> str: - """ - Search Surfsense documentation for help with using the application. - - Use this tool when the user asks questions about: - - How to use Surfsense features - - Installation and setup instructions - - Configuration options and settings - - Troubleshooting common issues - - Available connectors and integrations - - Browser extension usage - - API documentation - - This searches the official Surfsense documentation that was indexed - at deployment time. 
It does NOT search the user's personal knowledge base. - - Args: - query: The search query about Surfsense usage or features - top_k: Number of documentation chunks to retrieve (default: 10) - - Returns: - Relevant documentation content formatted with chunk IDs for citations - """ - async with async_session_maker() as db_session: - return await search_surfsense_docs_async( - query=query, - db_session=db_session, - top_k=top_k, - ) - - return search_surfsense_docs diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 43b0af7d2..223eb5a1b 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -43,7 +43,6 @@ from app.rate_limiter import get_real_client_ip, limiter from app.routes import router as crud_router from app.routes.auth_routes import router as auth_router from app.schemas import UserCreate, UserRead, UserUpdate -from app.tasks.surfsense_docs_indexer import seed_surfsense_docs from app.users import SECRET, auth_backend, current_active_user, fastapi_users from app.utils.perf import log_system_snapshot @@ -576,13 +575,6 @@ async def lifespan(app: FastAPI): initialize_llm_router() initialize_image_gen_router() initialize_vision_llm_router() - try: - await asyncio.wait_for(seed_surfsense_docs(), timeout=120) - except TimeoutError: - logging.getLogger(__name__).warning( - "Surfsense docs seeding timed out after 120s — skipping. " - "Docs will be indexed on the next restart." - ) # Phase 1.7 — JIT warmup. Bounded so a stuck warmup never delays # worker readiness. ``shield`` so Uvicorn cancelling startup diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index fe2e53268..d6ee9ff88 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -1150,46 +1150,6 @@ class Chunk(BaseModel, TimestampMixin): document = relationship("Document", back_populates="chunks") -class SurfsenseDocsDocument(BaseModel, TimestampMixin): - """ - Surfsense documentation storage. 
- Indexed at migration time from MDX files. - """ - - __tablename__ = "surfsense_docs_documents" - - source = Column( - String, nullable=False, unique=True, index=True - ) # File path: "connectors/slack.mdx" - title = Column(String, nullable=False) - content = Column(Text, nullable=False) - content_hash = Column(String, nullable=False, index=True) # For detecting changes - embedding = Column(Vector(config.embedding_model_instance.dimension)) - updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True) - - chunks = relationship( - "SurfsenseDocsChunk", - back_populates="document", - cascade="all, delete-orphan", - ) - - -class SurfsenseDocsChunk(BaseModel, TimestampMixin): - """Chunk storage for Surfsense documentation.""" - - __tablename__ = "surfsense_docs_chunks" - - content = Column(Text, nullable=False) - embedding = Column(Vector(config.embedding_model_instance.dimension)) - - document_id = Column( - Integer, - ForeignKey("surfsense_docs_documents.id", ondelete="CASCADE"), - nullable=False, - ) - document = relationship("SurfsenseDocsDocument", back_populates="chunks") - - class Podcast(BaseModel, TimestampMixin): """Podcast model for storing generated podcasts.""" @@ -2680,11 +2640,6 @@ async def setup_indexes(): "CREATE INDEX IF NOT EXISTS idx_documents_search_space_updated ON documents (search_space_id, updated_at DESC NULLS LAST) INCLUDE (id, title, document_type)" ) ) - await conn.execute( - text( - "CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm ON surfsense_docs_documents USING gin (title gin_trgm_ops)" - ) - ) async def create_db_and_tables(): diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index 48a095456..8373f13c3 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -55,7 +55,6 @@ from .search_source_connectors_routes import router as search_source_connectors_ from .search_spaces_routes import router as search_spaces_router 
from .slack_add_connector_route import router as slack_add_connector_router from .stripe_routes import router as stripe_router -from .surfsense_docs_routes import router as surfsense_docs_router from .team_memory_routes import router as team_memory_router from .teams_add_connector_route import router as teams_add_connector_router from .video_presentations_routes import router as video_presentations_router @@ -108,7 +107,6 @@ router.include_router(new_llm_config_router) # LLM configs with prompt configur router.include_router(model_list_router) # Dynamic model catalogue from OpenRouter router.include_router(logs_router) router.include_router(circleback_webhook_router) # Circleback meeting webhooks -router.include_router(surfsense_docs_router) # Surfsense documentation for citations router.include_router(notifications_router) # Notifications with Zero sync router.include_router( mcp_oauth_router diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index fb4d5a049..63b7732a9 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -1785,7 +1785,6 @@ async def handle_new_chat( user_id=str(user.id), llm_config_id=llm_config_id, mentioned_document_ids=request.mentioned_document_ids, - mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, mentioned_folder_ids=request.mentioned_folder_ids, mentioned_connector_ids=request.mentioned_connector_ids, mentioned_connectors=mentioned_connectors_payload, @@ -2278,7 +2277,6 @@ async def regenerate_response( user_id=str(user.id), llm_config_id=llm_config_id, mentioned_document_ids=request.mentioned_document_ids, - mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, mentioned_folder_ids=request.mentioned_folder_ids, mentioned_connector_ids=request.mentioned_connector_ids, mentioned_connectors=mentioned_connectors_payload, diff --git a/surfsense_backend/app/routes/surfsense_docs_routes.py 
b/surfsense_backend/app/routes/surfsense_docs_routes.py deleted file mode 100644 index 0d5428dec..000000000 --- a/surfsense_backend/app/routes/surfsense_docs_routes.py +++ /dev/null @@ -1,172 +0,0 @@ -""" -Routes for Surfsense documentation. - -These endpoints support the citation system for Surfsense docs, -allowing the frontend to fetch document details when a user clicks -on a [citation:doc-XXX] link. -""" - -from fastapi import APIRouter, Depends, HTTPException -from sqlalchemy import func, select -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload - -from app.db import ( - SurfsenseDocsChunk, - SurfsenseDocsDocument, - User, - get_async_session, -) -from app.schemas import PaginatedResponse -from app.schemas.surfsense_docs import ( - SurfsenseDocsChunkRead, - SurfsenseDocsDocumentRead, - SurfsenseDocsDocumentWithChunksRead, -) -from app.users import current_active_user -from app.utils.surfsense_docs import surfsense_docs_public_url - -router = APIRouter() - - -@router.get( - "/surfsense-docs/by-chunk/{chunk_id}", - response_model=SurfsenseDocsDocumentWithChunksRead, -) -async def get_surfsense_doc_by_chunk_id( - chunk_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Retrieves a Surfsense documentation document based on a chunk ID. - - This endpoint is used by the frontend to resolve [citation:doc-XXX] links. 
- """ - try: - # Get the chunk - chunk_result = await session.execute( - select(SurfsenseDocsChunk).filter(SurfsenseDocsChunk.id == chunk_id) - ) - chunk = chunk_result.scalars().first() - - if not chunk: - raise HTTPException( - status_code=404, - detail=f"Surfsense docs chunk with id {chunk_id} not found", - ) - - # Get the associated document with all its chunks - document_result = await session.execute( - select(SurfsenseDocsDocument) - .options(selectinload(SurfsenseDocsDocument.chunks)) - .filter(SurfsenseDocsDocument.id == chunk.document_id) - ) - document = document_result.scalars().first() - - if not document: - raise HTTPException( - status_code=404, - detail="Surfsense docs document not found", - ) - - # Sort chunks by ID - sorted_chunks = sorted(document.chunks, key=lambda x: x.id) - - return SurfsenseDocsDocumentWithChunksRead( - id=document.id, - title=document.title, - source=document.source, - public_url=surfsense_docs_public_url(document.source), - content=document.content, - chunks=[ - SurfsenseDocsChunkRead(id=c.id, content=c.content) - for c in sorted_chunks - ], - ) - except HTTPException: - raise - except Exception as e: - raise HTTPException( - status_code=500, - detail=f"Failed to retrieve Surfsense documentation: {e!s}", - ) from e - - -@router.get( - "/surfsense-docs", - response_model=PaginatedResponse[SurfsenseDocsDocumentRead], -) -async def list_surfsense_docs( - page: int = 0, - page_size: int = 50, - title: str | None = None, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - List all Surfsense documentation documents. - - Args: - page: Zero-based page index. - page_size: Number of items per page (default: 50). - title: Optional title filter (case-insensitive substring match). - session: Database session (injected). - user: Current authenticated user (injected). - - Returns: - PaginatedResponse[SurfsenseDocsDocumentRead]: Paginated list of Surfsense docs. 
- """ - try: - # Base query - query = select(SurfsenseDocsDocument) - count_query = select(func.count()).select_from(SurfsenseDocsDocument) - - # Filter by title if provided - if title and title.strip(): - query = query.filter(SurfsenseDocsDocument.title.ilike(f"%{title}%")) - count_query = count_query.filter( - SurfsenseDocsDocument.title.ilike(f"%{title}%") - ) - - # Get total count - total_result = await session.execute(count_query) - total = total_result.scalar() or 0 - - # Calculate offset - offset = page * page_size - - # Get paginated results - result = await session.execute( - query.order_by(SurfsenseDocsDocument.title).offset(offset).limit(page_size) - ) - docs = result.scalars().all() - - # Convert to response format - items = [ - SurfsenseDocsDocumentRead( - id=doc.id, - title=doc.title, - source=doc.source, - public_url=surfsense_docs_public_url(doc.source), - content=doc.content, - created_at=doc.created_at, - updated_at=doc.updated_at, - ) - for doc in docs - ] - - has_more = (offset + len(items)) < total - - return PaginatedResponse( - items=items, - total=total, - page=page, - page_size=page_size, - has_more=has_more, - ) - except Exception as e: - raise HTTPException( - status_code=500, - detail=f"Failed to list Surfsense documentation: {e!s}", - ) from e diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 8b49413c6..ab95f9b6b 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -239,9 +239,6 @@ class NewChatRequest(BaseModel): mentioned_document_ids: list[int] | None = ( None # Optional document IDs mentioned with @ in the chat ) - mentioned_surfsense_doc_ids: list[int] | None = ( - None # Optional SurfSense documentation IDs mentioned with @ in the chat - ) mentioned_folder_ids: list[int] | None = Field( default=None, description=( @@ -326,7 +323,6 @@ class RegenerateRequest(BaseModel): None # New user query (for edit). 
None = reload with same query ) mentioned_document_ids: list[int] | None = None - mentioned_surfsense_doc_ids: list[int] | None = None mentioned_folder_ids: list[int] | None = Field( default=None, description=( diff --git a/surfsense_backend/app/schemas/surfsense_docs.py b/surfsense_backend/app/schemas/surfsense_docs.py deleted file mode 100644 index 3adf25032..000000000 --- a/surfsense_backend/app/schemas/surfsense_docs.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Schemas for Surfsense documentation. -""" - -from datetime import datetime - -from pydantic import BaseModel, ConfigDict - - -class SurfsenseDocsChunkRead(BaseModel): - """Schema for a Surfsense docs chunk.""" - - id: int - content: str - - model_config = ConfigDict(from_attributes=True) - - -class SurfsenseDocsDocumentRead(BaseModel): - """Schema for a Surfsense docs document (without chunks).""" - - id: int - title: str - source: str - public_url: str - content: str - created_at: datetime | None = None - updated_at: datetime | None = None - - model_config = ConfigDict(from_attributes=True) - - -class SurfsenseDocsDocumentWithChunksRead(BaseModel): - """Schema for a Surfsense docs document with its chunks.""" - - id: int - title: str - source: str - public_url: str - content: str - chunks: list[SurfsenseDocsChunkRead] - - model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 78f80c955..e150cf494 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -25,7 +25,6 @@ from uuid import UUID import anyio from langchain_core.messages import HumanMessage from sqlalchemy.future import select -from sqlalchemy.orm import selectinload from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent @@ -55,7 +54,6 @@ from app.db import ( 
NewChatThread, Report, SearchSourceConnectorType, - SurfsenseDocsDocument, async_session_maker, shielded_async_session, ) @@ -77,7 +75,6 @@ from app.tasks.chat.streaming.helpers.interrupt_inspector import ( ) from app.utils.content_utils import bootstrap_history_from_db from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap -from app.utils.surfsense_docs import surfsense_docs_public_url from app.utils.user_message_multimodal import build_human_message_content _background_tasks: set[asyncio.Task] = set() @@ -198,58 +195,6 @@ def _extract_chunk_parts(chunk: Any) -> dict[str, Any]: return out -def format_mentioned_surfsense_docs_as_context( - documents: list[SurfsenseDocsDocument], -) -> str: - """Format mentioned SurfSense documentation as context for the agent.""" - if not documents: - return "" - - context_parts = [""] - context_parts.append( - "The user has explicitly mentioned the following SurfSense documentation pages. " - "These are official documentation about how to use SurfSense and should be used to answer questions about the application. " - "Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])." 
- ) - - for doc in documents: - public_url = surfsense_docs_public_url(doc.source) - metadata_json = json.dumps( - {"source": doc.source, "public_url": public_url}, ensure_ascii=False - ) - - context_parts.append("") - context_parts.append("") - context_parts.append(f" doc-{doc.id}") - context_parts.append(" SURFSENSE_DOCS") - context_parts.append(f" <![CDATA[{doc.title}]]>") - context_parts.append(f" ") - context_parts.append( - f" " - ) - context_parts.append("") - context_parts.append("") - context_parts.append("") - - if hasattr(doc, "chunks") and doc.chunks: - for chunk in doc.chunks: - context_parts.append( - f" " - ) - else: - context_parts.append( - f" " - ) - - context_parts.append("") - context_parts.append("") - context_parts.append("") - - context_parts.append("") - - return "\n".join(context_parts) - - def extract_todos_from_deepagents(command_output) -> dict: """ Extract todos from deepagents' TodoListMiddleware Command output. @@ -837,7 +782,6 @@ async def stream_new_chat( user_id: str | None = None, llm_config_id: int = -1, mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, mentioned_folder_ids: list[int] | None = None, mentioned_connector_ids: list[int] | None = None, mentioned_connectors: list[dict[str, Any]] | None = None, @@ -869,7 +813,6 @@ async def stream_new_chat( llm_config_id: The LLM configuration ID (default: -1 for first global config) needs_history_bootstrap: If True, load message history from DB (for cloned chats) mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat - mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat mentioned_folder_ids: Optional list of knowledge-base folder IDs mentioned with @ (cloud mode) checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations) @@ -1295,19 +1238,7 @@ async def stream_new_chat( # Mentioned KB documents are now handled by 
KnowledgeBaseSearchMiddleware # which merges them into the scoped filesystem with full document - # structure. Only SurfSense docs and report context are inlined here. - - # Fetch mentioned SurfSense docs if any - mentioned_surfsense_docs: list[SurfsenseDocsDocument] = [] - if mentioned_surfsense_doc_ids: - result = await session.execute( - select(SurfsenseDocsDocument) - .options(selectinload(SurfsenseDocsDocument.chunks)) - .filter( - SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids), - ) - ) - mentioned_surfsense_docs = list(result.scalars().all()) + # structure. Only report context is inlined here. # Fetch the most recent report(s) in this thread so the LLM can # easily find report_id for versioning decisions, instead of @@ -1341,10 +1272,7 @@ async def stream_new_chat( agent_user_query = user_query accepted_folder_ids: list[int] = [] if fs_mode == FilesystemMode.CLOUD.value and ( - mentioned_document_ids - or mentioned_surfsense_doc_ids - or mentioned_folder_ids - or mentioned_documents + mentioned_document_ids or mentioned_folder_ids or mentioned_documents ): from app.schemas.new_chat import ( MentionedDocumentInfo as _MentionedDocumentInfo, @@ -1370,23 +1298,17 @@ async def stream_new_chat( search_space_id=search_space_id, mentioned_documents=chip_objs, mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, mentioned_folder_ids=mentioned_folder_ids, ) agent_user_query = substitute_in_text(user_query, resolved.token_to_path) accepted_folder_ids = resolved.mentioned_folder_ids - # Format the user query with context (SurfSense docs + reports only). + # Format the user query with context (reports only). # Uses ``agent_user_query`` so the LLM sees backtick-wrapped paths # instead of bare ``@title`` tokens. 
final_query = agent_user_query context_parts = [] - if mentioned_surfsense_docs: - context_parts.append( - format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs) - ) - if mentioned_connectors: connector_lines = [] for connector in mentioned_connectors: @@ -1617,12 +1539,8 @@ async def stream_new_chat( stream_result.content_builder = AssistantContentBuilder() # Initial thinking step - analyzing the request - if mentioned_surfsense_docs: - initial_title = "Analyzing referenced content" - action_verb = "Analyzing" - else: - initial_title = "Understanding your request" - action_verb = "Processing" + initial_title = "Understanding your request" + action_verb = "Processing" processing_parts = [] if user_query.strip(): @@ -1633,18 +1551,6 @@ async def stream_new_chat( else: processing_parts.append("(message)") - if mentioned_surfsense_docs: - doc_names = [] - for doc in mentioned_surfsense_docs: - title = doc.title - if len(title) > 30: - title = title[:27] + "..." - doc_names.append(title) - if len(doc_names) == 1: - processing_parts.append(f"[{doc_names[0]}]") - else: - processing_parts.append(f"[{len(doc_names)} docs]") - initial_items = [f"{action_verb}: {' '.join(processing_parts)}"] initial_step_id = "thinking-1" @@ -1664,10 +1570,10 @@ async def stream_new_chat( items=initial_items, ) - # These ORM objects (with eagerly-loaded chunks) can be very large. - # They're only needed to build context strings already copied into - # final_query / langchain_messages — release them before streaming. - del mentioned_surfsense_docs, recent_reports + # These ORM objects can be large. They're only needed to build context + # strings already copied into final_query / langchain_messages — + # release them before streaming. 
+ del recent_reports del langchain_messages, final_query # Check if this is the first assistant response so we can generate diff --git a/surfsense_backend/app/tasks/chat/streaming/context/__init__.py b/surfsense_backend/app/tasks/chat/streaming/context/__init__.py index f858a6c06..4cf58d76f 100644 --- a/surfsense_backend/app/tasks/chat/streaming/context/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/context/__init__.py @@ -1,15 +1,11 @@ -"""Pre-agent context shaping: mentioned-doc rendering and todos extraction.""" +"""Pre-agent context shaping: todos extraction.""" from __future__ import annotations from app.tasks.chat.streaming.context.deepagents_todos import ( extract_todos_from_deepagents, ) -from app.tasks.chat.streaming.context.mentioned_docs import ( - format_mentioned_surfsense_docs_as_context, -) __all__ = [ "extract_todos_from_deepagents", - "format_mentioned_surfsense_docs_as_context", ] diff --git a/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py b/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py deleted file mode 100644 index e02e98d34..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Render user-mentioned SurfSense docs as XML context for the agent.""" - -from __future__ import annotations - -import json - -from app.db import SurfsenseDocsDocument -from app.utils.surfsense_docs import surfsense_docs_public_url - - -def format_mentioned_surfsense_docs_as_context( - documents: list[SurfsenseDocsDocument], -) -> str: - if not documents: - return "" - - context_parts = [""] - context_parts.append( - "The user has explicitly mentioned the following SurfSense documentation pages. " - "These are official documentation about how to use SurfSense and should be used to answer questions about the application. " - "Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])." 
- ) - - for doc in documents: - public_url = surfsense_docs_public_url(doc.source) - metadata_json = json.dumps( - {"source": doc.source, "public_url": public_url}, ensure_ascii=False - ) - - context_parts.append("") - context_parts.append("") - context_parts.append(f" doc-{doc.id}") - context_parts.append(" SURFSENSE_DOCS") - context_parts.append(f" <![CDATA[{doc.title}]]>") - context_parts.append(f" ") - context_parts.append( - f" " - ) - context_parts.append("") - context_parts.append("") - context_parts.append("") - - if hasattr(doc, "chunks") and doc.chunks: - for chunk in doc.chunks: - context_parts.append( - f" " - ) - else: - context_parts.append( - f" " - ) - - context_parts.append("") - context_parts.append("") - context_parts.append("") - - context_parts.append("") - return "\n".join(context_parts) diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py index c860e517e..e727200eb 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py @@ -1,8 +1,8 @@ """Build and emit the first ``thinking-1`` step for a new-chat turn. The step title and "Processing X" items are derived from what the user sent -(text snippet, image count, mentioned doc titles) so the FE can render a -meaningful placeholder while the agent stream warms up. +(text snippet, image count) so the FE can render a meaningful placeholder +while the agent stream warms up. 
``thinking-1`` is the canonical id for this step — every subsequent ``thinking-N`` produced by ``stream_agent_events`` folds into the same @@ -15,7 +15,6 @@ from collections.abc import Iterator from dataclasses import dataclass from typing import Any -from app.db import SurfsenseDocsDocument from app.services.new_streaming_service import VercelStreamingService @@ -37,14 +36,9 @@ def build_initial_thinking_step( *, user_query: str, user_image_data_urls: list[str] | None, - mentioned_surfsense_docs: list[SurfsenseDocsDocument], ) -> InitialThinkingStep: - if mentioned_surfsense_docs: - title = "Analyzing referenced content" - action_verb = "Analyzing" - else: - title = "Understanding your request" - action_verb = "Processing" + title = "Understanding your request" + action_verb = "Processing" processing_parts: list[str] = [] if user_query.strip(): @@ -55,18 +49,6 @@ def build_initial_thinking_step( else: processing_parts.append("(message)") - if mentioned_surfsense_docs: - doc_names: list[str] = [] - for doc in mentioned_surfsense_docs: - t = doc.title - if len(t) > 30: - t = t[:27] + "..." - doc_names.append(t) - if len(doc_names) == 1: - processing_parts.append(f"[{doc_names[0]}]") - else: - processing_parts.append(f"[{len(doc_names)} docs]") - items = [f"{action_verb}: {' '.join(processing_parts)}"] return InitialThinkingStep(step_id="thinking-1", title=title, items=items) diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py index f508571b0..0c6704bd1 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py @@ -5,20 +5,17 @@ Pipeline: 1. **History bootstrap** — only for cloned chats with no LangGraph checkpoint yet; flips the per-thread ``needs_history_bootstrap`` flag back to False once the rows are loaded. - 2. 
**Mentioned SurfSense docs** — eager-load chunks so the formatter has the - full content without a second roundtrip. - 3. **Recent reports** — top 3 by id desc with non-null content, so the LLM + 2. **Recent reports** — top 3 by id desc with non-null content, so the LLM can resolve ``report_id`` for versioning without spelunking history. - 4. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the + 3. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the query with canonical ``\`/documents/...\``` paths the LLM expects. - 5. **Context block render** — XML-wrap surfsense docs + reports, prepend to - the rewritten query, optionally prefix with display name for SEARCH_SPACE + 4. **Context block render** — XML-wrap recent reports, prepend to the + rewritten query, optionally prefix with display name for SEARCH_SPACE visibility. - 6. **HumanMessage** — multimodal content if images are attached. + 5. **HumanMessage** — multimodal content if images are attached. Returns the assembled ``input_state`` dict plus side-channel data the -orchestrator needs downstream (``accepted_folder_ids`` for runtime context; -``mentioned_surfsense_docs`` for the initial thinking step). +orchestrator needs downstream (``accepted_folder_ids`` for runtime context). 
""" from __future__ import annotations @@ -30,7 +27,6 @@ from typing import Any from langchain_core.messages import HumanMessage from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select -from sqlalchemy.orm import selectinload from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text @@ -38,10 +34,6 @@ from app.db import ( ChatVisibility, NewChatThread, Report, - SurfsenseDocsDocument, -) -from app.tasks.chat.streaming.context.mentioned_docs import ( - format_mentioned_surfsense_docs_as_context, ) from app.utils.content_utils import bootstrap_history_from_db from app.utils.user_message_multimodal import build_human_message_content @@ -55,13 +47,10 @@ class NewChatInputState: ``input_state`` is fed straight to the agent. ``accepted_folder_ids`` feeds the runtime context (the resolver may have dropped some chips). - ``mentioned_surfsense_docs`` is consumed by the initial thinking-step - builder for the FE placeholder before the agent stream starts. 
""" input_state: dict[str, Any] accepted_folder_ids: list[int] - mentioned_surfsense_docs: list[SurfsenseDocsDocument] async def build_new_chat_input_state( @@ -72,7 +61,6 @@ async def build_new_chat_input_state( user_query: str, user_image_data_urls: list[str] | None, mentioned_document_ids: list[int] | None, - mentioned_surfsense_doc_ids: list[int] | None, mentioned_folder_ids: list[int] | None, mentioned_documents: list[dict[str, Any]] | None, needs_history_bootstrap: bool, @@ -96,15 +84,6 @@ async def build_new_chat_input_state( thread.needs_history_bootstrap = False await session.commit() - mentioned_surfsense_docs: list[SurfsenseDocsDocument] = [] - if mentioned_surfsense_doc_ids: - result = await session.execute( - select(SurfsenseDocsDocument) - .options(selectinload(SurfsenseDocsDocument.chunks)) - .filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids)) - ) - mentioned_surfsense_docs = list(result.scalars().all()) - # Top 3 reports keyed by id desc (newest first) with content present, # surfaced inline so the LLM resolves ``report_id`` for versioning without # digging through conversation history. 
@@ -125,14 +104,12 @@ async def build_new_chat_input_state( user_query=user_query, filesystem_mode=filesystem_mode, mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, mentioned_folder_ids=mentioned_folder_ids, mentioned_documents=mentioned_documents, ) final_query = _render_query_with_context( agent_user_query=agent_user_query, - mentioned_surfsense_docs=mentioned_surfsense_docs, recent_reports=recent_reports, ) @@ -154,7 +131,6 @@ async def build_new_chat_input_state( return NewChatInputState( input_state=input_state, accepted_folder_ids=accepted_folder_ids, - mentioned_surfsense_docs=mentioned_surfsense_docs, ) @@ -165,7 +141,6 @@ async def _resolve_mentions_for_query( user_query: str, filesystem_mode: str, mentioned_document_ids: list[int] | None, - mentioned_surfsense_doc_ids: list[int] | None, mentioned_folder_ids: list[int] | None, mentioned_documents: list[dict[str, Any]] | None, ) -> tuple[str, list[int]]: @@ -187,10 +162,7 @@ async def _resolve_mentions_for_query( accepted_folder_ids: list[int] = [] has_any_mention = bool( - mentioned_document_ids - or mentioned_surfsense_doc_ids - or mentioned_folder_ids - or mentioned_documents + mentioned_document_ids or mentioned_folder_ids or mentioned_documents ) if filesystem_mode != FilesystemMode.CLOUD.value or not has_any_mention: return agent_user_query, accepted_folder_ids @@ -214,7 +186,6 @@ async def _resolve_mentions_for_query( search_space_id=search_space_id, mentioned_documents=chip_objs, mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, mentioned_folder_ids=mentioned_folder_ids, ) agent_user_query = substitute_in_text(user_query, resolved.token_to_path) @@ -225,17 +196,11 @@ async def _resolve_mentions_for_query( def _render_query_with_context( *, agent_user_query: str, - mentioned_surfsense_docs: list[SurfsenseDocsDocument], recent_reports: list[Report], ) -> str: - """Prepend surfsense-docs + 
recent-reports XML blocks to the user query.""" + """Prepend recent-reports XML block to the user query.""" context_parts: list[str] = [] - if mentioned_surfsense_docs: - context_parts.append( - format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs) - ) - if recent_reports: report_lines: list[str] = [] for r in recent_reports: diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py index 6d0853502..1892320d3 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py @@ -123,7 +123,6 @@ async def stream_new_chat( user_id: str | None = None, llm_config_id: int = -1, mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, mentioned_folder_ids: list[int] | None = None, mentioned_documents: list[dict[str, Any]] | None = None, checkpoint_id: str | None = None, @@ -435,7 +434,6 @@ async def stream_new_chat( user_query=user_query, user_image_data_urls=user_image_data_urls, mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, mentioned_folder_ids=mentioned_folder_ids, mentioned_documents=mentioned_documents, needs_history_bootstrap=needs_history_bootstrap, @@ -447,7 +445,6 @@ async def stream_new_chat( ) input_state = assembled.input_state accepted_folder_ids = assembled.accepted_folder_ids - mentioned_surfsense_docs = assembled.mentioned_surfsense_docs _perf_log.info( "[stream_new_chat] History bootstrap + doc/report queries in %.3fs", time.perf_counter() - _t0, @@ -560,7 +557,6 @@ async def stream_new_chat( initial_step = build_initial_thinking_step( user_query=user_query, user_image_data_urls=user_image_data_urls, - mentioned_surfsense_docs=mentioned_surfsense_docs, ) for sse in iter_initial_thinking_step_frame( initial_step, @@ -575,7 +571,7 @@ 
async def stream_new_chat( # Drop the heavy ORM objects + the container that holds them so they # aren't retained for the entire streaming duration. ``input_state`` # already carries the langchain_messages list independently. - del assembled, mentioned_surfsense_docs + del assembled title_task = spawn_title_task( chat_id=chat_id, diff --git a/surfsense_backend/app/tasks/surfsense_docs_indexer.py b/surfsense_backend/app/tasks/surfsense_docs_indexer.py deleted file mode 100644 index db88c8700..000000000 --- a/surfsense_backend/app/tasks/surfsense_docs_indexer.py +++ /dev/null @@ -1,249 +0,0 @@ -""" -Surfsense documentation indexer. -Indexes MDX documentation files at startup. -""" - -import hashlib -import logging -import re -from datetime import UTC, datetime -from pathlib import Path - -from sqlalchemy import delete as sa_delete, select -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload -from sqlalchemy.orm.attributes import set_committed_value - -from app.config import config -from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker -from app.utils.document_converters import embed_text - -logger = logging.getLogger(__name__) - - -async def _safe_set_docs_chunks( - session: AsyncSession, document: SurfsenseDocsDocument, chunks: list -) -> None: - """safe_set_chunks variant for the SurfsenseDocsDocument/Chunk models.""" - if document.id is not None: - await session.execute( - sa_delete(SurfsenseDocsChunk).where( - SurfsenseDocsChunk.document_id == document.id - ) - ) - for chunk in chunks: - chunk.document_id = document.id - - set_committed_value(document, "chunks", chunks) - session.add_all(chunks) - - -# Path to docs relative to project root -DOCS_DIR = ( - Path(__file__).resolve().parent.parent.parent.parent - / "surfsense_web" - / "content" - / "docs" -) - - -def parse_mdx_frontmatter(content: str) -> tuple[str, str]: - """ - Parse MDX file to extract frontmatter title and content. 
- - Args: - content: Raw MDX file content - - Returns: - Tuple of (title, content_without_frontmatter) - """ - # Match frontmatter between --- markers - frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n" - match = re.match(frontmatter_pattern, content, re.DOTALL) - - if match: - frontmatter = match.group(1) - content_without_frontmatter = content[match.end() :] - - # Extract title from frontmatter - title_match = re.search(r"^title:\s*(.+)$", frontmatter, re.MULTILINE) - title = title_match.group(1).strip() if title_match else "Untitled" - - # Remove quotes if present - title = title.strip("\"'") - - return title, content_without_frontmatter.strip() - - return "Untitled", content.strip() - - -def get_all_mdx_files() -> list[Path]: - """ - Get all MDX files from the docs directory. - - Returns: - List of Path objects for each MDX file - """ - if not DOCS_DIR.exists(): - logger.warning(f"Docs directory not found: {DOCS_DIR}") - return [] - - return list(DOCS_DIR.rglob("*.mdx")) - - -def generate_surfsense_docs_content_hash(content: str) -> str: - """Generate SHA-256 hash for Surfsense docs content.""" - return hashlib.sha256(content.encode("utf-8")).hexdigest() - - -def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]: - """ - Create chunks from Surfsense documentation content. - - Args: - content: Document content to chunk - - Returns: - List of SurfsenseDocsChunk objects with embeddings - """ - return [ - SurfsenseDocsChunk( - content=chunk.text, - embedding=embed_text(chunk.text), - ) - for chunk in config.chunker_instance.chunk(content) - ] - - -async def index_surfsense_docs(session: AsyncSession) -> tuple[int, int, int, int]: - """ - Index all Surfsense documentation files. 
- - Args: - session: SQLAlchemy async session - - Returns: - Tuple of (created, updated, skipped, deleted) counts - """ - created = 0 - updated = 0 - skipped = 0 - deleted = 0 - - # Get all existing docs from database - existing_docs_result = await session.execute( - select(SurfsenseDocsDocument).options( - selectinload(SurfsenseDocsDocument.chunks) - ) - ) - existing_docs = {doc.source: doc for doc in existing_docs_result.scalars().all()} - - # Track which sources we've processed - processed_sources = set() - - # Get all MDX files - mdx_files = get_all_mdx_files() - logger.info(f"Found {len(mdx_files)} MDX files to index") - - for mdx_file in mdx_files: - try: - source = str(mdx_file.relative_to(DOCS_DIR)) - processed_sources.add(source) - - # Read file content - raw_content = mdx_file.read_text(encoding="utf-8") - title, content = parse_mdx_frontmatter(raw_content) - content_hash = generate_surfsense_docs_content_hash(raw_content) - - if source in existing_docs: - existing_doc = existing_docs[source] - - # Check if content changed - if existing_doc.content_hash == content_hash: - logger.debug(f"Skipping unchanged: {source}") - skipped += 1 - continue - - # Content changed - update document - logger.info(f"Updating changed document: {source}") - - # Create new chunks - chunks = create_surfsense_docs_chunks(content) - - # Update document fields - existing_doc.title = title - existing_doc.content = content - existing_doc.content_hash = content_hash - existing_doc.embedding = embed_text(content) - await _safe_set_docs_chunks(session, existing_doc, chunks) - existing_doc.updated_at = datetime.now(UTC) - - updated += 1 - else: - # New document - create it - logger.info(f"Creating new document: {source}") - - chunks = create_surfsense_docs_chunks(content) - - document = SurfsenseDocsDocument( - source=source, - title=title, - content=content, - content_hash=content_hash, - embedding=embed_text(content), - chunks=chunks, - updated_at=datetime.now(UTC), - ) - - 
session.add(document) - created += 1 - - except Exception as e: - logger.error(f"Error processing {mdx_file}: {e}", exc_info=True) - continue - - # Delete documents for removed files - for source, doc in existing_docs.items(): - if source not in processed_sources: - logger.info(f"Deleting removed document: {source}") - await session.delete(doc) - deleted += 1 - - # Commit all changes - await session.commit() - - logger.info( - f"Indexing complete: {created} created, {updated} updated, " - f"{skipped} skipped, {deleted} deleted" - ) - - return created, updated, skipped, deleted - - -async def seed_surfsense_docs() -> tuple[int, int, int, int]: - """ - Seed Surfsense documentation into the database. - - This function indexes all MDX files from the docs directory. - It handles creating, updating, and deleting docs based on content changes. - - Returns: - Tuple of (created, updated, skipped, deleted) counts - Returns (0, 0, 0, 0) if an error occurs - """ - logger.info("Starting Surfsense docs indexing...") - - try: - async with async_session_maker() as session: - created, updated, skipped, deleted = await index_surfsense_docs(session) - - logger.info( - f"Surfsense docs indexing complete: " - f"created={created}, updated={updated}, skipped={skipped}, deleted={deleted}" - ) - - return created, updated, skipped, deleted - - except Exception as e: - logger.error(f"Failed to seed Surfsense docs: {e}", exc_info=True) - return 0, 0, 0, 0 diff --git a/surfsense_backend/app/utils/surfsense_docs.py b/surfsense_backend/app/utils/surfsense_docs.py deleted file mode 100644 index 9a6ab11a9..000000000 --- a/surfsense_backend/app/utils/surfsense_docs.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Utilities for SurfSense's built-in documentation index.""" - -from pathlib import PurePosixPath - -DOCS_PUBLIC_ROOT = PurePosixPath("/docs") - - -def surfsense_docs_public_url(source: str) -> str: - """Return the public docs route for an indexed documentation source path.""" - docs_path = 
PurePosixPath(source).with_suffix("") - if docs_path.name == "index": - docs_path = docs_path.parent - return (DOCS_PUBLIC_ROOT / docs_path).as_posix() diff --git a/surfsense_backend/scripts/seed_surfsense_docs.py b/surfsense_backend/scripts/seed_surfsense_docs.py deleted file mode 100644 index 68899c2aa..000000000 --- a/surfsense_backend/scripts/seed_surfsense_docs.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -""" -Seed Surfsense documentation into the database. - -CLI wrapper for the seed_surfsense_docs function. -Can be run manually for debugging or re-indexing. - -Usage: - python scripts/seed_surfsense_docs.py -""" - -import asyncio -import sys -from pathlib import Path - -# Add the parent directory to the path so we can import app modules -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) - -from app.tasks.surfsense_docs_indexer import seed_surfsense_docs - - -def main(): - """CLI entry point for seeding Surfsense docs.""" - print("=" * 50) - print(" Surfsense Documentation Seeding") - print("=" * 50) - - created, updated, skipped, deleted = asyncio.run(seed_surfsense_docs()) - - print() - print("Results:") - print(f" Created: {created}") - print(f" Updated: {updated}") - print(f" Skipped: {skipped}") - print(f" Deleted: {deleted}") - print("=" * 50) - - -if __name__ == "__main__": - main() diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_default_permissions_layering.py b/surfsense_backend/tests/unit/agents/new_chat/test_default_permissions_layering.py index ac6b5d95c..2f222e148 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_default_permissions_layering.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_default_permissions_layering.py @@ -60,7 +60,6 @@ class TestReadOnlyToolsAllowed: "glob", "web_search", "scrape_webpage", - "search_surfsense_docs", "get_connected_accounts", "write_todos", "task", diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py 
b/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py index 3035cc8e0..3c7fe5336 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py @@ -22,12 +22,6 @@ from app.agents.new_chat.subagents.config import ( # --------------------------------------------------------------------------- -@tool -def search_surfsense_docs(query: str) -> str: - """Search the user's KB.""" - return "" - - @tool def web_search(query: str) -> str: """Search the public web.""" @@ -95,7 +89,6 @@ def generate_report(topic: str) -> str: ALL_TOOLS = [ - search_surfsense_docs, web_search, scrape_webpage, read_file, @@ -161,7 +154,7 @@ class TestReportWriterSubagent: names = {t.name for t in spec["tools"]} # type: ignore[index] assert names == REPORT_WRITER_TOOLS & {t.name for t in ALL_TOOLS} assert "generate_report" in names - assert "search_surfsense_docs" in names + assert "read_file" in names def test_deny_rules_block_writes_but_allow_generate_report(self) -> None: spec = build_report_writer_subagent(tools=ALL_TOOLS) @@ -272,9 +265,9 @@ class TestFilterToolsWarningSuppression: # Allowed set asks for two registry tools (one present, one # not) plus a bunch of middleware-provided names. 
_filter_tools( - [search_surfsense_docs], + [web_search], allowed_names={ - "search_surfsense_docs", + "web_search", "scrape_webpage", # legitimately missing → should warn "read_file", # mw-provided → suppressed "ls", @@ -322,7 +315,6 @@ class TestDenyPatternsCoverage: def test_deny_patterns_do_not_match_safe_read_tools(self) -> None: canonical_reads = [ - "search_surfsense_docs", "read_file", "ls_tree", "grep", diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py index ff4ca30df..e014bb911 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py @@ -25,7 +25,6 @@ from __future__ import annotations import asyncio import inspect -from dataclasses import dataclass from typing import Any from unittest.mock import AsyncMock, patch @@ -140,45 +139,28 @@ def test_orchestrators_are_async_generator_functions() -> None: # ------------------------------------------------------------ initial thinking -@dataclass -class _FakeSurfsenseDoc: - """Stand-in for ``SurfsenseDocsDocument`` with just the field we read.""" - - title: str - - @pytest.mark.parametrize( - "user_query, image_urls, docs, expected_title, expected_action", + "user_query, image_urls, expected_title, expected_action", [ - ("hello world", None, [], "Understanding your request", "Processing"), + ("hello world", None, "Understanding your request", "Processing"), ( "", ["data:image/png;base64,AAA"], - [], "Understanding your request", "Processing", ), - ("", None, [], "Understanding your request", "Processing"), - ( - "doc question", - None, - [_FakeSurfsenseDoc(title="My Doc")], - "Analyzing referenced content", - "Analyzing", - ), + ("", None, "Understanding your request", "Processing"), ], ) def test_initial_thinking_step_branches( user_query: str, image_urls: 
list[str] | None, - docs: list[Any], expected_title: str, expected_action: str, ) -> None: step = build_initial_thinking_step( user_query=user_query, user_image_data_urls=image_urls, - mentioned_surfsense_docs=docs, # type: ignore[arg-type] ) assert step.step_id == "thinking-1" assert step.title == expected_title @@ -191,7 +173,6 @@ def test_initial_thinking_step_truncates_long_query() -> None: step = build_initial_thinking_step( user_query=long_query, user_image_data_urls=None, - mentioned_surfsense_docs=[], ) # 80-char truncation + ellipsis, sandwiched after "Processing: ". assert "..." in step.items[0] @@ -200,16 +181,6 @@ def test_initial_thinking_step_truncates_long_query() -> None: assert payload.startswith("x" * 80) and payload.endswith("...") -def test_initial_thinking_step_collapses_many_doc_names() -> None: - docs = [_FakeSurfsenseDoc(title=f"Doc {i}") for i in range(5)] - step = build_initial_thinking_step( - user_query="q", - user_image_data_urls=None, - mentioned_surfsense_docs=docs, # type: ignore[arg-type] - ) - assert "[5 docs]" in step.items[0] - - # ------------------------------------------------------------ capability gate diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/builder/mention-task-input.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/builder/mention-task-input.tsx index 312454056..c0651a90b 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/automations/components/builder/mention-task-input.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/builder/mention-task-input.tsx @@ -89,10 +89,10 @@ function removeFirstToken(text: string, token: string): string { /** * Task input that reuses the chat ``@`` mention experience -- the same - * ``InlineMentionEditor`` + ``DocumentMentionPicker`` as the composer, minus - * SurfSense product docs. 
The editor is the source of truth while mounted; - * ``onChange`` reports both the plain text (chips rendered as ``@Title``) and - * the structured mention list so the builder can persist IDs for the run. + * ``InlineMentionEditor`` + ``DocumentMentionPicker`` as the composer. The + * editor is the source of truth while mounted; ``onChange`` reports both the + * plain text (chips rendered as ``@Title``) and the structured mention list + * so the builder can persist IDs for the run. */ export function MentionTaskInput({ searchSpaceId, @@ -233,7 +233,6 @@ export function MentionTaskInput({ documentsApiService.searchDocumentTitles({ queryParams: prefetchParams }), staleTime: 60 * 1000, }); - - queryClient.prefetchQuery({ - queryKey: ["surfsense-docs-mention", "", false], - queryFn: () => - documentsApiService.getSurfsenseDocs({ - queryParams: { page: 0, page_size: 20 }, - }), - staleTime: 3 * 60 * 1000, - }); }, [searchSpaceId, queryClient]); // Handle scroll to comment from URL query params (e.g., from inbox item click) @@ -949,7 +940,6 @@ export default function NewChatPage() { trackChatMessageSent(searchSpaceId, currentThreadId, { hasAttachments: userImages.length > 0, hasMentionedDocuments: - mentionedDocumentIds.surfsense_doc_ids.length > 0 || mentionedDocumentIds.document_ids.length > 0 || mentionedDocumentIds.folder_ids.length > 0 || mentionedDocumentIds.connector_ids.length > 0, @@ -1027,12 +1017,11 @@ export default function NewChatPage() { // Get mentioned document IDs for context (separate fields for backend) const hasDocumentIds = mentionedDocumentIds.document_ids.length > 0; - const hasSurfsenseDocIds = mentionedDocumentIds.surfsense_doc_ids.length > 0; const hasFolderIds = mentionedDocumentIds.folder_ids.length > 0; const hasConnectorIds = mentionedDocumentIds.connector_ids.length > 0; // Clear mentioned documents after capturing them - if (hasDocumentIds || hasSurfsenseDocIds || hasFolderIds || hasConnectorIds) { + if (hasDocumentIds || hasFolderIds || 
hasConnectorIds) { setMentionedDocuments([]); } @@ -1054,9 +1043,6 @@ export default function NewChatPage() { mentioned_document_ids: hasDocumentIds ? mentionedDocumentIds.document_ids : undefined, - mentioned_surfsense_doc_ids: hasSurfsenseDocIds - ? mentionedDocumentIds.surfsense_doc_ids - : undefined, mentioned_folder_ids: hasFolderIds ? mentionedDocumentIds.folder_ids : undefined, mentioned_connector_ids: hasConnectorIds ? mentionedDocumentIds.connector_ids @@ -1947,18 +1933,14 @@ export default function NewChatPage() { const selection = await getAgentFilesystemSelection(searchSpaceId, { localFilesystemEnabled, }); - // Partition the source mentions back into doc/surfsense_doc/folder - // id buckets so the regenerate route can pass them to - // ``stream_new_chat`` and the priority middleware sees the - // same ``[USER-MENTIONED]`` priority entries the original - // turn did. Without this partition the regenerate flow - // silently dropped the agent's mention awareness — same - // architectural bug we fixed on the new-chat path. - const regenerateSurfsenseDocIds = sourceMentionedDocs - .filter((d) => d.kind === "doc" && d.document_type === "SURFSENSE_DOCS") - .map((d) => d.id); + // Partition the source mentions back into doc/folder id buckets + // so the regenerate route can pass them to ``stream_new_chat`` + // and the priority middleware sees the same ``[USER-MENTIONED]`` + // priority entries the original turn did. Without this partition + // the regenerate flow silently dropped the agent's mention + // awareness — same architectural bug we fixed on the new-chat path. 
const regenerateDocIds = sourceMentionedDocs - .filter((d) => d.kind === "doc" && d.document_type !== "SURFSENSE_DOCS") + .filter((d) => d.kind === "doc") .map((d) => d.id); const regenerateFolderIds = sourceMentionedDocs .filter((d) => d.kind === "folder") @@ -1973,8 +1955,6 @@ export default function NewChatPage() { client_platform: selection.client_platform, local_filesystem_mounts: selection.local_filesystem_mounts, mentioned_document_ids: regenerateDocIds.length > 0 ? regenerateDocIds : undefined, - mentioned_surfsense_doc_ids: - regenerateSurfsenseDocIds.length > 0 ? regenerateSurfsenseDocIds : undefined, mentioned_folder_ids: regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined, mentioned_connector_ids: regenerateConnectors.length > 0 ? regenerateConnectors.map((d) => d.id) : undefined, diff --git a/surfsense_web/atoms/chat/mentioned-documents.atom.ts b/surfsense_web/atoms/chat/mentioned-documents.atom.ts index 25d1e397a..cf1bd8bcf 100644 --- a/surfsense_web/atoms/chat/mentioned-documents.atom.ts +++ b/surfsense_web/atoms/chat/mentioned-documents.atom.ts @@ -102,10 +102,7 @@ export const mentionedDocumentIdsAtom = atom((get) => { const folders = deduped.filter((m) => m.kind === "folder"); const connectors = deduped.filter((m) => m.kind === "connector"); return { - surfsense_doc_ids: docs - .filter((doc) => doc.document_type === "SURFSENSE_DOCS") - .map((doc) => doc.id), - document_ids: docs.filter((doc) => doc.document_type !== "SURFSENSE_DOCS").map((doc) => doc.id), + document_ids: docs.map((doc) => doc.id), folder_ids: folders.map((f) => f.id), connector_ids: connectors.map((c) => c.id), connectors: connectors.map((c) => ({ diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index a788c0ce6..cbf3c82d6 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -1,16 +1,13 @@ "use client"; 
-import { useQuery } from "@tanstack/react-query"; import { useSetAtom } from "jotai"; -import { ExternalLink, FileText } from "lucide-react"; -import dynamic from "next/dynamic"; +import { FileText } from "lucide-react"; import type { FC } from "react"; import { useState } from "react"; import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom"; import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context"; import { CitationPanelContent } from "@/components/citation-panel/citation-panel"; import { Citation } from "@/components/tool-ui/citation"; -import { CitationHoverPopover } from "@/components/tool-ui/citation/citation-hover-popover"; import { Button } from "@/components/ui/button"; import { Drawer, @@ -19,21 +16,8 @@ import { DrawerHeader, DrawerTitle, } from "@/components/ui/drawer"; -import { Spinner } from "@/components/ui/spinner"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { useMediaQuery } from "@/hooks/use-media-query"; -import { documentsApiService } from "@/lib/apis/documents-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; - -// Lazily load MarkdownViewer here to break the static import cycle: -// `markdown-viewer.tsx` → `citation-renderer.tsx` → `inline-citation.tsx` -// would otherwise pull `markdown-viewer.tsx` back in at module-init time. -// Only `SurfsenseDocCitation` (popover body) ever renders this viewer, so -// the lazy boundary is invisible to most call paths. -const MarkdownViewer = dynamic( - () => import("@/components/markdown-viewer").then((m) => m.MarkdownViewer), - { ssr: false, loading: () => } -); interface InlineCitationProps { chunkId: number; @@ -41,9 +25,7 @@ interface InlineCitationProps { } /** - * Inline citation badge for knowledge-base chunks (numeric chunk IDs) and - * Surfsense documentation chunks (`isDocsChunk`). Negative chunk IDs render as - * a static "doc" pill (anonymous/synthetic uploads). 
+ * Inline citation badge for knowledge-base chunks (numeric chunk IDs). * * Numeric KB chunks: clicking opens the citation panel in the right * sidebar (alongside the chat — does not replace it). The panel shows @@ -51,12 +33,13 @@ interface InlineCitationProps { * `chunk_window`), with the cited one highlighted and an option to * expand the window or jump into the full document via the editor panel. * - * Surfsense docs chunks: rendered as a hover-controlled shadcn Popover that - * lazily fetches and previews the cited chunk inline, since those docs aren't - * indexed into the user's search space and have no tab to open. + * Negative chunk IDs and legacy SurfSense-docs chunks (`isDocsChunk`) render + * as a static, non-interactive "doc" pill. The SurfSense product-docs feature + * was removed, so those markers are inert (no fetch, no preview) — they only + * survive in old persisted messages. */ export const InlineCitation: FC = ({ chunkId, isDocsChunk = false }) => { - if (chunkId < 0) { + if (chunkId < 0 || isDocsChunk) { return ( @@ -68,15 +51,11 @@ export const InlineCitation: FC = ({ chunkId, isDocsChunk = doc - Uploaded document + {isDocsChunk ? 
"Documentation reference" : "Uploaded document"} ); } - if (isDocsChunk) { - return ; - } - return ; }; @@ -127,128 +106,6 @@ const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => { ); }; -const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => { - const isTouchLike = useMediaQuery("(hover: none), (pointer: coarse)"); - const [mobilePreviewOpen, setMobilePreviewOpen] = useState(false); - const docQuery = useSurfsenseDocPreviewQuery(chunkId, mobilePreviewOpen); - - const handleMobileClick = () => { - setMobilePreviewOpen(true); - }; - - return ( - <> - ( - - )} - > - - - - - - - Surfsense documentation - - - - - - ); -}; - -function useSurfsenseDocPreviewQuery(chunkId: number, enabled = true) { - return useQuery({ - queryKey: cacheKeys.documents.byChunk(`doc-${chunkId}`), - queryFn: () => documentsApiService.getSurfsenseDocByChunk(chunkId), - staleTime: 5 * 60 * 1000, - enabled, - }); -} - -type SurfsenseDocPreviewQuery = ReturnType; - -const SurfsenseDocPreview: FC<{ chunkId: number }> = ({ chunkId }) => { - const query = useSurfsenseDocPreviewQuery(chunkId); - - return ; -}; - -const SurfsenseDocPreviewContent: FC<{ - chunkId: number; - query: SurfsenseDocPreviewQuery; - contentClassName?: string; -}> = ({ chunkId, query, contentClassName = "max-h-72" }) => { - const { data, isLoading, error } = query; - - const citedChunk = data?.chunks.find((c) => c.id === chunkId) ?? data?.chunks[0]; - - return ( - <> -
-
-

{data?.title ?? "Surfsense documentation"}

-

Chunk #{chunkId}

-
- {data?.public_url && ( - - - Open - - )} -
-
- {isLoading && ( -
- - Loading… -
- )} - {error && ( -

- {error instanceof Error ? error.message : "Failed to load chunk"} -

- )} - {!isLoading && !error && citedChunk?.content && ( - - )} - {!isLoading && !error && !citedChunk?.content && ( -

No content available.

- )} -
- - ); -}; - import { tryGetHostname } from "@/lib/url"; interface UrlCitationProps { diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 9abcfbb49..0336ffd35 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -1593,7 +1593,7 @@ interface ToolGroup { const TOOL_GROUPS: ToolGroup[] = [ { label: "Research", - tools: ["search_surfsense_docs", "scrape_webpage"], + tools: ["scrape_webpage"], }, { label: "Generate", diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index 757ee2fc2..881fbe2b0 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -90,7 +90,6 @@ const DesktopLocalTabContent = dynamic( ); const NON_DELETABLE_DOCUMENT_TYPES: readonly string[] = [ - "SURFSENSE_DOCS", "USER_MEMORY", "TEAM_MEMORY", ]; diff --git a/surfsense_web/components/new-chat/document-mention-picker.tsx b/surfsense_web/components/new-chat/document-mention-picker.tsx index 8e3fd4ca8..769327e1e 100644 --- a/surfsense_web/components/new-chat/document-mention-picker.tsx +++ b/surfsense_web/components/new-chat/document-mention-picker.tsx @@ -3,14 +3,7 @@ import { useQuery as useZeroQuery } from "@rocicorp/zero/react"; import { keepPreviousData, useQuery } from "@tanstack/react-query"; import { useAtomValue } from "jotai"; -import { - BookOpen, - ChevronLeft, - ChevronRight, - Files, - Folder as FolderIcon, - Unplug, -} from "lucide-react"; +import { ChevronLeft, ChevronRight, Files, Folder as FolderIcon, Unplug } from "lucide-react"; import { Fragment, forwardRef, @@ -57,13 +50,6 @@ interface DocumentMentionPickerProps { onDone: () => void; initialSelectedDocuments?: MentionedDocumentInfo[]; externalSearch?: string; - /** - * Whether to surface the "SurfSense Docs" (product 
documentation) branch - * and include those docs in search results. Defaults to ``true`` so the - * chat composer is unchanged; callers like the automation task input pass - * ``false`` to reference only the user's own knowledge base + connectors. - */ - includeSurfsenseDocs?: boolean; } const PAGE_SIZE = 20; @@ -74,7 +60,6 @@ const RECENTS_STORAGE_PREFIX = "surfsense:composer-mention-recents:v1:"; type BrowseView = | { kind: "root" } - | { kind: "surfsense-docs" } | { kind: "files-folders" } | { kind: "connectors" } | { kind: "connector-type"; connectorType: string; title: string }; @@ -241,7 +226,6 @@ export const DocumentMentionPicker = forwardRef< onDone, initialSelectedDocuments = [], externalSearch = "", - includeSurfsenseDocs = true, }, ref ) { @@ -298,15 +282,6 @@ export const DocumentMentionPicker = forwardRef< [searchSpaceId, debouncedSearch, isSearchValid] ); - const surfsenseDocsQueryParams = useMemo(() => { - const params: { page: number; page_size: number; title?: string } = { - page: 0, - page_size: PAGE_SIZE, - }; - if (isSearchValid) params.title = debouncedSearch.trim(); - return params; - }, [debouncedSearch, isSearchValid]); - const { data: titleSearchResults, isLoading: isTitleSearchLoading } = useQuery({ queryKey: ["document-titles", titleSearchParams], queryFn: ({ signal }) => @@ -316,15 +291,6 @@ export const DocumentMentionPicker = forwardRef< placeholderData: keepPreviousData, }); - const { data: surfsenseDocs, isLoading: isSurfsenseDocsLoading } = useQuery({ - queryKey: ["surfsense-docs-mention", debouncedSearch, isSearchValid], - queryFn: ({ signal }) => - documentsApiService.getSurfsenseDocs({ queryParams: surfsenseDocsQueryParams }, signal), - staleTime: 3 * 60 * 1000, - enabled: includeSurfsenseDocs && (!hasSearch || isSearchValid), - placeholderData: keepPreviousData, - }); - const filterBySearchTerm = useCallback( (docs: Pick[]) => { if (!isSearchValid) return docs; @@ -338,23 +304,13 @@ export const DocumentMentionPicker = 
forwardRef< if (currentPage !== 0) return; const combinedDocs: Pick[] = []; - if (includeSurfsenseDocs && surfsenseDocs?.items) { - for (const doc of surfsenseDocs.items) { - combinedDocs.push({ - id: doc.id, - title: doc.title, - document_type: "SURFSENSE_DOCS", - }); - } - } - if (titleSearchResults?.items) { combinedDocs.push(...titleSearchResults.items); setHasMore(titleSearchResults.has_more); } setAccumulatedDocuments(filterBySearchTerm(combinedDocs)); - }, [titleSearchResults, surfsenseDocs, currentPage, filterBySearchTerm, includeSurfsenseDocs]); + }, [titleSearchResults, currentPage, filterBySearchTerm]); const loadNextPage = useCallback(async () => { if (isLoadingMore || !hasMore) return; @@ -391,14 +347,6 @@ export const DocumentMentionPicker = forwardRef< return accumulatedDocuments.filter((doc) => doc.title.toLowerCase().includes(searchLower)); }, [accumulatedDocuments, deferredSearch, isSingleCharSearch]); - const surfsenseDocsList = useMemo( - () => actualDocuments.filter((doc) => doc.document_type === "SURFSENSE_DOCS"), - [actualDocuments] - ); - const userDocsList = useMemo( - () => actualDocuments.filter((doc) => doc.document_type !== "SURFSENSE_DOCS"), - [actualDocuments] - ); const folderMentions = useMemo(() => { const all = (zeroFolders ?? 
[]).map((f) => makeFolderMention({ id: f.id, title: f.name })); if (!hasSearch) return all; @@ -463,7 +411,6 @@ export const DocumentMentionPicker = forwardRef< () => new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))), [initialSelectedDocuments] ); - const showSurfsenseDocsRoot = includeSurfsenseDocs && surfsenseDocsList.length > 0; const selectMention = useCallback( (mention: MentionedDocumentInfo) => { @@ -487,16 +434,6 @@ export const DocumentMentionPicker = forwardRef< const rootNodes = useMemo[]>(() => { const nodes: ComposerSuggestionNode[] = [...recentRootNodes]; - if (showSurfsenseDocsRoot) { - nodes.push({ - id: "surfsense-docs", - label: "SurfSense Docs", - subtitle: "Browse product documentation", - icon: , - type: "branch", - value: { kind: "view", view: { kind: "surfsense-docs" } }, - }); - } nodes.push( { id: "files-folders", @@ -519,7 +456,7 @@ export const DocumentMentionPicker = forwardRef< } ); return nodes; - }, [activeConnectors.length, recentRootNodes, showSurfsenseDocsRoot]); + }, [activeConnectors.length, recentRootNodes]); const searchNodes = useMemo[]>(() => { const searchLower = (isSingleCharSearch ? 
deferredSearch : debouncedSearch) @@ -582,19 +519,6 @@ export const DocumentMentionPicker = forwardRef< const browseNodes = useMemo[]>(() => { if (view.kind === "root") return rootNodes; - if (view.kind === "surfsense-docs") { - return surfsenseDocsList.map((doc) => { - const mention = makeDocMention(doc); - return { - id: getMentionDocKey(mention), - label: doc.title, - icon: getConnectorIcon(doc.document_type, "size-4"), - type: "item" as const, - disabled: selectedKeys.has(getMentionDocKey(mention)), - value: { kind: "mention" as const, mention }, - }; - }); - } if (view.kind === "files-folders") { const folders = folderMentions.map((mention) => ({ id: getMentionDocKey(mention), @@ -605,7 +529,7 @@ export const DocumentMentionPicker = forwardRef< disabled: selectedKeys.has(getMentionDocKey(mention)), value: { kind: "mention" as const, mention }, })); - const docs = userDocsList.map((doc) => { + const docs = actualDocuments.map((doc) => { const mention = makeDocMention(doc); return { id: getMentionDocKey(mention), @@ -652,13 +576,12 @@ export const DocumentMentionPicker = forwardRef< }; }); }, [ + actualDocuments, activeConnectors, connectorTypeEntries, folderMentions, rootNodes, selectedKeys, - surfsenseDocsList, - userDocsList, view, ]); @@ -708,27 +631,23 @@ export const DocumentMentionPicker = forwardRef< const isRootBrowseView = !hasSearch && view.kind === "root"; const isVisibleViewLoading = hasSearch - ? isTitleSearchLoading || isSurfsenseDocsLoading || isConnectorsLoading - : view.kind === "surfsense-docs" - ? isSurfsenseDocsLoading - : view.kind === "files-folders" - ? isTitleSearchLoading - : view.kind === "connectors" || view.kind === "connector-type" - ? isConnectorsLoading - : false; + ? isTitleSearchLoading || isConnectorsLoading + : view.kind === "files-folders" + ? isTitleSearchLoading + : view.kind === "connectors" || view.kind === "connector-type" + ? 
isConnectorsLoading + : false; const actualLoading = isVisibleViewLoading && !isSingleCharSearch && visibleNodes.length === 0 && !isRootBrowseView; const title = hasSearch || view.kind === "root" ? null - : view.kind === "surfsense-docs" - ? "SurfSense Docs" - : view.kind === "files-folders" - ? "Files & Folders" - : view.kind === "connectors" - ? "Connectors" - : view.title; + : view.kind === "files-folders" + ? "Files & Folders" + : view.kind === "connectors" + ? "Connectors" + : view.title; return ( ; case "EXTENSION": return ; - case "SURFSENSE_DOCS": - return ; case "USER_MEMORY": case "TEAM_MEMORY": return ; diff --git a/surfsense_web/contracts/enums/toolIcons.tsx b/surfsense_web/contracts/enums/toolIcons.tsx index 668cb51cd..494c0eaee 100644 --- a/surfsense_web/contracts/enums/toolIcons.tsx +++ b/surfsense_web/contracts/enums/toolIcons.tsx @@ -1,5 +1,4 @@ import { - BookOpen, Brain, Calendar, FileEdit, @@ -47,7 +46,6 @@ const TOOL_ICONS: Record = { // Web / search scrape_webpage: ScanLine, web_search: Globe, - search_surfsense_docs: BookOpen, // Automations create_automation: Workflow, // Memory @@ -152,7 +150,6 @@ const TOOL_DISPLAY_NAMES: Record = { // Web / search scrape_webpage: "Read webpage", web_search: "Search the web", - search_surfsense_docs: "Search knowledge base", // Automations create_automation: "Create automation", // Memory diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts index ccc15fa62..82c6cbdaf 100644 --- a/surfsense_web/contracts/types/document.types.ts +++ b/surfsense_web/contracts/types/document.types.ts @@ -27,7 +27,6 @@ export const documentTypeEnum = z.enum([ "CIRCLEBACK", "OBSIDIAN_CONNECTOR", "LOCAL_FOLDER_FILE", - "SURFSENSE_DOCS", "NOTE", "USER_MEMORY", "TEAM_MEMORY", @@ -77,27 +76,6 @@ export const documentWithChunks = document.extend({ chunk_start_index: z.number().optional().default(0), }); -/** - * Surfsense documentation schemas - * Follows the same pattern as 
document/documentWithChunks - */ -export const surfsenseDocsChunk = z.object({ - id: z.number(), - content: z.string(), -}); - -export const surfsenseDocsDocument = z.object({ - id: z.number(), - title: z.string(), - source: z.string(), - public_url: z.string(), - content: z.string(), -}); - -export const surfsenseDocsDocumentWithChunks = surfsenseDocsDocument.extend({ - chunks: z.array(surfsenseDocsChunk), -}); - /** * Get documents */ @@ -284,32 +262,6 @@ export const getDocumentChunksResponse = z.object({ has_more: z.boolean(), }); -/** - * Get Surfsense docs by chunk - */ -export const getSurfsenseDocsByChunkRequest = z.object({ - chunk_id: z.number(), -}); - -export const getSurfsenseDocsByChunkResponse = surfsenseDocsDocumentWithChunks; - -/** - * List Surfsense docs - */ -export const getSurfsenseDocsRequest = z.object({ - queryParams: paginationQueryParams.extend({ - title: z.string().optional(), - }), -}); - -export const getSurfsenseDocsResponse = z.object({ - items: z.array(surfsenseDocsDocument), - total: z.number(), - page: z.number(), - page_size: z.number(), - has_more: z.boolean(), -}); - /** * Update document */ @@ -358,13 +310,6 @@ export type DeleteDocumentResponse = z.infer; export type DocumentTypeEnum = z.infer; export type DocumentSortBy = z.infer; export type SortOrder = z.infer; -export type SurfsenseDocsChunk = z.infer; -export type SurfsenseDocsDocument = z.infer; -export type SurfsenseDocsDocumentWithChunks = z.infer; -export type GetSurfsenseDocsByChunkRequest = z.infer; -export type GetSurfsenseDocsByChunkResponse = z.infer; -export type GetSurfsenseDocsRequest = z.infer; -export type GetSurfsenseDocsResponse = z.infer; export type GetDocumentChunksRequest = z.infer; export type GetDocumentChunksResponse = z.infer; export type ChunkRead = z.infer; diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index 630c88d16..f9785c8a8 100644 --- 
a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -12,7 +12,6 @@ import { type GetDocumentsRequest, type GetDocumentsStatusRequest, type GetDocumentTypeCountsRequest, - type GetSurfsenseDocsRequest, getDocumentByChunkRequest, getDocumentByChunkResponse, getDocumentChunksRequest, @@ -25,9 +24,6 @@ import { getDocumentsStatusResponse, getDocumentTypeCountsRequest, getDocumentTypeCountsResponse, - getSurfsenseDocsByChunkResponse, - getSurfsenseDocsRequest, - getSurfsenseDocsResponse, type SearchDocumentsRequest, type SearchDocumentTitlesRequest, searchDocumentsRequest, @@ -363,48 +359,6 @@ class DocumentsApiService { ); }; - /** - * Get Surfsense documentation by chunk ID - * Used for resolving [citation:doc-XXX] citations - */ - getSurfsenseDocByChunk = async (chunkId: number) => { - return baseApiService.get( - `/api/v1/surfsense-docs/by-chunk/${chunkId}`, - getSurfsenseDocsByChunkResponse - ); - }; - - /** - * List all Surfsense documentation documents - * @param request - The request with query params - * @param signal - Optional AbortSignal for request cancellation - */ - getSurfsenseDocs = async (request: GetSurfsenseDocsRequest, signal?: AbortSignal) => { - const parsedRequest = getSurfsenseDocsRequest.safeParse(request); - - if (!parsedRequest.success) { - console.error("Invalid request:", parsedRequest.error); - - const errorMessage = parsedRequest.error.issues.map((issue) => issue.message).join(", "); - throw new ValidationError(`Invalid request: ${errorMessage}`); - } - - // Transform query params to be string values - const transformedQueryParams = parsedRequest.data.queryParams - ? Object.fromEntries( - Object.entries(parsedRequest.data.queryParams).map(([k, v]) => [k, String(v)]) - ) - : undefined; - - const queryParams = transformedQueryParams - ? 
new URLSearchParams(transformedQueryParams).toString() - : ""; - - const url = `/api/v1/surfsense-docs?${queryParams}`; - - return baseApiService.get(url, getSurfsenseDocsResponse, { signal }); - }; - /** * Update a document */ diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts index abe6bc02c..d30b87665 100644 --- a/surfsense_web/lib/chat/thread-persistence.ts +++ b/surfsense_web/lib/chat/thread-persistence.ts @@ -221,7 +221,6 @@ export interface RegenerateParams { content: string; }>; mentionedDocumentIds?: number[]; - mentionedSurfsenseDocIds?: number[]; } /** diff --git a/surfsense_web/lib/documents/document-type-labels.ts b/surfsense_web/lib/documents/document-type-labels.ts index 844961886..9e187f940 100644 --- a/surfsense_web/lib/documents/document-type-labels.ts +++ b/surfsense_web/lib/documents/document-type-labels.ts @@ -25,7 +25,6 @@ export function getDocumentTypeLabel(type: string): string { CIRCLEBACK: "Circleback", OBSIDIAN_CONNECTOR: "Obsidian", LOCAL_FOLDER_FILE: "Local Folder", - SURFSENSE_DOCS: "SurfSense Docs", NOTE: "Note", COMPOSIO_GOOGLE_DRIVE_CONNECTOR: "Composio Google Drive", COMPOSIO_GMAIL_CONNECTOR: "Composio Gmail", diff --git a/surfsense_web/lib/query-client/cache-keys.ts b/surfsense_web/lib/query-client/cache-keys.ts index 8943d6842..35724cf94 100644 --- a/surfsense_web/lib/query-client/cache-keys.ts +++ b/surfsense_web/lib/query-client/cache-keys.ts @@ -30,7 +30,6 @@ export const cacheKeys = { withQueryParams: (queries: GetDocumentsRequest["queryParams"]) => ["documents-with-queries", ...stableEntries(queries)] as const, document: (documentId: string) => ["document", documentId] as const, - byChunk: (chunkId: string) => ["documents", "by-chunk", chunkId] as const, }, logs: { list: (searchSpaceId?: number | string) => ["logs", "list", searchSpaceId] as const, diff --git a/surfsense_web/tsc_out.txt b/surfsense_web/tsc_out.txt new file mode 100644 index 
0000000000000000000000000000000000000000..c51e470851df2ea0ac136579d4d6112e183bad16 GIT binary patch literal 32582 zcmeI5Yi|_E6^8qBr2L1GKg2{@Fg6(2EFm&p!;%0-HlSUtBFh-t%i`O523TjqUr+Kr zb>>jj-PJu8U+|KaHJ+L7>Z(&!=klIYr)vKDpUvV(e{;o^;-})U*e|w=KkM78Vo%?8 z_36i=ug~xF`JeiBRBY&d?qJ>5Ol-=SW;;(4)NEEKm@ z=Ie0psjm0*dLK&B#%A$aSN8QjD7Hkyw(9Na^H0T_@M%kT_VjnH_+#-0-RTvpwWsHG z^|08{{{>OfD`rL4QJ~>SbwJZ^dTpu9zOEk@2WsUY^x$vd?wY=H_gJ*t)e{4KdZQA% zp|<7PbI*$t)w8X#zZD;gd;04Y|JLiBo_Ve7JK^bVef~yQKZaVUpEkEOYJ_VvuMTJD8wT+NTrY@lOT zb-@!h^Stkbe!(FhhI5f}R^$da!smmTe^Pua`s;Jn*S(%$7SYgllM9gb)1XF`v2hEy!}9vqmi%`Zb@>sV_|4J?sT?8 zqt)4t&UUyh8;vbG+tHyOQ0VU$^?>=ofM(9b8Vn^u=Lf?5?5Y_tDCw~_7JRRcvuU?9rX=M+1$59-M79` zyLne2TfRAlOcCu9^#xkZn_C~!M*l#*Rs8GT6Fo9t!u*$VE#@V7i67-JHer7Aif9YJ z??mr-IpA@wfYBM5x8WLh@BwW`n5FG7mUxQzMX^_XmDvft7o%--?uFW;zX@8w1Hh-i z$Bh049)Z<~ukuLs*gf+YK8OOI!z=V2+UUX=cv3cdHuCruRvSLcufZ3!@%9<9F^10$ z)KB-w7!mmKIE+B_aoi@rnfC)F+wysS3jP%wRGx=k@h8<&zQ)|_>koc-rsoca<3LGf z7f+3vxNl=d8T=u*Y9_P^KSj=-3BGOpxyZ$g6i?U7q_i)o&T`FV%Acw2W7Pyd!8i5V zjHW^05ZWt0@4OBF_AwTd{nGX5{^++W=AA5ubGf%nPinW9Ur7}62nXhSQKaO*~R-+dL z&4{9VUBA0#iJm@j8E7;uXB6*?d*Fc>1F#5>+16B7F6+6d1K`T(Z2~o+&xq9!Ex9V5 zK{DUzh0RFsUzKK{j9E+O1aIM2aj8e*ZindCjaD0Yvser=xulwQ)egK*Z=5rtHl5AH zP+Sk)4fi7L4~CRQ&2leTlJTQ0E-NOBrrC{Gs}$8+Wk$-j*k>f=erB>>a#=dgtoGhE_H$Qjf{f^ZIN?9x}AErdBPQ`Bdeh!BB;jGqgY?wq7od(pHT_K$t#b z(WXVTmQprKGVMNX90cpVt5KQ@TC*;pAi2*Wq0ngK4Zhe-FutAMzz-$CCSYcU0#Vc+izNd*sXhGsdet_s+Eg2_v;Y9#QSXa z%_gzAAVTzI8H=NP4TtYaa#)uGnP1ga)~WD0a`ZMKbS%#x!S)Z6I(u3@@j9EHH#@Ku z${l^OV@BV4p#50(4d3cSHKYI2TFt|O{=WPQ&~?4Prc2YYcl$;wE7G$d7t?2Hsizt@ z5X80Op;if3_5ZO-Z>Ude^5|ZM&&#_0RDUaa{<%J{HPC>^i4R9>uLCYK-_QDq^*GWv z&S~`LgDtVrbDEntLvzUtRq-5BLM^ZLH`h24L?QNMRlZZL*eS5(g$J9~;o!5>znKirjAsLAqm6#^bzX(>f*-(EZ4m?GySM5y7}-Z*(po;2 zu#@u@xCwgeyclI~Q}d0#wtCCS^Gq6gvFxNtEK0|Vq2#fh5Yt;Eh*YE@XrEezh zR}_c7r5T=Yj7QHamxw&5+ky9H^<-?3IkXj9R-tXZob_jJr(c`Ey5fZR-!e z8XtBbELy+DI(Ky+2{c^~+}RvSUl?XNr9Rdb@p7;DYuJPFUUVbBu_vyPe64TP>TFt@ zX`Hyi=(U&EZc|?v 
zM)sG)A8rBq!$`iZOt$maH2owuu9>GsTRzLky4?LF`|&jMNqlKba*KRVZqYAlHtFS} zwA-|^`FVQj8KA^l$#=x^M`^q%r=y_&MJ=n?Cc~@&`&y^$iIMnuDkTUX9W&IeJrC%SmrvSq7iAMC~&eeP+?RW!|; zyAOJVKN*MIeaCOPd#r2_k7|0BEJMpa&)1C>@Mu=Pe%BTGyD3JM_7qr333$i~XmVmZBD7dkdy7ulGgV06@mUWRI^5e~8EY14Crr^kNO zXMAKUi{3`Zg&h}8&V}q^crJ@h{wY2lJ0q}0p6~0{7>iln4dE8ucf-VJ4z|&(GZ7tf zJ@E~h9WoHeYvmJo1DylP@jd&lZE3AJ2Yn_;uFv**@DkCD>w2Z~JQGl=w~LgU+~8Fr z^|}0Xr@%8kC>OJ!syf?s6LFkF$-T*7PLE(4-m3&VUC~;sChui6PQK)Z#tE#m)%uKi z%V4Gr*%`}&Wo5XHqNZoYYti?A(D#@-@5r*fj&a*;@aA5z-aZdDk$7St&J&IG>_mB2?#)@T9S%2b19I$exZ`Nf@9A)O=M7f1>hIYb>~MJW0;>C5 zI)pSD`Hw<5=Z}y&93E}8Bet>!(l!U>k`dLlJ=5g5P4monEq;D3>u|X3E@qd;+2L^3 z1UnqQ8}>(a@o>{)O%V}%5s$reINaC6b2_(Mm8s{nIp~MI8!=onm^|YL3>$0Q1yzHni35TCW z9!jbW)79L(Jj9H?XwAzjy1C~-2xM+-gxnm?J!sBQv2wO+fR!lng2^!^;~`gfeqDJ~ zOHEIW-)?!_EGsEyD_2*LVrkPnxt=%zV#i!u1^1Xp)pBhppUO{a-56#?HMf6J`6jWS z%y!HT$o02eHa{b|=Q?hRBR38=<$ew2^xVYE%t$iH(Ivasvp?dMbpCU9EwjUSw^U15 z&t^gfUp)uJtD zRqWy?Tio_T=FVv19JjtQ7m4s0o&1Usdt{q`h2loYOkm8bd4x^-MO{jA5@h3<-%)Gx zx~_c*Yw8>SH-^2OQR|%VH4}FBzf`{;>*SdFU7lUibKmQfvxk}ckM-nJoeS_n_1B-i zuV)`=Z^R4T^O}~mXQEzmUF|oYFpyFUJx^3y?j(X(j&s>o)NQlXt7WM4c^Wal* z!DEKFoi7PaT@Lmj)u}&9Q?_Z^nc<1@^m9W!WnUh-?mh-cZ++LBm1gG(=*su?V2}J9 zM%!;klsVQ(Hhf+un6C2O)i$fXb7nVakTp?7Kk~m$SN;Wodts&1wPTvi*@R=8=zHdfO_eVoeY05vpy4SQD+hBXRx`>^EtnGi+FnN)P~>EQxxjB?UryEan+{% zvz#e#U0Kb@^0S~f%o7HuaxU$x5UigUz-frK&&x8Z+1ui0LXsg(Y2HtQ9`yY)rc-ms zvz>AfC%GBT`*<4 zorph--x=dq;cSl>9g5P)DHQO^p<494?);7sdvo!mn)kX-!Y2<@zelDXh#oxOl`u>E zN|fuA=_zeLH{!BTirpP^Piu6 zKkesE&}~>R&Mc35D|gNhcoH0ph5=1u9m_IrPod+`i*qUi3O2jr8ke2+$aKos zuFEY>Ll3`EZQAGZP;}!0DC%pDIDsE~bxr@HXW=u&dOv5!G>^-oW()kZDvkfM>cR8r z1uJXzko~UM2m2*ATCg*t`rR{jLKz51UuGE25e=W}* zJ7?CHbHD7I5I^TB?p4ltCl6KYp?U$u%gIyd zOinFS!4JHj{jOSk&$K6Y)=PS&-Z%S|F6IG@z%F0L3x2clVx4&qzW=P*XVeTofgO;X LlrupwRW$TJ&LaAJ literal 0 HcmV?d00001