diff --git a/surfsense_backend/alembic/versions/144_add_automation_tables.py b/surfsense_backend/alembic/versions/144_add_automation_tables.py index 8d836095d..39f927417 100644 --- a/surfsense_backend/alembic/versions/144_add_automation_tables.py +++ b/surfsense_backend/alembic/versions/144_add_automation_tables.py @@ -98,9 +98,7 @@ def upgrade() -> None: op.execute( "CREATE INDEX ix_automation_triggers_automation_id ON automation_triggers(automation_id);" ) - op.execute( - "CREATE INDEX ix_automation_triggers_type ON automation_triggers(type);" - ) + op.execute("CREATE INDEX ix_automation_triggers_type ON automation_triggers(type);") op.execute( "CREATE INDEX ix_automation_triggers_enabled ON automation_triggers(enabled);" ) diff --git a/surfsense_backend/alembic/versions/146_drop_surfsense_docs_tables.py b/surfsense_backend/alembic/versions/146_drop_surfsense_docs_tables.py new file mode 100644 index 000000000..725405834 --- /dev/null +++ b/surfsense_backend/alembic/versions/146_drop_surfsense_docs_tables.py @@ -0,0 +1,129 @@ +"""Drop Surfsense docs tables (feature removed end to end) + +Revision ID: 146 +Revises: 145 +Create Date: 2026-05-28 + +Removes the SurfSense product-documentation feature: the +``surfsense_docs_documents`` and ``surfsense_docs_chunks`` tables (created +in revision 60) and the GIN trigram index on the title column (added in +revision 67). The docs were seeded at startup from local MDX files, so no +user data is lost. Downgrade recreates the tables and indexes. +""" + +from collections.abc import Sequence + +from alembic import op +from app.config import config + +# revision identifiers, used by Alembic. +revision: str = "146" +down_revision: str | None = "145" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +# Embedding dimension is required to recreate the vector columns on downgrade. +EMBEDDING_DIM = config.embedding_model_instance.dimension + + +def upgrade() -> None: + """Drop surfsense docs tables and all their indexes.""" + # Trigram index from revision 67 + op.execute("DROP INDEX IF EXISTS idx_surfsense_docs_title_trgm") + + # Full-text search indexes + op.execute("DROP INDEX IF EXISTS surfsense_docs_chunks_search_index") + op.execute("DROP INDEX IF EXISTS surfsense_docs_documents_search_index") + + # Vector indexes + op.execute("DROP INDEX IF EXISTS surfsense_docs_chunks_vector_index") + op.execute("DROP INDEX IF EXISTS surfsense_docs_documents_vector_index") + + # B-tree indexes + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_chunks_document_id") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_updated_at") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_content_hash") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_source") + + # Tables (chunks first due to FK) + op.execute("DROP TABLE IF EXISTS surfsense_docs_chunks") + op.execute("DROP TABLE IF EXISTS surfsense_docs_documents") + + +def downgrade() -> None: + """Recreate surfsense docs tables and indexes (reverses revisions 60 + 67).""" + op.execute( + f""" + CREATE TABLE IF NOT EXISTS surfsense_docs_documents ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + source VARCHAR NOT NULL UNIQUE, + title VARCHAR NOT NULL, + content TEXT NOT NULL, + content_hash VARCHAR NOT NULL, + embedding vector({EMBEDDING_DIM}), + updated_at TIMESTAMP WITH TIME ZONE + ); + """ + ) + op.execute( + f""" + CREATE TABLE IF NOT EXISTS surfsense_docs_chunks ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + content TEXT NOT NULL, + embedding vector({EMBEDDING_DIM}), + document_id INTEGER NOT NULL REFERENCES surfsense_docs_documents(id) ON DELETE CASCADE + ); + """ + ) + + # B-tree indexes + op.execute( + "CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_source ON surfsense_docs_documents(source)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_content_hash ON surfsense_docs_documents(content_hash)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_updated_at ON surfsense_docs_documents(updated_at)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_surfsense_docs_chunks_document_id ON surfsense_docs_chunks(document_id)" + ) + + # Vector indexes + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_documents_vector_index + ON surfsense_docs_documents USING hnsw (embedding public.vector_cosine_ops); + """ + ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_chunks_vector_index + ON surfsense_docs_chunks USING hnsw (embedding public.vector_cosine_ops); + """ + ) + + # Full-text search indexes + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_documents_search_index + ON surfsense_docs_documents USING gin (to_tsvector('english', content)); + """ + ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_chunks_search_index + ON surfsense_docs_chunks USING gin (to_tsvector('english', content)); + """ + ) + + # Trigram index from revision 67 + op.execute( + """ + CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm + ON surfsense_docs_documents USING gin (title gin_trgm_ops); + """ + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md index e61a0bffb..2abd95d5a 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md @@ -4,8 +4,8 @@ never invent ids you didn't see. Citation ids are resolved by exact-match lookup; a wrong id silently breaks the link, so when in doubt, omit. ### Channel A — chunk blocks injected this turn -When `search_surfsense_docs` or `web_search` returns `` / -`` blocks in this turn: +When `web_search` returns `` / `` blocks in this +turn: 1. For each factual statement taken from those chunks, add `[citation:chunk_id]` using the **exact** id from a visible diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md index 71c86be40..8f2bfca4e 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md @@ -20,8 +20,8 @@ it to resolve paths the user describes in natural language ("my Q2 roadmap", delegating to a specialist. `` and `` blocks are chunked indexed content returned -by KB search (from `search_surfsense_docs`, or backing ``). -Each chunk carries a stable `id` attribute. +by KB search (backing ``). Each chunk carries a stable +`id` attribute. If a block doesn't appear this turn, work from the conversation alone. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md index 592c2ed9c..a5892c23a 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md @@ -20,8 +20,8 @@ week's planning notes") into concrete document references before delegating to a specialist. `` and `` blocks are chunked indexed content returned -by KB search (from `search_surfsense_docs`, or backing ``). -Each chunk carries a stable `id` attribute. +by KB search (backing ``). Each chunk carries a stable +`id` attribute. If a block doesn't appear this turn, work from the conversation alone. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md index f06a52c1d..80fa4bf8f 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md @@ -1,19 +1,21 @@ CRITICAL — ground factual answers in what you actually receive this turn: - injected workspace context (see ``), -- results from your own tool calls (`search_surfsense_docs`, `web_search`, - `scrape_webpage`), +- results from your own tool calls (`web_search`, `scrape_webpage`), - or substantive summaries returned by a `task` specialist you invoked. Do **not** answer factual or informational questions from general knowledge unless the user explicitly authorises it after you say you couldn't find enough in those sources. The flow when nothing is found: -1. Say you couldn't find enough in their workspace, docs, or tool output. +1. Say you couldn't find enough in their workspace or tool output. 2. Ask: *"Would you like me to answer from my general knowledge instead?"* 3. Only answer from general knowledge after a clear yes. This rule does NOT apply to: casual conversation · meta-questions about SurfSense ("what can you do?") · formatting or analysis of content already in chat · clear rewrite/edit instructions · lightweight web research. + +For "how do I use SurfSense" / product-documentation questions, point the +user to https://www.surfsense.com/docs. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/anthropic.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/anthropic.md index 89154c443..d852f5955 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/anthropic.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/anthropic.md @@ -5,7 +5,7 @@ Structured reasoning: - For non-trivial work, `` / short `` before tool calls is fine. Professional objectivity: -- Accuracy over flattery; verify with **search_surfsense_docs**, **web_search**, **scrape_webpage**, or **task** when unsure — don’t invent connector access. +- Accuracy over flattery; verify with **web_search**, **scrape_webpage**, or **task** when unsure — don’t invent connector access. Task management: - For 3+ steps, use todo tooling; update statuses promptly. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/deepseek.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/deepseek.md index 4254e9ed5..01d56999f 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/deepseek.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/deepseek.md @@ -13,6 +13,6 @@ Attribution: Tool calls: - Parallelise independent calls. -- Prefer **search_surfsense_docs** for SurfSense docs/product questions before **web_search** when that fits the ask. +- For SurfSense docs/product questions, point the user to https://www.surfsense.com/docs. - Don’t invent paths, chunk ids, or URLs — only values from tools or the user. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md index dc5073538..32ed959c1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md @@ -7,7 +7,7 @@ Output style: - GitHub-flavoured Markdown; monospace-friendly. Workflow (Understand → Plan → Act → Verify): -1. **Understand:** parse the ask; use **search_surfsense_docs** / injected workspace context before guessing. +1. **Understand:** parse the ask; use injected workspace context before guessing. 2. **Plan:** for multi-step work, a short plan first. 3. **Act:** only with tools you actually have on this agent (see `` and ``). Connector work → **task**. 4. **Verify:** re-read or re-search only when it materially reduces risk. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_classic.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_classic.md index 7ff3ec912..8596c42cd 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_classic.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_classic.md @@ -15,6 +15,7 @@ Output style: Tool calls: - Parallelise independent calls in one turn. -- Prefer **search_surfsense_docs** for SurfSense-product questions, **web_search** / **scrape_webpage** - for fresh public facts; integrations and heavy workflows → **task**. +- For SurfSense-product questions, point the user to https://www.surfsense.com/docs; + use **web_search** / **scrape_webpage** for fresh public facts; integrations and + heavy workflows → **task**. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md index 1038dde3d..28cf0ac63 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md @@ -3,10 +3,7 @@ You have two execution channels. Pick the one that owns the work — never simulate one with the other. ### 1. Direct tools (you call them yourself) -- `search_surfsense_docs` — SurfSense product docs (setup, configuration, - connector docs, feature behavior). -- `web_search` — search the public web (anything outside SurfSense docs and - the workspace KB). +- `web_search` — search the public web (anything outside the workspace KB). - `scrape_webpage` — fetch the body of a specific public URL. - `update_memory` — curate persistent memory (see ``). - `write_todos` — maintain a structured plan when the turn series spans @@ -14,6 +11,10 @@ simulate one with the other. `in_progress` **before** the `task` call that handles it, `completed` once the call returns. Skip for single-step requests. +**Questions about how to use SurfSense itself** (setup, configuration, +connectors, feature behavior) — point the user to the documentation: +https://www.surfsense.com/docs. There is no docs-search tool; give the link. + **You have NO filesystem tools.** Any read, write, edit, move, rename, or search inside the user's workspace goes through `task(knowledge_base, …)` — never via `write_file`, `ls`, or any direct file operation. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/__init__.py deleted file mode 100644 index c2cda318e..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""``search_surfsense_docs`` — description + few-shot examples.""" diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/description.md deleted file mode 100644 index 256d3f3a4..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/description.md +++ /dev/null @@ -1,10 +0,0 @@ -- `search_surfsense_docs` — Search official SurfSense documentation (product - help). - - Use when the user asks how SurfSense itself works — setup, configuration, - connector documentation, feature behavior, anything covered in the - product docs. - - Not a substitute for `task` when the user wants actions inside a - connected service (Gmail, Slack, Jira, Notion, etc.). - - Args: `query`, `top_k` (default 10). - - Returns doc excerpts; chunk ids may appear for attribution — see - `` for the contract. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/example.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/example.md deleted file mode 100644 index d53ad8c91..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_surfsense_docs/example.md +++ /dev/null @@ -1,15 +0,0 @@ - -user: "How do I install SurfSense?" -→ search_surfsense_docs(query="installation setup") - - - -user: "What connectors does SurfSense support?" -→ search_surfsense_docs(query="available connectors integrations") - - - -user: "How do I set up the Notion connector?" -→ search_surfsense_docs(query="Notion connector setup configuration") -(Changing data inside Notion itself → `task(notion, …)`, not this tool.) - diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py index 45870e768..09854aa2e 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py @@ -28,7 +28,6 @@ from __future__ import annotations from datetime import UTC, datetime - _HEADER = """\ You are the SurfSense automation drafter. Convert the user intent below into a SINGLE JSON object matching the AutomationCreate schema. Output diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py index 88509eda7..70fb42c0d 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py @@ -6,7 +6,6 @@ Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subag from __future__ import annotations MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = ( - "search_surfsense_docs", "web_search", "scrape_webpage", "update_memory", diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py index 91a0be506..eaed9a55f 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py @@ -404,9 +404,7 @@ def build_task_tool_with_parent_config( continue messages = payload.get("messages") or [] last_text = _safe_message_text(messages[-1]).rstrip() if messages else "" - message_blocks.append( - f"[task {task_index}] {last_text or ''}" - ) + message_blocks.append(f"[task {task_index}] {last_text or ''}") try: child_trace = _build_tool_trace(messages) except Exception: diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py index 84617d38b..298257799 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py @@ -117,9 +117,7 @@ def create_generate_podcast_tool( "podcast_id": podcast_id, "title": podcast_title, "file_location": file_location, - "message": ( - "Podcast generated and saved to your podcast panel." - ), + "message": ("Podcast generated and saved to your podcast panel."), } return with_receipt( payload=payload, diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/video_presentation.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/video_presentation.py index 8c52293de..5407c8834 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/video_presentation.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/video_presentation.py @@ -126,8 +126,7 @@ def create_generate_video_presentation_tool( elapsed, ) err = ( - "Background worker reported FAILED status for this " - "video presentation." + "Background worker reported FAILED status for this video presentation." ) payload = { "status": VideoPresentationStatus.FAILED.value, @@ -151,9 +150,7 @@ def create_generate_video_presentation_tool( except Exception as e: error_message = str(e) - logger.exception( - "[generate_video_presentation] Error: %s", error_message - ) + logger.exception("[generate_video_presentation] Error: %s", error_message) payload = { "status": VideoPresentationStatus.FAILED.value, "error": error_message, diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md index 3eabd8ee0..1b9ccaefa 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md @@ -8,7 +8,6 @@ Gather and synthesize evidence using SurfSense research tools with clear citatio - `web_search` - `scrape_webpage` -- `search_surfsense_docs` diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/__init__.py index 414cc96f4..7234942b6 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/__init__.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/__init__.py @@ -1,11 +1,9 @@ -"""Research-stage tools: web search, scrape, and in-product doc search.""" +"""Research-stage tools: web search and scrape.""" from .scrape_webpage import create_scrape_webpage_tool -from .search_surfsense_docs import create_search_surfsense_docs_tool from .web_search import create_web_search_tool __all__ = [ "create_scrape_webpage_tool", - "create_search_surfsense_docs_tool", "create_web_search_tool", ] diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/index.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/index.py index ea544a8da..d8abce46c 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/index.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/index.py @@ -9,7 +9,6 @@ from langchain_core.tools import BaseTool from app.agents.new_chat.permissions import Ruleset from .scrape_webpage import create_scrape_webpage_tool -from .search_surfsense_docs import create_search_surfsense_docs_tool from .web_search import create_web_search_tool NAME = "research" @@ -27,5 +26,4 @@ def load_tools( available_connectors=d.get("available_connectors"), ), create_scrape_webpage_tool(firecrawl_api_key=d.get("firecrawl_api_key")), - create_search_surfsense_docs_tool(db_session=d["db_session"]), ] diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py deleted file mode 100644 index ccc5c49e2..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py +++ /dev/null @@ -1,145 +0,0 @@ -"""Semantic search over pre-indexed in-app documentation chunks for user how-to questions.""" - -import asyncio -import json - -from langchain_core.tools import tool -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument -from app.utils.document_converters import embed_text -from app.utils.surfsense_docs import surfsense_docs_public_url - - -def format_surfsense_docs_results(results: list[tuple]) -> str: - """Format (chunk, document) rows as XML with ``doc-`` chunk IDs for citations and UI routing.""" - if not results: - return "No relevant Surfsense documentation found for your query." - - # Group chunks by document - grouped: dict[int, dict] = {} - for chunk, doc in results: - public_url = surfsense_docs_public_url(doc.source) - if doc.id not in grouped: - grouped[doc.id] = { - "document_id": f"doc-{doc.id}", - "document_type": "SURFSENSE_DOCS", - "title": doc.title, - "url": public_url, - "metadata": {"source": doc.source, "public_url": public_url}, - "chunks": [], - } - grouped[doc.id]["chunks"].append( - { - "chunk_id": f"doc-{chunk.id}", - "content": chunk.content, - } - ) - - # Render XML matching format_documents_for_context structure - parts: list[str] = [] - for g in grouped.values(): - metadata_json = json.dumps(g["metadata"], ensure_ascii=False) - - parts.append("") - parts.append("") - parts.append(f" {g['document_id']}") - parts.append(f" {g['document_type']}") - parts.append(f" <![CDATA[{g['title']}]]>") - parts.append(f" ") - parts.append(f" ") - parts.append("") - parts.append("") - parts.append("") - - for ch in g["chunks"]: - parts.append( - f" " - ) - - parts.append("") - parts.append("") - parts.append("") - - return "\n".join(parts).strip() - - -async def search_surfsense_docs_async( - query: str, - db_session: AsyncSession, - top_k: int = 10, -) -> str: - """ - Search Surfsense documentation using vector similarity. - - Args: - query: The search query about Surfsense usage - db_session: Database session for executing queries - top_k: Number of results to return - - Returns: - Formatted string with relevant documentation content - """ - # Get embedding for the query - query_embedding = await asyncio.to_thread(embed_text, query) - - # Vector similarity search on chunks, joining with documents - stmt = ( - select(SurfsenseDocsChunk, SurfsenseDocsDocument) - .join( - SurfsenseDocsDocument, - SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id, - ) - .order_by(SurfsenseDocsChunk.embedding.op("<=>")(query_embedding)) - .limit(top_k) - ) - - result = await db_session.execute(stmt) - rows = result.all() - - return format_surfsense_docs_results(rows) - - -def create_search_surfsense_docs_tool(db_session: AsyncSession): - """ - Factory function to create the search_surfsense_docs tool. - - Args: - db_session: Database session for executing queries - - Returns: - A configured tool function for searching Surfsense documentation - """ - - @tool - async def search_surfsense_docs(query: str, top_k: int = 10) -> str: - """ - Search Surfsense documentation for help with using the application. - - Use this tool when the user asks questions about: - - How to use Surfsense features - - Installation and setup instructions - - Configuration options and settings - - Troubleshooting common issues - - Available connectors and integrations - - Browser extension usage - - API documentation - - This searches the official Surfsense documentation that was indexed - at deployment time. It does NOT search the user's personal knowledge base. - - Args: - query: The search query about Surfsense usage or features - top_k: Number of documentation chunks to retrieve (default: 10) - - Returns: - Relevant documentation content formatted with chunk IDs for citations - """ - return await search_surfsense_docs_async( - query=query, - db_session=db_session, - top_k=top_k, - ) - - return search_surfsense_docs diff --git a/surfsense_backend/app/agents/new_chat/feature_flags.py b/surfsense_backend/app/agents/new_chat/feature_flags.py index 3cea051ef..27188fac3 100644 --- a/surfsense_backend/app/agents/new_chat/feature_flags.py +++ b/surfsense_backend/app/agents/new_chat/feature_flags.py @@ -104,7 +104,7 @@ class AgentFeatureFlags: # ``tools/google_drive``, ``tools/dropbox``, ``tools/onedrive``, # ``tools/google_calendar``, ``tools/confluence``, ``tools/discord``, # ``tools/teams``, ``tools/luma``, ``connected_accounts``, - # ``update_memory``, ``search_surfsense_docs``) now acquire fresh + # ``update_memory``) now acquire fresh # short-lived ``AsyncSession`` instances per call via # :data:`async_session_maker`. The factory still accepts ``db_session`` # for registry compatibility but ``del``'s it immediately — see any diff --git a/surfsense_backend/app/agents/new_chat/mention_resolver.py b/surfsense_backend/app/agents/new_chat/mention_resolver.py index 6a025b947..f13dbc6ae 100644 --- a/surfsense_backend/app/agents/new_chat/mention_resolver.py +++ b/surfsense_backend/app/agents/new_chat/mention_resolver.py @@ -73,9 +73,8 @@ class ResolvedMentionSet: ``@Project Roadmap`` is never shadowed by a shorter prefix ``@Project``). - ``mentioned_document_ids`` collapses doc + surfsense_doc chips into - a single ordered, deduped list because the priority middleware - treats them uniformly downstream — see + ``mentioned_document_ids`` is an ordered, deduped list consumed by + the priority middleware downstream — see ``KnowledgePriorityMiddleware._compute_priority_paths``. """ @@ -103,7 +102,6 @@ async def resolve_mentions( search_space_id: int, mentioned_documents: list[MentionedDocumentInfo] | None, mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, mentioned_folder_ids: list[int] | None = None, ) -> ResolvedMentionSet: """Resolve every @-mention chip on a turn into virtual paths. @@ -111,8 +109,7 @@ async def resolve_mentions( The function takes both the ``mentioned_documents`` discriminated list (chip metadata used for substitution + persistence) and the parallel id arrays (``mentioned_document_ids``, - ``mentioned_surfsense_doc_ids``, ``mentioned_folder_ids``) for two - reasons: + ``mentioned_folder_ids``) for two reasons: * Legacy clients that haven't migrated to the unified chip list still send the id arrays — we treat the union as authoritative. @@ -142,7 +139,6 @@ async def resolve_mentions( dict.fromkeys( [ *(mentioned_document_ids or []), - *(mentioned_surfsense_doc_ids or []), *chip_doc_ids, ] ) diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md b/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md index 56291bf3e..3562ce66e 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md @@ -59,14 +59,13 @@ Do NOT cite document_id. Always use the chunk id. - NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format - NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only - NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess -- Copy the EXACT chunk id from the XML - if it says ``, use [citation:doc-123] +- Copy the EXACT chunk id from the XML - if it says ``, use [citation:5] - If the chunk id is a URL like ``, use [citation:https://example.com/page] CORRECT citation formats: - [citation:5] (numeric chunk ID from knowledge base) -- [citation:doc-123] (for Surfsense documentation chunks) - [citation:https://example.com/article] (URL chunk ID from web search results) - [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations) diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md index 9cc767e7e..073b75fa5 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md @@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE: 2. Ask the user: "Would you like me to answer from my general knowledge instead?" 3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes. - This policy does NOT apply to: - * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?") + * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs. * Formatting, summarization, or analysis of content already present in the conversation * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points") * Tool-usage actions like generating reports, podcasts, images, or scraping webpages diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md index 1d806dbae..1a43ed490 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md @@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE: 2. Ask: "Would you like me to answer from my general knowledge instead?" 3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes. - This policy does NOT apply to: - * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?") + * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs. * Formatting, summarization, or analysis of content already present in the conversation * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points") * Tool-usage actions like generating reports, podcasts, images, or scraping webpages diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md index b8bb069e2..9121de879 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md @@ -13,6 +13,7 @@ When to use which tool: - Knowledge base content (Notion, GitHub, files, notes) → automatically searched - Real-time public web data → call web_search - Reading a specific webpage → call scrape_webpage +- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs **`task` subagents (when to delegate):** - **`linear_specialist`** — Linear-only investigations and tool use. diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md index b081a2123..c5383be77 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md @@ -13,6 +13,7 @@ When to use which tool: - Knowledge base content (Notion, GitHub, files, notes) → automatically searched - Real-time public web data → call web_search - Reading a specific webpage → call scrape_webpage +- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs **`task` subagents (when to delegate):** - **`linear_specialist`** — Linear-only investigations and tool use. diff --git a/surfsense_backend/app/agents/new_chat/prompts/composer.py b/surfsense_backend/app/agents/new_chat/prompts/composer.py index 42f8303e6..412665813 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/composer.py +++ b/surfsense_backend/app/agents/new_chat/prompts/composer.py @@ -151,7 +151,6 @@ def _read_fragment(subpath: str) -> str: # Ordered for reading flow: fundamentals first, then artifact generators, # then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``). ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = ( - "search_surfsense_docs", "web_search", "generate_podcast", "generate_video_presentation", diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/search_surfsense_docs.md b/surfsense_backend/app/agents/new_chat/prompts/examples/search_surfsense_docs.md deleted file mode 100644 index b90f2b7a7..000000000 --- a/surfsense_backend/app/agents/new_chat/prompts/examples/search_surfsense_docs.md +++ /dev/null @@ -1,9 +0,0 @@ - -- User: "How do I install SurfSense?" - - Call: `search_surfsense_docs(query="installation setup")` -- User: "What connectors does SurfSense support?" - - Call: `search_surfsense_docs(query="available connectors integrations")` -- User: "How do I set up the Notion connector?" - - Call: `search_surfsense_docs(query="Notion connector setup configuration")` -- User: "How do I use Docker to run SurfSense?" - - Call: `search_surfsense_docs(query="Docker installation setup")` diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/search_surfsense_docs.md b/surfsense_backend/app/agents/new_chat/prompts/tools/search_surfsense_docs.md deleted file mode 100644 index 133717fec..000000000 --- a/surfsense_backend/app/agents/new_chat/prompts/tools/search_surfsense_docs.md +++ /dev/null @@ -1,7 +0,0 @@ - -- search_surfsense_docs: Search the official SurfSense documentation. - - Use this tool when the user asks anything about SurfSense itself (the application they are using). - - Args: - - query: The search query about SurfSense - - top_k: Number of documentation chunks to retrieve (default: 10) - - Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123]) diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/email-drafting/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/email-drafting/SKILL.md index 32e599e98..2dbc8ec43 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/email-drafting/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/email-drafting/SKILL.md @@ -1,7 +1,6 @@ --- name: email-drafting description: Draft an email matching the user's voice, with structured intent and CTA -allowed-tools: search_surfsense_docs --- # Email drafting diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/kb-research/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/kb-research/SKILL.md index c268278ab..0f0b5ffbb 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/kb-research/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/kb-research/SKILL.md @@ -1,7 +1,7 @@ --- name: kb-research description: Structured approach to finding and synthesizing information from the user's knowledge base -allowed-tools: search_surfsense_docs, scrape_webpage, read_file, ls_tree, grep, web_search +allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search --- # Knowledge-base research diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/meeting-prep/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/meeting-prep/SKILL.md index 9657eb078..5a375fbde 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/meeting-prep/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/meeting-prep/SKILL.md @@ -1,7 +1,7 @@ --- name: meeting-prep description: Pull together briefing materials before a scheduled meeting -allowed-tools: search_surfsense_docs, web_search, scrape_webpage, read_file +allowed-tools: web_search, scrape_webpage, read_file --- # Meeting preparation diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/report-writing/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/report-writing/SKILL.md index 17ac2f391..cfea9593f 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/report-writing/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/report-writing/SKILL.md @@ -1,7 +1,7 @@ --- name: report-writing description: How to scope, draft, and revise a Markdown report artifact via generate_report -allowed-tools: generate_report, search_surfsense_docs, read_file +allowed-tools: generate_report, read_file --- # Report writing diff --git a/surfsense_backend/app/agents/new_chat/skills/builtin/slack-summary/SKILL.md b/surfsense_backend/app/agents/new_chat/skills/builtin/slack-summary/SKILL.md index 33b9e72a2..1a4c3da9f 100644 --- a/surfsense_backend/app/agents/new_chat/skills/builtin/slack-summary/SKILL.md +++ b/surfsense_backend/app/agents/new_chat/skills/builtin/slack-summary/SKILL.md @@ -1,7 +1,6 @@ --- name: slack-summary description: Distill a Slack channel or thread into actionable summary -allowed-tools: search_surfsense_docs --- # Slack summarization diff --git a/surfsense_backend/app/agents/new_chat/subagents/config.py b/surfsense_backend/app/agents/new_chat/subagents/config.py index b993d2b06..2cfd47441 100644 --- a/surfsense_backend/app/agents/new_chat/subagents/config.py +++ b/surfsense_backend/app/agents/new_chat/subagents/config.py @@ -46,7 +46,6 @@ logger = logging.getLogger(__name__) # ``glob``, ``grep``) plus the SurfSense-side read tools. EXPLORE_READ_TOOLS: frozenset[str] = frozenset( { - "search_surfsense_docs", "web_search", "scrape_webpage", "read_file", @@ -61,7 +60,6 @@ EXPLORE_READ_TOOLS: frozenset[str] = frozenset( # is needed, the parent should hand off to ``explore`` first. REPORT_WRITER_TOOLS: frozenset[str] = frozenset( { - "search_surfsense_docs", "read_file", "generate_report", } @@ -222,7 +220,6 @@ EXPLORE_SYSTEM_PROMPT = """You are the **explore** subagent for SurfSense. Conduct read-only research across the user's knowledge base, the web, and any documents the parent agent has surfaced. Return a synthesized answer with explicit citations — never speculate beyond the sources you have actually inspected. ## Tools available -- `search_surfsense_docs` — fast hybrid search over the user's knowledge base. - `web_search` — only when the user's KB clearly does not contain the answer. - `scrape_webpage` — to read a URL the user or the search results provided. - `read_file`, `ls`, `glob`, `grep` — to inspect specific documents or trees the parent has flagged. @@ -242,7 +239,7 @@ Produce a single high-quality report deliverable using `generate_report`. The pa ## Workflow 1. **Outline first.** Before calling `generate_report`, write a one-paragraph outline of the sections you plan to produce. Confirm the outline reflects the parent's instructions. -2. **Source resolution.** Decide whether to call `search_surfsense_docs` and `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set. +2. **Source resolution.** Decide whether to call `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set. 3. **One report.** Call `generate_report` exactly once with `source_strategy` chosen per the topic and chat history (see the `report-writing` skill). 4. **Confirm.** End with a one-sentence summary in your final message — never paste the report back into chat; the artifact card renders itself. """ diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py index bc444b0c0..4b5ae3706 100644 --- a/surfsense_backend/app/agents/new_chat/tools/__init__.py +++ b/surfsense_backend/app/agents/new_chat/tools/__init__.py @@ -5,7 +5,6 @@ This module contains all the tools available to the SurfSense agent. To add a new tool, see the documentation in registry.py. Available tools: -- search_surfsense_docs: Search Surfsense documentation for usage help - generate_podcast: Generate audio podcasts from content - generate_video_presentation: Generate video presentations with slides and narration - generate_image: Generate images from text descriptions using AI models @@ -31,7 +30,6 @@ from .registry import ( get_tool_by_name, ) from .scrape_webpage import create_scrape_webpage_tool -from .search_surfsense_docs import create_search_surfsense_docs_tool from .update_memory import create_update_memory_tool, create_update_team_memory_tool from .video_presentation import create_generate_video_presentation_tool @@ -47,7 +45,6 @@ __all__ = [ "create_generate_podcast_tool", "create_generate_video_presentation_tool", "create_scrape_webpage_tool", - "create_search_surfsense_docs_tool", "create_update_memory_tool", "create_update_team_memory_tool", "format_documents_for_context", diff --git a/surfsense_backend/app/agents/new_chat/tools/podcast.py b/surfsense_backend/app/agents/new_chat/tools/podcast.py index 36aecfe49..83ac98768 100644 --- a/surfsense_backend/app/agents/new_chat/tools/podcast.py +++ b/surfsense_backend/app/agents/new_chat/tools/podcast.py @@ -131,9 +131,7 @@ def create_generate_podcast_tool( "podcast_id": podcast_id, "title": podcast_title, "file_location": file_location, - "message": ( - "Podcast generated and saved to your podcast panel." - ), + "message": ("Podcast generated and saved to your podcast panel."), } # Only other terminal state is FAILED. @@ -146,9 +144,7 @@ def create_generate_podcast_tool( "status": PodcastStatus.FAILED.value, "podcast_id": podcast_id, "title": podcast_title, - "error": ( - "Background worker reported FAILED status for this podcast." - ), + "error": ("Background worker reported FAILED status for this podcast."), } except Exception as e: diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index 8c263ca20..6f011e372 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -101,7 +101,6 @@ from .podcast import create_generate_podcast_tool from .report import create_generate_report_tool from .resume import create_generate_resume_tool from .scrape_webpage import create_scrape_webpage_tool -from .search_surfsense_docs import create_search_surfsense_docs_tool from .teams import ( create_list_teams_channels_tool, create_read_teams_messages_tool, @@ -258,15 +257,6 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ ), requires=[], ), - # Surfsense documentation search tool - ToolDefinition( - name="search_surfsense_docs", - description="Search Surfsense documentation for help with using the application", - factory=lambda deps: create_search_surfsense_docs_tool( - db_session=deps["db_session"], - ), - requires=["db_session"], - ), # ========================================================================= # SERVICE ACCOUNT DISCOVERY # Generic tool for the LLM to discover connected accounts and resolve diff --git a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py deleted file mode 100644 index d8a0efac7..000000000 --- a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py +++ /dev/null @@ -1,174 +0,0 @@ -""" -Surfsense documentation search tool. - -This tool allows the agent to search the pre-indexed Surfsense documentation -to help users with questions about how to use the application. - -The documentation is indexed at deployment time from MDX files and stored -in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks). -""" - -import asyncio -import json - -from langchain_core.tools import tool -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker -from app.utils.document_converters import embed_text -from app.utils.surfsense_docs import surfsense_docs_public_url - - -def format_surfsense_docs_results(results: list[tuple]) -> str: - """ - Format search results into XML structure for the LLM context. - - Uses the same XML structure as format_documents_for_context from knowledge_base.py - but with 'doc-' prefix on chunk IDs. This allows: - - LLM to use consistent [citation:doc-XXX] format - - Frontend to detect 'doc-' prefix and route to surfsense docs endpoint - - Args: - results: List of (chunk, document) tuples from the database query - - Returns: - Formatted XML string with documentation content and citation-ready chunks - """ - if not results: - return "No relevant Surfsense documentation found for your query." - - # Group chunks by document - grouped: dict[int, dict] = {} - for chunk, doc in results: - public_url = surfsense_docs_public_url(doc.source) - if doc.id not in grouped: - grouped[doc.id] = { - "document_id": f"doc-{doc.id}", - "document_type": "SURFSENSE_DOCS", - "title": doc.title, - "url": public_url, - "metadata": {"source": doc.source, "public_url": public_url}, - "chunks": [], - } - grouped[doc.id]["chunks"].append( - { - "chunk_id": f"doc-{chunk.id}", - "content": chunk.content, - } - ) - - # Render XML matching format_documents_for_context structure - parts: list[str] = [] - for g in grouped.values(): - metadata_json = json.dumps(g["metadata"], ensure_ascii=False) - - parts.append("") - parts.append("") - parts.append(f" {g['document_id']}") - parts.append(f" {g['document_type']}") - parts.append(f" <![CDATA[{g['title']}]]>") - parts.append(f" ") - parts.append(f" ") - parts.append("") - parts.append("") - parts.append("") - - for ch in g["chunks"]: - parts.append( - f" " - ) - - parts.append("") - parts.append("") - parts.append("") - - return "\n".join(parts).strip() - - -async def search_surfsense_docs_async( - query: str, - db_session: AsyncSession, - top_k: int = 10, -) -> str: - """ - Search Surfsense documentation using vector similarity. - - Args: - query: The search query about Surfsense usage - db_session: Database session for executing queries - top_k: Number of results to return - - Returns: - Formatted string with relevant documentation content - """ - # Get embedding for the query - query_embedding = await asyncio.to_thread(embed_text, query) - - # Vector similarity search on chunks, joining with documents - stmt = ( - select(SurfsenseDocsChunk, SurfsenseDocsDocument) - .join( - SurfsenseDocsDocument, - SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id, - ) - .order_by(SurfsenseDocsChunk.embedding.op("<=>")(query_embedding)) - .limit(top_k) - ) - - result = await db_session.execute(stmt) - rows = result.all() - - return format_surfsense_docs_results(rows) - - -def create_search_surfsense_docs_tool(db_session: AsyncSession): - """ - Factory function to create the search_surfsense_docs tool. - - The tool acquires its own short-lived ``AsyncSession`` per call via - :data:`async_session_maker` so the closure is safe to share across - HTTP requests by the compiled-agent cache. Capturing a per-request - session here would surface stale/closed sessions on cache hits. - - Args: - db_session: Reserved for registry compatibility. Per-call sessions - are opened via :data:`async_session_maker` inside the tool body. - - Returns: - A configured tool function for searching Surfsense documentation - """ - del db_session # per-call session — see docstring - - @tool - async def search_surfsense_docs(query: str, top_k: int = 10) -> str: - """ - Search Surfsense documentation for help with using the application. - - Use this tool when the user asks questions about: - - How to use Surfsense features - - Installation and setup instructions - - Configuration options and settings - - Troubleshooting common issues - - Available connectors and integrations - - Browser extension usage - - API documentation - - This searches the official Surfsense documentation that was indexed - at deployment time. It does NOT search the user's personal knowledge base. - - Args: - query: The search query about Surfsense usage or features - top_k: Number of documentation chunks to retrieve (default: 10) - - Returns: - Relevant documentation content formatted with chunk IDs for citations - """ - async with async_session_maker() as db_session: - return await search_surfsense_docs_async( - query=query, - db_session=db_session, - top_k=top_k, - ) - - return search_surfsense_docs diff --git a/surfsense_backend/app/agents/new_chat/tools/video_presentation.py b/surfsense_backend/app/agents/new_chat/tools/video_presentation.py index 4bf13b28e..34f5183ca 100644 --- a/surfsense_backend/app/agents/new_chat/tools/video_presentation.py +++ b/surfsense_backend/app/agents/new_chat/tools/video_presentation.py @@ -127,9 +127,7 @@ def create_generate_video_presentation_tool( except Exception as e: error_message = str(e) - logger.exception( - "[generate_video_presentation] Error: %s", error_message - ) + logger.exception("[generate_video_presentation] Error: %s", error_message) return { "status": VideoPresentationStatus.FAILED.value, "error": error_message, diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 43b0af7d2..223eb5a1b 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -43,7 +43,6 @@ from app.rate_limiter import get_real_client_ip, limiter from app.routes import router as crud_router from app.routes.auth_routes import router as auth_router from app.schemas import UserCreate, UserRead, UserUpdate -from app.tasks.surfsense_docs_indexer import seed_surfsense_docs from app.users import SECRET, auth_backend, current_active_user, fastapi_users from app.utils.perf import log_system_snapshot @@ -576,13 +575,6 @@ async def lifespan(app: FastAPI): initialize_llm_router() initialize_image_gen_router() initialize_vision_llm_router() - try: - await asyncio.wait_for(seed_surfsense_docs(), timeout=120) - except TimeoutError: - logging.getLogger(__name__).warning( - "Surfsense docs seeding timed out after 120s — skipping. " - "Docs will be indexed on the next restart." - ) # Phase 1.7 — JIT warmup. Bounded so a stuck warmup never delays # worker readiness. ``shield`` so Uvicorn cancelling startup diff --git a/surfsense_backend/app/automations/actions/__init__.py b/surfsense_backend/app/automations/actions/__init__.py index 9ef091cb3..72669532f 100644 --- a/surfsense_backend/app/automations/actions/__init__.py +++ b/surfsense_backend/app/automations/actions/__init__.py @@ -21,4 +21,4 @@ __all__ = [ ] # Built-in actions self-register at import time. -from . import agent_task # noqa: E402, F401 +from . import agent_task # noqa: F401 diff --git a/surfsense_backend/app/automations/actions/agent_task/__init__.py b/surfsense_backend/app/automations/actions/agent_task/__init__.py index 308812211..3a42a2815 100644 --- a/surfsense_backend/app/automations/actions/agent_task/__init__.py +++ b/surfsense_backend/app/automations/actions/agent_task/__init__.py @@ -12,4 +12,4 @@ from .params import AgentTaskActionParams __all__ = ["AgentTaskActionParams", "build_handler"] # Side-effect: register on the actions store. -from . import definition # noqa: E402, F401 +from . import definition # noqa: F401 diff --git a/surfsense_backend/app/automations/actions/agent_task/factory.py b/surfsense_backend/app/automations/actions/agent_task/factory.py index 18a408e13..dec75dce8 100644 --- a/surfsense_backend/app/automations/actions/agent_task/factory.py +++ b/surfsense_backend/app/automations/actions/agent_task/factory.py @@ -18,6 +18,11 @@ def build_handler(ctx: ActionContext) -> ActionHandler: ctx=ctx, query=validated.query, auto_approve_all=validated.auto_approve_all, + mentioned_document_ids=validated.mentioned_document_ids, + mentioned_folder_ids=validated.mentioned_folder_ids, + mentioned_connector_ids=validated.mentioned_connector_ids, + mentioned_connectors=validated.mentioned_connectors, + mentioned_documents=validated.mentioned_documents, ) return handle diff --git a/surfsense_backend/app/automations/actions/agent_task/invoke.py b/surfsense_backend/app/automations/actions/agent_task/invoke.py index a37e9beed..fa02d263f 100644 --- a/surfsense_backend/app/automations/actions/agent_task/invoke.py +++ b/surfsense_backend/app/automations/actions/agent_task/invoke.py @@ -8,12 +8,15 @@ from typing import Any from langchain_core.messages import HumanMessage from langgraph.types import Command +from sqlalchemy.ext.asyncio import AsyncSession from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent +from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text from app.db import ChatVisibility, async_session_maker +from app.schemas.new_chat import MentionedDocumentInfo from ..types import ActionContext - from .auto_decide import build_auto_decisions from .dependencies import build_dependencies from .finalize import extract_final_assistant_message @@ -23,17 +26,118 @@ from .finalize import extract_final_assistant_message _MAX_RESUMES = 50 +def _build_connector_block(connectors: list[dict[str, Any]]) -> str | None: + """Render the ```` context block (same shape as chat). + + Mirrors ``stream_new_chat`` so the agent gets the exact connector accounts + the user picked. Returns ``None`` when nothing renders. + """ + lines: list[str] = [] + for connector in connectors: + connector_id = connector.get("id") + connector_type = connector.get("connector_type") or connector.get( + "document_type" + ) + account_name = connector.get("account_name") or connector.get("title") + if connector_id is None or connector_type is None: + continue + lines.append( + f' - connector_id={connector_id}, connector_type="{connector_type}", ' + f'account_name="{account_name or ""}"' + ) + if not lines: + return None + return ( + "\n" + "The user selected these exact connector accounts with @. " + "These entries are selection metadata, not retrieved connector content. " + "When a connector-backed tool needs an account, use the matching " + "connector_id from this list if the tool supports connector_id:\n" + + "\n".join(lines) + + "\n" + ) + + +async def _resolve_mention_context( + session: AsyncSession, + *, + search_space_id: int, + query: str, + mentioned_document_ids: list[int] | None, + mentioned_folder_ids: list[int] | None, + mentioned_connector_ids: list[int] | None, + mentioned_connectors: list[MentionedDocumentInfo] | None, + mentioned_documents: list[MentionedDocumentInfo] | None, +) -> tuple[str, SurfSenseContextSchema | None]: + """Resolve @-mentions into a rewritten query + per-invocation context. + + Automation always runs in cloud filesystem mode, so we mirror the chat + ``new_chat`` flow: substitute ``@title`` tokens with canonical + ``/documents/...`` paths, prepend a ```` block, and + build a ``SurfSenseContextSchema`` that ``KnowledgePriorityMiddleware`` + reads via ``runtime.context``. Returns ``(query, None)`` unchanged when + there are no mentions. + """ + has_mentions = bool( + mentioned_document_ids + or mentioned_folder_ids + or mentioned_connector_ids + or mentioned_connectors + or mentioned_documents + ) + if not has_mentions: + return query, None + + resolved = await resolve_mentions( + session, + search_space_id=search_space_id, + mentioned_documents=mentioned_documents, + mentioned_document_ids=mentioned_document_ids, + mentioned_folder_ids=mentioned_folder_ids, + ) + agent_query = substitute_in_text(query, resolved.token_to_path) + + # ``SurfSenseContextSchema.mentioned_connectors`` is typed ``list[dict]`` and + # the connector block reads dicts, so dump the pydantic chips once. + connector_dicts = [c.model_dump() for c in (mentioned_connectors or [])] + connector_block = _build_connector_block(connector_dicts) + if connector_block: + agent_query = f"{connector_block}\n\n{agent_query}" + + runtime_context = SurfSenseContextSchema( + search_space_id=search_space_id, + mentioned_document_ids=list( + resolved.mentioned_document_ids or (mentioned_document_ids or []) + ), + mentioned_folder_ids=list( + resolved.mentioned_folder_ids or (mentioned_folder_ids or []) + ), + mentioned_connector_ids=list(mentioned_connector_ids or []), + mentioned_connectors=connector_dicts, + ) + return agent_query, runtime_context + + async def run_agent_task( *, ctx: ActionContext, query: str, auto_approve_all: bool, + mentioned_document_ids: list[int] | None = None, + mentioned_folder_ids: list[int] | None = None, + mentioned_connector_ids: list[int] | None = None, + mentioned_connectors: list[MentionedDocumentInfo] | None = None, + mentioned_documents: list[MentionedDocumentInfo] | None = None, ) -> dict[str, Any]: """Invoke multi_agent_chat for one rendered query and return its outcome. Opens its own DB session so the executor's bookkeeping session isn't tied up for the entire invocation. The LangGraph ``thread_id`` (a fresh UUID) is returned as ``agent_session_id`` for later inspection. + + @-mentions (files / folders / connectors) chosen in the task input are + resolved the same way the chat flow does and forwarded to the agent via the + per-invocation ``context`` so they actually scope retrieval. """ agent_session_id = str(uuid.uuid4()) user_id = str(ctx.creator_user_id) if ctx.creator_user_id else None @@ -56,12 +160,24 @@ async def run_agent_task( agent_config=deps.agent_config, firecrawl_api_key=deps.firecrawl_api_key, thread_visibility=ChatVisibility.PRIVATE, + mentioned_document_ids=mentioned_document_ids, + ) + + agent_query, runtime_context = await _resolve_mention_context( + agent_session, + search_space_id=ctx.search_space_id, + query=query, + mentioned_document_ids=mentioned_document_ids, + mentioned_folder_ids=mentioned_folder_ids, + mentioned_connector_ids=mentioned_connector_ids, + mentioned_connectors=mentioned_connectors, + mentioned_documents=mentioned_documents, ) request_id = f"automation:{ctx.run_id}:{ctx.step_id}" turn_id = f"{request_id}:{int(time.time() * 1000)}" input_state: dict[str, Any] = { - "messages": [HumanMessage(content=query)], + "messages": [HumanMessage(content=agent_query)], "search_space_id": ctx.search_space_id, "request_id": request_id, "turn_id": turn_id, @@ -74,8 +190,17 @@ async def run_agent_task( }, "recursion_limit": 10_000, } + if runtime_context is not None: + runtime_context.request_id = request_id + runtime_context.turn_id = turn_id - result = await agent.ainvoke(input_state, config=config) + # The compiled graph declares ``context_schema=SurfSenseContextSchema``; + # mentions only reach ``KnowledgePriorityMiddleware`` via ``context=``. + invoke_kwargs: dict[str, Any] = {"config": config} + if runtime_context is not None: + invoke_kwargs["context"] = runtime_context + + result = await agent.ainvoke(input_state, **invoke_kwargs) resumes = 0 while True: @@ -88,7 +213,7 @@ async def run_agent_task( ) lg_resume_map, routed = build_auto_decisions(state, decision) config["configurable"]["surfsense_resume_value"] = routed - result = await agent.ainvoke(Command(resume=lg_resume_map), config=config) + result = await agent.ainvoke(Command(resume=lg_resume_map), **invoke_kwargs) resumes += 1 return { diff --git a/surfsense_backend/app/automations/actions/agent_task/params.py b/surfsense_backend/app/automations/actions/agent_task/params.py index b0e99a78b..ad6f35edb 100644 --- a/surfsense_backend/app/automations/actions/agent_task/params.py +++ b/surfsense_backend/app/automations/actions/agent_task/params.py @@ -4,6 +4,8 @@ from __future__ import annotations from pydantic import BaseModel, ConfigDict, Field +from app.schemas.new_chat import MentionedDocumentInfo + class AgentTaskActionParams(BaseModel): """Run a multi_agent_chat turn from an automation step.""" @@ -19,3 +21,32 @@ class AgentTaskActionParams(BaseModel): default=False, description="If true, every HITL approval is auto-approved; otherwise rejected.", ) + + # @-mention references chosen in the task input. Mirror the ``new_chat`` + # request fields (minus SurfSense product docs) so the run can scope + # retrieval to the user's selected files / folders / connectors. All + # optional and additive; a task with no mentions behaves as before. + mentioned_document_ids: list[int] | None = Field( + default=None, + description="Knowledge-base document IDs the task references with @.", + ) + mentioned_folder_ids: list[int] | None = Field( + default=None, + description="Knowledge-base folder IDs the task references with @.", + ) + mentioned_connector_ids: list[int] | None = Field( + default=None, + description="Concrete connector account IDs the task references with @.", + ) + mentioned_connectors: list[MentionedDocumentInfo] | None = Field( + default=None, + description="Display/context metadata for the @-mentioned connector accounts.", + ) + mentioned_documents: list[MentionedDocumentInfo] | None = Field( + default=None, + description=( + "Chip metadata (id, title, kind, ...) for every @-mention so the " + "run can resolve titles to virtual paths and substitute them in " + "the query." + ), + ) diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py index 262e4c2bf..471b2df77 100644 --- a/surfsense_backend/app/automations/persistence/models/run.py +++ b/surfsense_backend/app/automations/persistence/models/run.py @@ -50,7 +50,7 @@ class AutomationRun(BaseModel, TimestampMixin): definition_snapshot = Column(JSONB, nullable=False) # merged & validated inputs the run was dispatched with - # (trigger.static_inputs ∪ producer runtime data, static wins on collision) + # (trigger.static_inputs union producer runtime data, static wins on collision) inputs = Column(JSONB, nullable=False, server_default="{}") # one entry per executed step; agent_task entries carry their own # `agent_session_id` inside their entry diff --git a/surfsense_backend/app/automations/runtime/executor.py b/surfsense_backend/app/automations/runtime/executor.py index b8a377e5b..6a33ab314 100644 --- a/surfsense_backend/app/automations/runtime/executor.py +++ b/surfsense_backend/app/automations/runtime/executor.py @@ -6,9 +6,9 @@ from typing import Any from sqlalchemy.ext.asyncio import AsyncSession +from app.automations.actions.types import ActionContext from app.automations.persistence.enums.run_status import RunStatus from app.automations.persistence.models.run import AutomationRun -from app.automations.actions.types import ActionContext from app.automations.schemas.definition.envelope import AutomationDefinition from app.automations.schemas.definition.plan_step import PlanStep from app.automations.templating import build_run_context @@ -32,7 +32,10 @@ async def execute_run(session: AsyncSession, run_id: int) -> None: await repository.mark_failed( session, run, - {"message": f"definition_snapshot invalid: {exc}", "type": type(exc).__name__}, + { + "message": f"definition_snapshot invalid: {exc}", + "type": type(exc).__name__, + }, ) await session.commit() return @@ -92,7 +95,9 @@ async def _run_on_failure( await session.commit() -def _build_template_ctx(run: AutomationRun, step_outputs: dict[str, Any]) -> dict[str, Any]: +def _build_template_ctx( + run: AutomationRun, step_outputs: dict[str, Any] +) -> dict[str, Any]: automation = run.automation trigger = run.trigger return build_run_context( diff --git a/surfsense_backend/app/automations/runtime/step.py b/surfsense_backend/app/automations/runtime/step.py index ac18b5e1f..6e7c9c671 100644 --- a/surfsense_backend/app/automations/runtime/step.py +++ b/surfsense_backend/app/automations/runtime/step.py @@ -30,14 +30,18 @@ async def execute_step( try: should_run = evaluate_predicate(step.when, template_context) except Exception as exc: - return _result(step, "failed", started_at, attempts=0, error=_error(exc, "when")) + return _result( + step, "failed", started_at, attempts=0, error=_error(exc, "when") + ) if not should_run: return _result(step, "skipped", started_at, attempts=0) try: resolved_params = render_value(step.params, template_context) except Exception as exc: - return _result(step, "failed", started_at, attempts=0, error=_error(exc, "render")) + return _result( + step, "failed", started_at, attempts=0, error=_error(exc, "render") + ) action = get_action(step.action) if action is None: @@ -46,12 +50,17 @@ async def execute_step( "failed", started_at, attempts=0, - error={"message": f"action not registered: {step.action}", "type": "ActionNotFound"}, + error={ + "message": f"action not registered: {step.action}", + "type": "ActionNotFound", + }, ) handler = action.build_handler(action_context) - max_retries = step.max_retries if step.max_retries is not None else default_max_retries + max_retries = ( + step.max_retries if step.max_retries is not None else default_max_retries + ) timeout = step.timeout_seconds or default_timeout_seconds try: @@ -62,7 +71,9 @@ async def execute_step( timeout=timeout, ) except Exception as exc: - return _result(step, "failed", started_at, attempts=max_retries + 1, error=_error(exc)) + return _result( + step, "failed", started_at, attempts=max_retries + 1, error=_error(exc) + ) return _result(step, "succeeded", started_at, attempts=attempts, result=result) diff --git a/surfsense_backend/app/automations/schemas/definition/execution.py b/surfsense_backend/app/automations/schemas/definition/execution.py index 61861f8d8..bdbad62f8 100644 --- a/surfsense_backend/app/automations/schemas/definition/execution.py +++ b/surfsense_backend/app/automations/schemas/definition/execution.py @@ -12,7 +12,9 @@ from .plan_step import PlanStep class Execution(BaseModel): model_config = ConfigDict(extra="forbid") - timeout_seconds: int = Field(default=600, gt=0, description="Wall-clock cap for the run.") + timeout_seconds: int = Field( + default=600, gt=0, description="Wall-clock cap for the run." + ) max_retries: int = Field(default=2, ge=0, description="Per-step retry budget.") retry_backoff: Literal["exponential", "linear", "none"] = "exponential" concurrency: Literal["drop_if_running", "queue", "always"] = "drop_if_running" diff --git a/surfsense_backend/app/automations/schemas/definition/plan_step.py b/surfsense_backend/app/automations/schemas/definition/plan_step.py index 5d16f1f3e..0d3bb9dfc 100644 --- a/surfsense_backend/app/automations/schemas/definition/plan_step.py +++ b/surfsense_backend/app/automations/schemas/definition/plan_step.py @@ -11,7 +11,9 @@ class PlanStep(BaseModel): model_config = ConfigDict(extra="forbid") step_id: str = Field(..., min_length=1, description="Unique within the plan.") - action: str = Field(..., min_length=1, description="Action type; resolved via registry.") + action: str = Field( + ..., min_length=1, description="Action type; resolved via registry." + ) when: str | None = Field( default=None, description="Optional predicate; step is skipped when falsy.", diff --git a/surfsense_backend/app/automations/schemas/definition/trigger_spec.py b/surfsense_backend/app/automations/schemas/definition/trigger_spec.py index a359a2f63..e6a995bbf 100644 --- a/surfsense_backend/app/automations/schemas/definition/trigger_spec.py +++ b/surfsense_backend/app/automations/schemas/definition/trigger_spec.py @@ -10,7 +10,9 @@ from pydantic import BaseModel, ConfigDict, Field class TriggerSpec(BaseModel): model_config = ConfigDict(extra="forbid") - type: str = Field(..., min_length=1, description="Trigger type; resolved via registry.") + type: str = Field( + ..., min_length=1, description="Trigger type; resolved via registry." + ) params: dict[str, Any] = Field( default_factory=dict, description="Type-specific params; validated against the trigger's schema.", diff --git a/surfsense_backend/app/automations/services/automation.py b/surfsense_backend/app/automations/services/automation.py index 9140da3b5..0d2937e0e 100644 --- a/surfsense_backend/app/automations/services/automation.py +++ b/surfsense_backend/app/automations/services/automation.py @@ -10,14 +10,14 @@ from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import selectinload +from app.automations.persistence.enums.trigger_type import TriggerType +from app.automations.persistence.models.automation import Automation +from app.automations.persistence.models.trigger import AutomationTrigger from app.automations.schemas.api import ( AutomationCreate, AutomationUpdate, TriggerCreate, ) -from app.automations.persistence.enums.trigger_type import TriggerType -from app.automations.persistence.models.automation import Automation -from app.automations.persistence.models.trigger import AutomationTrigger from app.automations.triggers import get_trigger from app.automations.triggers.schedule import compute_next_fire_at from app.db import Permission, User, get_async_session @@ -34,7 +34,9 @@ class AutomationService: async def create(self, payload: AutomationCreate) -> Automation: """Create an automation and its initial triggers in one transaction.""" - await self._authorize(payload.search_space_id, Permission.AUTOMATIONS_CREATE.value) + await self._authorize( + payload.search_space_id, Permission.AUTOMATIONS_CREATE.value + ) automation = Automation( search_space_id=payload.search_space_id, @@ -67,22 +69,32 @@ class AutomationService: ) rows = ( - await self.session.execute( - base.order_by(Automation.created_at.desc()).limit(limit).offset(offset) + ( + await self.session.execute( + base.order_by(Automation.created_at.desc()) + .limit(limit) + .offset(offset) + ) ) - ).scalars().all() + .scalars() + .all() + ) return list(rows), int(total or 0) async def get(self, automation_id: int) -> Automation: """Get an automation with its triggers loaded.""" automation = await self._get_with_triggers_or_raise(automation_id) - await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_READ.value) + await self._authorize( + automation.search_space_id, Permission.AUTOMATIONS_READ.value + ) return automation async def update(self, automation_id: int, patch: AutomationUpdate) -> Automation: """Patch fields. Bumps ``version`` when ``definition`` changes.""" automation = await self._get_with_triggers_or_raise(automation_id) - await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_UPDATE.value) + await self._authorize( + automation.search_space_id, Permission.AUTOMATIONS_UPDATE.value + ) data = patch.model_dump(exclude_unset=True) @@ -93,7 +105,9 @@ class AutomationService: if "status" in data: automation.status = data["status"] if "definition" in data: - automation.definition = patch.definition.model_dump(mode="json", by_alias=True) + automation.definition = patch.definition.model_dump( + mode="json", by_alias=True + ) automation.version += 1 await self.session.commit() @@ -102,7 +116,9 @@ class AutomationService: async def delete(self, automation_id: int) -> None: """Delete an automation; FK cascades remove triggers and runs.""" automation = await self._get_or_raise(automation_id) - await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_DELETE.value) + await self._authorize( + automation.search_space_id, Permission.AUTOMATIONS_DELETE.value + ) await self.session.delete(automation) await self.session.commit() @@ -141,7 +157,9 @@ def _build_trigger(spec: TriggerCreate) -> AutomationTrigger: """Validate trigger params via its registered Pydantic model and build the ORM row.""" definition = get_trigger(spec.type.value) if definition is None: - raise HTTPException(status_code=422, detail=f"unknown trigger type {spec.type.value!r}") + raise HTTPException( + status_code=422, detail=f"unknown trigger type {spec.type.value!r}" + ) try: validated = definition.params_model.model_validate(spec.params) diff --git a/surfsense_backend/app/automations/services/run.py b/surfsense_backend/app/automations/services/run.py index ac9970241..3ef80416f 100644 --- a/surfsense_backend/app/automations/services/run.py +++ b/surfsense_backend/app/automations/services/run.py @@ -36,10 +36,16 @@ class RunService: ) rows = ( - await self.session.execute( - base.order_by(AutomationRun.created_at.desc()).limit(limit).offset(offset) + ( + await self.session.execute( + base.order_by(AutomationRun.created_at.desc()) + .limit(limit) + .offset(offset) + ) ) - ).scalars().all() + .scalars() + .all() + ) return list(rows), int(total or 0) async def get(self, *, automation_id: int, run_id: int) -> AutomationRun: diff --git a/surfsense_backend/app/automations/services/trigger.py b/surfsense_backend/app/automations/services/trigger.py index c76cc0740..29ac84557 100644 --- a/surfsense_backend/app/automations/services/trigger.py +++ b/surfsense_backend/app/automations/services/trigger.py @@ -8,10 +8,10 @@ from fastapi import Depends, HTTPException from pydantic import ValidationError from sqlalchemy.ext.asyncio import AsyncSession -from app.automations.schemas.api import TriggerCreate, TriggerUpdate from app.automations.persistence.enums.trigger_type import TriggerType from app.automations.persistence.models.automation import Automation from app.automations.persistence.models.trigger import AutomationTrigger +from app.automations.schemas.api import TriggerCreate, TriggerUpdate from app.automations.triggers import get_trigger from app.automations.triggers.schedule import compute_next_fire_at from app.db import Permission, User, get_async_session @@ -40,7 +40,9 @@ class TriggerService: params=validated_params, static_inputs=payload.static_inputs, enabled=payload.enabled, - next_fire_at=_initial_next_fire(payload.type, validated_params, payload.enabled), + next_fire_at=_initial_next_fire( + payload.type, validated_params, payload.enabled + ), ) self.session.add(trigger) await self.session.commit() @@ -54,7 +56,9 @@ class TriggerService: trigger_id: int, patch: TriggerUpdate, ) -> AutomationTrigger: - await self._authorize_automation(automation_id, Permission.AUTOMATIONS_UPDATE.value) + await self._authorize_automation( + automation_id, Permission.AUTOMATIONS_UPDATE.value + ) trigger = await self._get_trigger_or_raise(automation_id, trigger_id) data = patch.model_dump(exclude_unset=True) @@ -80,7 +84,9 @@ class TriggerService: return trigger async def remove(self, *, automation_id: int, trigger_id: int) -> None: - await self._authorize_automation(automation_id, Permission.AUTOMATIONS_UPDATE.value) + await self._authorize_automation( + automation_id, Permission.AUTOMATIONS_UPDATE.value + ) trigger = await self._get_trigger_or_raise(automation_id, trigger_id) await self.session.delete(trigger) await self.session.commit() diff --git a/surfsense_backend/app/automations/tasks/execute_run.py b/surfsense_backend/app/automations/tasks/execute_run.py index 5fc84698b..ed448515d 100644 --- a/surfsense_backend/app/automations/tasks/execute_run.py +++ b/surfsense_backend/app/automations/tasks/execute_run.py @@ -17,7 +17,7 @@ TASK_NAME = "automation_run_execute" @celery_app.task(name=TASK_NAME, bind=True) -def automation_run_execute(self, run_id: int) -> None: # noqa: ARG001 — Celery bind +def automation_run_execute(self, run_id: int) -> None: """Execute one ``AutomationRun``. Idempotent: terminal runs no-op.""" return run_async_celery_task(lambda: _impl(run_id)) diff --git a/surfsense_backend/app/automations/tasks/schedule_tick.py b/surfsense_backend/app/automations/tasks/schedule_tick.py index 385bd7242..90fff66fc 100644 --- a/surfsense_backend/app/automations/tasks/schedule_tick.py +++ b/surfsense_backend/app/automations/tasks/schedule_tick.py @@ -103,9 +103,7 @@ async def _self_heal_null_next_fire(session: AsyncSession, *, now: datetime) -> await session.commit() -async def _claim_due_triggers( - session: AsyncSession, *, now: datetime -) -> list[_Claim]: +async def _claim_due_triggers(session: AsyncSession, *, now: datetime) -> list[_Claim]: """Lock and advance due rows; return per-trigger fire context.""" stmt = ( select(AutomationTrigger) diff --git a/surfsense_backend/app/automations/triggers/__init__.py b/surfsense_backend/app/automations/triggers/__init__.py index d7abb6b5d..f630ebf6f 100644 --- a/surfsense_backend/app/automations/triggers/__init__.py +++ b/surfsense_backend/app/automations/triggers/__init__.py @@ -17,4 +17,4 @@ __all__ = [ ] # Built-in triggers self-register at import time. -from . import schedule # noqa: E402, F401 +from . import schedule # noqa: F401 diff --git a/surfsense_backend/app/automations/triggers/schedule/__init__.py b/surfsense_backend/app/automations/triggers/schedule/__init__.py index 5587692b9..92f478aac 100644 --- a/surfsense_backend/app/automations/triggers/schedule/__init__.py +++ b/surfsense_backend/app/automations/triggers/schedule/__init__.py @@ -15,4 +15,4 @@ __all__ = [ ] # Side-effect: register on the triggers store. -from . import definition # noqa: E402, F401 +from . import definition # noqa: F401 diff --git a/surfsense_backend/app/automations/triggers/schedule/cron.py b/surfsense_backend/app/automations/triggers/schedule/cron.py index 7155bab33..a8401e4a3 100644 --- a/surfsense_backend/app/automations/triggers/schedule/cron.py +++ b/surfsense_backend/app/automations/triggers/schedule/cron.py @@ -32,6 +32,10 @@ def compute_next_fire_at(cron: str, timezone: str, *, after: datetime) -> dateti given timezone before evaluation so DST and IANA rules apply correctly. """ tz = ZoneInfo(timezone) - base = after.astimezone(tz) if after.tzinfo else after.replace(tzinfo=UTC).astimezone(tz) + base = ( + after.astimezone(tz) + if after.tzinfo + else after.replace(tzinfo=UTC).astimezone(tz) + ) nxt: datetime = croniter(cron, base).get_next(datetime) return nxt.astimezone(UTC) diff --git a/surfsense_backend/app/automations/triggers/schedule/params.py b/surfsense_backend/app/automations/triggers/schedule/params.py index 21da84f68..f3945a1b8 100644 --- a/surfsense_backend/app/automations/triggers/schedule/params.py +++ b/surfsense_backend/app/automations/triggers/schedule/params.py @@ -10,7 +10,9 @@ from .cron import InvalidCronError, validate_cron class ScheduleTriggerParams(BaseModel): model_config = ConfigDict(extra="forbid") - cron: str = Field(..., description="Five-field cron expression.", examples=["0 9 * * 1-5"]) + cron: str = Field( + ..., description="Five-field cron expression.", examples=["0 9 * * 1-5"] + ) timezone: str = Field(..., description="IANA timezone.", examples=["Africa/Kigali"]) @model_validator(mode="after") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index ac880ded5..d6ee9ff88 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -1150,46 +1150,6 @@ class Chunk(BaseModel, TimestampMixin): document = relationship("Document", back_populates="chunks") -class SurfsenseDocsDocument(BaseModel, TimestampMixin): - """ - Surfsense documentation storage. - Indexed at migration time from MDX files. - """ - - __tablename__ = "surfsense_docs_documents" - - source = Column( - String, nullable=False, unique=True, index=True - ) # File path: "connectors/slack.mdx" - title = Column(String, nullable=False) - content = Column(Text, nullable=False) - content_hash = Column(String, nullable=False, index=True) # For detecting changes - embedding = Column(Vector(config.embedding_model_instance.dimension)) - updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True) - - chunks = relationship( - "SurfsenseDocsChunk", - back_populates="document", - cascade="all, delete-orphan", - ) - - -class SurfsenseDocsChunk(BaseModel, TimestampMixin): - """Chunk storage for Surfsense documentation.""" - - __tablename__ = "surfsense_docs_chunks" - - content = Column(Text, nullable=False) - embedding = Column(Vector(config.embedding_model_instance.dimension)) - - document_id = Column( - Integer, - ForeignKey("surfsense_docs_documents.id", ondelete="CASCADE"), - nullable=False, - ) - document = relationship("SurfsenseDocsDocument", back_populates="chunks") - - class Podcast(BaseModel, TimestampMixin): """Podcast model for storing generated podcasts.""" @@ -2605,7 +2565,6 @@ from app.automations.persistence import ( # noqa: E402, F401 AutomationTrigger, ) - engine = create_async_engine( DATABASE_URL, pool_size=30, @@ -2681,11 +2640,6 @@ async def setup_indexes(): "CREATE INDEX IF NOT EXISTS idx_documents_search_space_updated ON documents (search_space_id, updated_at DESC NULLS LAST) INCLUDE (id, title, document_type)" ) ) - await conn.execute( - text( - "CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm ON surfsense_docs_documents USING gin (title gin_trgm_ops)" - ) - ) async def create_db_and_tables(): diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index ef1c9312a..8373f13c3 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -1,5 +1,7 @@ from fastapi import APIRouter +from app.automations.api import router as automations_router + from .agent_action_log_route import router as agent_action_log_router from .agent_flags_route import router as agent_flags_router from .agent_permissions_route import router as agent_permissions_router @@ -7,7 +9,6 @@ from .agent_revert_route import router as agent_revert_router from .airtable_add_connector_route import ( router as airtable_add_connector_router, ) -from app.automations.api import router as automations_router from .chat_comments_routes import router as chat_comments_router from .circleback_webhook_route import router as circleback_webhook_router from .clickup_add_connector_route import router as clickup_add_connector_router @@ -54,7 +55,6 @@ from .search_source_connectors_routes import router as search_source_connectors_ from .search_spaces_routes import router as search_spaces_router from .slack_add_connector_route import router as slack_add_connector_router from .stripe_routes import router as stripe_router -from .surfsense_docs_routes import router as surfsense_docs_router from .team_memory_routes import router as team_memory_router from .teams_add_connector_route import router as teams_add_connector_router from .video_presentations_routes import router as video_presentations_router @@ -107,7 +107,6 @@ router.include_router(new_llm_config_router) # LLM configs with prompt configur router.include_router(model_list_router) # Dynamic model catalogue from OpenRouter router.include_router(logs_router) router.include_router(circleback_webhook_router) # Circleback meeting webhooks -router.include_router(surfsense_docs_router) # Surfsense documentation for citations router.include_router(notifications_router) # Notifications with Zero sync router.include_router( mcp_oauth_router diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index fb4d5a049..63b7732a9 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -1785,7 +1785,6 @@ async def handle_new_chat( user_id=str(user.id), llm_config_id=llm_config_id, mentioned_document_ids=request.mentioned_document_ids, - mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, mentioned_folder_ids=request.mentioned_folder_ids, mentioned_connector_ids=request.mentioned_connector_ids, mentioned_connectors=mentioned_connectors_payload, @@ -2278,7 +2277,6 @@ async def regenerate_response( user_id=str(user.id), llm_config_id=llm_config_id, mentioned_document_ids=request.mentioned_document_ids, - mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, mentioned_folder_ids=request.mentioned_folder_ids, mentioned_connector_ids=request.mentioned_connector_ids, mentioned_connectors=mentioned_connectors_payload, diff --git a/surfsense_backend/app/routes/surfsense_docs_routes.py b/surfsense_backend/app/routes/surfsense_docs_routes.py deleted file mode 100644 index 0d5428dec..000000000 --- a/surfsense_backend/app/routes/surfsense_docs_routes.py +++ /dev/null @@ -1,172 +0,0 @@ -""" -Routes for Surfsense documentation. - -These endpoints support the citation system for Surfsense docs, -allowing the frontend to fetch document details when a user clicks -on a [citation:doc-XXX] link. -""" - -from fastapi import APIRouter, Depends, HTTPException -from sqlalchemy import func, select -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload - -from app.db import ( - SurfsenseDocsChunk, - SurfsenseDocsDocument, - User, - get_async_session, -) -from app.schemas import PaginatedResponse -from app.schemas.surfsense_docs import ( - SurfsenseDocsChunkRead, - SurfsenseDocsDocumentRead, - SurfsenseDocsDocumentWithChunksRead, -) -from app.users import current_active_user -from app.utils.surfsense_docs import surfsense_docs_public_url - -router = APIRouter() - - -@router.get( - "/surfsense-docs/by-chunk/{chunk_id}", - response_model=SurfsenseDocsDocumentWithChunksRead, -) -async def get_surfsense_doc_by_chunk_id( - chunk_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Retrieves a Surfsense documentation document based on a chunk ID. - - This endpoint is used by the frontend to resolve [citation:doc-XXX] links. - """ - try: - # Get the chunk - chunk_result = await session.execute( - select(SurfsenseDocsChunk).filter(SurfsenseDocsChunk.id == chunk_id) - ) - chunk = chunk_result.scalars().first() - - if not chunk: - raise HTTPException( - status_code=404, - detail=f"Surfsense docs chunk with id {chunk_id} not found", - ) - - # Get the associated document with all its chunks - document_result = await session.execute( - select(SurfsenseDocsDocument) - .options(selectinload(SurfsenseDocsDocument.chunks)) - .filter(SurfsenseDocsDocument.id == chunk.document_id) - ) - document = document_result.scalars().first() - - if not document: - raise HTTPException( - status_code=404, - detail="Surfsense docs document not found", - ) - - # Sort chunks by ID - sorted_chunks = sorted(document.chunks, key=lambda x: x.id) - - return SurfsenseDocsDocumentWithChunksRead( - id=document.id, - title=document.title, - source=document.source, - public_url=surfsense_docs_public_url(document.source), - content=document.content, - chunks=[ - SurfsenseDocsChunkRead(id=c.id, content=c.content) - for c in sorted_chunks - ], - ) - except HTTPException: - raise - except Exception as e: - raise HTTPException( - status_code=500, - detail=f"Failed to retrieve Surfsense documentation: {e!s}", - ) from e - - -@router.get( - "/surfsense-docs", - response_model=PaginatedResponse[SurfsenseDocsDocumentRead], -) -async def list_surfsense_docs( - page: int = 0, - page_size: int = 50, - title: str | None = None, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - List all Surfsense documentation documents. - - Args: - page: Zero-based page index. - page_size: Number of items per page (default: 50). - title: Optional title filter (case-insensitive substring match). - session: Database session (injected). - user: Current authenticated user (injected). - - Returns: - PaginatedResponse[SurfsenseDocsDocumentRead]: Paginated list of Surfsense docs. - """ - try: - # Base query - query = select(SurfsenseDocsDocument) - count_query = select(func.count()).select_from(SurfsenseDocsDocument) - - # Filter by title if provided - if title and title.strip(): - query = query.filter(SurfsenseDocsDocument.title.ilike(f"%{title}%")) - count_query = count_query.filter( - SurfsenseDocsDocument.title.ilike(f"%{title}%") - ) - - # Get total count - total_result = await session.execute(count_query) - total = total_result.scalar() or 0 - - # Calculate offset - offset = page * page_size - - # Get paginated results - result = await session.execute( - query.order_by(SurfsenseDocsDocument.title).offset(offset).limit(page_size) - ) - docs = result.scalars().all() - - # Convert to response format - items = [ - SurfsenseDocsDocumentRead( - id=doc.id, - title=doc.title, - source=doc.source, - public_url=surfsense_docs_public_url(doc.source), - content=doc.content, - created_at=doc.created_at, - updated_at=doc.updated_at, - ) - for doc in docs - ] - - has_more = (offset + len(items)) < total - - return PaginatedResponse( - items=items, - total=total, - page=page, - page_size=page_size, - has_more=has_more, - ) - except Exception as e: - raise HTTPException( - status_code=500, - detail=f"Failed to list Surfsense documentation: {e!s}", - ) from e diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 8b49413c6..ab95f9b6b 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -239,9 +239,6 @@ class NewChatRequest(BaseModel): mentioned_document_ids: list[int] | None = ( None # Optional document IDs mentioned with @ in the chat ) - mentioned_surfsense_doc_ids: list[int] | None = ( - None # Optional SurfSense documentation IDs mentioned with @ in the chat - ) mentioned_folder_ids: list[int] | None = Field( default=None, description=( @@ -326,7 +323,6 @@ class RegenerateRequest(BaseModel): None # New user query (for edit). None = reload with same query ) mentioned_document_ids: list[int] | None = None - mentioned_surfsense_doc_ids: list[int] | None = None mentioned_folder_ids: list[int] | None = Field( default=None, description=( diff --git a/surfsense_backend/app/schemas/surfsense_docs.py b/surfsense_backend/app/schemas/surfsense_docs.py deleted file mode 100644 index 3adf25032..000000000 --- a/surfsense_backend/app/schemas/surfsense_docs.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Schemas for Surfsense documentation. -""" - -from datetime import datetime - -from pydantic import BaseModel, ConfigDict - - -class SurfsenseDocsChunkRead(BaseModel): - """Schema for a Surfsense docs chunk.""" - - id: int - content: str - - model_config = ConfigDict(from_attributes=True) - - -class SurfsenseDocsDocumentRead(BaseModel): - """Schema for a Surfsense docs document (without chunks).""" - - id: int - title: str - source: str - public_url: str - content: str - created_at: datetime | None = None - updated_at: datetime | None = None - - model_config = ConfigDict(from_attributes=True) - - -class SurfsenseDocsDocumentWithChunksRead(BaseModel): - """Schema for a Surfsense docs document with its chunks.""" - - id: int - title: str - source: str - public_url: str - content: str - chunks: list[SurfsenseDocsChunkRead] - - model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 78f80c955..e150cf494 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -25,7 +25,6 @@ from uuid import UUID import anyio from langchain_core.messages import HumanMessage from sqlalchemy.future import select -from sqlalchemy.orm import selectinload from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent @@ -55,7 +54,6 @@ from app.db import ( NewChatThread, Report, SearchSourceConnectorType, - SurfsenseDocsDocument, async_session_maker, shielded_async_session, ) @@ -77,7 +75,6 @@ from app.tasks.chat.streaming.helpers.interrupt_inspector import ( ) from app.utils.content_utils import bootstrap_history_from_db from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap -from app.utils.surfsense_docs import surfsense_docs_public_url from app.utils.user_message_multimodal import build_human_message_content _background_tasks: set[asyncio.Task] = set() @@ -198,58 +195,6 @@ def _extract_chunk_parts(chunk: Any) -> dict[str, Any]: return out -def format_mentioned_surfsense_docs_as_context( - documents: list[SurfsenseDocsDocument], -) -> str: - """Format mentioned SurfSense documentation as context for the agent.""" - if not documents: - return "" - - context_parts = [""] - context_parts.append( - "The user has explicitly mentioned the following SurfSense documentation pages. " - "These are official documentation about how to use SurfSense and should be used to answer questions about the application. " - "Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])." - ) - - for doc in documents: - public_url = surfsense_docs_public_url(doc.source) - metadata_json = json.dumps( - {"source": doc.source, "public_url": public_url}, ensure_ascii=False - ) - - context_parts.append("") - context_parts.append("") - context_parts.append(f" doc-{doc.id}") - context_parts.append(" SURFSENSE_DOCS") - context_parts.append(f" <![CDATA[{doc.title}]]>") - context_parts.append(f" ") - context_parts.append( - f" " - ) - context_parts.append("") - context_parts.append("") - context_parts.append("") - - if hasattr(doc, "chunks") and doc.chunks: - for chunk in doc.chunks: - context_parts.append( - f" " - ) - else: - context_parts.append( - f" " - ) - - context_parts.append("") - context_parts.append("") - context_parts.append("") - - context_parts.append("") - - return "\n".join(context_parts) - - def extract_todos_from_deepagents(command_output) -> dict: """ Extract todos from deepagents' TodoListMiddleware Command output. @@ -837,7 +782,6 @@ async def stream_new_chat( user_id: str | None = None, llm_config_id: int = -1, mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, mentioned_folder_ids: list[int] | None = None, mentioned_connector_ids: list[int] | None = None, mentioned_connectors: list[dict[str, Any]] | None = None, @@ -869,7 +813,6 @@ async def stream_new_chat( llm_config_id: The LLM configuration ID (default: -1 for first global config) needs_history_bootstrap: If True, load message history from DB (for cloned chats) mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat - mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat mentioned_folder_ids: Optional list of knowledge-base folder IDs mentioned with @ (cloud mode) checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations) @@ -1295,19 +1238,7 @@ async def stream_new_chat( # Mentioned KB documents are now handled by KnowledgeBaseSearchMiddleware # which merges them into the scoped filesystem with full document - # structure. Only SurfSense docs and report context are inlined here. - - # Fetch mentioned SurfSense docs if any - mentioned_surfsense_docs: list[SurfsenseDocsDocument] = [] - if mentioned_surfsense_doc_ids: - result = await session.execute( - select(SurfsenseDocsDocument) - .options(selectinload(SurfsenseDocsDocument.chunks)) - .filter( - SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids), - ) - ) - mentioned_surfsense_docs = list(result.scalars().all()) + # structure. Only report context is inlined here. # Fetch the most recent report(s) in this thread so the LLM can # easily find report_id for versioning decisions, instead of @@ -1341,10 +1272,7 @@ async def stream_new_chat( agent_user_query = user_query accepted_folder_ids: list[int] = [] if fs_mode == FilesystemMode.CLOUD.value and ( - mentioned_document_ids - or mentioned_surfsense_doc_ids - or mentioned_folder_ids - or mentioned_documents + mentioned_document_ids or mentioned_folder_ids or mentioned_documents ): from app.schemas.new_chat import ( MentionedDocumentInfo as _MentionedDocumentInfo, @@ -1370,23 +1298,17 @@ async def stream_new_chat( search_space_id=search_space_id, mentioned_documents=chip_objs, mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, mentioned_folder_ids=mentioned_folder_ids, ) agent_user_query = substitute_in_text(user_query, resolved.token_to_path) accepted_folder_ids = resolved.mentioned_folder_ids - # Format the user query with context (SurfSense docs + reports only). + # Format the user query with context (reports only). # Uses ``agent_user_query`` so the LLM sees backtick-wrapped paths # instead of bare ``@title`` tokens. final_query = agent_user_query context_parts = [] - if mentioned_surfsense_docs: - context_parts.append( - format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs) - ) - if mentioned_connectors: connector_lines = [] for connector in mentioned_connectors: @@ -1617,12 +1539,8 @@ async def stream_new_chat( stream_result.content_builder = AssistantContentBuilder() # Initial thinking step - analyzing the request - if mentioned_surfsense_docs: - initial_title = "Analyzing referenced content" - action_verb = "Analyzing" - else: - initial_title = "Understanding your request" - action_verb = "Processing" + initial_title = "Understanding your request" + action_verb = "Processing" processing_parts = [] if user_query.strip(): @@ -1633,18 +1551,6 @@ async def stream_new_chat( else: processing_parts.append("(message)") - if mentioned_surfsense_docs: - doc_names = [] - for doc in mentioned_surfsense_docs: - title = doc.title - if len(title) > 30: - title = title[:27] + "..." - doc_names.append(title) - if len(doc_names) == 1: - processing_parts.append(f"[{doc_names[0]}]") - else: - processing_parts.append(f"[{len(doc_names)} docs]") - initial_items = [f"{action_verb}: {' '.join(processing_parts)}"] initial_step_id = "thinking-1" @@ -1664,10 +1570,10 @@ async def stream_new_chat( items=initial_items, ) - # These ORM objects (with eagerly-loaded chunks) can be very large. - # They're only needed to build context strings already copied into - # final_query / langchain_messages — release them before streaming. - del mentioned_surfsense_docs, recent_reports + # These ORM objects can be large. They're only needed to build context + # strings already copied into final_query / langchain_messages — + # release them before streaming. + del recent_reports del langchain_messages, final_query # Check if this is the first assistant response so we can generate diff --git a/surfsense_backend/app/tasks/chat/streaming/context/__init__.py b/surfsense_backend/app/tasks/chat/streaming/context/__init__.py index f858a6c06..4cf58d76f 100644 --- a/surfsense_backend/app/tasks/chat/streaming/context/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/context/__init__.py @@ -1,15 +1,11 @@ -"""Pre-agent context shaping: mentioned-doc rendering and todos extraction.""" +"""Pre-agent context shaping: todos extraction.""" from __future__ import annotations from app.tasks.chat.streaming.context.deepagents_todos import ( extract_todos_from_deepagents, ) -from app.tasks.chat.streaming.context.mentioned_docs import ( - format_mentioned_surfsense_docs_as_context, -) __all__ = [ "extract_todos_from_deepagents", - "format_mentioned_surfsense_docs_as_context", ] diff --git a/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py b/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py index 0bbf4f0a5..b9cbf6506 100644 --- a/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py +++ b/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py @@ -19,9 +19,7 @@ def extract_todos_from_deepagents(command_output: Any) -> dict: elif isinstance(command_output, dict): if "todos" in command_output: todos_data = command_output.get("todos", []) - elif "update" in command_output and isinstance( - command_output["update"], dict - ): + elif "update" in command_output and isinstance(command_output["update"], dict): todos_data = command_output["update"].get("todos", []) return {"todos": todos_data} diff --git a/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py b/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py deleted file mode 100644 index e02e98d34..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Render user-mentioned SurfSense docs as XML context for the agent.""" - -from __future__ import annotations - -import json - -from app.db import SurfsenseDocsDocument -from app.utils.surfsense_docs import surfsense_docs_public_url - - -def format_mentioned_surfsense_docs_as_context( - documents: list[SurfsenseDocsDocument], -) -> str: - if not documents: - return "" - - context_parts = [""] - context_parts.append( - "The user has explicitly mentioned the following SurfSense documentation pages. " - "These are official documentation about how to use SurfSense and should be used to answer questions about the application. " - "Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])." - ) - - for doc in documents: - public_url = surfsense_docs_public_url(doc.source) - metadata_json = json.dumps( - {"source": doc.source, "public_url": public_url}, ensure_ascii=False - ) - - context_parts.append("") - context_parts.append("") - context_parts.append(f" doc-{doc.id}") - context_parts.append(" SURFSENSE_DOCS") - context_parts.append(f" <![CDATA[{doc.title}]]>") - context_parts.append(f" ") - context_parts.append( - f" " - ) - context_parts.append("") - context_parts.append("") - context_parts.append("") - - if hasattr(doc, "chunks") and doc.chunks: - for chunk in doc.chunks: - context_parts.append( - f" " - ) - else: - context_parts.append( - f" " - ) - - context_parts.append("") - context_parts.append("") - context_parts.append("") - - context_parts.append("") - return "\n".join(context_parts) diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/auto_pin.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/auto_pin.py index cb20eb011..af496cee7 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/auto_pin.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/auto_pin.py @@ -69,17 +69,13 @@ async def resolve_initial_auto_pin( "pin.requires_image_input": requires_image_input, }, ) - return AutoPinResult( - llm_config_id=pinned.resolved_llm_config_id, error=None - ) + return AutoPinResult(llm_config_id=pinned.resolved_llm_config_id, error=None) except ValueError as pin_error: # The "no vision-capable cfg" path raises a ValueError whose message # we map to the friendly image-input SSE error so the user sees the # same message regardless of whether the gate fired in the resolver or # in ``llm_capability.assert_vision_capability_for_image_turn``. - is_vision_failure = ( - requires_image_input and "vision-capable" in str(pin_error) - ) + is_vision_failure = requires_image_input and "vision-capable" in str(pin_error) error_code = ( "MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT" if is_vision_failure diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py index c860e517e..e727200eb 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py @@ -1,8 +1,8 @@ """Build and emit the first ``thinking-1`` step for a new-chat turn. The step title and "Processing X" items are derived from what the user sent -(text snippet, image count, mentioned doc titles) so the FE can render a -meaningful placeholder while the agent stream warms up. +(text snippet, image count) so the FE can render a meaningful placeholder +while the agent stream warms up. ``thinking-1`` is the canonical id for this step — every subsequent ``thinking-N`` produced by ``stream_agent_events`` folds into the same @@ -15,7 +15,6 @@ from collections.abc import Iterator from dataclasses import dataclass from typing import Any -from app.db import SurfsenseDocsDocument from app.services.new_streaming_service import VercelStreamingService @@ -37,14 +36,9 @@ def build_initial_thinking_step( *, user_query: str, user_image_data_urls: list[str] | None, - mentioned_surfsense_docs: list[SurfsenseDocsDocument], ) -> InitialThinkingStep: - if mentioned_surfsense_docs: - title = "Analyzing referenced content" - action_verb = "Analyzing" - else: - title = "Understanding your request" - action_verb = "Processing" + title = "Understanding your request" + action_verb = "Processing" processing_parts: list[str] = [] if user_query.strip(): @@ -55,18 +49,6 @@ def build_initial_thinking_step( else: processing_parts.append("(message)") - if mentioned_surfsense_docs: - doc_names: list[str] = [] - for doc in mentioned_surfsense_docs: - t = doc.title - if len(t) > 30: - t = t[:27] + "..." - doc_names.append(t) - if len(doc_names) == 1: - processing_parts.append(f"[{doc_names[0]}]") - else: - processing_parts.append(f"[{len(doc_names)} docs]") - items = [f"{action_verb}: {' '.join(processing_parts)}"] return InitialThinkingStep(step_id="thinking-1", title=title, items=items) diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py index fb171c244..0c6704bd1 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py @@ -5,20 +5,17 @@ Pipeline: 1. **History bootstrap** — only for cloned chats with no LangGraph checkpoint yet; flips the per-thread ``needs_history_bootstrap`` flag back to False once the rows are loaded. - 2. **Mentioned SurfSense docs** — eager-load chunks so the formatter has the - full content without a second roundtrip. - 3. **Recent reports** — top 3 by id desc with non-null content, so the LLM + 2. **Recent reports** — top 3 by id desc with non-null content, so the LLM can resolve ``report_id`` for versioning without spelunking history. - 4. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the + 3. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the query with canonical ``\`/documents/...\``` paths the LLM expects. - 5. **Context block render** — XML-wrap surfsense docs + reports, prepend to - the rewritten query, optionally prefix with display name for SEARCH_SPACE + 4. **Context block render** — XML-wrap recent reports, prepend to the + rewritten query, optionally prefix with display name for SEARCH_SPACE visibility. - 6. **HumanMessage** — multimodal content if images are attached. + 5. **HumanMessage** — multimodal content if images are attached. Returns the assembled ``input_state`` dict plus side-channel data the -orchestrator needs downstream (``accepted_folder_ids`` for runtime context; -``mentioned_surfsense_docs`` for the initial thinking step). +orchestrator needs downstream (``accepted_folder_ids`` for runtime context). """ from __future__ import annotations @@ -30,7 +27,6 @@ from typing import Any from langchain_core.messages import HumanMessage from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select -from sqlalchemy.orm import selectinload from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text @@ -38,10 +34,6 @@ from app.db import ( ChatVisibility, NewChatThread, Report, - SurfsenseDocsDocument, -) -from app.tasks.chat.streaming.context.mentioned_docs import ( - format_mentioned_surfsense_docs_as_context, ) from app.utils.content_utils import bootstrap_history_from_db from app.utils.user_message_multimodal import build_human_message_content @@ -55,13 +47,10 @@ class NewChatInputState: ``input_state`` is fed straight to the agent. ``accepted_folder_ids`` feeds the runtime context (the resolver may have dropped some chips). - ``mentioned_surfsense_docs`` is consumed by the initial thinking-step - builder for the FE placeholder before the agent stream starts. """ input_state: dict[str, Any] accepted_folder_ids: list[int] - mentioned_surfsense_docs: list[SurfsenseDocsDocument] async def build_new_chat_input_state( @@ -72,7 +61,6 @@ async def build_new_chat_input_state( user_query: str, user_image_data_urls: list[str] | None, mentioned_document_ids: list[int] | None, - mentioned_surfsense_doc_ids: list[int] | None, mentioned_folder_ids: list[int] | None, mentioned_documents: list[dict[str, Any]] | None, needs_history_bootstrap: bool, @@ -96,15 +84,6 @@ async def build_new_chat_input_state( thread.needs_history_bootstrap = False await session.commit() - mentioned_surfsense_docs: list[SurfsenseDocsDocument] = [] - if mentioned_surfsense_doc_ids: - result = await session.execute( - select(SurfsenseDocsDocument) - .options(selectinload(SurfsenseDocsDocument.chunks)) - .filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids)) - ) - mentioned_surfsense_docs = list(result.scalars().all()) - # Top 3 reports keyed by id desc (newest first) with content present, # surfaced inline so the LLM resolves ``report_id`` for versioning without # digging through conversation history. @@ -125,14 +104,12 @@ async def build_new_chat_input_state( user_query=user_query, filesystem_mode=filesystem_mode, mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, mentioned_folder_ids=mentioned_folder_ids, mentioned_documents=mentioned_documents, ) final_query = _render_query_with_context( agent_user_query=agent_user_query, - mentioned_surfsense_docs=mentioned_surfsense_docs, recent_reports=recent_reports, ) @@ -154,7 +131,6 @@ async def build_new_chat_input_state( return NewChatInputState( input_state=input_state, accepted_folder_ids=accepted_folder_ids, - mentioned_surfsense_docs=mentioned_surfsense_docs, ) @@ -165,7 +141,6 @@ async def _resolve_mentions_for_query( user_query: str, filesystem_mode: str, mentioned_document_ids: list[int] | None, - mentioned_surfsense_doc_ids: list[int] | None, mentioned_folder_ids: list[int] | None, mentioned_documents: list[dict[str, Any]] | None, ) -> tuple[str, list[int]]: @@ -187,10 +162,7 @@ async def _resolve_mentions_for_query( accepted_folder_ids: list[int] = [] has_any_mention = bool( - mentioned_document_ids - or mentioned_surfsense_doc_ids - or mentioned_folder_ids - or mentioned_documents + mentioned_document_ids or mentioned_folder_ids or mentioned_documents ) if filesystem_mode != FilesystemMode.CLOUD.value or not has_any_mention: return agent_user_query, accepted_folder_ids @@ -207,16 +179,13 @@ async def _resolve_mentions_for_query( try: chip_objs.append(MentionedDocumentInfo.model_validate(raw)) except Exception: - logger.debug( - "stream_new_chat: dropping malformed mention chip %r", raw - ) + logger.debug("stream_new_chat: dropping malformed mention chip %r", raw) resolved = await resolve_mentions( session, search_space_id=search_space_id, mentioned_documents=chip_objs, mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, mentioned_folder_ids=mentioned_folder_ids, ) agent_user_query = substitute_in_text(user_query, resolved.token_to_path) @@ -227,17 +196,11 @@ async def _resolve_mentions_for_query( def _render_query_with_context( *, agent_user_query: str, - mentioned_surfsense_docs: list[SurfsenseDocsDocument], recent_reports: list[Report], ) -> str: - """Prepend surfsense-docs + recent-reports XML blocks to the user query.""" + """Prepend recent-reports XML block to the user query.""" context_parts: list[str] = [] - if mentioned_surfsense_docs: - context_parts.append( - format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs) - ) - if recent_reports: report_lines: list[str] = [] for r in recent_reports: diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/llm_capability.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/llm_capability.py index ff5a56eec..9f4e5d2d8 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/llm_capability.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/llm_capability.py @@ -48,9 +48,7 @@ def check_image_input_capability( return None model_label = agent_config.config_name or agent_config.model_name or "model" - ot.add_event( - "quota.denied", {"quota.code": "MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"} - ) + ot.add_event("quota.denied", {"quota.code": "MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"}) return ( ( f"The selected model ({model_label}) does not support " diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py index bca72b5ea..1892320d3 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py @@ -123,7 +123,6 @@ async def stream_new_chat( user_id: str | None = None, llm_config_id: int = -1, mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, mentioned_folder_ids: list[int] | None = None, mentioned_documents: list[dict[str, Any]] | None = None, checkpoint_id: str | None = None, @@ -259,7 +258,8 @@ async def stream_new_chat( if needs_premium_quota(agent_config, user_id): premium_reservation = await reserve_premium( - agent_config=agent_config, user_id=user_id # type: ignore[arg-type] + agent_config=agent_config, + user_id=user_id, # type: ignore[arg-type] ) if not premium_reservation.allowed: ot.add_event("quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"}) @@ -434,7 +434,6 @@ async def stream_new_chat( user_query=user_query, user_image_data_urls=user_image_data_urls, mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, mentioned_folder_ids=mentioned_folder_ids, mentioned_documents=mentioned_documents, needs_history_bootstrap=needs_history_bootstrap, @@ -446,7 +445,6 @@ async def stream_new_chat( ) input_state = assembled.input_state accepted_folder_ids = assembled.accepted_folder_ids - mentioned_surfsense_docs = assembled.mentioned_surfsense_docs _perf_log.info( "[stream_new_chat] History bootstrap + doc/report queries in %.3fs", time.perf_counter() - _t0, @@ -492,7 +490,9 @@ async def stream_new_chat( # --- Block 4: First SSE frames --- - for sse in iter_initial_frames(streaming_service, turn_id=stream_result.turn_id): + for sse in iter_initial_frames( + streaming_service, turn_id=stream_result.turn_id + ): yield sse # --- Block 5: Persistence join + message-id frames --- @@ -557,7 +557,6 @@ async def stream_new_chat( initial_step = build_initial_thinking_step( user_query=user_query, user_image_data_urls=user_image_data_urls, - mentioned_surfsense_docs=mentioned_surfsense_docs, ) for sse in iter_initial_thinking_step_frame( initial_step, @@ -572,7 +571,7 @@ async def stream_new_chat( # Drop the heavy ORM objects + the container that holds them so they # aren't retained for the entire streaming duration. ``input_state`` # already carries the langchain_messages list independently. - del assembled, mentioned_surfsense_docs + del assembled title_task = spawn_title_task( chat_id=chat_id, @@ -693,7 +692,9 @@ async def stream_new_chat( fallback_commit_search_space_id=search_space_id, fallback_commit_created_by_id=user_id, fallback_commit_filesystem_mode=( - filesystem_selection.mode if filesystem_selection else FilesystemMode.CLOUD + filesystem_selection.mode + if filesystem_selection + else FilesystemMode.CLOUD ), fallback_commit_thread_id=chat_id, runtime_context=runtime_context, @@ -715,11 +716,7 @@ async def stream_new_chat( title_emitted = True # Account for the case where the task completed but produced no # title — flip the flag anyway so we don't keep checking it. - if ( - title_task is not None - and title_task.done() - and not title_emitted - ): + if title_task is not None and title_task.done() and not title_emitted: title_emitted = True _perf_log.info( @@ -811,9 +808,7 @@ async def stream_new_chat( end_turn(str(chat_id)) if premium_reservation is not None and user_id: - await release_premium( - reservation=premium_reservation, user_id=user_id - ) + await release_premium(reservation=premium_reservation, user_id=user_id) await close_session_and_clear_ai_responding(session, chat_id) @@ -852,9 +847,9 @@ async def stream_new_chat( # Break circular refs held by the agent graph, tools, and LLM # wrappers so the GC can reclaim them in a single pass. - agent = llm = connector_service = None # noqa: F841 - input_state = stream_result = None # noqa: F841 - session = None # noqa: F841 + agent = llm = connector_service = None + input_state = stream_result = None + session = None run_gc_pass(log_prefix="stream_new_chat", chat_id=chat_id) close_chat_request_span( diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py index 1f11be1fe..cf1e8c3fb 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py @@ -30,9 +30,7 @@ def build_new_chat_runtime_context( return SurfSenseContextSchema( search_space_id=search_space_id, mentioned_document_ids=list(mentioned_document_ids or []), - mentioned_folder_ids=list( - accepted_folder_ids or mentioned_folder_ids or [] - ), + mentioned_folder_ids=list(accepted_folder_ids or mentioned_folder_ids or []), request_id=request_id, turn_id=turn_id, ) diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/title_gen.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/title_gen.py index 11312110f..7db45941b 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/title_gen.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/title_gen.py @@ -133,12 +133,8 @@ async def _generate_title( # inherited Azure endpoint — see ``provider_api_base`` for the # same bug repro on the image-gen / vision paths. raw_model = getattr(llm, "model", "") or "" - provider_prefix = ( - raw_model.split("/", 1)[0] if "/" in raw_model else None - ) - provider_value = ( - agent_config.provider if agent_config is not None else None - ) + provider_prefix = raw_model.split("/", 1)[0] if "/" in raw_model else None + provider_value = agent_config.provider if agent_config is not None else None title_api_base = resolve_api_base( provider=provider_value, provider_prefix=provider_prefix, diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/orchestrator.py index b67ac987e..e1b95aa63 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/orchestrator.py @@ -15,14 +15,10 @@ building blocks under ``flows/shared/``. Mirrors ``stream_new_chat`` but: from __future__ import annotations import contextlib -import gc import logging -import sys import time -import uuid as _uuid from collections.abc import AsyncGenerator from functools import partial -from typing import Any from uuid import UUID import anyio @@ -32,7 +28,7 @@ from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection from app.agents.new_chat.middleware.busy_mutex import end_turn from app.config import config as _app_config -from app.db import ChatVisibility, async_session_maker, shielded_async_session +from app.db import ChatVisibility, async_session_maker from app.observability import otel as ot from app.services.chat_session_state_service import set_ai_responding from app.services.new_streaming_service import VercelStreamingService @@ -89,7 +85,7 @@ from app.tasks.chat.streaming.flows.shared.terminal_error import ( ) from app.tasks.chat.streaming.shared.stream_result import StreamResult from app.tasks.chat.streaming.shared.utils import resume_step_prefix -from app.utils.perf import get_perf_logger, log_system_snapshot +from app.utils.perf import get_perf_logger logger = logging.getLogger(__name__) _perf_log = get_perf_logger() @@ -217,12 +213,11 @@ async def stream_resume_chat( if needs_premium_quota(agent_config, user_id): premium_reservation = await reserve_premium( - agent_config=agent_config, user_id=user_id # type: ignore[arg-type] + agent_config=agent_config, + user_id=user_id, # type: ignore[arg-type] ) if not premium_reservation.allowed: - ot.add_event( - "quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"} - ) + ot.add_event("quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"}) if requested_llm_config_id == 0: try: pinned_fb = await resolve_or_get_pinned_llm_config_id( @@ -396,7 +391,9 @@ async def stream_resume_chat( # --- First SSE frames --- - for sse in iter_initial_frames(streaming_service, turn_id=stream_result.turn_id): + for sse in iter_initial_frames( + streaming_service, turn_id=stream_result.turn_id + ): yield sse # --- Assistant-shell persistence + id frame --- @@ -517,7 +514,9 @@ async def stream_resume_chat( fallback_commit_search_space_id=search_space_id, fallback_commit_created_by_id=user_id, fallback_commit_filesystem_mode=( - filesystem_selection.mode if filesystem_selection else FilesystemMode.CLOUD + filesystem_selection.mode + if filesystem_selection + else FilesystemMode.CLOUD ), fallback_commit_thread_id=chat_id, runtime_context=runtime_context, @@ -589,9 +588,7 @@ async def stream_resume_chat( end_turn(str(chat_id)) if premium_reservation is not None and user_id: - await release_premium( - reservation=premium_reservation, user_id=user_id - ) + await release_premium(reservation=premium_reservation, user_id=user_id) await close_session_and_clear_ai_responding(session, chat_id) @@ -609,13 +606,11 @@ async def stream_resume_chat( if not busy_error_raised: with contextlib.suppress(Exception): end_turn(str(chat_id)) - _perf_log.info( - "[stream_resume] end_turn cleanup (chat_id=%s)", chat_id - ) + _perf_log.info("[stream_resume] end_turn cleanup (chat_id=%s)", chat_id) - agent = llm = connector_service = None # noqa: F841 - stream_result = None # noqa: F841 - session = None # noqa: F841 + agent = llm = connector_service = None + stream_result = None + session = None run_gc_pass(log_prefix="stream_resume", chat_id=chat_id) close_chat_request_span( diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py index 300fbc9bd..7f4f67aac 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py @@ -47,9 +47,7 @@ async def build_resume_routing( slice_decisions_by_tool_call, ) - parent_state = await agent.aget_state( - {"configurable": {"thread_id": str(chat_id)}} - ) + parent_state = await agent.aget_state({"configurable": {"thread_id": str(chat_id)}}) pending = collect_pending_tool_calls(parent_state) _perf_log.info( "[hitl_route] resume_entry chat_id=%s decisions=%d pending_subagents=%d", diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py index d16f81ac7..be1f102f3 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py @@ -49,9 +49,7 @@ async def finalize_assistant_message( was never assigned. """ if not ( - stream_result - and stream_result.turn_id - and stream_result.assistant_message_id + stream_result and stream_result.turn_id and stream_result.assistant_message_id ): return diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py index 8d425402f..f9454775e 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py @@ -39,9 +39,7 @@ async def close_session_and_clear_ai_responding( async with shielded_async_session() as fresh_session: await clear_ai_responding(fresh_session, chat_id) except Exception: - logger.warning( - "Failed to clear AI responding state for thread %s", chat_id - ) + logger.warning("Failed to clear AI responding state for thread %s", chat_id) with contextlib.suppress(Exception): session.expunge_all() diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/premium_quota.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/premium_quota.py index 0ec40d275..cbf44764c 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/premium_quota.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/premium_quota.py @@ -41,9 +41,7 @@ class PremiumReservation: allowed: bool -def needs_premium_quota( - agent_config: AgentConfig | None, user_id: str | None -) -> bool: +def needs_premium_quota(agent_config: AgentConfig | None, user_id: str | None) -> bool: return bool(agent_config is not None and user_id and agent_config.is_premium) @@ -61,8 +59,10 @@ async def reserve_premium( request_id = _uuid.uuid4().hex[:16] litellm_params = agent_config.litellm_params or {} base_model = ( - litellm_params.get("base_model") if isinstance(litellm_params, dict) else None - ) or agent_config.model_name or "" + (litellm_params.get("base_model") if isinstance(litellm_params, dict) else None) + or agent_config.model_name + or "" + ) reserve_amount_micros = estimate_call_reserve_micros( base_model=base_model, quota_reserve_tokens=agent_config.quota_reserve_tokens, diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py index 1e5169af1..74b9682ed 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py @@ -6,8 +6,7 @@ import contextlib import sys from typing import Any, Literal -from app.observability import metrics as ot_metrics -from app.observability import otel as ot +from app.observability import metrics as ot_metrics, otel as ot def open_chat_request_span( diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/terminal_error.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/terminal_error.py index c9db2caf2..b305dba23 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/terminal_error.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/terminal_error.py @@ -15,8 +15,7 @@ from collections.abc import Iterator from typing import Any, Literal from app.agents.new_chat.errors import BusyError -from app.observability import metrics as ot_metrics -from app.observability import otel as ot +from app.observability import metrics as ot_metrics, otel as ot from app.services.new_streaming_service import VercelStreamingService from app.tasks.chat.streaming.errors.classifier import classify_stream_exception from app.tasks.chat.streaming.errors.emitter import emit_stream_terminal_error diff --git a/surfsense_backend/app/tasks/surfsense_docs_indexer.py b/surfsense_backend/app/tasks/surfsense_docs_indexer.py deleted file mode 100644 index db88c8700..000000000 --- a/surfsense_backend/app/tasks/surfsense_docs_indexer.py +++ /dev/null @@ -1,249 +0,0 @@ -""" -Surfsense documentation indexer. -Indexes MDX documentation files at startup. -""" - -import hashlib -import logging -import re -from datetime import UTC, datetime -from pathlib import Path - -from sqlalchemy import delete as sa_delete, select -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload -from sqlalchemy.orm.attributes import set_committed_value - -from app.config import config -from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker -from app.utils.document_converters import embed_text - -logger = logging.getLogger(__name__) - - -async def _safe_set_docs_chunks( - session: AsyncSession, document: SurfsenseDocsDocument, chunks: list -) -> None: - """safe_set_chunks variant for the SurfsenseDocsDocument/Chunk models.""" - if document.id is not None: - await session.execute( - sa_delete(SurfsenseDocsChunk).where( - SurfsenseDocsChunk.document_id == document.id - ) - ) - for chunk in chunks: - chunk.document_id = document.id - - set_committed_value(document, "chunks", chunks) - session.add_all(chunks) - - -# Path to docs relative to project root -DOCS_DIR = ( - Path(__file__).resolve().parent.parent.parent.parent - / "surfsense_web" - / "content" - / "docs" -) - - -def parse_mdx_frontmatter(content: str) -> tuple[str, str]: - """ - Parse MDX file to extract frontmatter title and content. - - Args: - content: Raw MDX file content - - Returns: - Tuple of (title, content_without_frontmatter) - """ - # Match frontmatter between --- markers - frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n" - match = re.match(frontmatter_pattern, content, re.DOTALL) - - if match: - frontmatter = match.group(1) - content_without_frontmatter = content[match.end() :] - - # Extract title from frontmatter - title_match = re.search(r"^title:\s*(.+)$", frontmatter, re.MULTILINE) - title = title_match.group(1).strip() if title_match else "Untitled" - - # Remove quotes if present - title = title.strip("\"'") - - return title, content_without_frontmatter.strip() - - return "Untitled", content.strip() - - -def get_all_mdx_files() -> list[Path]: - """ - Get all MDX files from the docs directory. - - Returns: - List of Path objects for each MDX file - """ - if not DOCS_DIR.exists(): - logger.warning(f"Docs directory not found: {DOCS_DIR}") - return [] - - return list(DOCS_DIR.rglob("*.mdx")) - - -def generate_surfsense_docs_content_hash(content: str) -> str: - """Generate SHA-256 hash for Surfsense docs content.""" - return hashlib.sha256(content.encode("utf-8")).hexdigest() - - -def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]: - """ - Create chunks from Surfsense documentation content. - - Args: - content: Document content to chunk - - Returns: - List of SurfsenseDocsChunk objects with embeddings - """ - return [ - SurfsenseDocsChunk( - content=chunk.text, - embedding=embed_text(chunk.text), - ) - for chunk in config.chunker_instance.chunk(content) - ] - - -async def index_surfsense_docs(session: AsyncSession) -> tuple[int, int, int, int]: - """ - Index all Surfsense documentation files. - - Args: - session: SQLAlchemy async session - - Returns: - Tuple of (created, updated, skipped, deleted) counts - """ - created = 0 - updated = 0 - skipped = 0 - deleted = 0 - - # Get all existing docs from database - existing_docs_result = await session.execute( - select(SurfsenseDocsDocument).options( - selectinload(SurfsenseDocsDocument.chunks) - ) - ) - existing_docs = {doc.source: doc for doc in existing_docs_result.scalars().all()} - - # Track which sources we've processed - processed_sources = set() - - # Get all MDX files - mdx_files = get_all_mdx_files() - logger.info(f"Found {len(mdx_files)} MDX files to index") - - for mdx_file in mdx_files: - try: - source = str(mdx_file.relative_to(DOCS_DIR)) - processed_sources.add(source) - - # Read file content - raw_content = mdx_file.read_text(encoding="utf-8") - title, content = parse_mdx_frontmatter(raw_content) - content_hash = generate_surfsense_docs_content_hash(raw_content) - - if source in existing_docs: - existing_doc = existing_docs[source] - - # Check if content changed - if existing_doc.content_hash == content_hash: - logger.debug(f"Skipping unchanged: {source}") - skipped += 1 - continue - - # Content changed - update document - logger.info(f"Updating changed document: {source}") - - # Create new chunks - chunks = create_surfsense_docs_chunks(content) - - # Update document fields - existing_doc.title = title - existing_doc.content = content - existing_doc.content_hash = content_hash - existing_doc.embedding = embed_text(content) - await _safe_set_docs_chunks(session, existing_doc, chunks) - existing_doc.updated_at = datetime.now(UTC) - - updated += 1 - else: - # New document - create it - logger.info(f"Creating new document: {source}") - - chunks = create_surfsense_docs_chunks(content) - - document = SurfsenseDocsDocument( - source=source, - title=title, - content=content, - content_hash=content_hash, - embedding=embed_text(content), - chunks=chunks, - updated_at=datetime.now(UTC), - ) - - session.add(document) - created += 1 - - except Exception as e: - logger.error(f"Error processing {mdx_file}: {e}", exc_info=True) - continue - - # Delete documents for removed files - for source, doc in existing_docs.items(): - if source not in processed_sources: - logger.info(f"Deleting removed document: {source}") - await session.delete(doc) - deleted += 1 - - # Commit all changes - await session.commit() - - logger.info( - f"Indexing complete: {created} created, {updated} updated, " - f"{skipped} skipped, {deleted} deleted" - ) - - return created, updated, skipped, deleted - - -async def seed_surfsense_docs() -> tuple[int, int, int, int]: - """ - Seed Surfsense documentation into the database. - - This function indexes all MDX files from the docs directory. - It handles creating, updating, and deleting docs based on content changes. - - Returns: - Tuple of (created, updated, skipped, deleted) counts - Returns (0, 0, 0, 0) if an error occurs - """ - logger.info("Starting Surfsense docs indexing...") - - try: - async with async_session_maker() as session: - created, updated, skipped, deleted = await index_surfsense_docs(session) - - logger.info( - f"Surfsense docs indexing complete: " - f"created={created}, updated={updated}, skipped={skipped}, deleted={deleted}" - ) - - return created, updated, skipped, deleted - - except Exception as e: - logger.error(f"Failed to seed Surfsense docs: {e}", exc_info=True) - return 0, 0, 0, 0 diff --git a/surfsense_backend/app/utils/surfsense_docs.py b/surfsense_backend/app/utils/surfsense_docs.py deleted file mode 100644 index 9a6ab11a9..000000000 --- a/surfsense_backend/app/utils/surfsense_docs.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Utilities for SurfSense's built-in documentation index.""" - -from pathlib import PurePosixPath - -DOCS_PUBLIC_ROOT = PurePosixPath("/docs") - - -def surfsense_docs_public_url(source: str) -> str: - """Return the public docs route for an indexed documentation source path.""" - docs_path = PurePosixPath(source).with_suffix("") - if docs_path.name == "index": - docs_path = docs_path.parent - return (DOCS_PUBLIC_ROOT / docs_path).as_posix() diff --git a/surfsense_backend/scripts/seed_surfsense_docs.py b/surfsense_backend/scripts/seed_surfsense_docs.py deleted file mode 100644 index 68899c2aa..000000000 --- a/surfsense_backend/scripts/seed_surfsense_docs.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -""" -Seed Surfsense documentation into the database. - -CLI wrapper for the seed_surfsense_docs function. -Can be run manually for debugging or re-indexing. - -Usage: - python scripts/seed_surfsense_docs.py -""" - -import asyncio -import sys -from pathlib import Path - -# Add the parent directory to the path so we can import app modules -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) - -from app.tasks.surfsense_docs_indexer import seed_surfsense_docs - - -def main(): - """CLI entry point for seeding Surfsense docs.""" - print("=" * 50) - print(" Surfsense Documentation Seeding") - print("=" * 50) - - created, updated, skipped, deleted = asyncio.run(seed_surfsense_docs()) - - print() - print("Results:") - print(f" Created: {created}") - print(f" Updated: {updated}") - print(f" Skipped: {skipped}") - print(f" Deleted: {deleted}") - print("=" * 50) - - -if __name__ == "__main__": - main() diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_default_permissions_layering.py b/surfsense_backend/tests/unit/agents/new_chat/test_default_permissions_layering.py index ac6b5d95c..2f222e148 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_default_permissions_layering.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_default_permissions_layering.py @@ -60,7 +60,6 @@ class TestReadOnlyToolsAllowed: "glob", "web_search", "scrape_webpage", - "search_surfsense_docs", "get_connected_accounts", "write_todos", "task", diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py b/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py index 3035cc8e0..3c7fe5336 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py @@ -22,12 +22,6 @@ from app.agents.new_chat.subagents.config import ( # --------------------------------------------------------------------------- -@tool -def search_surfsense_docs(query: str) -> str: - """Search the user's KB.""" - return "" - - @tool def web_search(query: str) -> str: """Search the public web.""" @@ -95,7 +89,6 @@ def generate_report(topic: str) -> str: ALL_TOOLS = [ - search_surfsense_docs, web_search, scrape_webpage, read_file, @@ -161,7 +154,7 @@ class TestReportWriterSubagent: names = {t.name for t in spec["tools"]} # type: ignore[index] assert names == REPORT_WRITER_TOOLS & {t.name for t in ALL_TOOLS} assert "generate_report" in names - assert "search_surfsense_docs" in names + assert "read_file" in names def test_deny_rules_block_writes_but_allow_generate_report(self) -> None: spec = build_report_writer_subagent(tools=ALL_TOOLS) @@ -272,9 +265,9 @@ class TestFilterToolsWarningSuppression: # Allowed set asks for two registry tools (one present, one # not) plus a bunch of middleware-provided names. _filter_tools( - [search_surfsense_docs], + [web_search], allowed_names={ - "search_surfsense_docs", + "web_search", "scrape_webpage", # legitimately missing → should warn "read_file", # mw-provided → suppressed "ls", @@ -322,7 +315,6 @@ class TestDenyPatternsCoverage: def test_deny_patterns_do_not_match_safe_read_tools(self) -> None: canonical_reads = [ - "search_surfsense_docs", "read_file", "ls_tree", "grep", diff --git a/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py b/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py index bd49d764c..aa6c74549 100644 --- a/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py +++ b/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py @@ -72,7 +72,11 @@ def test_extract_returns_none_when_no_assistant_text_is_present() -> None: anything?" rather than guess whether ``""`` means silence or empty output. Empty-string contents are normalized to ``None`` too.""" no_ai = {"messages": [HumanMessage(content="just a question")]} - only_tools = {"messages": [AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}])]} + only_tools = { + "messages": [ + AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}]) + ] + } empty_string = {"messages": [AIMessage(content=" ")]} assert extract_final_assistant_message(no_ai) is None diff --git a/surfsense_backend/tests/unit/automations/runtime/test_retries.py b/surfsense_backend/tests/unit/automations/runtime/test_retries.py index f0f12ca59..05fd02ab6 100644 --- a/surfsense_backend/tests/unit/automations/runtime/test_retries.py +++ b/surfsense_backend/tests/unit/automations/runtime/test_retries.py @@ -33,7 +33,9 @@ async def test_with_retries_returns_result_and_attempts_one_on_first_success() - assert calls == 1 -async def test_with_retries_returns_attempt_count_when_succeeding_after_failures() -> None: +async def test_with_retries_returns_attempt_count_when_succeeding_after_failures() -> ( + None +): """A coroutine that fails twice then succeeds returns ``attempts=3`` (the actual attempt that produced the result). Locks the contract that the caller can distinguish first-try success from a recovery.""" diff --git a/surfsense_backend/tests/unit/automations/schemas/definition/test_envelope.py b/surfsense_backend/tests/unit/automations/schemas/definition/test_envelope.py index c625b0ec9..d7b392a1d 100644 --- a/surfsense_backend/tests/unit/automations/schemas/definition/test_envelope.py +++ b/surfsense_backend/tests/unit/automations/schemas/definition/test_envelope.py @@ -11,7 +11,9 @@ from app.automations.schemas.definition.plan_step import PlanStep pytestmark = pytest.mark.unit -def test_automation_definition_accepts_minimal_valid_input_with_sensible_defaults() -> None: +def test_automation_definition_accepts_minimal_valid_input_with_sensible_defaults() -> ( + None +): """A definition with just ``name`` + a one-step ``plan`` is valid and fills in the rest with safe defaults so users don't have to write out every section to get started.""" diff --git a/surfsense_backend/tests/unit/automations/templating/test_environment.py b/surfsense_backend/tests/unit/automations/templating/test_environment.py index ec1c0ee40..64850c9c5 100644 --- a/surfsense_backend/tests/unit/automations/templating/test_environment.py +++ b/surfsense_backend/tests/unit/automations/templating/test_environment.py @@ -32,7 +32,9 @@ def test_environment_finalizes_datetime_output_to_iso_string() -> None: when emitting ``inputs.fired_at`` and other datetime values.""" dt = datetime(2026, 5, 28, 14, 30, tzinfo=UTC) - assert render_template("{{ moment }}", {"moment": dt}) == "2026-05-28T14:30:00+00:00" + assert ( + render_template("{{ moment }}", {"moment": dt}) == "2026-05-28T14:30:00+00:00" + ) def test_environment_finalizes_none_output_to_empty_string() -> None: diff --git a/surfsense_backend/tests/unit/automations/test_definition_types.py b/surfsense_backend/tests/unit/automations/test_definition_types.py index 231e4fa97..2320b61d3 100644 --- a/surfsense_backend/tests/unit/automations/test_definition_types.py +++ b/surfsense_backend/tests/unit/automations/test_definition_types.py @@ -31,7 +31,7 @@ def test_action_definition_params_schema_reflects_params_model() -> None: name="N", description="D", params_model=_Topic, - build_handler=lambda _ctx: (lambda _p: {}), # type: ignore[arg-type,return-value] + build_handler=lambda _ctx: lambda _p: {}, # type: ignore[arg-type,return-value] ) schema = definition.params_schema diff --git a/surfsense_backend/tests/unit/automations/test_stores.py b/surfsense_backend/tests/unit/automations/test_stores.py index e54062d64..d005d7be7 100644 --- a/surfsense_backend/tests/unit/automations/test_stores.py +++ b/surfsense_backend/tests/unit/automations/test_stores.py @@ -29,7 +29,9 @@ class _Params(BaseModel): def _trigger(type_: str = "test_trigger") -> TriggerDefinition: - return TriggerDefinition(type=type_, description="Test trigger.", params_model=_Params) + return TriggerDefinition( + type=type_, description="Test trigger.", params_model=_Params + ) def _action(type_: str = "test_action") -> ActionDefinition: @@ -38,7 +40,7 @@ def _action(type_: str = "test_action") -> ActionDefinition: name="Test", description="Test action.", params_model=_Params, - build_handler=lambda _ctx: (lambda _p: {}), # type: ignore[arg-type,return-value] + build_handler=lambda _ctx: lambda _p: {}, # type: ignore[arg-type,return-value] ) @@ -112,4 +114,4 @@ def test_all_triggers_returns_defensive_snapshot( snapshot = all_triggers() snapshot.pop("snapshot_test") - assert get_trigger("snapshot_test") is not None \ No newline at end of file + assert get_trigger("snapshot_test") is not None diff --git a/surfsense_backend/tests/unit/automations/triggers/schedule/test_cron.py b/surfsense_backend/tests/unit/automations/triggers/schedule/test_cron.py index 261e51b18..5c7580823 100644 --- a/surfsense_backend/tests/unit/automations/triggers/schedule/test_cron.py +++ b/surfsense_backend/tests/unit/automations/triggers/schedule/test_cron.py @@ -45,8 +45,12 @@ def test_compute_next_fire_at_respects_dst_offset_change() -> None: winter_after = datetime(2026, 2, 15, 0, 0, tzinfo=UTC) summer_after = datetime(2026, 4, 15, 0, 0, tzinfo=UTC) - winter_fire = compute_next_fire_at("0 9 * * *", "America/New_York", after=winter_after) - summer_fire = compute_next_fire_at("0 9 * * *", "America/New_York", after=summer_after) + winter_fire = compute_next_fire_at( + "0 9 * * *", "America/New_York", after=winter_after + ) + summer_fire = compute_next_fire_at( + "0 9 * * *", "America/New_York", after=summer_after + ) assert winter_fire == datetime(2026, 2, 15, 14, 0, tzinfo=UTC) assert summer_fire == datetime(2026, 4, 15, 13, 0, tzinfo=UTC) diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py index eb24b4df8..e014bb911 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py @@ -25,7 +25,6 @@ from __future__ import annotations import asyncio import inspect -from dataclasses import dataclass from typing import Any from unittest.mock import AsyncMock, patch @@ -33,7 +32,6 @@ import pytest from app.agents.new_chat.context import SurfSenseContextSchema from app.services.new_streaming_service import VercelStreamingService - from app.tasks.chat.stream_new_chat import ( stream_new_chat as old_stream_new_chat, stream_resume_chat as old_stream_resume_chat, @@ -141,39 +139,28 @@ def test_orchestrators_are_async_generator_functions() -> None: # ------------------------------------------------------------ initial thinking -@dataclass -class _FakeSurfsenseDoc: - """Stand-in for ``SurfsenseDocsDocument`` with just the field we read.""" - - title: str - - @pytest.mark.parametrize( - "user_query, image_urls, docs, expected_title, expected_action", + "user_query, image_urls, expected_title, expected_action", [ - ("hello world", None, [], "Understanding your request", "Processing"), - ("", ["data:image/png;base64,AAA"], [], "Understanding your request", "Processing"), - ("", None, [], "Understanding your request", "Processing"), + ("hello world", None, "Understanding your request", "Processing"), ( - "doc question", - None, - [_FakeSurfsenseDoc(title="My Doc")], - "Analyzing referenced content", - "Analyzing", + "", + ["data:image/png;base64,AAA"], + "Understanding your request", + "Processing", ), + ("", None, "Understanding your request", "Processing"), ], ) def test_initial_thinking_step_branches( user_query: str, image_urls: list[str] | None, - docs: list[Any], expected_title: str, expected_action: str, ) -> None: step = build_initial_thinking_step( user_query=user_query, user_image_data_urls=image_urls, - mentioned_surfsense_docs=docs, # type: ignore[arg-type] ) assert step.step_id == "thinking-1" assert step.title == expected_title @@ -186,7 +173,6 @@ def test_initial_thinking_step_truncates_long_query() -> None: step = build_initial_thinking_step( user_query=long_query, user_image_data_urls=None, - mentioned_surfsense_docs=[], ) # 80-char truncation + ellipsis, sandwiched after "Processing: ". assert "..." in step.items[0] @@ -195,23 +181,14 @@ def test_initial_thinking_step_truncates_long_query() -> None: assert payload.startswith("x" * 80) and payload.endswith("...") -def test_initial_thinking_step_collapses_many_doc_names() -> None: - docs = [_FakeSurfsenseDoc(title=f"Doc {i}") for i in range(5)] - step = build_initial_thinking_step( - user_query="q", - user_image_data_urls=None, - mentioned_surfsense_docs=docs, # type: ignore[arg-type] - ) - assert "[5 docs]" in step.items[0] - - # ------------------------------------------------------------ capability gate def test_image_capability_passes_without_images() -> None: - assert check_image_input_capability( - user_image_data_urls=None, agent_config=None - ) is None + assert ( + check_image_input_capability(user_image_data_urls=None, agent_config=None) + is None + ) def test_image_capability_passes_when_capability_unknown() -> None: @@ -500,9 +477,7 @@ def test_can_recover_provider_rate_limit_rejects_non_rate_limit_exception() -> N def test_spawn_set_ai_responding_bg_noop_without_user_id() -> None: async def _run() -> set[asyncio.Task]: background: set[asyncio.Task] = set() - spawn_set_ai_responding_bg( - chat_id=1, user_id=None, background_tasks=background - ) + spawn_set_ai_responding_bg(chat_id=1, user_id=None, background_tasks=background) return background bg = asyncio.run(_run()) diff --git a/surfsense_web/app/(home)/free/page.tsx b/surfsense_web/app/(home)/free/page.tsx index 4512f3396..5cea9b6d2 100644 --- a/surfsense_web/app/(home)/free/page.tsx +++ b/surfsense_web/app/(home)/free/page.tsx @@ -221,10 +221,7 @@ export default async function FreeHubPage() { {/* In-content ad: above the model table */} -