mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
refactor: remove search_surfsense_docs tool and related references
- Deleted the `search_surfsense_docs` tool and its associated files, streamlining the agent's toolset. - Updated various components and prompts to remove references to the now-removed tool, ensuring consistency across the codebase. - Adjusted documentation to direct users to the SurfSense documentation link for product-related queries instead.
This commit is contained in:
parent
9b9e6828c7
commit
40ca9e6ed2
71 changed files with 232 additions and 1676 deletions
|
|
@ -0,0 +1,129 @@
|
|||
"""Drop Surfsense docs tables (feature removed end to end)
|
||||
|
||||
Revision ID: 146
|
||||
Revises: 145
|
||||
Create Date: 2026-05-28
|
||||
|
||||
Removes the SurfSense product-documentation feature: the
|
||||
``surfsense_docs_documents`` and ``surfsense_docs_chunks`` tables (created
|
||||
in revision 60) and the GIN trigram index on the title column (added in
|
||||
revision 67). The docs were seeded at startup from local MDX files, so no
|
||||
user data is lost. Downgrade recreates the tables and indexes (schema only; no rows are restored).
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
from app.config import config
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "146"
|
||||
down_revision: str | None = "145"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
# Embedding dimension is required to recreate the vector columns on downgrade.
|
||||
EMBEDDING_DIM = config.embedding_model_instance.dimension
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Remove every SurfSense-docs index, then the two tables themselves.

    Every statement uses ``IF EXISTS``, so objects that are already missing
    are skipped instead of raising.
    """
    # Same order as before: trigram (revision 67), full-text, vector, B-tree.
    index_names = (
        "idx_surfsense_docs_title_trgm",
        "surfsense_docs_chunks_search_index",
        "surfsense_docs_documents_search_index",
        "surfsense_docs_chunks_vector_index",
        "surfsense_docs_documents_vector_index",
        "ix_surfsense_docs_chunks_document_id",
        "ix_surfsense_docs_documents_updated_at",
        "ix_surfsense_docs_documents_content_hash",
        "ix_surfsense_docs_documents_source",
    )
    for index_name in index_names:
        op.execute(f"DROP INDEX IF EXISTS {index_name}")

    # The chunks table holds a foreign key to documents, so it is dropped first.
    for table_name in ("surfsense_docs_chunks", "surfsense_docs_documents"):
        op.execute(f"DROP TABLE IF EXISTS {table_name}")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Rebuild the SurfSense-docs schema that ``upgrade`` removes.

    Creates both tables, then their B-tree, vector (HNSW), full-text (GIN)
    and trigram (GIN) indexes. Only the schema is recreated; no rows are
    inserted. Every statement uses ``IF NOT EXISTS``.
    """
    documents = "surfsense_docs_documents"
    chunks = "surfsense_docs_chunks"

    # Parent table first: the chunks table references it through a foreign key.
    op.execute(
        f"""
        CREATE TABLE IF NOT EXISTS surfsense_docs_documents (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
            source VARCHAR NOT NULL UNIQUE,
            title VARCHAR NOT NULL,
            content TEXT NOT NULL,
            content_hash VARCHAR NOT NULL,
            embedding vector({EMBEDDING_DIM}),
            updated_at TIMESTAMP WITH TIME ZONE
        );
        """
    )
    op.execute(
        f"""
        CREATE TABLE IF NOT EXISTS surfsense_docs_chunks (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
            content TEXT NOT NULL,
            embedding vector({EMBEDDING_DIM}),
            document_id INTEGER NOT NULL REFERENCES surfsense_docs_documents(id) ON DELETE CASCADE
        );
        """
    )

    # B-tree indexes, described as (index name, table, column).
    btree_indexes = (
        ("ix_surfsense_docs_documents_source", documents, "source"),
        ("ix_surfsense_docs_documents_content_hash", documents, "content_hash"),
        ("ix_surfsense_docs_documents_updated_at", documents, "updated_at"),
        ("ix_surfsense_docs_chunks_document_id", chunks, "document_id"),
    )
    for index_name, table, column in btree_indexes:
        op.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}({column})")

    # Vector indexes: one HNSW index per table on the embedding column.
    vector_index_sql = """
        CREATE INDEX IF NOT EXISTS {table}_vector_index
        ON {table} USING hnsw (embedding public.vector_cosine_ops);
        """
    for table in (documents, chunks):
        op.execute(vector_index_sql.format(table=table))

    # Full-text search indexes: one GIN index per table over the content tsvector.
    search_index_sql = """
        CREATE INDEX IF NOT EXISTS {table}_search_index
        ON {table} USING gin (to_tsvector('english', content));
        """
    for table in (documents, chunks):
        op.execute(search_index_sql.format(table=table))

    # Trigram index on the document title (originally added in revision 67).
    op.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm
        ON surfsense_docs_documents USING gin (title gin_trgm_ops);
        """
    )
|
||||
|
|
@ -4,8 +4,8 @@ never invent ids you didn't see. Citation ids are resolved by exact-match
|
|||
lookup; a wrong id silently breaks the link, so when in doubt, omit.
|
||||
|
||||
### Channel A — chunk blocks injected this turn
|
||||
When `search_surfsense_docs` or `web_search` returns `<document>` /
|
||||
`<chunk id='…'>` blocks in this turn:
|
||||
When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
|
||||
turn:
|
||||
|
||||
1. For each factual statement taken from those chunks, add
|
||||
`[citation:chunk_id]` using the **exact** id from a visible
|
||||
|
|
|
|||
|
|
@ -20,8 +20,8 @@ it to resolve paths the user describes in natural language ("my Q2 roadmap",
|
|||
delegating to a specialist.
|
||||
|
||||
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
|
||||
by KB search (from `search_surfsense_docs`, or backing `<priority_documents>`).
|
||||
Each chunk carries a stable `id` attribute.
|
||||
by KB search (backing `<priority_documents>`). Each chunk carries a stable
|
||||
`id` attribute.
|
||||
|
||||
If a block doesn't appear this turn, work from the conversation alone.
|
||||
</dynamic_context>
|
||||
|
|
|
|||
|
|
@ -20,8 +20,8 @@ week's planning notes") into concrete document references before delegating
|
|||
to a specialist.
|
||||
|
||||
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
|
||||
by KB search (from `search_surfsense_docs`, or backing `<priority_documents>`).
|
||||
Each chunk carries a stable `id` attribute.
|
||||
by KB search (backing `<priority_documents>`). Each chunk carries a stable
|
||||
`id` attribute.
|
||||
|
||||
If a block doesn't appear this turn, work from the conversation alone.
|
||||
</dynamic_context>
|
||||
|
|
|
|||
|
|
@ -1,19 +1,21 @@
|
|||
<knowledge_base_first>
|
||||
CRITICAL — ground factual answers in what you actually receive this turn:
|
||||
- injected workspace context (see `<dynamic_context>`),
|
||||
- results from your own tool calls (`search_surfsense_docs`, `web_search`,
|
||||
`scrape_webpage`),
|
||||
- results from your own tool calls (`web_search`, `scrape_webpage`),
|
||||
- or substantive summaries returned by a `task` specialist you invoked.
|
||||
|
||||
Do **not** answer factual or informational questions from general knowledge
|
||||
unless the user explicitly authorises it after you say you couldn't find
|
||||
enough in those sources. The flow when nothing is found:
|
||||
|
||||
1. Say you couldn't find enough in their workspace, docs, or tool output.
|
||||
1. Say you couldn't find enough in their workspace or tool output.
|
||||
2. Ask: *"Would you like me to answer from my general knowledge instead?"*
|
||||
3. Only answer from general knowledge after a clear yes.
|
||||
|
||||
This rule does NOT apply to: casual conversation · meta-questions about
|
||||
SurfSense ("what can you do?") · formatting or analysis of content already
|
||||
in chat · clear rewrite/edit instructions · lightweight web research.
|
||||
|
||||
For "how do I use SurfSense" / product-documentation questions, point the
|
||||
user to https://www.surfsense.com/docs.
|
||||
</knowledge_base_first>
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Structured reasoning:
|
|||
- For non-trivial work, `<thinking>` / short `<plan>` before tool calls is fine.
|
||||
|
||||
Professional objectivity:
|
||||
- Accuracy over flattery; verify with **search_surfsense_docs**, **web_search**, **scrape_webpage**, or **task** when unsure — don’t invent connector access.
|
||||
- Accuracy over flattery; verify with **web_search**, **scrape_webpage**, or **task** when unsure — don’t invent connector access.
|
||||
|
||||
Task management:
|
||||
- For 3+ steps, use todo tooling; update statuses promptly.
|
||||
|
|
|
|||
|
|
@ -13,6 +13,6 @@ Attribution:
|
|||
|
||||
Tool calls:
|
||||
- Parallelise independent calls.
|
||||
- Prefer **search_surfsense_docs** for SurfSense docs/product questions before **web_search** when that fits the ask.
|
||||
- For SurfSense docs/product questions, point the user to https://www.surfsense.com/docs.
|
||||
- Don’t invent paths, chunk ids, or URLs — only values from tools or the user.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ Output style:
|
|||
- GitHub-flavoured Markdown; monospace-friendly.
|
||||
|
||||
Workflow (Understand → Plan → Act → Verify):
|
||||
1. **Understand:** parse the ask; use **search_surfsense_docs** / injected workspace context before guessing.
|
||||
1. **Understand:** parse the ask; use injected workspace context before guessing.
|
||||
2. **Plan:** for multi-step work, a short plan first.
|
||||
3. **Act:** only with tools you actually have on this agent (see `<tools>` and `<tool_routing>`). Connector work → **task**.
|
||||
4. **Verify:** re-read or re-search only when it materially reduces risk.
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ Output style:
|
|||
|
||||
Tool calls:
|
||||
- Parallelise independent calls in one turn.
|
||||
- Prefer **search_surfsense_docs** for SurfSense-product questions, **web_search** / **scrape_webpage**
|
||||
for fresh public facts; integrations and heavy workflows → **task**.
|
||||
- For SurfSense-product questions, point the user to https://www.surfsense.com/docs;
|
||||
use **web_search** / **scrape_webpage** for fresh public facts; integrations and
|
||||
heavy workflows → **task**.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -3,10 +3,7 @@ You have two execution channels. Pick the one that owns the work — never
|
|||
simulate one with the other.
|
||||
|
||||
### 1. Direct tools (you call them yourself)
|
||||
- `search_surfsense_docs` — SurfSense product docs (setup, configuration,
|
||||
connector docs, feature behavior).
|
||||
- `web_search` — search the public web (anything outside SurfSense docs and
|
||||
the workspace KB).
|
||||
- `web_search` — search the public web (anything outside the workspace KB).
|
||||
- `scrape_webpage` — fetch the body of a specific public URL.
|
||||
- `update_memory` — curate persistent memory (see `<memory_protocol>`).
|
||||
- `write_todos` — maintain a structured plan when the turn series spans
|
||||
|
|
@ -14,6 +11,10 @@ simulate one with the other.
|
|||
`in_progress` **before** the `task` call that handles it, `completed`
|
||||
once the call returns. Skip for single-step requests.
|
||||
|
||||
**Questions about how to use SurfSense itself** (setup, configuration,
|
||||
connectors, feature behavior) — point the user to the documentation:
|
||||
https://www.surfsense.com/docs. There is no docs-search tool; give the link.
|
||||
|
||||
**You have NO filesystem tools.** Any read, write, edit, move, rename, or
|
||||
search inside the user's workspace goes through `task(knowledge_base, …)` —
|
||||
never via `write_file`, `ls`, or any direct file operation.
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
"""``search_surfsense_docs`` — description + few-shot examples."""
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
- `search_surfsense_docs` — Search official SurfSense documentation (product
|
||||
help).
|
||||
- Use when the user asks how SurfSense itself works — setup, configuration,
|
||||
connector documentation, feature behavior, anything covered in the
|
||||
product docs.
|
||||
- Not a substitute for `task` when the user wants actions inside a
|
||||
connected service (Gmail, Slack, Jira, Notion, etc.).
|
||||
- Args: `query`, `top_k` (default 10).
|
||||
- Returns doc excerpts; chunk ids may appear for attribution — see
|
||||
`<citations>` for the contract.
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
<example>
|
||||
user: "How do I install SurfSense?"
|
||||
→ search_surfsense_docs(query="installation setup")
|
||||
</example>
|
||||
|
||||
<example>
|
||||
user: "What connectors does SurfSense support?"
|
||||
→ search_surfsense_docs(query="available connectors integrations")
|
||||
</example>
|
||||
|
||||
<example>
|
||||
user: "How do I set up the Notion connector?"
|
||||
→ search_surfsense_docs(query="Notion connector setup configuration")
|
||||
(Changing data inside Notion itself → `task(notion, …)`, not this tool.)
|
||||
</example>
|
||||
|
|
@ -6,7 +6,6 @@ Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subag
|
|||
from __future__ import annotations
|
||||
|
||||
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
|
||||
"search_surfsense_docs",
|
||||
"web_search",
|
||||
"scrape_webpage",
|
||||
"update_memory",
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ Gather and synthesize evidence using SurfSense research tools with clear citatio
|
|||
<available_tools>
|
||||
- `web_search`
|
||||
- `scrape_webpage`
|
||||
- `search_surfsense_docs`
|
||||
</available_tools>
|
||||
|
||||
<tool_policy>
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
"""Research-stage tools: web search, scrape, and in-product doc search."""
|
||||
"""Research-stage tools: web search and scrape."""
|
||||
|
||||
from .scrape_webpage import create_scrape_webpage_tool
|
||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
||||
from .web_search import create_web_search_tool
|
||||
|
||||
__all__ = [
|
||||
"create_scrape_webpage_tool",
|
||||
"create_search_surfsense_docs_tool",
|
||||
"create_web_search_tool",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ from langchain_core.tools import BaseTool
|
|||
from app.agents.new_chat.permissions import Ruleset
|
||||
|
||||
from .scrape_webpage import create_scrape_webpage_tool
|
||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
||||
from .web_search import create_web_search_tool
|
||||
|
||||
NAME = "research"
|
||||
|
|
@ -27,5 +26,4 @@ def load_tools(
|
|||
available_connectors=d.get("available_connectors"),
|
||||
),
|
||||
create_scrape_webpage_tool(firecrawl_api_key=d.get("firecrawl_api_key")),
|
||||
create_search_surfsense_docs_tool(db_session=d["db_session"]),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,145 +0,0 @@
|
|||
"""Semantic search over pre-indexed in-app documentation chunks for user how-to questions."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
|
||||
from app.utils.document_converters import embed_text
|
||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||
|
||||
|
||||
def _cdata(value: object) -> str:
    """Wrap ``value`` in a CDATA section, splitting any embedded ``]]>`` terminator."""
    # "]]>" cannot appear inside CDATA; the standard workaround is to close the
    # section after "]]" and reopen a new one before ">".
    return "<![CDATA[" + str(value).replace("]]>", "]]]]><![CDATA[>") + "]]>"


def format_surfsense_docs_results(results: list[tuple]) -> str:
    """Format (chunk, document) rows as XML with ``doc-`` prefixed ids.

    Chunks are grouped under their parent document in first-seen order. Both
    the document id and every chunk id are rendered with a ``doc-`` prefix.

    Args:
        results: ``(chunk, document)`` pairs. Each chunk must expose ``id`` and
            ``content``; each document must expose ``id``, ``title`` and
            ``source``.

    Returns:
        One ``<document>`` block per distinct document, joined by newlines, or
        a fixed "nothing found" sentence when ``results`` is empty.
    """
    if not results:
        return "No relevant Surfsense documentation found for your query."

    # Group chunks by document; dict insertion order keeps first-seen ordering.
    grouped: dict[int, dict] = {}
    for chunk, doc in results:
        entry = grouped.get(doc.id)
        if entry is None:
            # The public URL depends only on the document, so resolve it once
            # per document instead of once per chunk.
            public_url = surfsense_docs_public_url(doc.source)
            entry = grouped[doc.id] = {
                "document_id": f"doc-{doc.id}",
                "document_type": "SURFSENSE_DOCS",
                "title": doc.title,
                "url": public_url,
                "metadata": {"source": doc.source, "public_url": public_url},
                "chunks": [],
            }
        entry["chunks"].append(
            {
                "chunk_id": f"doc-{chunk.id}",
                "content": chunk.content,
            }
        )

    # Render XML matching format_documents_for_context structure
    parts: list[str] = []
    for g in grouped.values():
        metadata_json = json.dumps(g["metadata"], ensure_ascii=False)

        parts.extend(
            [
                "<document>",
                "<document_metadata>",
                f" <document_id>{g['document_id']}</document_id>",
                f" <document_type>{g['document_type']}</document_type>",
                f" <title>{_cdata(g['title'])}</title>",
                f" <url>{_cdata(g['url'])}</url>",
                f" <metadata_json>{_cdata(metadata_json)}</metadata_json>",
                "</document_metadata>",
                "",
                "<document_content>",
            ]
        )
        parts.extend(
            f" <chunk id='{ch['chunk_id']}'>{_cdata(ch['content'])}</chunk>"
            for ch in g["chunks"]
        )
        parts.extend(["</document_content>", "</document>", ""])

    return "\n".join(parts).strip()
|
||||
|
||||
|
||||
async def search_surfsense_docs_async(
    query: str,
    db_session: AsyncSession,
    top_k: int = 10,
) -> str:
    """
    Look up the Surfsense documentation chunks most similar to ``query``.

    Args:
        query: The search query about Surfsense usage
        db_session: Database session used to run the lookup
        top_k: Maximum number of chunks to return

    Returns:
        The matching chunks rendered by ``format_surfsense_docs_results``
    """
    # embed_text is a plain callable; run it in a worker thread so awaiting it
    # does not block the event loop.
    query_vector = await asyncio.to_thread(embed_text, query)

    # Rank chunks by the "<=>" operator between their embedding and the query
    # vector, pairing each chunk with its parent document.
    distance = SurfsenseDocsChunk.embedding.op("<=>")(query_vector)
    same_document = SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id
    statement = (
        select(SurfsenseDocsChunk, SurfsenseDocsDocument)
        .join(SurfsenseDocsDocument, same_document)
        .order_by(distance)
        .limit(top_k)
    )

    matches = (await db_session.execute(statement)).all()
    return format_surfsense_docs_results(matches)
|
||||
|
||||
|
||||
def create_search_surfsense_docs_tool(db_session: AsyncSession):
    """
    Build the ``search_surfsense_docs`` tool bound to a database session.

    Args:
        db_session: Database session the returned tool uses for its queries

    Returns:
        The ``@tool``-decorated coroutine that searches Surfsense documentation
    """

    # NOTE: the inner docstring is runtime text (LangChain's ``@tool`` derives
    # the tool description from it), so it is kept word for word.
    @tool
    async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
        """
        Search Surfsense documentation for help with using the application.

        Use this tool when the user asks questions about:
        - How to use Surfsense features
        - Installation and setup instructions
        - Configuration options and settings
        - Troubleshooting common issues
        - Available connectors and integrations
        - Browser extension usage
        - API documentation

        This searches the official Surfsense documentation that was indexed
        at deployment time. It does NOT search the user's personal knowledge base.

        Args:
            query: The search query about Surfsense usage or features
            top_k: Number of documentation chunks to retrieve (default: 10)

        Returns:
            Relevant documentation content formatted with chunk IDs for citations
        """
        formatted = await search_surfsense_docs_async(
            query=query,
            db_session=db_session,
            top_k=top_k,
        )
        return formatted

    return search_surfsense_docs
|
||||
|
|
@ -104,7 +104,7 @@ class AgentFeatureFlags:
|
|||
# ``tools/google_drive``, ``tools/dropbox``, ``tools/onedrive``,
|
||||
# ``tools/google_calendar``, ``tools/confluence``, ``tools/discord``,
|
||||
# ``tools/teams``, ``tools/luma``, ``connected_accounts``,
|
||||
# ``update_memory``, ``search_surfsense_docs``) now acquire fresh
|
||||
# ``update_memory``) now acquire fresh
|
||||
# short-lived ``AsyncSession`` instances per call via
|
||||
# :data:`async_session_maker`. The factory still accepts ``db_session``
|
||||
# for registry compatibility but ``del``'s it immediately — see any
|
||||
|
|
|
|||
|
|
@ -73,9 +73,8 @@ class ResolvedMentionSet:
|
|||
``@Project Roadmap`` is never shadowed by a shorter prefix
|
||||
``@Project``).
|
||||
|
||||
``mentioned_document_ids`` collapses doc + surfsense_doc chips into
|
||||
a single ordered, deduped list because the priority middleware
|
||||
treats them uniformly downstream — see
|
||||
``mentioned_document_ids`` is an ordered, deduped list consumed by
|
||||
the priority middleware downstream — see
|
||||
``KnowledgePriorityMiddleware._compute_priority_paths``.
|
||||
"""
|
||||
|
||||
|
|
@ -103,7 +102,6 @@ async def resolve_mentions(
|
|||
search_space_id: int,
|
||||
mentioned_documents: list[MentionedDocumentInfo] | None,
|
||||
mentioned_document_ids: list[int] | None = None,
|
||||
mentioned_surfsense_doc_ids: list[int] | None = None,
|
||||
mentioned_folder_ids: list[int] | None = None,
|
||||
) -> ResolvedMentionSet:
|
||||
"""Resolve every @-mention chip on a turn into virtual paths.
|
||||
|
|
@ -111,8 +109,7 @@ async def resolve_mentions(
|
|||
The function takes both the ``mentioned_documents`` discriminated
|
||||
list (chip metadata used for substitution + persistence) and the
|
||||
parallel id arrays (``mentioned_document_ids``,
|
||||
``mentioned_surfsense_doc_ids``, ``mentioned_folder_ids``) for two
|
||||
reasons:
|
||||
``mentioned_folder_ids``) for two reasons:
|
||||
|
||||
* Legacy clients that haven't migrated to the unified chip list
|
||||
still send the id arrays — we treat the union as authoritative.
|
||||
|
|
@ -142,7 +139,6 @@ async def resolve_mentions(
|
|||
dict.fromkeys(
|
||||
[
|
||||
*(mentioned_document_ids or []),
|
||||
*(mentioned_surfsense_doc_ids or []),
|
||||
*chip_doc_ids,
|
||||
]
|
||||
)
|
||||
|
|
|
|||
|
|
@ -59,14 +59,13 @@ Do NOT cite document_id. Always use the chunk id.
|
|||
- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
|
||||
- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
|
||||
- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
|
||||
- Copy the EXACT chunk id from the XML - if it says `<chunk id='doc-123'>`, use [citation:doc-123]
|
||||
- Copy the EXACT chunk id from the XML - if it says `<chunk id='5'>`, use [citation:5]
|
||||
- If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page]
|
||||
</citation_format>
|
||||
|
||||
<citation_examples>
|
||||
CORRECT citation formats:
|
||||
- [citation:5] (numeric chunk ID from knowledge base)
|
||||
- [citation:doc-123] (for Surfsense documentation chunks)
|
||||
- [citation:https://example.com/article] (URL chunk ID from web search results)
|
||||
- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
|
|||
2. Ask the user: "Would you like me to answer from my general knowledge instead?"
|
||||
3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes.
|
||||
- This policy does NOT apply to:
|
||||
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?")
|
||||
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
|
||||
* Formatting, summarization, or analysis of content already present in the conversation
|
||||
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
|
||||
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
|
|||
2. Ask: "Would you like me to answer from my general knowledge instead?"
|
||||
3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes.
|
||||
- This policy does NOT apply to:
|
||||
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?")
|
||||
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
|
||||
* Formatting, summarization, or analysis of content already present in the conversation
|
||||
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
|
||||
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ When to use which tool:
|
|||
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
|
||||
- Real-time public web data → call web_search
|
||||
- Reading a specific webpage → call scrape_webpage
|
||||
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
|
||||
|
||||
**`task` subagents (when to delegate):**
|
||||
- **`linear_specialist`** — Linear-only investigations and tool use.
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ When to use which tool:
|
|||
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
|
||||
- Real-time public web data → call web_search
|
||||
- Reading a specific webpage → call scrape_webpage
|
||||
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
|
||||
|
||||
**`task` subagents (when to delegate):**
|
||||
- **`linear_specialist`** — Linear-only investigations and tool use.
|
||||
|
|
|
|||
|
|
@ -151,7 +151,6 @@ def _read_fragment(subpath: str) -> str:
|
|||
# Ordered for reading flow: fundamentals first, then artifact generators,
|
||||
# then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``).
|
||||
ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = (
|
||||
"search_surfsense_docs",
|
||||
"web_search",
|
||||
"generate_podcast",
|
||||
"generate_video_presentation",
|
||||
|
|
|
|||
|
|
@ -1,9 +0,0 @@
|
|||
|
||||
- User: "How do I install SurfSense?"
|
||||
- Call: `search_surfsense_docs(query="installation setup")`
|
||||
- User: "What connectors does SurfSense support?"
|
||||
- Call: `search_surfsense_docs(query="available connectors integrations")`
|
||||
- User: "How do I set up the Notion connector?"
|
||||
- Call: `search_surfsense_docs(query="Notion connector setup configuration")`
|
||||
- User: "How do I use Docker to run SurfSense?"
|
||||
- Call: `search_surfsense_docs(query="Docker installation setup")`
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
|
||||
- search_surfsense_docs: Search the official SurfSense documentation.
|
||||
- Use this tool when the user asks anything about SurfSense itself (the application they are using).
|
||||
- Args:
|
||||
- query: The search query about SurfSense
|
||||
- top_k: Number of documentation chunks to retrieve (default: 10)
|
||||
- Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123])
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
---
|
||||
name: email-drafting
|
||||
description: Draft an email matching the user's voice, with structured intent and CTA
|
||||
allowed-tools: search_surfsense_docs
|
||||
---
|
||||
|
||||
# Email drafting
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
name: kb-research
|
||||
description: Structured approach to finding and synthesizing information from the user's knowledge base
|
||||
allowed-tools: search_surfsense_docs, scrape_webpage, read_file, ls_tree, grep, web_search
|
||||
allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search
|
||||
---
|
||||
|
||||
# Knowledge-base research
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
name: meeting-prep
|
||||
description: Pull together briefing materials before a scheduled meeting
|
||||
allowed-tools: search_surfsense_docs, web_search, scrape_webpage, read_file
|
||||
allowed-tools: web_search, scrape_webpage, read_file
|
||||
---
|
||||
|
||||
# Meeting preparation
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
name: report-writing
|
||||
description: How to scope, draft, and revise a Markdown report artifact via generate_report
|
||||
allowed-tools: generate_report, search_surfsense_docs, read_file
|
||||
allowed-tools: generate_report, read_file
|
||||
---
|
||||
|
||||
# Report writing
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
---
|
||||
name: slack-summary
|
||||
description: Distill a Slack channel or thread into actionable summary
|
||||
allowed-tools: search_surfsense_docs
|
||||
---
|
||||
|
||||
# Slack summarization
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ logger = logging.getLogger(__name__)
|
|||
# ``glob``, ``grep``) plus the SurfSense-side read tools.
|
||||
EXPLORE_READ_TOOLS: frozenset[str] = frozenset(
|
||||
{
|
||||
"search_surfsense_docs",
|
||||
"web_search",
|
||||
"scrape_webpage",
|
||||
"read_file",
|
||||
|
|
@ -61,7 +60,6 @@ EXPLORE_READ_TOOLS: frozenset[str] = frozenset(
|
|||
# is needed, the parent should hand off to ``explore`` first.
|
||||
REPORT_WRITER_TOOLS: frozenset[str] = frozenset(
|
||||
{
|
||||
"search_surfsense_docs",
|
||||
"read_file",
|
||||
"generate_report",
|
||||
}
|
||||
|
|
@ -222,7 +220,6 @@ EXPLORE_SYSTEM_PROMPT = """You are the **explore** subagent for SurfSense.
|
|||
Conduct read-only research across the user's knowledge base, the web, and any documents the parent agent has surfaced. Return a synthesized answer with explicit citations — never speculate beyond the sources you have actually inspected.
|
||||
|
||||
## Tools available
|
||||
- `search_surfsense_docs` — fast hybrid search over the user's knowledge base.
|
||||
- `web_search` — only when the user's KB clearly does not contain the answer.
|
||||
- `scrape_webpage` — to read a URL the user or the search results provided.
|
||||
- `read_file`, `ls`, `glob`, `grep` — to inspect specific documents or trees the parent has flagged.
|
||||
|
|
@ -242,7 +239,7 @@ Produce a single high-quality report deliverable using `generate_report`. The pa
|
|||
|
||||
## Workflow
|
||||
1. **Outline first.** Before calling `generate_report`, write a one-paragraph outline of the sections you plan to produce. Confirm the outline reflects the parent's instructions.
|
||||
2. **Source resolution.** Decide whether to call `search_surfsense_docs` and `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set.
|
||||
2. **Source resolution.** Decide whether to call `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set.
|
||||
3. **One report.** Call `generate_report` exactly once with `source_strategy` chosen per the topic and chat history (see the `report-writing` skill).
|
||||
4. **Confirm.** End with a one-sentence summary in your final message — never paste the report back into chat; the artifact card renders itself.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ This module contains all the tools available to the SurfSense agent.
|
|||
To add a new tool, see the documentation in registry.py.
|
||||
|
||||
Available tools:
|
||||
- search_surfsense_docs: Search Surfsense documentation for usage help
|
||||
- generate_podcast: Generate audio podcasts from content
|
||||
- generate_video_presentation: Generate video presentations with slides and narration
|
||||
- generate_image: Generate images from text descriptions using AI models
|
||||
|
|
@ -31,7 +30,6 @@ from .registry import (
|
|||
get_tool_by_name,
|
||||
)
|
||||
from .scrape_webpage import create_scrape_webpage_tool
|
||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
||||
from .update_memory import create_update_memory_tool, create_update_team_memory_tool
|
||||
from .video_presentation import create_generate_video_presentation_tool
|
||||
|
||||
|
|
@ -47,7 +45,6 @@ __all__ = [
|
|||
"create_generate_podcast_tool",
|
||||
"create_generate_video_presentation_tool",
|
||||
"create_scrape_webpage_tool",
|
||||
"create_search_surfsense_docs_tool",
|
||||
"create_update_memory_tool",
|
||||
"create_update_team_memory_tool",
|
||||
"format_documents_for_context",
|
||||
|
|
|
|||
|
|
@ -101,7 +101,6 @@ from .podcast import create_generate_podcast_tool
|
|||
from .report import create_generate_report_tool
|
||||
from .resume import create_generate_resume_tool
|
||||
from .scrape_webpage import create_scrape_webpage_tool
|
||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
||||
from .teams import (
|
||||
create_list_teams_channels_tool,
|
||||
create_read_teams_messages_tool,
|
||||
|
|
@ -258,15 +257,6 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
|
|||
),
|
||||
requires=[],
|
||||
),
|
||||
# Surfsense documentation search tool
|
||||
ToolDefinition(
|
||||
name="search_surfsense_docs",
|
||||
description="Search Surfsense documentation for help with using the application",
|
||||
factory=lambda deps: create_search_surfsense_docs_tool(
|
||||
db_session=deps["db_session"],
|
||||
),
|
||||
requires=["db_session"],
|
||||
),
|
||||
# =========================================================================
|
||||
# SERVICE ACCOUNT DISCOVERY
|
||||
# Generic tool for the LLM to discover connected accounts and resolve
|
||||
|
|
|
|||
|
|
@ -1,174 +0,0 @@
|
|||
"""
|
||||
Surfsense documentation search tool.
|
||||
|
||||
This tool allows the agent to search the pre-indexed Surfsense documentation
|
||||
to help users with questions about how to use the application.
|
||||
|
||||
The documentation is indexed at deployment time from MDX files and stored
|
||||
in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
|
||||
from app.utils.document_converters import embed_text
|
||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||
|
||||
|
||||
def format_surfsense_docs_results(results: list[tuple]) -> str:
    """
    Render documentation search hits as XML context for the LLM.

    Chunks are grouped under their parent document (in first-seen order) and
    every document/chunk identifier is prefixed with ``doc-`` so citations
    take the ``[citation:doc-XXX]`` form.

    Args:
        results: ``(chunk, document)`` pairs as returned by the database query.

    Returns:
        The XML string, or a fixed "nothing found" message when ``results``
        is empty.
    """
    if not results:
        return "No relevant Surfsense documentation found for your query."

    # Bucket chunks per document; dict insertion order keeps the hit order.
    by_document: dict[int, dict] = {}
    for chunk_row, doc_row in results:
        url = surfsense_docs_public_url(doc_row.source)
        entry = by_document.get(doc_row.id)
        if entry is None:
            entry = {
                "document_id": f"doc-{doc_row.id}",
                "document_type": "SURFSENSE_DOCS",
                "title": doc_row.title,
                "url": url,
                "metadata": {"source": doc_row.source, "public_url": url},
                "chunks": [],
            }
            by_document[doc_row.id] = entry
        entry["chunks"].append(
            {"chunk_id": f"doc-{chunk_row.id}", "content": chunk_row.content}
        )

    # Emit one <document> element per group.
    lines: list[str] = []
    for entry in by_document.values():
        meta = json.dumps(entry["metadata"], ensure_ascii=False)
        lines.extend(
            [
                "<document>",
                "<document_metadata>",
                f" <document_id>{entry['document_id']}</document_id>",
                f" <document_type>{entry['document_type']}</document_type>",
                f" <title><![CDATA[{entry['title']}]]></title>",
                f" <url><![CDATA[{entry['url']}]]></url>",
                f" <metadata_json><![CDATA[{meta}]]></metadata_json>",
                "</document_metadata>",
                "",
                "<document_content>",
            ]
        )
        lines.extend(
            f" <chunk id='{piece['chunk_id']}'><![CDATA[{piece['content']}]]></chunk>"
            for piece in entry["chunks"]
        )
        lines.extend(["</document_content>", "</document>", ""])

    return "\n".join(lines).strip()
|
||||
|
||||
|
||||
async def search_surfsense_docs_async(
    query: str,
    db_session: AsyncSession,
    top_k: int = 10,
) -> str:
    """
    Find the documentation chunks closest to ``query`` and format them.

    Args:
        query: The search query about Surfsense usage.
        db_session: Session used to run the lookup.
        top_k: Maximum number of chunks to return.

    Returns:
        The hits rendered by ``format_surfsense_docs_results``.
    """
    # embed_text is synchronous, so it runs in a worker thread.
    vector = await asyncio.to_thread(embed_text, query)

    # Rank chunks by the ``<=>`` operator against the query embedding.
    distance = SurfsenseDocsChunk.embedding.op("<=>")(vector)
    nearest = select(SurfsenseDocsChunk, SurfsenseDocsDocument).join(
        SurfsenseDocsDocument,
        SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id,
    )
    nearest = nearest.order_by(distance).limit(top_k)

    matches = (await db_session.execute(nearest)).all()
    return format_surfsense_docs_results(matches)
|
||||
|
||||
|
||||
def create_search_surfsense_docs_tool(db_session: AsyncSession):
    """
    Build the ``search_surfsense_docs`` agent tool.

    The returned tool never uses the session passed here. Each invocation
    opens its own short-lived session through ``async_session_maker`` so the
    closure can be shared across HTTP requests by the compiled-agent cache
    without holding on to a stale or closed per-request session.

    Args:
        db_session: Accepted only for registry compatibility; discarded.

    Returns:
        The tool function for searching Surfsense documentation.
    """
    del db_session  # intentionally unused — every call opens its own session

    @tool
    async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
        """
        Search Surfsense documentation for help with using the application.

        Use this tool when the user asks questions about:
        - How to use Surfsense features
        - Installation and setup instructions
        - Configuration options and settings
        - Troubleshooting common issues
        - Available connectors and integrations
        - Browser extension usage
        - API documentation

        This searches the official Surfsense documentation that was indexed
        at deployment time. It does NOT search the user's personal knowledge base.

        Args:
            query: The search query about Surfsense usage or features
            top_k: Number of documentation chunks to retrieve (default: 10)

        Returns:
            Relevant documentation content formatted with chunk IDs for citations
        """
        async with async_session_maker() as scoped_session:
            formatted = await search_surfsense_docs_async(
                query=query,
                db_session=scoped_session,
                top_k=top_k,
            )
        return formatted

    return search_surfsense_docs
|
||||
|
|
@ -43,7 +43,6 @@ from app.rate_limiter import get_real_client_ip, limiter
|
|||
from app.routes import router as crud_router
|
||||
from app.routes.auth_routes import router as auth_router
|
||||
from app.schemas import UserCreate, UserRead, UserUpdate
|
||||
from app.tasks.surfsense_docs_indexer import seed_surfsense_docs
|
||||
from app.users import SECRET, auth_backend, current_active_user, fastapi_users
|
||||
from app.utils.perf import log_system_snapshot
|
||||
|
||||
|
|
@ -576,13 +575,6 @@ async def lifespan(app: FastAPI):
|
|||
initialize_llm_router()
|
||||
initialize_image_gen_router()
|
||||
initialize_vision_llm_router()
|
||||
try:
|
||||
await asyncio.wait_for(seed_surfsense_docs(), timeout=120)
|
||||
except TimeoutError:
|
||||
logging.getLogger(__name__).warning(
|
||||
"Surfsense docs seeding timed out after 120s — skipping. "
|
||||
"Docs will be indexed on the next restart."
|
||||
)
|
||||
|
||||
# Phase 1.7 — JIT warmup. Bounded so a stuck warmup never delays
|
||||
# worker readiness. ``shield`` so Uvicorn cancelling startup
|
||||
|
|
|
|||
|
|
@ -1150,46 +1150,6 @@ class Chunk(BaseModel, TimestampMixin):
|
|||
document = relationship("Document", back_populates="chunks")
|
||||
|
||||
|
||||
class SurfsenseDocsDocument(BaseModel, TimestampMixin):
    """
    Surfsense documentation storage.
    Indexed at migration time from MDX files.

    One row per documentation page, keyed by its unique ``source`` path.
    NOTE(review): the app lifespan also called ``seed_surfsense_docs`` at
    startup — confirm whether "migration time" is still accurate.
    """

    __tablename__ = "surfsense_docs_documents"

    # Unique, indexed identifier of the page.
    source = Column(
        String, nullable=False, unique=True, index=True
    )  # File path: "connectors/slack.mdx"
    title = Column(String, nullable=False)
    content = Column(Text, nullable=False)
    content_hash = Column(String, nullable=False, index=True)  # For detecting changes
    # Vector column sized to the configured embedding model's dimension.
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)

    # Child chunks; removed together with the document (delete-orphan cascade).
    chunks = relationship(
        "SurfsenseDocsChunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class SurfsenseDocsChunk(BaseModel, TimestampMixin):
    """Chunk storage for Surfsense documentation.

    Each chunk holds a piece of text plus its embedding and belongs to
    exactly one ``SurfsenseDocsDocument``.
    """

    __tablename__ = "surfsense_docs_chunks"

    content = Column(Text, nullable=False)
    # Vector column sized to the configured embedding model's dimension.
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # Required parent reference; ON DELETE CASCADE removes chunks with the
    # parent row at the database level.
    document_id = Column(
        Integer,
        ForeignKey("surfsense_docs_documents.id", ondelete="CASCADE"),
        nullable=False,
    )
    document = relationship("SurfsenseDocsDocument", back_populates="chunks")
|
||||
|
||||
|
||||
class Podcast(BaseModel, TimestampMixin):
|
||||
"""Podcast model for storing generated podcasts."""
|
||||
|
||||
|
|
@ -2680,11 +2640,6 @@ async def setup_indexes():
|
|||
"CREATE INDEX IF NOT EXISTS idx_documents_search_space_updated ON documents (search_space_id, updated_at DESC NULLS LAST) INCLUDE (id, title, document_type)"
|
||||
)
|
||||
)
|
||||
await conn.execute(
|
||||
text(
|
||||
"CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm ON surfsense_docs_documents USING gin (title gin_trgm_ops)"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
async def create_db_and_tables():
|
||||
|
|
|
|||
|
|
@ -55,7 +55,6 @@ from .search_source_connectors_routes import router as search_source_connectors_
|
|||
from .search_spaces_routes import router as search_spaces_router
|
||||
from .slack_add_connector_route import router as slack_add_connector_router
|
||||
from .stripe_routes import router as stripe_router
|
||||
from .surfsense_docs_routes import router as surfsense_docs_router
|
||||
from .team_memory_routes import router as team_memory_router
|
||||
from .teams_add_connector_route import router as teams_add_connector_router
|
||||
from .video_presentations_routes import router as video_presentations_router
|
||||
|
|
@ -108,7 +107,6 @@ router.include_router(new_llm_config_router) # LLM configs with prompt configur
|
|||
router.include_router(model_list_router) # Dynamic model catalogue from OpenRouter
|
||||
router.include_router(logs_router)
|
||||
router.include_router(circleback_webhook_router) # Circleback meeting webhooks
|
||||
router.include_router(surfsense_docs_router) # Surfsense documentation for citations
|
||||
router.include_router(notifications_router) # Notifications with Zero sync
|
||||
router.include_router(
|
||||
mcp_oauth_router
|
||||
|
|
|
|||
|
|
@ -1785,7 +1785,6 @@ async def handle_new_chat(
|
|||
user_id=str(user.id),
|
||||
llm_config_id=llm_config_id,
|
||||
mentioned_document_ids=request.mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=request.mentioned_folder_ids,
|
||||
mentioned_connector_ids=request.mentioned_connector_ids,
|
||||
mentioned_connectors=mentioned_connectors_payload,
|
||||
|
|
@ -2278,7 +2277,6 @@ async def regenerate_response(
|
|||
user_id=str(user.id),
|
||||
llm_config_id=llm_config_id,
|
||||
mentioned_document_ids=request.mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=request.mentioned_folder_ids,
|
||||
mentioned_connector_ids=request.mentioned_connector_ids,
|
||||
mentioned_connectors=mentioned_connectors_payload,
|
||||
|
|
|
|||
|
|
@ -1,172 +0,0 @@
|
|||
"""
|
||||
Routes for Surfsense documentation.
|
||||
|
||||
These endpoints support the citation system for Surfsense docs,
|
||||
allowing the frontend to fetch document details when a user clicks
|
||||
on a [citation:doc-XXX] link.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.db import (
|
||||
SurfsenseDocsChunk,
|
||||
SurfsenseDocsDocument,
|
||||
User,
|
||||
get_async_session,
|
||||
)
|
||||
from app.schemas import PaginatedResponse
|
||||
from app.schemas.surfsense_docs import (
|
||||
SurfsenseDocsChunkRead,
|
||||
SurfsenseDocsDocumentRead,
|
||||
SurfsenseDocsDocumentWithChunksRead,
|
||||
)
|
||||
from app.users import current_active_user
|
||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
    "/surfsense-docs/by-chunk/{chunk_id}",
    response_model=SurfsenseDocsDocumentWithChunksRead,
)
async def get_surfsense_doc_by_chunk_id(
    chunk_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Return the documentation page that owns the given chunk.

    The frontend calls this to resolve ``[citation:doc-XXX]`` links. The
    response carries the whole document with its chunks ordered by id.

    Raises:
        HTTPException: 404 when the chunk or its document does not exist,
            500 for any other failure.
    """
    try:
        # Step 1: locate the chunk itself.
        chunk_lookup = select(SurfsenseDocsChunk).filter(
            SurfsenseDocsChunk.id == chunk_id
        )
        chunk = (await session.execute(chunk_lookup)).scalars().first()
        if not chunk:
            raise HTTPException(
                status_code=404,
                detail=f"Surfsense docs chunk with id {chunk_id} not found",
            )

        # Step 2: load the parent document with all of its chunks.
        document_lookup = (
            select(SurfsenseDocsDocument)
            .options(selectinload(SurfsenseDocsDocument.chunks))
            .filter(SurfsenseDocsDocument.id == chunk.document_id)
        )
        document = (await session.execute(document_lookup)).scalars().first()
        if not document:
            raise HTTPException(
                status_code=404,
                detail="Surfsense docs document not found",
            )

        ordered_chunks = sorted(document.chunks, key=lambda item: item.id)

        return SurfsenseDocsDocumentWithChunksRead(
            id=document.id,
            title=document.title,
            source=document.source,
            public_url=surfsense_docs_public_url(document.source),
            content=document.content,
            chunks=[
                SurfsenseDocsChunkRead(id=item.id, content=item.content)
                for item in ordered_chunks
            ],
        )
    except HTTPException:
        # Let the 404s above through untouched.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to retrieve Surfsense documentation: {e!s}",
        ) from e
|
||||
|
||||
|
||||
@router.get(
    "/surfsense-docs",
    response_model=PaginatedResponse[SurfsenseDocsDocumentRead],
)
async def list_surfsense_docs(
    page: int = 0,
    page_size: int = 50,
    title: str | None = None,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    List all Surfsense documentation documents.

    Args:
        page: Zero-based page index.
        page_size: Number of items per page (default: 50).
        title: Optional title filter (case-insensitive substring match).
            ``%``, ``_`` and ``\\`` are matched literally.
        session: Database session (injected).
        user: Current authenticated user (injected).

    Returns:
        PaginatedResponse[SurfsenseDocsDocumentRead]: Paginated list of Surfsense docs.

    Raises:
        HTTPException: 400 when ``page`` or ``page_size`` is negative,
            500 for any other failure.
    """
    # Validate before the try block: the generic handler below would rewrap
    # this HTTPException as a 500. Negative values would otherwise be passed
    # to the query as a negative OFFSET/LIMIT.
    if page < 0 or page_size < 0:
        raise HTTPException(
            status_code=400,
            detail="page and page_size must be non-negative",
        )

    try:
        # Base query
        query = select(SurfsenseDocsDocument)
        count_query = select(func.count()).select_from(SurfsenseDocsDocument)

        # Filter by title if provided
        if title and title.strip():
            # Escape LIKE metacharacters so the filter is a true substring
            # match rather than a user-controlled pattern.
            escaped = (
                title.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
            )
            title_filter = SurfsenseDocsDocument.title.ilike(
                f"%{escaped}%", escape="\\"
            )
            query = query.filter(title_filter)
            count_query = count_query.filter(title_filter)

        # Get total count
        total_result = await session.execute(count_query)
        total = total_result.scalar() or 0

        # Calculate offset
        offset = page * page_size

        # Get paginated results
        result = await session.execute(
            query.order_by(SurfsenseDocsDocument.title).offset(offset).limit(page_size)
        )
        docs = result.scalars().all()

        # Convert to response format
        items = [
            SurfsenseDocsDocumentRead(
                id=doc.id,
                title=doc.title,
                source=doc.source,
                public_url=surfsense_docs_public_url(doc.source),
                content=doc.content,
                created_at=doc.created_at,
                updated_at=doc.updated_at,
            )
            for doc in docs
        ]

        has_more = (offset + len(items)) < total

        return PaginatedResponse(
            items=items,
            total=total,
            page=page,
            page_size=page_size,
            has_more=has_more,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to list Surfsense documentation: {e!s}",
        ) from e
|
||||
|
|
@ -239,9 +239,6 @@ class NewChatRequest(BaseModel):
|
|||
mentioned_document_ids: list[int] | None = (
|
||||
None # Optional document IDs mentioned with @ in the chat
|
||||
)
|
||||
mentioned_surfsense_doc_ids: list[int] | None = (
|
||||
None # Optional SurfSense documentation IDs mentioned with @ in the chat
|
||||
)
|
||||
mentioned_folder_ids: list[int] | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
|
|
@ -326,7 +323,6 @@ class RegenerateRequest(BaseModel):
|
|||
None # New user query (for edit). None = reload with same query
|
||||
)
|
||||
mentioned_document_ids: list[int] | None = None
|
||||
mentioned_surfsense_doc_ids: list[int] | None = None
|
||||
mentioned_folder_ids: list[int] | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
|
|
|
|||
|
|
@ -1,43 +0,0 @@
|
|||
"""
|
||||
Schemas for Surfsense documentation.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
|
||||
class SurfsenseDocsChunkRead(BaseModel):
    """Schema for a Surfsense docs chunk.

    Carries only the chunk id and its text.
    """

    id: int
    content: str

    # Allow building the schema from attribute-bearing objects such as ORM rows.
    model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
class SurfsenseDocsDocumentRead(BaseModel):
    """Schema for a Surfsense docs document (without chunks).

    Used as the item type of the paginated documentation listing.
    """

    id: int
    title: str
    source: str
    # Required field; the routes fill it from ``source``.
    public_url: str
    content: str
    # Timestamps are optional in the response.
    created_at: datetime | None = None
    updated_at: datetime | None = None

    # Allow building the schema from attribute-bearing objects such as ORM rows.
    model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
class SurfsenseDocsDocumentWithChunksRead(BaseModel):
    """Schema for a Surfsense docs document with its chunks.

    Returned when resolving a citation to its full documentation page.
    """

    id: int
    title: str
    source: str
    # Required field; the routes fill it from ``source``.
    public_url: str
    content: str
    # The document's chunks, each reduced to id + content.
    chunks: list[SurfsenseDocsChunkRead]

    # Allow building the schema from attribute-bearing objects such as ORM rows.
    model_config = ConfigDict(from_attributes=True)
|
||||
|
|
@ -25,7 +25,6 @@ from uuid import UUID
|
|||
import anyio
|
||||
from langchain_core.messages import HumanMessage
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
|
||||
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
|
||||
|
|
@ -55,7 +54,6 @@ from app.db import (
|
|||
NewChatThread,
|
||||
Report,
|
||||
SearchSourceConnectorType,
|
||||
SurfsenseDocsDocument,
|
||||
async_session_maker,
|
||||
shielded_async_session,
|
||||
)
|
||||
|
|
@ -77,7 +75,6 @@ from app.tasks.chat.streaming.helpers.interrupt_inspector import (
|
|||
)
|
||||
from app.utils.content_utils import bootstrap_history_from_db
|
||||
from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
|
||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||
from app.utils.user_message_multimodal import build_human_message_content
|
||||
|
||||
_background_tasks: set[asyncio.Task] = set()
|
||||
|
|
@ -198,58 +195,6 @@ def _extract_chunk_parts(chunk: Any) -> dict[str, Any]:
|
|||
return out
|
||||
|
||||
|
||||
def format_mentioned_surfsense_docs_as_context(
    documents: list[SurfsenseDocsDocument],
) -> str:
    """Render @-mentioned SurfSense documentation pages as XML agent context.

    Each page becomes a ``<document>`` element whose chunks carry ``doc-``
    prefixed ids. A page without chunks is emitted as a single ``doc-0``
    chunk holding its full content. Returns ``""`` when nothing was mentioned.
    """
    if not documents:
        return ""

    lines = [
        "<mentioned_surfsense_docs>",
        "The user has explicitly mentioned the following SurfSense documentation pages. "
        "These are official documentation about how to use SurfSense and should be used to answer questions about the application. "
        "Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123]).",
    ]

    for doc in documents:
        url = surfsense_docs_public_url(doc.source)
        meta = json.dumps(
            {"source": doc.source, "public_url": url}, ensure_ascii=False
        )

        lines.extend(
            [
                "<document>",
                "<document_metadata>",
                f" <document_id>doc-{doc.id}</document_id>",
                " <document_type>SURFSENSE_DOCS</document_type>",
                f" <title><![CDATA[{doc.title}]]></title>",
                f" <url><![CDATA[{url}]]></url>",
                f" <metadata_json><![CDATA[{meta}]]></metadata_json>",
                "</document_metadata>",
                "",
                "<document_content>",
            ]
        )

        if hasattr(doc, "chunks") and doc.chunks:
            lines.extend(
                f" <chunk id='doc-{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
                for chunk in doc.chunks
            )
        else:
            # No chunk rows available: fall back to the whole page body.
            lines.append(f" <chunk id='doc-0'><![CDATA[{doc.content}]]></chunk>")

        lines.extend(["</document_content>", "</document>", ""])

    lines.append("</mentioned_surfsense_docs>")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def extract_todos_from_deepagents(command_output) -> dict:
|
||||
"""
|
||||
Extract todos from deepagents' TodoListMiddleware Command output.
|
||||
|
|
@ -837,7 +782,6 @@ async def stream_new_chat(
|
|||
user_id: str | None = None,
|
||||
llm_config_id: int = -1,
|
||||
mentioned_document_ids: list[int] | None = None,
|
||||
mentioned_surfsense_doc_ids: list[int] | None = None,
|
||||
mentioned_folder_ids: list[int] | None = None,
|
||||
mentioned_connector_ids: list[int] | None = None,
|
||||
mentioned_connectors: list[dict[str, Any]] | None = None,
|
||||
|
|
@ -869,7 +813,6 @@ async def stream_new_chat(
|
|||
llm_config_id: The LLM configuration ID (default: -1 for first global config)
|
||||
needs_history_bootstrap: If True, load message history from DB (for cloned chats)
|
||||
mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat
|
||||
mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat
|
||||
mentioned_folder_ids: Optional list of knowledge-base folder IDs mentioned with @ (cloud mode)
|
||||
checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations)
|
||||
|
||||
|
|
@ -1295,19 +1238,7 @@ async def stream_new_chat(
|
|||
|
||||
# Mentioned KB documents are now handled by KnowledgeBaseSearchMiddleware
|
||||
# which merges them into the scoped filesystem with full document
|
||||
# structure. Only SurfSense docs and report context are inlined here.
|
||||
|
||||
# Fetch mentioned SurfSense docs if any
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument] = []
|
||||
if mentioned_surfsense_doc_ids:
|
||||
result = await session.execute(
|
||||
select(SurfsenseDocsDocument)
|
||||
.options(selectinload(SurfsenseDocsDocument.chunks))
|
||||
.filter(
|
||||
SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids),
|
||||
)
|
||||
)
|
||||
mentioned_surfsense_docs = list(result.scalars().all())
|
||||
# structure. Only report context is inlined here.
|
||||
|
||||
# Fetch the most recent report(s) in this thread so the LLM can
|
||||
# easily find report_id for versioning decisions, instead of
|
||||
|
|
@ -1341,10 +1272,7 @@ async def stream_new_chat(
|
|||
agent_user_query = user_query
|
||||
accepted_folder_ids: list[int] = []
|
||||
if fs_mode == FilesystemMode.CLOUD.value and (
|
||||
mentioned_document_ids
|
||||
or mentioned_surfsense_doc_ids
|
||||
or mentioned_folder_ids
|
||||
or mentioned_documents
|
||||
mentioned_document_ids or mentioned_folder_ids or mentioned_documents
|
||||
):
|
||||
from app.schemas.new_chat import (
|
||||
MentionedDocumentInfo as _MentionedDocumentInfo,
|
||||
|
|
@ -1370,23 +1298,17 @@ async def stream_new_chat(
|
|||
search_space_id=search_space_id,
|
||||
mentioned_documents=chip_objs,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=mentioned_folder_ids,
|
||||
)
|
||||
agent_user_query = substitute_in_text(user_query, resolved.token_to_path)
|
||||
accepted_folder_ids = resolved.mentioned_folder_ids
|
||||
|
||||
# Format the user query with context (SurfSense docs + reports only).
|
||||
# Format the user query with context (reports only).
|
||||
# Uses ``agent_user_query`` so the LLM sees backtick-wrapped paths
|
||||
# instead of bare ``@title`` tokens.
|
||||
final_query = agent_user_query
|
||||
context_parts = []
|
||||
|
||||
if mentioned_surfsense_docs:
|
||||
context_parts.append(
|
||||
format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs)
|
||||
)
|
||||
|
||||
if mentioned_connectors:
|
||||
connector_lines = []
|
||||
for connector in mentioned_connectors:
|
||||
|
|
@ -1617,12 +1539,8 @@ async def stream_new_chat(
|
|||
stream_result.content_builder = AssistantContentBuilder()
|
||||
|
||||
# Initial thinking step - analyzing the request
|
||||
if mentioned_surfsense_docs:
|
||||
initial_title = "Analyzing referenced content"
|
||||
action_verb = "Analyzing"
|
||||
else:
|
||||
initial_title = "Understanding your request"
|
||||
action_verb = "Processing"
|
||||
initial_title = "Understanding your request"
|
||||
action_verb = "Processing"
|
||||
|
||||
processing_parts = []
|
||||
if user_query.strip():
|
||||
|
|
@ -1633,18 +1551,6 @@ async def stream_new_chat(
|
|||
else:
|
||||
processing_parts.append("(message)")
|
||||
|
||||
if mentioned_surfsense_docs:
|
||||
doc_names = []
|
||||
for doc in mentioned_surfsense_docs:
|
||||
title = doc.title
|
||||
if len(title) > 30:
|
||||
title = title[:27] + "..."
|
||||
doc_names.append(title)
|
||||
if len(doc_names) == 1:
|
||||
processing_parts.append(f"[{doc_names[0]}]")
|
||||
else:
|
||||
processing_parts.append(f"[{len(doc_names)} docs]")
|
||||
|
||||
initial_items = [f"{action_verb}: {' '.join(processing_parts)}"]
|
||||
initial_step_id = "thinking-1"
|
||||
|
||||
|
|
@ -1664,10 +1570,10 @@ async def stream_new_chat(
|
|||
items=initial_items,
|
||||
)
|
||||
|
||||
# These ORM objects (with eagerly-loaded chunks) can be very large.
|
||||
# They're only needed to build context strings already copied into
|
||||
# final_query / langchain_messages — release them before streaming.
|
||||
del mentioned_surfsense_docs, recent_reports
|
||||
# These ORM objects can be large. They're only needed to build context
|
||||
# strings already copied into final_query / langchain_messages —
|
||||
# release them before streaming.
|
||||
del recent_reports
|
||||
del langchain_messages, final_query
|
||||
|
||||
# Check if this is the first assistant response so we can generate
|
||||
|
|
|
|||
|
|
@ -1,15 +1,11 @@
|
|||
"""Pre-agent context shaping: mentioned-doc rendering and todos extraction."""
|
||||
"""Pre-agent context shaping: todos extraction."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.tasks.chat.streaming.context.deepagents_todos import (
|
||||
extract_todos_from_deepagents,
|
||||
)
|
||||
from app.tasks.chat.streaming.context.mentioned_docs import (
|
||||
format_mentioned_surfsense_docs_as_context,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"extract_todos_from_deepagents",
|
||||
"format_mentioned_surfsense_docs_as_context",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,58 +0,0 @@
|
|||
"""Render user-mentioned SurfSense docs as XML context for the agent."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.db import SurfsenseDocsDocument
|
||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||
|
||||
|
||||
def format_mentioned_surfsense_docs_as_context(
|
||||
documents: list[SurfsenseDocsDocument],
|
||||
) -> str:
|
||||
if not documents:
|
||||
return ""
|
||||
|
||||
context_parts = ["<mentioned_surfsense_docs>"]
|
||||
context_parts.append(
|
||||
"The user has explicitly mentioned the following SurfSense documentation pages. "
|
||||
"These are official documentation about how to use SurfSense and should be used to answer questions about the application. "
|
||||
"Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])."
|
||||
)
|
||||
|
||||
for doc in documents:
|
||||
public_url = surfsense_docs_public_url(doc.source)
|
||||
metadata_json = json.dumps(
|
||||
{"source": doc.source, "public_url": public_url}, ensure_ascii=False
|
||||
)
|
||||
|
||||
context_parts.append("<document>")
|
||||
context_parts.append("<document_metadata>")
|
||||
context_parts.append(f" <document_id>doc-{doc.id}</document_id>")
|
||||
context_parts.append(" <document_type>SURFSENSE_DOCS</document_type>")
|
||||
context_parts.append(f" <title><![CDATA[{doc.title}]]></title>")
|
||||
context_parts.append(f" <url><![CDATA[{public_url}]]></url>")
|
||||
context_parts.append(
|
||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
|
||||
)
|
||||
context_parts.append("</document_metadata>")
|
||||
context_parts.append("")
|
||||
context_parts.append("<document_content>")
|
||||
|
||||
if hasattr(doc, "chunks") and doc.chunks:
|
||||
for chunk in doc.chunks:
|
||||
context_parts.append(
|
||||
f" <chunk id='doc-{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
|
||||
)
|
||||
else:
|
||||
context_parts.append(
|
||||
f" <chunk id='doc-0'><![CDATA[{doc.content}]]></chunk>"
|
||||
)
|
||||
|
||||
context_parts.append("</document_content>")
|
||||
context_parts.append("</document>")
|
||||
context_parts.append("")
|
||||
|
||||
context_parts.append("</mentioned_surfsense_docs>")
|
||||
return "\n".join(context_parts)
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
"""Build and emit the first ``thinking-1`` step for a new-chat turn.
|
||||
|
||||
The step title and "Processing X" items are derived from what the user sent
|
||||
(text snippet, image count, mentioned doc titles) so the FE can render a
|
||||
meaningful placeholder while the agent stream warms up.
|
||||
(text snippet, image count) so the FE can render a meaningful placeholder
|
||||
while the agent stream warms up.
|
||||
|
||||
``thinking-1`` is the canonical id for this step — every subsequent
|
||||
``thinking-N`` produced by ``stream_agent_events`` folds into the same
|
||||
|
|
@ -15,7 +15,6 @@ from collections.abc import Iterator
|
|||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from app.db import SurfsenseDocsDocument
|
||||
from app.services.new_streaming_service import VercelStreamingService
|
||||
|
||||
|
||||
|
|
@ -37,14 +36,9 @@ def build_initial_thinking_step(
|
|||
*,
|
||||
user_query: str,
|
||||
user_image_data_urls: list[str] | None,
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument],
|
||||
) -> InitialThinkingStep:
|
||||
if mentioned_surfsense_docs:
|
||||
title = "Analyzing referenced content"
|
||||
action_verb = "Analyzing"
|
||||
else:
|
||||
title = "Understanding your request"
|
||||
action_verb = "Processing"
|
||||
title = "Understanding your request"
|
||||
action_verb = "Processing"
|
||||
|
||||
processing_parts: list[str] = []
|
||||
if user_query.strip():
|
||||
|
|
@ -55,18 +49,6 @@ def build_initial_thinking_step(
|
|||
else:
|
||||
processing_parts.append("(message)")
|
||||
|
||||
if mentioned_surfsense_docs:
|
||||
doc_names: list[str] = []
|
||||
for doc in mentioned_surfsense_docs:
|
||||
t = doc.title
|
||||
if len(t) > 30:
|
||||
t = t[:27] + "..."
|
||||
doc_names.append(t)
|
||||
if len(doc_names) == 1:
|
||||
processing_parts.append(f"[{doc_names[0]}]")
|
||||
else:
|
||||
processing_parts.append(f"[{len(doc_names)} docs]")
|
||||
|
||||
items = [f"{action_verb}: {' '.join(processing_parts)}"]
|
||||
return InitialThinkingStep(step_id="thinking-1", title=title, items=items)
|
||||
|
||||
|
|
|
|||
|
|
@ -5,20 +5,17 @@ Pipeline:
|
|||
1. **History bootstrap** — only for cloned chats with no LangGraph checkpoint
|
||||
yet; flips the per-thread ``needs_history_bootstrap`` flag back to False
|
||||
once the rows are loaded.
|
||||
2. **Mentioned SurfSense docs** — eager-load chunks so the formatter has the
|
||||
full content without a second roundtrip.
|
||||
3. **Recent reports** — top 3 by id desc with non-null content, so the LLM
|
||||
2. **Recent reports** — top 3 by id desc with non-null content, so the LLM
|
||||
can resolve ``report_id`` for versioning without spelunking history.
|
||||
4. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the
|
||||
3. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the
|
||||
query with canonical ``\`/documents/...\``` paths the LLM expects.
|
||||
5. **Context block render** — XML-wrap surfsense docs + reports, prepend to
|
||||
the rewritten query, optionally prefix with display name for SEARCH_SPACE
|
||||
4. **Context block render** — XML-wrap recent reports, prepend to the
|
||||
rewritten query, optionally prefix with display name for SEARCH_SPACE
|
||||
visibility.
|
||||
6. **HumanMessage** — multimodal content if images are attached.
|
||||
5. **HumanMessage** — multimodal content if images are attached.
|
||||
|
||||
Returns the assembled ``input_state`` dict plus side-channel data the
|
||||
orchestrator needs downstream (``accepted_folder_ids`` for runtime context;
|
||||
``mentioned_surfsense_docs`` for the initial thinking step).
|
||||
orchestrator needs downstream (``accepted_folder_ids`` for runtime context).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -30,7 +27,6 @@ from typing import Any
|
|||
from langchain_core.messages import HumanMessage
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
||||
from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
|
||||
|
|
@ -38,10 +34,6 @@ from app.db import (
|
|||
ChatVisibility,
|
||||
NewChatThread,
|
||||
Report,
|
||||
SurfsenseDocsDocument,
|
||||
)
|
||||
from app.tasks.chat.streaming.context.mentioned_docs import (
|
||||
format_mentioned_surfsense_docs_as_context,
|
||||
)
|
||||
from app.utils.content_utils import bootstrap_history_from_db
|
||||
from app.utils.user_message_multimodal import build_human_message_content
|
||||
|
|
@ -55,13 +47,10 @@ class NewChatInputState:
|
|||
|
||||
``input_state`` is fed straight to the agent. ``accepted_folder_ids``
|
||||
feeds the runtime context (the resolver may have dropped some chips).
|
||||
``mentioned_surfsense_docs`` is consumed by the initial thinking-step
|
||||
builder for the FE placeholder before the agent stream starts.
|
||||
"""
|
||||
|
||||
input_state: dict[str, Any]
|
||||
accepted_folder_ids: list[int]
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument]
|
||||
|
||||
|
||||
async def build_new_chat_input_state(
|
||||
|
|
@ -72,7 +61,6 @@ async def build_new_chat_input_state(
|
|||
user_query: str,
|
||||
user_image_data_urls: list[str] | None,
|
||||
mentioned_document_ids: list[int] | None,
|
||||
mentioned_surfsense_doc_ids: list[int] | None,
|
||||
mentioned_folder_ids: list[int] | None,
|
||||
mentioned_documents: list[dict[str, Any]] | None,
|
||||
needs_history_bootstrap: bool,
|
||||
|
|
@ -96,15 +84,6 @@ async def build_new_chat_input_state(
|
|||
thread.needs_history_bootstrap = False
|
||||
await session.commit()
|
||||
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument] = []
|
||||
if mentioned_surfsense_doc_ids:
|
||||
result = await session.execute(
|
||||
select(SurfsenseDocsDocument)
|
||||
.options(selectinload(SurfsenseDocsDocument.chunks))
|
||||
.filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids))
|
||||
)
|
||||
mentioned_surfsense_docs = list(result.scalars().all())
|
||||
|
||||
# Top 3 reports keyed by id desc (newest first) with content present,
|
||||
# surfaced inline so the LLM resolves ``report_id`` for versioning without
|
||||
# digging through conversation history.
|
||||
|
|
@ -125,14 +104,12 @@ async def build_new_chat_input_state(
|
|||
user_query=user_query,
|
||||
filesystem_mode=filesystem_mode,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=mentioned_folder_ids,
|
||||
mentioned_documents=mentioned_documents,
|
||||
)
|
||||
|
||||
final_query = _render_query_with_context(
|
||||
agent_user_query=agent_user_query,
|
||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
||||
recent_reports=recent_reports,
|
||||
)
|
||||
|
||||
|
|
@ -154,7 +131,6 @@ async def build_new_chat_input_state(
|
|||
return NewChatInputState(
|
||||
input_state=input_state,
|
||||
accepted_folder_ids=accepted_folder_ids,
|
||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -165,7 +141,6 @@ async def _resolve_mentions_for_query(
|
|||
user_query: str,
|
||||
filesystem_mode: str,
|
||||
mentioned_document_ids: list[int] | None,
|
||||
mentioned_surfsense_doc_ids: list[int] | None,
|
||||
mentioned_folder_ids: list[int] | None,
|
||||
mentioned_documents: list[dict[str, Any]] | None,
|
||||
) -> tuple[str, list[int]]:
|
||||
|
|
@ -187,10 +162,7 @@ async def _resolve_mentions_for_query(
|
|||
accepted_folder_ids: list[int] = []
|
||||
|
||||
has_any_mention = bool(
|
||||
mentioned_document_ids
|
||||
or mentioned_surfsense_doc_ids
|
||||
or mentioned_folder_ids
|
||||
or mentioned_documents
|
||||
mentioned_document_ids or mentioned_folder_ids or mentioned_documents
|
||||
)
|
||||
if filesystem_mode != FilesystemMode.CLOUD.value or not has_any_mention:
|
||||
return agent_user_query, accepted_folder_ids
|
||||
|
|
@ -214,7 +186,6 @@ async def _resolve_mentions_for_query(
|
|||
search_space_id=search_space_id,
|
||||
mentioned_documents=chip_objs,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=mentioned_folder_ids,
|
||||
)
|
||||
agent_user_query = substitute_in_text(user_query, resolved.token_to_path)
|
||||
|
|
@ -225,17 +196,11 @@ async def _resolve_mentions_for_query(
|
|||
def _render_query_with_context(
|
||||
*,
|
||||
agent_user_query: str,
|
||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument],
|
||||
recent_reports: list[Report],
|
||||
) -> str:
|
||||
"""Prepend surfsense-docs + recent-reports XML blocks to the user query."""
|
||||
"""Prepend recent-reports XML block to the user query."""
|
||||
context_parts: list[str] = []
|
||||
|
||||
if mentioned_surfsense_docs:
|
||||
context_parts.append(
|
||||
format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs)
|
||||
)
|
||||
|
||||
if recent_reports:
|
||||
report_lines: list[str] = []
|
||||
for r in recent_reports:
|
||||
|
|
|
|||
|
|
@ -123,7 +123,6 @@ async def stream_new_chat(
|
|||
user_id: str | None = None,
|
||||
llm_config_id: int = -1,
|
||||
mentioned_document_ids: list[int] | None = None,
|
||||
mentioned_surfsense_doc_ids: list[int] | None = None,
|
||||
mentioned_folder_ids: list[int] | None = None,
|
||||
mentioned_documents: list[dict[str, Any]] | None = None,
|
||||
checkpoint_id: str | None = None,
|
||||
|
|
@ -435,7 +434,6 @@ async def stream_new_chat(
|
|||
user_query=user_query,
|
||||
user_image_data_urls=user_image_data_urls,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=mentioned_folder_ids,
|
||||
mentioned_documents=mentioned_documents,
|
||||
needs_history_bootstrap=needs_history_bootstrap,
|
||||
|
|
@ -447,7 +445,6 @@ async def stream_new_chat(
|
|||
)
|
||||
input_state = assembled.input_state
|
||||
accepted_folder_ids = assembled.accepted_folder_ids
|
||||
mentioned_surfsense_docs = assembled.mentioned_surfsense_docs
|
||||
_perf_log.info(
|
||||
"[stream_new_chat] History bootstrap + doc/report queries in %.3fs",
|
||||
time.perf_counter() - _t0,
|
||||
|
|
@ -560,7 +557,6 @@ async def stream_new_chat(
|
|||
initial_step = build_initial_thinking_step(
|
||||
user_query=user_query,
|
||||
user_image_data_urls=user_image_data_urls,
|
||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
||||
)
|
||||
for sse in iter_initial_thinking_step_frame(
|
||||
initial_step,
|
||||
|
|
@ -575,7 +571,7 @@ async def stream_new_chat(
|
|||
# Drop the heavy ORM objects + the container that holds them so they
|
||||
# aren't retained for the entire streaming duration. ``input_state``
|
||||
# already carries the langchain_messages list independently.
|
||||
del assembled, mentioned_surfsense_docs
|
||||
del assembled
|
||||
|
||||
title_task = spawn_title_task(
|
||||
chat_id=chat_id,
|
||||
|
|
|
|||
|
|
@ -1,249 +0,0 @@
|
|||
"""
|
||||
Surfsense documentation indexer.
|
||||
Indexes MDX documentation files at startup.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import delete as sa_delete, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
from sqlalchemy.orm.attributes import set_committed_value
|
||||
|
||||
from app.config import config
|
||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
|
||||
from app.utils.document_converters import embed_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _safe_set_docs_chunks(
|
||||
session: AsyncSession, document: SurfsenseDocsDocument, chunks: list
|
||||
) -> None:
|
||||
"""safe_set_chunks variant for the SurfsenseDocsDocument/Chunk models."""
|
||||
if document.id is not None:
|
||||
await session.execute(
|
||||
sa_delete(SurfsenseDocsChunk).where(
|
||||
SurfsenseDocsChunk.document_id == document.id
|
||||
)
|
||||
)
|
||||
for chunk in chunks:
|
||||
chunk.document_id = document.id
|
||||
|
||||
set_committed_value(document, "chunks", chunks)
|
||||
session.add_all(chunks)
|
||||
|
||||
|
||||
# Path to docs relative to project root
|
||||
DOCS_DIR = (
|
||||
Path(__file__).resolve().parent.parent.parent.parent
|
||||
/ "surfsense_web"
|
||||
/ "content"
|
||||
/ "docs"
|
||||
)
|
||||
|
||||
|
||||
def parse_mdx_frontmatter(content: str) -> tuple[str, str]:
|
||||
"""
|
||||
Parse MDX file to extract frontmatter title and content.
|
||||
|
||||
Args:
|
||||
content: Raw MDX file content
|
||||
|
||||
Returns:
|
||||
Tuple of (title, content_without_frontmatter)
|
||||
"""
|
||||
# Match frontmatter between --- markers
|
||||
frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n"
|
||||
match = re.match(frontmatter_pattern, content, re.DOTALL)
|
||||
|
||||
if match:
|
||||
frontmatter = match.group(1)
|
||||
content_without_frontmatter = content[match.end() :]
|
||||
|
||||
# Extract title from frontmatter
|
||||
title_match = re.search(r"^title:\s*(.+)$", frontmatter, re.MULTILINE)
|
||||
title = title_match.group(1).strip() if title_match else "Untitled"
|
||||
|
||||
# Remove quotes if present
|
||||
title = title.strip("\"'")
|
||||
|
||||
return title, content_without_frontmatter.strip()
|
||||
|
||||
return "Untitled", content.strip()
|
||||
|
||||
|
||||
def get_all_mdx_files() -> list[Path]:
|
||||
"""
|
||||
Get all MDX files from the docs directory.
|
||||
|
||||
Returns:
|
||||
List of Path objects for each MDX file
|
||||
"""
|
||||
if not DOCS_DIR.exists():
|
||||
logger.warning(f"Docs directory not found: {DOCS_DIR}")
|
||||
return []
|
||||
|
||||
return list(DOCS_DIR.rglob("*.mdx"))
|
||||
|
||||
|
||||
def generate_surfsense_docs_content_hash(content: str) -> str:
|
||||
"""Generate SHA-256 hash for Surfsense docs content."""
|
||||
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]:
|
||||
"""
|
||||
Create chunks from Surfsense documentation content.
|
||||
|
||||
Args:
|
||||
content: Document content to chunk
|
||||
|
||||
Returns:
|
||||
List of SurfsenseDocsChunk objects with embeddings
|
||||
"""
|
||||
return [
|
||||
SurfsenseDocsChunk(
|
||||
content=chunk.text,
|
||||
embedding=embed_text(chunk.text),
|
||||
)
|
||||
for chunk in config.chunker_instance.chunk(content)
|
||||
]
|
||||
|
||||
|
||||
async def index_surfsense_docs(session: AsyncSession) -> tuple[int, int, int, int]:
|
||||
"""
|
||||
Index all Surfsense documentation files.
|
||||
|
||||
Args:
|
||||
session: SQLAlchemy async session
|
||||
|
||||
Returns:
|
||||
Tuple of (created, updated, skipped, deleted) counts
|
||||
"""
|
||||
created = 0
|
||||
updated = 0
|
||||
skipped = 0
|
||||
deleted = 0
|
||||
|
||||
# Get all existing docs from database
|
||||
existing_docs_result = await session.execute(
|
||||
select(SurfsenseDocsDocument).options(
|
||||
selectinload(SurfsenseDocsDocument.chunks)
|
||||
)
|
||||
)
|
||||
existing_docs = {doc.source: doc for doc in existing_docs_result.scalars().all()}
|
||||
|
||||
# Track which sources we've processed
|
||||
processed_sources = set()
|
||||
|
||||
# Get all MDX files
|
||||
mdx_files = get_all_mdx_files()
|
||||
logger.info(f"Found {len(mdx_files)} MDX files to index")
|
||||
|
||||
for mdx_file in mdx_files:
|
||||
try:
|
||||
source = str(mdx_file.relative_to(DOCS_DIR))
|
||||
processed_sources.add(source)
|
||||
|
||||
# Read file content
|
||||
raw_content = mdx_file.read_text(encoding="utf-8")
|
||||
title, content = parse_mdx_frontmatter(raw_content)
|
||||
content_hash = generate_surfsense_docs_content_hash(raw_content)
|
||||
|
||||
if source in existing_docs:
|
||||
existing_doc = existing_docs[source]
|
||||
|
||||
# Check if content changed
|
||||
if existing_doc.content_hash == content_hash:
|
||||
logger.debug(f"Skipping unchanged: {source}")
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
# Content changed - update document
|
||||
logger.info(f"Updating changed document: {source}")
|
||||
|
||||
# Create new chunks
|
||||
chunks = create_surfsense_docs_chunks(content)
|
||||
|
||||
# Update document fields
|
||||
existing_doc.title = title
|
||||
existing_doc.content = content
|
||||
existing_doc.content_hash = content_hash
|
||||
existing_doc.embedding = embed_text(content)
|
||||
await _safe_set_docs_chunks(session, existing_doc, chunks)
|
||||
existing_doc.updated_at = datetime.now(UTC)
|
||||
|
||||
updated += 1
|
||||
else:
|
||||
# New document - create it
|
||||
logger.info(f"Creating new document: {source}")
|
||||
|
||||
chunks = create_surfsense_docs_chunks(content)
|
||||
|
||||
document = SurfsenseDocsDocument(
|
||||
source=source,
|
||||
title=title,
|
||||
content=content,
|
||||
content_hash=content_hash,
|
||||
embedding=embed_text(content),
|
||||
chunks=chunks,
|
||||
updated_at=datetime.now(UTC),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
created += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing {mdx_file}: {e}", exc_info=True)
|
||||
continue
|
||||
|
||||
# Delete documents for removed files
|
||||
for source, doc in existing_docs.items():
|
||||
if source not in processed_sources:
|
||||
logger.info(f"Deleting removed document: {source}")
|
||||
await session.delete(doc)
|
||||
deleted += 1
|
||||
|
||||
# Commit all changes
|
||||
await session.commit()
|
||||
|
||||
logger.info(
|
||||
f"Indexing complete: {created} created, {updated} updated, "
|
||||
f"{skipped} skipped, {deleted} deleted"
|
||||
)
|
||||
|
||||
return created, updated, skipped, deleted
|
||||
|
||||
|
||||
async def seed_surfsense_docs() -> tuple[int, int, int, int]:
|
||||
"""
|
||||
Seed Surfsense documentation into the database.
|
||||
|
||||
This function indexes all MDX files from the docs directory.
|
||||
It handles creating, updating, and deleting docs based on content changes.
|
||||
|
||||
Returns:
|
||||
Tuple of (created, updated, skipped, deleted) counts
|
||||
Returns (0, 0, 0, 0) if an error occurs
|
||||
"""
|
||||
logger.info("Starting Surfsense docs indexing...")
|
||||
|
||||
try:
|
||||
async with async_session_maker() as session:
|
||||
created, updated, skipped, deleted = await index_surfsense_docs(session)
|
||||
|
||||
logger.info(
|
||||
f"Surfsense docs indexing complete: "
|
||||
f"created={created}, updated={updated}, skipped={skipped}, deleted={deleted}"
|
||||
)
|
||||
|
||||
return created, updated, skipped, deleted
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to seed Surfsense docs: {e}", exc_info=True)
|
||||
return 0, 0, 0, 0
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
"""Utilities for SurfSense's built-in documentation index."""
|
||||
|
||||
from pathlib import PurePosixPath
|
||||
|
||||
DOCS_PUBLIC_ROOT = PurePosixPath("/docs")
|
||||
|
||||
|
||||
def surfsense_docs_public_url(source: str) -> str:
|
||||
"""Return the public docs route for an indexed documentation source path."""
|
||||
docs_path = PurePosixPath(source).with_suffix("")
|
||||
if docs_path.name == "index":
|
||||
docs_path = docs_path.parent
|
||||
return (DOCS_PUBLIC_ROOT / docs_path).as_posix()
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Seed Surfsense documentation into the database.
|
||||
|
||||
CLI wrapper for the seed_surfsense_docs function.
|
||||
Can be run manually for debugging or re-indexing.
|
||||
|
||||
Usage:
|
||||
python scripts/seed_surfsense_docs.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add the parent directory to the path so we can import app modules
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
from app.tasks.surfsense_docs_indexer import seed_surfsense_docs
|
||||
|
||||
|
||||
def main():
|
||||
"""CLI entry point for seeding Surfsense docs."""
|
||||
print("=" * 50)
|
||||
print(" Surfsense Documentation Seeding")
|
||||
print("=" * 50)
|
||||
|
||||
created, updated, skipped, deleted = asyncio.run(seed_surfsense_docs())
|
||||
|
||||
print()
|
||||
print("Results:")
|
||||
print(f" Created: {created}")
|
||||
print(f" Updated: {updated}")
|
||||
print(f" Skipped: {skipped}")
|
||||
print(f" Deleted: {deleted}")
|
||||
print("=" * 50)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -60,7 +60,6 @@ class TestReadOnlyToolsAllowed:
|
|||
"glob",
|
||||
"web_search",
|
||||
"scrape_webpage",
|
||||
"search_surfsense_docs",
|
||||
"get_connected_accounts",
|
||||
"write_todos",
|
||||
"task",
|
||||
|
|
|
|||
|
|
@ -22,12 +22,6 @@ from app.agents.new_chat.subagents.config import (
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@tool
|
||||
def search_surfsense_docs(query: str) -> str:
|
||||
"""Search the user's KB."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def web_search(query: str) -> str:
|
||||
"""Search the public web."""
|
||||
|
|
@ -95,7 +89,6 @@ def generate_report(topic: str) -> str:
|
|||
|
||||
|
||||
ALL_TOOLS = [
|
||||
search_surfsense_docs,
|
||||
web_search,
|
||||
scrape_webpage,
|
||||
read_file,
|
||||
|
|
@ -161,7 +154,7 @@ class TestReportWriterSubagent:
|
|||
names = {t.name for t in spec["tools"]} # type: ignore[index]
|
||||
assert names == REPORT_WRITER_TOOLS & {t.name for t in ALL_TOOLS}
|
||||
assert "generate_report" in names
|
||||
assert "search_surfsense_docs" in names
|
||||
assert "read_file" in names
|
||||
|
||||
def test_deny_rules_block_writes_but_allow_generate_report(self) -> None:
|
||||
spec = build_report_writer_subagent(tools=ALL_TOOLS)
|
||||
|
|
@ -272,9 +265,9 @@ class TestFilterToolsWarningSuppression:
|
|||
# Allowed set asks for two registry tools (one present, one
|
||||
# not) plus a bunch of middleware-provided names.
|
||||
_filter_tools(
|
||||
[search_surfsense_docs],
|
||||
[web_search],
|
||||
allowed_names={
|
||||
"search_surfsense_docs",
|
||||
"web_search",
|
||||
"scrape_webpage", # legitimately missing → should warn
|
||||
"read_file", # mw-provided → suppressed
|
||||
"ls",
|
||||
|
|
@ -322,7 +315,6 @@ class TestDenyPatternsCoverage:
|
|||
|
||||
def test_deny_patterns_do_not_match_safe_read_tools(self) -> None:
|
||||
canonical_reads = [
|
||||
"search_surfsense_docs",
|
||||
"read_file",
|
||||
"ls_tree",
|
||||
"grep",
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ from __future__ import annotations
|
|||
|
||||
import asyncio
|
||||
import inspect
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
|
|
@ -140,45 +139,28 @@ def test_orchestrators_are_async_generator_functions() -> None:
|
|||
# ------------------------------------------------------------ initial thinking
|
||||
|
||||
|
||||
@dataclass
|
||||
class _FakeSurfsenseDoc:
|
||||
"""Stand-in for ``SurfsenseDocsDocument`` with just the field we read."""
|
||||
|
||||
title: str
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"user_query, image_urls, docs, expected_title, expected_action",
|
||||
"user_query, image_urls, expected_title, expected_action",
|
||||
[
|
||||
("hello world", None, [], "Understanding your request", "Processing"),
|
||||
("hello world", None, "Understanding your request", "Processing"),
|
||||
(
|
||||
"",
|
||||
["data:image/png;base64,AAA"],
|
||||
[],
|
||||
"Understanding your request",
|
||||
"Processing",
|
||||
),
|
||||
("", None, [], "Understanding your request", "Processing"),
|
||||
(
|
||||
"doc question",
|
||||
None,
|
||||
[_FakeSurfsenseDoc(title="My Doc")],
|
||||
"Analyzing referenced content",
|
||||
"Analyzing",
|
||||
),
|
||||
("", None, "Understanding your request", "Processing"),
|
||||
],
|
||||
)
|
||||
def test_initial_thinking_step_branches(
|
||||
user_query: str,
|
||||
image_urls: list[str] | None,
|
||||
docs: list[Any],
|
||||
expected_title: str,
|
||||
expected_action: str,
|
||||
) -> None:
|
||||
step = build_initial_thinking_step(
|
||||
user_query=user_query,
|
||||
user_image_data_urls=image_urls,
|
||||
mentioned_surfsense_docs=docs, # type: ignore[arg-type]
|
||||
)
|
||||
assert step.step_id == "thinking-1"
|
||||
assert step.title == expected_title
|
||||
|
|
@ -191,7 +173,6 @@ def test_initial_thinking_step_truncates_long_query() -> None:
|
|||
step = build_initial_thinking_step(
|
||||
user_query=long_query,
|
||||
user_image_data_urls=None,
|
||||
mentioned_surfsense_docs=[],
|
||||
)
|
||||
# 80-char truncation + ellipsis, sandwiched after "Processing: ".
|
||||
assert "..." in step.items[0]
|
||||
|
|
@ -200,16 +181,6 @@ def test_initial_thinking_step_truncates_long_query() -> None:
|
|||
assert payload.startswith("x" * 80) and payload.endswith("...")
|
||||
|
||||
|
||||
def test_initial_thinking_step_collapses_many_doc_names() -> None:
|
||||
docs = [_FakeSurfsenseDoc(title=f"Doc {i}") for i in range(5)]
|
||||
step = build_initial_thinking_step(
|
||||
user_query="q",
|
||||
user_image_data_urls=None,
|
||||
mentioned_surfsense_docs=docs, # type: ignore[arg-type]
|
||||
)
|
||||
assert "[5 docs]" in step.items[0]
|
||||
|
||||
|
||||
# ------------------------------------------------------------ capability gate
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -89,10 +89,10 @@ function removeFirstToken(text: string, token: string): string {
|
|||
|
||||
/**
|
||||
* Task input that reuses the chat ``@`` mention experience -- the same
|
||||
* ``InlineMentionEditor`` + ``DocumentMentionPicker`` as the composer, minus
|
||||
* SurfSense product docs. The editor is the source of truth while mounted;
|
||||
* ``onChange`` reports both the plain text (chips rendered as ``@Title``) and
|
||||
* the structured mention list so the builder can persist IDs for the run.
|
||||
* ``InlineMentionEditor`` + ``DocumentMentionPicker`` as the composer. The
|
||||
* editor is the source of truth while mounted; ``onChange`` reports both the
|
||||
* plain text (chips rendered as ``@Title``) and the structured mention list
|
||||
* so the builder can persist IDs for the run.
|
||||
*/
|
||||
export function MentionTaskInput({
|
||||
searchSpaceId,
|
||||
|
|
@ -233,7 +233,6 @@ export function MentionTaskInput({
|
|||
<DocumentMentionPicker
|
||||
ref={pickerRef}
|
||||
searchSpaceId={searchSpaceId}
|
||||
includeSurfsenseDocs={false}
|
||||
onSelectionChange={handleSelection}
|
||||
onDone={closePopover}
|
||||
initialSelectedDocuments={mentions}
|
||||
|
|
|
|||
|
|
@ -740,15 +740,6 @@ export default function NewChatPage() {
|
|||
queryFn: () => documentsApiService.searchDocumentTitles({ queryParams: prefetchParams }),
|
||||
staleTime: 60 * 1000,
|
||||
});
|
||||
|
||||
queryClient.prefetchQuery({
|
||||
queryKey: ["surfsense-docs-mention", "", false],
|
||||
queryFn: () =>
|
||||
documentsApiService.getSurfsenseDocs({
|
||||
queryParams: { page: 0, page_size: 20 },
|
||||
}),
|
||||
staleTime: 3 * 60 * 1000,
|
||||
});
|
||||
}, [searchSpaceId, queryClient]);
|
||||
|
||||
// Handle scroll to comment from URL query params (e.g., from inbox item click)
|
||||
|
|
@ -949,7 +940,6 @@ export default function NewChatPage() {
|
|||
trackChatMessageSent(searchSpaceId, currentThreadId, {
|
||||
hasAttachments: userImages.length > 0,
|
||||
hasMentionedDocuments:
|
||||
mentionedDocumentIds.surfsense_doc_ids.length > 0 ||
|
||||
mentionedDocumentIds.document_ids.length > 0 ||
|
||||
mentionedDocumentIds.folder_ids.length > 0 ||
|
||||
mentionedDocumentIds.connector_ids.length > 0,
|
||||
|
|
@ -1027,12 +1017,11 @@ export default function NewChatPage() {
|
|||
|
||||
// Get mentioned document IDs for context (separate fields for backend)
|
||||
const hasDocumentIds = mentionedDocumentIds.document_ids.length > 0;
|
||||
const hasSurfsenseDocIds = mentionedDocumentIds.surfsense_doc_ids.length > 0;
|
||||
const hasFolderIds = mentionedDocumentIds.folder_ids.length > 0;
|
||||
const hasConnectorIds = mentionedDocumentIds.connector_ids.length > 0;
|
||||
|
||||
// Clear mentioned documents after capturing them
|
||||
if (hasDocumentIds || hasSurfsenseDocIds || hasFolderIds || hasConnectorIds) {
|
||||
if (hasDocumentIds || hasFolderIds || hasConnectorIds) {
|
||||
setMentionedDocuments([]);
|
||||
}
|
||||
|
||||
|
|
@ -1054,9 +1043,6 @@ export default function NewChatPage() {
|
|||
mentioned_document_ids: hasDocumentIds
|
||||
? mentionedDocumentIds.document_ids
|
||||
: undefined,
|
||||
mentioned_surfsense_doc_ids: hasSurfsenseDocIds
|
||||
? mentionedDocumentIds.surfsense_doc_ids
|
||||
: undefined,
|
||||
mentioned_folder_ids: hasFolderIds ? mentionedDocumentIds.folder_ids : undefined,
|
||||
mentioned_connector_ids: hasConnectorIds
|
||||
? mentionedDocumentIds.connector_ids
|
||||
|
|
@ -1947,18 +1933,14 @@ export default function NewChatPage() {
|
|||
const selection = await getAgentFilesystemSelection(searchSpaceId, {
|
||||
localFilesystemEnabled,
|
||||
});
|
||||
// Partition the source mentions back into doc/surfsense_doc/folder
|
||||
// id buckets so the regenerate route can pass them to
|
||||
// ``stream_new_chat`` and the priority middleware sees the
|
||||
// same ``[USER-MENTIONED]`` priority entries the original
|
||||
// turn did. Without this partition the regenerate flow
|
||||
// silently dropped the agent's mention awareness — same
|
||||
// architectural bug we fixed on the new-chat path.
|
||||
const regenerateSurfsenseDocIds = sourceMentionedDocs
|
||||
.filter((d) => d.kind === "doc" && d.document_type === "SURFSENSE_DOCS")
|
||||
.map((d) => d.id);
|
||||
// Partition the source mentions back into doc/folder id buckets
|
||||
// so the regenerate route can pass them to ``stream_new_chat``
|
||||
// and the priority middleware sees the same ``[USER-MENTIONED]``
|
||||
// priority entries the original turn did. Without this partition
|
||||
// the regenerate flow silently dropped the agent's mention
|
||||
// awareness — same architectural bug we fixed on the new-chat path.
|
||||
const regenerateDocIds = sourceMentionedDocs
|
||||
.filter((d) => d.kind === "doc" && d.document_type !== "SURFSENSE_DOCS")
|
||||
.filter((d) => d.kind === "doc")
|
||||
.map((d) => d.id);
|
||||
const regenerateFolderIds = sourceMentionedDocs
|
||||
.filter((d) => d.kind === "folder")
|
||||
|
|
@ -1973,8 +1955,6 @@ export default function NewChatPage() {
|
|||
client_platform: selection.client_platform,
|
||||
local_filesystem_mounts: selection.local_filesystem_mounts,
|
||||
mentioned_document_ids: regenerateDocIds.length > 0 ? regenerateDocIds : undefined,
|
||||
mentioned_surfsense_doc_ids:
|
||||
regenerateSurfsenseDocIds.length > 0 ? regenerateSurfsenseDocIds : undefined,
|
||||
mentioned_folder_ids: regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined,
|
||||
mentioned_connector_ids:
|
||||
regenerateConnectors.length > 0 ? regenerateConnectors.map((d) => d.id) : undefined,
|
||||
|
|
|
|||
|
|
@ -102,10 +102,7 @@ export const mentionedDocumentIdsAtom = atom((get) => {
|
|||
const folders = deduped.filter((m) => m.kind === "folder");
|
||||
const connectors = deduped.filter((m) => m.kind === "connector");
|
||||
return {
|
||||
surfsense_doc_ids: docs
|
||||
.filter((doc) => doc.document_type === "SURFSENSE_DOCS")
|
||||
.map((doc) => doc.id),
|
||||
document_ids: docs.filter((doc) => doc.document_type !== "SURFSENSE_DOCS").map((doc) => doc.id),
|
||||
document_ids: docs.map((doc) => doc.id),
|
||||
folder_ids: folders.map((f) => f.id),
|
||||
connector_ids: connectors.map((c) => c.id),
|
||||
connectors: connectors.map((c) => ({
|
||||
|
|
|
|||
|
|
@ -1,16 +1,13 @@
|
|||
"use client";
|
||||
|
||||
import { useQuery } from "@tanstack/react-query";
|
||||
import { useSetAtom } from "jotai";
|
||||
import { ExternalLink, FileText } from "lucide-react";
|
||||
import dynamic from "next/dynamic";
|
||||
import { FileText } from "lucide-react";
|
||||
import type { FC } from "react";
|
||||
import { useState } from "react";
|
||||
import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom";
|
||||
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
|
||||
import { CitationPanelContent } from "@/components/citation-panel/citation-panel";
|
||||
import { Citation } from "@/components/tool-ui/citation";
|
||||
import { CitationHoverPopover } from "@/components/tool-ui/citation/citation-hover-popover";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Drawer,
|
||||
|
|
@ -19,21 +16,8 @@ import {
|
|||
DrawerHeader,
|
||||
DrawerTitle,
|
||||
} from "@/components/ui/drawer";
|
||||
import { Spinner } from "@/components/ui/spinner";
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
|
||||
import { useMediaQuery } from "@/hooks/use-media-query";
|
||||
import { documentsApiService } from "@/lib/apis/documents-api.service";
|
||||
import { cacheKeys } from "@/lib/query-client/cache-keys";
|
||||
|
||||
// Lazily load MarkdownViewer here to break the static import cycle:
|
||||
// `markdown-viewer.tsx` → `citation-renderer.tsx` → `inline-citation.tsx`
|
||||
// would otherwise pull `markdown-viewer.tsx` back in at module-init time.
|
||||
// Only `SurfsenseDocCitation` (popover body) ever renders this viewer, so
|
||||
// the lazy boundary is invisible to most call paths.
|
||||
const MarkdownViewer = dynamic(
|
||||
() => import("@/components/markdown-viewer").then((m) => m.MarkdownViewer),
|
||||
{ ssr: false, loading: () => <Spinner size="xs" /> }
|
||||
);
|
||||
|
||||
interface InlineCitationProps {
|
||||
chunkId: number;
|
||||
|
|
@ -41,9 +25,7 @@ interface InlineCitationProps {
|
|||
}
|
||||
|
||||
/**
|
||||
* Inline citation badge for knowledge-base chunks (numeric chunk IDs) and
|
||||
* Surfsense documentation chunks (`isDocsChunk`). Negative chunk IDs render as
|
||||
* a static "doc" pill (anonymous/synthetic uploads).
|
||||
* Inline citation badge for knowledge-base chunks (numeric chunk IDs).
|
||||
*
|
||||
* Numeric KB chunks: clicking opens the citation panel in the right
|
||||
* sidebar (alongside the chat — does not replace it). The panel shows
|
||||
|
|
@ -51,12 +33,13 @@ interface InlineCitationProps {
|
|||
* `chunk_window`), with the cited one highlighted and an option to
|
||||
* expand the window or jump into the full document via the editor panel.
|
||||
*
|
||||
* Surfsense docs chunks: rendered as a hover-controlled shadcn Popover that
|
||||
* lazily fetches and previews the cited chunk inline, since those docs aren't
|
||||
* indexed into the user's search space and have no tab to open.
|
||||
* Negative chunk IDs and legacy SurfSense-docs chunks (`isDocsChunk`) render
|
||||
* as a static, non-interactive "doc" pill. The SurfSense product-docs feature
|
||||
* was removed, so those markers are inert (no fetch, no preview) — they only
|
||||
* survive in old persisted messages.
|
||||
*/
|
||||
export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk = false }) => {
|
||||
if (chunkId < 0) {
|
||||
if (chunkId < 0 || isDocsChunk) {
|
||||
return (
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
|
|
@ -68,15 +51,11 @@ export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk =
|
|||
doc
|
||||
</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>Uploaded document</TooltipContent>
|
||||
<TooltipContent>{isDocsChunk ? "Documentation reference" : "Uploaded document"}</TooltipContent>
|
||||
</Tooltip>
|
||||
);
|
||||
}
|
||||
|
||||
if (isDocsChunk) {
|
||||
return <SurfsenseDocCitation chunkId={chunkId} />;
|
||||
}
|
||||
|
||||
return <NumericChunkCitation chunkId={chunkId} />;
|
||||
};
|
||||
|
||||
|
|
@ -127,128 +106,6 @@ const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
|
|||
);
|
||||
};
|
||||
|
||||
const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
|
||||
const isTouchLike = useMediaQuery("(hover: none), (pointer: coarse)");
|
||||
const [mobilePreviewOpen, setMobilePreviewOpen] = useState(false);
|
||||
const docQuery = useSurfsenseDocPreviewQuery(chunkId, mobilePreviewOpen);
|
||||
|
||||
const handleMobileClick = () => {
|
||||
setMobilePreviewOpen(true);
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
<CitationHoverPopover
|
||||
id={`doc-${chunkId}`}
|
||||
contentClassName="w-96 max-w-[calc(100vw-2rem)] p-0"
|
||||
align="start"
|
||||
trigger={(hoverProps) => (
|
||||
<Button
|
||||
type="button"
|
||||
variant="ghost"
|
||||
size={null}
|
||||
onClick={isTouchLike ? handleMobileClick : undefined}
|
||||
className="ml-0.5 inline-flex h-5 min-w-5 items-center justify-center gap-0.5 rounded-md bg-popover px-1.5 text-[11px] font-medium text-popover-foreground/80 align-baseline"
|
||||
aria-label={`Show Surfsense documentation chunk ${chunkId}`}
|
||||
title="Surfsense documentation"
|
||||
{...hoverProps}
|
||||
>
|
||||
<FileText className="size-3" />
|
||||
doc
|
||||
</Button>
|
||||
)}
|
||||
>
|
||||
<SurfsenseDocPreview chunkId={chunkId} />
|
||||
</CitationHoverPopover>
|
||||
<Drawer
|
||||
open={mobilePreviewOpen}
|
||||
onOpenChange={setMobilePreviewOpen}
|
||||
shouldScaleBackground={false}
|
||||
>
|
||||
<DrawerContent className="max-h-[85vh] z-80" overlayClassName="z-80">
|
||||
<DrawerHandle />
|
||||
<DrawerHeader className="pb-0">
|
||||
<DrawerTitle>Surfsense documentation</DrawerTitle>
|
||||
</DrawerHeader>
|
||||
<SurfsenseDocPreviewContent
|
||||
chunkId={chunkId}
|
||||
query={docQuery}
|
||||
contentClassName="max-h-[60vh]"
|
||||
/>
|
||||
</DrawerContent>
|
||||
</Drawer>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
function useSurfsenseDocPreviewQuery(chunkId: number, enabled = true) {
|
||||
return useQuery({
|
||||
queryKey: cacheKeys.documents.byChunk(`doc-${chunkId}`),
|
||||
queryFn: () => documentsApiService.getSurfsenseDocByChunk(chunkId),
|
||||
staleTime: 5 * 60 * 1000,
|
||||
enabled,
|
||||
});
|
||||
}
|
||||
|
||||
type SurfsenseDocPreviewQuery = ReturnType<typeof useSurfsenseDocPreviewQuery>;
|
||||
|
||||
const SurfsenseDocPreview: FC<{ chunkId: number }> = ({ chunkId }) => {
|
||||
const query = useSurfsenseDocPreviewQuery(chunkId);
|
||||
|
||||
return <SurfsenseDocPreviewContent chunkId={chunkId} query={query} />;
|
||||
};
|
||||
|
||||
const SurfsenseDocPreviewContent: FC<{
|
||||
chunkId: number;
|
||||
query: SurfsenseDocPreviewQuery;
|
||||
contentClassName?: string;
|
||||
}> = ({ chunkId, query, contentClassName = "max-h-72" }) => {
|
||||
const { data, isLoading, error } = query;
|
||||
|
||||
const citedChunk = data?.chunks.find((c) => c.id === chunkId) ?? data?.chunks[0];
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="flex items-center justify-between gap-2 border-b px-3 py-2">
|
||||
<div className="min-w-0">
|
||||
<p className="truncate text-sm font-medium">{data?.title ?? "Surfsense documentation"}</p>
|
||||
<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
|
||||
</div>
|
||||
{data?.public_url && (
|
||||
<a
|
||||
href={data.public_url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"
|
||||
>
|
||||
<ExternalLink className="size-3" />
|
||||
Open
|
||||
</a>
|
||||
)}
|
||||
</div>
|
||||
<div className={`${contentClassName} overflow-auto px-3 py-2 text-sm`}>
|
||||
{isLoading && (
|
||||
<div className="flex items-center gap-2 py-4 text-muted-foreground">
|
||||
<Spinner size="xs" />
|
||||
<span className="text-xs">Loading…</span>
|
||||
</div>
|
||||
)}
|
||||
{error && (
|
||||
<p className="py-4 text-xs text-destructive">
|
||||
{error instanceof Error ? error.message : "Failed to load chunk"}
|
||||
</p>
|
||||
)}
|
||||
{!isLoading && !error && citedChunk?.content && (
|
||||
<MarkdownViewer content={citedChunk.content} maxLength={1500} enableCitations />
|
||||
)}
|
||||
{!isLoading && !error && !citedChunk?.content && (
|
||||
<p className="py-4 text-xs text-muted-foreground">No content available.</p>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
import { tryGetHostname } from "@/lib/url";
|
||||
|
||||
interface UrlCitationProps {
|
||||
|
|
|
|||
|
|
@ -1593,7 +1593,7 @@ interface ToolGroup {
|
|||
const TOOL_GROUPS: ToolGroup[] = [
|
||||
{
|
||||
label: "Research",
|
||||
tools: ["search_surfsense_docs", "scrape_webpage"],
|
||||
tools: ["scrape_webpage"],
|
||||
},
|
||||
{
|
||||
label: "Generate",
|
||||
|
|
|
|||
|
|
@ -90,7 +90,6 @@ const DesktopLocalTabContent = dynamic(
|
|||
);
|
||||
|
||||
const NON_DELETABLE_DOCUMENT_TYPES: readonly string[] = [
|
||||
"SURFSENSE_DOCS",
|
||||
"USER_MEMORY",
|
||||
"TEAM_MEMORY",
|
||||
];
|
||||
|
|
|
|||
|
|
@ -3,14 +3,7 @@
|
|||
import { useQuery as useZeroQuery } from "@rocicorp/zero/react";
|
||||
import { keepPreviousData, useQuery } from "@tanstack/react-query";
|
||||
import { useAtomValue } from "jotai";
|
||||
import {
|
||||
BookOpen,
|
||||
ChevronLeft,
|
||||
ChevronRight,
|
||||
Files,
|
||||
Folder as FolderIcon,
|
||||
Unplug,
|
||||
} from "lucide-react";
|
||||
import { ChevronLeft, ChevronRight, Files, Folder as FolderIcon, Unplug } from "lucide-react";
|
||||
import {
|
||||
Fragment,
|
||||
forwardRef,
|
||||
|
|
@ -57,13 +50,6 @@ interface DocumentMentionPickerProps {
|
|||
onDone: () => void;
|
||||
initialSelectedDocuments?: MentionedDocumentInfo[];
|
||||
externalSearch?: string;
|
||||
/**
|
||||
* Whether to surface the "SurfSense Docs" (product documentation) branch
|
||||
* and include those docs in search results. Defaults to ``true`` so the
|
||||
* chat composer is unchanged; callers like the automation task input pass
|
||||
* ``false`` to reference only the user's own knowledge base + connectors.
|
||||
*/
|
||||
includeSurfsenseDocs?: boolean;
|
||||
}
|
||||
|
||||
const PAGE_SIZE = 20;
|
||||
|
|
@ -74,7 +60,6 @@ const RECENTS_STORAGE_PREFIX = "surfsense:composer-mention-recents:v1:";
|
|||
|
||||
type BrowseView =
|
||||
| { kind: "root" }
|
||||
| { kind: "surfsense-docs" }
|
||||
| { kind: "files-folders" }
|
||||
| { kind: "connectors" }
|
||||
| { kind: "connector-type"; connectorType: string; title: string };
|
||||
|
|
@ -241,7 +226,6 @@ export const DocumentMentionPicker = forwardRef<
|
|||
onDone,
|
||||
initialSelectedDocuments = [],
|
||||
externalSearch = "",
|
||||
includeSurfsenseDocs = true,
|
||||
},
|
||||
ref
|
||||
) {
|
||||
|
|
@ -298,15 +282,6 @@ export const DocumentMentionPicker = forwardRef<
|
|||
[searchSpaceId, debouncedSearch, isSearchValid]
|
||||
);
|
||||
|
||||
const surfsenseDocsQueryParams = useMemo(() => {
|
||||
const params: { page: number; page_size: number; title?: string } = {
|
||||
page: 0,
|
||||
page_size: PAGE_SIZE,
|
||||
};
|
||||
if (isSearchValid) params.title = debouncedSearch.trim();
|
||||
return params;
|
||||
}, [debouncedSearch, isSearchValid]);
|
||||
|
||||
const { data: titleSearchResults, isLoading: isTitleSearchLoading } = useQuery({
|
||||
queryKey: ["document-titles", titleSearchParams],
|
||||
queryFn: ({ signal }) =>
|
||||
|
|
@ -316,15 +291,6 @@ export const DocumentMentionPicker = forwardRef<
|
|||
placeholderData: keepPreviousData,
|
||||
});
|
||||
|
||||
const { data: surfsenseDocs, isLoading: isSurfsenseDocsLoading } = useQuery({
|
||||
queryKey: ["surfsense-docs-mention", debouncedSearch, isSearchValid],
|
||||
queryFn: ({ signal }) =>
|
||||
documentsApiService.getSurfsenseDocs({ queryParams: surfsenseDocsQueryParams }, signal),
|
||||
staleTime: 3 * 60 * 1000,
|
||||
enabled: includeSurfsenseDocs && (!hasSearch || isSearchValid),
|
||||
placeholderData: keepPreviousData,
|
||||
});
|
||||
|
||||
const filterBySearchTerm = useCallback(
|
||||
(docs: Pick<Document, "id" | "title" | "document_type">[]) => {
|
||||
if (!isSearchValid) return docs;
|
||||
|
|
@ -338,23 +304,13 @@ export const DocumentMentionPicker = forwardRef<
|
|||
if (currentPage !== 0) return;
|
||||
const combinedDocs: Pick<Document, "id" | "title" | "document_type">[] = [];
|
||||
|
||||
if (includeSurfsenseDocs && surfsenseDocs?.items) {
|
||||
for (const doc of surfsenseDocs.items) {
|
||||
combinedDocs.push({
|
||||
id: doc.id,
|
||||
title: doc.title,
|
||||
document_type: "SURFSENSE_DOCS",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (titleSearchResults?.items) {
|
||||
combinedDocs.push(...titleSearchResults.items);
|
||||
setHasMore(titleSearchResults.has_more);
|
||||
}
|
||||
|
||||
setAccumulatedDocuments(filterBySearchTerm(combinedDocs));
|
||||
}, [titleSearchResults, surfsenseDocs, currentPage, filterBySearchTerm, includeSurfsenseDocs]);
|
||||
}, [titleSearchResults, currentPage, filterBySearchTerm]);
|
||||
|
||||
const loadNextPage = useCallback(async () => {
|
||||
if (isLoadingMore || !hasMore) return;
|
||||
|
|
@ -391,14 +347,6 @@ export const DocumentMentionPicker = forwardRef<
|
|||
return accumulatedDocuments.filter((doc) => doc.title.toLowerCase().includes(searchLower));
|
||||
}, [accumulatedDocuments, deferredSearch, isSingleCharSearch]);
|
||||
|
||||
const surfsenseDocsList = useMemo(
|
||||
() => actualDocuments.filter((doc) => doc.document_type === "SURFSENSE_DOCS"),
|
||||
[actualDocuments]
|
||||
);
|
||||
const userDocsList = useMemo(
|
||||
() => actualDocuments.filter((doc) => doc.document_type !== "SURFSENSE_DOCS"),
|
||||
[actualDocuments]
|
||||
);
|
||||
const folderMentions = useMemo(() => {
|
||||
const all = (zeroFolders ?? []).map((f) => makeFolderMention({ id: f.id, title: f.name }));
|
||||
if (!hasSearch) return all;
|
||||
|
|
@ -463,7 +411,6 @@ export const DocumentMentionPicker = forwardRef<
|
|||
() => new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))),
|
||||
[initialSelectedDocuments]
|
||||
);
|
||||
const showSurfsenseDocsRoot = includeSurfsenseDocs && surfsenseDocsList.length > 0;
|
||||
|
||||
const selectMention = useCallback(
|
||||
(mention: MentionedDocumentInfo) => {
|
||||
|
|
@ -487,16 +434,6 @@ export const DocumentMentionPicker = forwardRef<
|
|||
|
||||
const rootNodes = useMemo<ComposerSuggestionNode<ResourceNodeValue>[]>(() => {
|
||||
const nodes: ComposerSuggestionNode<ResourceNodeValue>[] = [...recentRootNodes];
|
||||
if (showSurfsenseDocsRoot) {
|
||||
nodes.push({
|
||||
id: "surfsense-docs",
|
||||
label: "SurfSense Docs",
|
||||
subtitle: "Browse product documentation",
|
||||
icon: <BookOpen className="size-4" />,
|
||||
type: "branch",
|
||||
value: { kind: "view", view: { kind: "surfsense-docs" } },
|
||||
});
|
||||
}
|
||||
nodes.push(
|
||||
{
|
||||
id: "files-folders",
|
||||
|
|
@ -519,7 +456,7 @@ export const DocumentMentionPicker = forwardRef<
|
|||
}
|
||||
);
|
||||
return nodes;
|
||||
}, [activeConnectors.length, recentRootNodes, showSurfsenseDocsRoot]);
|
||||
}, [activeConnectors.length, recentRootNodes]);
|
||||
|
||||
const searchNodes = useMemo<ComposerSuggestionNode<ResourceNodeValue>[]>(() => {
|
||||
const searchLower = (isSingleCharSearch ? deferredSearch : debouncedSearch)
|
||||
|
|
@ -582,19 +519,6 @@ export const DocumentMentionPicker = forwardRef<
|
|||
|
||||
const browseNodes = useMemo<ComposerSuggestionNode<ResourceNodeValue>[]>(() => {
|
||||
if (view.kind === "root") return rootNodes;
|
||||
if (view.kind === "surfsense-docs") {
|
||||
return surfsenseDocsList.map((doc) => {
|
||||
const mention = makeDocMention(doc);
|
||||
return {
|
||||
id: getMentionDocKey(mention),
|
||||
label: doc.title,
|
||||
icon: getConnectorIcon(doc.document_type, "size-4"),
|
||||
type: "item" as const,
|
||||
disabled: selectedKeys.has(getMentionDocKey(mention)),
|
||||
value: { kind: "mention" as const, mention },
|
||||
};
|
||||
});
|
||||
}
|
||||
if (view.kind === "files-folders") {
|
||||
const folders = folderMentions.map((mention) => ({
|
||||
id: getMentionDocKey(mention),
|
||||
|
|
@ -605,7 +529,7 @@ export const DocumentMentionPicker = forwardRef<
|
|||
disabled: selectedKeys.has(getMentionDocKey(mention)),
|
||||
value: { kind: "mention" as const, mention },
|
||||
}));
|
||||
const docs = userDocsList.map((doc) => {
|
||||
const docs = actualDocuments.map((doc) => {
|
||||
const mention = makeDocMention(doc);
|
||||
return {
|
||||
id: getMentionDocKey(mention),
|
||||
|
|
@ -652,13 +576,12 @@ export const DocumentMentionPicker = forwardRef<
|
|||
};
|
||||
});
|
||||
}, [
|
||||
actualDocuments,
|
||||
activeConnectors,
|
||||
connectorTypeEntries,
|
||||
folderMentions,
|
||||
rootNodes,
|
||||
selectedKeys,
|
||||
surfsenseDocsList,
|
||||
userDocsList,
|
||||
view,
|
||||
]);
|
||||
|
||||
|
|
@ -708,27 +631,23 @@ export const DocumentMentionPicker = forwardRef<
|
|||
|
||||
const isRootBrowseView = !hasSearch && view.kind === "root";
|
||||
const isVisibleViewLoading = hasSearch
|
||||
? isTitleSearchLoading || isSurfsenseDocsLoading || isConnectorsLoading
|
||||
: view.kind === "surfsense-docs"
|
||||
? isSurfsenseDocsLoading
|
||||
: view.kind === "files-folders"
|
||||
? isTitleSearchLoading
|
||||
: view.kind === "connectors" || view.kind === "connector-type"
|
||||
? isConnectorsLoading
|
||||
: false;
|
||||
? isTitleSearchLoading || isConnectorsLoading
|
||||
: view.kind === "files-folders"
|
||||
? isTitleSearchLoading
|
||||
: view.kind === "connectors" || view.kind === "connector-type"
|
||||
? isConnectorsLoading
|
||||
: false;
|
||||
const actualLoading =
|
||||
isVisibleViewLoading && !isSingleCharSearch && visibleNodes.length === 0 && !isRootBrowseView;
|
||||
|
||||
const title =
|
||||
hasSearch || view.kind === "root"
|
||||
? null
|
||||
: view.kind === "surfsense-docs"
|
||||
? "SurfSense Docs"
|
||||
: view.kind === "files-folders"
|
||||
? "Files & Folders"
|
||||
: view.kind === "connectors"
|
||||
? "Connectors"
|
||||
: view.title;
|
||||
: view.kind === "files-folders"
|
||||
? "Files & Folders"
|
||||
: view.kind === "connectors"
|
||||
? "Connectors"
|
||||
: view.title;
|
||||
|
||||
return (
|
||||
<ComposerSuggestionList
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import { IconUsersGroup } from "@tabler/icons-react";
|
||||
import {
|
||||
BookOpen,
|
||||
Brain,
|
||||
File,
|
||||
FileText,
|
||||
|
|
@ -119,8 +118,6 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
|
|||
return <FileText {...iconProps} />;
|
||||
case "EXTENSION":
|
||||
return <Webhook {...iconProps} />;
|
||||
case "SURFSENSE_DOCS":
|
||||
return <BookOpen {...iconProps} />;
|
||||
case "USER_MEMORY":
|
||||
case "TEAM_MEMORY":
|
||||
return <Brain {...iconProps} />;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import {
|
||||
BookOpen,
|
||||
Brain,
|
||||
Calendar,
|
||||
FileEdit,
|
||||
|
|
@ -47,7 +46,6 @@ const TOOL_ICONS: Record<string, LucideIcon> = {
|
|||
// Web / search
|
||||
scrape_webpage: ScanLine,
|
||||
web_search: Globe,
|
||||
search_surfsense_docs: BookOpen,
|
||||
// Automations
|
||||
create_automation: Workflow,
|
||||
// Memory
|
||||
|
|
@ -152,7 +150,6 @@ const TOOL_DISPLAY_NAMES: Record<string, string> = {
|
|||
// Web / search
|
||||
scrape_webpage: "Read webpage",
|
||||
web_search: "Search the web",
|
||||
search_surfsense_docs: "Search knowledge base",
|
||||
// Automations
|
||||
create_automation: "Create automation",
|
||||
// Memory
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ export const documentTypeEnum = z.enum([
|
|||
"CIRCLEBACK",
|
||||
"OBSIDIAN_CONNECTOR",
|
||||
"LOCAL_FOLDER_FILE",
|
||||
"SURFSENSE_DOCS",
|
||||
"NOTE",
|
||||
"USER_MEMORY",
|
||||
"TEAM_MEMORY",
|
||||
|
|
@ -77,27 +76,6 @@ export const documentWithChunks = document.extend({
|
|||
chunk_start_index: z.number().optional().default(0),
|
||||
});
|
||||
|
||||
/**
|
||||
* Surfsense documentation schemas
|
||||
* Follows the same pattern as document/documentWithChunks
|
||||
*/
|
||||
export const surfsenseDocsChunk = z.object({
|
||||
id: z.number(),
|
||||
content: z.string(),
|
||||
});
|
||||
|
||||
export const surfsenseDocsDocument = z.object({
|
||||
id: z.number(),
|
||||
title: z.string(),
|
||||
source: z.string(),
|
||||
public_url: z.string(),
|
||||
content: z.string(),
|
||||
});
|
||||
|
||||
export const surfsenseDocsDocumentWithChunks = surfsenseDocsDocument.extend({
|
||||
chunks: z.array(surfsenseDocsChunk),
|
||||
});
|
||||
|
||||
/**
|
||||
* Get documents
|
||||
*/
|
||||
|
|
@ -284,32 +262,6 @@ export const getDocumentChunksResponse = z.object({
|
|||
has_more: z.boolean(),
|
||||
});
|
||||
|
||||
/**
|
||||
* Get Surfsense docs by chunk
|
||||
*/
|
||||
export const getSurfsenseDocsByChunkRequest = z.object({
|
||||
chunk_id: z.number(),
|
||||
});
|
||||
|
||||
export const getSurfsenseDocsByChunkResponse = surfsenseDocsDocumentWithChunks;
|
||||
|
||||
/**
|
||||
* List Surfsense docs
|
||||
*/
|
||||
export const getSurfsenseDocsRequest = z.object({
|
||||
queryParams: paginationQueryParams.extend({
|
||||
title: z.string().optional(),
|
||||
}),
|
||||
});
|
||||
|
||||
export const getSurfsenseDocsResponse = z.object({
|
||||
items: z.array(surfsenseDocsDocument),
|
||||
total: z.number(),
|
||||
page: z.number(),
|
||||
page_size: z.number(),
|
||||
has_more: z.boolean(),
|
||||
});
|
||||
|
||||
/**
|
||||
* Update document
|
||||
*/
|
||||
|
|
@ -358,13 +310,6 @@ export type DeleteDocumentResponse = z.infer<typeof deleteDocumentResponse>;
|
|||
export type DocumentTypeEnum = z.infer<typeof documentTypeEnum>;
|
||||
export type DocumentSortBy = z.infer<typeof documentSortByEnum>;
|
||||
export type SortOrder = z.infer<typeof sortOrderEnum>;
|
||||
export type SurfsenseDocsChunk = z.infer<typeof surfsenseDocsChunk>;
|
||||
export type SurfsenseDocsDocument = z.infer<typeof surfsenseDocsDocument>;
|
||||
export type SurfsenseDocsDocumentWithChunks = z.infer<typeof surfsenseDocsDocumentWithChunks>;
|
||||
export type GetSurfsenseDocsByChunkRequest = z.infer<typeof getSurfsenseDocsByChunkRequest>;
|
||||
export type GetSurfsenseDocsByChunkResponse = z.infer<typeof getSurfsenseDocsByChunkResponse>;
|
||||
export type GetSurfsenseDocsRequest = z.infer<typeof getSurfsenseDocsRequest>;
|
||||
export type GetSurfsenseDocsResponse = z.infer<typeof getSurfsenseDocsResponse>;
|
||||
export type GetDocumentChunksRequest = z.infer<typeof getDocumentChunksRequest>;
|
||||
export type GetDocumentChunksResponse = z.infer<typeof getDocumentChunksResponse>;
|
||||
export type ChunkRead = z.infer<typeof chunkRead>;
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ import {
|
|||
type GetDocumentsRequest,
|
||||
type GetDocumentsStatusRequest,
|
||||
type GetDocumentTypeCountsRequest,
|
||||
type GetSurfsenseDocsRequest,
|
||||
getDocumentByChunkRequest,
|
||||
getDocumentByChunkResponse,
|
||||
getDocumentChunksRequest,
|
||||
|
|
@ -25,9 +24,6 @@ import {
|
|||
getDocumentsStatusResponse,
|
||||
getDocumentTypeCountsRequest,
|
||||
getDocumentTypeCountsResponse,
|
||||
getSurfsenseDocsByChunkResponse,
|
||||
getSurfsenseDocsRequest,
|
||||
getSurfsenseDocsResponse,
|
||||
type SearchDocumentsRequest,
|
||||
type SearchDocumentTitlesRequest,
|
||||
searchDocumentsRequest,
|
||||
|
|
@ -363,48 +359,6 @@ class DocumentsApiService {
|
|||
);
|
||||
};
|
||||
|
||||
/**
|
||||
* Get Surfsense documentation by chunk ID
|
||||
* Used for resolving [citation:doc-XXX] citations
|
||||
*/
|
||||
getSurfsenseDocByChunk = async (chunkId: number) => {
|
||||
return baseApiService.get(
|
||||
`/api/v1/surfsense-docs/by-chunk/${chunkId}`,
|
||||
getSurfsenseDocsByChunkResponse
|
||||
);
|
||||
};
|
||||
|
||||
/**
|
||||
* List all Surfsense documentation documents
|
||||
* @param request - The request with query params
|
||||
* @param signal - Optional AbortSignal for request cancellation
|
||||
*/
|
||||
getSurfsenseDocs = async (request: GetSurfsenseDocsRequest, signal?: AbortSignal) => {
|
||||
const parsedRequest = getSurfsenseDocsRequest.safeParse(request);
|
||||
|
||||
if (!parsedRequest.success) {
|
||||
console.error("Invalid request:", parsedRequest.error);
|
||||
|
||||
const errorMessage = parsedRequest.error.issues.map((issue) => issue.message).join(", ");
|
||||
throw new ValidationError(`Invalid request: ${errorMessage}`);
|
||||
}
|
||||
|
||||
// Transform query params to be string values
|
||||
const transformedQueryParams = parsedRequest.data.queryParams
|
||||
? Object.fromEntries(
|
||||
Object.entries(parsedRequest.data.queryParams).map(([k, v]) => [k, String(v)])
|
||||
)
|
||||
: undefined;
|
||||
|
||||
const queryParams = transformedQueryParams
|
||||
? new URLSearchParams(transformedQueryParams).toString()
|
||||
: "";
|
||||
|
||||
const url = `/api/v1/surfsense-docs?${queryParams}`;
|
||||
|
||||
return baseApiService.get(url, getSurfsenseDocsResponse, { signal });
|
||||
};
|
||||
|
||||
/**
|
||||
* Update a document
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -221,7 +221,6 @@ export interface RegenerateParams {
|
|||
content: string;
|
||||
}>;
|
||||
mentionedDocumentIds?: number[];
|
||||
mentionedSurfsenseDocIds?: number[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ export function getDocumentTypeLabel(type: string): string {
|
|||
CIRCLEBACK: "Circleback",
|
||||
OBSIDIAN_CONNECTOR: "Obsidian",
|
||||
LOCAL_FOLDER_FILE: "Local Folder",
|
||||
SURFSENSE_DOCS: "SurfSense Docs",
|
||||
NOTE: "Note",
|
||||
COMPOSIO_GOOGLE_DRIVE_CONNECTOR: "Composio Google Drive",
|
||||
COMPOSIO_GMAIL_CONNECTOR: "Composio Gmail",
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ export const cacheKeys = {
|
|||
withQueryParams: (queries: GetDocumentsRequest["queryParams"]) =>
|
||||
["documents-with-queries", ...stableEntries(queries)] as const,
|
||||
document: (documentId: string) => ["document", documentId] as const,
|
||||
byChunk: (chunkId: string) => ["documents", "by-chunk", chunkId] as const,
|
||||
},
|
||||
logs: {
|
||||
list: (searchSpaceId?: number | string) => ["logs", "list", searchSpaceId] as const,
|
||||
|
|
|
|||
BIN
surfsense_web/tsc_out.txt
Normal file
BIN
surfsense_web/tsc_out.txt
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue