mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
Merge pull request #1445 from MODSetter/dev_mod
feat: added basic UI for automations and removed surfsense docs in chat related code.
This commit is contained in:
commit
5d90fbe99f
165 changed files with 3149 additions and 2328 deletions
|
|
@ -98,9 +98,7 @@ def upgrade() -> None:
|
||||||
op.execute(
|
op.execute(
|
||||||
"CREATE INDEX ix_automation_triggers_automation_id ON automation_triggers(automation_id);"
|
"CREATE INDEX ix_automation_triggers_automation_id ON automation_triggers(automation_id);"
|
||||||
)
|
)
|
||||||
op.execute(
|
op.execute("CREATE INDEX ix_automation_triggers_type ON automation_triggers(type);")
|
||||||
"CREATE INDEX ix_automation_triggers_type ON automation_triggers(type);"
|
|
||||||
)
|
|
||||||
op.execute(
|
op.execute(
|
||||||
"CREATE INDEX ix_automation_triggers_enabled ON automation_triggers(enabled);"
|
"CREATE INDEX ix_automation_triggers_enabled ON automation_triggers(enabled);"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,129 @@
|
||||||
|
"""Drop Surfsense docs tables (feature removed end to end)
|
||||||
|
|
||||||
|
Revision ID: 146
|
||||||
|
Revises: 145
|
||||||
|
Create Date: 2026-05-28
|
||||||
|
|
||||||
|
Removes the SurfSense product-documentation feature: the
|
||||||
|
``surfsense_docs_documents`` and ``surfsense_docs_chunks`` tables (created
|
||||||
|
in revision 60) and the GIN trigram index on the title column (added in
|
||||||
|
revision 67). The docs were seeded at startup from local MDX files, so no
|
||||||
|
user data is lost. Downgrade recreates the tables and indexes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
from app.config import config
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "146"
|
||||||
|
down_revision: str | None = "145"
|
||||||
|
branch_labels: str | Sequence[str] | None = None
|
||||||
|
depends_on: str | Sequence[str] | None = None
|
||||||
|
|
||||||
|
# Embedding dimension is required to recreate the vector columns on downgrade.
|
||||||
|
EMBEDDING_DIM = config.embedding_model_instance.dimension
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
"""Drop surfsense docs tables and all their indexes."""
|
||||||
|
# Trigram index from revision 67
|
||||||
|
op.execute("DROP INDEX IF EXISTS idx_surfsense_docs_title_trgm")
|
||||||
|
|
||||||
|
# Full-text search indexes
|
||||||
|
op.execute("DROP INDEX IF EXISTS surfsense_docs_chunks_search_index")
|
||||||
|
op.execute("DROP INDEX IF EXISTS surfsense_docs_documents_search_index")
|
||||||
|
|
||||||
|
# Vector indexes
|
||||||
|
op.execute("DROP INDEX IF EXISTS surfsense_docs_chunks_vector_index")
|
||||||
|
op.execute("DROP INDEX IF EXISTS surfsense_docs_documents_vector_index")
|
||||||
|
|
||||||
|
# B-tree indexes
|
||||||
|
op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_chunks_document_id")
|
||||||
|
op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_updated_at")
|
||||||
|
op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_content_hash")
|
||||||
|
op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_source")
|
||||||
|
|
||||||
|
# Tables (chunks first due to FK)
|
||||||
|
op.execute("DROP TABLE IF EXISTS surfsense_docs_chunks")
|
||||||
|
op.execute("DROP TABLE IF EXISTS surfsense_docs_documents")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
"""Recreate surfsense docs tables and indexes (reverses revisions 60 + 67)."""
|
||||||
|
op.execute(
|
||||||
|
f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS surfsense_docs_documents (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
|
||||||
|
source VARCHAR NOT NULL UNIQUE,
|
||||||
|
title VARCHAR NOT NULL,
|
||||||
|
content TEXT NOT NULL,
|
||||||
|
content_hash VARCHAR NOT NULL,
|
||||||
|
embedding vector({EMBEDDING_DIM}),
|
||||||
|
updated_at TIMESTAMP WITH TIME ZONE
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
op.execute(
|
||||||
|
f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS surfsense_docs_chunks (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
|
||||||
|
content TEXT NOT NULL,
|
||||||
|
embedding vector({EMBEDDING_DIM}),
|
||||||
|
document_id INTEGER NOT NULL REFERENCES surfsense_docs_documents(id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
# B-tree indexes
|
||||||
|
op.execute(
|
||||||
|
"CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_source ON surfsense_docs_documents(source)"
|
||||||
|
)
|
||||||
|
op.execute(
|
||||||
|
"CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_content_hash ON surfsense_docs_documents(content_hash)"
|
||||||
|
)
|
||||||
|
op.execute(
|
||||||
|
"CREATE INDEX IF NOT EXISTS ix_surfsense_docs_documents_updated_at ON surfsense_docs_documents(updated_at)"
|
||||||
|
)
|
||||||
|
op.execute(
|
||||||
|
"CREATE INDEX IF NOT EXISTS ix_surfsense_docs_chunks_document_id ON surfsense_docs_chunks(document_id)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Vector indexes
|
||||||
|
op.execute(
|
||||||
|
"""
|
||||||
|
CREATE INDEX IF NOT EXISTS surfsense_docs_documents_vector_index
|
||||||
|
ON surfsense_docs_documents USING hnsw (embedding public.vector_cosine_ops);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
op.execute(
|
||||||
|
"""
|
||||||
|
CREATE INDEX IF NOT EXISTS surfsense_docs_chunks_vector_index
|
||||||
|
ON surfsense_docs_chunks USING hnsw (embedding public.vector_cosine_ops);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
# Full-text search indexes
|
||||||
|
op.execute(
|
||||||
|
"""
|
||||||
|
CREATE INDEX IF NOT EXISTS surfsense_docs_documents_search_index
|
||||||
|
ON surfsense_docs_documents USING gin (to_tsvector('english', content));
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
op.execute(
|
||||||
|
"""
|
||||||
|
CREATE INDEX IF NOT EXISTS surfsense_docs_chunks_search_index
|
||||||
|
ON surfsense_docs_chunks USING gin (to_tsvector('english', content));
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
# Trigram index from revision 67
|
||||||
|
op.execute(
|
||||||
|
"""
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm
|
||||||
|
ON surfsense_docs_documents USING gin (title gin_trgm_ops);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
@ -4,8 +4,8 @@ never invent ids you didn't see. Citation ids are resolved by exact-match
|
||||||
lookup; a wrong id silently breaks the link, so when in doubt, omit.
|
lookup; a wrong id silently breaks the link, so when in doubt, omit.
|
||||||
|
|
||||||
### Channel A — chunk blocks injected this turn
|
### Channel A — chunk blocks injected this turn
|
||||||
When `search_surfsense_docs` or `web_search` returns `<document>` /
|
When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
|
||||||
`<chunk id='…'>` blocks in this turn:
|
turn:
|
||||||
|
|
||||||
1. For each factual statement taken from those chunks, add
|
1. For each factual statement taken from those chunks, add
|
||||||
`[citation:chunk_id]` using the **exact** id from a visible
|
`[citation:chunk_id]` using the **exact** id from a visible
|
||||||
|
|
|
||||||
|
|
@ -20,8 +20,8 @@ it to resolve paths the user describes in natural language ("my Q2 roadmap",
|
||||||
delegating to a specialist.
|
delegating to a specialist.
|
||||||
|
|
||||||
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
|
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
|
||||||
by KB search (from `search_surfsense_docs`, or backing `<priority_documents>`).
|
by KB search (backing `<priority_documents>`). Each chunk carries a stable
|
||||||
Each chunk carries a stable `id` attribute.
|
`id` attribute.
|
||||||
|
|
||||||
If a block doesn't appear this turn, work from the conversation alone.
|
If a block doesn't appear this turn, work from the conversation alone.
|
||||||
</dynamic_context>
|
</dynamic_context>
|
||||||
|
|
|
||||||
|
|
@ -20,8 +20,8 @@ week's planning notes") into concrete document references before delegating
|
||||||
to a specialist.
|
to a specialist.
|
||||||
|
|
||||||
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
|
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
|
||||||
by KB search (from `search_surfsense_docs`, or backing `<priority_documents>`).
|
by KB search (backing `<priority_documents>`). Each chunk carries a stable
|
||||||
Each chunk carries a stable `id` attribute.
|
`id` attribute.
|
||||||
|
|
||||||
If a block doesn't appear this turn, work from the conversation alone.
|
If a block doesn't appear this turn, work from the conversation alone.
|
||||||
</dynamic_context>
|
</dynamic_context>
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,21 @@
|
||||||
<knowledge_base_first>
|
<knowledge_base_first>
|
||||||
CRITICAL — ground factual answers in what you actually receive this turn:
|
CRITICAL — ground factual answers in what you actually receive this turn:
|
||||||
- injected workspace context (see `<dynamic_context>`),
|
- injected workspace context (see `<dynamic_context>`),
|
||||||
- results from your own tool calls (`search_surfsense_docs`, `web_search`,
|
- results from your own tool calls (`web_search`, `scrape_webpage`),
|
||||||
`scrape_webpage`),
|
|
||||||
- or substantive summaries returned by a `task` specialist you invoked.
|
- or substantive summaries returned by a `task` specialist you invoked.
|
||||||
|
|
||||||
Do **not** answer factual or informational questions from general knowledge
|
Do **not** answer factual or informational questions from general knowledge
|
||||||
unless the user explicitly authorises it after you say you couldn't find
|
unless the user explicitly authorises it after you say you couldn't find
|
||||||
enough in those sources. The flow when nothing is found:
|
enough in those sources. The flow when nothing is found:
|
||||||
|
|
||||||
1. Say you couldn't find enough in their workspace, docs, or tool output.
|
1. Say you couldn't find enough in their workspace or tool output.
|
||||||
2. Ask: *"Would you like me to answer from my general knowledge instead?"*
|
2. Ask: *"Would you like me to answer from my general knowledge instead?"*
|
||||||
3. Only answer from general knowledge after a clear yes.
|
3. Only answer from general knowledge after a clear yes.
|
||||||
|
|
||||||
This rule does NOT apply to: casual conversation · meta-questions about
|
This rule does NOT apply to: casual conversation · meta-questions about
|
||||||
SurfSense ("what can you do?") · formatting or analysis of content already
|
SurfSense ("what can you do?") · formatting or analysis of content already
|
||||||
in chat · clear rewrite/edit instructions · lightweight web research.
|
in chat · clear rewrite/edit instructions · lightweight web research.
|
||||||
|
|
||||||
|
For "how do I use SurfSense" / product-documentation questions, point the
|
||||||
|
user to https://www.surfsense.com/docs.
|
||||||
</knowledge_base_first>
|
</knowledge_base_first>
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ Structured reasoning:
|
||||||
- For non-trivial work, `<thinking>` / short `<plan>` before tool calls is fine.
|
- For non-trivial work, `<thinking>` / short `<plan>` before tool calls is fine.
|
||||||
|
|
||||||
Professional objectivity:
|
Professional objectivity:
|
||||||
- Accuracy over flattery; verify with **search_surfsense_docs**, **web_search**, **scrape_webpage**, or **task** when unsure — don’t invent connector access.
|
- Accuracy over flattery; verify with **web_search**, **scrape_webpage**, or **task** when unsure — don’t invent connector access.
|
||||||
|
|
||||||
Task management:
|
Task management:
|
||||||
- For 3+ steps, use todo tooling; update statuses promptly.
|
- For 3+ steps, use todo tooling; update statuses promptly.
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,6 @@ Attribution:
|
||||||
|
|
||||||
Tool calls:
|
Tool calls:
|
||||||
- Parallelise independent calls.
|
- Parallelise independent calls.
|
||||||
- Prefer **search_surfsense_docs** for SurfSense docs/product questions before **web_search** when that fits the ask.
|
- For SurfSense docs/product questions, point the user to https://www.surfsense.com/docs.
|
||||||
- Don’t invent paths, chunk ids, or URLs — only values from tools or the user.
|
- Don’t invent paths, chunk ids, or URLs — only values from tools or the user.
|
||||||
</provider_hints>
|
</provider_hints>
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ Output style:
|
||||||
- GitHub-flavoured Markdown; monospace-friendly.
|
- GitHub-flavoured Markdown; monospace-friendly.
|
||||||
|
|
||||||
Workflow (Understand → Plan → Act → Verify):
|
Workflow (Understand → Plan → Act → Verify):
|
||||||
1. **Understand:** parse the ask; use **search_surfsense_docs** / injected workspace context before guessing.
|
1. **Understand:** parse the ask; use injected workspace context before guessing.
|
||||||
2. **Plan:** for multi-step work, a short plan first.
|
2. **Plan:** for multi-step work, a short plan first.
|
||||||
3. **Act:** only with tools you actually have on this agent (see `<tools>` and `<tool_routing>`). Connector work → **task**.
|
3. **Act:** only with tools you actually have on this agent (see `<tools>` and `<tool_routing>`). Connector work → **task**.
|
||||||
4. **Verify:** re-read or re-search only when it materially reduces risk.
|
4. **Verify:** re-read or re-search only when it materially reduces risk.
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ Output style:
|
||||||
|
|
||||||
Tool calls:
|
Tool calls:
|
||||||
- Parallelise independent calls in one turn.
|
- Parallelise independent calls in one turn.
|
||||||
- Prefer **search_surfsense_docs** for SurfSense-product questions, **web_search** / **scrape_webpage**
|
- For SurfSense-product questions, point the user to https://www.surfsense.com/docs;
|
||||||
for fresh public facts; integrations and heavy workflows → **task**.
|
use **web_search** / **scrape_webpage** for fresh public facts; integrations and
|
||||||
|
heavy workflows → **task**.
|
||||||
</provider_hints>
|
</provider_hints>
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,7 @@ You have two execution channels. Pick the one that owns the work — never
|
||||||
simulate one with the other.
|
simulate one with the other.
|
||||||
|
|
||||||
### 1. Direct tools (you call them yourself)
|
### 1. Direct tools (you call them yourself)
|
||||||
- `search_surfsense_docs` — SurfSense product docs (setup, configuration,
|
- `web_search` — search the public web (anything outside the workspace KB).
|
||||||
connector docs, feature behavior).
|
|
||||||
- `web_search` — search the public web (anything outside SurfSense docs and
|
|
||||||
the workspace KB).
|
|
||||||
- `scrape_webpage` — fetch the body of a specific public URL.
|
- `scrape_webpage` — fetch the body of a specific public URL.
|
||||||
- `update_memory` — curate persistent memory (see `<memory_protocol>`).
|
- `update_memory` — curate persistent memory (see `<memory_protocol>`).
|
||||||
- `write_todos` — maintain a structured plan when the turn series spans
|
- `write_todos` — maintain a structured plan when the turn series spans
|
||||||
|
|
@ -14,6 +11,10 @@ simulate one with the other.
|
||||||
`in_progress` **before** the `task` call that handles it, `completed`
|
`in_progress` **before** the `task` call that handles it, `completed`
|
||||||
once the call returns. Skip for single-step requests.
|
once the call returns. Skip for single-step requests.
|
||||||
|
|
||||||
|
**Questions about how to use SurfSense itself** (setup, configuration,
|
||||||
|
connectors, feature behavior) — point the user to the documentation:
|
||||||
|
https://www.surfsense.com/docs. There is no docs-search tool; give the link.
|
||||||
|
|
||||||
**You have NO filesystem tools.** Any read, write, edit, move, rename, or
|
**You have NO filesystem tools.** Any read, write, edit, move, rename, or
|
||||||
search inside the user's workspace goes through `task(knowledge_base, …)` —
|
search inside the user's workspace goes through `task(knowledge_base, …)` —
|
||||||
never via `write_file`, `ls`, or any direct file operation.
|
never via `write_file`, `ls`, or any direct file operation.
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
"""``search_surfsense_docs`` — description + few-shot examples."""
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
- `search_surfsense_docs` — Search official SurfSense documentation (product
|
|
||||||
help).
|
|
||||||
- Use when the user asks how SurfSense itself works — setup, configuration,
|
|
||||||
connector documentation, feature behavior, anything covered in the
|
|
||||||
product docs.
|
|
||||||
- Not a substitute for `task` when the user wants actions inside a
|
|
||||||
connected service (Gmail, Slack, Jira, Notion, etc.).
|
|
||||||
- Args: `query`, `top_k` (default 10).
|
|
||||||
- Returns doc excerpts; chunk ids may appear for attribution — see
|
|
||||||
`<citations>` for the contract.
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
<example>
|
|
||||||
user: "How do I install SurfSense?"
|
|
||||||
→ search_surfsense_docs(query="installation setup")
|
|
||||||
</example>
|
|
||||||
|
|
||||||
<example>
|
|
||||||
user: "What connectors does SurfSense support?"
|
|
||||||
→ search_surfsense_docs(query="available connectors integrations")
|
|
||||||
</example>
|
|
||||||
|
|
||||||
<example>
|
|
||||||
user: "How do I set up the Notion connector?"
|
|
||||||
→ search_surfsense_docs(query="Notion connector setup configuration")
|
|
||||||
(Changing data inside Notion itself → `task(notion, …)`, not this tool.)
|
|
||||||
</example>
|
|
||||||
|
|
@ -28,7 +28,6 @@ from __future__ import annotations
|
||||||
|
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
|
|
||||||
_HEADER = """\
|
_HEADER = """\
|
||||||
You are the SurfSense automation drafter. Convert the user intent below
|
You are the SurfSense automation drafter. Convert the user intent below
|
||||||
into a SINGLE JSON object matching the AutomationCreate schema. Output
|
into a SINGLE JSON object matching the AutomationCreate schema. Output
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subag
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
|
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
|
||||||
"search_surfsense_docs",
|
|
||||||
"web_search",
|
"web_search",
|
||||||
"scrape_webpage",
|
"scrape_webpage",
|
||||||
"update_memory",
|
"update_memory",
|
||||||
|
|
|
||||||
|
|
@ -404,9 +404,7 @@ def build_task_tool_with_parent_config(
|
||||||
continue
|
continue
|
||||||
messages = payload.get("messages") or []
|
messages = payload.get("messages") or []
|
||||||
last_text = _safe_message_text(messages[-1]).rstrip() if messages else ""
|
last_text = _safe_message_text(messages[-1]).rstrip() if messages else ""
|
||||||
message_blocks.append(
|
message_blocks.append(f"[task {task_index}] {last_text or '<empty>'}")
|
||||||
f"[task {task_index}] {last_text or '<empty>'}"
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
child_trace = _build_tool_trace(messages)
|
child_trace = _build_tool_trace(messages)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@ -117,9 +117,7 @@ def create_generate_podcast_tool(
|
||||||
"podcast_id": podcast_id,
|
"podcast_id": podcast_id,
|
||||||
"title": podcast_title,
|
"title": podcast_title,
|
||||||
"file_location": file_location,
|
"file_location": file_location,
|
||||||
"message": (
|
"message": ("Podcast generated and saved to your podcast panel."),
|
||||||
"Podcast generated and saved to your podcast panel."
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
return with_receipt(
|
return with_receipt(
|
||||||
payload=payload,
|
payload=payload,
|
||||||
|
|
|
||||||
|
|
@ -126,8 +126,7 @@ def create_generate_video_presentation_tool(
|
||||||
elapsed,
|
elapsed,
|
||||||
)
|
)
|
||||||
err = (
|
err = (
|
||||||
"Background worker reported FAILED status for this "
|
"Background worker reported FAILED status for this video presentation."
|
||||||
"video presentation."
|
|
||||||
)
|
)
|
||||||
payload = {
|
payload = {
|
||||||
"status": VideoPresentationStatus.FAILED.value,
|
"status": VideoPresentationStatus.FAILED.value,
|
||||||
|
|
@ -151,9 +150,7 @@ def create_generate_video_presentation_tool(
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
logger.exception(
|
logger.exception("[generate_video_presentation] Error: %s", error_message)
|
||||||
"[generate_video_presentation] Error: %s", error_message
|
|
||||||
)
|
|
||||||
payload = {
|
payload = {
|
||||||
"status": VideoPresentationStatus.FAILED.value,
|
"status": VideoPresentationStatus.FAILED.value,
|
||||||
"error": error_message,
|
"error": error_message,
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ Gather and synthesize evidence using SurfSense research tools with clear citatio
|
||||||
<available_tools>
|
<available_tools>
|
||||||
- `web_search`
|
- `web_search`
|
||||||
- `scrape_webpage`
|
- `scrape_webpage`
|
||||||
- `search_surfsense_docs`
|
|
||||||
</available_tools>
|
</available_tools>
|
||||||
|
|
||||||
<tool_policy>
|
<tool_policy>
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,9 @@
|
||||||
"""Research-stage tools: web search, scrape, and in-product doc search."""
|
"""Research-stage tools: web search and scrape."""
|
||||||
|
|
||||||
from .scrape_webpage import create_scrape_webpage_tool
|
from .scrape_webpage import create_scrape_webpage_tool
|
||||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
|
||||||
from .web_search import create_web_search_tool
|
from .web_search import create_web_search_tool
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"create_scrape_webpage_tool",
|
"create_scrape_webpage_tool",
|
||||||
"create_search_surfsense_docs_tool",
|
|
||||||
"create_web_search_tool",
|
"create_web_search_tool",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ from langchain_core.tools import BaseTool
|
||||||
from app.agents.new_chat.permissions import Ruleset
|
from app.agents.new_chat.permissions import Ruleset
|
||||||
|
|
||||||
from .scrape_webpage import create_scrape_webpage_tool
|
from .scrape_webpage import create_scrape_webpage_tool
|
||||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
|
||||||
from .web_search import create_web_search_tool
|
from .web_search import create_web_search_tool
|
||||||
|
|
||||||
NAME = "research"
|
NAME = "research"
|
||||||
|
|
@ -27,5 +26,4 @@ def load_tools(
|
||||||
available_connectors=d.get("available_connectors"),
|
available_connectors=d.get("available_connectors"),
|
||||||
),
|
),
|
||||||
create_scrape_webpage_tool(firecrawl_api_key=d.get("firecrawl_api_key")),
|
create_scrape_webpage_tool(firecrawl_api_key=d.get("firecrawl_api_key")),
|
||||||
create_search_surfsense_docs_tool(db_session=d["db_session"]),
|
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -1,145 +0,0 @@
|
||||||
"""Semantic search over pre-indexed in-app documentation chunks for user how-to questions."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
from sqlalchemy import select
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
|
||||||
|
|
||||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
|
|
||||||
from app.utils.document_converters import embed_text
|
|
||||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
|
||||||
|
|
||||||
|
|
||||||
def format_surfsense_docs_results(results: list[tuple]) -> str:
|
|
||||||
"""Format (chunk, document) rows as XML with ``doc-`` chunk IDs for citations and UI routing."""
|
|
||||||
if not results:
|
|
||||||
return "No relevant Surfsense documentation found for your query."
|
|
||||||
|
|
||||||
# Group chunks by document
|
|
||||||
grouped: dict[int, dict] = {}
|
|
||||||
for chunk, doc in results:
|
|
||||||
public_url = surfsense_docs_public_url(doc.source)
|
|
||||||
if doc.id not in grouped:
|
|
||||||
grouped[doc.id] = {
|
|
||||||
"document_id": f"doc-{doc.id}",
|
|
||||||
"document_type": "SURFSENSE_DOCS",
|
|
||||||
"title": doc.title,
|
|
||||||
"url": public_url,
|
|
||||||
"metadata": {"source": doc.source, "public_url": public_url},
|
|
||||||
"chunks": [],
|
|
||||||
}
|
|
||||||
grouped[doc.id]["chunks"].append(
|
|
||||||
{
|
|
||||||
"chunk_id": f"doc-{chunk.id}",
|
|
||||||
"content": chunk.content,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Render XML matching format_documents_for_context structure
|
|
||||||
parts: list[str] = []
|
|
||||||
for g in grouped.values():
|
|
||||||
metadata_json = json.dumps(g["metadata"], ensure_ascii=False)
|
|
||||||
|
|
||||||
parts.append("<document>")
|
|
||||||
parts.append("<document_metadata>")
|
|
||||||
parts.append(f" <document_id>{g['document_id']}</document_id>")
|
|
||||||
parts.append(f" <document_type>{g['document_type']}</document_type>")
|
|
||||||
parts.append(f" <title><![CDATA[{g['title']}]]></title>")
|
|
||||||
parts.append(f" <url><![CDATA[{g['url']}]]></url>")
|
|
||||||
parts.append(f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>")
|
|
||||||
parts.append("</document_metadata>")
|
|
||||||
parts.append("")
|
|
||||||
parts.append("<document_content>")
|
|
||||||
|
|
||||||
for ch in g["chunks"]:
|
|
||||||
parts.append(
|
|
||||||
f" <chunk id='{ch['chunk_id']}'><![CDATA[{ch['content']}]]></chunk>"
|
|
||||||
)
|
|
||||||
|
|
||||||
parts.append("</document_content>")
|
|
||||||
parts.append("</document>")
|
|
||||||
parts.append("")
|
|
||||||
|
|
||||||
return "\n".join(parts).strip()
|
|
||||||
|
|
||||||
|
|
||||||
async def search_surfsense_docs_async(
|
|
||||||
query: str,
|
|
||||||
db_session: AsyncSession,
|
|
||||||
top_k: int = 10,
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Search Surfsense documentation using vector similarity.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: The search query about Surfsense usage
|
|
||||||
db_session: Database session for executing queries
|
|
||||||
top_k: Number of results to return
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Formatted string with relevant documentation content
|
|
||||||
"""
|
|
||||||
# Get embedding for the query
|
|
||||||
query_embedding = await asyncio.to_thread(embed_text, query)
|
|
||||||
|
|
||||||
# Vector similarity search on chunks, joining with documents
|
|
||||||
stmt = (
|
|
||||||
select(SurfsenseDocsChunk, SurfsenseDocsDocument)
|
|
||||||
.join(
|
|
||||||
SurfsenseDocsDocument,
|
|
||||||
SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id,
|
|
||||||
)
|
|
||||||
.order_by(SurfsenseDocsChunk.embedding.op("<=>")(query_embedding))
|
|
||||||
.limit(top_k)
|
|
||||||
)
|
|
||||||
|
|
||||||
result = await db_session.execute(stmt)
|
|
||||||
rows = result.all()
|
|
||||||
|
|
||||||
return format_surfsense_docs_results(rows)
|
|
||||||
|
|
||||||
|
|
||||||
def create_search_surfsense_docs_tool(db_session: AsyncSession):
|
|
||||||
"""
|
|
||||||
Factory function to create the search_surfsense_docs tool.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
db_session: Database session for executing queries
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A configured tool function for searching Surfsense documentation
|
|
||||||
"""
|
|
||||||
|
|
||||||
@tool
|
|
||||||
async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
|
|
||||||
"""
|
|
||||||
Search Surfsense documentation for help with using the application.
|
|
||||||
|
|
||||||
Use this tool when the user asks questions about:
|
|
||||||
- How to use Surfsense features
|
|
||||||
- Installation and setup instructions
|
|
||||||
- Configuration options and settings
|
|
||||||
- Troubleshooting common issues
|
|
||||||
- Available connectors and integrations
|
|
||||||
- Browser extension usage
|
|
||||||
- API documentation
|
|
||||||
|
|
||||||
This searches the official Surfsense documentation that was indexed
|
|
||||||
at deployment time. It does NOT search the user's personal knowledge base.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: The search query about Surfsense usage or features
|
|
||||||
top_k: Number of documentation chunks to retrieve (default: 10)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Relevant documentation content formatted with chunk IDs for citations
|
|
||||||
"""
|
|
||||||
return await search_surfsense_docs_async(
|
|
||||||
query=query,
|
|
||||||
db_session=db_session,
|
|
||||||
top_k=top_k,
|
|
||||||
)
|
|
||||||
|
|
||||||
return search_surfsense_docs
|
|
||||||
|
|
@ -104,7 +104,7 @@ class AgentFeatureFlags:
|
||||||
# ``tools/google_drive``, ``tools/dropbox``, ``tools/onedrive``,
|
# ``tools/google_drive``, ``tools/dropbox``, ``tools/onedrive``,
|
||||||
# ``tools/google_calendar``, ``tools/confluence``, ``tools/discord``,
|
# ``tools/google_calendar``, ``tools/confluence``, ``tools/discord``,
|
||||||
# ``tools/teams``, ``tools/luma``, ``connected_accounts``,
|
# ``tools/teams``, ``tools/luma``, ``connected_accounts``,
|
||||||
# ``update_memory``, ``search_surfsense_docs``) now acquire fresh
|
# ``update_memory``) now acquire fresh
|
||||||
# short-lived ``AsyncSession`` instances per call via
|
# short-lived ``AsyncSession`` instances per call via
|
||||||
# :data:`async_session_maker`. The factory still accepts ``db_session``
|
# :data:`async_session_maker`. The factory still accepts ``db_session``
|
||||||
# for registry compatibility but ``del``'s it immediately — see any
|
# for registry compatibility but ``del``'s it immediately — see any
|
||||||
|
|
|
||||||
|
|
@ -73,9 +73,8 @@ class ResolvedMentionSet:
|
||||||
``@Project Roadmap`` is never shadowed by a shorter prefix
|
``@Project Roadmap`` is never shadowed by a shorter prefix
|
||||||
``@Project``).
|
``@Project``).
|
||||||
|
|
||||||
``mentioned_document_ids`` collapses doc + surfsense_doc chips into
|
``mentioned_document_ids`` is an ordered, deduped list consumed by
|
||||||
a single ordered, deduped list because the priority middleware
|
the priority middleware downstream — see
|
||||||
treats them uniformly downstream — see
|
|
||||||
``KnowledgePriorityMiddleware._compute_priority_paths``.
|
``KnowledgePriorityMiddleware._compute_priority_paths``.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -103,7 +102,6 @@ async def resolve_mentions(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
mentioned_documents: list[MentionedDocumentInfo] | None,
|
mentioned_documents: list[MentionedDocumentInfo] | None,
|
||||||
mentioned_document_ids: list[int] | None = None,
|
mentioned_document_ids: list[int] | None = None,
|
||||||
mentioned_surfsense_doc_ids: list[int] | None = None,
|
|
||||||
mentioned_folder_ids: list[int] | None = None,
|
mentioned_folder_ids: list[int] | None = None,
|
||||||
) -> ResolvedMentionSet:
|
) -> ResolvedMentionSet:
|
||||||
"""Resolve every @-mention chip on a turn into virtual paths.
|
"""Resolve every @-mention chip on a turn into virtual paths.
|
||||||
|
|
@ -111,8 +109,7 @@ async def resolve_mentions(
|
||||||
The function takes both the ``mentioned_documents`` discriminated
|
The function takes both the ``mentioned_documents`` discriminated
|
||||||
list (chip metadata used for substitution + persistence) and the
|
list (chip metadata used for substitution + persistence) and the
|
||||||
parallel id arrays (``mentioned_document_ids``,
|
parallel id arrays (``mentioned_document_ids``,
|
||||||
``mentioned_surfsense_doc_ids``, ``mentioned_folder_ids``) for two
|
``mentioned_folder_ids``) for two reasons:
|
||||||
reasons:
|
|
||||||
|
|
||||||
* Legacy clients that haven't migrated to the unified chip list
|
* Legacy clients that haven't migrated to the unified chip list
|
||||||
still send the id arrays — we treat the union as authoritative.
|
still send the id arrays — we treat the union as authoritative.
|
||||||
|
|
@ -142,7 +139,6 @@ async def resolve_mentions(
|
||||||
dict.fromkeys(
|
dict.fromkeys(
|
||||||
[
|
[
|
||||||
*(mentioned_document_ids or []),
|
*(mentioned_document_ids or []),
|
||||||
*(mentioned_surfsense_doc_ids or []),
|
|
||||||
*chip_doc_ids,
|
*chip_doc_ids,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -59,14 +59,13 @@ Do NOT cite document_id. Always use the chunk id.
|
||||||
- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
|
- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
|
||||||
- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
|
- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
|
||||||
- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
|
- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
|
||||||
- Copy the EXACT chunk id from the XML - if it says `<chunk id='doc-123'>`, use [citation:doc-123]
|
- Copy the EXACT chunk id from the XML - if it says `<chunk id='5'>`, use [citation:5]
|
||||||
- If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page]
|
- If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page]
|
||||||
</citation_format>
|
</citation_format>
|
||||||
|
|
||||||
<citation_examples>
|
<citation_examples>
|
||||||
CORRECT citation formats:
|
CORRECT citation formats:
|
||||||
- [citation:5] (numeric chunk ID from knowledge base)
|
- [citation:5] (numeric chunk ID from knowledge base)
|
||||||
- [citation:doc-123] (for Surfsense documentation chunks)
|
|
||||||
- [citation:https://example.com/article] (URL chunk ID from web search results)
|
- [citation:https://example.com/article] (URL chunk ID from web search results)
|
||||||
- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations)
|
- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
|
||||||
2. Ask the user: "Would you like me to answer from my general knowledge instead?"
|
2. Ask the user: "Would you like me to answer from my general knowledge instead?"
|
||||||
3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes.
|
3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes.
|
||||||
- This policy does NOT apply to:
|
- This policy does NOT apply to:
|
||||||
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?")
|
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
|
||||||
* Formatting, summarization, or analysis of content already present in the conversation
|
* Formatting, summarization, or analysis of content already present in the conversation
|
||||||
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
|
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
|
||||||
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
|
||||||
2. Ask: "Would you like me to answer from my general knowledge instead?"
|
2. Ask: "Would you like me to answer from my general knowledge instead?"
|
||||||
3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes.
|
3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes.
|
||||||
- This policy does NOT apply to:
|
- This policy does NOT apply to:
|
||||||
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?")
|
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
|
||||||
* Formatting, summarization, or analysis of content already present in the conversation
|
* Formatting, summarization, or analysis of content already present in the conversation
|
||||||
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
|
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
|
||||||
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ When to use which tool:
|
||||||
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
|
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
|
||||||
- Real-time public web data → call web_search
|
- Real-time public web data → call web_search
|
||||||
- Reading a specific webpage → call scrape_webpage
|
- Reading a specific webpage → call scrape_webpage
|
||||||
|
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
|
||||||
|
|
||||||
**`task` subagents (when to delegate):**
|
**`task` subagents (when to delegate):**
|
||||||
- **`linear_specialist`** — Linear-only investigations and tool use.
|
- **`linear_specialist`** — Linear-only investigations and tool use.
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ When to use which tool:
|
||||||
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
|
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
|
||||||
- Real-time public web data → call web_search
|
- Real-time public web data → call web_search
|
||||||
- Reading a specific webpage → call scrape_webpage
|
- Reading a specific webpage → call scrape_webpage
|
||||||
|
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
|
||||||
|
|
||||||
**`task` subagents (when to delegate):**
|
**`task` subagents (when to delegate):**
|
||||||
- **`linear_specialist`** — Linear-only investigations and tool use.
|
- **`linear_specialist`** — Linear-only investigations and tool use.
|
||||||
|
|
|
||||||
|
|
@ -151,7 +151,6 @@ def _read_fragment(subpath: str) -> str:
|
||||||
# Ordered for reading flow: fundamentals first, then artifact generators,
|
# Ordered for reading flow: fundamentals first, then artifact generators,
|
||||||
# then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``).
|
# then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``).
|
||||||
ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = (
|
ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = (
|
||||||
"search_surfsense_docs",
|
|
||||||
"web_search",
|
"web_search",
|
||||||
"generate_podcast",
|
"generate_podcast",
|
||||||
"generate_video_presentation",
|
"generate_video_presentation",
|
||||||
|
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
|
|
||||||
- User: "How do I install SurfSense?"
|
|
||||||
- Call: `search_surfsense_docs(query="installation setup")`
|
|
||||||
- User: "What connectors does SurfSense support?"
|
|
||||||
- Call: `search_surfsense_docs(query="available connectors integrations")`
|
|
||||||
- User: "How do I set up the Notion connector?"
|
|
||||||
- Call: `search_surfsense_docs(query="Notion connector setup configuration")`
|
|
||||||
- User: "How do I use Docker to run SurfSense?"
|
|
||||||
- Call: `search_surfsense_docs(query="Docker installation setup")`
|
|
||||||
|
|
@ -1,7 +0,0 @@
|
||||||
|
|
||||||
- search_surfsense_docs: Search the official SurfSense documentation.
|
|
||||||
- Use this tool when the user asks anything about SurfSense itself (the application they are using).
|
|
||||||
- Args:
|
|
||||||
- query: The search query about SurfSense
|
|
||||||
- top_k: Number of documentation chunks to retrieve (default: 10)
|
|
||||||
- Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123])
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
---
|
---
|
||||||
name: email-drafting
|
name: email-drafting
|
||||||
description: Draft an email matching the user's voice, with structured intent and CTA
|
description: Draft an email matching the user's voice, with structured intent and CTA
|
||||||
allowed-tools: search_surfsense_docs
|
|
||||||
---
|
---
|
||||||
|
|
||||||
# Email drafting
|
# Email drafting
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
---
|
||||||
name: kb-research
|
name: kb-research
|
||||||
description: Structured approach to finding and synthesizing information from the user's knowledge base
|
description: Structured approach to finding and synthesizing information from the user's knowledge base
|
||||||
allowed-tools: search_surfsense_docs, scrape_webpage, read_file, ls_tree, grep, web_search
|
allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search
|
||||||
---
|
---
|
||||||
|
|
||||||
# Knowledge-base research
|
# Knowledge-base research
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
---
|
||||||
name: meeting-prep
|
name: meeting-prep
|
||||||
description: Pull together briefing materials before a scheduled meeting
|
description: Pull together briefing materials before a scheduled meeting
|
||||||
allowed-tools: search_surfsense_docs, web_search, scrape_webpage, read_file
|
allowed-tools: web_search, scrape_webpage, read_file
|
||||||
---
|
---
|
||||||
|
|
||||||
# Meeting preparation
|
# Meeting preparation
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
---
|
||||||
name: report-writing
|
name: report-writing
|
||||||
description: How to scope, draft, and revise a Markdown report artifact via generate_report
|
description: How to scope, draft, and revise a Markdown report artifact via generate_report
|
||||||
allowed-tools: generate_report, search_surfsense_docs, read_file
|
allowed-tools: generate_report, read_file
|
||||||
---
|
---
|
||||||
|
|
||||||
# Report writing
|
# Report writing
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
---
|
---
|
||||||
name: slack-summary
|
name: slack-summary
|
||||||
description: Distill a Slack channel or thread into actionable summary
|
description: Distill a Slack channel or thread into actionable summary
|
||||||
allowed-tools: search_surfsense_docs
|
|
||||||
---
|
---
|
||||||
|
|
||||||
# Slack summarization
|
# Slack summarization
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,6 @@ logger = logging.getLogger(__name__)
|
||||||
# ``glob``, ``grep``) plus the SurfSense-side read tools.
|
# ``glob``, ``grep``) plus the SurfSense-side read tools.
|
||||||
EXPLORE_READ_TOOLS: frozenset[str] = frozenset(
|
EXPLORE_READ_TOOLS: frozenset[str] = frozenset(
|
||||||
{
|
{
|
||||||
"search_surfsense_docs",
|
|
||||||
"web_search",
|
"web_search",
|
||||||
"scrape_webpage",
|
"scrape_webpage",
|
||||||
"read_file",
|
"read_file",
|
||||||
|
|
@ -61,7 +60,6 @@ EXPLORE_READ_TOOLS: frozenset[str] = frozenset(
|
||||||
# is needed, the parent should hand off to ``explore`` first.
|
# is needed, the parent should hand off to ``explore`` first.
|
||||||
REPORT_WRITER_TOOLS: frozenset[str] = frozenset(
|
REPORT_WRITER_TOOLS: frozenset[str] = frozenset(
|
||||||
{
|
{
|
||||||
"search_surfsense_docs",
|
|
||||||
"read_file",
|
"read_file",
|
||||||
"generate_report",
|
"generate_report",
|
||||||
}
|
}
|
||||||
|
|
@ -222,7 +220,6 @@ EXPLORE_SYSTEM_PROMPT = """You are the **explore** subagent for SurfSense.
|
||||||
Conduct read-only research across the user's knowledge base, the web, and any documents the parent agent has surfaced. Return a synthesized answer with explicit citations — never speculate beyond the sources you have actually inspected.
|
Conduct read-only research across the user's knowledge base, the web, and any documents the parent agent has surfaced. Return a synthesized answer with explicit citations — never speculate beyond the sources you have actually inspected.
|
||||||
|
|
||||||
## Tools available
|
## Tools available
|
||||||
- `search_surfsense_docs` — fast hybrid search over the user's knowledge base.
|
|
||||||
- `web_search` — only when the user's KB clearly does not contain the answer.
|
- `web_search` — only when the user's KB clearly does not contain the answer.
|
||||||
- `scrape_webpage` — to read a URL the user or the search results provided.
|
- `scrape_webpage` — to read a URL the user or the search results provided.
|
||||||
- `read_file`, `ls`, `glob`, `grep` — to inspect specific documents or trees the parent has flagged.
|
- `read_file`, `ls`, `glob`, `grep` — to inspect specific documents or trees the parent has flagged.
|
||||||
|
|
@ -242,7 +239,7 @@ Produce a single high-quality report deliverable using `generate_report`. The pa
|
||||||
|
|
||||||
## Workflow
|
## Workflow
|
||||||
1. **Outline first.** Before calling `generate_report`, write a one-paragraph outline of the sections you plan to produce. Confirm the outline reflects the parent's instructions.
|
1. **Outline first.** Before calling `generate_report`, write a one-paragraph outline of the sections you plan to produce. Confirm the outline reflects the parent's instructions.
|
||||||
2. **Source resolution.** Decide whether to call `search_surfsense_docs` and `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set.
|
2. **Source resolution.** Decide whether to call `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set.
|
||||||
3. **One report.** Call `generate_report` exactly once with `source_strategy` chosen per the topic and chat history (see the `report-writing` skill).
|
3. **One report.** Call `generate_report` exactly once with `source_strategy` chosen per the topic and chat history (see the `report-writing` skill).
|
||||||
4. **Confirm.** End with a one-sentence summary in your final message — never paste the report back into chat; the artifact card renders itself.
|
4. **Confirm.** End with a one-sentence summary in your final message — never paste the report back into chat; the artifact card renders itself.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@ This module contains all the tools available to the SurfSense agent.
|
||||||
To add a new tool, see the documentation in registry.py.
|
To add a new tool, see the documentation in registry.py.
|
||||||
|
|
||||||
Available tools:
|
Available tools:
|
||||||
- search_surfsense_docs: Search Surfsense documentation for usage help
|
|
||||||
- generate_podcast: Generate audio podcasts from content
|
- generate_podcast: Generate audio podcasts from content
|
||||||
- generate_video_presentation: Generate video presentations with slides and narration
|
- generate_video_presentation: Generate video presentations with slides and narration
|
||||||
- generate_image: Generate images from text descriptions using AI models
|
- generate_image: Generate images from text descriptions using AI models
|
||||||
|
|
@ -31,7 +30,6 @@ from .registry import (
|
||||||
get_tool_by_name,
|
get_tool_by_name,
|
||||||
)
|
)
|
||||||
from .scrape_webpage import create_scrape_webpage_tool
|
from .scrape_webpage import create_scrape_webpage_tool
|
||||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
|
||||||
from .update_memory import create_update_memory_tool, create_update_team_memory_tool
|
from .update_memory import create_update_memory_tool, create_update_team_memory_tool
|
||||||
from .video_presentation import create_generate_video_presentation_tool
|
from .video_presentation import create_generate_video_presentation_tool
|
||||||
|
|
||||||
|
|
@ -47,7 +45,6 @@ __all__ = [
|
||||||
"create_generate_podcast_tool",
|
"create_generate_podcast_tool",
|
||||||
"create_generate_video_presentation_tool",
|
"create_generate_video_presentation_tool",
|
||||||
"create_scrape_webpage_tool",
|
"create_scrape_webpage_tool",
|
||||||
"create_search_surfsense_docs_tool",
|
|
||||||
"create_update_memory_tool",
|
"create_update_memory_tool",
|
||||||
"create_update_team_memory_tool",
|
"create_update_team_memory_tool",
|
||||||
"format_documents_for_context",
|
"format_documents_for_context",
|
||||||
|
|
|
||||||
|
|
@ -131,9 +131,7 @@ def create_generate_podcast_tool(
|
||||||
"podcast_id": podcast_id,
|
"podcast_id": podcast_id,
|
||||||
"title": podcast_title,
|
"title": podcast_title,
|
||||||
"file_location": file_location,
|
"file_location": file_location,
|
||||||
"message": (
|
"message": ("Podcast generated and saved to your podcast panel."),
|
||||||
"Podcast generated and saved to your podcast panel."
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Only other terminal state is FAILED.
|
# Only other terminal state is FAILED.
|
||||||
|
|
@ -146,9 +144,7 @@ def create_generate_podcast_tool(
|
||||||
"status": PodcastStatus.FAILED.value,
|
"status": PodcastStatus.FAILED.value,
|
||||||
"podcast_id": podcast_id,
|
"podcast_id": podcast_id,
|
||||||
"title": podcast_title,
|
"title": podcast_title,
|
||||||
"error": (
|
"error": ("Background worker reported FAILED status for this podcast."),
|
||||||
"Background worker reported FAILED status for this podcast."
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -101,7 +101,6 @@ from .podcast import create_generate_podcast_tool
|
||||||
from .report import create_generate_report_tool
|
from .report import create_generate_report_tool
|
||||||
from .resume import create_generate_resume_tool
|
from .resume import create_generate_resume_tool
|
||||||
from .scrape_webpage import create_scrape_webpage_tool
|
from .scrape_webpage import create_scrape_webpage_tool
|
||||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
|
||||||
from .teams import (
|
from .teams import (
|
||||||
create_list_teams_channels_tool,
|
create_list_teams_channels_tool,
|
||||||
create_read_teams_messages_tool,
|
create_read_teams_messages_tool,
|
||||||
|
|
@ -258,15 +257,6 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
|
||||||
),
|
),
|
||||||
requires=[],
|
requires=[],
|
||||||
),
|
),
|
||||||
# Surfsense documentation search tool
|
|
||||||
ToolDefinition(
|
|
||||||
name="search_surfsense_docs",
|
|
||||||
description="Search Surfsense documentation for help with using the application",
|
|
||||||
factory=lambda deps: create_search_surfsense_docs_tool(
|
|
||||||
db_session=deps["db_session"],
|
|
||||||
),
|
|
||||||
requires=["db_session"],
|
|
||||||
),
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# SERVICE ACCOUNT DISCOVERY
|
# SERVICE ACCOUNT DISCOVERY
|
||||||
# Generic tool for the LLM to discover connected accounts and resolve
|
# Generic tool for the LLM to discover connected accounts and resolve
|
||||||
|
|
|
||||||
|
|
@ -1,174 +0,0 @@
|
||||||
"""
|
|
||||||
Surfsense documentation search tool.
|
|
||||||
|
|
||||||
This tool allows the agent to search the pre-indexed Surfsense documentation
|
|
||||||
to help users with questions about how to use the application.
|
|
||||||
|
|
||||||
The documentation is indexed at deployment time from MDX files and stored
|
|
||||||
in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
from sqlalchemy import select
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
|
||||||
|
|
||||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
|
|
||||||
from app.utils.document_converters import embed_text
|
|
||||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
|
||||||
|
|
||||||
|
|
||||||
def format_surfsense_docs_results(results: list[tuple]) -> str:
|
|
||||||
"""
|
|
||||||
Format search results into XML structure for the LLM context.
|
|
||||||
|
|
||||||
Uses the same XML structure as format_documents_for_context from knowledge_base.py
|
|
||||||
but with 'doc-' prefix on chunk IDs. This allows:
|
|
||||||
- LLM to use consistent [citation:doc-XXX] format
|
|
||||||
- Frontend to detect 'doc-' prefix and route to surfsense docs endpoint
|
|
||||||
|
|
||||||
Args:
|
|
||||||
results: List of (chunk, document) tuples from the database query
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Formatted XML string with documentation content and citation-ready chunks
|
|
||||||
"""
|
|
||||||
if not results:
|
|
||||||
return "No relevant Surfsense documentation found for your query."
|
|
||||||
|
|
||||||
# Group chunks by document
|
|
||||||
grouped: dict[int, dict] = {}
|
|
||||||
for chunk, doc in results:
|
|
||||||
public_url = surfsense_docs_public_url(doc.source)
|
|
||||||
if doc.id not in grouped:
|
|
||||||
grouped[doc.id] = {
|
|
||||||
"document_id": f"doc-{doc.id}",
|
|
||||||
"document_type": "SURFSENSE_DOCS",
|
|
||||||
"title": doc.title,
|
|
||||||
"url": public_url,
|
|
||||||
"metadata": {"source": doc.source, "public_url": public_url},
|
|
||||||
"chunks": [],
|
|
||||||
}
|
|
||||||
grouped[doc.id]["chunks"].append(
|
|
||||||
{
|
|
||||||
"chunk_id": f"doc-{chunk.id}",
|
|
||||||
"content": chunk.content,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Render XML matching format_documents_for_context structure
|
|
||||||
parts: list[str] = []
|
|
||||||
for g in grouped.values():
|
|
||||||
metadata_json = json.dumps(g["metadata"], ensure_ascii=False)
|
|
||||||
|
|
||||||
parts.append("<document>")
|
|
||||||
parts.append("<document_metadata>")
|
|
||||||
parts.append(f" <document_id>{g['document_id']}</document_id>")
|
|
||||||
parts.append(f" <document_type>{g['document_type']}</document_type>")
|
|
||||||
parts.append(f" <title><![CDATA[{g['title']}]]></title>")
|
|
||||||
parts.append(f" <url><![CDATA[{g['url']}]]></url>")
|
|
||||||
parts.append(f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>")
|
|
||||||
parts.append("</document_metadata>")
|
|
||||||
parts.append("")
|
|
||||||
parts.append("<document_content>")
|
|
||||||
|
|
||||||
for ch in g["chunks"]:
|
|
||||||
parts.append(
|
|
||||||
f" <chunk id='{ch['chunk_id']}'><![CDATA[{ch['content']}]]></chunk>"
|
|
||||||
)
|
|
||||||
|
|
||||||
parts.append("</document_content>")
|
|
||||||
parts.append("</document>")
|
|
||||||
parts.append("")
|
|
||||||
|
|
||||||
return "\n".join(parts).strip()
|
|
||||||
|
|
||||||
|
|
||||||
async def search_surfsense_docs_async(
|
|
||||||
query: str,
|
|
||||||
db_session: AsyncSession,
|
|
||||||
top_k: int = 10,
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Search Surfsense documentation using vector similarity.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: The search query about Surfsense usage
|
|
||||||
db_session: Database session for executing queries
|
|
||||||
top_k: Number of results to return
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Formatted string with relevant documentation content
|
|
||||||
"""
|
|
||||||
# Get embedding for the query
|
|
||||||
query_embedding = await asyncio.to_thread(embed_text, query)
|
|
||||||
|
|
||||||
# Vector similarity search on chunks, joining with documents
|
|
||||||
stmt = (
|
|
||||||
select(SurfsenseDocsChunk, SurfsenseDocsDocument)
|
|
||||||
.join(
|
|
||||||
SurfsenseDocsDocument,
|
|
||||||
SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id,
|
|
||||||
)
|
|
||||||
.order_by(SurfsenseDocsChunk.embedding.op("<=>")(query_embedding))
|
|
||||||
.limit(top_k)
|
|
||||||
)
|
|
||||||
|
|
||||||
result = await db_session.execute(stmt)
|
|
||||||
rows = result.all()
|
|
||||||
|
|
||||||
return format_surfsense_docs_results(rows)
|
|
||||||
|
|
||||||
|
|
||||||
def create_search_surfsense_docs_tool(db_session: AsyncSession):
|
|
||||||
"""
|
|
||||||
Factory function to create the search_surfsense_docs tool.
|
|
||||||
|
|
||||||
The tool acquires its own short-lived ``AsyncSession`` per call via
|
|
||||||
:data:`async_session_maker` so the closure is safe to share across
|
|
||||||
HTTP requests by the compiled-agent cache. Capturing a per-request
|
|
||||||
session here would surface stale/closed sessions on cache hits.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
db_session: Reserved for registry compatibility. Per-call sessions
|
|
||||||
are opened via :data:`async_session_maker` inside the tool body.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A configured tool function for searching Surfsense documentation
|
|
||||||
"""
|
|
||||||
del db_session # per-call session — see docstring
|
|
||||||
|
|
||||||
@tool
|
|
||||||
async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
|
|
||||||
"""
|
|
||||||
Search Surfsense documentation for help with using the application.
|
|
||||||
|
|
||||||
Use this tool when the user asks questions about:
|
|
||||||
- How to use Surfsense features
|
|
||||||
- Installation and setup instructions
|
|
||||||
- Configuration options and settings
|
|
||||||
- Troubleshooting common issues
|
|
||||||
- Available connectors and integrations
|
|
||||||
- Browser extension usage
|
|
||||||
- API documentation
|
|
||||||
|
|
||||||
This searches the official Surfsense documentation that was indexed
|
|
||||||
at deployment time. It does NOT search the user's personal knowledge base.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: The search query about Surfsense usage or features
|
|
||||||
top_k: Number of documentation chunks to retrieve (default: 10)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Relevant documentation content formatted with chunk IDs for citations
|
|
||||||
"""
|
|
||||||
async with async_session_maker() as db_session:
|
|
||||||
return await search_surfsense_docs_async(
|
|
||||||
query=query,
|
|
||||||
db_session=db_session,
|
|
||||||
top_k=top_k,
|
|
||||||
)
|
|
||||||
|
|
||||||
return search_surfsense_docs
|
|
||||||
|
|
@ -127,9 +127,7 @@ def create_generate_video_presentation_tool(
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
logger.exception(
|
logger.exception("[generate_video_presentation] Error: %s", error_message)
|
||||||
"[generate_video_presentation] Error: %s", error_message
|
|
||||||
)
|
|
||||||
return {
|
return {
|
||||||
"status": VideoPresentationStatus.FAILED.value,
|
"status": VideoPresentationStatus.FAILED.value,
|
||||||
"error": error_message,
|
"error": error_message,
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,6 @@ from app.rate_limiter import get_real_client_ip, limiter
|
||||||
from app.routes import router as crud_router
|
from app.routes import router as crud_router
|
||||||
from app.routes.auth_routes import router as auth_router
|
from app.routes.auth_routes import router as auth_router
|
||||||
from app.schemas import UserCreate, UserRead, UserUpdate
|
from app.schemas import UserCreate, UserRead, UserUpdate
|
||||||
from app.tasks.surfsense_docs_indexer import seed_surfsense_docs
|
|
||||||
from app.users import SECRET, auth_backend, current_active_user, fastapi_users
|
from app.users import SECRET, auth_backend, current_active_user, fastapi_users
|
||||||
from app.utils.perf import log_system_snapshot
|
from app.utils.perf import log_system_snapshot
|
||||||
|
|
||||||
|
|
@ -576,13 +575,6 @@ async def lifespan(app: FastAPI):
|
||||||
initialize_llm_router()
|
initialize_llm_router()
|
||||||
initialize_image_gen_router()
|
initialize_image_gen_router()
|
||||||
initialize_vision_llm_router()
|
initialize_vision_llm_router()
|
||||||
try:
|
|
||||||
await asyncio.wait_for(seed_surfsense_docs(), timeout=120)
|
|
||||||
except TimeoutError:
|
|
||||||
logging.getLogger(__name__).warning(
|
|
||||||
"Surfsense docs seeding timed out after 120s — skipping. "
|
|
||||||
"Docs will be indexed on the next restart."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Phase 1.7 — JIT warmup. Bounded so a stuck warmup never delays
|
# Phase 1.7 — JIT warmup. Bounded so a stuck warmup never delays
|
||||||
# worker readiness. ``shield`` so Uvicorn cancelling startup
|
# worker readiness. ``shield`` so Uvicorn cancelling startup
|
||||||
|
|
|
||||||
|
|
@ -21,4 +21,4 @@ __all__ = [
|
||||||
]
|
]
|
||||||
|
|
||||||
# Built-in actions self-register at import time.
|
# Built-in actions self-register at import time.
|
||||||
from . import agent_task # noqa: E402, F401
|
from . import agent_task # noqa: F401
|
||||||
|
|
|
||||||
|
|
@ -12,4 +12,4 @@ from .params import AgentTaskActionParams
|
||||||
__all__ = ["AgentTaskActionParams", "build_handler"]
|
__all__ = ["AgentTaskActionParams", "build_handler"]
|
||||||
|
|
||||||
# Side-effect: register on the actions store.
|
# Side-effect: register on the actions store.
|
||||||
from . import definition # noqa: E402, F401
|
from . import definition # noqa: F401
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,11 @@ def build_handler(ctx: ActionContext) -> ActionHandler:
|
||||||
ctx=ctx,
|
ctx=ctx,
|
||||||
query=validated.query,
|
query=validated.query,
|
||||||
auto_approve_all=validated.auto_approve_all,
|
auto_approve_all=validated.auto_approve_all,
|
||||||
|
mentioned_document_ids=validated.mentioned_document_ids,
|
||||||
|
mentioned_folder_ids=validated.mentioned_folder_ids,
|
||||||
|
mentioned_connector_ids=validated.mentioned_connector_ids,
|
||||||
|
mentioned_connectors=validated.mentioned_connectors,
|
||||||
|
mentioned_documents=validated.mentioned_documents,
|
||||||
)
|
)
|
||||||
|
|
||||||
return handle
|
return handle
|
||||||
|
|
|
||||||
|
|
@ -8,12 +8,15 @@ from typing import Any
|
||||||
|
|
||||||
from langchain_core.messages import HumanMessage
|
from langchain_core.messages import HumanMessage
|
||||||
from langgraph.types import Command
|
from langgraph.types import Command
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
|
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
|
||||||
|
from app.agents.new_chat.context import SurfSenseContextSchema
|
||||||
|
from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
|
||||||
from app.db import ChatVisibility, async_session_maker
|
from app.db import ChatVisibility, async_session_maker
|
||||||
|
from app.schemas.new_chat import MentionedDocumentInfo
|
||||||
|
|
||||||
from ..types import ActionContext
|
from ..types import ActionContext
|
||||||
|
|
||||||
from .auto_decide import build_auto_decisions
|
from .auto_decide import build_auto_decisions
|
||||||
from .dependencies import build_dependencies
|
from .dependencies import build_dependencies
|
||||||
from .finalize import extract_final_assistant_message
|
from .finalize import extract_final_assistant_message
|
||||||
|
|
@ -23,17 +26,118 @@ from .finalize import extract_final_assistant_message
|
||||||
_MAX_RESUMES = 50
|
_MAX_RESUMES = 50
|
||||||
|
|
||||||
|
|
||||||
|
def _build_connector_block(connectors: list[dict[str, Any]]) -> str | None:
|
||||||
|
"""Render the ``<mentioned_connectors>`` context block (same shape as chat).
|
||||||
|
|
||||||
|
Mirrors ``stream_new_chat`` so the agent gets the exact connector accounts
|
||||||
|
the user picked. Returns ``None`` when nothing renders.
|
||||||
|
"""
|
||||||
|
lines: list[str] = []
|
||||||
|
for connector in connectors:
|
||||||
|
connector_id = connector.get("id")
|
||||||
|
connector_type = connector.get("connector_type") or connector.get(
|
||||||
|
"document_type"
|
||||||
|
)
|
||||||
|
account_name = connector.get("account_name") or connector.get("title")
|
||||||
|
if connector_id is None or connector_type is None:
|
||||||
|
continue
|
||||||
|
lines.append(
|
||||||
|
f' - connector_id={connector_id}, connector_type="{connector_type}", '
|
||||||
|
f'account_name="{account_name or ""}"'
|
||||||
|
)
|
||||||
|
if not lines:
|
||||||
|
return None
|
||||||
|
return (
|
||||||
|
"<mentioned_connectors>\n"
|
||||||
|
"The user selected these exact connector accounts with @. "
|
||||||
|
"These entries are selection metadata, not retrieved connector content. "
|
||||||
|
"When a connector-backed tool needs an account, use the matching "
|
||||||
|
"connector_id from this list if the tool supports connector_id:\n"
|
||||||
|
+ "\n".join(lines)
|
||||||
|
+ "\n</mentioned_connectors>"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _resolve_mention_context(
|
||||||
|
session: AsyncSession,
|
||||||
|
*,
|
||||||
|
search_space_id: int,
|
||||||
|
query: str,
|
||||||
|
mentioned_document_ids: list[int] | None,
|
||||||
|
mentioned_folder_ids: list[int] | None,
|
||||||
|
mentioned_connector_ids: list[int] | None,
|
||||||
|
mentioned_connectors: list[MentionedDocumentInfo] | None,
|
||||||
|
mentioned_documents: list[MentionedDocumentInfo] | None,
|
||||||
|
) -> tuple[str, SurfSenseContextSchema | None]:
|
||||||
|
"""Resolve @-mentions into a rewritten query + per-invocation context.
|
||||||
|
|
||||||
|
Automation always runs in cloud filesystem mode, so we mirror the chat
|
||||||
|
``new_chat`` flow: substitute ``@title`` tokens with canonical
|
||||||
|
``/documents/...`` paths, prepend a ``<mentioned_connectors>`` block, and
|
||||||
|
build a ``SurfSenseContextSchema`` that ``KnowledgePriorityMiddleware``
|
||||||
|
reads via ``runtime.context``. Returns ``(query, None)`` unchanged when
|
||||||
|
there are no mentions.
|
||||||
|
"""
|
||||||
|
has_mentions = bool(
|
||||||
|
mentioned_document_ids
|
||||||
|
or mentioned_folder_ids
|
||||||
|
or mentioned_connector_ids
|
||||||
|
or mentioned_connectors
|
||||||
|
or mentioned_documents
|
||||||
|
)
|
||||||
|
if not has_mentions:
|
||||||
|
return query, None
|
||||||
|
|
||||||
|
resolved = await resolve_mentions(
|
||||||
|
session,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
mentioned_documents=mentioned_documents,
|
||||||
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
|
mentioned_folder_ids=mentioned_folder_ids,
|
||||||
|
)
|
||||||
|
agent_query = substitute_in_text(query, resolved.token_to_path)
|
||||||
|
|
||||||
|
# ``SurfSenseContextSchema.mentioned_connectors`` is typed ``list[dict]`` and
|
||||||
|
# the connector block reads dicts, so dump the pydantic chips once.
|
||||||
|
connector_dicts = [c.model_dump() for c in (mentioned_connectors or [])]
|
||||||
|
connector_block = _build_connector_block(connector_dicts)
|
||||||
|
if connector_block:
|
||||||
|
agent_query = f"{connector_block}\n\n<user_query>{agent_query}</user_query>"
|
||||||
|
|
||||||
|
runtime_context = SurfSenseContextSchema(
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
mentioned_document_ids=list(
|
||||||
|
resolved.mentioned_document_ids or (mentioned_document_ids or [])
|
||||||
|
),
|
||||||
|
mentioned_folder_ids=list(
|
||||||
|
resolved.mentioned_folder_ids or (mentioned_folder_ids or [])
|
||||||
|
),
|
||||||
|
mentioned_connector_ids=list(mentioned_connector_ids or []),
|
||||||
|
mentioned_connectors=connector_dicts,
|
||||||
|
)
|
||||||
|
return agent_query, runtime_context
|
||||||
|
|
||||||
|
|
||||||
async def run_agent_task(
|
async def run_agent_task(
|
||||||
*,
|
*,
|
||||||
ctx: ActionContext,
|
ctx: ActionContext,
|
||||||
query: str,
|
query: str,
|
||||||
auto_approve_all: bool,
|
auto_approve_all: bool,
|
||||||
|
mentioned_document_ids: list[int] | None = None,
|
||||||
|
mentioned_folder_ids: list[int] | None = None,
|
||||||
|
mentioned_connector_ids: list[int] | None = None,
|
||||||
|
mentioned_connectors: list[MentionedDocumentInfo] | None = None,
|
||||||
|
mentioned_documents: list[MentionedDocumentInfo] | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Invoke multi_agent_chat for one rendered query and return its outcome.
|
"""Invoke multi_agent_chat for one rendered query and return its outcome.
|
||||||
|
|
||||||
Opens its own DB session so the executor's bookkeeping session isn't tied
|
Opens its own DB session so the executor's bookkeeping session isn't tied
|
||||||
up for the entire invocation. The LangGraph ``thread_id`` (a fresh UUID)
|
up for the entire invocation. The LangGraph ``thread_id`` (a fresh UUID)
|
||||||
is returned as ``agent_session_id`` for later inspection.
|
is returned as ``agent_session_id`` for later inspection.
|
||||||
|
|
||||||
|
@-mentions (files / folders / connectors) chosen in the task input are
|
||||||
|
resolved the same way the chat flow does and forwarded to the agent via the
|
||||||
|
per-invocation ``context`` so they actually scope retrieval.
|
||||||
"""
|
"""
|
||||||
agent_session_id = str(uuid.uuid4())
|
agent_session_id = str(uuid.uuid4())
|
||||||
user_id = str(ctx.creator_user_id) if ctx.creator_user_id else None
|
user_id = str(ctx.creator_user_id) if ctx.creator_user_id else None
|
||||||
|
|
@ -56,12 +160,24 @@ async def run_agent_task(
|
||||||
agent_config=deps.agent_config,
|
agent_config=deps.agent_config,
|
||||||
firecrawl_api_key=deps.firecrawl_api_key,
|
firecrawl_api_key=deps.firecrawl_api_key,
|
||||||
thread_visibility=ChatVisibility.PRIVATE,
|
thread_visibility=ChatVisibility.PRIVATE,
|
||||||
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
|
)
|
||||||
|
|
||||||
|
agent_query, runtime_context = await _resolve_mention_context(
|
||||||
|
agent_session,
|
||||||
|
search_space_id=ctx.search_space_id,
|
||||||
|
query=query,
|
||||||
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
|
mentioned_folder_ids=mentioned_folder_ids,
|
||||||
|
mentioned_connector_ids=mentioned_connector_ids,
|
||||||
|
mentioned_connectors=mentioned_connectors,
|
||||||
|
mentioned_documents=mentioned_documents,
|
||||||
)
|
)
|
||||||
|
|
||||||
request_id = f"automation:{ctx.run_id}:{ctx.step_id}"
|
request_id = f"automation:{ctx.run_id}:{ctx.step_id}"
|
||||||
turn_id = f"{request_id}:{int(time.time() * 1000)}"
|
turn_id = f"{request_id}:{int(time.time() * 1000)}"
|
||||||
input_state: dict[str, Any] = {
|
input_state: dict[str, Any] = {
|
||||||
"messages": [HumanMessage(content=query)],
|
"messages": [HumanMessage(content=agent_query)],
|
||||||
"search_space_id": ctx.search_space_id,
|
"search_space_id": ctx.search_space_id,
|
||||||
"request_id": request_id,
|
"request_id": request_id,
|
||||||
"turn_id": turn_id,
|
"turn_id": turn_id,
|
||||||
|
|
@ -74,8 +190,17 @@ async def run_agent_task(
|
||||||
},
|
},
|
||||||
"recursion_limit": 10_000,
|
"recursion_limit": 10_000,
|
||||||
}
|
}
|
||||||
|
if runtime_context is not None:
|
||||||
|
runtime_context.request_id = request_id
|
||||||
|
runtime_context.turn_id = turn_id
|
||||||
|
|
||||||
result = await agent.ainvoke(input_state, config=config)
|
# The compiled graph declares ``context_schema=SurfSenseContextSchema``;
|
||||||
|
# mentions only reach ``KnowledgePriorityMiddleware`` via ``context=``.
|
||||||
|
invoke_kwargs: dict[str, Any] = {"config": config}
|
||||||
|
if runtime_context is not None:
|
||||||
|
invoke_kwargs["context"] = runtime_context
|
||||||
|
|
||||||
|
result = await agent.ainvoke(input_state, **invoke_kwargs)
|
||||||
|
|
||||||
resumes = 0
|
resumes = 0
|
||||||
while True:
|
while True:
|
||||||
|
|
@ -88,7 +213,7 @@ async def run_agent_task(
|
||||||
)
|
)
|
||||||
lg_resume_map, routed = build_auto_decisions(state, decision)
|
lg_resume_map, routed = build_auto_decisions(state, decision)
|
||||||
config["configurable"]["surfsense_resume_value"] = routed
|
config["configurable"]["surfsense_resume_value"] = routed
|
||||||
result = await agent.ainvoke(Command(resume=lg_resume_map), config=config)
|
result = await agent.ainvoke(Command(resume=lg_resume_map), **invoke_kwargs)
|
||||||
resumes += 1
|
resumes += 1
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ from __future__ import annotations
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict, Field
|
from pydantic import BaseModel, ConfigDict, Field
|
||||||
|
|
||||||
|
from app.schemas.new_chat import MentionedDocumentInfo
|
||||||
|
|
||||||
|
|
||||||
class AgentTaskActionParams(BaseModel):
|
class AgentTaskActionParams(BaseModel):
|
||||||
"""Run a multi_agent_chat turn from an automation step."""
|
"""Run a multi_agent_chat turn from an automation step."""
|
||||||
|
|
@ -19,3 +21,32 @@ class AgentTaskActionParams(BaseModel):
|
||||||
default=False,
|
default=False,
|
||||||
description="If true, every HITL approval is auto-approved; otherwise rejected.",
|
description="If true, every HITL approval is auto-approved; otherwise rejected.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# @-mention references chosen in the task input. Mirror the ``new_chat``
|
||||||
|
# request fields (minus SurfSense product docs) so the run can scope
|
||||||
|
# retrieval to the user's selected files / folders / connectors. All
|
||||||
|
# optional and additive; a task with no mentions behaves as before.
|
||||||
|
mentioned_document_ids: list[int] | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="Knowledge-base document IDs the task references with @.",
|
||||||
|
)
|
||||||
|
mentioned_folder_ids: list[int] | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="Knowledge-base folder IDs the task references with @.",
|
||||||
|
)
|
||||||
|
mentioned_connector_ids: list[int] | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="Concrete connector account IDs the task references with @.",
|
||||||
|
)
|
||||||
|
mentioned_connectors: list[MentionedDocumentInfo] | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="Display/context metadata for the @-mentioned connector accounts.",
|
||||||
|
)
|
||||||
|
mentioned_documents: list[MentionedDocumentInfo] | None = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"Chip metadata (id, title, kind, ...) for every @-mention so the "
|
||||||
|
"run can resolve titles to virtual paths and substitute them in "
|
||||||
|
"the query."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ class AutomationRun(BaseModel, TimestampMixin):
|
||||||
definition_snapshot = Column(JSONB, nullable=False)
|
definition_snapshot = Column(JSONB, nullable=False)
|
||||||
|
|
||||||
# merged & validated inputs the run was dispatched with
|
# merged & validated inputs the run was dispatched with
|
||||||
# (trigger.static_inputs ∪ producer runtime data, static wins on collision)
|
# (trigger.static_inputs union producer runtime data, static wins on collision)
|
||||||
inputs = Column(JSONB, nullable=False, server_default="{}")
|
inputs = Column(JSONB, nullable=False, server_default="{}")
|
||||||
# one entry per executed step; agent_task entries carry their own
|
# one entry per executed step; agent_task entries carry their own
|
||||||
# `agent_session_id` inside their entry
|
# `agent_session_id` inside their entry
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,9 @@ from typing import Any
|
||||||
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.automations.actions.types import ActionContext
|
||||||
from app.automations.persistence.enums.run_status import RunStatus
|
from app.automations.persistence.enums.run_status import RunStatus
|
||||||
from app.automations.persistence.models.run import AutomationRun
|
from app.automations.persistence.models.run import AutomationRun
|
||||||
from app.automations.actions.types import ActionContext
|
|
||||||
from app.automations.schemas.definition.envelope import AutomationDefinition
|
from app.automations.schemas.definition.envelope import AutomationDefinition
|
||||||
from app.automations.schemas.definition.plan_step import PlanStep
|
from app.automations.schemas.definition.plan_step import PlanStep
|
||||||
from app.automations.templating import build_run_context
|
from app.automations.templating import build_run_context
|
||||||
|
|
@ -32,7 +32,10 @@ async def execute_run(session: AsyncSession, run_id: int) -> None:
|
||||||
await repository.mark_failed(
|
await repository.mark_failed(
|
||||||
session,
|
session,
|
||||||
run,
|
run,
|
||||||
{"message": f"definition_snapshot invalid: {exc}", "type": type(exc).__name__},
|
{
|
||||||
|
"message": f"definition_snapshot invalid: {exc}",
|
||||||
|
"type": type(exc).__name__,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
return
|
return
|
||||||
|
|
@ -92,7 +95,9 @@ async def _run_on_failure(
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
|
|
||||||
def _build_template_ctx(run: AutomationRun, step_outputs: dict[str, Any]) -> dict[str, Any]:
|
def _build_template_ctx(
|
||||||
|
run: AutomationRun, step_outputs: dict[str, Any]
|
||||||
|
) -> dict[str, Any]:
|
||||||
automation = run.automation
|
automation = run.automation
|
||||||
trigger = run.trigger
|
trigger = run.trigger
|
||||||
return build_run_context(
|
return build_run_context(
|
||||||
|
|
|
||||||
|
|
@ -30,14 +30,18 @@ async def execute_step(
|
||||||
try:
|
try:
|
||||||
should_run = evaluate_predicate(step.when, template_context)
|
should_run = evaluate_predicate(step.when, template_context)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return _result(step, "failed", started_at, attempts=0, error=_error(exc, "when"))
|
return _result(
|
||||||
|
step, "failed", started_at, attempts=0, error=_error(exc, "when")
|
||||||
|
)
|
||||||
if not should_run:
|
if not should_run:
|
||||||
return _result(step, "skipped", started_at, attempts=0)
|
return _result(step, "skipped", started_at, attempts=0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resolved_params = render_value(step.params, template_context)
|
resolved_params = render_value(step.params, template_context)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return _result(step, "failed", started_at, attempts=0, error=_error(exc, "render"))
|
return _result(
|
||||||
|
step, "failed", started_at, attempts=0, error=_error(exc, "render")
|
||||||
|
)
|
||||||
|
|
||||||
action = get_action(step.action)
|
action = get_action(step.action)
|
||||||
if action is None:
|
if action is None:
|
||||||
|
|
@ -46,12 +50,17 @@ async def execute_step(
|
||||||
"failed",
|
"failed",
|
||||||
started_at,
|
started_at,
|
||||||
attempts=0,
|
attempts=0,
|
||||||
error={"message": f"action not registered: {step.action}", "type": "ActionNotFound"},
|
error={
|
||||||
|
"message": f"action not registered: {step.action}",
|
||||||
|
"type": "ActionNotFound",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
handler = action.build_handler(action_context)
|
handler = action.build_handler(action_context)
|
||||||
|
|
||||||
max_retries = step.max_retries if step.max_retries is not None else default_max_retries
|
max_retries = (
|
||||||
|
step.max_retries if step.max_retries is not None else default_max_retries
|
||||||
|
)
|
||||||
timeout = step.timeout_seconds or default_timeout_seconds
|
timeout = step.timeout_seconds or default_timeout_seconds
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -62,7 +71,9 @@ async def execute_step(
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return _result(step, "failed", started_at, attempts=max_retries + 1, error=_error(exc))
|
return _result(
|
||||||
|
step, "failed", started_at, attempts=max_retries + 1, error=_error(exc)
|
||||||
|
)
|
||||||
|
|
||||||
return _result(step, "succeeded", started_at, attempts=attempts, result=result)
|
return _result(step, "succeeded", started_at, attempts=attempts, result=result)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,9 @@ from .plan_step import PlanStep
|
||||||
class Execution(BaseModel):
|
class Execution(BaseModel):
|
||||||
model_config = ConfigDict(extra="forbid")
|
model_config = ConfigDict(extra="forbid")
|
||||||
|
|
||||||
timeout_seconds: int = Field(default=600, gt=0, description="Wall-clock cap for the run.")
|
timeout_seconds: int = Field(
|
||||||
|
default=600, gt=0, description="Wall-clock cap for the run."
|
||||||
|
)
|
||||||
max_retries: int = Field(default=2, ge=0, description="Per-step retry budget.")
|
max_retries: int = Field(default=2, ge=0, description="Per-step retry budget.")
|
||||||
retry_backoff: Literal["exponential", "linear", "none"] = "exponential"
|
retry_backoff: Literal["exponential", "linear", "none"] = "exponential"
|
||||||
concurrency: Literal["drop_if_running", "queue", "always"] = "drop_if_running"
|
concurrency: Literal["drop_if_running", "queue", "always"] = "drop_if_running"
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,9 @@ class PlanStep(BaseModel):
|
||||||
model_config = ConfigDict(extra="forbid")
|
model_config = ConfigDict(extra="forbid")
|
||||||
|
|
||||||
step_id: str = Field(..., min_length=1, description="Unique within the plan.")
|
step_id: str = Field(..., min_length=1, description="Unique within the plan.")
|
||||||
action: str = Field(..., min_length=1, description="Action type; resolved via registry.")
|
action: str = Field(
|
||||||
|
..., min_length=1, description="Action type; resolved via registry."
|
||||||
|
)
|
||||||
when: str | None = Field(
|
when: str | None = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Optional predicate; step is skipped when falsy.",
|
description="Optional predicate; step is skipped when falsy.",
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,9 @@ from pydantic import BaseModel, ConfigDict, Field
|
||||||
class TriggerSpec(BaseModel):
|
class TriggerSpec(BaseModel):
|
||||||
model_config = ConfigDict(extra="forbid")
|
model_config = ConfigDict(extra="forbid")
|
||||||
|
|
||||||
type: str = Field(..., min_length=1, description="Trigger type; resolved via registry.")
|
type: str = Field(
|
||||||
|
..., min_length=1, description="Trigger type; resolved via registry."
|
||||||
|
)
|
||||||
params: dict[str, Any] = Field(
|
params: dict[str, Any] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
description="Type-specific params; validated against the trigger's schema.",
|
description="Type-specific params; validated against the trigger's schema.",
|
||||||
|
|
|
||||||
|
|
@ -10,14 +10,14 @@ from sqlalchemy import func, select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.orm import selectinload
|
from sqlalchemy.orm import selectinload
|
||||||
|
|
||||||
|
from app.automations.persistence.enums.trigger_type import TriggerType
|
||||||
|
from app.automations.persistence.models.automation import Automation
|
||||||
|
from app.automations.persistence.models.trigger import AutomationTrigger
|
||||||
from app.automations.schemas.api import (
|
from app.automations.schemas.api import (
|
||||||
AutomationCreate,
|
AutomationCreate,
|
||||||
AutomationUpdate,
|
AutomationUpdate,
|
||||||
TriggerCreate,
|
TriggerCreate,
|
||||||
)
|
)
|
||||||
from app.automations.persistence.enums.trigger_type import TriggerType
|
|
||||||
from app.automations.persistence.models.automation import Automation
|
|
||||||
from app.automations.persistence.models.trigger import AutomationTrigger
|
|
||||||
from app.automations.triggers import get_trigger
|
from app.automations.triggers import get_trigger
|
||||||
from app.automations.triggers.schedule import compute_next_fire_at
|
from app.automations.triggers.schedule import compute_next_fire_at
|
||||||
from app.db import Permission, User, get_async_session
|
from app.db import Permission, User, get_async_session
|
||||||
|
|
@ -34,7 +34,9 @@ class AutomationService:
|
||||||
|
|
||||||
async def create(self, payload: AutomationCreate) -> Automation:
|
async def create(self, payload: AutomationCreate) -> Automation:
|
||||||
"""Create an automation and its initial triggers in one transaction."""
|
"""Create an automation and its initial triggers in one transaction."""
|
||||||
await self._authorize(payload.search_space_id, Permission.AUTOMATIONS_CREATE.value)
|
await self._authorize(
|
||||||
|
payload.search_space_id, Permission.AUTOMATIONS_CREATE.value
|
||||||
|
)
|
||||||
|
|
||||||
automation = Automation(
|
automation = Automation(
|
||||||
search_space_id=payload.search_space_id,
|
search_space_id=payload.search_space_id,
|
||||||
|
|
@ -67,22 +69,32 @@ class AutomationService:
|
||||||
)
|
)
|
||||||
|
|
||||||
rows = (
|
rows = (
|
||||||
|
(
|
||||||
await self.session.execute(
|
await self.session.execute(
|
||||||
base.order_by(Automation.created_at.desc()).limit(limit).offset(offset)
|
base.order_by(Automation.created_at.desc())
|
||||||
|
.limit(limit)
|
||||||
|
.offset(offset)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.scalars()
|
||||||
|
.all()
|
||||||
)
|
)
|
||||||
).scalars().all()
|
|
||||||
return list(rows), int(total or 0)
|
return list(rows), int(total or 0)
|
||||||
|
|
||||||
async def get(self, automation_id: int) -> Automation:
|
async def get(self, automation_id: int) -> Automation:
|
||||||
"""Get an automation with its triggers loaded."""
|
"""Get an automation with its triggers loaded."""
|
||||||
automation = await self._get_with_triggers_or_raise(automation_id)
|
automation = await self._get_with_triggers_or_raise(automation_id)
|
||||||
await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_READ.value)
|
await self._authorize(
|
||||||
|
automation.search_space_id, Permission.AUTOMATIONS_READ.value
|
||||||
|
)
|
||||||
return automation
|
return automation
|
||||||
|
|
||||||
async def update(self, automation_id: int, patch: AutomationUpdate) -> Automation:
|
async def update(self, automation_id: int, patch: AutomationUpdate) -> Automation:
|
||||||
"""Patch fields. Bumps ``version`` when ``definition`` changes."""
|
"""Patch fields. Bumps ``version`` when ``definition`` changes."""
|
||||||
automation = await self._get_with_triggers_or_raise(automation_id)
|
automation = await self._get_with_triggers_or_raise(automation_id)
|
||||||
await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_UPDATE.value)
|
await self._authorize(
|
||||||
|
automation.search_space_id, Permission.AUTOMATIONS_UPDATE.value
|
||||||
|
)
|
||||||
|
|
||||||
data = patch.model_dump(exclude_unset=True)
|
data = patch.model_dump(exclude_unset=True)
|
||||||
|
|
||||||
|
|
@ -93,7 +105,9 @@ class AutomationService:
|
||||||
if "status" in data:
|
if "status" in data:
|
||||||
automation.status = data["status"]
|
automation.status = data["status"]
|
||||||
if "definition" in data:
|
if "definition" in data:
|
||||||
automation.definition = patch.definition.model_dump(mode="json", by_alias=True)
|
automation.definition = patch.definition.model_dump(
|
||||||
|
mode="json", by_alias=True
|
||||||
|
)
|
||||||
automation.version += 1
|
automation.version += 1
|
||||||
|
|
||||||
await self.session.commit()
|
await self.session.commit()
|
||||||
|
|
@ -102,7 +116,9 @@ class AutomationService:
|
||||||
async def delete(self, automation_id: int) -> None:
|
async def delete(self, automation_id: int) -> None:
|
||||||
"""Delete an automation; FK cascades remove triggers and runs."""
|
"""Delete an automation; FK cascades remove triggers and runs."""
|
||||||
automation = await self._get_or_raise(automation_id)
|
automation = await self._get_or_raise(automation_id)
|
||||||
await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_DELETE.value)
|
await self._authorize(
|
||||||
|
automation.search_space_id, Permission.AUTOMATIONS_DELETE.value
|
||||||
|
)
|
||||||
await self.session.delete(automation)
|
await self.session.delete(automation)
|
||||||
await self.session.commit()
|
await self.session.commit()
|
||||||
|
|
||||||
|
|
@ -141,7 +157,9 @@ def _build_trigger(spec: TriggerCreate) -> AutomationTrigger:
|
||||||
"""Validate trigger params via its registered Pydantic model and build the ORM row."""
|
"""Validate trigger params via its registered Pydantic model and build the ORM row."""
|
||||||
definition = get_trigger(spec.type.value)
|
definition = get_trigger(spec.type.value)
|
||||||
if definition is None:
|
if definition is None:
|
||||||
raise HTTPException(status_code=422, detail=f"unknown trigger type {spec.type.value!r}")
|
raise HTTPException(
|
||||||
|
status_code=422, detail=f"unknown trigger type {spec.type.value!r}"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
validated = definition.params_model.model_validate(spec.params)
|
validated = definition.params_model.model_validate(spec.params)
|
||||||
|
|
|
||||||
|
|
@ -36,10 +36,16 @@ class RunService:
|
||||||
)
|
)
|
||||||
|
|
||||||
rows = (
|
rows = (
|
||||||
|
(
|
||||||
await self.session.execute(
|
await self.session.execute(
|
||||||
base.order_by(AutomationRun.created_at.desc()).limit(limit).offset(offset)
|
base.order_by(AutomationRun.created_at.desc())
|
||||||
|
.limit(limit)
|
||||||
|
.offset(offset)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.scalars()
|
||||||
|
.all()
|
||||||
)
|
)
|
||||||
).scalars().all()
|
|
||||||
return list(rows), int(total or 0)
|
return list(rows), int(total or 0)
|
||||||
|
|
||||||
async def get(self, *, automation_id: int, run_id: int) -> AutomationRun:
|
async def get(self, *, automation_id: int, run_id: int) -> AutomationRun:
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,10 @@ from fastapi import Depends, HTTPException
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.automations.schemas.api import TriggerCreate, TriggerUpdate
|
|
||||||
from app.automations.persistence.enums.trigger_type import TriggerType
|
from app.automations.persistence.enums.trigger_type import TriggerType
|
||||||
from app.automations.persistence.models.automation import Automation
|
from app.automations.persistence.models.automation import Automation
|
||||||
from app.automations.persistence.models.trigger import AutomationTrigger
|
from app.automations.persistence.models.trigger import AutomationTrigger
|
||||||
|
from app.automations.schemas.api import TriggerCreate, TriggerUpdate
|
||||||
from app.automations.triggers import get_trigger
|
from app.automations.triggers import get_trigger
|
||||||
from app.automations.triggers.schedule import compute_next_fire_at
|
from app.automations.triggers.schedule import compute_next_fire_at
|
||||||
from app.db import Permission, User, get_async_session
|
from app.db import Permission, User, get_async_session
|
||||||
|
|
@ -40,7 +40,9 @@ class TriggerService:
|
||||||
params=validated_params,
|
params=validated_params,
|
||||||
static_inputs=payload.static_inputs,
|
static_inputs=payload.static_inputs,
|
||||||
enabled=payload.enabled,
|
enabled=payload.enabled,
|
||||||
next_fire_at=_initial_next_fire(payload.type, validated_params, payload.enabled),
|
next_fire_at=_initial_next_fire(
|
||||||
|
payload.type, validated_params, payload.enabled
|
||||||
|
),
|
||||||
)
|
)
|
||||||
self.session.add(trigger)
|
self.session.add(trigger)
|
||||||
await self.session.commit()
|
await self.session.commit()
|
||||||
|
|
@ -54,7 +56,9 @@ class TriggerService:
|
||||||
trigger_id: int,
|
trigger_id: int,
|
||||||
patch: TriggerUpdate,
|
patch: TriggerUpdate,
|
||||||
) -> AutomationTrigger:
|
) -> AutomationTrigger:
|
||||||
await self._authorize_automation(automation_id, Permission.AUTOMATIONS_UPDATE.value)
|
await self._authorize_automation(
|
||||||
|
automation_id, Permission.AUTOMATIONS_UPDATE.value
|
||||||
|
)
|
||||||
trigger = await self._get_trigger_or_raise(automation_id, trigger_id)
|
trigger = await self._get_trigger_or_raise(automation_id, trigger_id)
|
||||||
|
|
||||||
data = patch.model_dump(exclude_unset=True)
|
data = patch.model_dump(exclude_unset=True)
|
||||||
|
|
@ -80,7 +84,9 @@ class TriggerService:
|
||||||
return trigger
|
return trigger
|
||||||
|
|
||||||
async def remove(self, *, automation_id: int, trigger_id: int) -> None:
|
async def remove(self, *, automation_id: int, trigger_id: int) -> None:
|
||||||
await self._authorize_automation(automation_id, Permission.AUTOMATIONS_UPDATE.value)
|
await self._authorize_automation(
|
||||||
|
automation_id, Permission.AUTOMATIONS_UPDATE.value
|
||||||
|
)
|
||||||
trigger = await self._get_trigger_or_raise(automation_id, trigger_id)
|
trigger = await self._get_trigger_or_raise(automation_id, trigger_id)
|
||||||
await self.session.delete(trigger)
|
await self.session.delete(trigger)
|
||||||
await self.session.commit()
|
await self.session.commit()
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ TASK_NAME = "automation_run_execute"
|
||||||
|
|
||||||
|
|
||||||
@celery_app.task(name=TASK_NAME, bind=True)
|
@celery_app.task(name=TASK_NAME, bind=True)
|
||||||
def automation_run_execute(self, run_id: int) -> None: # noqa: ARG001 — Celery bind
|
def automation_run_execute(self, run_id: int) -> None:
|
||||||
"""Execute one ``AutomationRun``. Idempotent: terminal runs no-op."""
|
"""Execute one ``AutomationRun``. Idempotent: terminal runs no-op."""
|
||||||
return run_async_celery_task(lambda: _impl(run_id))
|
return run_async_celery_task(lambda: _impl(run_id))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -103,9 +103,7 @@ async def _self_heal_null_next_fire(session: AsyncSession, *, now: datetime) ->
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
|
|
||||||
async def _claim_due_triggers(
|
async def _claim_due_triggers(session: AsyncSession, *, now: datetime) -> list[_Claim]:
|
||||||
session: AsyncSession, *, now: datetime
|
|
||||||
) -> list[_Claim]:
|
|
||||||
"""Lock and advance due rows; return per-trigger fire context."""
|
"""Lock and advance due rows; return per-trigger fire context."""
|
||||||
stmt = (
|
stmt = (
|
||||||
select(AutomationTrigger)
|
select(AutomationTrigger)
|
||||||
|
|
|
||||||
|
|
@ -17,4 +17,4 @@ __all__ = [
|
||||||
]
|
]
|
||||||
|
|
||||||
# Built-in triggers self-register at import time.
|
# Built-in triggers self-register at import time.
|
||||||
from . import schedule # noqa: E402, F401
|
from . import schedule # noqa: F401
|
||||||
|
|
|
||||||
|
|
@ -15,4 +15,4 @@ __all__ = [
|
||||||
]
|
]
|
||||||
|
|
||||||
# Side-effect: register on the triggers store.
|
# Side-effect: register on the triggers store.
|
||||||
from . import definition # noqa: E402, F401
|
from . import definition # noqa: F401
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,10 @@ def compute_next_fire_at(cron: str, timezone: str, *, after: datetime) -> dateti
|
||||||
given timezone before evaluation so DST and IANA rules apply correctly.
|
given timezone before evaluation so DST and IANA rules apply correctly.
|
||||||
"""
|
"""
|
||||||
tz = ZoneInfo(timezone)
|
tz = ZoneInfo(timezone)
|
||||||
base = after.astimezone(tz) if after.tzinfo else after.replace(tzinfo=UTC).astimezone(tz)
|
base = (
|
||||||
|
after.astimezone(tz)
|
||||||
|
if after.tzinfo
|
||||||
|
else after.replace(tzinfo=UTC).astimezone(tz)
|
||||||
|
)
|
||||||
nxt: datetime = croniter(cron, base).get_next(datetime)
|
nxt: datetime = croniter(cron, base).get_next(datetime)
|
||||||
return nxt.astimezone(UTC)
|
return nxt.astimezone(UTC)
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,9 @@ from .cron import InvalidCronError, validate_cron
|
||||||
class ScheduleTriggerParams(BaseModel):
|
class ScheduleTriggerParams(BaseModel):
|
||||||
model_config = ConfigDict(extra="forbid")
|
model_config = ConfigDict(extra="forbid")
|
||||||
|
|
||||||
cron: str = Field(..., description="Five-field cron expression.", examples=["0 9 * * 1-5"])
|
cron: str = Field(
|
||||||
|
..., description="Five-field cron expression.", examples=["0 9 * * 1-5"]
|
||||||
|
)
|
||||||
timezone: str = Field(..., description="IANA timezone.", examples=["Africa/Kigali"])
|
timezone: str = Field(..., description="IANA timezone.", examples=["Africa/Kigali"])
|
||||||
|
|
||||||
@model_validator(mode="after")
|
@model_validator(mode="after")
|
||||||
|
|
|
||||||
|
|
@ -1150,46 +1150,6 @@ class Chunk(BaseModel, TimestampMixin):
|
||||||
document = relationship("Document", back_populates="chunks")
|
document = relationship("Document", back_populates="chunks")
|
||||||
|
|
||||||
|
|
||||||
class SurfsenseDocsDocument(BaseModel, TimestampMixin):
|
|
||||||
"""
|
|
||||||
Surfsense documentation storage.
|
|
||||||
Indexed at migration time from MDX files.
|
|
||||||
"""
|
|
||||||
|
|
||||||
__tablename__ = "surfsense_docs_documents"
|
|
||||||
|
|
||||||
source = Column(
|
|
||||||
String, nullable=False, unique=True, index=True
|
|
||||||
) # File path: "connectors/slack.mdx"
|
|
||||||
title = Column(String, nullable=False)
|
|
||||||
content = Column(Text, nullable=False)
|
|
||||||
content_hash = Column(String, nullable=False, index=True) # For detecting changes
|
|
||||||
embedding = Column(Vector(config.embedding_model_instance.dimension))
|
|
||||||
updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)
|
|
||||||
|
|
||||||
chunks = relationship(
|
|
||||||
"SurfsenseDocsChunk",
|
|
||||||
back_populates="document",
|
|
||||||
cascade="all, delete-orphan",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SurfsenseDocsChunk(BaseModel, TimestampMixin):
|
|
||||||
"""Chunk storage for Surfsense documentation."""
|
|
||||||
|
|
||||||
__tablename__ = "surfsense_docs_chunks"
|
|
||||||
|
|
||||||
content = Column(Text, nullable=False)
|
|
||||||
embedding = Column(Vector(config.embedding_model_instance.dimension))
|
|
||||||
|
|
||||||
document_id = Column(
|
|
||||||
Integer,
|
|
||||||
ForeignKey("surfsense_docs_documents.id", ondelete="CASCADE"),
|
|
||||||
nullable=False,
|
|
||||||
)
|
|
||||||
document = relationship("SurfsenseDocsDocument", back_populates="chunks")
|
|
||||||
|
|
||||||
|
|
||||||
class Podcast(BaseModel, TimestampMixin):
|
class Podcast(BaseModel, TimestampMixin):
|
||||||
"""Podcast model for storing generated podcasts."""
|
"""Podcast model for storing generated podcasts."""
|
||||||
|
|
||||||
|
|
@ -2605,7 +2565,6 @@ from app.automations.persistence import ( # noqa: E402, F401
|
||||||
AutomationTrigger,
|
AutomationTrigger,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
engine = create_async_engine(
|
engine = create_async_engine(
|
||||||
DATABASE_URL,
|
DATABASE_URL,
|
||||||
pool_size=30,
|
pool_size=30,
|
||||||
|
|
@ -2681,11 +2640,6 @@ async def setup_indexes():
|
||||||
"CREATE INDEX IF NOT EXISTS idx_documents_search_space_updated ON documents (search_space_id, updated_at DESC NULLS LAST) INCLUDE (id, title, document_type)"
|
"CREATE INDEX IF NOT EXISTS idx_documents_search_space_updated ON documents (search_space_id, updated_at DESC NULLS LAST) INCLUDE (id, title, document_type)"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
await conn.execute(
|
|
||||||
text(
|
|
||||||
"CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm ON surfsense_docs_documents USING gin (title gin_trgm_ops)"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def create_db_and_tables():
|
async def create_db_and_tables():
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from app.automations.api import router as automations_router
|
||||||
|
|
||||||
from .agent_action_log_route import router as agent_action_log_router
|
from .agent_action_log_route import router as agent_action_log_router
|
||||||
from .agent_flags_route import router as agent_flags_router
|
from .agent_flags_route import router as agent_flags_router
|
||||||
from .agent_permissions_route import router as agent_permissions_router
|
from .agent_permissions_route import router as agent_permissions_router
|
||||||
|
|
@ -7,7 +9,6 @@ from .agent_revert_route import router as agent_revert_router
|
||||||
from .airtable_add_connector_route import (
|
from .airtable_add_connector_route import (
|
||||||
router as airtable_add_connector_router,
|
router as airtable_add_connector_router,
|
||||||
)
|
)
|
||||||
from app.automations.api import router as automations_router
|
|
||||||
from .chat_comments_routes import router as chat_comments_router
|
from .chat_comments_routes import router as chat_comments_router
|
||||||
from .circleback_webhook_route import router as circleback_webhook_router
|
from .circleback_webhook_route import router as circleback_webhook_router
|
||||||
from .clickup_add_connector_route import router as clickup_add_connector_router
|
from .clickup_add_connector_route import router as clickup_add_connector_router
|
||||||
|
|
@ -54,7 +55,6 @@ from .search_source_connectors_routes import router as search_source_connectors_
|
||||||
from .search_spaces_routes import router as search_spaces_router
|
from .search_spaces_routes import router as search_spaces_router
|
||||||
from .slack_add_connector_route import router as slack_add_connector_router
|
from .slack_add_connector_route import router as slack_add_connector_router
|
||||||
from .stripe_routes import router as stripe_router
|
from .stripe_routes import router as stripe_router
|
||||||
from .surfsense_docs_routes import router as surfsense_docs_router
|
|
||||||
from .team_memory_routes import router as team_memory_router
|
from .team_memory_routes import router as team_memory_router
|
||||||
from .teams_add_connector_route import router as teams_add_connector_router
|
from .teams_add_connector_route import router as teams_add_connector_router
|
||||||
from .video_presentations_routes import router as video_presentations_router
|
from .video_presentations_routes import router as video_presentations_router
|
||||||
|
|
@ -107,7 +107,6 @@ router.include_router(new_llm_config_router) # LLM configs with prompt configur
|
||||||
router.include_router(model_list_router) # Dynamic model catalogue from OpenRouter
|
router.include_router(model_list_router) # Dynamic model catalogue from OpenRouter
|
||||||
router.include_router(logs_router)
|
router.include_router(logs_router)
|
||||||
router.include_router(circleback_webhook_router) # Circleback meeting webhooks
|
router.include_router(circleback_webhook_router) # Circleback meeting webhooks
|
||||||
router.include_router(surfsense_docs_router) # Surfsense documentation for citations
|
|
||||||
router.include_router(notifications_router) # Notifications with Zero sync
|
router.include_router(notifications_router) # Notifications with Zero sync
|
||||||
router.include_router(
|
router.include_router(
|
||||||
mcp_oauth_router
|
mcp_oauth_router
|
||||||
|
|
|
||||||
|
|
@ -1785,7 +1785,6 @@ async def handle_new_chat(
|
||||||
user_id=str(user.id),
|
user_id=str(user.id),
|
||||||
llm_config_id=llm_config_id,
|
llm_config_id=llm_config_id,
|
||||||
mentioned_document_ids=request.mentioned_document_ids,
|
mentioned_document_ids=request.mentioned_document_ids,
|
||||||
mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
|
|
||||||
mentioned_folder_ids=request.mentioned_folder_ids,
|
mentioned_folder_ids=request.mentioned_folder_ids,
|
||||||
mentioned_connector_ids=request.mentioned_connector_ids,
|
mentioned_connector_ids=request.mentioned_connector_ids,
|
||||||
mentioned_connectors=mentioned_connectors_payload,
|
mentioned_connectors=mentioned_connectors_payload,
|
||||||
|
|
@ -2278,7 +2277,6 @@ async def regenerate_response(
|
||||||
user_id=str(user.id),
|
user_id=str(user.id),
|
||||||
llm_config_id=llm_config_id,
|
llm_config_id=llm_config_id,
|
||||||
mentioned_document_ids=request.mentioned_document_ids,
|
mentioned_document_ids=request.mentioned_document_ids,
|
||||||
mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
|
|
||||||
mentioned_folder_ids=request.mentioned_folder_ids,
|
mentioned_folder_ids=request.mentioned_folder_ids,
|
||||||
mentioned_connector_ids=request.mentioned_connector_ids,
|
mentioned_connector_ids=request.mentioned_connector_ids,
|
||||||
mentioned_connectors=mentioned_connectors_payload,
|
mentioned_connectors=mentioned_connectors_payload,
|
||||||
|
|
|
||||||
|
|
@ -1,172 +0,0 @@
|
||||||
"""
|
|
||||||
Routes for Surfsense documentation.
|
|
||||||
|
|
||||||
These endpoints support the citation system for Surfsense docs,
|
|
||||||
allowing the frontend to fetch document details when a user clicks
|
|
||||||
on a [citation:doc-XXX] link.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
|
||||||
from sqlalchemy import func, select
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
|
||||||
from sqlalchemy.orm import selectinload
|
|
||||||
|
|
||||||
from app.db import (
|
|
||||||
SurfsenseDocsChunk,
|
|
||||||
SurfsenseDocsDocument,
|
|
||||||
User,
|
|
||||||
get_async_session,
|
|
||||||
)
|
|
||||||
from app.schemas import PaginatedResponse
|
|
||||||
from app.schemas.surfsense_docs import (
|
|
||||||
SurfsenseDocsChunkRead,
|
|
||||||
SurfsenseDocsDocumentRead,
|
|
||||||
SurfsenseDocsDocumentWithChunksRead,
|
|
||||||
)
|
|
||||||
from app.users import current_active_user
|
|
||||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
|
||||||
|
|
||||||
router = APIRouter()
|
|
||||||
|
|
||||||
|
|
||||||
@router.get(
|
|
||||||
"/surfsense-docs/by-chunk/{chunk_id}",
|
|
||||||
response_model=SurfsenseDocsDocumentWithChunksRead,
|
|
||||||
)
|
|
||||||
async def get_surfsense_doc_by_chunk_id(
|
|
||||||
chunk_id: int,
|
|
||||||
session: AsyncSession = Depends(get_async_session),
|
|
||||||
user: User = Depends(current_active_user),
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Retrieves a Surfsense documentation document based on a chunk ID.
|
|
||||||
|
|
||||||
This endpoint is used by the frontend to resolve [citation:doc-XXX] links.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Get the chunk
|
|
||||||
chunk_result = await session.execute(
|
|
||||||
select(SurfsenseDocsChunk).filter(SurfsenseDocsChunk.id == chunk_id)
|
|
||||||
)
|
|
||||||
chunk = chunk_result.scalars().first()
|
|
||||||
|
|
||||||
if not chunk:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404,
|
|
||||||
detail=f"Surfsense docs chunk with id {chunk_id} not found",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get the associated document with all its chunks
|
|
||||||
document_result = await session.execute(
|
|
||||||
select(SurfsenseDocsDocument)
|
|
||||||
.options(selectinload(SurfsenseDocsDocument.chunks))
|
|
||||||
.filter(SurfsenseDocsDocument.id == chunk.document_id)
|
|
||||||
)
|
|
||||||
document = document_result.scalars().first()
|
|
||||||
|
|
||||||
if not document:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404,
|
|
||||||
detail="Surfsense docs document not found",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Sort chunks by ID
|
|
||||||
sorted_chunks = sorted(document.chunks, key=lambda x: x.id)
|
|
||||||
|
|
||||||
return SurfsenseDocsDocumentWithChunksRead(
|
|
||||||
id=document.id,
|
|
||||||
title=document.title,
|
|
||||||
source=document.source,
|
|
||||||
public_url=surfsense_docs_public_url(document.source),
|
|
||||||
content=document.content,
|
|
||||||
chunks=[
|
|
||||||
SurfsenseDocsChunkRead(id=c.id, content=c.content)
|
|
||||||
for c in sorted_chunks
|
|
||||||
],
|
|
||||||
)
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=500,
|
|
||||||
detail=f"Failed to retrieve Surfsense documentation: {e!s}",
|
|
||||||
) from e
|
|
||||||
|
|
||||||
|
|
||||||
@router.get(
|
|
||||||
"/surfsense-docs",
|
|
||||||
response_model=PaginatedResponse[SurfsenseDocsDocumentRead],
|
|
||||||
)
|
|
||||||
async def list_surfsense_docs(
|
|
||||||
page: int = 0,
|
|
||||||
page_size: int = 50,
|
|
||||||
title: str | None = None,
|
|
||||||
session: AsyncSession = Depends(get_async_session),
|
|
||||||
user: User = Depends(current_active_user),
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
List all Surfsense documentation documents.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
page: Zero-based page index.
|
|
||||||
page_size: Number of items per page (default: 50).
|
|
||||||
title: Optional title filter (case-insensitive substring match).
|
|
||||||
session: Database session (injected).
|
|
||||||
user: Current authenticated user (injected).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
PaginatedResponse[SurfsenseDocsDocumentRead]: Paginated list of Surfsense docs.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Base query
|
|
||||||
query = select(SurfsenseDocsDocument)
|
|
||||||
count_query = select(func.count()).select_from(SurfsenseDocsDocument)
|
|
||||||
|
|
||||||
# Filter by title if provided
|
|
||||||
if title and title.strip():
|
|
||||||
query = query.filter(SurfsenseDocsDocument.title.ilike(f"%{title}%"))
|
|
||||||
count_query = count_query.filter(
|
|
||||||
SurfsenseDocsDocument.title.ilike(f"%{title}%")
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get total count
|
|
||||||
total_result = await session.execute(count_query)
|
|
||||||
total = total_result.scalar() or 0
|
|
||||||
|
|
||||||
# Calculate offset
|
|
||||||
offset = page * page_size
|
|
||||||
|
|
||||||
# Get paginated results
|
|
||||||
result = await session.execute(
|
|
||||||
query.order_by(SurfsenseDocsDocument.title).offset(offset).limit(page_size)
|
|
||||||
)
|
|
||||||
docs = result.scalars().all()
|
|
||||||
|
|
||||||
# Convert to response format
|
|
||||||
items = [
|
|
||||||
SurfsenseDocsDocumentRead(
|
|
||||||
id=doc.id,
|
|
||||||
title=doc.title,
|
|
||||||
source=doc.source,
|
|
||||||
public_url=surfsense_docs_public_url(doc.source),
|
|
||||||
content=doc.content,
|
|
||||||
created_at=doc.created_at,
|
|
||||||
updated_at=doc.updated_at,
|
|
||||||
)
|
|
||||||
for doc in docs
|
|
||||||
]
|
|
||||||
|
|
||||||
has_more = (offset + len(items)) < total
|
|
||||||
|
|
||||||
return PaginatedResponse(
|
|
||||||
items=items,
|
|
||||||
total=total,
|
|
||||||
page=page,
|
|
||||||
page_size=page_size,
|
|
||||||
has_more=has_more,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=500,
|
|
||||||
detail=f"Failed to list Surfsense documentation: {e!s}",
|
|
||||||
) from e
|
|
||||||
|
|
@ -239,9 +239,6 @@ class NewChatRequest(BaseModel):
|
||||||
mentioned_document_ids: list[int] | None = (
|
mentioned_document_ids: list[int] | None = (
|
||||||
None # Optional document IDs mentioned with @ in the chat
|
None # Optional document IDs mentioned with @ in the chat
|
||||||
)
|
)
|
||||||
mentioned_surfsense_doc_ids: list[int] | None = (
|
|
||||||
None # Optional SurfSense documentation IDs mentioned with @ in the chat
|
|
||||||
)
|
|
||||||
mentioned_folder_ids: list[int] | None = Field(
|
mentioned_folder_ids: list[int] | None = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description=(
|
description=(
|
||||||
|
|
@ -326,7 +323,6 @@ class RegenerateRequest(BaseModel):
|
||||||
None # New user query (for edit). None = reload with same query
|
None # New user query (for edit). None = reload with same query
|
||||||
)
|
)
|
||||||
mentioned_document_ids: list[int] | None = None
|
mentioned_document_ids: list[int] | None = None
|
||||||
mentioned_surfsense_doc_ids: list[int] | None = None
|
|
||||||
mentioned_folder_ids: list[int] | None = Field(
|
mentioned_folder_ids: list[int] | None = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description=(
|
description=(
|
||||||
|
|
|
||||||
|
|
@ -1,43 +0,0 @@
|
||||||
"""
|
|
||||||
Schemas for Surfsense documentation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict
|
|
||||||
|
|
||||||
|
|
||||||
class SurfsenseDocsChunkRead(BaseModel):
|
|
||||||
"""Schema for a Surfsense docs chunk."""
|
|
||||||
|
|
||||||
id: int
|
|
||||||
content: str
|
|
||||||
|
|
||||||
model_config = ConfigDict(from_attributes=True)
|
|
||||||
|
|
||||||
|
|
||||||
class SurfsenseDocsDocumentRead(BaseModel):
|
|
||||||
"""Schema for a Surfsense docs document (without chunks)."""
|
|
||||||
|
|
||||||
id: int
|
|
||||||
title: str
|
|
||||||
source: str
|
|
||||||
public_url: str
|
|
||||||
content: str
|
|
||||||
created_at: datetime | None = None
|
|
||||||
updated_at: datetime | None = None
|
|
||||||
|
|
||||||
model_config = ConfigDict(from_attributes=True)
|
|
||||||
|
|
||||||
|
|
||||||
class SurfsenseDocsDocumentWithChunksRead(BaseModel):
|
|
||||||
"""Schema for a Surfsense docs document with its chunks."""
|
|
||||||
|
|
||||||
id: int
|
|
||||||
title: str
|
|
||||||
source: str
|
|
||||||
public_url: str
|
|
||||||
content: str
|
|
||||||
chunks: list[SurfsenseDocsChunkRead]
|
|
||||||
|
|
||||||
model_config = ConfigDict(from_attributes=True)
|
|
||||||
|
|
@ -25,7 +25,6 @@ from uuid import UUID
|
||||||
import anyio
|
import anyio
|
||||||
from langchain_core.messages import HumanMessage
|
from langchain_core.messages import HumanMessage
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
from sqlalchemy.orm import selectinload
|
|
||||||
|
|
||||||
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
|
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
|
||||||
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
|
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
|
||||||
|
|
@ -55,7 +54,6 @@ from app.db import (
|
||||||
NewChatThread,
|
NewChatThread,
|
||||||
Report,
|
Report,
|
||||||
SearchSourceConnectorType,
|
SearchSourceConnectorType,
|
||||||
SurfsenseDocsDocument,
|
|
||||||
async_session_maker,
|
async_session_maker,
|
||||||
shielded_async_session,
|
shielded_async_session,
|
||||||
)
|
)
|
||||||
|
|
@ -77,7 +75,6 @@ from app.tasks.chat.streaming.helpers.interrupt_inspector import (
|
||||||
)
|
)
|
||||||
from app.utils.content_utils import bootstrap_history_from_db
|
from app.utils.content_utils import bootstrap_history_from_db
|
||||||
from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
|
from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
|
||||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
|
||||||
from app.utils.user_message_multimodal import build_human_message_content
|
from app.utils.user_message_multimodal import build_human_message_content
|
||||||
|
|
||||||
_background_tasks: set[asyncio.Task] = set()
|
_background_tasks: set[asyncio.Task] = set()
|
||||||
|
|
@ -198,58 +195,6 @@ def _extract_chunk_parts(chunk: Any) -> dict[str, Any]:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def format_mentioned_surfsense_docs_as_context(
|
|
||||||
documents: list[SurfsenseDocsDocument],
|
|
||||||
) -> str:
|
|
||||||
"""Format mentioned SurfSense documentation as context for the agent."""
|
|
||||||
if not documents:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
context_parts = ["<mentioned_surfsense_docs>"]
|
|
||||||
context_parts.append(
|
|
||||||
"The user has explicitly mentioned the following SurfSense documentation pages. "
|
|
||||||
"These are official documentation about how to use SurfSense and should be used to answer questions about the application. "
|
|
||||||
"Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])."
|
|
||||||
)
|
|
||||||
|
|
||||||
for doc in documents:
|
|
||||||
public_url = surfsense_docs_public_url(doc.source)
|
|
||||||
metadata_json = json.dumps(
|
|
||||||
{"source": doc.source, "public_url": public_url}, ensure_ascii=False
|
|
||||||
)
|
|
||||||
|
|
||||||
context_parts.append("<document>")
|
|
||||||
context_parts.append("<document_metadata>")
|
|
||||||
context_parts.append(f" <document_id>doc-{doc.id}</document_id>")
|
|
||||||
context_parts.append(" <document_type>SURFSENSE_DOCS</document_type>")
|
|
||||||
context_parts.append(f" <title><![CDATA[{doc.title}]]></title>")
|
|
||||||
context_parts.append(f" <url><![CDATA[{public_url}]]></url>")
|
|
||||||
context_parts.append(
|
|
||||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
|
|
||||||
)
|
|
||||||
context_parts.append("</document_metadata>")
|
|
||||||
context_parts.append("")
|
|
||||||
context_parts.append("<document_content>")
|
|
||||||
|
|
||||||
if hasattr(doc, "chunks") and doc.chunks:
|
|
||||||
for chunk in doc.chunks:
|
|
||||||
context_parts.append(
|
|
||||||
f" <chunk id='doc-{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
context_parts.append(
|
|
||||||
f" <chunk id='doc-0'><![CDATA[{doc.content}]]></chunk>"
|
|
||||||
)
|
|
||||||
|
|
||||||
context_parts.append("</document_content>")
|
|
||||||
context_parts.append("</document>")
|
|
||||||
context_parts.append("")
|
|
||||||
|
|
||||||
context_parts.append("</mentioned_surfsense_docs>")
|
|
||||||
|
|
||||||
return "\n".join(context_parts)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_todos_from_deepagents(command_output) -> dict:
|
def extract_todos_from_deepagents(command_output) -> dict:
|
||||||
"""
|
"""
|
||||||
Extract todos from deepagents' TodoListMiddleware Command output.
|
Extract todos from deepagents' TodoListMiddleware Command output.
|
||||||
|
|
@ -837,7 +782,6 @@ async def stream_new_chat(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
llm_config_id: int = -1,
|
llm_config_id: int = -1,
|
||||||
mentioned_document_ids: list[int] | None = None,
|
mentioned_document_ids: list[int] | None = None,
|
||||||
mentioned_surfsense_doc_ids: list[int] | None = None,
|
|
||||||
mentioned_folder_ids: list[int] | None = None,
|
mentioned_folder_ids: list[int] | None = None,
|
||||||
mentioned_connector_ids: list[int] | None = None,
|
mentioned_connector_ids: list[int] | None = None,
|
||||||
mentioned_connectors: list[dict[str, Any]] | None = None,
|
mentioned_connectors: list[dict[str, Any]] | None = None,
|
||||||
|
|
@ -869,7 +813,6 @@ async def stream_new_chat(
|
||||||
llm_config_id: The LLM configuration ID (default: -1 for first global config)
|
llm_config_id: The LLM configuration ID (default: -1 for first global config)
|
||||||
needs_history_bootstrap: If True, load message history from DB (for cloned chats)
|
needs_history_bootstrap: If True, load message history from DB (for cloned chats)
|
||||||
mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat
|
mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat
|
||||||
mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat
|
|
||||||
mentioned_folder_ids: Optional list of knowledge-base folder IDs mentioned with @ (cloud mode)
|
mentioned_folder_ids: Optional list of knowledge-base folder IDs mentioned with @ (cloud mode)
|
||||||
checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations)
|
checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations)
|
||||||
|
|
||||||
|
|
@ -1295,19 +1238,7 @@ async def stream_new_chat(
|
||||||
|
|
||||||
# Mentioned KB documents are now handled by KnowledgeBaseSearchMiddleware
|
# Mentioned KB documents are now handled by KnowledgeBaseSearchMiddleware
|
||||||
# which merges them into the scoped filesystem with full document
|
# which merges them into the scoped filesystem with full document
|
||||||
# structure. Only SurfSense docs and report context are inlined here.
|
# structure. Only report context is inlined here.
|
||||||
|
|
||||||
# Fetch mentioned SurfSense docs if any
|
|
||||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument] = []
|
|
||||||
if mentioned_surfsense_doc_ids:
|
|
||||||
result = await session.execute(
|
|
||||||
select(SurfsenseDocsDocument)
|
|
||||||
.options(selectinload(SurfsenseDocsDocument.chunks))
|
|
||||||
.filter(
|
|
||||||
SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
mentioned_surfsense_docs = list(result.scalars().all())
|
|
||||||
|
|
||||||
# Fetch the most recent report(s) in this thread so the LLM can
|
# Fetch the most recent report(s) in this thread so the LLM can
|
||||||
# easily find report_id for versioning decisions, instead of
|
# easily find report_id for versioning decisions, instead of
|
||||||
|
|
@ -1341,10 +1272,7 @@ async def stream_new_chat(
|
||||||
agent_user_query = user_query
|
agent_user_query = user_query
|
||||||
accepted_folder_ids: list[int] = []
|
accepted_folder_ids: list[int] = []
|
||||||
if fs_mode == FilesystemMode.CLOUD.value and (
|
if fs_mode == FilesystemMode.CLOUD.value and (
|
||||||
mentioned_document_ids
|
mentioned_document_ids or mentioned_folder_ids or mentioned_documents
|
||||||
or mentioned_surfsense_doc_ids
|
|
||||||
or mentioned_folder_ids
|
|
||||||
or mentioned_documents
|
|
||||||
):
|
):
|
||||||
from app.schemas.new_chat import (
|
from app.schemas.new_chat import (
|
||||||
MentionedDocumentInfo as _MentionedDocumentInfo,
|
MentionedDocumentInfo as _MentionedDocumentInfo,
|
||||||
|
|
@ -1370,23 +1298,17 @@ async def stream_new_chat(
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
mentioned_documents=chip_objs,
|
mentioned_documents=chip_objs,
|
||||||
mentioned_document_ids=mentioned_document_ids,
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
|
||||||
mentioned_folder_ids=mentioned_folder_ids,
|
mentioned_folder_ids=mentioned_folder_ids,
|
||||||
)
|
)
|
||||||
agent_user_query = substitute_in_text(user_query, resolved.token_to_path)
|
agent_user_query = substitute_in_text(user_query, resolved.token_to_path)
|
||||||
accepted_folder_ids = resolved.mentioned_folder_ids
|
accepted_folder_ids = resolved.mentioned_folder_ids
|
||||||
|
|
||||||
# Format the user query with context (SurfSense docs + reports only).
|
# Format the user query with context (reports only).
|
||||||
# Uses ``agent_user_query`` so the LLM sees backtick-wrapped paths
|
# Uses ``agent_user_query`` so the LLM sees backtick-wrapped paths
|
||||||
# instead of bare ``@title`` tokens.
|
# instead of bare ``@title`` tokens.
|
||||||
final_query = agent_user_query
|
final_query = agent_user_query
|
||||||
context_parts = []
|
context_parts = []
|
||||||
|
|
||||||
if mentioned_surfsense_docs:
|
|
||||||
context_parts.append(
|
|
||||||
format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs)
|
|
||||||
)
|
|
||||||
|
|
||||||
if mentioned_connectors:
|
if mentioned_connectors:
|
||||||
connector_lines = []
|
connector_lines = []
|
||||||
for connector in mentioned_connectors:
|
for connector in mentioned_connectors:
|
||||||
|
|
@ -1617,10 +1539,6 @@ async def stream_new_chat(
|
||||||
stream_result.content_builder = AssistantContentBuilder()
|
stream_result.content_builder = AssistantContentBuilder()
|
||||||
|
|
||||||
# Initial thinking step - analyzing the request
|
# Initial thinking step - analyzing the request
|
||||||
if mentioned_surfsense_docs:
|
|
||||||
initial_title = "Analyzing referenced content"
|
|
||||||
action_verb = "Analyzing"
|
|
||||||
else:
|
|
||||||
initial_title = "Understanding your request"
|
initial_title = "Understanding your request"
|
||||||
action_verb = "Processing"
|
action_verb = "Processing"
|
||||||
|
|
||||||
|
|
@ -1633,18 +1551,6 @@ async def stream_new_chat(
|
||||||
else:
|
else:
|
||||||
processing_parts.append("(message)")
|
processing_parts.append("(message)")
|
||||||
|
|
||||||
if mentioned_surfsense_docs:
|
|
||||||
doc_names = []
|
|
||||||
for doc in mentioned_surfsense_docs:
|
|
||||||
title = doc.title
|
|
||||||
if len(title) > 30:
|
|
||||||
title = title[:27] + "..."
|
|
||||||
doc_names.append(title)
|
|
||||||
if len(doc_names) == 1:
|
|
||||||
processing_parts.append(f"[{doc_names[0]}]")
|
|
||||||
else:
|
|
||||||
processing_parts.append(f"[{len(doc_names)} docs]")
|
|
||||||
|
|
||||||
initial_items = [f"{action_verb}: {' '.join(processing_parts)}"]
|
initial_items = [f"{action_verb}: {' '.join(processing_parts)}"]
|
||||||
initial_step_id = "thinking-1"
|
initial_step_id = "thinking-1"
|
||||||
|
|
||||||
|
|
@ -1664,10 +1570,10 @@ async def stream_new_chat(
|
||||||
items=initial_items,
|
items=initial_items,
|
||||||
)
|
)
|
||||||
|
|
||||||
# These ORM objects (with eagerly-loaded chunks) can be very large.
|
# These ORM objects can be large. They're only needed to build context
|
||||||
# They're only needed to build context strings already copied into
|
# strings already copied into final_query / langchain_messages —
|
||||||
# final_query / langchain_messages — release them before streaming.
|
# release them before streaming.
|
||||||
del mentioned_surfsense_docs, recent_reports
|
del recent_reports
|
||||||
del langchain_messages, final_query
|
del langchain_messages, final_query
|
||||||
|
|
||||||
# Check if this is the first assistant response so we can generate
|
# Check if this is the first assistant response so we can generate
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,11 @@
|
||||||
"""Pre-agent context shaping: mentioned-doc rendering and todos extraction."""
|
"""Pre-agent context shaping: todos extraction."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from app.tasks.chat.streaming.context.deepagents_todos import (
|
from app.tasks.chat.streaming.context.deepagents_todos import (
|
||||||
extract_todos_from_deepagents,
|
extract_todos_from_deepagents,
|
||||||
)
|
)
|
||||||
from app.tasks.chat.streaming.context.mentioned_docs import (
|
|
||||||
format_mentioned_surfsense_docs_as_context,
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"extract_todos_from_deepagents",
|
"extract_todos_from_deepagents",
|
||||||
"format_mentioned_surfsense_docs_as_context",
|
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,7 @@ def extract_todos_from_deepagents(command_output: Any) -> dict:
|
||||||
elif isinstance(command_output, dict):
|
elif isinstance(command_output, dict):
|
||||||
if "todos" in command_output:
|
if "todos" in command_output:
|
||||||
todos_data = command_output.get("todos", [])
|
todos_data = command_output.get("todos", [])
|
||||||
elif "update" in command_output and isinstance(
|
elif "update" in command_output and isinstance(command_output["update"], dict):
|
||||||
command_output["update"], dict
|
|
||||||
):
|
|
||||||
todos_data = command_output["update"].get("todos", [])
|
todos_data = command_output["update"].get("todos", [])
|
||||||
|
|
||||||
return {"todos": todos_data}
|
return {"todos": todos_data}
|
||||||
|
|
|
||||||
|
|
@ -1,58 +0,0 @@
|
||||||
"""Render user-mentioned SurfSense docs as XML context for the agent."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from app.db import SurfsenseDocsDocument
|
|
||||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
|
||||||
|
|
||||||
|
|
||||||
def format_mentioned_surfsense_docs_as_context(
|
|
||||||
documents: list[SurfsenseDocsDocument],
|
|
||||||
) -> str:
|
|
||||||
if not documents:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
context_parts = ["<mentioned_surfsense_docs>"]
|
|
||||||
context_parts.append(
|
|
||||||
"The user has explicitly mentioned the following SurfSense documentation pages. "
|
|
||||||
"These are official documentation about how to use SurfSense and should be used to answer questions about the application. "
|
|
||||||
"Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])."
|
|
||||||
)
|
|
||||||
|
|
||||||
for doc in documents:
|
|
||||||
public_url = surfsense_docs_public_url(doc.source)
|
|
||||||
metadata_json = json.dumps(
|
|
||||||
{"source": doc.source, "public_url": public_url}, ensure_ascii=False
|
|
||||||
)
|
|
||||||
|
|
||||||
context_parts.append("<document>")
|
|
||||||
context_parts.append("<document_metadata>")
|
|
||||||
context_parts.append(f" <document_id>doc-{doc.id}</document_id>")
|
|
||||||
context_parts.append(" <document_type>SURFSENSE_DOCS</document_type>")
|
|
||||||
context_parts.append(f" <title><![CDATA[{doc.title}]]></title>")
|
|
||||||
context_parts.append(f" <url><![CDATA[{public_url}]]></url>")
|
|
||||||
context_parts.append(
|
|
||||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
|
|
||||||
)
|
|
||||||
context_parts.append("</document_metadata>")
|
|
||||||
context_parts.append("")
|
|
||||||
context_parts.append("<document_content>")
|
|
||||||
|
|
||||||
if hasattr(doc, "chunks") and doc.chunks:
|
|
||||||
for chunk in doc.chunks:
|
|
||||||
context_parts.append(
|
|
||||||
f" <chunk id='doc-{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
context_parts.append(
|
|
||||||
f" <chunk id='doc-0'><![CDATA[{doc.content}]]></chunk>"
|
|
||||||
)
|
|
||||||
|
|
||||||
context_parts.append("</document_content>")
|
|
||||||
context_parts.append("</document>")
|
|
||||||
context_parts.append("")
|
|
||||||
|
|
||||||
context_parts.append("</mentioned_surfsense_docs>")
|
|
||||||
return "\n".join(context_parts)
|
|
||||||
|
|
@ -69,17 +69,13 @@ async def resolve_initial_auto_pin(
|
||||||
"pin.requires_image_input": requires_image_input,
|
"pin.requires_image_input": requires_image_input,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
return AutoPinResult(
|
return AutoPinResult(llm_config_id=pinned.resolved_llm_config_id, error=None)
|
||||||
llm_config_id=pinned.resolved_llm_config_id, error=None
|
|
||||||
)
|
|
||||||
except ValueError as pin_error:
|
except ValueError as pin_error:
|
||||||
# The "no vision-capable cfg" path raises a ValueError whose message
|
# The "no vision-capable cfg" path raises a ValueError whose message
|
||||||
# we map to the friendly image-input SSE error so the user sees the
|
# we map to the friendly image-input SSE error so the user sees the
|
||||||
# same message regardless of whether the gate fired in the resolver or
|
# same message regardless of whether the gate fired in the resolver or
|
||||||
# in ``llm_capability.assert_vision_capability_for_image_turn``.
|
# in ``llm_capability.assert_vision_capability_for_image_turn``.
|
||||||
is_vision_failure = (
|
is_vision_failure = requires_image_input and "vision-capable" in str(pin_error)
|
||||||
requires_image_input and "vision-capable" in str(pin_error)
|
|
||||||
)
|
|
||||||
error_code = (
|
error_code = (
|
||||||
"MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"
|
"MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"
|
||||||
if is_vision_failure
|
if is_vision_failure
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
"""Build and emit the first ``thinking-1`` step for a new-chat turn.
|
"""Build and emit the first ``thinking-1`` step for a new-chat turn.
|
||||||
|
|
||||||
The step title and "Processing X" items are derived from what the user sent
|
The step title and "Processing X" items are derived from what the user sent
|
||||||
(text snippet, image count, mentioned doc titles) so the FE can render a
|
(text snippet, image count) so the FE can render a meaningful placeholder
|
||||||
meaningful placeholder while the agent stream warms up.
|
while the agent stream warms up.
|
||||||
|
|
||||||
``thinking-1`` is the canonical id for this step — every subsequent
|
``thinking-1`` is the canonical id for this step — every subsequent
|
||||||
``thinking-N`` produced by ``stream_agent_events`` folds into the same
|
``thinking-N`` produced by ``stream_agent_events`` folds into the same
|
||||||
|
|
@ -15,7 +15,6 @@ from collections.abc import Iterator
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from app.db import SurfsenseDocsDocument
|
|
||||||
from app.services.new_streaming_service import VercelStreamingService
|
from app.services.new_streaming_service import VercelStreamingService
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -37,12 +36,7 @@ def build_initial_thinking_step(
|
||||||
*,
|
*,
|
||||||
user_query: str,
|
user_query: str,
|
||||||
user_image_data_urls: list[str] | None,
|
user_image_data_urls: list[str] | None,
|
||||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument],
|
|
||||||
) -> InitialThinkingStep:
|
) -> InitialThinkingStep:
|
||||||
if mentioned_surfsense_docs:
|
|
||||||
title = "Analyzing referenced content"
|
|
||||||
action_verb = "Analyzing"
|
|
||||||
else:
|
|
||||||
title = "Understanding your request"
|
title = "Understanding your request"
|
||||||
action_verb = "Processing"
|
action_verb = "Processing"
|
||||||
|
|
||||||
|
|
@ -55,18 +49,6 @@ def build_initial_thinking_step(
|
||||||
else:
|
else:
|
||||||
processing_parts.append("(message)")
|
processing_parts.append("(message)")
|
||||||
|
|
||||||
if mentioned_surfsense_docs:
|
|
||||||
doc_names: list[str] = []
|
|
||||||
for doc in mentioned_surfsense_docs:
|
|
||||||
t = doc.title
|
|
||||||
if len(t) > 30:
|
|
||||||
t = t[:27] + "..."
|
|
||||||
doc_names.append(t)
|
|
||||||
if len(doc_names) == 1:
|
|
||||||
processing_parts.append(f"[{doc_names[0]}]")
|
|
||||||
else:
|
|
||||||
processing_parts.append(f"[{len(doc_names)} docs]")
|
|
||||||
|
|
||||||
items = [f"{action_verb}: {' '.join(processing_parts)}"]
|
items = [f"{action_verb}: {' '.join(processing_parts)}"]
|
||||||
return InitialThinkingStep(step_id="thinking-1", title=title, items=items)
|
return InitialThinkingStep(step_id="thinking-1", title=title, items=items)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,20 +5,17 @@ Pipeline:
|
||||||
1. **History bootstrap** — only for cloned chats with no LangGraph checkpoint
|
1. **History bootstrap** — only for cloned chats with no LangGraph checkpoint
|
||||||
yet; flips the per-thread ``needs_history_bootstrap`` flag back to False
|
yet; flips the per-thread ``needs_history_bootstrap`` flag back to False
|
||||||
once the rows are loaded.
|
once the rows are loaded.
|
||||||
2. **Mentioned SurfSense docs** — eager-load chunks so the formatter has the
|
2. **Recent reports** — top 3 by id desc with non-null content, so the LLM
|
||||||
full content without a second roundtrip.
|
|
||||||
3. **Recent reports** — top 3 by id desc with non-null content, so the LLM
|
|
||||||
can resolve ``report_id`` for versioning without spelunking history.
|
can resolve ``report_id`` for versioning without spelunking history.
|
||||||
4. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the
|
3. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the
|
||||||
query with canonical ``\`/documents/...\``` paths the LLM expects.
|
query with canonical ``\`/documents/...\``` paths the LLM expects.
|
||||||
5. **Context block render** — XML-wrap surfsense docs + reports, prepend to
|
4. **Context block render** — XML-wrap recent reports, prepend to the
|
||||||
the rewritten query, optionally prefix with display name for SEARCH_SPACE
|
rewritten query, optionally prefix with display name for SEARCH_SPACE
|
||||||
visibility.
|
visibility.
|
||||||
6. **HumanMessage** — multimodal content if images are attached.
|
5. **HumanMessage** — multimodal content if images are attached.
|
||||||
|
|
||||||
Returns the assembled ``input_state`` dict plus side-channel data the
|
Returns the assembled ``input_state`` dict plus side-channel data the
|
||||||
orchestrator needs downstream (``accepted_folder_ids`` for runtime context;
|
orchestrator needs downstream (``accepted_folder_ids`` for runtime context).
|
||||||
``mentioned_surfsense_docs`` for the initial thinking step).
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -30,7 +27,6 @@ from typing import Any
|
||||||
from langchain_core.messages import HumanMessage
|
from langchain_core.messages import HumanMessage
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
from sqlalchemy.orm import selectinload
|
|
||||||
|
|
||||||
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
||||||
from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
|
from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
|
||||||
|
|
@ -38,10 +34,6 @@ from app.db import (
|
||||||
ChatVisibility,
|
ChatVisibility,
|
||||||
NewChatThread,
|
NewChatThread,
|
||||||
Report,
|
Report,
|
||||||
SurfsenseDocsDocument,
|
|
||||||
)
|
|
||||||
from app.tasks.chat.streaming.context.mentioned_docs import (
|
|
||||||
format_mentioned_surfsense_docs_as_context,
|
|
||||||
)
|
)
|
||||||
from app.utils.content_utils import bootstrap_history_from_db
|
from app.utils.content_utils import bootstrap_history_from_db
|
||||||
from app.utils.user_message_multimodal import build_human_message_content
|
from app.utils.user_message_multimodal import build_human_message_content
|
||||||
|
|
@ -55,13 +47,10 @@ class NewChatInputState:
|
||||||
|
|
||||||
``input_state`` is fed straight to the agent. ``accepted_folder_ids``
|
``input_state`` is fed straight to the agent. ``accepted_folder_ids``
|
||||||
feeds the runtime context (the resolver may have dropped some chips).
|
feeds the runtime context (the resolver may have dropped some chips).
|
||||||
``mentioned_surfsense_docs`` is consumed by the initial thinking-step
|
|
||||||
builder for the FE placeholder before the agent stream starts.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
input_state: dict[str, Any]
|
input_state: dict[str, Any]
|
||||||
accepted_folder_ids: list[int]
|
accepted_folder_ids: list[int]
|
||||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument]
|
|
||||||
|
|
||||||
|
|
||||||
async def build_new_chat_input_state(
|
async def build_new_chat_input_state(
|
||||||
|
|
@ -72,7 +61,6 @@ async def build_new_chat_input_state(
|
||||||
user_query: str,
|
user_query: str,
|
||||||
user_image_data_urls: list[str] | None,
|
user_image_data_urls: list[str] | None,
|
||||||
mentioned_document_ids: list[int] | None,
|
mentioned_document_ids: list[int] | None,
|
||||||
mentioned_surfsense_doc_ids: list[int] | None,
|
|
||||||
mentioned_folder_ids: list[int] | None,
|
mentioned_folder_ids: list[int] | None,
|
||||||
mentioned_documents: list[dict[str, Any]] | None,
|
mentioned_documents: list[dict[str, Any]] | None,
|
||||||
needs_history_bootstrap: bool,
|
needs_history_bootstrap: bool,
|
||||||
|
|
@ -96,15 +84,6 @@ async def build_new_chat_input_state(
|
||||||
thread.needs_history_bootstrap = False
|
thread.needs_history_bootstrap = False
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument] = []
|
|
||||||
if mentioned_surfsense_doc_ids:
|
|
||||||
result = await session.execute(
|
|
||||||
select(SurfsenseDocsDocument)
|
|
||||||
.options(selectinload(SurfsenseDocsDocument.chunks))
|
|
||||||
.filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids))
|
|
||||||
)
|
|
||||||
mentioned_surfsense_docs = list(result.scalars().all())
|
|
||||||
|
|
||||||
# Top 3 reports keyed by id desc (newest first) with content present,
|
# Top 3 reports keyed by id desc (newest first) with content present,
|
||||||
# surfaced inline so the LLM resolves ``report_id`` for versioning without
|
# surfaced inline so the LLM resolves ``report_id`` for versioning without
|
||||||
# digging through conversation history.
|
# digging through conversation history.
|
||||||
|
|
@ -125,14 +104,12 @@ async def build_new_chat_input_state(
|
||||||
user_query=user_query,
|
user_query=user_query,
|
||||||
filesystem_mode=filesystem_mode,
|
filesystem_mode=filesystem_mode,
|
||||||
mentioned_document_ids=mentioned_document_ids,
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
|
||||||
mentioned_folder_ids=mentioned_folder_ids,
|
mentioned_folder_ids=mentioned_folder_ids,
|
||||||
mentioned_documents=mentioned_documents,
|
mentioned_documents=mentioned_documents,
|
||||||
)
|
)
|
||||||
|
|
||||||
final_query = _render_query_with_context(
|
final_query = _render_query_with_context(
|
||||||
agent_user_query=agent_user_query,
|
agent_user_query=agent_user_query,
|
||||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
|
||||||
recent_reports=recent_reports,
|
recent_reports=recent_reports,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -154,7 +131,6 @@ async def build_new_chat_input_state(
|
||||||
return NewChatInputState(
|
return NewChatInputState(
|
||||||
input_state=input_state,
|
input_state=input_state,
|
||||||
accepted_folder_ids=accepted_folder_ids,
|
accepted_folder_ids=accepted_folder_ids,
|
||||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -165,7 +141,6 @@ async def _resolve_mentions_for_query(
|
||||||
user_query: str,
|
user_query: str,
|
||||||
filesystem_mode: str,
|
filesystem_mode: str,
|
||||||
mentioned_document_ids: list[int] | None,
|
mentioned_document_ids: list[int] | None,
|
||||||
mentioned_surfsense_doc_ids: list[int] | None,
|
|
||||||
mentioned_folder_ids: list[int] | None,
|
mentioned_folder_ids: list[int] | None,
|
||||||
mentioned_documents: list[dict[str, Any]] | None,
|
mentioned_documents: list[dict[str, Any]] | None,
|
||||||
) -> tuple[str, list[int]]:
|
) -> tuple[str, list[int]]:
|
||||||
|
|
@ -187,10 +162,7 @@ async def _resolve_mentions_for_query(
|
||||||
accepted_folder_ids: list[int] = []
|
accepted_folder_ids: list[int] = []
|
||||||
|
|
||||||
has_any_mention = bool(
|
has_any_mention = bool(
|
||||||
mentioned_document_ids
|
mentioned_document_ids or mentioned_folder_ids or mentioned_documents
|
||||||
or mentioned_surfsense_doc_ids
|
|
||||||
or mentioned_folder_ids
|
|
||||||
or mentioned_documents
|
|
||||||
)
|
)
|
||||||
if filesystem_mode != FilesystemMode.CLOUD.value or not has_any_mention:
|
if filesystem_mode != FilesystemMode.CLOUD.value or not has_any_mention:
|
||||||
return agent_user_query, accepted_folder_ids
|
return agent_user_query, accepted_folder_ids
|
||||||
|
|
@ -207,16 +179,13 @@ async def _resolve_mentions_for_query(
|
||||||
try:
|
try:
|
||||||
chip_objs.append(MentionedDocumentInfo.model_validate(raw))
|
chip_objs.append(MentionedDocumentInfo.model_validate(raw))
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug(
|
logger.debug("stream_new_chat: dropping malformed mention chip %r", raw)
|
||||||
"stream_new_chat: dropping malformed mention chip %r", raw
|
|
||||||
)
|
|
||||||
|
|
||||||
resolved = await resolve_mentions(
|
resolved = await resolve_mentions(
|
||||||
session,
|
session,
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
mentioned_documents=chip_objs,
|
mentioned_documents=chip_objs,
|
||||||
mentioned_document_ids=mentioned_document_ids,
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
|
||||||
mentioned_folder_ids=mentioned_folder_ids,
|
mentioned_folder_ids=mentioned_folder_ids,
|
||||||
)
|
)
|
||||||
agent_user_query = substitute_in_text(user_query, resolved.token_to_path)
|
agent_user_query = substitute_in_text(user_query, resolved.token_to_path)
|
||||||
|
|
@ -227,17 +196,11 @@ async def _resolve_mentions_for_query(
|
||||||
def _render_query_with_context(
|
def _render_query_with_context(
|
||||||
*,
|
*,
|
||||||
agent_user_query: str,
|
agent_user_query: str,
|
||||||
mentioned_surfsense_docs: list[SurfsenseDocsDocument],
|
|
||||||
recent_reports: list[Report],
|
recent_reports: list[Report],
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Prepend surfsense-docs + recent-reports XML blocks to the user query."""
|
"""Prepend recent-reports XML block to the user query."""
|
||||||
context_parts: list[str] = []
|
context_parts: list[str] = []
|
||||||
|
|
||||||
if mentioned_surfsense_docs:
|
|
||||||
context_parts.append(
|
|
||||||
format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs)
|
|
||||||
)
|
|
||||||
|
|
||||||
if recent_reports:
|
if recent_reports:
|
||||||
report_lines: list[str] = []
|
report_lines: list[str] = []
|
||||||
for r in recent_reports:
|
for r in recent_reports:
|
||||||
|
|
|
||||||
|
|
@ -48,9 +48,7 @@ def check_image_input_capability(
|
||||||
return None
|
return None
|
||||||
|
|
||||||
model_label = agent_config.config_name or agent_config.model_name or "model"
|
model_label = agent_config.config_name or agent_config.model_name or "model"
|
||||||
ot.add_event(
|
ot.add_event("quota.denied", {"quota.code": "MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"})
|
||||||
"quota.denied", {"quota.code": "MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"}
|
|
||||||
)
|
|
||||||
return (
|
return (
|
||||||
(
|
(
|
||||||
f"The selected model ({model_label}) does not support "
|
f"The selected model ({model_label}) does not support "
|
||||||
|
|
|
||||||
|
|
@ -123,7 +123,6 @@ async def stream_new_chat(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
llm_config_id: int = -1,
|
llm_config_id: int = -1,
|
||||||
mentioned_document_ids: list[int] | None = None,
|
mentioned_document_ids: list[int] | None = None,
|
||||||
mentioned_surfsense_doc_ids: list[int] | None = None,
|
|
||||||
mentioned_folder_ids: list[int] | None = None,
|
mentioned_folder_ids: list[int] | None = None,
|
||||||
mentioned_documents: list[dict[str, Any]] | None = None,
|
mentioned_documents: list[dict[str, Any]] | None = None,
|
||||||
checkpoint_id: str | None = None,
|
checkpoint_id: str | None = None,
|
||||||
|
|
@ -259,7 +258,8 @@ async def stream_new_chat(
|
||||||
|
|
||||||
if needs_premium_quota(agent_config, user_id):
|
if needs_premium_quota(agent_config, user_id):
|
||||||
premium_reservation = await reserve_premium(
|
premium_reservation = await reserve_premium(
|
||||||
agent_config=agent_config, user_id=user_id # type: ignore[arg-type]
|
agent_config=agent_config,
|
||||||
|
user_id=user_id, # type: ignore[arg-type]
|
||||||
)
|
)
|
||||||
if not premium_reservation.allowed:
|
if not premium_reservation.allowed:
|
||||||
ot.add_event("quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"})
|
ot.add_event("quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"})
|
||||||
|
|
@ -434,7 +434,6 @@ async def stream_new_chat(
|
||||||
user_query=user_query,
|
user_query=user_query,
|
||||||
user_image_data_urls=user_image_data_urls,
|
user_image_data_urls=user_image_data_urls,
|
||||||
mentioned_document_ids=mentioned_document_ids,
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
|
||||||
mentioned_folder_ids=mentioned_folder_ids,
|
mentioned_folder_ids=mentioned_folder_ids,
|
||||||
mentioned_documents=mentioned_documents,
|
mentioned_documents=mentioned_documents,
|
||||||
needs_history_bootstrap=needs_history_bootstrap,
|
needs_history_bootstrap=needs_history_bootstrap,
|
||||||
|
|
@ -446,7 +445,6 @@ async def stream_new_chat(
|
||||||
)
|
)
|
||||||
input_state = assembled.input_state
|
input_state = assembled.input_state
|
||||||
accepted_folder_ids = assembled.accepted_folder_ids
|
accepted_folder_ids = assembled.accepted_folder_ids
|
||||||
mentioned_surfsense_docs = assembled.mentioned_surfsense_docs
|
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
"[stream_new_chat] History bootstrap + doc/report queries in %.3fs",
|
"[stream_new_chat] History bootstrap + doc/report queries in %.3fs",
|
||||||
time.perf_counter() - _t0,
|
time.perf_counter() - _t0,
|
||||||
|
|
@ -492,7 +490,9 @@ async def stream_new_chat(
|
||||||
|
|
||||||
# --- Block 4: First SSE frames ---
|
# --- Block 4: First SSE frames ---
|
||||||
|
|
||||||
for sse in iter_initial_frames(streaming_service, turn_id=stream_result.turn_id):
|
for sse in iter_initial_frames(
|
||||||
|
streaming_service, turn_id=stream_result.turn_id
|
||||||
|
):
|
||||||
yield sse
|
yield sse
|
||||||
|
|
||||||
# --- Block 5: Persistence join + message-id frames ---
|
# --- Block 5: Persistence join + message-id frames ---
|
||||||
|
|
@ -557,7 +557,6 @@ async def stream_new_chat(
|
||||||
initial_step = build_initial_thinking_step(
|
initial_step = build_initial_thinking_step(
|
||||||
user_query=user_query,
|
user_query=user_query,
|
||||||
user_image_data_urls=user_image_data_urls,
|
user_image_data_urls=user_image_data_urls,
|
||||||
mentioned_surfsense_docs=mentioned_surfsense_docs,
|
|
||||||
)
|
)
|
||||||
for sse in iter_initial_thinking_step_frame(
|
for sse in iter_initial_thinking_step_frame(
|
||||||
initial_step,
|
initial_step,
|
||||||
|
|
@ -572,7 +571,7 @@ async def stream_new_chat(
|
||||||
# Drop the heavy ORM objects + the container that holds them so they
|
# Drop the heavy ORM objects + the container that holds them so they
|
||||||
# aren't retained for the entire streaming duration. ``input_state``
|
# aren't retained for the entire streaming duration. ``input_state``
|
||||||
# already carries the langchain_messages list independently.
|
# already carries the langchain_messages list independently.
|
||||||
del assembled, mentioned_surfsense_docs
|
del assembled
|
||||||
|
|
||||||
title_task = spawn_title_task(
|
title_task = spawn_title_task(
|
||||||
chat_id=chat_id,
|
chat_id=chat_id,
|
||||||
|
|
@ -693,7 +692,9 @@ async def stream_new_chat(
|
||||||
fallback_commit_search_space_id=search_space_id,
|
fallback_commit_search_space_id=search_space_id,
|
||||||
fallback_commit_created_by_id=user_id,
|
fallback_commit_created_by_id=user_id,
|
||||||
fallback_commit_filesystem_mode=(
|
fallback_commit_filesystem_mode=(
|
||||||
filesystem_selection.mode if filesystem_selection else FilesystemMode.CLOUD
|
filesystem_selection.mode
|
||||||
|
if filesystem_selection
|
||||||
|
else FilesystemMode.CLOUD
|
||||||
),
|
),
|
||||||
fallback_commit_thread_id=chat_id,
|
fallback_commit_thread_id=chat_id,
|
||||||
runtime_context=runtime_context,
|
runtime_context=runtime_context,
|
||||||
|
|
@ -715,11 +716,7 @@ async def stream_new_chat(
|
||||||
title_emitted = True
|
title_emitted = True
|
||||||
# Account for the case where the task completed but produced no
|
# Account for the case where the task completed but produced no
|
||||||
# title — flip the flag anyway so we don't keep checking it.
|
# title — flip the flag anyway so we don't keep checking it.
|
||||||
if (
|
if title_task is not None and title_task.done() and not title_emitted:
|
||||||
title_task is not None
|
|
||||||
and title_task.done()
|
|
||||||
and not title_emitted
|
|
||||||
):
|
|
||||||
title_emitted = True
|
title_emitted = True
|
||||||
|
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
|
|
@ -811,9 +808,7 @@ async def stream_new_chat(
|
||||||
end_turn(str(chat_id))
|
end_turn(str(chat_id))
|
||||||
|
|
||||||
if premium_reservation is not None and user_id:
|
if premium_reservation is not None and user_id:
|
||||||
await release_premium(
|
await release_premium(reservation=premium_reservation, user_id=user_id)
|
||||||
reservation=premium_reservation, user_id=user_id
|
|
||||||
)
|
|
||||||
|
|
||||||
await close_session_and_clear_ai_responding(session, chat_id)
|
await close_session_and_clear_ai_responding(session, chat_id)
|
||||||
|
|
||||||
|
|
@ -852,9 +847,9 @@ async def stream_new_chat(
|
||||||
|
|
||||||
# Break circular refs held by the agent graph, tools, and LLM
|
# Break circular refs held by the agent graph, tools, and LLM
|
||||||
# wrappers so the GC can reclaim them in a single pass.
|
# wrappers so the GC can reclaim them in a single pass.
|
||||||
agent = llm = connector_service = None # noqa: F841
|
agent = llm = connector_service = None
|
||||||
input_state = stream_result = None # noqa: F841
|
input_state = stream_result = None
|
||||||
session = None # noqa: F841
|
session = None
|
||||||
|
|
||||||
run_gc_pass(log_prefix="stream_new_chat", chat_id=chat_id)
|
run_gc_pass(log_prefix="stream_new_chat", chat_id=chat_id)
|
||||||
close_chat_request_span(
|
close_chat_request_span(
|
||||||
|
|
|
||||||
|
|
@ -30,9 +30,7 @@ def build_new_chat_runtime_context(
|
||||||
return SurfSenseContextSchema(
|
return SurfSenseContextSchema(
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
mentioned_document_ids=list(mentioned_document_ids or []),
|
mentioned_document_ids=list(mentioned_document_ids or []),
|
||||||
mentioned_folder_ids=list(
|
mentioned_folder_ids=list(accepted_folder_ids or mentioned_folder_ids or []),
|
||||||
accepted_folder_ids or mentioned_folder_ids or []
|
|
||||||
),
|
|
||||||
request_id=request_id,
|
request_id=request_id,
|
||||||
turn_id=turn_id,
|
turn_id=turn_id,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -133,12 +133,8 @@ async def _generate_title(
|
||||||
# inherited Azure endpoint — see ``provider_api_base`` for the
|
# inherited Azure endpoint — see ``provider_api_base`` for the
|
||||||
# same bug repro on the image-gen / vision paths.
|
# same bug repro on the image-gen / vision paths.
|
||||||
raw_model = getattr(llm, "model", "") or ""
|
raw_model = getattr(llm, "model", "") or ""
|
||||||
provider_prefix = (
|
provider_prefix = raw_model.split("/", 1)[0] if "/" in raw_model else None
|
||||||
raw_model.split("/", 1)[0] if "/" in raw_model else None
|
provider_value = agent_config.provider if agent_config is not None else None
|
||||||
)
|
|
||||||
provider_value = (
|
|
||||||
agent_config.provider if agent_config is not None else None
|
|
||||||
)
|
|
||||||
title_api_base = resolve_api_base(
|
title_api_base = resolve_api_base(
|
||||||
provider=provider_value,
|
provider=provider_value,
|
||||||
provider_prefix=provider_prefix,
|
provider_prefix=provider_prefix,
|
||||||
|
|
|
||||||
|
|
@ -15,14 +15,10 @@ building blocks under ``flows/shared/``. Mirrors ``stream_new_chat`` but:
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import gc
|
|
||||||
import logging
|
import logging
|
||||||
import sys
|
|
||||||
import time
|
import time
|
||||||
import uuid as _uuid
|
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from typing import Any
|
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
import anyio
|
import anyio
|
||||||
|
|
@ -32,7 +28,7 @@ from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
|
||||||
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
|
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
|
||||||
from app.agents.new_chat.middleware.busy_mutex import end_turn
|
from app.agents.new_chat.middleware.busy_mutex import end_turn
|
||||||
from app.config import config as _app_config
|
from app.config import config as _app_config
|
||||||
from app.db import ChatVisibility, async_session_maker, shielded_async_session
|
from app.db import ChatVisibility, async_session_maker
|
||||||
from app.observability import otel as ot
|
from app.observability import otel as ot
|
||||||
from app.services.chat_session_state_service import set_ai_responding
|
from app.services.chat_session_state_service import set_ai_responding
|
||||||
from app.services.new_streaming_service import VercelStreamingService
|
from app.services.new_streaming_service import VercelStreamingService
|
||||||
|
|
@ -89,7 +85,7 @@ from app.tasks.chat.streaming.flows.shared.terminal_error import (
|
||||||
)
|
)
|
||||||
from app.tasks.chat.streaming.shared.stream_result import StreamResult
|
from app.tasks.chat.streaming.shared.stream_result import StreamResult
|
||||||
from app.tasks.chat.streaming.shared.utils import resume_step_prefix
|
from app.tasks.chat.streaming.shared.utils import resume_step_prefix
|
||||||
from app.utils.perf import get_perf_logger, log_system_snapshot
|
from app.utils.perf import get_perf_logger
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
_perf_log = get_perf_logger()
|
_perf_log = get_perf_logger()
|
||||||
|
|
@ -217,12 +213,11 @@ async def stream_resume_chat(
|
||||||
|
|
||||||
if needs_premium_quota(agent_config, user_id):
|
if needs_premium_quota(agent_config, user_id):
|
||||||
premium_reservation = await reserve_premium(
|
premium_reservation = await reserve_premium(
|
||||||
agent_config=agent_config, user_id=user_id # type: ignore[arg-type]
|
agent_config=agent_config,
|
||||||
|
user_id=user_id, # type: ignore[arg-type]
|
||||||
)
|
)
|
||||||
if not premium_reservation.allowed:
|
if not premium_reservation.allowed:
|
||||||
ot.add_event(
|
ot.add_event("quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"})
|
||||||
"quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"}
|
|
||||||
)
|
|
||||||
if requested_llm_config_id == 0:
|
if requested_llm_config_id == 0:
|
||||||
try:
|
try:
|
||||||
pinned_fb = await resolve_or_get_pinned_llm_config_id(
|
pinned_fb = await resolve_or_get_pinned_llm_config_id(
|
||||||
|
|
@ -396,7 +391,9 @@ async def stream_resume_chat(
|
||||||
|
|
||||||
# --- First SSE frames ---
|
# --- First SSE frames ---
|
||||||
|
|
||||||
for sse in iter_initial_frames(streaming_service, turn_id=stream_result.turn_id):
|
for sse in iter_initial_frames(
|
||||||
|
streaming_service, turn_id=stream_result.turn_id
|
||||||
|
):
|
||||||
yield sse
|
yield sse
|
||||||
|
|
||||||
# --- Assistant-shell persistence + id frame ---
|
# --- Assistant-shell persistence + id frame ---
|
||||||
|
|
@ -517,7 +514,9 @@ async def stream_resume_chat(
|
||||||
fallback_commit_search_space_id=search_space_id,
|
fallback_commit_search_space_id=search_space_id,
|
||||||
fallback_commit_created_by_id=user_id,
|
fallback_commit_created_by_id=user_id,
|
||||||
fallback_commit_filesystem_mode=(
|
fallback_commit_filesystem_mode=(
|
||||||
filesystem_selection.mode if filesystem_selection else FilesystemMode.CLOUD
|
filesystem_selection.mode
|
||||||
|
if filesystem_selection
|
||||||
|
else FilesystemMode.CLOUD
|
||||||
),
|
),
|
||||||
fallback_commit_thread_id=chat_id,
|
fallback_commit_thread_id=chat_id,
|
||||||
runtime_context=runtime_context,
|
runtime_context=runtime_context,
|
||||||
|
|
@ -589,9 +588,7 @@ async def stream_resume_chat(
|
||||||
end_turn(str(chat_id))
|
end_turn(str(chat_id))
|
||||||
|
|
||||||
if premium_reservation is not None and user_id:
|
if premium_reservation is not None and user_id:
|
||||||
await release_premium(
|
await release_premium(reservation=premium_reservation, user_id=user_id)
|
||||||
reservation=premium_reservation, user_id=user_id
|
|
||||||
)
|
|
||||||
|
|
||||||
await close_session_and_clear_ai_responding(session, chat_id)
|
await close_session_and_clear_ai_responding(session, chat_id)
|
||||||
|
|
||||||
|
|
@ -609,13 +606,11 @@ async def stream_resume_chat(
|
||||||
if not busy_error_raised:
|
if not busy_error_raised:
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
end_turn(str(chat_id))
|
end_turn(str(chat_id))
|
||||||
_perf_log.info(
|
_perf_log.info("[stream_resume] end_turn cleanup (chat_id=%s)", chat_id)
|
||||||
"[stream_resume] end_turn cleanup (chat_id=%s)", chat_id
|
|
||||||
)
|
|
||||||
|
|
||||||
agent = llm = connector_service = None # noqa: F841
|
agent = llm = connector_service = None
|
||||||
stream_result = None # noqa: F841
|
stream_result = None
|
||||||
session = None # noqa: F841
|
session = None
|
||||||
|
|
||||||
run_gc_pass(log_prefix="stream_resume", chat_id=chat_id)
|
run_gc_pass(log_prefix="stream_resume", chat_id=chat_id)
|
||||||
close_chat_request_span(
|
close_chat_request_span(
|
||||||
|
|
|
||||||
|
|
@ -47,9 +47,7 @@ async def build_resume_routing(
|
||||||
slice_decisions_by_tool_call,
|
slice_decisions_by_tool_call,
|
||||||
)
|
)
|
||||||
|
|
||||||
parent_state = await agent.aget_state(
|
parent_state = await agent.aget_state({"configurable": {"thread_id": str(chat_id)}})
|
||||||
{"configurable": {"thread_id": str(chat_id)}}
|
|
||||||
)
|
|
||||||
pending = collect_pending_tool_calls(parent_state)
|
pending = collect_pending_tool_calls(parent_state)
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
"[hitl_route] resume_entry chat_id=%s decisions=%d pending_subagents=%d",
|
"[hitl_route] resume_entry chat_id=%s decisions=%d pending_subagents=%d",
|
||||||
|
|
|
||||||
|
|
@ -49,9 +49,7 @@ async def finalize_assistant_message(
|
||||||
was never assigned.
|
was never assigned.
|
||||||
"""
|
"""
|
||||||
if not (
|
if not (
|
||||||
stream_result
|
stream_result and stream_result.turn_id and stream_result.assistant_message_id
|
||||||
and stream_result.turn_id
|
|
||||||
and stream_result.assistant_message_id
|
|
||||||
):
|
):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,9 +39,7 @@ async def close_session_and_clear_ai_responding(
|
||||||
async with shielded_async_session() as fresh_session:
|
async with shielded_async_session() as fresh_session:
|
||||||
await clear_ai_responding(fresh_session, chat_id)
|
await clear_ai_responding(fresh_session, chat_id)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning(
|
logger.warning("Failed to clear AI responding state for thread %s", chat_id)
|
||||||
"Failed to clear AI responding state for thread %s", chat_id
|
|
||||||
)
|
|
||||||
|
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
session.expunge_all()
|
session.expunge_all()
|
||||||
|
|
|
||||||
|
|
@ -41,9 +41,7 @@ class PremiumReservation:
|
||||||
allowed: bool
|
allowed: bool
|
||||||
|
|
||||||
|
|
||||||
def needs_premium_quota(
|
def needs_premium_quota(agent_config: AgentConfig | None, user_id: str | None) -> bool:
|
||||||
agent_config: AgentConfig | None, user_id: str | None
|
|
||||||
) -> bool:
|
|
||||||
return bool(agent_config is not None and user_id and agent_config.is_premium)
|
return bool(agent_config is not None and user_id and agent_config.is_premium)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -61,8 +59,10 @@ async def reserve_premium(
|
||||||
request_id = _uuid.uuid4().hex[:16]
|
request_id = _uuid.uuid4().hex[:16]
|
||||||
litellm_params = agent_config.litellm_params or {}
|
litellm_params = agent_config.litellm_params or {}
|
||||||
base_model = (
|
base_model = (
|
||||||
litellm_params.get("base_model") if isinstance(litellm_params, dict) else None
|
(litellm_params.get("base_model") if isinstance(litellm_params, dict) else None)
|
||||||
) or agent_config.model_name or ""
|
or agent_config.model_name
|
||||||
|
or ""
|
||||||
|
)
|
||||||
reserve_amount_micros = estimate_call_reserve_micros(
|
reserve_amount_micros = estimate_call_reserve_micros(
|
||||||
base_model=base_model,
|
base_model=base_model,
|
||||||
quota_reserve_tokens=agent_config.quota_reserve_tokens,
|
quota_reserve_tokens=agent_config.quota_reserve_tokens,
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,7 @@ import contextlib
|
||||||
import sys
|
import sys
|
||||||
from typing import Any, Literal
|
from typing import Any, Literal
|
||||||
|
|
||||||
from app.observability import metrics as ot_metrics
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.observability import otel as ot
|
|
||||||
|
|
||||||
|
|
||||||
def open_chat_request_span(
|
def open_chat_request_span(
|
||||||
|
|
|
||||||
|
|
@ -15,8 +15,7 @@ from collections.abc import Iterator
|
||||||
from typing import Any, Literal
|
from typing import Any, Literal
|
||||||
|
|
||||||
from app.agents.new_chat.errors import BusyError
|
from app.agents.new_chat.errors import BusyError
|
||||||
from app.observability import metrics as ot_metrics
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.observability import otel as ot
|
|
||||||
from app.services.new_streaming_service import VercelStreamingService
|
from app.services.new_streaming_service import VercelStreamingService
|
||||||
from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
|
from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
|
||||||
from app.tasks.chat.streaming.errors.emitter import emit_stream_terminal_error
|
from app.tasks.chat.streaming.errors.emitter import emit_stream_terminal_error
|
||||||
|
|
|
||||||
|
|
@ -1,249 +0,0 @@
|
||||||
"""
|
|
||||||
Surfsense documentation indexer.
|
|
||||||
Indexes MDX documentation files at startup.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
from datetime import UTC, datetime
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from sqlalchemy import delete as sa_delete, select
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
|
||||||
from sqlalchemy.orm import selectinload
|
|
||||||
from sqlalchemy.orm.attributes import set_committed_value
|
|
||||||
|
|
||||||
from app.config import config
|
|
||||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
|
|
||||||
from app.utils.document_converters import embed_text
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def _safe_set_docs_chunks(
|
|
||||||
session: AsyncSession, document: SurfsenseDocsDocument, chunks: list
|
|
||||||
) -> None:
|
|
||||||
"""safe_set_chunks variant for the SurfsenseDocsDocument/Chunk models."""
|
|
||||||
if document.id is not None:
|
|
||||||
await session.execute(
|
|
||||||
sa_delete(SurfsenseDocsChunk).where(
|
|
||||||
SurfsenseDocsChunk.document_id == document.id
|
|
||||||
)
|
|
||||||
)
|
|
||||||
for chunk in chunks:
|
|
||||||
chunk.document_id = document.id
|
|
||||||
|
|
||||||
set_committed_value(document, "chunks", chunks)
|
|
||||||
session.add_all(chunks)
|
|
||||||
|
|
||||||
|
|
||||||
# Path to docs relative to project root
|
|
||||||
DOCS_DIR = (
|
|
||||||
Path(__file__).resolve().parent.parent.parent.parent
|
|
||||||
/ "surfsense_web"
|
|
||||||
/ "content"
|
|
||||||
/ "docs"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_mdx_frontmatter(content: str) -> tuple[str, str]:
|
|
||||||
"""
|
|
||||||
Parse MDX file to extract frontmatter title and content.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: Raw MDX file content
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (title, content_without_frontmatter)
|
|
||||||
"""
|
|
||||||
# Match frontmatter between --- markers
|
|
||||||
frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n"
|
|
||||||
match = re.match(frontmatter_pattern, content, re.DOTALL)
|
|
||||||
|
|
||||||
if match:
|
|
||||||
frontmatter = match.group(1)
|
|
||||||
content_without_frontmatter = content[match.end() :]
|
|
||||||
|
|
||||||
# Extract title from frontmatter
|
|
||||||
title_match = re.search(r"^title:\s*(.+)$", frontmatter, re.MULTILINE)
|
|
||||||
title = title_match.group(1).strip() if title_match else "Untitled"
|
|
||||||
|
|
||||||
# Remove quotes if present
|
|
||||||
title = title.strip("\"'")
|
|
||||||
|
|
||||||
return title, content_without_frontmatter.strip()
|
|
||||||
|
|
||||||
return "Untitled", content.strip()
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_mdx_files() -> list[Path]:
|
|
||||||
"""
|
|
||||||
Get all MDX files from the docs directory.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of Path objects for each MDX file
|
|
||||||
"""
|
|
||||||
if not DOCS_DIR.exists():
|
|
||||||
logger.warning(f"Docs directory not found: {DOCS_DIR}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
return list(DOCS_DIR.rglob("*.mdx"))
|
|
||||||
|
|
||||||
|
|
||||||
def generate_surfsense_docs_content_hash(content: str) -> str:
|
|
||||||
"""Generate SHA-256 hash for Surfsense docs content."""
|
|
||||||
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]:
|
|
||||||
"""
|
|
||||||
Create chunks from Surfsense documentation content.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: Document content to chunk
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of SurfsenseDocsChunk objects with embeddings
|
|
||||||
"""
|
|
||||||
return [
|
|
||||||
SurfsenseDocsChunk(
|
|
||||||
content=chunk.text,
|
|
||||||
embedding=embed_text(chunk.text),
|
|
||||||
)
|
|
||||||
for chunk in config.chunker_instance.chunk(content)
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
async def index_surfsense_docs(session: AsyncSession) -> tuple[int, int, int, int]:
|
|
||||||
"""
|
|
||||||
Index all Surfsense documentation files.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
session: SQLAlchemy async session
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (created, updated, skipped, deleted) counts
|
|
||||||
"""
|
|
||||||
created = 0
|
|
||||||
updated = 0
|
|
||||||
skipped = 0
|
|
||||||
deleted = 0
|
|
||||||
|
|
||||||
# Get all existing docs from database
|
|
||||||
existing_docs_result = await session.execute(
|
|
||||||
select(SurfsenseDocsDocument).options(
|
|
||||||
selectinload(SurfsenseDocsDocument.chunks)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
existing_docs = {doc.source: doc for doc in existing_docs_result.scalars().all()}
|
|
||||||
|
|
||||||
# Track which sources we've processed
|
|
||||||
processed_sources = set()
|
|
||||||
|
|
||||||
# Get all MDX files
|
|
||||||
mdx_files = get_all_mdx_files()
|
|
||||||
logger.info(f"Found {len(mdx_files)} MDX files to index")
|
|
||||||
|
|
||||||
for mdx_file in mdx_files:
|
|
||||||
try:
|
|
||||||
source = str(mdx_file.relative_to(DOCS_DIR))
|
|
||||||
processed_sources.add(source)
|
|
||||||
|
|
||||||
# Read file content
|
|
||||||
raw_content = mdx_file.read_text(encoding="utf-8")
|
|
||||||
title, content = parse_mdx_frontmatter(raw_content)
|
|
||||||
content_hash = generate_surfsense_docs_content_hash(raw_content)
|
|
||||||
|
|
||||||
if source in existing_docs:
|
|
||||||
existing_doc = existing_docs[source]
|
|
||||||
|
|
||||||
# Check if content changed
|
|
||||||
if existing_doc.content_hash == content_hash:
|
|
||||||
logger.debug(f"Skipping unchanged: {source}")
|
|
||||||
skipped += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Content changed - update document
|
|
||||||
logger.info(f"Updating changed document: {source}")
|
|
||||||
|
|
||||||
# Create new chunks
|
|
||||||
chunks = create_surfsense_docs_chunks(content)
|
|
||||||
|
|
||||||
# Update document fields
|
|
||||||
existing_doc.title = title
|
|
||||||
existing_doc.content = content
|
|
||||||
existing_doc.content_hash = content_hash
|
|
||||||
existing_doc.embedding = embed_text(content)
|
|
||||||
await _safe_set_docs_chunks(session, existing_doc, chunks)
|
|
||||||
existing_doc.updated_at = datetime.now(UTC)
|
|
||||||
|
|
||||||
updated += 1
|
|
||||||
else:
|
|
||||||
# New document - create it
|
|
||||||
logger.info(f"Creating new document: {source}")
|
|
||||||
|
|
||||||
chunks = create_surfsense_docs_chunks(content)
|
|
||||||
|
|
||||||
document = SurfsenseDocsDocument(
|
|
||||||
source=source,
|
|
||||||
title=title,
|
|
||||||
content=content,
|
|
||||||
content_hash=content_hash,
|
|
||||||
embedding=embed_text(content),
|
|
||||||
chunks=chunks,
|
|
||||||
updated_at=datetime.now(UTC),
|
|
||||||
)
|
|
||||||
|
|
||||||
session.add(document)
|
|
||||||
created += 1
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing {mdx_file}: {e}", exc_info=True)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Delete documents for removed files
|
|
||||||
for source, doc in existing_docs.items():
|
|
||||||
if source not in processed_sources:
|
|
||||||
logger.info(f"Deleting removed document: {source}")
|
|
||||||
await session.delete(doc)
|
|
||||||
deleted += 1
|
|
||||||
|
|
||||||
# Commit all changes
|
|
||||||
await session.commit()
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"Indexing complete: {created} created, {updated} updated, "
|
|
||||||
f"{skipped} skipped, {deleted} deleted"
|
|
||||||
)
|
|
||||||
|
|
||||||
return created, updated, skipped, deleted
|
|
||||||
|
|
||||||
|
|
||||||
async def seed_surfsense_docs() -> tuple[int, int, int, int]:
|
|
||||||
"""
|
|
||||||
Seed Surfsense documentation into the database.
|
|
||||||
|
|
||||||
This function indexes all MDX files from the docs directory.
|
|
||||||
It handles creating, updating, and deleting docs based on content changes.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (created, updated, skipped, deleted) counts
|
|
||||||
Returns (0, 0, 0, 0) if an error occurs
|
|
||||||
"""
|
|
||||||
logger.info("Starting Surfsense docs indexing...")
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with async_session_maker() as session:
|
|
||||||
created, updated, skipped, deleted = await index_surfsense_docs(session)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"Surfsense docs indexing complete: "
|
|
||||||
f"created={created}, updated={updated}, skipped={skipped}, deleted={deleted}"
|
|
||||||
)
|
|
||||||
|
|
||||||
return created, updated, skipped, deleted
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to seed Surfsense docs: {e}", exc_info=True)
|
|
||||||
return 0, 0, 0, 0
|
|
||||||
|
|
@ -1,13 +0,0 @@
|
||||||
"""Utilities for SurfSense's built-in documentation index."""
|
|
||||||
|
|
||||||
from pathlib import PurePosixPath
|
|
||||||
|
|
||||||
DOCS_PUBLIC_ROOT = PurePosixPath("/docs")
|
|
||||||
|
|
||||||
|
|
||||||
def surfsense_docs_public_url(source: str) -> str:
|
|
||||||
"""Return the public docs route for an indexed documentation source path."""
|
|
||||||
docs_path = PurePosixPath(source).with_suffix("")
|
|
||||||
if docs_path.name == "index":
|
|
||||||
docs_path = docs_path.parent
|
|
||||||
return (DOCS_PUBLIC_ROOT / docs_path).as_posix()
|
|
||||||
|
|
@ -1,40 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
Seed Surfsense documentation into the database.
|
|
||||||
|
|
||||||
CLI wrapper for the seed_surfsense_docs function.
|
|
||||||
Can be run manually for debugging or re-indexing.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
python scripts/seed_surfsense_docs.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
# Add the parent directory to the path so we can import app modules
|
|
||||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
||||||
|
|
||||||
from app.tasks.surfsense_docs_indexer import seed_surfsense_docs
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""CLI entry point for seeding Surfsense docs."""
|
|
||||||
print("=" * 50)
|
|
||||||
print(" Surfsense Documentation Seeding")
|
|
||||||
print("=" * 50)
|
|
||||||
|
|
||||||
created, updated, skipped, deleted = asyncio.run(seed_surfsense_docs())
|
|
||||||
|
|
||||||
print()
|
|
||||||
print("Results:")
|
|
||||||
print(f" Created: {created}")
|
|
||||||
print(f" Updated: {updated}")
|
|
||||||
print(f" Skipped: {skipped}")
|
|
||||||
print(f" Deleted: {deleted}")
|
|
||||||
print("=" * 50)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
@ -60,7 +60,6 @@ class TestReadOnlyToolsAllowed:
|
||||||
"glob",
|
"glob",
|
||||||
"web_search",
|
"web_search",
|
||||||
"scrape_webpage",
|
"scrape_webpage",
|
||||||
"search_surfsense_docs",
|
|
||||||
"get_connected_accounts",
|
"get_connected_accounts",
|
||||||
"write_todos",
|
"write_todos",
|
||||||
"task",
|
"task",
|
||||||
|
|
|
||||||
|
|
@ -22,12 +22,6 @@ from app.agents.new_chat.subagents.config import (
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@tool
|
|
||||||
def search_surfsense_docs(query: str) -> str:
|
|
||||||
"""Search the user's KB."""
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
def web_search(query: str) -> str:
|
def web_search(query: str) -> str:
|
||||||
"""Search the public web."""
|
"""Search the public web."""
|
||||||
|
|
@ -95,7 +89,6 @@ def generate_report(topic: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
ALL_TOOLS = [
|
ALL_TOOLS = [
|
||||||
search_surfsense_docs,
|
|
||||||
web_search,
|
web_search,
|
||||||
scrape_webpage,
|
scrape_webpage,
|
||||||
read_file,
|
read_file,
|
||||||
|
|
@ -161,7 +154,7 @@ class TestReportWriterSubagent:
|
||||||
names = {t.name for t in spec["tools"]} # type: ignore[index]
|
names = {t.name for t in spec["tools"]} # type: ignore[index]
|
||||||
assert names == REPORT_WRITER_TOOLS & {t.name for t in ALL_TOOLS}
|
assert names == REPORT_WRITER_TOOLS & {t.name for t in ALL_TOOLS}
|
||||||
assert "generate_report" in names
|
assert "generate_report" in names
|
||||||
assert "search_surfsense_docs" in names
|
assert "read_file" in names
|
||||||
|
|
||||||
def test_deny_rules_block_writes_but_allow_generate_report(self) -> None:
|
def test_deny_rules_block_writes_but_allow_generate_report(self) -> None:
|
||||||
spec = build_report_writer_subagent(tools=ALL_TOOLS)
|
spec = build_report_writer_subagent(tools=ALL_TOOLS)
|
||||||
|
|
@ -272,9 +265,9 @@ class TestFilterToolsWarningSuppression:
|
||||||
# Allowed set asks for two registry tools (one present, one
|
# Allowed set asks for two registry tools (one present, one
|
||||||
# not) plus a bunch of middleware-provided names.
|
# not) plus a bunch of middleware-provided names.
|
||||||
_filter_tools(
|
_filter_tools(
|
||||||
[search_surfsense_docs],
|
[web_search],
|
||||||
allowed_names={
|
allowed_names={
|
||||||
"search_surfsense_docs",
|
"web_search",
|
||||||
"scrape_webpage", # legitimately missing → should warn
|
"scrape_webpage", # legitimately missing → should warn
|
||||||
"read_file", # mw-provided → suppressed
|
"read_file", # mw-provided → suppressed
|
||||||
"ls",
|
"ls",
|
||||||
|
|
@ -322,7 +315,6 @@ class TestDenyPatternsCoverage:
|
||||||
|
|
||||||
def test_deny_patterns_do_not_match_safe_read_tools(self) -> None:
|
def test_deny_patterns_do_not_match_safe_read_tools(self) -> None:
|
||||||
canonical_reads = [
|
canonical_reads = [
|
||||||
"search_surfsense_docs",
|
|
||||||
"read_file",
|
"read_file",
|
||||||
"ls_tree",
|
"ls_tree",
|
||||||
"grep",
|
"grep",
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,11 @@ def test_extract_returns_none_when_no_assistant_text_is_present() -> None:
|
||||||
anything?" rather than guess whether ``""`` means silence or empty
|
anything?" rather than guess whether ``""`` means silence or empty
|
||||||
output. Empty-string contents are normalized to ``None`` too."""
|
output. Empty-string contents are normalized to ``None`` too."""
|
||||||
no_ai = {"messages": [HumanMessage(content="just a question")]}
|
no_ai = {"messages": [HumanMessage(content="just a question")]}
|
||||||
only_tools = {"messages": [AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}])]}
|
only_tools = {
|
||||||
|
"messages": [
|
||||||
|
AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}])
|
||||||
|
]
|
||||||
|
}
|
||||||
empty_string = {"messages": [AIMessage(content=" ")]}
|
empty_string = {"messages": [AIMessage(content=" ")]}
|
||||||
|
|
||||||
assert extract_final_assistant_message(no_ai) is None
|
assert extract_final_assistant_message(no_ai) is None
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,9 @@ async def test_with_retries_returns_result_and_attempts_one_on_first_success() -
|
||||||
assert calls == 1
|
assert calls == 1
|
||||||
|
|
||||||
|
|
||||||
async def test_with_retries_returns_attempt_count_when_succeeding_after_failures() -> None:
|
async def test_with_retries_returns_attempt_count_when_succeeding_after_failures() -> (
|
||||||
|
None
|
||||||
|
):
|
||||||
"""A coroutine that fails twice then succeeds returns ``attempts=3``
|
"""A coroutine that fails twice then succeeds returns ``attempts=3``
|
||||||
(the actual attempt that produced the result). Locks the contract
|
(the actual attempt that produced the result). Locks the contract
|
||||||
that the caller can distinguish first-try success from a recovery."""
|
that the caller can distinguish first-try success from a recovery."""
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,9 @@ from app.automations.schemas.definition.plan_step import PlanStep
|
||||||
pytestmark = pytest.mark.unit
|
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
def test_automation_definition_accepts_minimal_valid_input_with_sensible_defaults() -> None:
|
def test_automation_definition_accepts_minimal_valid_input_with_sensible_defaults() -> (
|
||||||
|
None
|
||||||
|
):
|
||||||
"""A definition with just ``name`` + a one-step ``plan`` is valid and
|
"""A definition with just ``name`` + a one-step ``plan`` is valid and
|
||||||
fills in the rest with safe defaults so users don't have to write
|
fills in the rest with safe defaults so users don't have to write
|
||||||
out every section to get started."""
|
out every section to get started."""
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,9 @@ def test_environment_finalizes_datetime_output_to_iso_string() -> None:
|
||||||
when emitting ``inputs.fired_at`` and other datetime values."""
|
when emitting ``inputs.fired_at`` and other datetime values."""
|
||||||
dt = datetime(2026, 5, 28, 14, 30, tzinfo=UTC)
|
dt = datetime(2026, 5, 28, 14, 30, tzinfo=UTC)
|
||||||
|
|
||||||
assert render_template("{{ moment }}", {"moment": dt}) == "2026-05-28T14:30:00+00:00"
|
assert (
|
||||||
|
render_template("{{ moment }}", {"moment": dt}) == "2026-05-28T14:30:00+00:00"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_environment_finalizes_none_output_to_empty_string() -> None:
|
def test_environment_finalizes_none_output_to_empty_string() -> None:
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ def test_action_definition_params_schema_reflects_params_model() -> None:
|
||||||
name="N",
|
name="N",
|
||||||
description="D",
|
description="D",
|
||||||
params_model=_Topic,
|
params_model=_Topic,
|
||||||
build_handler=lambda _ctx: (lambda _p: {}), # type: ignore[arg-type,return-value]
|
build_handler=lambda _ctx: lambda _p: {}, # type: ignore[arg-type,return-value]
|
||||||
)
|
)
|
||||||
|
|
||||||
schema = definition.params_schema
|
schema = definition.params_schema
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,9 @@ class _Params(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
def _trigger(type_: str = "test_trigger") -> TriggerDefinition:
|
def _trigger(type_: str = "test_trigger") -> TriggerDefinition:
|
||||||
return TriggerDefinition(type=type_, description="Test trigger.", params_model=_Params)
|
return TriggerDefinition(
|
||||||
|
type=type_, description="Test trigger.", params_model=_Params
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _action(type_: str = "test_action") -> ActionDefinition:
|
def _action(type_: str = "test_action") -> ActionDefinition:
|
||||||
|
|
@ -38,7 +40,7 @@ def _action(type_: str = "test_action") -> ActionDefinition:
|
||||||
name="Test",
|
name="Test",
|
||||||
description="Test action.",
|
description="Test action.",
|
||||||
params_model=_Params,
|
params_model=_Params,
|
||||||
build_handler=lambda _ctx: (lambda _p: {}), # type: ignore[arg-type,return-value]
|
build_handler=lambda _ctx: lambda _p: {}, # type: ignore[arg-type,return-value]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue