mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-26 21:39:43 +02:00
Merge pull request #1539 from CREDO23/improve-chat-agent-context-and-citations
[FEAT] Unified [n] citation registry for KB + web, pull-based retrieval
This commit is contained in:
commit
94fdb8a113
160 changed files with 4097 additions and 5238 deletions
|
|
@ -433,14 +433,6 @@ LANGSMITH_PROJECT=surfsense
|
|||
# Skills + subagents
|
||||
# SURFSENSE_ENABLE_SKILLS=false
|
||||
# SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
|
||||
# SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false
|
||||
|
||||
# KB retrieval mode (default OFF = lazy). When OFF, the main agent retrieves
|
||||
# KB content on demand via the `search_knowledge_base` tool and skips the
|
||||
# expensive per-turn pre-injection (planner LLM + embed + hybrid search,
|
||||
# ~2.3s); explicit @-mentions are still surfaced cheaply. Set to true to
|
||||
# restore the original eager `<priority_documents>` pre-injection.
|
||||
# SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION=false
|
||||
|
||||
# Snapshot / revert
|
||||
# SURFSENSE_ENABLE_ACTION_LOG=false
|
||||
|
|
|
|||
|
|
@ -6,8 +6,6 @@ read-only). This middleware loads it once on the first turn into
|
|||
|
||||
* :class:`KnowledgeTreeMiddleware` can render the synthetic ``/documents``
|
||||
view without touching the DB.
|
||||
* :class:`KnowledgePriorityMiddleware` skips hybrid search and emits a
|
||||
degenerate priority list.
|
||||
* :class:`KBPostgresBackend` (``als_info`` / ``aread`` / ``_load_file_data``)
|
||||
recognises the synthetic path.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,42 +0,0 @@
|
|||
"""KB priority planner: <priority_documents> injection."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
|
||||
KnowledgePriorityMiddleware,
|
||||
)
|
||||
from app.services.llm_service import get_planner_llm
|
||||
|
||||
|
||||
def build_knowledge_priority_mw(
    *,
    llm: BaseChatModel,
    search_space_id: int,
    filesystem_mode: FilesystemMode,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    preinjection_enabled: bool = True,
) -> KnowledgePriorityMiddleware:
    """Construct the knowledge-base priority middleware for the main agent.

    ``preinjection_enabled`` selects between two modes:

    * ``True`` - the middleware is built with ``mentions_only=False``.
    * ``False`` (the lazy default) - the middleware is built in mentions-only
      mode, which skips the expensive planner LLM + embedding + hybrid search
      and only surfaces explicit @-mentions. The main agent is then expected
      to fetch relevant KB content on demand through the
      ``search_knowledge_base`` tool.

    The planner LLM is always obtained from ``get_planner_llm()`` and
    ``inject_system_message`` is always ``False``.
    """
    # The middleware flag is the inverse of the caller-facing one.
    mentions_only = not preinjection_enabled
    planner_llm = get_planner_llm()

    return KnowledgePriorityMiddleware(
        llm=llm,
        planner_llm=planner_llm,
        search_space_id=search_space_id,
        filesystem_mode=filesystem_mode,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        inject_system_message=False,
        mentions_only=mentions_only,
    )
|
||||
|
|
@ -1,10 +1,11 @@
|
|||
"""Main-agent middleware list assembly: one line per slot.
|
||||
|
||||
The main agent is a pure router — filesystem reads/writes are owned by the
|
||||
``knowledge_base`` subagent and delegated via the ``task`` tool. The stack
|
||||
here only renders KB context (workspace tree + priority docs), projects it
|
||||
into system messages, and commits any subagent-side staged writes at end of
|
||||
turn (cloud mode).
|
||||
``knowledge_base`` subagent and delegated via the ``task`` tool. Knowledge-base
|
||||
retrieval is pull-based: the ``search_knowledge_base`` tool runs the hybrid
|
||||
search on demand and renders ``<retrieved_context>`` with ``[n]`` citation
|
||||
labels. The stack here computes the workspace tree, commits any subagent-side
|
||||
staged writes at end of turn (cloud mode), and wires the supporting middleware.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -33,9 +34,6 @@ from app.agents.chat.multi_agent_chat.shared.middleware.anthropic_cache import (
|
|||
from app.agents.chat.multi_agent_chat.shared.middleware.compaction import (
|
||||
build_compaction_mw,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.kb_context_projection import (
|
||||
build_kb_context_projection_mw,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.patch_tool_calls import (
|
||||
build_patch_tool_calls_mw,
|
||||
)
|
||||
|
|
@ -84,7 +82,6 @@ from .context_editing import build_context_editing_mw
|
|||
from .dedup_hitl import build_dedup_hitl_mw
|
||||
from .doom_loop import build_doom_loop_mw
|
||||
from .kb_persistence import build_kb_persistence_mw
|
||||
from .knowledge_priority import build_knowledge_priority_mw
|
||||
from .knowledge_tree import build_knowledge_tree_mw
|
||||
from .noop_injection import build_noop_injection_mw
|
||||
from .otel_span import build_otel_mw
|
||||
|
|
@ -237,16 +234,6 @@ def build_main_agent_deepagent_middleware(
|
|||
search_space_id=search_space_id,
|
||||
llm=llm,
|
||||
),
|
||||
build_knowledge_priority_mw(
|
||||
llm=llm,
|
||||
search_space_id=search_space_id,
|
||||
filesystem_mode=filesystem_mode,
|
||||
available_connectors=available_connectors,
|
||||
available_document_types=available_document_types,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
preinjection_enabled=flags.enable_kb_priority_preinjection,
|
||||
),
|
||||
build_kb_context_projection_mw(),
|
||||
build_kb_persistence_mw(
|
||||
filesystem_mode=filesystem_mode,
|
||||
search_space_id=search_space_id,
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search
|
|||
1. Decompose the user's question into 2-4 specific, citation-worthy sub-questions.
|
||||
2. For each sub-question, run **one** targeted KB search (focused on terms the user would have written, not synonyms). Open the most relevant 2-3 documents fully via `read_file` if their excerpts are too short.
|
||||
3. Use `grep` to find supporting passages in long files instead of re-reading them end to end.
|
||||
4. Cite every claim with `[citation:chunk_id]` exactly as the chunk tag specifies.
|
||||
4. Cite every claim with the `[n]` label shown on the passage you used (search results and `read_file` output both carry them); never write a chunk id, URL, or title yourself.
|
||||
|
||||
## What good output looks like
|
||||
- Short paragraphs with inline citations.
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
<citations>
|
||||
Citation markers are **disabled** in this configuration.
|
||||
|
||||
Do NOT include `[citation:…]` markers anywhere, even if tool descriptions or
|
||||
Do NOT include `[n]` citation labels or `[citation:…]` markers anywhere, even if
|
||||
tool output (`<retrieved_context>`, `<web_results>`), tool descriptions, or
|
||||
examples reference them. Ignore citation-format reminders elsewhere in this
|
||||
prompt when they conflict with this block.
|
||||
|
||||
1. Answer in plain prose. Optional markdown links to public URLs when
|
||||
sources are URLs.
|
||||
2. Do not expose raw chunk ids, document ids, or internal ids to the user.
|
||||
3. Present KB or docs facts naturally without attribution markers.
|
||||
3. Present KB, web, or docs facts naturally without attribution markers.
|
||||
</citations>
|
||||
|
|
|
|||
|
|
@ -1,42 +1,16 @@
|
|||
<citations>
|
||||
Citations reach the answer through two channels. Use whichever applies — and
|
||||
never invent ids you didn't see. Citation ids are resolved by exact-match
|
||||
lookup; a wrong id silently breaks the link, so when in doubt, omit.
|
||||
Cite with one token: the bracket label `[n]`. Every citable result —
|
||||
`search_knowledge_base` passages, `web_search` results, and prose from a
|
||||
`task` knowledge_base/research specialist — already carries `[n]` labels on a
|
||||
single shared count. Those labels are the only citation you write; the server
|
||||
resolves each one back to its source after the turn.
|
||||
|
||||
### Channel A — chunk blocks injected this turn
|
||||
When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
|
||||
turn:
|
||||
|
||||
1. For each factual statement taken from those chunks, add
|
||||
`[citation:chunk_id]` using the **exact** id from a visible
|
||||
`<chunk id='…'>` tag. Copy digit-for-digit (or the URL verbatim);
|
||||
do not retype from memory.
|
||||
2. `<document_id>` is the parent doc id, **not** a citation source —
|
||||
only ids inside `<chunk id='…'>` count.
|
||||
3. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated,
|
||||
each id copied individually).
|
||||
4. Never invent, normalise, or guess at adjacent ids; if unsure, omit.
|
||||
5. Plain brackets only — no markdown links, no footnote numbering.
|
||||
|
||||
### Channel B — citations relayed by a `task` specialist
|
||||
A `task(...)` tool message may contain `[citation:<chunk_id>]` markers
|
||||
the specialist already attached to its prose. The specialist saw the
|
||||
underlying `<chunk id='…'>` blocks; you didn't. So:
|
||||
|
||||
1. **Preserve those markers verbatim** in your final answer — do not
|
||||
reformat, renumber, drop, or wrap them in markdown links. When you
|
||||
paraphrase a specialist sentence, copy the marker character-for-
|
||||
character; do not regenerate the id from memory (LLMs reliably
|
||||
corrupt nearby digits).
|
||||
2. Keep each marker attached to the sentence the specialist attached
|
||||
it to.
|
||||
3. Do **not** add new `[citation:…]` markers of your own to a
|
||||
specialist's prose; if a fact has no marker, the specialist
|
||||
couldn't tie it to a chunk and neither can you.
|
||||
4. When a specialist returns JSON, the citation markers live inside
|
||||
the prose-bearing fields (e.g. a summary or excerpt). Pull them
|
||||
along with the surrounding sentence when you quote.
|
||||
|
||||
If neither channel surfaces citation markers this turn, do not fabricate
|
||||
them.
|
||||
1. Put the label right after the claim it supports.
|
||||
2. Several sources for one claim: stack brackets, `[1][2]`.
|
||||
3. Copy labels exactly as shown, a specialist's included — never renumber them,
|
||||
add your own, or write the underlying title, date, id, or URL instead.
|
||||
4. Write the bare `[n]` and nothing else: no `[citation:...]`, no markdown links,
|
||||
no footnote marks, no "References" section.
|
||||
5. Only label claims the sources support. If nothing shown backs a claim — or you
|
||||
never saw a label — leave it uncited; never invent one.
|
||||
</citations>
|
||||
|
|
|
|||
|
|
@ -8,20 +8,14 @@ standing instructions. It also reports current character usage versus the
|
|||
hard limit so you can manage the budget. Treat it as background colour for
|
||||
your answer, not as the task itself.
|
||||
|
||||
`<priority_documents>` lists the workspace documents most relevant to the
|
||||
latest user message, ranked by relevance score, with `[USER-MENTIONED]`
|
||||
flagged on anything the user explicitly referenced. When the task is about
|
||||
workspace content, read these first; matched passages inside each document
|
||||
are flagged via `<chunk_index>` so you can jump straight to them.
|
||||
|
||||
`<workspace_tree>` shows the full `/documents/` folder and file layout. Use
|
||||
it to resolve paths the user describes in natural language ("my Q2 roadmap",
|
||||
"last week's meeting notes") into concrete document references before
|
||||
delegating to a specialist.
|
||||
|
||||
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
|
||||
by KB search (backing `<priority_documents>`). Each chunk carries a stable
|
||||
`id` attribute.
|
||||
`<retrieved_context>` blocks hold knowledge-base passages from
|
||||
`search_knowledge_base`; each `<document>` inside is in excerpt view and every
|
||||
passage is prefixed with an `[n]` citation label.
|
||||
|
||||
If a block doesn't appear this turn, work from the conversation alone.
|
||||
</dynamic_context>
|
||||
|
|
|
|||
|
|
@ -7,21 +7,14 @@ decisions, conventions, architecture notes, processes, key facts. It also
|
|||
reports current character usage versus the hard limit so you can manage the
|
||||
budget. Treat it as background colour for your answer, not as the task itself.
|
||||
|
||||
`<priority_documents>` lists the workspace documents most relevant to the
|
||||
latest user message, ranked by relevance score, with `[USER-MENTIONED]`
|
||||
flagged on anything someone in the thread explicitly referenced. When the
|
||||
task is about workspace content, read these first; matched passages inside
|
||||
each document are flagged via `<chunk_index>` so you can jump straight to
|
||||
them.
|
||||
|
||||
`<workspace_tree>` shows the full `/documents/` folder and file layout. Use
|
||||
it to resolve paths described in natural language ("the Q2 roadmap", "last
|
||||
week's planning notes") into concrete document references before delegating
|
||||
to a specialist.
|
||||
|
||||
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
|
||||
by KB search (backing `<priority_documents>`). Each chunk carries a stable
|
||||
`id` attribute.
|
||||
`<retrieved_context>` blocks hold knowledge-base passages from
|
||||
`search_knowledge_base`; each `<document>` inside is in excerpt view and every
|
||||
passage is prefixed with an `[n]` citation label.
|
||||
|
||||
If a block doesn't appear this turn, work from the conversation alone.
|
||||
</dynamic_context>
|
||||
|
|
|
|||
|
|
@ -14,5 +14,5 @@ Workflow (Understand → Plan → Act → Verify):
|
|||
|
||||
Discipline:
|
||||
- Do not imply access to connectors, MCP tools, or deliverable generators except via **task**.
|
||||
- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>` or `<priority_documents>`. Otherwise describe the document in natural language and let the subagent resolve it.
|
||||
- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>`. Otherwise describe the document in natural language and let the subagent resolve it.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ Tool discipline:
|
|||
- Typically one investigative tool per turn unless several independent read-only queries are clearly needed; don’t repeat identical calls.
|
||||
|
||||
Attribution:
|
||||
- When citations are **enabled** (see citation block above) and you answer from chunk-tagged documents, use `[citation:chunk_id]` exactly as specified there.
|
||||
- When citations are **disabled**, never emit `[citation:…]` — plain prose and links per tool guidance.
|
||||
- When citations are **enabled** (see citation block above) and you answer from labelled passages, cite with the bare `[n]` label exactly as specified there.
|
||||
- When citations are **disabled**, never emit `[n]` or `[citation:…]` — plain prose and links per tool guidance.
|
||||
|
||||
Style:
|
||||
- No emojis unless asked; flat lists for short answers.
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ You are running on an OpenAI Codex-class model (SurfSense **main agent**).
|
|||
|
||||
Output style:
|
||||
- Concise; don’t paste huge fetch blobs — summarize.
|
||||
- When citations are **enabled** and you rely on chunk-tagged docs, references may use `[citation:chunk_id]` per the citation block above; when **disabled**, use prose and URLs only.
|
||||
- When citations are **enabled** and you rely on labelled passages, cite with the bare `[n]` label per the citation block above; when **disabled**, use prose and URLs only.
|
||||
- Numbered lists work well when the user should reply with a single option index.
|
||||
- No emojis; single-level bullets.
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@
|
|||
facts, anything outside SurfSense docs and the workspace KB. Reach for
|
||||
it whenever freshness matters or you'd otherwise guess from memory.
|
||||
- Don't refuse with "I lack network access" — call the tool.
|
||||
- Returns a `<web_results>` block: each result is labelled `[n]`. Cite a
|
||||
result by writing that `[n]` after the statement it supports (when
|
||||
citations are enabled) — do not hand-write the URL as a markdown link.
|
||||
- If results are thin, say so and offer to refine the query.
|
||||
- Args: `query`, `top_k` (default 10, max 50).
|
||||
- Follow up with `scrape_webpage` on the best URL when snippets are too
|
||||
shallow. Present sources with `[label](url)` markdown links.
|
||||
shallow.
|
||||
|
|
|
|||
|
|
@ -1,12 +1,11 @@
|
|||
"""On-demand ``search_knowledge_base`` main-agent tool (OpenCode-style lazy RAG).
|
||||
"""On-demand ``search_knowledge_base`` main-agent tool (citation-spine RAG).
|
||||
|
||||
The main agent no longer receives eagerly pre-injected KB context on every
|
||||
turn (see :class:`KnowledgePriorityMiddleware`, now gated off by default).
|
||||
Instead it calls this tool only when it decides it needs knowledge-base
|
||||
content. The tool runs a single hybrid search (embed + DB search, ~0.5s),
|
||||
formats the top matches for the model, and writes ``kb_matched_chunk_ids``
|
||||
into graph state so matched-section highlighting is preserved when the agent
|
||||
later reads a document via ``task(knowledge_base)``.
|
||||
The main agent calls this when it decides it needs knowledge-base content. The
|
||||
tool runs one hybrid search, renders the matched passages as a
|
||||
``<retrieved_context>`` block whose passages carry server-assigned ``[n]``
|
||||
labels, and persists the conversation's ``CitationRegistry`` onto graph state so
|
||||
the ``[n]`` -> ``[citation:<payload>]`` normalizer can resolve them after the
|
||||
turn.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -18,153 +17,70 @@ from langchain.tools import ToolRuntime
|
|||
from langchain_core.messages import ToolMessage
|
||||
from langchain_core.tools import BaseTool, StructuredTool
|
||||
from langgraph.types import Command
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
|
||||
search_knowledge_base as _hybrid_search_kb,
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import load_registry
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval import SearchScope, build_context
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
|
||||
search_chunks,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
|
||||
SurfSenseFilesystemState,
|
||||
)
|
||||
from app.agents.chat.runtime.path_resolver import (
|
||||
PathIndex,
|
||||
build_path_index,
|
||||
doc_to_virtual_path,
|
||||
)
|
||||
from app.db import Document, shielded_async_session
|
||||
from app.agents.chat.runtime.references import referenced_document_ids
|
||||
from app.db import shielded_async_session
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
_perf_log = get_perf_logger()
|
||||
|
||||
_DEFAULT_TOP_K = 5
|
||||
_MAX_TOP_K = 20
|
||||
_PER_DOC_SNIPPET_CHARS = 1200
|
||||
_MAX_TOTAL_CHARS = 16_000
|
||||
|
||||
_TOOL_DESCRIPTION = (
|
||||
"Search the user's knowledge base (their indexed documents, files, and "
|
||||
"connector content) for passages relevant to a query, using hybrid "
|
||||
"semantic + keyword retrieval.\n\n"
|
||||
"Use this FIRST to ground any factual or informational answer about the "
|
||||
"user's own documents, notes, or connected sources. The workspace tree "
|
||||
"shows which files exist; this tool pulls the actual relevant content. "
|
||||
"Each hit returns the document's virtual path, a relevance score, and the "
|
||||
"matched snippets. If you need a document's full text, delegate a read to "
|
||||
"the knowledge_base specialist via `task` using the returned path.\n\n"
|
||||
"user's own documents, notes, or connected sources. It returns a "
|
||||
"<retrieved_context> block: each matched passage is labelled [n]. Cite a "
|
||||
"passage by writing that [n] after the statement it supports.\n\n"
|
||||
"Write a focused, specific query containing the concrete entities, "
|
||||
"acronyms, people, projects, or terms you are looking for."
|
||||
)
|
||||
|
||||
|
||||
async def _resolve_virtual_paths(
|
||||
results: list[dict[str, Any]],
|
||||
def _search_types(
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
) -> tuple[str, ...] | None:
    """Combine connector and document-type filters into one search scope.

    Returns a sorted, de-duplicated tuple of every name supplied by either
    argument, or ``None`` when neither supplies any (i.e. unrestricted).
    """
    merged: set[str] = {
        *(available_document_types or ()),
        *(available_connectors or ()),
    }
    if not merged:
        # Nothing to filter on: signal "unrestricted" rather than an empty scope.
        return None
    return tuple(sorted(merged))
|
||||
|
||||
|
||||
async def _build_search_scope(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
search_space_id: int,
|
||||
) -> dict[int, str]:
|
||||
"""Resolve ``Document.id`` -> canonical virtual path for the search hits."""
|
||||
doc_ids = [
|
||||
doc_id
|
||||
for doc_id in (
|
||||
(doc.get("document") or {}).get("id")
|
||||
for doc in results
|
||||
if isinstance(doc, dict)
|
||||
)
|
||||
if isinstance(doc_id, int)
|
||||
]
|
||||
if not doc_ids:
|
||||
return {}
|
||||
|
||||
async with shielded_async_session() as session:
|
||||
index: PathIndex = await build_path_index(session, search_space_id)
|
||||
folder_rows = await session.execute(
|
||||
select(Document.id, Document.folder_id).where(
|
||||
Document.search_space_id == search_space_id,
|
||||
Document.id.in_(doc_ids),
|
||||
)
|
||||
)
|
||||
folder_by_doc_id = {row.id: row.folder_id for row in folder_rows.all()}
|
||||
|
||||
paths: dict[int, str] = {}
|
||||
for doc in results:
|
||||
doc_meta = doc.get("document") or {}
|
||||
doc_id = doc_meta.get("id")
|
||||
if not isinstance(doc_id, int):
|
||||
continue
|
||||
folder_id = folder_by_doc_id.get(doc_id, doc_meta.get("folder_id"))
|
||||
paths[doc_id] = doc_to_virtual_path(
|
||||
doc_id=doc_id,
|
||||
title=str(doc_meta.get("title") or "untitled"),
|
||||
folder_id=folder_id if isinstance(folder_id, int) else None,
|
||||
index=index,
|
||||
)
|
||||
return paths
|
||||
|
||||
|
||||
def _format_hits(
|
||||
results: list[dict[str, Any]],
|
||||
*,
|
||||
paths: dict[int, str],
|
||||
query: str,
|
||||
) -> str:
|
||||
"""Render search hits as a compact, model-readable block."""
|
||||
if not results:
|
||||
return (
|
||||
f"No knowledge-base matches found for query: {query!r}.\n"
|
||||
"Tell the user nothing relevant was found in their workspace, or "
|
||||
"try a different query."
|
||||
)
|
||||
|
||||
lines: list[str] = [f"<knowledge_base_results query={query!r}>"]
|
||||
total = len(lines[0])
|
||||
for rank, doc in enumerate(results, start=1):
|
||||
doc_meta = doc.get("document") or {}
|
||||
doc_id = doc_meta.get("id")
|
||||
title = str(doc_meta.get("title") or "untitled")
|
||||
doc_type = doc_meta.get("document_type") or doc.get("source") or "document"
|
||||
score = doc.get("score")
|
||||
score_str = f"{score:.3f}" if isinstance(score, int | float) else "n/a"
|
||||
path = paths.get(doc_id) if isinstance(doc_id, int) else None
|
||||
|
||||
header = f"\n{rank}. {title} (type={doc_type}, score={score_str})" + (
|
||||
f"\n path: {path}" if path else ""
|
||||
)
|
||||
|
||||
content = (doc.get("content") or "").strip()
|
||||
if content:
|
||||
snippet = content[:_PER_DOC_SNIPPET_CHARS].strip()
|
||||
if len(content) > _PER_DOC_SNIPPET_CHARS:
|
||||
snippet += " ..."
|
||||
body = "\n " + snippet.replace("\n", "\n ")
|
||||
else:
|
||||
body = "\n (no preview available; read the document for details)"
|
||||
|
||||
entry = header + body
|
||||
if total + len(entry) > _MAX_TOTAL_CHARS:
|
||||
lines.append("\n<!-- additional matches truncated to fit context -->")
|
||||
break
|
||||
lines.append(entry)
|
||||
total += len(entry)
|
||||
|
||||
lines.append(
|
||||
"\n\nTo read a full document, delegate to the knowledge_base specialist "
|
||||
"with `task`, referencing the path above."
|
||||
document_types: tuple[str, ...] | None,
|
||||
runtime: ToolRuntime[None, SurfSenseFilesystemState],
|
||||
) -> SearchScope:
|
||||
"""Assemble the retrieval scope: workspace document-type filter + @-mention pins."""
|
||||
ctx = getattr(runtime, "context", None)
|
||||
document_ids = await referenced_document_ids(
|
||||
session,
|
||||
search_space_id=search_space_id,
|
||||
document_ids=getattr(ctx, "mentioned_document_ids", None),
|
||||
folder_ids=getattr(ctx, "mentioned_folder_ids", None),
|
||||
)
|
||||
return SearchScope(
|
||||
document_types=document_types,
|
||||
document_ids=document_ids or None,
|
||||
)
|
||||
lines.append("\n</knowledge_base_results>")
|
||||
return "".join(lines)
|
||||
|
||||
|
||||
def _matched_chunk_ids(results: list[dict[str, Any]]) -> dict[int, list[int]]:
    """Collect the matched chunk ids of each search hit, keyed by document id.

    A hit contributes only when its ``document.id`` is an ``int`` and at least
    one of its ``matched_chunk_ids`` is an ``int`` or ``str``; each kept id is
    converted with ``int()``.
    """
    by_document: dict[int, list[int]] = {}
    for hit in results:
        document_id = (hit.get("document") or {}).get("id")
        if isinstance(document_id, int):
            raw_ids = hit.get("matched_chunk_ids") or []
            chunk_ids = [int(raw) for raw in raw_ids if isinstance(raw, (int, str))]
            # Documents without any usable chunk id are left out entirely.
            if chunk_ids:
                by_document[document_id] = chunk_ids
    return by_document
|
||||
|
||||
|
||||
def create_search_knowledge_base_tool(
|
||||
|
|
@ -176,8 +92,7 @@ def create_search_knowledge_base_tool(
|
|||
"""Factory for the on-demand ``search_knowledge_base`` tool."""
|
||||
|
||||
_space_id = search_space_id
|
||||
_connectors = available_connectors
|
||||
_doc_types = available_document_types
|
||||
_document_types = _search_types(available_connectors, available_document_types)
|
||||
|
||||
async def _impl(
|
||||
query: Annotated[
|
||||
|
|
@ -195,34 +110,45 @@ def create_search_knowledge_base_tool(
|
|||
return "Error: provide a non-empty search query."
|
||||
|
||||
clamped_top_k = min(max(1, top_k), _MAX_TOP_K)
|
||||
t0 = time.perf_counter()
|
||||
results = await _hybrid_search_kb(
|
||||
query=cleaned_query,
|
||||
search_space_id=_space_id,
|
||||
available_connectors=_connectors,
|
||||
available_document_types=_doc_types,
|
||||
top_k=clamped_top_k,
|
||||
)
|
||||
registry = load_registry(getattr(runtime, "state", None))
|
||||
|
||||
paths = await _resolve_virtual_paths(results, search_space_id=_space_id)
|
||||
rendered = _format_hits(results, paths=paths, query=cleaned_query)
|
||||
matched = _matched_chunk_ids(results)
|
||||
t0 = time.perf_counter()
|
||||
async with shielded_async_session() as session:
|
||||
scope = await _build_search_scope(
|
||||
session,
|
||||
search_space_id=_space_id,
|
||||
document_types=_document_types,
|
||||
runtime=runtime,
|
||||
)
|
||||
hits = await search_chunks(
|
||||
session,
|
||||
search_space_id=_space_id,
|
||||
query=cleaned_query,
|
||||
scope=scope,
|
||||
top_k=clamped_top_k,
|
||||
)
|
||||
rendered = build_context(cleaned_query, hits, registry)
|
||||
|
||||
_perf_log.info(
|
||||
"[search_knowledge_base] tool query=%r results=%d chars=%d in %.3fs",
|
||||
"[search_knowledge_base] tool query=%r docs=%d in %.3fs",
|
||||
cleaned_query[:60],
|
||||
len(results),
|
||||
len(rendered),
|
||||
len(hits),
|
||||
time.perf_counter() - t0,
|
||||
)
|
||||
|
||||
if rendered is None:
|
||||
return (
|
||||
f"No knowledge-base matches found for query: {cleaned_query!r}.\n"
|
||||
"Tell the user nothing relevant was found in their workspace, or "
|
||||
"try a different query."
|
||||
)
|
||||
|
||||
update: dict[str, Any] = {
|
||||
"messages": [
|
||||
ToolMessage(content=rendered, tool_call_id=runtime.tool_call_id)
|
||||
],
|
||||
"citation_registry": registry,
|
||||
}
|
||||
if matched:
|
||||
update["kb_matched_chunk_ids"] = matched
|
||||
return Command(update=update)
|
||||
|
||||
return StructuredTool.from_function(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,22 @@
|
|||
"""Citation registry: maps model-facing ``[n]`` labels to real sources.
|
||||
|
||||
Server-side only; the model sees only the bare ``[n]``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .markers import to_frontend_payload
|
||||
from .models import CitationEntry, CitationSourceType
|
||||
from .normalizer import normalize_citations
|
||||
from .registry import CitationRegistry, make_key
|
||||
from .state import load_registry
|
||||
|
||||
__all__ = [
|
||||
"CitationEntry",
|
||||
"CitationRegistry",
|
||||
"CitationSourceType",
|
||||
"load_registry",
|
||||
"make_key",
|
||||
"normalize_citations",
|
||||
"to_frontend_payload",
|
||||
]
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
"""Map a registered citation to the frontend ``[citation:<payload>]`` payload.
|
||||
|
||||
The citation renderer understands a chunk id (``42``), a negative chunk id for
|
||||
anonymous uploads (``-3``), and a URL. This is the seam that turns a server-side
|
||||
source into one the renderer can resolve; it grows as more source kinds become
|
||||
renderable. Kinds with no renderable form yet return ``None`` so the marker is
|
||||
dropped rather than emitted broken.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .models import CitationEntry, CitationSourceType
|
||||
|
||||
|
||||
def to_frontend_payload(entry: CitationEntry) -> str | None:
    """Return the text that goes inside ``[citation:<payload>]`` for ``entry``.

    Chunk-based sources yield the stringified ``chunk_id`` from the locator,
    web results yield the locator's ``url``, and every other source kind (or a
    missing/empty locator value) yields ``None`` so no marker is emitted.
    """
    locator = entry.locator
    kind = entry.source_type

    if kind in (CitationSourceType.KB_CHUNK, CitationSourceType.ANON_CHUNK):
        chunk_id = locator.get("chunk_id")
        if chunk_id is None:
            return None
        return str(chunk_id)

    if kind == CitationSourceType.WEB_RESULT:
        # An empty URL is treated the same as a missing one.
        return locator.get("url") or None

    # Connector items and chat turns have no client-side renderer yet
    # (the frontend resolves only chunk ids and URLs), so they stay
    # unmarked until both a registration path and a renderer exist.
    return None
|
||||
|
||||
|
||||
__all__ = ["to_frontend_payload"]
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
"""Data shapes for the citation registry."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class CitationSourceType(str, Enum):
    """Source kind of a citable unit; the value is the stable wire/dedup form."""

    # Rendered by ``to_frontend_payload`` as the locator's ``chunk_id``.
    KB_CHUNK = "kb_chunk"
    # Not handled by ``to_frontend_payload``; falls through to ``None``.
    KB_DOCUMENT = "kb_document"
    # No client-side renderer yet, so ``to_frontend_payload`` returns ``None``.
    CONNECTOR_ITEM = "connector_item"
    # Rendered by ``to_frontend_payload`` as the locator's ``url``.
    WEB_RESULT = "web_result"
    # No client-side renderer yet, so ``to_frontend_payload`` returns ``None``.
    CHAT_TURN = "chat_turn"
    # Rendered like ``KB_CHUNK``; presumably the anonymous-upload chunks the
    # markers module describes as negative chunk ids - confirm against callers.
    ANON_CHUNK = "anon_chunk"
|
||||
|
||||
|
||||
class CitationEntry(BaseModel):
    """A registered unit: ``n`` (the label), ``locator`` (identity), ``display`` (UI only)."""

    # Ordinal the model writes inside the brackets, i.e. the ``n`` of ``[n]``.
    n: int
    # Kind of source; ``to_frontend_payload`` branches on this value.
    source_type: CitationSourceType
    # Identity of the source; ``to_frontend_payload`` reads ``chunk_id`` or
    # ``url`` from it depending on ``source_type``.
    locator: dict[str, Any]
    # UI-only metadata; ``default_factory`` gives each entry its own empty dict.
    display: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
__all__ = ["CitationEntry", "CitationSourceType"]
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
"""Rewrite model ``[n]`` citations into frontend ``[citation:<payload>]`` markers.
|
||||
|
||||
The model cites with tiny ordinals ``[n]`` — one per bracket. Several citations
|
||||
are just several brackets (``[1][2]`` or ``[1], [2]``). Each ordinal is resolved
|
||||
through the registry and replaced with a marker the citation renderer
|
||||
understands. Unknown or not-yet-renderable ordinals are dropped, so a bad
|
||||
citation disappears rather than misleads. Code spans are left untouched.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
|
||||
from .markers import to_frontend_payload
|
||||
from .registry import CitationRegistry
|
||||
|
||||
# Fenced (```...```) and inline (`...`) code; mirrors the frontend's single
|
||||
# code-region pattern so ordinals inside examples are never rewritten.
|
||||
_CODE_REGION = re.compile(r"```[\s\S]*?```|`[^`\n]+`")
|
||||
|
||||
# A single ordinal in a bracket: `[1]`, `[12]`. We deliberately match even when
|
||||
# glued to the preceding word (`docs[17]`) because the model very frequently
|
||||
# writes citations that way — requiring a non-word char before `[` (to dodge
|
||||
# `arr[1]`) silently dropped those citations, leaving raw `[n]` that both fails to
|
||||
# render and reads like array indexing. Genuine code/array syntax is instead
|
||||
# protected by the code-region carve-out below; an unresolved ordinal drops
|
||||
# harmlessly. Adjacent citations `[1][2]` are each rewritten.
|
||||
_ORDINAL = re.compile(r"\[\s*(\d+)\s*\]")
|
||||
|
||||
|
||||
def normalize_citations(text: str, registry: CitationRegistry) -> str:
    """Rewrite every ``[n]`` outside code regions into its frontend marker.

    Ordinals that do not resolve to a renderable payload are removed. Empty
    input is returned unchanged.
    """
    if text:
        substitute = _ordinal_rewriter(registry)

        def rewrite_span(span: str) -> str:
            # Applied only to the non-code stretches handed over by ``_outside_code``.
            return _ORDINAL.sub(substitute, span)

        return _outside_code(text, rewrite_span)
    return text
|
||||
|
||||
|
||||
def _ordinal_rewriter(registry: CitationRegistry) -> Callable[[re.Match[str]], str]:
    """Return a regex substitution callback bound to ``registry``.

    The callback maps one matched ordinal to ``[citation:<payload>]``, or to the
    empty string when the ordinal is unknown or has no frontend payload.
    """

    def rewrite(match: re.Match[str]) -> str:
        entry = registry.resolve(int(match.group(1)))
        if not entry:
            return ""
        payload = to_frontend_payload(entry)
        if payload is None:
            return ""
        return f"[citation:{payload}]"

    return rewrite
|
||||
|
||||
|
||||
def _outside_code(text: str, transform: Callable[[str], str]) -> str:
    """Run ``transform`` over the stretches of ``text`` that are not code.

    Regions matched by ``_CODE_REGION`` are copied through verbatim; the text
    before, between and after them is passed through ``transform``.
    """
    pieces: list[str] = []
    cursor = 0
    for code in _CODE_REGION.finditer(text):
        begin, end = code.span()
        pieces.append(transform(text[cursor:begin]))
        pieces.append(code.group(0))
        cursor = end
    # Trailing stretch after the last code region (or the whole text if none).
    pieces.append(transform(text[cursor:]))
    return "".join(pieces)
|
||||
|
||||
|
||||
__all__ = ["normalize_citations"]
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
"""Maps the model-facing ``[n]`` to its source.
|
||||
|
||||
Pydantic for reliable serialization in checkpointed, cross-agent state.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .models import CitationEntry, CitationSourceType
|
||||
|
||||
|
||||
def make_key(source_type: CitationSourceType, locator: dict[str, Any]) -> str:
    """Build the dedup key for a citable unit.

    The key is ``<source type>|<locator as JSON>``. The JSON is dumped with
    sorted keys so the key does not depend on locator insertion order, and the
    source-type prefix keeps identical locators of different kinds apart.
    Locator values that JSON cannot encode are stringified.
    """
    if isinstance(source_type, CitationSourceType):
        prefix = source_type.value
    else:
        # Tolerate a plain string (or anything else) in place of the enum.
        prefix = str(source_type)
    encoded_locator = json.dumps(locator, sort_keys=True, default=str)
    return f"{prefix}|{encoded_locator}"
|
||||
|
||||
|
||||
class CitationRegistry(BaseModel):
    """Per-conversation map between ``[n]`` labels and the units they cite.

    Labels are handed out find-or-create: registering a unit that is already
    known returns its existing label, and new labels only ever increase.
    """

    # label -> entry
    by_n: dict[int, CitationEntry] = Field(default_factory=dict)
    # dedup key (see ``make_key``) -> label
    by_key: dict[str, int] = Field(default_factory=dict)
    # Next label to mint.
    next_n: int = 1

    def register(
        self,
        source_type: CitationSourceType,
        locator: dict[str, Any],
        display: dict[str, Any] | None = None,
    ) -> int:
        """Return the label for this unit, minting a new one only if it is unseen.

        ``locator`` and ``display`` are shallow-copied into the stored entry.
        For an already-registered unit the stored entry is left untouched.
        """
        dedup_key = make_key(source_type, locator)
        if (known := self.by_key.get(dedup_key)) is not None:
            return known

        label = self.next_n
        self.by_n[label] = CitationEntry(
            n=label,
            source_type=source_type,
            locator=dict(locator),
            display=dict(display or {}),
        )
        self.by_key[dedup_key] = label
        self.next_n = label + 1
        return label

    def resolve(self, n: int) -> CitationEntry | None:
        """Look up the entry behind ``[n]``; ``None`` when the label is unknown."""
        return self.by_n.get(n)

    def merge(self, other: CitationRegistry) -> CitationRegistry:
        """Return a new registry holding the union of ``self`` and ``other``.

        Separate branches (parent + subagents, parallel tool calls) each
        register into a registry forked from the same base; replacing one with
        the other would lose mappings, so they are unioned instead:

        - A unit already in ``self`` keeps the label it has in ``self``.
        - A unit only in ``other`` keeps its label when that label is free.
        - When the label is taken by a different unit, the ``other`` entry is
          re-registered under a fresh label.

        Neither input is mutated. ``other``'s entries are folded in ascending
        label order so the outcome is deterministic.
        """
        result = self.model_copy(deep=True)
        for label, entry in sorted(other.by_n.items(), key=lambda item: item[0]):
            dedup_key = make_key(entry.source_type, entry.locator)
            if dedup_key in result.by_key:
                # Same unit already present: its existing label wins.
                continue
            if label not in result.by_n:
                # Free slot: adopt the entry under its original label.
                result.by_n[label] = entry.model_copy(deep=True)
                result.by_key[dedup_key] = label
                result.next_n = max(result.next_n, label + 1)
                continue
            # Label collision with a different unit: mint a fresh label.
            result.register(entry.source_type, entry.locator, entry.display)
        return result
|
||||
|
||||
|
||||
__all__ = ["CitationRegistry", "make_key"]
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
"""Read the conversation's ``CitationRegistry`` out of graph state.
|
||||
|
||||
The registry is checkpointed, so it may come back as a live ``CitationRegistry``
|
||||
or a plain dict (after (de)serialization). Both the search tool and the read
|
||||
path load it the same way before registering new ``[n]`` and writing it back.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from .registry import CitationRegistry
|
||||
|
||||
|
||||
def load_registry(state: Mapping[str, Any] | None) -> CitationRegistry:
    """Fetch the citation registry stored under ``state["citation_registry"]``.

    A live ``CitationRegistry`` is returned as-is, a plain dict is validated
    into one, and anything else (including a missing or empty state) yields a
    fresh empty registry.
    """
    if not state:
        return CitationRegistry()

    stored = state.get("citation_registry")
    if isinstance(stored, CitationRegistry):
        return stored
    if isinstance(stored, dict):
        # Serialized form, e.g. after a checkpoint round-trip.
        return CitationRegistry.model_validate(stored)
    return CitationRegistry()
|
||||
|
||||
|
||||
__all__ = ["load_registry"]
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
"""Render citable documents for the model: one shape for search, read, and web.
|
||||
|
||||
``render_document`` emits one ``<document title=… source=… view="excerpt|full">``
|
||||
block whose passages carry server-assigned ``[n]`` labels. ``render_search_context``
|
||||
wraps KB excerpt blocks in ``<retrieved_context>``; ``render_web_results`` wraps web
|
||||
excerpt blocks in ``<web_results>``. Both cite with the same ``[n]`` spine.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .document import render_document
|
||||
from .models import DocumentView, RenderableDocument, RenderablePassage
|
||||
from .search_context import render_search_context
|
||||
from .source_label import source_label
|
||||
from .web_results import render_web_results
|
||||
|
||||
__all__ = [
|
||||
"DocumentView",
|
||||
"RenderableDocument",
|
||||
"RenderablePassage",
|
||||
"render_document",
|
||||
"render_search_context",
|
||||
"render_web_results",
|
||||
"source_label",
|
||||
]
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
"""Render one citable document as a ``<document>`` block.
|
||||
|
||||
Every citable surface (KB search excerpts, KB full reads, web results) uses the
|
||||
same block; ``view`` and the passages shown are what differ. Each passage is
|
||||
registered for citation as it renders, so its ``[n]`` resolves back to its source
|
||||
later.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
|
||||
from .models import DocumentView, RenderableDocument, RenderablePassage
|
||||
|
||||
|
||||
def render_document(
    document: RenderableDocument,
    *,
    view: DocumentView,
    registry: CitationRegistry,
) -> str | None:
    """Render ``document`` as a ``<document>`` block with labelled passages.

    Every passage is registered in ``registry`` (find-or-create) as it is
    rendered, so ``registry`` is mutated. Returns ``None`` when the document
    has no passages.
    """
    passages = document.passages
    if not passages:
        return None

    opening = _open_tag(document, view)
    body = [_render_passage(document, passage, registry) for passage in passages]
    return "\n".join([opening, *body, "</document>"])
|
||||
|
||||
|
||||
def _open_tag(document: RenderableDocument, view: DocumentView) -> str:
    """Build the opening ``<document …>`` tag.

    Attributes appear in the order ``title``, ``source`` (only when the
    document has one), ``view``. Title and source go through ``_attr``.
    """
    title_attr = f'title="{_attr(document.title)}"'
    source_attr = f'source="{_attr(document.source)}"' if document.source else None
    view_attr = f'view="{view}"'
    present = [part for part in (title_attr, source_attr, view_attr) if part is not None]
    return f"<document {' '.join(present)}>"
|
||||
|
||||
|
||||
def _render_passage(
    document: RenderableDocument,
    passage: RenderablePassage,
    registry: CitationRegistry,
) -> str:
    """Register ``passage`` and render it as a ``[n]``-labelled line.

    The passage is registered with the document's title and source as display
    data. Its content is stripped, and continuation lines are indented by the
    width of the label so they line up under the first line.
    """
    display = {"title": document.title, "source": document.source}
    n = registry.register(passage.source_type, passage.locator, display)
    label = f" [{n}] "
    hanging_indent = "\n" + " " * len(label)
    return label + passage.content.strip().replace("\n", hanging_indent)
|
||||
|
||||
|
||||
def _attr(value: str) -> str:
    """Make ``value`` safe to embed in a double-quoted tag attribute.

    Runs of whitespace (including newlines) collapse to single spaces and the
    ends are trimmed, then ``&``, ``<``, ``>`` and ``"`` are replaced with their
    XML entities so the value cannot close the attribute or open a tag.
    """
    collapsed = " ".join(str(value).split())
    # ``&`` is escaped first so the entities produced below are not re-escaped.
    return (
        collapsed.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
    )
|
||||
|
||||
|
||||
__all__ = ["render_document"]
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
"""Inputs for rendering a citable document for the model.
|
||||
|
||||
A passage is one citable unit — what the model cites with ``[n]``. A document
|
||||
groups the passages shown from one source. The same shapes feed every citable
|
||||
surface: KB search excerpts, KB full reads, and web results.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationSourceType
|
||||
|
||||
DocumentView = Literal["excerpt", "full"]
|
||||
"""How much of the source is shown: a search slice, or the whole object."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RenderablePassage:
    """A single citable unit, i.e. what one ``[n]`` refers to.

    ``locator`` is the identity registered for the passage; its shape depends
    on the source (``{document_id, chunk_id}`` for a KB chunk, ``{url}`` for a
    web result). ``source_type`` decides how that locator is turned into a
    frontend payload and defaults to a KB chunk.
    """

    # Text shown to the model for this passage.
    content: str
    # Source-specific identity used for registration.
    locator: dict[str, Any]
    source_type: CitationSourceType = CitationSourceType.KB_CHUNK
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RenderableDocument:
    """One source document plus the passages to show from it, in render order."""

    title: str
    # Optional human-readable origin label; omitted from the tag when falsy.
    source: str | None = None
    # Each instance gets its own list when none is supplied.
    passages: list[RenderablePassage] = field(default_factory=list)
|
||||
|
||||
|
||||
__all__ = ["DocumentView", "RenderableDocument", "RenderablePassage"]
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
"""Wrap search excerpts in the ``<retrieved_context>`` block.
|
||||
|
||||
Each document renders through the shared ``render_document``; this module adds the
|
||||
container and the one-time header that teaches the model how to read and cite.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
|
||||
from .document import render_document
|
||||
from .models import RenderableDocument
|
||||
|
||||
_HEADER = (
|
||||
"These are excerpts from the user's knowledge base, selected for this query.\n"
|
||||
"A document is a full source (a file, a Slack thread, a Notion page); each\n"
|
||||
"<document> below is in excerpt view, so you are seeing only the chunks that\n"
|
||||
"matched this query, not the whole source. Cite a chunk with its [n]. Read the\n"
|
||||
"document for full context before claiming it only says X."
|
||||
)
|
||||
|
||||
|
||||
def render_search_context(
    documents: list[RenderableDocument],
    registry: CitationRegistry,
) -> str | None:
    """Render search hits as excerpt-view blocks wrapped in ``<retrieved_context>``.

    Documents that render to nothing are left out; when none remain the result
    is ``None`` so the caller can omit the block altogether. ``registry`` is
    mutated (find-or-create), so a passage that shows up again later keeps the
    ``[n]`` it was first given.
    """
    blocks: list[str] = []
    for document in documents:
        rendered = render_document(document, view="excerpt", registry=registry)
        if rendered is not None:
            blocks.append(rendered)

    if not blocks:
        return None

    joined_blocks = "\n".join(blocks)
    return "<retrieved_context>\n" + _HEADER + "\n" + joined_blocks + "\n</retrieved_context>"
|
||||
|
||||
|
||||
__all__ = ["render_search_context"]
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
"""Build a short, honest source label for a knowledge-base document.
|
||||
|
||||
A label orients the model about where a passage came from — e.g. ``Slack`` or
|
||||
``Web · docs.python.org``. It is derived only from the document's type and any
|
||||
URL in its metadata, so it never asserts detail we don't actually have. Search
|
||||
hits and full reads both build their ``<document source=…>`` from here, so the
|
||||
label a passage carries is identical whichever surface it arrives through.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
_FRIENDLY_NAMES = {
|
||||
"FILE": "File",
|
||||
"NOTE": "Note",
|
||||
"EXTENSION": "Saved page",
|
||||
"CRAWLED_URL": "Web",
|
||||
"YOUTUBE_VIDEO": "YouTube",
|
||||
"SLACK_CONNECTOR": "Slack",
|
||||
"TEAMS_CONNECTOR": "Teams",
|
||||
"DISCORD_CONNECTOR": "Discord",
|
||||
"NOTION_CONNECTOR": "Notion",
|
||||
"GITHUB_CONNECTOR": "GitHub",
|
||||
"LINEAR_CONNECTOR": "Linear",
|
||||
"JIRA_CONNECTOR": "Jira",
|
||||
"CONFLUENCE_CONNECTOR": "Confluence",
|
||||
"CLICKUP_CONNECTOR": "ClickUp",
|
||||
"AIRTABLE_CONNECTOR": "Airtable",
|
||||
"OBSIDIAN_CONNECTOR": "Obsidian",
|
||||
"BOOKSTACK_CONNECTOR": "BookStack",
|
||||
}
|
||||
|
||||
_URL_KEYS = ("url", "source_url", "link", "source")
|
||||
|
||||
|
||||
def source_label(document_type: str | None, metadata: dict[str, Any]) -> str | None:
    """Compose the source label for a document.

    Yields ``Name · host`` when both the type name and a URL host are known,
    whichever one is known when only one is, and ``None`` when neither is.
    """
    name = _friendly_name(document_type)
    host = _url_host(metadata)
    if not (name and host):
        return name or host
    return f"{name} · {host}"
|
||||
|
||||
|
||||
def _friendly_name(document_type: str | None) -> str | None:
    """Map a document type to its display name.

    Uses the curated ``_FRIENDLY_NAMES`` table when the type is listed there and
    falls back to ``_prettify`` otherwise. A missing or empty type gives ``None``.
    """
    if document_type:
        curated = _FRIENDLY_NAMES.get(document_type)
        return curated if curated is not None else _prettify(document_type)
    return None
|
||||
|
||||
|
||||
def _prettify(document_type: str) -> str:
    """Derive a display name for a type that has no curated mapping.

    Removes every ``_CONNECTOR`` and ``_FILE`` occurrence, then capitalizes the
    remaining underscore-separated words: ``GOOGLE_DRIVE_FILE`` → ``Google Drive``.
    """
    trimmed = document_type.replace("_CONNECTOR", "").replace("_FILE", "")
    pretty_words = []
    for word in trimmed.split("_"):
        if word:  # skip empties left by leading/trailing/double underscores
            pretty_words.append(word.capitalize())
    return " ".join(pretty_words)
|
||||
|
||||
|
||||
def _url_host(metadata: dict[str, Any]) -> str | None:
    """Return the host of the first usable http(s) URL found in ``metadata``.

    Keys are tried in ``_URL_KEYS`` order. Only the hostname is returned: any
    ``user:password@`` prefix and ``:port`` suffix are dropped, so credentials
    embedded in a stored URL never end up in the label, and a leading ``www.``
    is removed. Values that are not strings, are not http(s) URLs, or cannot be
    parsed are skipped. Returns ``None`` when no key yields a host.
    """
    for key in _URL_KEYS:
        value = metadata.get(key)
        if not (isinstance(value, str) and value.startswith(("http://", "https://"))):
            continue
        try:
            host = urlparse(value).hostname
        except ValueError:
            # Malformed authority (e.g. an unterminated IPv6 literal): a bad
            # metadata value must not abort rendering, so try the next key.
            continue
        if host:
            return host.removeprefix("www.")
    return None
|
||||
|
||||
|
||||
__all__ = ["source_label"]
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
"""Wrap live web-search results in a ``<web_results>`` block.
|
||||
|
||||
Each result renders through the shared ``render_document`` (excerpt view), so a
|
||||
web result is cited with ``[n]`` exactly like a knowledge-base passage. Only the
|
||||
container and header differ — they tell the model these came from the public web,
|
||||
not the user's workspace.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
|
||||
from .document import render_document
|
||||
from .models import RenderableDocument
|
||||
|
||||
_HEADER = (
|
||||
"These are live results from a public web search for this query. Each\n"
|
||||
"<document> below is one result in excerpt view; cite a result with its [n]\n"
|
||||
"after the statement it supports. Scrape the URL for full context before\n"
|
||||
"making a definitive claim from a snippet."
|
||||
)
|
||||
|
||||
|
||||
def render_web_results(
    documents: list[RenderableDocument],
    registry: CitationRegistry,
) -> str | None:
    """Render web-search results as excerpt-view blocks wrapped in ``<web_results>``.

    Results that render to nothing are left out; when none remain the result is
    ``None`` so the caller can omit the block altogether. ``registry`` is
    mutated (find-or-create), so a URL that shows up again keeps the ``[n]`` it
    was first given.
    """
    blocks: list[str] = []
    for document in documents:
        rendered = render_document(document, view="excerpt", registry=registry)
        if rendered is not None:
            blocks.append(rendered)

    if not blocks:
        return None

    joined_blocks = "\n".join(blocks)
    return "<web_results>\n" + _HEADER + "\n" + joined_blocks + "\n</web_results>"
|
||||
|
||||
|
||||
__all__ = ["render_web_results"]
|
||||
|
|
@ -53,14 +53,6 @@ class AgentFeatureFlags:
|
|||
# Skills + subagents
|
||||
enable_skills: bool = True
|
||||
enable_specialized_subagents: bool = True
|
||||
enable_kb_planner_runnable: bool = True
|
||||
|
||||
# KB retrieval mode — when False (default), the main agent retrieves KB
|
||||
# content lazily via the on-demand ``search_knowledge_base`` tool and the
|
||||
# expensive per-turn pre-injection (planner LLM + embed + hybrid search,
|
||||
# ~2.3s) is skipped; explicit @-mentions are still surfaced cheaply. Set
|
||||
# True to restore the original eager ``<priority_documents>`` pre-injection.
|
||||
enable_kb_priority_preinjection: bool = False
|
||||
|
||||
# Snapshot / revert
|
||||
enable_action_log: bool = True
|
||||
|
|
@ -118,9 +110,6 @@ class AgentFeatureFlags:
|
|||
enable_llm_tool_selector=False,
|
||||
enable_skills=False,
|
||||
enable_specialized_subagents=False,
|
||||
enable_kb_planner_runnable=False,
|
||||
# Full rollback restores the original eager KB pre-injection.
|
||||
enable_kb_priority_preinjection=True,
|
||||
enable_action_log=False,
|
||||
enable_revert_route=False,
|
||||
enable_plugin_loader=False,
|
||||
|
|
@ -156,12 +145,6 @@ class AgentFeatureFlags:
|
|||
enable_specialized_subagents=_env_bool(
|
||||
"SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", True
|
||||
),
|
||||
enable_kb_planner_runnable=_env_bool(
|
||||
"SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", True
|
||||
),
|
||||
enable_kb_priority_preinjection=_env_bool(
|
||||
"SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION", False
|
||||
),
|
||||
# Snapshot / revert
|
||||
enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", True),
|
||||
enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", True),
|
||||
|
|
@ -198,7 +181,6 @@ class AgentFeatureFlags:
|
|||
self.enable_llm_tool_selector,
|
||||
self.enable_skills,
|
||||
self.enable_specialized_subagents,
|
||||
self.enable_kb_planner_runnable,
|
||||
self.enable_action_log,
|
||||
self.enable_revert_route,
|
||||
self.enable_plugin_loader,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
"""Contribute the ``citation_registry`` state channel to a subagent.
|
||||
|
||||
The conversation's ``[n]`` -> source registry lives on graph state behind a
|
||||
merge reducer (see :mod:`app.agents.chat.multi_agent_chat.shared.state.reducers`).
|
||||
The orchestrator and the KB subagent get that channel for free via the filesystem
|
||||
state schema, but a citable subagent that does *not* use the filesystem (e.g.
|
||||
``research``) still needs the channel declared so its tools can register ``[n]``
|
||||
via ``Command(update={"citation_registry": ...})`` and have it merge back up.
|
||||
|
||||
This middleware adds *only* that channel — no tools, no behavior — so any subagent
|
||||
that mints citations can opt in without inheriting filesystem semantics.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated, NotRequired
|
||||
|
||||
from langchain.agents.middleware import AgentMiddleware
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
from app.agents.chat.multi_agent_chat.shared.state.reducers import (
|
||||
_citation_registry_merge_reducer,
|
||||
)
|
||||
|
||||
|
||||
class CitationState(TypedDict):
    """State schema holding only the conversation's ``[n]`` -> source registry."""

    # Optional channel; the annotation attaches the registry merge reducer to it.
    citation_registry: NotRequired[
        Annotated[CitationRegistry, _citation_registry_merge_reducer]
    ]
|
||||
|
||||
|
||||
class CitationStateMiddleware(AgentMiddleware):  # type: ignore[type-arg]
    """Middleware whose only job is to declare the ``citation_registry`` channel.

    It registers no tools and overrides no hooks; it just sets the state schema.
    """

    # No tools are contributed.
    tools = ()
    # Schema that adds the ``citation_registry`` channel.
    state_schema = CitationState
|
||||
|
||||
|
||||
def build_citation_state_mw() -> CitationStateMiddleware:
    """Create a new :class:`CitationStateMiddleware` instance."""
    middleware = CitationStateMiddleware()
    return middleware
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CitationState",
|
||||
"CitationStateMiddleware",
|
||||
"build_citation_state_mw",
|
||||
]
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
"""Shared XML builder for KB documents.
|
||||
|
||||
Produces the citation-friendly XML used by every read of a knowledge-base
|
||||
document (lazy-loaded by :class:`KBPostgresBackend` and synthetic anonymous
|
||||
files). The XML carries a ``<chunk_index>`` near the top so the LLM can jump
|
||||
directly to matched-chunk line ranges via ``read_file(offset=…, limit=…)``.
|
||||
|
||||
Extracted from the original ``knowledge_search.py`` so the backend, the
|
||||
priority middleware, and any future renderer share a single implementation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
|
||||
def build_document_xml(
    document: dict[str, Any],
    matched_chunk_ids: set[int] | None = None,
) -> str:
    """Build citation-friendly XML with a ``<chunk_index>`` for smart seeking.

    The output is laid out as a ``<document_metadata>`` header, then a
    ``<chunk_index>`` whose entries give each chunk's 1-based line range within
    this same output, then a ``<document_content>`` section holding the chunks.

    Args:
        document: Dict shape produced by hybrid search / lazy-load helpers.
            Expected keys: ``document`` (with ``id``, ``title``,
            ``document_type``, ``metadata``) and ``chunks`` (list of
            ``{chunk_id, content}``). A missing or non-dict ``document`` /
            ``metadata`` value is treated as empty, falling back to the
            top-level ``document_id`` / ``source`` keys and then to
            placeholders.
        matched_chunk_ids: Optional set of chunk IDs to flag as
            ``matched="true"`` in the chunk index.

    Returns:
        The XML document as a single newline-joined string.
    """
    matched = matched_chunk_ids or set()

    # Normalize once so none of the ``.get`` lookups below can hit a non-dict.
    doc_meta = document.get("document")
    if not isinstance(doc_meta, dict):
        doc_meta = {}
    metadata = doc_meta.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    document_id = doc_meta.get("id", document.get("document_id", "unknown"))
    document_type = doc_meta.get("document_type", document.get("source", "UNKNOWN"))
    title = doc_meta.get("title") or metadata.get("title") or "Untitled Document"
    url = (
        metadata.get("url") or metadata.get("source") or metadata.get("page_url") or ""
    )
    metadata_json = json.dumps(metadata, ensure_ascii=False)

    metadata_lines: list[str] = [
        "<document>",
        "<document_metadata>",
        f" <document_id>{document_id}</document_id>",
        f" <document_type>{document_type}</document_type>",
        f" <title><![CDATA[{title}]]></title>",
        f" <url><![CDATA[{url}]]></url>",
        f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
        "</document_metadata>",
        "",
    ]

    chunks = document.get("chunks") or []
    chunk_entries: list[tuple[int | None, str]] = []
    if isinstance(chunks, list):
        for chunk in chunks:
            if not isinstance(chunk, dict):
                continue
            # Only ``None`` means "no id": a chunk id of 0 is a real id.
            chunk_id = chunk.get("chunk_id")
            if chunk_id is None:
                chunk_id = chunk.get("id")
            # ``or ""`` keeps a ``None`` content from becoming the text "None".
            chunk_content = str(chunk.get("content") or "").strip()
            if not chunk_content:
                continue
            if chunk_id is None:
                xml = f" <chunk><![CDATA[{chunk_content}]]></chunk>"
            else:
                xml = f" <chunk id='{chunk_id}'><![CDATA[{chunk_content}]]></chunk>"
            chunk_entries.append((chunk_id, xml))

    # Lines between the metadata header and the first chunk: <chunk_index>,
    # one line per entry, </chunk_index>, a blank line, <document_content>.
    index_overhead = 1 + len(chunk_entries) + 1 + 1 + 1
    first_chunk_line = len(metadata_lines) + index_overhead + 1

    current_line = first_chunk_line
    index_entry_lines: list[str] = []
    for cid, xml_str in chunk_entries:
        # A chunk whose content contains newlines spans several output lines.
        num_lines = xml_str.count("\n") + 1
        end_line = current_line + num_lines - 1
        matched_attr = ' matched="true"' if cid is not None and cid in matched else ""
        if cid is not None:
            index_entry_lines.append(
                f' <entry chunk_id="{cid}" lines="{current_line}-{end_line}"{matched_attr}/>'
            )
        else:
            index_entry_lines.append(
                f' <entry lines="{current_line}-{end_line}"{matched_attr}/>'
            )
        current_line = end_line + 1

    lines = metadata_lines.copy()
    lines.append("<chunk_index>")
    lines.extend(index_entry_lines)
    lines.append("</chunk_index>")
    lines.append("")
    lines.append("<document_content>")
    lines.extend(xml_str for _, xml_str in chunk_entries)
    lines.extend(["</document_content>", "</document>"])
    return "\n".join(lines)
|
||||
|
||||
|
||||
__all__ = ["build_document_xml"]
|
||||
|
|
@ -42,8 +42,15 @@ from langchain.tools import ToolRuntime
|
|||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
|
||||
build_document_xml,
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import (
|
||||
CitationRegistry,
|
||||
CitationSourceType,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.document_render import (
|
||||
RenderableDocument,
|
||||
RenderablePassage,
|
||||
render_document,
|
||||
source_label,
|
||||
)
|
||||
from app.agents.chat.runtime.path_resolver import (
|
||||
DOCUMENTS_ROOT,
|
||||
|
|
@ -59,6 +66,21 @@ _TEMP_PREFIX = "temp_"
|
|||
_GREP_MAX_TOTAL_MATCHES = 50
|
||||
_GREP_MAX_PER_DOC = 5
|
||||
|
||||
_EMPTY_DOCUMENT_NOTICE = "(This document has no readable content.)"
|
||||
|
||||
|
||||
def render_full_document(
    document: RenderableDocument,
    registry: CitationRegistry,
) -> str:
    """Render a KB document in ``view="full"``, registering its passages in ``registry``.

    When the document renders to nothing, a short placeholder notice is
    returned instead so a read is never blank.
    """
    rendered = render_document(document, view="full", registry=registry)
    if rendered is None:
        return _EMPTY_DOCUMENT_NOTICE
    return rendered
|
||||
|
||||
|
||||
def _basename(path: str) -> str:
    """Return the part of ``path`` after its last ``/`` (all of it when there is none)."""
    _, _, tail = path.rpartition("/")
    return tail
|
||||
|
|
@ -127,13 +149,6 @@ class KBPostgresBackend(BackendProtocol):
|
|||
anon = self.state.get("kb_anon_doc")
|
||||
return anon if isinstance(anon, dict) else None
|
||||
|
||||
def _matched_chunk_ids(self, doc_id: int) -> set[int]:
    """Return the matched chunk ids stored in state for ``doc_id``.

    Reads ``state["kb_matched_chunk_ids"]`` and looks up ``doc_id`` in it. A
    missing or empty entry gives an empty set, as does a stored value that
    cannot be turned into a set.
    """
    by_document = self.state.get("kb_matched_chunk_ids") or {}
    try:
        stored_ids = by_document.get(doc_id, []) or []
        return set(stored_ids)
    except TypeError:
        return set()
|
||||
|
||||
@staticmethod
|
||||
def _file_data_size(file_data: dict[str, Any]) -> int:
|
||||
try:
|
||||
|
|
@ -466,80 +481,93 @@ class KBPostgresBackend(BackendProtocol):
|
|||
def read(self, file_path: str, offset: int = 0, limit: int = 2000) -> str:  # type: ignore[override]
    """Synchronous counterpart of ``aread``: runs it to completion via ``asyncio.run``."""
    pending = self.aread(file_path, offset, limit)
    return asyncio.run(pending)
|
||||
|
||||
async def _load_file_data(
|
||||
async def aload_document(
|
||||
self,
|
||||
path: str,
|
||||
) -> tuple[dict[str, Any], int | None] | None:
|
||||
"""Lazy-load a virtual KB document into a deepagents ``FileData``.
|
||||
) -> tuple[RenderableDocument, int | None] | None:
|
||||
"""Lazy-load a virtual KB document as a :class:`RenderableDocument`.
|
||||
|
||||
Returns ``(file_data, doc_id)`` or ``None`` if the path doesn't map
|
||||
to any known document. ``doc_id`` is ``None`` for the synthetic
|
||||
anonymous document so the caller doesn't track it as a DB-backed file.
|
||||
Returns ``(document, doc_id)`` with every chunk in document order, or
|
||||
``None`` if the path maps to no known document. ``doc_id`` is ``None``
|
||||
for the synthetic anonymous upload so the caller doesn't track it as a
|
||||
DB-backed file. Pure data — rendering and citation registration happen in
|
||||
the caller (see :meth:`_load_file_data` and the ``read_file`` tool).
|
||||
"""
|
||||
anon = self._kb_anon_doc()
|
||||
if anon and str(anon.get("path") or "") == path:
|
||||
doc_payload = {
|
||||
"document_id": -1,
|
||||
"chunks": list(anon.get("chunks") or []),
|
||||
"matched_chunk_ids": [],
|
||||
"document": {
|
||||
"id": -1,
|
||||
"title": anon.get("title") or "uploaded_document",
|
||||
"document_type": "FILE",
|
||||
"metadata": {"source": "anonymous_upload"},
|
||||
},
|
||||
"source": "FILE",
|
||||
}
|
||||
xml = build_document_xml(doc_payload, matched_chunk_ids=set())
|
||||
file_data = create_file_data(xml)
|
||||
return file_data, None
|
||||
document = RenderableDocument(
|
||||
title=str(anon.get("title") or "uploaded_document"),
|
||||
source="Uploaded file",
|
||||
passages=[
|
||||
RenderablePassage(
|
||||
content=str(chunk.get("content", "")),
|
||||
locator={
|
||||
"document_id": -1,
|
||||
"chunk_id": int(chunk["chunk_id"]),
|
||||
},
|
||||
source_type=CitationSourceType.ANON_CHUNK,
|
||||
)
|
||||
for chunk in (anon.get("chunks") or [])
|
||||
if isinstance(chunk, dict) and chunk.get("chunk_id") is not None
|
||||
],
|
||||
)
|
||||
return document, None
|
||||
|
||||
if not path.startswith(DOCUMENTS_ROOT):
|
||||
return None
|
||||
|
||||
async with shielded_async_session() as session:
|
||||
document = await virtual_path_to_doc(
|
||||
document_row = await virtual_path_to_doc(
|
||||
session,
|
||||
search_space_id=self.search_space_id,
|
||||
virtual_path=path,
|
||||
)
|
||||
if document is None:
|
||||
if document_row is None:
|
||||
return None
|
||||
chunk_rows = await session.execute(
|
||||
select(Chunk.id, Chunk.content)
|
||||
.where(Chunk.document_id == document.id)
|
||||
.where(Chunk.document_id == document_row.id)
|
||||
.order_by(Chunk.position, Chunk.id)
|
||||
)
|
||||
chunks = [
|
||||
{"chunk_id": row.id, "content": row.content} for row in chunk_rows.all()
|
||||
]
|
||||
chunks = chunk_rows.all()
|
||||
|
||||
doc_payload = {
|
||||
"document_id": document.id,
|
||||
"chunks": chunks,
|
||||
"matched_chunk_ids": list(self._matched_chunk_ids(document.id)),
|
||||
"document": {
|
||||
"id": document.id,
|
||||
"title": document.title,
|
||||
"document_type": (
|
||||
document.document_type.value
|
||||
if getattr(document, "document_type", None) is not None
|
||||
else "UNKNOWN"
|
||||
),
|
||||
"metadata": dict(document.document_metadata or {}),
|
||||
},
|
||||
"source": (
|
||||
document.document_type.value
|
||||
if getattr(document, "document_type", None) is not None
|
||||
else "UNKNOWN"
|
||||
),
|
||||
}
|
||||
xml = build_document_xml(
|
||||
doc_payload,
|
||||
matched_chunk_ids=self._matched_chunk_ids(document.id),
|
||||
document_type = (
|
||||
document_row.document_type.value
|
||||
if getattr(document_row, "document_type", None) is not None
|
||||
else None
|
||||
)
|
||||
file_data = create_file_data(xml)
|
||||
return file_data, document.id
|
||||
metadata = dict(document_row.document_metadata or {})
|
||||
document = RenderableDocument(
|
||||
title=document_row.title,
|
||||
source=source_label(document_type, metadata),
|
||||
passages=[
|
||||
RenderablePassage(
|
||||
content=row.content,
|
||||
locator={"document_id": document_row.id, "chunk_id": row.id},
|
||||
)
|
||||
for row in chunks
|
||||
],
|
||||
)
|
||||
return document, document_row.id
|
||||
|
||||
async def _load_file_data(
    self,
    path: str,
) -> tuple[dict[str, Any], int | None] | None:
    """Load the virtual KB document at ``path`` and wrap it as ``FileData``.

    This is the entry point for the filesystem operations (move/edit
    existence checks and content staging) and for the backend's own
    ``aread``/``aedit``. Those callers have no conversation registry to
    persist into, so the ``[n]`` labels produced here are minted into a
    fresh, discarded :class:`CitationRegistry`. The citation-persisting
    read is the ``read_file`` tool, which renders from
    :meth:`aload_document` against the state registry instead.

    Returns:
        ``(file_data, document_id)`` when the path resolves, otherwise
        ``None``.
    """
    result = await self.aload_document(path)
    if result is None:
        return None

    renderable, document_id = result
    # Throwaway registry: nothing registered during this render is kept.
    text = render_full_document(renderable, CitationRegistry())
    return create_file_data(text), document_id
|
||||
|
||||
# ------------------------------------------------------------------ writes
|
||||
|
||||
|
|
@ -1037,4 +1065,5 @@ __all__ = [
|
|||
"KBPostgresBackend",
|
||||
"list_tree_listing",
|
||||
"paginate_listing",
|
||||
"render_full_document",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -37,8 +37,8 @@ def build_backend_resolver(
|
|||
|
||||
In cloud mode the resolver returns a fresh :class:`KBPostgresBackend`
|
||||
bound to the current ``runtime`` so the backend can read staging state
|
||||
(``staged_dirs``, ``pending_moves``, ``files`` cache, ``kb_anon_doc``,
|
||||
``kb_matched_chunk_ids``) for each tool call. When no ``search_space_id``
|
||||
(``staged_dirs``, ``pending_moves``, ``files`` cache, ``kb_anon_doc``)
|
||||
for each tool call. When no ``search_space_id``
|
||||
is provided, the resolver falls back to :class:`StateBackend` (used by
|
||||
sub-agents and tests that don't need DB-backed reads).
|
||||
|
||||
|
|
|
|||
|
|
@ -35,26 +35,14 @@ current working directory (`cwd`, default `/documents`).
|
|||
turn alongside any new/edited documents. Snapshot/revert is enabled
|
||||
for every destructive operation when action logging is on.
|
||||
|
||||
## Reading Documents Efficiently
|
||||
## Reading Documents
|
||||
|
||||
Documents are formatted as XML. Each document contains:
|
||||
- `<document_metadata>` — title, type, URL, etc.
|
||||
- `<chunk_index>` — a table of every chunk with its **line range** and a
|
||||
`matched="true"` flag for chunks that matched the search query.
|
||||
- `<document_content>` — the actual chunks in original document order.
|
||||
|
||||
**Workflow**: when reading a large document, read the first ~20 lines to see
|
||||
the `<chunk_index>`, identify chunks marked `matched="true"`, then use
|
||||
`read_file(path, offset=<start_line>, limit=<lines>)` to jump directly to
|
||||
those sections instead of reading the entire file sequentially.
|
||||
|
||||
Use `<chunk id='...'>` values as citation IDs in your answers.
|
||||
|
||||
## Priority List
|
||||
|
||||
You receive a `<priority_documents>` system message each turn listing the
|
||||
top-K paths most relevant to the user's query (by hybrid search). Read those
|
||||
first — matched sections are flagged inside each document's `<chunk_index>`.
|
||||
A knowledge-base document is returned as a `<document … view="full">` block —
|
||||
the whole source, with each passage labelled `[n]`. `view="full"` means you are
|
||||
seeing the complete document, not an excerpt. Use `read_file(path, offset, limit)`
|
||||
to page through a large document. Cite a passage by writing its `[n]` after the
|
||||
statement it supports — the same `[n]` that passage had in
|
||||
`search_knowledge_base` results.
|
||||
|
||||
## Workspace Tree
|
||||
|
||||
|
|
|
|||
|
|
@ -37,13 +37,4 @@ directory (`cwd`).
|
|||
- Cross-mount moves are not supported.
|
||||
- Desktop deletes hit disk immediately and cannot be undone via the
|
||||
agent's revert flow — confirm before calling `rm`/`rmdir`.
|
||||
|
||||
## Priority List
|
||||
|
||||
You may receive a `<priority_documents>` system message listing the top-K
|
||||
documents from the user's SurfSense knowledge base — these are cloud-ingested
|
||||
via connectors (Notion, Slack, etc.), not local files. Treat it as a hint:
|
||||
consult it when the task spans both local and cloud sources (e.g. drafting a
|
||||
local note from a Notion summary); skip when the task is purely about local
|
||||
files.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -10,11 +10,11 @@ Usage:
|
|||
- By default, reads up to 100 lines from the beginning.
|
||||
- Use `offset` and `limit` for pagination when files are large.
|
||||
- Results include line numbers.
|
||||
- Documents contain a `<chunk_index>` near the top listing every chunk with
|
||||
its line range and a `matched="true"` flag for search-relevant chunks.
|
||||
Read the index first, then jump to matched chunks with
|
||||
`read_file(path, offset=<start_line>, limit=<num_lines>)`.
|
||||
- Use chunk IDs (`<chunk id='...'>`) as citations in answers.
|
||||
- A knowledge-base document is returned as a `<document … view="full">` block:
|
||||
the whole source, with each passage labelled `[n]`. `view="full"` means you are
|
||||
seeing the complete document, not an excerpt.
|
||||
- Cite a passage by writing its `[n]` after the statement it supports — the same
|
||||
`[n]` you would use for that passage from `search_knowledge_base`.
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,14 +4,20 @@ from __future__ import annotations
|
|||
|
||||
from typing import TYPE_CHECKING, Annotated, Any
|
||||
|
||||
from deepagents.backends.utils import format_read_response, validate_path
|
||||
from deepagents.backends.utils import (
|
||||
create_file_data,
|
||||
format_read_response,
|
||||
validate_path,
|
||||
)
|
||||
from langchain.tools import ToolRuntime
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langchain_core.tools import BaseTool, StructuredTool
|
||||
from langgraph.types import Command
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import load_registry
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
|
||||
KBPostgresBackend,
|
||||
render_full_document,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
|
||||
SurfSenseFilesystemState,
|
||||
|
|
@ -55,10 +61,12 @@ def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
|
|||
|
||||
backend = mw._get_backend(runtime)
|
||||
if isinstance(backend, KBPostgresBackend):
|
||||
loaded = await backend._load_file_data(validated)
|
||||
loaded = await backend.aload_document(validated)
|
||||
if loaded is None:
|
||||
return f"Error: File '{validated}' not found"
|
||||
file_data, doc_id = loaded
|
||||
document, doc_id = loaded
|
||||
registry = load_registry(runtime.state)
|
||||
file_data = create_file_data(render_full_document(document, registry))
|
||||
rendered = format_read_response(file_data, offset, limit)
|
||||
update: dict[str, Any] = {
|
||||
"files": {validated: file_data},
|
||||
|
|
@ -68,6 +76,7 @@ def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
|
|||
tool_call_id=runtime.tool_call_id,
|
||||
)
|
||||
],
|
||||
"citation_registry": registry,
|
||||
}
|
||||
if doc_id is not None:
|
||||
update["doc_id_by_path"] = {validated: doc_id}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Project ``workspace_tree_text`` + ``kb_priority`` from state into SystemMessages."""
|
||||
"""Project ``workspace_tree_text`` from state into a SystemMessage."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@ -14,18 +14,15 @@ from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
|
|||
)
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
from .knowledge_search import _render_priority_message
|
||||
|
||||
_perf_log = get_perf_logger()
|
||||
|
||||
|
||||
class KbContextProjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||
"""Emit ``<workspace_tree>`` + ``<priority_documents>`` from shared state.
|
||||
"""Emit the ``<workspace_tree>`` from shared state.
|
||||
|
||||
Read-only consumer: no DB, no LLM, no state writes. The orchestrator's
|
||||
renderer middlewares populate the source fields; this projection lets any
|
||||
agent (orchestrator or subagent) put the same content in front of its
|
||||
own LLM call.
|
||||
``KnowledgeTreeMiddleware`` populates ``workspace_tree_text``; this
|
||||
projection lets a subagent put the same tree in front of its own LLM call.
|
||||
"""
|
||||
|
||||
tools = ()
|
||||
|
|
@ -39,28 +36,19 @@ class KbContextProjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
|||
del runtime
|
||||
start = time.perf_counter()
|
||||
tree_text = state.get("workspace_tree_text")
|
||||
priority = state.get("kb_priority")
|
||||
if not tree_text and not priority:
|
||||
if not tree_text:
|
||||
_perf_log.info(
|
||||
"[kb_context_projection] tree=0 priority=0 elapsed=%.3fs",
|
||||
"[kb_context_projection] tree=0 elapsed=%.3fs",
|
||||
time.perf_counter() - start,
|
||||
)
|
||||
return None
|
||||
|
||||
messages = list(state.get("messages") or [])
|
||||
insert_at = max(len(messages) - 1, 0)
|
||||
tree_chars = 0
|
||||
if tree_text:
|
||||
tree_chars = len(tree_text)
|
||||
messages.insert(insert_at, SystemMessage(content=tree_text))
|
||||
priority_count = 0
|
||||
if priority:
|
||||
priority_count = len(priority) if hasattr(priority, "__len__") else 1
|
||||
messages.insert(insert_at, _render_priority_message(priority))
|
||||
messages.insert(insert_at, SystemMessage(content=tree_text))
|
||||
_perf_log.info(
|
||||
"[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs",
|
||||
tree_chars,
|
||||
priority_count,
|
||||
"[kb_context_projection] tree_chars=%d elapsed=%.3fs",
|
||||
len(tree_text),
|
||||
time.perf_counter() - start,
|
||||
)
|
||||
return {"messages": messages}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,18 @@
|
|||
"""Knowledge-base retrieval: hybrid search rendered as citable evidence.
|
||||
|
||||
Public surface is the service (``search_knowledge_base_context``) and its input
|
||||
value object (``SearchScope``); the rest are building blocks.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .models import ChunkHit, DocumentHit, SearchScope
|
||||
from .service import build_context, search_knowledge_base_context
|
||||
|
||||
__all__ = [
|
||||
"ChunkHit",
|
||||
"DocumentHit",
|
||||
"SearchScope",
|
||||
"build_context",
|
||||
"search_knowledge_base_context",
|
||||
]
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
"""Turn retriever ``DocumentHit``s into renderable documents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.document_render import (
|
||||
RenderableDocument,
|
||||
RenderablePassage,
|
||||
source_label,
|
||||
)
|
||||
|
||||
from .models import DocumentHit
|
||||
|
||||
|
||||
def to_renderable_document(hit: DocumentHit) -> RenderableDocument:
    """Convert a retriever hit into the renderer's ``RenderableDocument``.

    Each matched chunk becomes one ``RenderablePassage`` whose locator
    carries the owning ``document_id`` and the chunk's own ``chunk_id``.
    """
    title = hit.title
    source = source_label(hit.document_type, hit.metadata)

    passages = []
    for chunk in hit.chunks:
        locator = {"document_id": hit.document_id, "chunk_id": chunk.chunk_id}
        passages.append(RenderablePassage(content=chunk.content, locator=locator))

    return RenderableDocument(title=title, source=source, passages=passages)
|
||||
|
||||
|
||||
__all__ = ["to_renderable_document"]
|
||||
|
|
@ -0,0 +1,250 @@
|
|||
"""Hybrid (semantic + keyword) chunk search with reciprocal-rank fusion.
|
||||
|
||||
Only matched chunks are citable, so the fused result already holds every passage
|
||||
shown — there is no second per-document fetch. Returns the top ``top_k``
|
||||
documents, each carrying its matched chunks in reading order.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import time
|
||||
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.config import config
|
||||
from app.db import Chunk, Document, DocumentType
|
||||
from app.observability import metrics, otel
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
from .models import ChunkHit, DocumentHit, SearchScope
|
||||
|
||||
_RRF_K = 60
|
||||
_CANDIDATE_MULTIPLIER = 5 # fused-chunk pool size relative to top_k
|
||||
_MAX_PASSAGES_PER_DOC = 12
|
||||
_SURFACE = "chunks"
|
||||
|
||||
|
||||
async def search_chunks(
    db_session: AsyncSession,
    *,
    search_space_id: int,
    query: str,
    scope: SearchScope,
    top_k: int,
    query_embedding: list[float] | None = None,
) -> list[DocumentHit]:
    """Return the top ``top_k`` documents for ``query``, each with its chunks.

    This is the instrumented wrapper around :func:`_search`: it opens a
    KB-search span, records the search duration as a metric, and writes one
    timing line to the perf log. All retrieval logic lives in
    :func:`_search`.
    """
    t0 = time.perf_counter()
    span_cm = otel.kb_search_span(
        search_space_id=search_space_id,
        query_chars=len(query),
        extra={"search.surface": _SURFACE, "search.mode": "hybrid"},
    )
    with span_cm as span:
        try:
            hits = await _search(
                db_session,
                search_space_id=search_space_id,
                query=query,
                scope=scope,
                top_k=top_k,
                query_embedding=query_embedding,
            )
        finally:
            # The duration metric is recorded whether or not the search raised.
            elapsed_ms = (time.perf_counter() - t0) * 1000
            metrics.record_kb_search_duration(
                elapsed_ms, search_space_id=search_space_id, surface=_SURFACE
            )
        span.set_attribute("result.count", len(hits))

    get_perf_logger().info(
        "[chunk_search] hybrid in %.3fs docs=%d space=%d",
        elapsed_ms / 1000,
        len(hits),
        search_space_id,
    )
    return hits
|
||||
|
||||
|
||||
async def _search(
    db_session: AsyncSession,
    *,
    search_space_id: int,
    query: str,
    scope: SearchScope,
    top_k: int,
    query_embedding: list[float] | None,
) -> list[DocumentHit]:
    """Resolve the scope, run the fused chunk query, and group rows by document.

    Returns an empty list without touching the database when document types
    were requested but none of them is a known ``DocumentType``.
    """
    type_filter = _resolve_document_types(scope.document_types)
    if type_filter is not None and not type_filter:
        # A type filter was given, yet nothing in it resolved: no row can match.
        return []

    embedding = query_embedding
    if embedding is None:
        # No precomputed embedding supplied: compute one in a worker thread.
        embedding = await asyncio.to_thread(
            config.embedding_model_instance.embed, query
        )

    filters = _base_conditions(search_space_id, scope, type_filter)
    fused_rows = await _fused_chunks(
        db_session,
        query=query,
        query_embedding=embedding,
        conditions=filters,
        candidate_pool=top_k * _CANDIDATE_MULTIPLIER,
    )
    return _group_into_documents(fused_rows, top_k=top_k)
|
||||
|
||||
|
||||
def _resolve_document_types(
|
||||
raw: tuple[str, ...] | None,
|
||||
) -> list[DocumentType] | None:
|
||||
"""Map type names to enum members; ``None`` when unfiltered, ``[]`` if all unknown."""
|
||||
if not raw:
|
||||
return None
|
||||
resolved: list[DocumentType] = []
|
||||
for name in raw:
|
||||
with contextlib.suppress(KeyError):
|
||||
resolved.append(DocumentType[name])
|
||||
return resolved
|
||||
|
||||
|
||||
def _base_conditions(
    search_space_id: int,
    scope: SearchScope,
    document_types: list[DocumentType] | None,
) -> list:
    """Build the WHERE clauses applied to both the semantic and keyword legs.

    Two clauses are always present: the search-space match, and exclusion of
    documents whose ``status["state"]`` is ``"deleting"`` (a missing state is
    treated as ``"ready"``). Type, id and date-range clauses are appended only
    when the corresponding scope field is set.
    """
    not_being_deleted = (
        func.coalesce(Document.status["state"].astext, "ready") != "deleting"
    )
    filters = [Document.search_space_id == search_space_id, not_being_deleted]

    if document_types:
        filters += [Document.document_type.in_(document_types)]
    if scope.document_ids:
        filters += [Document.id.in_(scope.document_ids)]

    # Date bounds are inclusive and compare against the document's updated_at.
    lower, upper = scope.start_date, scope.end_date
    if lower is not None:
        filters += [Document.updated_at >= lower]
    if upper is not None:
        filters += [Document.updated_at <= upper]
    return filters
|
||||
|
||||
|
||||
async def _fused_chunks(
    db_session: AsyncSession,
    *,
    query: str,
    query_embedding: list[float],
    conditions: list,
    candidate_pool: int,
):
    """Run both search legs, fuse them by reciprocal rank, and fetch the chunks.

    Each leg is a CTE of ``(chunk id, rank)`` limited to ``candidate_pool``
    rows. The legs are FULL OUTER JOINed on chunk id; a chunk's score is the
    sum of ``1 / (_RRF_K + rank)`` over the legs it appears in, with a missing
    leg contributing ``0``. Returns ``(Chunk, score)`` rows ordered by score
    descending, at most ``candidate_pool`` of them, with ``Chunk.document``
    loaded via a join.
    """
    # Ordering expressions, each shared by its leg's window rank and ORDER BY.
    distance = Chunk.embedding.op("<=>")(query_embedding)
    tsvector = func.to_tsvector("english", Chunk.content)
    tsquery = func.plainto_tsquery("english", query)
    keyword_relevance = func.ts_rank_cd(tsvector, tsquery).desc()

    semantic_leg = (
        select(Chunk.id, func.rank().over(order_by=distance).label("rank"))
        .join(Document, Chunk.document_id == Document.id)
        .where(*conditions)
        .order_by(distance)
        .limit(candidate_pool)
        .cte("semantic_search")
    )

    keyword_leg = (
        select(Chunk.id, func.rank().over(order_by=keyword_relevance).label("rank"))
        .join(Document, Chunk.document_id == Document.id)
        .where(*conditions)
        # Only chunks whose text actually matches the query take part.
        .where(tsvector.op("@@")(tsquery))
        .order_by(keyword_relevance)
        .limit(candidate_pool)
        .cte("keyword_search")
    )

    # coalesce(..., 0.0) covers chunks found by only one of the two legs.
    semantic_term = func.coalesce(1.0 / (_RRF_K + semantic_leg.c.rank), 0.0)
    keyword_term = func.coalesce(1.0 / (_RRF_K + keyword_leg.c.rank), 0.0)
    fused_score = (semantic_term + keyword_term).label("score")

    either_leg = semantic_leg.outerjoin(
        keyword_leg, semantic_leg.c.id == keyword_leg.c.id, full=True
    )
    matched_chunk_id = func.coalesce(semantic_leg.c.id, keyword_leg.c.id)

    statement = (
        select(Chunk, fused_score)
        .select_from(either_leg)
        .join(Chunk, Chunk.id == matched_chunk_id)
        .options(joinedload(Chunk.document))
        .order_by(text("score DESC"))
        .limit(candidate_pool)
    )

    rows = await db_session.execute(statement)
    return rows.all()
|
||||
|
||||
|
||||
def _group_into_documents(rows, *, top_k: int) -> list[DocumentHit]:
    """Bucket ``(chunk, score)`` rows by document and keep the first ``top_k``.

    Documents are emitted in the order they first appear in ``rows``, and a
    document's score is the score of its first row. Each document's chunks
    are passed through :func:`_reading_order`.
    """
    # document id -> (document, score of first row seen, collected chunk hits);
    # dict insertion order doubles as the first-appearance ranking.
    grouped: dict[int, tuple[Document, float, list[ChunkHit]]] = {}

    for chunk, score in rows:
        parent = chunk.document
        entry = grouped.get(parent.id)
        if entry is None:
            entry = grouped[parent.id] = (parent, float(score), [])
        entry[2].append(
            ChunkHit(
                chunk_id=chunk.id,
                content=chunk.content,
                position=chunk.position,
                score=float(score),
            )
        )

    hits: list[DocumentHit] = []
    for document_id in list(grouped)[:top_k]:
        parent, first_score, chunk_hits = grouped[document_id]
        hits.append(
            DocumentHit(
                document_id=document_id,
                title=parent.title,
                document_type=_type_value(parent),
                metadata=parent.document_metadata or {},
                score=first_score,
                chunks=_reading_order(chunk_hits),
            )
        )
    return hits
|
||||
|
||||
|
||||
def _reading_order(chunks: list[ChunkHit]) -> list[ChunkHit]:
    """Trim to the highest-scoring chunks, then sort those by position.

    At most ``_MAX_PASSAGES_PER_DOC`` chunks survive the trim; the input
    list is not modified.
    """
    by_relevance = sorted(chunks, key=lambda hit: hit.score, reverse=True)
    kept = by_relevance[:_MAX_PASSAGES_PER_DOC]
    # Re-sort the survivors so they read in document order.
    kept.sort(key=lambda hit: hit.position)
    return kept
|
||||
|
||||
|
||||
def _type_value(document: Document) -> str | None:
|
||||
document_type = getattr(document, "document_type", None)
|
||||
return document_type.value if document_type is not None else None
|
||||
|
||||
|
||||
__all__ = ["search_chunks"]
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
"""Value objects for knowledge-base retrieval: the query scope and raw hits.
|
||||
|
||||
``SearchScope`` is the optional filter a search runs under. ``DocumentHit`` /
|
||||
``ChunkHit`` are the retriever's typed output — matched chunks grouped by their
|
||||
document — which the adapter turns into renderable ``RenderableDocument``s.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SearchScope:
    """Optional filters for one search; every field defaults to "no filter".

    An instance built with no arguments therefore covers the whole
    knowledge base.
    """

    # Document-type names to restrict the search to.
    document_types: tuple[str, ...] | None = None
    # Ids of the only documents to consider.
    document_ids: tuple[int, ...] | None = None
    # Lower bound of the date range.
    start_date: datetime | None = None
    # Upper bound of the date range.
    end_date: datetime | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ChunkHit:
    """A single chunk that matched the query."""

    # Identifier of the matched chunk.
    chunk_id: int
    # Text of the chunk.
    content: str
    # Position used to order this chunk within its document.
    position: int
    # Relevance score assigned to this chunk by the search.
    score: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DocumentHit:
    """One document together with the chunks of it that matched the query."""

    # Identifier of the document.
    document_id: int
    # Title of the document.
    title: str
    # Document-type value as a string, or None when the type is unset.
    document_type: str | None
    # Free-form metadata dictionary carried with the document.
    metadata: dict[str, Any]
    # Relevance score of the document as a whole.
    score: float
    # Matched chunks, ordered by position; each instance gets its own list.
    chunks: list[ChunkHit] = field(default_factory=list)
|
||||
|
||||
|
||||
__all__ = ["ChunkHit", "DocumentHit", "SearchScope"]
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
"""Reorder retrieved documents with the configured reranker (no-op if disabled).
|
||||
|
||||
Ranking is by concatenated matched-chunk content; ``DocumentHit`` order is
|
||||
rewritten to follow the reranker's result.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from .models import DocumentHit
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.services.reranker_service import RerankerService
|
||||
|
||||
|
||||
def rerank_hits(
|
||||
query: str,
|
||||
hits: list[DocumentHit],
|
||||
reranker: RerankerService | None,
|
||||
) -> list[DocumentHit]:
|
||||
"""Return ``hits`` reordered by the reranker; unchanged when none is set."""
|
||||
if reranker is None or len(hits) < 2:
|
||||
return hits
|
||||
|
||||
hit_by_id = {hit.document_id: hit for hit in hits}
|
||||
ranked = reranker.rerank_documents(query, [_as_document(hit) for hit in hits])
|
||||
reordered = [
|
||||
hit_by_id[doc["document_id"]]
|
||||
for doc in ranked
|
||||
if doc.get("document_id") in hit_by_id
|
||||
]
|
||||
# Fall back to the original order if the reranker dropped or garbled ids.
|
||||
return reordered if len(reordered) == len(hits) else hits
|
||||
|
||||
|
||||
def _as_document(hit: DocumentHit) -> dict[str, Any]:
|
||||
"""The minimal dict shape ``RerankerService.rerank_documents`` scores on."""
|
||||
return {
|
||||
"document_id": hit.document_id,
|
||||
"content": "\n\n".join(chunk.content for chunk in hit.chunks),
|
||||
"score": hit.score,
|
||||
"document": {
|
||||
"id": hit.document_id,
|
||||
"title": hit.title,
|
||||
"document_type": hit.document_type,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
__all__ = ["rerank_hits"]
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
"""Search the knowledge base and render it as model-facing ``<retrieved_context>``.
|
||||
|
||||
The retrieval spine end to end: hybrid search → rerank → adapt → render, with
|
||||
each shown passage registered for ``[n]`` citation along the way.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
from app.agents.chat.multi_agent_chat.shared.document_render import (
|
||||
render_search_context,
|
||||
)
|
||||
|
||||
from .adapter import to_renderable_document
|
||||
from .hybrid_search import search_chunks
|
||||
from .models import DocumentHit, SearchScope
|
||||
from .reranking import rerank_hits
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.services.reranker_service import RerankerService
|
||||
|
||||
_DEFAULT_TOP_K = 10
|
||||
|
||||
|
||||
async def search_knowledge_base_context(
    db_session: AsyncSession,
    *,
    search_space_id: int,
    query: str,
    registry: CitationRegistry,
    scope: SearchScope | None = None,
    reranker: RerankerService | None = None,
    top_k: int = _DEFAULT_TOP_K,
) -> str | None:
    """Search the knowledge base for ``query`` and render the evidence found.

    Runs :func:`search_chunks` under ``scope`` (an unfiltered ``SearchScope``
    when none is given) and hands the hits to :func:`build_context`, which
    reranks and renders them against ``registry``.

    Returns:
        The rendered context, or ``None`` when nothing matched so the caller
        can skip the block.
    """
    effective_scope = scope or SearchScope()
    hits = await search_chunks(
        db_session,
        search_space_id=search_space_id,
        query=query,
        scope=effective_scope,
        top_k=top_k,
    )
    return build_context(query, hits, registry, reranker=reranker)
|
||||
|
||||
|
||||
def build_context(
    query: str,
    hits: list[DocumentHit],
    registry: CitationRegistry,
    *,
    reranker: RerankerService | None = None,
) -> str | None:
    """Rerank ``hits``, adapt them to renderable documents, and render them.

    Does no retrieval of its own, so it can be exercised with hand-built
    hits. The return value is whatever ``render_search_context`` produces for
    the adapted documents and ``registry``.
    """
    ordered_hits = rerank_hits(query, hits, reranker)
    renderables = list(map(to_renderable_document, ordered_hits))
    return render_search_context(renderables, registry)
|
||||
|
||||
|
||||
__all__ = ["build_context", "search_knowledge_base_context"]
|
||||
|
|
@ -13,9 +13,8 @@ extra fields needed to implement Postgres-backed virtual filesystem semantics:
|
|||
* ``dirty_paths`` — paths whose state file content differs from DB.
|
||||
* ``dirty_path_tool_calls`` — sidecar map ``path -> latest tool_call_id`` for
|
||||
dirty paths; used to bind the per-path snapshot to an action_id.
|
||||
* ``kb_priority`` — top-K priority hints rendered into a system message.
|
||||
* ``kb_matched_chunk_ids`` — internal hand-off for matched-chunk highlighting.
|
||||
* ``kb_anon_doc`` — Redis-loaded anonymous document (if any).
|
||||
* ``citation_registry`` — per-conversation ``[n]`` -> source map for citations.
|
||||
* ``tree_version`` — bumped by persistence; invalidates the tree render cache.
|
||||
* ``workspace_tree_text`` — pre-rendered ``<workspace_tree>`` body for the turn.
|
||||
|
||||
|
|
@ -30,9 +29,11 @@ from typing import Annotated, Any, NotRequired
|
|||
from deepagents.middleware.filesystem import FilesystemState
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
from app.agents.chat.multi_agent_chat.shared.receipts.receipt import Receipt
|
||||
from app.agents.chat.multi_agent_chat.shared.state.reducers import (
|
||||
_add_unique_reducer,
|
||||
_citation_registry_merge_reducer,
|
||||
_dict_merge_with_tombstones_reducer,
|
||||
_int_counter_merge_reducer,
|
||||
_list_append_reducer,
|
||||
|
|
@ -67,14 +68,6 @@ class PendingDelete(TypedDict, total=False):
|
|||
tool_call_id: str
|
||||
|
||||
|
||||
class KbPriorityEntry(TypedDict, total=False):
|
||||
path: str
|
||||
score: float
|
||||
document_id: int | None
|
||||
title: str
|
||||
mentioned: bool
|
||||
|
||||
|
||||
class KbAnonDoc(TypedDict, total=False):
|
||||
"""In-memory anonymous-session document loaded from Redis."""
|
||||
|
||||
|
|
@ -159,15 +152,16 @@ class SurfSenseFilesystemState(FilesystemState):
|
|||
to the latest action_id (the one the user is most likely to revert).
|
||||
"""
|
||||
|
||||
kb_priority: NotRequired[Annotated[list[KbPriorityEntry], _replace_reducer]]
|
||||
"""Top-K priority hints rendered as a system message before the user turn."""
|
||||
|
||||
kb_matched_chunk_ids: NotRequired[Annotated[dict[int, list[int]], _replace_reducer]]
|
||||
"""Internal: ``Document.id`` -> list of matched chunk IDs from hybrid search."""
|
||||
|
||||
kb_anon_doc: NotRequired[Annotated[KbAnonDoc | None, _replace_reducer]]
|
||||
"""Anonymous-session document loaded from Redis (read-only, no DB row)."""
|
||||
|
||||
citation_registry: NotRequired[
|
||||
Annotated[CitationRegistry, _citation_registry_merge_reducer]
|
||||
]
|
||||
"""Per-conversation ``[n]`` -> source map; written by retrieval, read by the
|
||||
normalizer. Merges (union, find-or-create) so parallel/subagent registrations
|
||||
stay globally consistent instead of clobbering each other."""
|
||||
|
||||
tree_version: NotRequired[Annotated[int, _replace_reducer]]
|
||||
"""Monotonically increasing counter; bumped when commits change the KB tree."""
|
||||
|
||||
|
|
@ -206,7 +200,6 @@ class SurfSenseFilesystemState(FilesystemState):
|
|||
|
||||
__all__ = [
|
||||
"KbAnonDoc",
|
||||
"KbPriorityEntry",
|
||||
"PendingDelete",
|
||||
"PendingMove",
|
||||
"SurfSenseFilesystemState",
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
These reducers back the extra state fields used by the cloud-mode filesystem
|
||||
agent (`cwd`, `staged_dirs`, `pending_moves`, `dirty_paths`, `doc_id_by_path`,
|
||||
`kb_priority`, `kb_matched_chunk_ids`, `kb_anon_doc`, `tree_version`).
|
||||
`kb_anon_doc`, `tree_version`).
|
||||
|
||||
Tools mutate these fields ONLY via `Command(update={...})` returns; the
|
||||
reducers are responsible for merging successive updates atomically and for
|
||||
|
|
@ -20,6 +20,8 @@ from __future__ import annotations
|
|||
|
||||
from typing import Any, Final, TypeVar
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
|
||||
_CLEAR: Final[str] = "\x00__SURFSENSE_FILESYSTEM_CLEAR__\x00"
|
||||
"""Reset sentinel; pass it inside a list/dict update to request a reset.
|
||||
|
||||
|
|
@ -204,6 +206,41 @@ def _int_counter_merge_reducer(
|
|||
return base
|
||||
|
||||
|
||||
def _as_registry(value: Any) -> CitationRegistry | None:
|
||||
"""Coerce a state value into a ``CitationRegistry``.
|
||||
|
||||
The checkpointer serializes ``Command.update`` via ``ormsgpack`` *before*
|
||||
reducers run, so an update can arrive as a plain ``dict`` rather than a model.
|
||||
"""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, CitationRegistry):
|
||||
return value
|
||||
if isinstance(value, dict):
|
||||
return CitationRegistry.model_validate(value)
|
||||
return None
|
||||
|
||||
|
||||
def _citation_registry_merge_reducer(
    left: Any,
    right: Any,
) -> CitationRegistry | None:
    """Reducer that unions citation registries rather than replacing them.

    Both sides are first coerced with :func:`_as_registry`. When only one
    side holds a registry, that registry is returned; when both do, the
    result is ``left.merge(right)``. Merging keeps ``[n]`` globally
    consistent when branches (parent + subagents, parallel tool calls) each
    register into a registry forked from the same base; collisions re-mint
    rather than drop. See :meth:`CitationRegistry.merge`.
    """
    incoming = _as_registry(right)
    current = _as_registry(left)
    if current is not None and incoming is not None:
        return current.merge(incoming)
    # At most one side is populated: return it (or None when neither is).
    return current if incoming is None else incoming
|
||||
|
||||
|
||||
def _initial_filesystem_state() -> dict[str, Any]:
|
||||
"""Default empty values for SurfSense filesystem state fields.
|
||||
|
||||
|
|
@ -221,8 +258,6 @@ def _initial_filesystem_state() -> dict[str, Any]:
|
|||
"doc_id_by_path": {},
|
||||
"dirty_paths": [],
|
||||
"dirty_path_tool_calls": {},
|
||||
"kb_priority": [],
|
||||
"kb_matched_chunk_ids": {},
|
||||
"kb_anon_doc": None,
|
||||
"tree_version": 0,
|
||||
}
|
||||
|
|
@ -231,6 +266,7 @@ def _initial_filesystem_state() -> dict[str, Any]:
|
|||
__all__ = [
|
||||
"_CLEAR",
|
||||
"_add_unique_reducer",
|
||||
"_citation_registry_merge_reducer",
|
||||
"_dict_merge_with_tombstones_reducer",
|
||||
"_initial_filesystem_state",
|
||||
"_int_counter_merge_reducer",
|
||||
|
|
|
|||
|
|
@ -1,762 +0,0 @@
|
|||
"""
|
||||
Knowledge base search tool for the SurfSense agent.
|
||||
|
||||
This module provides:
|
||||
- Connector constants and normalization
|
||||
- Async knowledge base search across multiple connectors
|
||||
- Document formatting for LLM context
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import NATIVE_TO_LEGACY_DOCTYPE, shielded_async_session
|
||||
from app.services.connector_service import ConnectorService
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
# Connectors that call external live-search APIs. These are handled by the
|
||||
# ``web_search`` tool and must be excluded from knowledge-base searches.
|
||||
_LIVE_SEARCH_CONNECTORS: set[str] = {
|
||||
"TAVILY_API",
|
||||
"LINKUP_API",
|
||||
"BAIDU_SEARCH_API",
|
||||
}
|
||||
|
||||
# Patterns that indicate the query has no meaningful search signal.
|
||||
# plainto_tsquery('english', '*') produces an empty tsquery and an embedding
|
||||
# of '*' is random noise, so both keyword and semantic search degrade to
|
||||
# arbitrary ordering — large documents (many chunks) dominate by chance.
|
||||
_DEGENERATE_QUERY_RE = re.compile(
|
||||
r"^[\s*?_.#@!\-/\\]+$" # only wildcards, punctuation, whitespace
|
||||
)
|
||||
|
||||
# Max chunks per document when doing a recency-based browse instead of
|
||||
# a real search. We want breadth (many docs) over depth (many chunks).
|
||||
_BROWSE_MAX_CHUNKS_PER_DOC = 5
|
||||
|
||||
|
||||
def _is_degenerate_query(query: str) -> bool:
    """Tell whether ``query`` carries no usable search signal.

    A query is degenerate when, after stripping surrounding whitespace, it is
    empty or matches ``_DEGENERATE_QUERY_RE`` (only wildcard / punctuation /
    whitespace characters such as ``*`` or ``**``). Keyword and semantic
    search both return effectively arbitrary results for such input.
    """
    trimmed = query.strip()
    return not trimmed or _DEGENERATE_QUERY_RE.match(trimmed) is not None
|
||||
|
||||
|
||||
async def _browse_recent_documents(
    search_space_id: int,
    document_type: str | list[str] | None,
    top_k: int,
    start_date: datetime | None,
    end_date: datetime | None,
) -> list[dict[str, Any]]:
    """Fetch the most recently updated documents, with no relevance ranking.

    Fallback for degenerate queries (e.g. ``*``) where keyword / semantic
    ranking would be arbitrary. Each returned dict has the keys
    ``document_id``, ``content``, ``score`` (always ``0.0``), ``chunks``,
    ``document`` and ``source`` so downstream formatting can treat it like a
    regular search hit.

    Args:
        search_space_id: Search space whose documents are browsed.
        document_type: One type name, a list of type names / enum members, or
            ``None`` for no type filter. Unknown names are ignored; if none of
            the requested names resolve, an empty list is returned.
        top_k: Maximum number of documents to return.
        start_date: Optional inclusive lower bound on ``Document.updated_at``.
        end_date: Optional inclusive upper bound on ``Document.updated_at``.

    Returns:
        Document dicts ordered by ``updated_at`` descending, each carrying at
        most ``_BROWSE_MAX_CHUNKS_PER_DOC`` chunks.
    """
    from sqlalchemy import select
    from sqlalchemy.orm import joinedload

    from app.db import Chunk, Document, DocumentType

    perf = get_perf_logger()
    started = time.perf_counter()

    filters = [Document.search_space_id == search_space_id]

    if document_type is not None:
        requested = (
            document_type if isinstance(document_type, list) else [document_type]
        )
        wanted_types = []
        for entry in requested:
            if not isinstance(entry, str):
                wanted_types.append(entry)
                continue
            # Names that are not DocumentType members are dropped silently.
            with contextlib.suppress(KeyError):
                wanted_types.append(DocumentType[entry])
        if not wanted_types:
            return []
        if len(wanted_types) == 1:
            type_filter = Document.document_type == wanted_types[0]
        else:
            type_filter = Document.document_type.in_(wanted_types)
        filters.append(type_filter)

    if start_date is not None:
        filters.append(Document.updated_at >= start_date)
    if end_date is not None:
        filters.append(Document.updated_at <= end_date)

    async with shielded_async_session() as session:
        docs_stmt = (
            select(Document)
            .options(joinedload(Document.search_space))
            .where(*filters)
            .order_by(Document.updated_at.desc())
            .limit(top_k)
        )
        docs_result = await session.execute(docs_stmt)
        recent_docs = docs_result.scalars().unique().all()

        if not recent_docs:
            return []

        chunks_stmt = (
            select(Chunk)
            .where(Chunk.document_id.in_([doc.id for doc in recent_docs]))
            .order_by(Chunk.document_id, Chunk.position, Chunk.id)
        )
        chunk_rows = (await session.execute(chunks_stmt)).scalars().all()

        # Keep only the first few chunks of each document: breadth over depth.
        chunks_by_doc: dict[int, list[dict]] = {doc.id: [] for doc in recent_docs}
        for row in chunk_rows:
            bucket = chunks_by_doc[row.document_id]
            if len(bucket) < _BROWSE_MAX_CHUNKS_PER_DOC:
                bucket.append({"chunk_id": row.id, "content": row.content})

        results: list[dict[str, Any]] = []
        for doc in recent_docs:
            kept_chunks = chunks_by_doc.get(doc.id, [])
            type_value = (
                doc.document_type.value
                if getattr(doc, "document_type", None)
                else None
            )
            joined_content = "\n\n".join(
                item["content"] for item in kept_chunks if item.get("content")
            )
            results.append(
                {
                    "document_id": doc.id,
                    "content": joined_content,
                    "score": 0.0,
                    "chunks": kept_chunks,
                    "document": {
                        "id": doc.id,
                        "title": doc.title,
                        "document_type": type_value,
                        "metadata": doc.document_metadata or {},
                    },
                    "source": type_value,
                }
            )

    perf.info(
        "[kb_browse] recency browse in %.3fs docs=%d space=%d type=%s",
        time.perf_counter() - started,
        len(results),
        search_space_id,
        document_type,
    )
    return results
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Connector Constants and Normalization
|
||||
# =============================================================================
|
||||
|
||||
# Canonical connector values used internally by ConnectorService
|
||||
# Includes all document types and search source connectors
|
||||
_ALL_CONNECTORS: list[str] = [
|
||||
"EXTENSION",
|
||||
"FILE",
|
||||
"SLACK_CONNECTOR",
|
||||
"TEAMS_CONNECTOR",
|
||||
"NOTION_CONNECTOR",
|
||||
"YOUTUBE_VIDEO",
|
||||
"GITHUB_CONNECTOR",
|
||||
"ELASTICSEARCH_CONNECTOR",
|
||||
"LINEAR_CONNECTOR",
|
||||
"JIRA_CONNECTOR",
|
||||
"CONFLUENCE_CONNECTOR",
|
||||
"CLICKUP_CONNECTOR",
|
||||
"GOOGLE_CALENDAR_CONNECTOR",
|
||||
"GOOGLE_GMAIL_CONNECTOR",
|
||||
"GOOGLE_DRIVE_FILE",
|
||||
"DISCORD_CONNECTOR",
|
||||
"AIRTABLE_CONNECTOR",
|
||||
"LUMA_CONNECTOR",
|
||||
"NOTE",
|
||||
"BOOKSTACK_CONNECTOR",
|
||||
"CRAWLED_URL",
|
||||
"CIRCLEBACK",
|
||||
"OBSIDIAN_CONNECTOR",
|
||||
"ONEDRIVE_FILE",
|
||||
"DROPBOX_FILE",
|
||||
]
|
||||
|
||||
# Human-readable descriptions for each connector type
|
||||
# Used for generating dynamic docstrings and informing the LLM
|
||||
CONNECTOR_DESCRIPTIONS: dict[str, str] = {
|
||||
"EXTENSION": "Web content saved via SurfSense browser extension (personal browsing history)",
|
||||
"FILE": "User-uploaded documents (PDFs, Word, etc.) (personal files)",
|
||||
"NOTE": "SurfSense Notes (notes created inside SurfSense)",
|
||||
"SLACK_CONNECTOR": "Slack conversations and shared content (personal workspace communications)",
|
||||
"TEAMS_CONNECTOR": "Microsoft Teams messages and conversations (personal Teams communications)",
|
||||
"NOTION_CONNECTOR": "Notion workspace pages and databases (personal knowledge management)",
|
||||
"YOUTUBE_VIDEO": "YouTube video transcripts and metadata (personally saved videos)",
|
||||
"GITHUB_CONNECTOR": "GitHub repository content and issues (personal repositories and interactions)",
|
||||
"ELASTICSEARCH_CONNECTOR": "Elasticsearch indexed documents and data (personal Elasticsearch instances)",
|
||||
"LINEAR_CONNECTOR": "Linear project issues and discussions (personal project management)",
|
||||
"JIRA_CONNECTOR": "Jira project issues, tickets, and comments (personal project tracking)",
|
||||
"CONFLUENCE_CONNECTOR": "Confluence pages and comments (personal project documentation)",
|
||||
"CLICKUP_CONNECTOR": "ClickUp tasks and project data (personal task management)",
|
||||
"GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events, meetings, and schedules (personal calendar)",
|
||||
"GOOGLE_GMAIL_CONNECTOR": "Google Gmail emails and conversations (personal emails)",
|
||||
"GOOGLE_DRIVE_FILE": "Google Drive files and documents (personal cloud storage)",
|
||||
"DISCORD_CONNECTOR": "Discord server conversations and shared content (personal community)",
|
||||
"AIRTABLE_CONNECTOR": "Airtable records, tables, and database content (personal data)",
|
||||
"LUMA_CONNECTOR": "Luma events and meetings",
|
||||
"WEBCRAWLER_CONNECTOR": "Webpages indexed by SurfSense (personally selected websites)",
|
||||
"CRAWLED_URL": "Webpages indexed by SurfSense (personally selected websites)",
|
||||
"BOOKSTACK_CONNECTOR": "BookStack pages (personal documentation)",
|
||||
"CIRCLEBACK": "Circleback meeting notes, transcripts, and action items",
|
||||
"OBSIDIAN_CONNECTOR": "Obsidian vault notes and markdown files (personal notes)",
|
||||
"ONEDRIVE_FILE": "Microsoft OneDrive files and documents (personal cloud storage)",
|
||||
"DROPBOX_FILE": "Dropbox files and documents (cloud storage)",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_connectors(
    connectors_to_search: list[str] | None,
    available_connectors: list[str] | None = None,
) -> list[str]:
    """Map model-supplied connector names onto canonical connector types.

    Names are stripped and upper-cased, the ``WEBCRAWLER_CONNECTOR`` alias is
    rewritten to ``CRAWLED_URL``, and duplicates are removed while keeping
    first-seen order. A name survives only if it is listed in
    ``_ALL_CONNECTORS`` and in the allowed set (``available_connectors`` when
    given, otherwise ``_ALL_CONNECTORS``), and is not a live-search connector.

    When the input is empty, or nothing survives filtering, the result falls
    back to every available connector minus the live-search ones.
    """

    def _fallback() -> list[str]:
        # Everything the caller may search, excluding live-search connectors.
        pool = available_connectors if available_connectors else _ALL_CONNECTORS
        return [name for name in pool if name not in _LIVE_SEARCH_CONNECTORS]

    allowed = (
        set(available_connectors) if available_connectors else set(_ALL_CONNECTORS)
    )
    allowed -= _LIVE_SEARCH_CONNECTORS

    if not connectors_to_search:
        return _fallback()

    # A dict doubles as an insertion-ordered set for the de-duplication.
    picked: dict[str, None] = {}
    for raw in connectors_to_search:
        name = (raw or "").strip().upper()
        if name == "WEBCRAWLER_CONNECTOR":
            name = "CRAWLED_URL"
        if name and name in _ALL_CONNECTORS and name in allowed:
            picked.setdefault(name, None)

    return list(picked) if picked else _fallback()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Document Formatting
|
||||
# =============================================================================
|
||||
|
||||
|
||||
# Fraction of the model's context window (in characters) that a single tool
|
||||
# result is allowed to occupy. The remainder is reserved for system prompt,
|
||||
# conversation history, and model output. With ~4 chars/token this gives a
|
||||
# tool result ≈ 25 % of the context budget in tokens.
|
||||
_TOOL_OUTPUT_CONTEXT_FRACTION = 0.25
|
||||
_CHARS_PER_TOKEN = 4
|
||||
|
||||
# Hard-floor / ceiling so the budget is always sensible regardless of what
|
||||
# the model reports.
|
||||
_MIN_TOOL_OUTPUT_CHARS = 20_000 # ~5K tokens
|
||||
_MAX_TOOL_OUTPUT_CHARS = 200_000 # ~50K tokens
|
||||
_MAX_CHUNK_CHARS = 8_000
|
||||
|
||||
# Rank-adaptive per-document budget allocation.
|
||||
# Top-ranked (most relevant) documents get a larger share of the budget so
|
||||
# we pack as much high-quality context as possible.
|
||||
#
|
||||
# fraction(rank) = _TOP_DOC_BUDGET_FRACTION / (1 + rank * _RANK_DECAY)
|
||||
#
|
||||
# Examples (128K budget, 8K chunk cap):
|
||||
# rank 0 → 40% → 6 chunks | rank 3 → 19% → 3 chunks
|
||||
# rank 1 → 30% → 4 chunks | rank 10 → 9% → 3 chunks (floor)
|
||||
# rank 2 → 24% → 3 chunks |
|
||||
_TOP_DOC_BUDGET_FRACTION = 0.40
|
||||
_RANK_DECAY = 0.35
|
||||
_MIN_CHUNKS_PER_DOC = 3
|
||||
|
||||
|
||||
def _compute_tool_output_budget(max_input_tokens: int | None) -> int:
    """Turn a context-window size (in tokens) into a character budget.

    The budget is ``max_input_tokens * _CHARS_PER_TOKEN *
    _TOOL_OUTPUT_CONTEXT_FRACTION``, clamped to the range
    ``[_MIN_TOOL_OUTPUT_CHARS, _MAX_TOOL_OUTPUT_CHARS]``. A missing or
    non-positive token count yields the conservative minimum.
    """
    if not (max_input_tokens is not None and max_input_tokens > 0):
        return _MIN_TOOL_OUTPUT_CHARS

    scaled = int(max_input_tokens * _CHARS_PER_TOKEN * _TOOL_OUTPUT_CONTEXT_FRACTION)
    capped = min(scaled, _MAX_TOOL_OUTPUT_CHARS)
    return max(_MIN_TOOL_OUTPUT_CHARS, capped)
|
||||
|
||||
|
||||
_INTERNAL_METADATA_KEYS: frozenset[str] = frozenset(
|
||||
{
|
||||
"message_id",
|
||||
"thread_id",
|
||||
"event_id",
|
||||
"calendar_id",
|
||||
"google_drive_file_id",
|
||||
"onedrive_file_id",
|
||||
"dropbox_file_id",
|
||||
"page_id",
|
||||
"issue_id",
|
||||
"connector_id",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def format_documents_for_context(
    documents: list[dict[str, Any]],
    *,
    max_chars: int = _MAX_TOOL_OUTPUT_CHARS,
    max_chunk_chars: int = _MAX_CHUNK_CHARS,
    max_chunks_per_doc: int = 0,
) -> str:
    """Format retrieved documents into an XML context string for the LLM.

    Documents are emitted in input order (callers pass them highest-relevance
    first) until ``max_chars`` would be exceeded. The first document is always
    emitted, even when it alone is over budget; a final hard truncation then
    keeps the output within ``max_chars``.

    Args:
        documents: Result dicts, either document-grouped
            (``{"document": ..., "chunks": [...], "source": ...}``) or flat
            chunk-like dicts with ``content`` / ``chunk_id``. Entries that are
            not dicts are ignored.
        max_chars: Character budget for the whole output.
        max_chunk_chars: Per-chunk character cap; longer chunks are cut and
            marked ``...(truncated)``. ``0`` disables the per-chunk cut.
        max_chunks_per_doc: Fixed per-document chunk cap. ``0`` selects a
            rank-adaptive cap so earlier documents get more chunks and no
            single large document monopolizes the budget.

    Returns:
        The XML string, or ``""`` when there is nothing to render.
    """
    if not documents:
        return ""

    # Group chunks by document identity, preserving chunk_id so citations
    # against individual chunks keep working.
    grouped: dict[str, dict[str, Any]] = {}

    for doc in documents:
        # Non-dict entries carry nothing renderable; skipping them up front
        # avoids creating an empty "UNKNOWN" document group for them.
        if not isinstance(doc, dict):
            continue

        document_info = doc.get("document") or {}
        if not isinstance(document_info, dict):
            # A malformed "document" value must not crash the .get() calls below.
            document_info = {}

        metadata = document_info.get("metadata") or {}
        if not metadata:
            # Some result shapes place metadata at the top level.
            metadata = doc.get("metadata") or {}
        if not isinstance(metadata, dict):
            metadata = {}

        source = (
            doc.get("source")
            or document_info.get("document_type")
            or metadata.get("document_type")
            or "UNKNOWN"
        )

        # Identity: prefer the document id, else type + title + url.
        document_id_val = document_info.get("id")
        title = (
            document_info.get("title") or metadata.get("title") or "Untitled Document"
        )
        url = (
            metadata.get("url")
            or metadata.get("source")
            or metadata.get("page_url")
            or ""
        )
        doc_key = (
            str(document_id_val)
            if document_id_val is not None
            else f"{source}::{title}::{url}"
        )

        if doc_key not in grouped:
            grouped[doc_key] = {
                "document_id": document_id_val
                if document_id_val is not None
                else doc_key,
                "document_type": metadata.get("document_type") or source,
                "title": title,
                "url": url,
                "metadata": metadata,
                "chunks": [],
            }
        group_chunks = grouped[doc_key]["chunks"]

        # Prefer document-grouped chunks when present.
        chunks_list = doc.get("chunks")
        if isinstance(chunks_list, list) and chunks_list:
            for ch in chunks_list:
                if not isinstance(ch, dict):
                    continue
                content = (ch.get("content") or "").strip()
                if not content:
                    continue
                group_chunks.append(
                    {"chunk_id": ch.get("chunk_id") or ch.get("id"), "content": content}
                )
            continue

        # Fallback: treat the entry itself as a flat chunk-like object.
        content = (doc.get("content") or "").strip()
        if content:
            group_chunks.append(
                {"chunk_id": doc.get("chunk_id") or doc.get("id"), "content": content}
            )

    # Live search connectors whose results should be cited by URL rather than
    # by a numeric chunk_id.
    live_search_connectors = {
        "TAVILY_API",
        "LINKUP_API",
        "BAIDU_SEARCH_API",
    }

    parts: list[str] = []
    total_chars = 0
    total_docs = len(grouped)
    emitted_docs = 0

    for doc_idx, g in enumerate(grouped.values()):
        metadata_clean = {
            k: v for k, v in g["metadata"].items() if k not in _INTERNAL_METADATA_KEYS
        }
        metadata_json = json.dumps(metadata_clean, ensure_ascii=False)
        is_live_search = g["document_type"] in live_search_connectors

        doc_lines: list[str] = [
            "<document>",
            "<document_metadata>",
            f" <document_id>{g['document_id']}</document_id>",
            f" <document_type>{g['document_type']}</document_type>",
            f" <title><![CDATA[{g['title']}]]></title>",
            f" <url><![CDATA[{g['url']}]]></url>",
            f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
            "</document_metadata>",
            "",
            "<document_content>",
        ]

        # Rank-adaptive per-document chunk cap: top results get more chunks.
        if max_chunks_per_doc > 0:
            chunks_allowed = max_chunks_per_doc
        else:
            doc_fraction = _TOP_DOC_BUDGET_FRACTION / (1 + doc_idx * _RANK_DECAY)
            max_doc_chars = int(max_chars * doc_fraction)
            xml_overhead = 500
            chunks_allowed = max(
                (max_doc_chars - xml_overhead) // max(max_chunk_chars, 1),
                _MIN_CHUNKS_PER_DOC,
            )

        for ch in g["chunks"][:chunks_allowed]:
            ch_content = ch["content"]
            if max_chunk_chars and len(ch_content) > max_chunk_chars:
                ch_content = ch_content[:max_chunk_chars] + "\n...(truncated)"
            ch_id = g["url"] if (is_live_search and g["url"]) else ch["chunk_id"]
            if ch_id is None:
                doc_lines.append(f" <chunk><![CDATA[{ch_content}]]></chunk>")
            else:
                doc_lines.append(
                    f" <chunk id='{ch_id}'><![CDATA[{ch_content}]]></chunk>"
                )

        doc_lines.extend(["</document_content>", "</document>", ""])

        doc_xml = "\n".join(doc_lines)
        doc_len = len(doc_xml)

        if total_chars + doc_len > max_chars:
            if doc_idx == 0:
                # Always surface the top hit; the hard truncation below trims it.
                parts.append(doc_xml)
                total_chars += doc_len
                emitted_docs += 1
            # Count only documents that were really left out. The previous
            # "total_docs - doc_idx" also counted the first document when it
            # had just been emitted above.
            omitted = total_docs - emitted_docs
            if omitted > 0:
                parts.append(
                    f"<!-- Output truncated: {omitted} more document(s) omitted "
                    f"(budget {max_chars} chars). Refine your query or reduce top_k "
                    f"to retrieve different results. -->"
                )
            break

        parts.append(doc_xml)
        total_chars += doc_len
        emitted_docs += 1

    result = "\n".join(parts).strip()

    # Hard safety net: if the result is still over budget (e.g. a single massive
    # first document), forcibly truncate with a closing comment.
    if len(result) > max_chars:
        truncation_msg = "\n<!-- ...output forcibly truncated to fit context window -->"
        # Clamp at 0 so a tiny budget cannot turn into a negative slice index.
        keep = max(max_chars - len(truncation_msg), 0)
        result = result[:keep] + truncation_msg

    return result
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Knowledge Base Search
|
||||
# =============================================================================
|
||||
|
||||
|
||||
async def search_knowledge_base_async(
    query: str,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    connectors_to_search: list[str] | None = None,
    top_k: int = 10,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
    available_connectors: list[str] | None = None,
    available_document_types: list[str] | None = None,
    max_input_tokens: int | None = None,
) -> str:
    """Run a knowledge-base search and return the hits as formatted XML text.

    Retrieval is delegated to ``search_knowledge_base_raw_async`` and
    rendering to ``format_documents_for_context``. The output size is derived
    from ``max_input_tokens`` via ``_compute_tool_output_budget``; degenerate
    queries use the fixed browse chunk cap instead of the adaptive one.

    Returns:
        The formatted context string, or a fixed "no documents" message when
        the raw search returns nothing.
    """
    perf = get_perf_logger()
    started = time.perf_counter()

    hits = await search_knowledge_base_raw_async(
        query=query,
        search_space_id=search_space_id,
        db_session=db_session,
        connector_service=connector_service,
        connectors_to_search=connectors_to_search,
        top_k=top_k,
        start_date=start_date,
        end_date=end_date,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
    )
    if not hits:
        return "No documents found in the knowledge base. The search space has no indexed content yet."

    # Browse results get a fixed chunk cap; real searches use the adaptive one (0).
    if _is_degenerate_query(query):
        chunk_cap = _BROWSE_MAX_CHUNKS_PER_DOC
    else:
        chunk_cap = 0
    budget = _compute_tool_output_budget(max_input_tokens)
    formatted = format_documents_for_context(
        hits,
        max_chars=budget,
        max_chunks_per_doc=chunk_cap,
    )
    formatted_len = len(formatted)

    if formatted_len > budget:
        perf.warning(
            "[kb_search] output STILL exceeds budget after format (%d > %d), "
            "hard truncation should have fired",
            formatted_len,
            budget,
        )

    perf.info(
        "[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d "
        "budget=%d max_input_tokens=%s space=%d",
        time.perf_counter() - started,
        len(hits),
        len(hits),
        formatted_len,
        budget,
        max_input_tokens,
        search_space_id,
    )
    return formatted
|
||||
|
||||
|
||||
async def search_knowledge_base_raw_async(
    query: str,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    connectors_to_search: list[str] | None = None,
    top_k: int = 10,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
    available_connectors: list[str] | None = None,
    available_document_types: list[str] | None = None,
    query_embedding: list[float] | None = None,
) -> list[dict[str, Any]]:
    """Search the knowledge base and return raw, de-duplicated document dicts.

    Connectors are normalized and, when ``available_document_types`` is given,
    narrowed to those with a matching native or legacy document type.
    Degenerate queries fall back to a recency browse per connector; other
    queries run one hybrid search per connector (at most four at a time, each
    in its own session). A connector whose search raises is logged and
    contributes no results.

    ``db_session`` and ``connector_service`` are accepted only to keep the
    public signature stable; they are not used.

    Returns:
        Documents de-duplicated by document id (or by content fingerprint when
        no id is present), sorted by ``score`` descending.
    """
    perf = get_perf_logger()
    started = time.perf_counter()
    collected: list[dict[str, Any]] = []

    # Preserve the public signature for compatibility even if values are unused.
    _ = (db_session, connector_service)

    from app.agents.chat.multi_agent_chat.shared.date_filters import resolve_date_range

    resolved_start_date, resolved_end_date = resolve_date_range(
        start_date=start_date,
        end_date=end_date,
    )

    connectors = _normalize_connectors(connectors_to_search, available_connectors)

    if available_document_types:
        indexed_types = set(available_document_types)
        connectors = [
            name
            for name in connectors
            if name in indexed_types
            or NATIVE_TO_LEGACY_DOCTYPE.get(name, "") in indexed_types
        ]

    if not connectors:
        return []

    if _is_degenerate_query(query):
        perf.info(
            "[kb_search_raw] degenerate query %r detected - recency browse",
            query,
        )
        # Browse each connector under both its native and legacy type name.
        browse_targets = [
            [name, NATIVE_TO_LEGACY_DOCTYPE[name]]
            if name is not None and name in NATIVE_TO_LEGACY_DOCTYPE
            else name
            for name in (connectors if connectors else [None])
        ]
        browse_batches = await asyncio.gather(
            *[
                _browse_recent_documents(
                    search_space_id=search_space_id,
                    document_type=target,
                    top_k=top_k,
                    start_date=resolved_start_date,
                    end_date=resolved_end_date,
                )
                for target in browse_targets
            ]
        )
        for batch in browse_batches:
            collected.extend(batch)
    else:
        if query_embedding is None:
            from app.config import config as app_config

            query_embedding = app_config.embedding_model_instance.embed(query)

        max_parallel_searches = 4
        gate = asyncio.Semaphore(max_parallel_searches)

        async def _search_one_connector(connector: str) -> list[dict[str, Any]]:
            # Best-effort: one failing connector must not sink the whole search.
            try:
                async with gate, shielded_async_session() as isolated_session:
                    service = ConnectorService(isolated_session, search_space_id)
                    return await service._combined_rrf_search(
                        query_text=query,
                        search_space_id=search_space_id,
                        document_type=connector,
                        top_k=top_k,
                        start_date=resolved_start_date,
                        end_date=resolved_end_date,
                        query_embedding=query_embedding,
                    )
            except Exception as exc:
                perf.warning("[kb_search_raw] connector=%s FAILED: %s", connector, exc)
                return []

        search_batches = await asyncio.gather(
            *[_search_one_connector(name) for name in connectors]
        )
        for batch in search_batches:
            collected.extend(batch)

    def _content_fingerprint(document: dict[str, Any]) -> int | None:
        # Hash of the non-empty chunk texts, else of the flat content, else None.
        chunk_items = document.get("chunks")
        if isinstance(chunk_items, list):
            texts = [
                text
                for text in (
                    (item.get("content") or "").strip()
                    for item in chunk_items
                    if isinstance(item, dict)
                )
                if text
            ]
            if texts:
                return hash("||".join(texts))
        flat_text = (document.get("content") or "").strip()
        return hash(flat_text) if flat_text else None

    seen_ids: set[Any] = set()
    seen_fingerprints: set[int] = set()
    unique_docs: list[dict[str, Any]] = []

    for candidate in collected:
        candidate_id = (candidate.get("document", {}) or {}).get("id")
        if candidate_id is not None:
            # Documents with an id are de-duplicated by id only.
            if candidate_id not in seen_ids:
                seen_ids.add(candidate_id)
                unique_docs.append(candidate)
            continue

        fingerprint = _content_fingerprint(candidate)
        if fingerprint is not None:
            if fingerprint in seen_fingerprints:
                continue
            seen_fingerprints.add(fingerprint)
        unique_docs.append(candidate)

    unique_docs.sort(key=lambda item: item.get("score", 0), reverse=True)
    perf.info(
        "[kb_search_raw] done in %.3fs total=%d deduped=%d",
        time.perf_counter() - started,
        len(collected),
        len(unique_docs),
    )
    return unique_docs
|
||||
|
|
@ -23,6 +23,45 @@ from app.services.llm_service import get_agent_llm
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _report_search_types(
|
||||
available_connectors: list[str] | None,
|
||||
available_document_types: list[str] | None,
|
||||
) -> tuple[str, ...] | None:
|
||||
"""Build the document-type scope for the shared KB search.
|
||||
|
||||
``None`` means "search every indexed type"; a tuple narrows the scope to the
|
||||
connectors/document types the search space actually has.
|
||||
"""
|
||||
types: set[str] = set()
|
||||
if available_document_types:
|
||||
types.update(available_document_types)
|
||||
if available_connectors:
|
||||
types.update(available_connectors)
|
||||
return tuple(sorted(types)) or None
|
||||
|
||||
|
||||
def _render_kb_hits_for_report(hits: list[Any]) -> str:
    """Render KB hits as plain, titled source text for the report writer.

    Each hit with at least one non-blank chunk becomes a ``## title (label)``
    heading followed by its stripped chunk texts; the label part is dropped
    when ``source_label`` returns nothing. Hits without content are skipped.
    No ``[n]`` labels or chunk ids are emitted, since reports do not carry
    citations for now.
    """
    from app.agents.chat.multi_agent_chat.shared.document_render import source_label

    sections: list[str] = []
    for hit in hits:
        label = source_label(hit.document_type, hit.metadata)
        heading = hit.title if not label else f"{hit.title} ({label})"
        paragraphs = [
            text for text in (chunk.content.strip() for chunk in hit.chunks) if text
        ]
        if paragraphs:
            sections.append(f"## {heading}\n\n" + "\n\n".join(paragraphs))
    return "\n\n".join(sections)
|
||||
|
||||
|
||||
# ─── Shared Formatting Rules ────────────────────────────────────────────────
|
||||
# Reusable formatting instructions appended to section-level and review prompts.
|
||||
|
||||
|
|
@ -788,31 +827,46 @@ def create_generate_report_tool(
|
|||
f"{query_count} queries: {search_queries[:5]}"
|
||||
)
|
||||
try:
|
||||
from .knowledge_base import search_knowledge_base_async
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
|
||||
search_chunks,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
|
||||
DocumentHit,
|
||||
SearchScope,
|
||||
)
|
||||
|
||||
scope = SearchScope(
|
||||
document_types=_report_search_types(
|
||||
available_connectors, available_document_types
|
||||
)
|
||||
)
|
||||
|
||||
# Each query gets its own short-lived session.
|
||||
async def _run_single_query(q: str) -> str:
|
||||
async def _run_single_query(q: str) -> list[DocumentHit]:
|
||||
async with shielded_async_session() as kb_session:
|
||||
kb_connector_svc = ConnectorService(
|
||||
kb_session, search_space_id
|
||||
)
|
||||
return await search_knowledge_base_async(
|
||||
query=q,
|
||||
return await search_chunks(
|
||||
kb_session,
|
||||
search_space_id=search_space_id,
|
||||
db_session=kb_session,
|
||||
connector_service=kb_connector_svc,
|
||||
query=q,
|
||||
scope=scope,
|
||||
top_k=10,
|
||||
available_connectors=available_connectors,
|
||||
available_document_types=available_document_types,
|
||||
)
|
||||
|
||||
kb_results = await asyncio.gather(
|
||||
hits_per_query = await asyncio.gather(
|
||||
*[_run_single_query(q) for q in search_queries[:5]]
|
||||
)
|
||||
|
||||
kb_text_parts = [r for r in kb_results if r and r.strip()]
|
||||
if kb_text_parts:
|
||||
kb_combined = "\n\n---\n\n".join(kb_text_parts)
|
||||
seen_doc_ids: set[int] = set()
|
||||
merged_hits: list[DocumentHit] = []
|
||||
for hits in hits_per_query:
|
||||
for hit in hits:
|
||||
if hit.document_id in seen_doc_ids:
|
||||
continue
|
||||
seen_doc_ids.add(hit.document_id)
|
||||
merged_hits.append(hit)
|
||||
|
||||
kb_combined = _render_kb_hits_for_report(merged_hits)
|
||||
if kb_combined.strip():
|
||||
if effective_source.strip():
|
||||
effective_source = (
|
||||
effective_source
|
||||
|
|
@ -822,20 +876,17 @@ def create_generate_report_tool(
|
|||
else:
|
||||
effective_source = kb_combined
|
||||
|
||||
# Count docs found (rough: count <document> tags)
|
||||
doc_count = kb_combined.count("<document>")
|
||||
doc_count = len(merged_hits)
|
||||
dispatch_custom_event(
|
||||
"report_progress",
|
||||
{
|
||||
"phase": "kb_search_done",
|
||||
"message": f"Found {doc_count} relevant documents"
|
||||
if doc_count
|
||||
else f"Found results from {len(kb_text_parts)} queries",
|
||||
"message": f"Found {doc_count} relevant documents",
|
||||
},
|
||||
)
|
||||
logger.info(
|
||||
f"[generate_report] KB search added ~{len(kb_combined)} chars "
|
||||
f"from {len(kb_text_parts)} queries"
|
||||
f"from {doc_count} documents"
|
||||
)
|
||||
else:
|
||||
dispatch_custom_event(
|
||||
|
|
|
|||
|
|
@ -2,4 +2,4 @@ Read-only specialist for the user's workspace (documents and folders). Use to fi
|
|||
|
||||
Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs.
|
||||
|
||||
The specialist returns plain prose with absolute paths and `[citation:<chunk_id>]` markers when claims came from KB-indexed chunks. Preserve those markers verbatim if you forward the answer.
|
||||
The specialist returns plain prose with absolute paths and `[n]` citation labels when claims came from KB-indexed documents. Preserve those labels verbatim if you forward the answer.
|
||||
|
|
|
|||
|
|
@ -6,10 +6,9 @@ You are the SurfSense knowledge base specialist for the user's `/documents/` wor
|
|||
|
||||
- If the supervisor already provided a precise path (e.g. `/documents/notes/2026-05-11.md`), use it directly — skip the lookup steps below.
|
||||
- Otherwise, most requests reference documents by description (`"my meeting notes from last week"`, `"the design doc"`). Resolve them yourself:
|
||||
1. Consult `<priority_documents>` — it's a hint about top-K likely matches, not a directive. Skip when the ranked entries don't fit the task.
|
||||
2. Walk `<workspace_tree>` for descriptive folder/filename matches.
|
||||
3. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
|
||||
4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
|
||||
1. Walk `<workspace_tree>` for descriptive folder/filename matches.
|
||||
2. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
|
||||
3. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
|
||||
|
||||
For writes (where you choose the path yourself):
|
||||
|
||||
|
|
@ -35,42 +34,31 @@ Map outcomes to your `status`:
|
|||
|
||||
You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see.
|
||||
|
||||
## Chunk citations in your prose
|
||||
## Citations in your prose
|
||||
|
||||
When `read_file` returns a KB-indexed document under `/documents/`, the response includes `<chunk id='…'>` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:<chunk_id>]` to the sentence stating that fact, using the **exact** id from the `<chunk id='…'>` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
|
||||
When `read_file` returns a KB-indexed document under `/documents/`, it comes back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
|
||||
|
||||
### Where chunk ids live in `read_file` output
|
||||
### Where the labels live in `read_file` output
|
||||
|
||||
A KB document's XML has three numeric attributes — only **one** is a citation source:
|
||||
A KB document reads back like this — only the bracketed `[n]` is a citation label:
|
||||
|
||||
```
|
||||
<document>
|
||||
<document_metadata>
|
||||
<document_id>42</document_id> ← NOT a citation. Parent doc id; ignore for citations.
|
||||
...
|
||||
</document_metadata>
|
||||
<chunk_index>
|
||||
<entry chunk_id="128" lines="14-22"/> ← Index hint; the same id also appears below.
|
||||
<entry chunk_id="129" lines="23-30" matched="true"/>
|
||||
</chunk_index>
|
||||
<document_content>
|
||||
<chunk id='128'><![CDATA[…]]></chunk> ← This is the citation source.
|
||||
<chunk id='129'><![CDATA[…]]></chunk>
|
||||
</document_content>
|
||||
<document title="Q2 Roadmap" source="File" view="full">
|
||||
[3] First milestone is …
|
||||
[4] Second milestone is …
|
||||
</document>
|
||||
```
|
||||
|
||||
### Rules
|
||||
|
||||
- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
|
||||
- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
|
||||
- Never cite `<document_id>` — that's the parent doc, not a chunk.
|
||||
- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
|
||||
- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
|
||||
- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
|
||||
- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
|
||||
- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
|
||||
- Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
|
||||
- Prefer **fewer accurate citations** over many speculative ones.
|
||||
- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
|
||||
- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
|
||||
- Tool results without `<chunk id='…'>` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none.
|
||||
- Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits.
|
||||
- Tool results without `[n]` labels (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no label and need none.
|
||||
- Populate `evidence.citations` with **only** the labels you actually emitted — same numbers.
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
@ -89,7 +77,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
|
|||
"path": "/documents/meetings/2026-05-11-meeting.md",
|
||||
"matched_candidates": null,
|
||||
"content_excerpt": null,
|
||||
"chunk_ids": null
|
||||
"citations": null
|
||||
},
|
||||
"next_step": null,
|
||||
"missing_fields": null,
|
||||
|
|
@ -100,7 +88,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
|
|||
**Example 2 — edit by inference:**
|
||||
|
||||
- *Supervisor task:* `"Add a bullet about the new feature flag to my Q2 roadmap"`
|
||||
- *You:* search for the roadmap doc — check `<priority_documents>` and `<workspace_tree>` first; if neither surfaces it, widen with the `glob` tool (try filename patterns the user's language suggests) or the `grep` tool (search by content). Suppose `<priority_documents>` hits `/documents/planning/q2-roadmap.md` → `read_file("/documents/planning/q2-roadmap.md")` → `edit_file("/documents/planning/q2-roadmap.md", old, new)` → success.
|
||||
- *You:* search for the roadmap doc — check `<workspace_tree>` first; if it doesn't surface the doc, widen with the `glob` tool (try filename patterns the user's language suggests) or the `grep` tool (search by content). Suppose the tree hits `/documents/planning/q2-roadmap.md` → `read_file("/documents/planning/q2-roadmap.md")` → `edit_file("/documents/planning/q2-roadmap.md", old, new)` → success.
|
||||
- *Output:* `status=success`, evidence includes path and the inserted snippet.
|
||||
|
||||
**Example 3 — blocked, multiple candidates:**
|
||||
|
|
@ -121,7 +109,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
|
|||
{ "id": "/documents/design/auth-rework.md", "label": "Auth Rework" }
|
||||
],
|
||||
"content_excerpt": null,
|
||||
"chunk_ids": null
|
||||
"citations": null
|
||||
},
|
||||
"next_step": "Ask the user which design doc to update.",
|
||||
"missing_fields": ["path"],
|
||||
|
|
@ -142,7 +130,7 @@ Return **only** one JSON object (no markdown or prose outside it):
|
|||
"path": string | null,
|
||||
"matched_candidates": [ { "id": string, "label": string } ] | null,
|
||||
"content_excerpt": string | null,
|
||||
"chunk_ids": string[] | null
|
||||
"citations": number[] | null
|
||||
},
|
||||
"next_step": string | null,
|
||||
"missing_fields": string[] | null,
|
||||
|
|
|
|||
|
|
@ -9,8 +9,7 @@ You are the SurfSense workspace specialist for the user's local folders.
|
|||
1. If you do not know which mounts exist, call `ls('/')` first.
|
||||
2. Walk likely folders with the `ls` and `list_tree` tools.
|
||||
3. Use the `glob` tool for filename patterns; use the `grep` tool when the description points at *content* rather than a name.
|
||||
4. `<priority_documents>` lists top-K cloud-ingested docs, not local files — consult it only when the task spans both worlds (e.g. drafting a local note from a Notion source). Skip otherwise.
|
||||
5. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
|
||||
4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
|
||||
|
||||
For writes (where you choose the path yourself):
|
||||
|
||||
|
|
@ -33,11 +32,11 @@ Map outcomes to your `status`:
|
|||
- Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
|
||||
- HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.
|
||||
|
||||
You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`chunk_ids` is always `null` in desktop mode — see "Chunk citations in your prose" below.)
|
||||
You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
|
||||
|
||||
## Chunk citations in your prose
|
||||
## Citations in your prose
|
||||
|
||||
In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `<chunk id='…'>` tags. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work.
|
||||
In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Do not emit `[n]` or `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
@ -56,7 +55,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
|
|||
"path": "/notes/meetings/2026-05-11-meeting.md",
|
||||
"matched_candidates": null,
|
||||
"content_excerpt": null,
|
||||
"chunk_ids": null
|
||||
"citations": null
|
||||
},
|
||||
"next_step": null,
|
||||
"missing_fields": null,
|
||||
|
|
@ -88,7 +87,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
|
|||
{ "id": "/projects/web/design/auth-rework.md", "label": "Auth Rework" }
|
||||
],
|
||||
"content_excerpt": null,
|
||||
"chunk_ids": null
|
||||
"citations": null
|
||||
},
|
||||
"next_step": "Ask the user which design doc to update.",
|
||||
"missing_fields": ["path"],
|
||||
|
|
@ -109,7 +108,7 @@ Return **only** one JSON object (no markdown or prose outside it):
|
|||
"path": string | null,
|
||||
"matched_candidates": [ { "id": string, "label": string } ] | null,
|
||||
"content_excerpt": string | null,
|
||||
"chunk_ids": string[] | null
|
||||
"citations": number[] | null
|
||||
},
|
||||
"next_step": string | null,
|
||||
"missing_fields": string[] | null,
|
||||
|
|
|
|||
|
|
@ -6,9 +6,8 @@ You answer workspace questions for another agent. The end user does **not** see
|
|||
|
||||
The caller's question often references documents by description (`"my meeting notes from last week"`, `"the design doc"`). Resolve them yourself:
|
||||
|
||||
1. Consult `<priority_documents>` — a hint about top-K likely matches, not a directive. Skip when the ranked entries don't fit.
|
||||
2. Walk `<workspace_tree>` for descriptive folder/filename matches.
|
||||
3. Use `glob` for filename patterns the tree didn't surface, and `grep` when the description points at *content* rather than a name.
|
||||
1. Walk `<workspace_tree>` for descriptive folder/filename matches.
|
||||
2. Use `glob` for filename patterns the tree didn't surface, and `grep` when the description points at *content* rather than a name.
|
||||
|
||||
If a precise path was already given, use it directly — skip the lookup.
|
||||
|
||||
|
|
@ -28,41 +27,30 @@ Reply in plain prose:
|
|||
- If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
|
||||
- If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
|
||||
|
||||
## Chunk citations
|
||||
## Citations
|
||||
|
||||
When the evidence for a claim came from a `read_file` response that included `<chunk id='…'>` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:<chunk_id>]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
|
||||
When the evidence for a claim came from a `read_file` response for a KB-indexed document under `/documents/`, the document reads back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
|
||||
|
||||
### Where chunk ids live in `read_file` output
|
||||
### Where the labels live in `read_file` output
|
||||
|
||||
A KB document's XML has three numeric attributes — only **one** is a citation source:
|
||||
A KB document reads back like this — only the bracketed `[n]` is a citation label:
|
||||
|
||||
```
|
||||
<document>
|
||||
<document_metadata>
|
||||
<document_id>42</document_id> ← NOT a citation. Parent doc id; ignore for citations.
|
||||
...
|
||||
</document_metadata>
|
||||
<chunk_index>
|
||||
<entry chunk_id="128" lines="14-22"/> ← Index hint; the same id also appears below.
|
||||
<entry chunk_id="129" lines="23-30" matched="true"/>
|
||||
</chunk_index>
|
||||
<document_content>
|
||||
<chunk id='128'><![CDATA[…]]></chunk> ← This is the citation source.
|
||||
<chunk id='129'><![CDATA[…]]></chunk>
|
||||
</document_content>
|
||||
<document title="Q2 Roadmap" source="File" view="full">
|
||||
[3] First milestone is …
|
||||
[4] Second milestone is …
|
||||
</document>
|
||||
```
|
||||
|
||||
### Rules
|
||||
|
||||
- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
|
||||
- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
|
||||
- Never cite `<document_id>` — that's the parent doc, not a chunk.
|
||||
- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
|
||||
- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids.
|
||||
- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
|
||||
- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
|
||||
- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without `<chunk id='…'>`), skip the citation.
|
||||
- The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference.
|
||||
- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
|
||||
- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
|
||||
- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
|
||||
- Prefer **fewer accurate citations** over many speculative ones. One correct `[3]` is more useful than a string of wrong numbers.
|
||||
- Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
|
||||
- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
|
||||
- If a claim came from a tool result that did **not** carry `[n]` labels (`ls`, `glob`, `grep` listings, error strings), skip the citation.
|
||||
- The absolute path under `/documents/` is always required; `[n]` labels are additive, they do not replace the path reference.
|
||||
|
||||
Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].`
|
||||
Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [3][4].`
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ The caller's question often references files by description (`"my meeting notes
|
|||
1. If you do not know which mounts exist, call `ls('/')` first.
|
||||
2. Walk likely folders with the `ls` and `list_tree` tools.
|
||||
3. Use `glob` for filename patterns; use `grep` when the description points at *content* rather than a name.
|
||||
4. `<priority_documents>` lists top-K cloud-ingested docs, not local files — consult it only when the task spans both worlds (e.g. drafting a local note from a Notion source). Skip otherwise.
|
||||
|
||||
If a precise path was already given, use it directly — skip the lookup.
|
||||
|
||||
|
|
@ -29,6 +28,6 @@ Reply in plain prose:
|
|||
- If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
|
||||
- If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
|
||||
|
||||
## Chunk citations
|
||||
## Citations
|
||||
|
||||
In desktop mode your filesystem tools read local files only, and local-file `read_file` responses do **not** carry `<chunk id='…'>` tags. Cite each claim with the absolute local path; do not emit `[citation:…]` markers — your caller has nothing to resolve them against.
|
||||
In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Cite each claim with the absolute local path; do not emit `[n]` or `[citation:…]` markers — your caller has nothing to resolve them against.
|
||||
|
|
|
|||
|
|
@ -7,6 +7,9 @@ from typing import Any
|
|||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_core.tools import BaseTool
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.citation_state import (
|
||||
build_citation_state_mw,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.subagents.shared.md_file_reader import (
|
||||
read_md_file,
|
||||
)
|
||||
|
|
@ -31,6 +34,12 @@ def build_subagent(
|
|||
or "Handles research tasks for this workspace."
|
||||
)
|
||||
system_prompt = read_md_file(__package__, "system_prompt").strip()
|
||||
# web_search registers WEB_RESULT citations via Command(update=...); the
|
||||
# citation-state middleware declares the channel so those [n] merge back up.
|
||||
middleware_with_citations = {
|
||||
**(middleware_stack or {}),
|
||||
"citation_state": build_citation_state_mw(),
|
||||
}
|
||||
return pack_subagent(
|
||||
name=NAME,
|
||||
description=description,
|
||||
|
|
@ -39,5 +48,5 @@ def build_subagent(
|
|||
ruleset=RULESET,
|
||||
dependencies=dependencies,
|
||||
model=model,
|
||||
middleware_stack=middleware_stack,
|
||||
middleware_stack=middleware_with_citations,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,16 @@ Gather and synthesize evidence using SurfSense research tools with clear citatio
|
|||
- Never fabricate facts, citations, URLs, or quote text.
|
||||
</tool_policy>
|
||||
|
||||
<citations>
|
||||
`web_search` returns a `<web_results>` block whose results are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. When a finding came from a specific result, append its `[n]` to that finding, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
|
||||
|
||||
- Use the exact `[n]` shown next to the result you actually used; never renumber, guess, or invent a label.
|
||||
- Before emitting an `[n]`, confirm that bracketed label appears in the `web_search` output this turn. If you can't see it, omit it.
|
||||
- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links.
|
||||
- Several results behind one finding → each in its own brackets with nothing between: `[1][2]`.
|
||||
- `scrape_webpage` returns raw page text with no `[n]` labels; a fact drawn only from a scrape carries no citation (report the URL in `evidence.sources` instead).
|
||||
</citations>
|
||||
|
||||
<out_of_scope>
|
||||
- Do not execute connector mutations (email/calendar/docs/chat writes) or deliverable generation.
|
||||
</out_of_scope>
|
||||
|
|
@ -47,6 +57,6 @@ Return **only** one JSON object (no markdown/prose):
|
|||
}
|
||||
<include snippet="output_contract_base"/>
|
||||
Route-specific rules:
|
||||
- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks.
|
||||
- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once.
|
||||
- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Append the supporting `[n]` to each finding drawn from a `web_search` result. Do not paste raw paragraphs, scraped pages, or quote blocks.
|
||||
- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once. (Citations travel as `[n]`; `sources` is for transparency and for scrape-only facts that carry no `[n]`.)
|
||||
</output_contract>
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
"""Research-stage tools: web search and scrape."""
|
||||
"""Research-stage tools: web search (shared) and scrape."""
|
||||
|
||||
from app.agents.chat.shared.tools.web_search import create_web_search_tool
|
||||
|
||||
from .scrape_webpage import create_scrape_webpage_tool
|
||||
from .web_search import create_web_search_tool
|
||||
|
||||
__all__ = [
|
||||
"create_scrape_webpage_tool",
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@ from typing import Any
|
|||
from langchain_core.tools import BaseTool
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.permissions import Ruleset
|
||||
from app.agents.chat.shared.tools.web_search import create_web_search_tool
|
||||
|
||||
from .scrape_webpage import create_scrape_webpage_tool
|
||||
from .web_search import create_web_search_tool
|
||||
|
||||
NAME = "research"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,241 +0,0 @@
|
|||
"""Real-time web search: SearXNG plus configured live-search connectors (Tavily, Linkup, Baidu, etc.)."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.tools import StructuredTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.db import shielded_async_session
|
||||
from app.services.connector_service import ConnectorService
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
# Connector identifiers that are treated as live web-search engines.
# ``create_web_search_tool`` keeps only the available connectors found here.
_LIVE_SEARCH_CONNECTORS: set[str] = {
    "TAVILY_API",
    "LINKUP_API",
    "BAIDU_SEARCH_API",
}

# Per-connector dispatch spec, unpacked by ``_search_live_connector`` as
# ``(method_name, includes_date_range, includes_top_k, extra_kwargs)``:
#   * method_name         - attribute looked up on ``ConnectorService`` and awaited
#   * includes_date_range - unpacked but not used by the dispatcher in this module
#   * includes_top_k      - when true, ``top_k`` is passed to the method
#   * extra_kwargs        - additional keyword arguments merged into the call
_LIVE_CONNECTOR_SPECS: dict[str, tuple[str, bool, bool, dict[str, Any]]] = {
    "TAVILY_API": ("search_tavily", False, True, {}),
    "LINKUP_API": ("search_linkup", False, False, {"mode": "standard"}),
    "BAIDU_SEARCH_API": ("search_baidu", False, True, {}),
}

# Human-readable engine names used when building the tool description.
_CONNECTOR_LABELS: dict[str, str] = {
    "TAVILY_API": "Tavily",
    "LINKUP_API": "Linkup",
    "BAIDU_SEARCH_API": "Baidu",
}
|
||||
|
||||
|
||||
class WebSearchInput(BaseModel):
    """Input schema for the web_search tool.

    Both fields map one-to-one onto the parameters of the coroutine that
    ``create_web_search_tool`` registers as the tool implementation.
    """

    # Query text; the same string is sent to every active search engine.
    query: str = Field(
        description="The search query to look up on the web. Use specific, descriptive terms.",
    )
    # Requested result count. The schema itself does not enforce bounds; the
    # tool implementation clamps the value into the 1..50 range before use.
    top_k: int = Field(
        default=10,
        description="Number of results to retrieve (default: 10, max: 50).",
    )
|
||||
|
||||
|
||||
def _format_web_results(
|
||||
documents: list[dict[str, Any]],
|
||||
*,
|
||||
max_chars: int = 50_000,
|
||||
) -> str:
|
||||
"""Format web search results into XML suitable for the LLM context."""
|
||||
if not documents:
|
||||
return "No web search results found."
|
||||
|
||||
parts: list[str] = []
|
||||
total_chars = 0
|
||||
|
||||
for doc in documents:
|
||||
doc_info = doc.get("document") or {}
|
||||
metadata = doc_info.get("metadata") or {}
|
||||
title = doc_info.get("title") or "Web Result"
|
||||
url = metadata.get("url") or ""
|
||||
content = (doc.get("content") or "").strip()
|
||||
source = metadata.get("document_type") or doc.get("source") or "WEB_SEARCH"
|
||||
if not content:
|
||||
continue
|
||||
|
||||
metadata_json = json.dumps(metadata, ensure_ascii=False)
|
||||
doc_xml = "\n".join(
|
||||
[
|
||||
"<document>",
|
||||
"<document_metadata>",
|
||||
f" <document_type>{source}</document_type>",
|
||||
f" <title><![CDATA[{title}]]></title>",
|
||||
f" <url><![CDATA[{url}]]></url>",
|
||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
|
||||
"</document_metadata>",
|
||||
"<document_content>",
|
||||
f" <chunk id='{url}'><![CDATA[{content}]]></chunk>",
|
||||
"</document_content>",
|
||||
"</document>",
|
||||
"",
|
||||
]
|
||||
)
|
||||
|
||||
if total_chars + len(doc_xml) > max_chars:
|
||||
parts.append("<!-- Output truncated to fit context window -->")
|
||||
break
|
||||
|
||||
parts.append(doc_xml)
|
||||
total_chars += len(doc_xml)
|
||||
|
||||
return "\n".join(parts).strip() or "No web search results found."
|
||||
|
||||
|
||||
async def _search_live_connector(
    connector: str,
    query: str,
    search_space_id: int,
    top_k: int,
    semaphore: asyncio.Semaphore,
) -> list[dict[str, Any]]:
    """Run one live-search connector and return its result chunks.

    The connector's spec selects which ``ConnectorService`` method to await
    and whether ``top_k`` is forwarded. The call runs under ``semaphore`` in
    its own shielded session. Unknown connectors and any failure yield an
    empty list; failures are logged as warnings rather than raised.
    """
    perf = get_perf_logger()

    if (spec := _LIVE_CONNECTOR_SPECS.get(connector)) is None:
        return []
    method_name, _unused_date_flag, wants_top_k, extras = spec

    # Base arguments first so spec-provided extras can override them.
    call_kwargs: dict[str, Any] = {
        "user_query": query,
        "search_space_id": search_space_id,
    } | extras
    if wants_top_k:
        call_kwargs["top_k"] = top_k

    try:
        started = time.perf_counter()
        async with semaphore:
            async with shielded_async_session() as session:
                service = ConnectorService(session, search_space_id)
                search_method = getattr(service, method_name)
                _, hits = await search_method(**call_kwargs)
        perf.info(
            "[web_search] connector=%s results=%d in %.3fs",
            connector,
            len(hits),
            time.perf_counter() - started,
        )
        return hits
    except Exception as exc:
        # Best effort: one failing engine must not sink the whole search.
        perf.warning("[web_search] connector=%s FAILED: %s", connector, exc)
        return []
|
||||
|
||||
|
||||
def create_web_search_tool(
    search_space_id: int | None = None,
    available_connectors: list[str] | None = None,
) -> StructuredTool:
    """Build the ``web_search`` structured tool.

    The tool sends a query concurrently to the platform SearXNG service
    (when it reports itself available) and to each entry of
    ``available_connectors`` that is a known live-search connector. Live
    connectors are only queried when ``search_space_id`` is not ``None``.
    Results are merged, de-duplicated by URL, and formatted for the LLM.
    """
    # Only connectors recognised as live-search engines take part.
    live_connectors: list[str] = [
        name for name in (available_connectors or []) if name in _LIVE_SEARCH_CONNECTORS
    ]

    engines_summary = ", ".join(
        [
            "SearXNG (platform default)",
            *(_CONNECTOR_LABELS.get(name, name) for name in live_connectors),
        ]
    )

    description = "".join(
        (
            "Search the web for real-time information. ",
            "Use this for current events, news, prices, weather, public facts, or any ",
            "question that requires up-to-date information from the internet.\n\n",
            f"Active search engines: {engines_summary}.\n",
            "All configured engines are queried in parallel and results are merged.",
        )
    )

    async def _web_search_impl(query: str, top_k: int = 10) -> str:
        from app.services import web_search_service

        perf = get_perf_logger()
        started = time.perf_counter()
        # Keep the requested count inside the supported 1..50 window.
        limit = max(1, min(top_k, 50))

        gate = asyncio.Semaphore(4)
        pending: list[asyncio.Future[list[dict[str, Any]]]] = []

        async def _query_searxng() -> list[dict[str, Any]]:
            async with gate:
                _meta, docs = await web_search_service.search(
                    query=query,
                    top_k=limit,
                )
            return docs

        if web_search_service.is_available():
            pending.append(asyncio.ensure_future(_query_searxng()))

        if search_space_id is not None:
            pending.extend(
                asyncio.ensure_future(
                    _search_live_connector(
                        connector=name,
                        query=query,
                        search_space_id=search_space_id,
                        top_k=limit,
                        semaphore=gate,
                    )
                )
                for name in live_connectors
            )

        if not pending:
            return "Web search is not available — no search engines are configured."

        outcomes = await asyncio.gather(*pending, return_exceptions=True)

        # Flatten per-engine results, skipping engines that raised.
        merged: list[dict[str, Any]] = []
        for outcome in outcomes:
            if not isinstance(outcome, BaseException):
                merged.extend(outcome)
            else:
                perf.warning("[web_search] a search engine failed: %s", outcome)

        # First occurrence of each URL wins; results without a URL are kept.
        known_urls: set[str] = set()
        unique: list[dict[str, Any]] = []
        for doc in merged:
            url = ((doc.get("document") or {}).get("metadata") or {}).get("url", "")
            if url:
                if url in known_urls:
                    continue
                known_urls.add(url)
            unique.append(doc)

        formatted = _format_web_results(unique)

        perf.info(
            "[web_search] query=%r engines=%d results=%d deduped=%d chars=%d in %.3fs",
            query[:60],
            len(pending),
            len(merged),
            len(unique),
            len(formatted),
            time.perf_counter() - started,
        )
        return formatted

    return StructuredTool(
        name="web_search",
        description=description,
        coroutine=_web_search_impl,
        args_schema=WebSearchInput,
    )
|
||||
|
|
@ -74,8 +74,9 @@ class ResolvedMentionSet:
|
|||
``@Project``).
|
||||
|
||||
``mentioned_document_ids`` is an ordered, deduped list consumed by
|
||||
the priority middleware downstream — see
|
||||
``KnowledgePriorityMiddleware._compute_priority_paths``.
|
||||
the on-demand ``search_knowledge_base`` tool downstream (via
|
||||
``referenced_document_ids``) to pin @-mentioned docs into the
|
||||
retrieval scope.
|
||||
"""
|
||||
|
||||
mentions: list[ResolvedMention] = field(default_factory=list)
|
||||
|
|
@ -113,8 +114,8 @@ async def resolve_mentions(
|
|||
|
||||
* Legacy clients that haven't migrated to the unified chip list
|
||||
still send the id arrays — we treat the union as authoritative.
|
||||
* The id arrays are the canonical input to
|
||||
``KnowledgePriorityMiddleware`` (via ``SurfSenseContextSchema``);
|
||||
* The id arrays are the canonical input to the retrieval scope
|
||||
(via ``SurfSenseContextSchema`` → ``referenced_document_ids``);
|
||||
returning the deduped, validated lists lets the route forward
|
||||
them unchanged.
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ This module is the single source of truth for mapping ``Document`` rows to
|
|||
virtual paths under ``/documents/`` and back. It is used by:
|
||||
|
||||
* :class:`KnowledgeTreeMiddleware` (rendering the workspace tree)
|
||||
* :class:`KnowledgePriorityMiddleware` (computing priority paths)
|
||||
* :class:`KBPostgresBackend` (``als_info`` / ``aread`` / move operations)
|
||||
* :class:`KnowledgeBasePersistenceMiddleware` (resolving moves and creates)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,95 @@
|
|||
"""Resolved ``@``-references and their pointer block.
|
||||
|
||||
References are scope, not content: they tell the model what the user pointed
|
||||
at this turn so it can retrieve from those sources with tools.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.chat.runtime.path_resolver import build_path_index
|
||||
from app.schemas.new_chat import MentionedDocumentInfo
|
||||
|
||||
from .chat import resolve_chat_references
|
||||
from .connectors import resolve_connector_references
|
||||
from .documents import referenced_document_ids, resolve_document_references
|
||||
from .folders import resolve_folder_references
|
||||
from .models import (
|
||||
ChatReference,
|
||||
ConnectorReference,
|
||||
DocumentReference,
|
||||
FolderReference,
|
||||
Reference,
|
||||
ReferenceKind,
|
||||
)
|
||||
from .reference_pointers import render_reference_pointers
|
||||
|
||||
|
||||
async def resolve_references(
    session: AsyncSession,
    *,
    search_space_id: int,
    requesting_user_id: str | None,
    current_chat_id: int,
    document_ids: list[int] | None = None,
    folder_ids: list[int] | None = None,
    connector_ids: list[int] | None = None,
    connector_chips: list[MentionedDocumentInfo] | None = None,
    thread_ids: list[int] | None = None,
) -> list[Reference]:
    """Turn the ids the user ``@``-referenced this turn into one pointer list.

    The result is ordered documents, folders, connectors, chats. Each kind is
    resolved only when ids of that kind were supplied, and the path index is
    built a single time and handed to both the document and folder resolvers.

    Args:
        session: Database session passed to every resolver.
        search_space_id: Search space the references must belong to.
        requesting_user_id: User on whose behalf chat access is checked.
        current_chat_id: Active chat, forwarded to the chat resolver.
        document_ids: ``@document`` ids, if any.
        folder_ids: ``@folder`` ids, if any.
        connector_ids: ``@connector`` ids, if any.
        connector_chips: Chips forwarded to the connector resolver.
        thread_ids: ``@chat`` thread ids, if any.

    Returns:
        The resolved references; empty when nothing was referenced.
    """
    resolved: list[Reference] = []

    if document_ids or folder_ids:
        # One index serves both path-bearing kinds.
        path_index = await build_path_index(session, search_space_id)
        if document_ids:
            resolved.extend(
                await resolve_document_references(
                    session,
                    search_space_id=search_space_id,
                    document_ids=document_ids,
                    index=path_index,
                )
            )
        if folder_ids:
            resolved.extend(
                await resolve_folder_references(
                    session,
                    search_space_id=search_space_id,
                    folder_ids=folder_ids,
                    index=path_index,
                )
            )

    if connector_ids:
        resolved.extend(
            await resolve_connector_references(
                session,
                search_space_id=search_space_id,
                connector_ids=connector_ids,
                chips=connector_chips,
            )
        )

    if thread_ids:
        resolved.extend(
            await resolve_chat_references(
                session,
                search_space_id=search_space_id,
                requesting_user_id=requesting_user_id,
                current_chat_id=current_chat_id,
                thread_ids=thread_ids,
            )
        )

    return resolved
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ChatReference",
|
||||
"ConnectorReference",
|
||||
"DocumentReference",
|
||||
"FolderReference",
|
||||
"Reference",
|
||||
"ReferenceKind",
|
||||
"referenced_document_ids",
|
||||
"render_reference_pointers",
|
||||
"resolve_references",
|
||||
]
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
"""Resolve ``@chat`` mentions into pointers, access-checked, titles only."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .resolver import resolve_chat_references
|
||||
|
||||
__all__ = ["resolve_chat_references"]
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
"""Access-checked lookup of chat threads the requester may read.
|
||||
|
||||
The single place chat visibility is enforced: a thread is readable when it is
|
||||
shared with the search space, the requester created it, or it is a legacy
|
||||
null-creator thread and the requester owns the search space. Anything else is
|
||||
dropped (fail-closed).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import or_, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import ChatVisibility, NewChatThread, SearchSpace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _visibility_predicate(user_uuid: UUID | None, *, include_legacy: bool):
    """Build the OR-ed SQL condition selecting threads the requester may read.

    Threads shared with the search space always match. Threads created by
    ``user_uuid`` match when a user is given, and null-creator threads match
    only when ``include_legacy`` is set.
    """
    shared = NewChatThread.visibility == ChatVisibility.SEARCH_SPACE
    own = (
        [NewChatThread.created_by_id == user_uuid] if user_uuid is not None else []
    )
    legacy = [NewChatThread.created_by_id.is_(None)] if include_legacy else []
    return or_(shared, *own, *legacy)
|
||||
|
||||
|
||||
async def accessible_threads(
    session: AsyncSession,
    *,
    search_space_id: int,
    requesting_user_id: str | None,
    thread_ids: list[int],
    exclude_thread_id: int | None = None,
) -> list[NewChatThread]:
    """Return the threads in this space the requester may read, in requested order.

    Input order is preserved and de-duplicated; ``exclude_thread_id`` (the
    active chat) is removed so a chat never references itself. Ids that are
    inaccessible or belong to another search space are silently dropped.

    Args:
        session: Database session used for the owner and thread lookups.
        search_space_id: Search space the threads must belong to.
        requesting_user_id: Requester's id as a UUID string. A ``UUID``
            instance is accepted as-is; any other unparsable value is logged
            and treated as "no user", which restricts the result to threads
            shared with the search space.
        thread_ids: Requested thread ids.
        exclude_thread_id: Thread id to leave out of the result.

    Returns:
        The readable threads, ordered like ``thread_ids``.
    """
    requested = [tid for tid in dict.fromkeys(thread_ids) if tid != exclude_thread_id]
    if not requested:
        return []

    user_uuid: UUID | None = None
    if isinstance(requesting_user_id, UUID):
        # Already parsed; UUID(UUID(...)) would raise AttributeError.
        user_uuid = requesting_user_id
    elif requesting_user_id:
        try:
            user_uuid = UUID(requesting_user_id)
        except (AttributeError, TypeError, ValueError):
            # AttributeError covers non-string inputs, for which UUID() fails
            # on ``hex.replace``; stay fail-closed instead of crashing.
            logger.warning(
                "accessible_threads: invalid user_id=%r; restricting to shared",
                requesting_user_id,
            )

    # Legacy null-creator threads are readable only by the search-space owner.
    include_legacy = False
    if user_uuid is not None:
        owner_id = await session.scalar(
            select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
        )
        include_legacy = owner_id == user_uuid

    rows = await session.execute(
        select(NewChatThread).where(
            NewChatThread.id.in_(requested),
            NewChatThread.search_space_id == search_space_id,
            _visibility_predicate(user_uuid, include_legacy=include_legacy),
        )
    )
    threads_by_id = {row.id: row for row in rows.scalars().all()}
    # Re-apply the caller's order; the query returns rows in arbitrary order.
    return [threads_by_id[tid] for tid in requested if tid in threads_by_id]
|
||||
|
||||
|
||||
__all__ = ["accessible_threads"]
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
"""Resolve ``@chat`` mentions into pointer references.
|
||||
|
||||
Chats are not KB-indexed, so a chat reference is a pointer only; its turns are
|
||||
read on demand via the chat read tool, not injected here. Only the title is
|
||||
needed, so this takes the cheap access-checked path and never loads transcripts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from ..models import ChatReference
|
||||
from .access import accessible_threads
|
||||
|
||||
|
||||
async def resolve_chat_references(
    session: AsyncSession,
    *,
    search_space_id: int,
    requesting_user_id: str | None,
    current_chat_id: int,
    thread_ids: list[int],
) -> list[ChatReference]:
    """Resolve ``@chat`` thread ids into access-checked, title-only pointers.

    The lookup is delegated to ``accessible_threads`` with the current chat
    excluded. Only each thread's id and title are read; a thread without a
    title is labelled ``"Untitled chat"``.
    """
    if not thread_ids:
        return []

    readable = await accessible_threads(
        session,
        search_space_id=search_space_id,
        requesting_user_id=requesting_user_id,
        thread_ids=thread_ids,
        exclude_thread_id=current_chat_id,
    )

    pointers: list[ChatReference] = []
    for thread in readable:
        title = str(thread.title or "Untitled chat")
        pointers.append(ChatReference(entity_id=thread.id, label=title))
    return pointers
|
||||
|
||||
|
||||
__all__ = ["resolve_chat_references"]
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
"""Resolve ``@connector`` account mentions into references for the pointer block."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import SearchSourceConnector
|
||||
from app.schemas.new_chat import MentionedDocumentInfo
|
||||
|
||||
from .models import ConnectorReference
|
||||
|
||||
|
||||
def connector_pointer_fields(
    *,
    account_name: str | None,
    connector_type: str | None,
    fallback_name: str | None,
) -> tuple[str, str | None]:
    """Choose the label and provider shown for a connector pointer.

    The label is the first non-empty value of ``account_name`` and
    ``fallback_name``, or the literal ``"account"`` when both are empty. The
    provider is ``connector_type``, normalised to ``None`` when empty.

    Returns:
        A ``(label, provider)`` pair.
    """
    for candidate in (account_name, fallback_name):
        if candidate:
            label = candidate
            break
    else:
        label = "account"

    provider = connector_type if connector_type else None
    return label, provider
|
||||
|
||||
|
||||
async def resolve_connector_references(
    session: AsyncSession,
    *,
    search_space_id: int,
    connector_ids: list[int],
    chips: list[MentionedDocumentInfo] | None = None,
) -> list[ConnectorReference]:
    """Resolve ``@connector`` ids into references, in de-duplicated input order.

    The query keeps only connectors that belong to ``search_space_id``; any
    other id yields no reference. Display fields prefer the matching chip of
    kind ``"connector"`` and fall back to the stored connector name and type.
    """
    if not connector_ids:
        return []

    query = select(
        SearchSourceConnector.id,
        SearchSourceConnector.name,
        SearchSourceConnector.connector_type,
    ).where(
        SearchSourceConnector.search_space_id == search_space_id,
        SearchSourceConnector.id.in_(connector_ids),
    )
    result = await session.execute(query)
    rows_by_id = {row.id: row for row in result.all()}

    connector_chips = {}
    for chip in chips or []:
        if chip.kind == "connector":
            connector_chips[chip.id] = chip

    resolved: list[ConnectorReference] = []
    for connector_id in dict.fromkeys(connector_ids):
        row = rows_by_id.get(connector_id)
        if row is None:
            continue

        chip = connector_chips.get(connector_id)
        chip_name = chip.account_name if chip else None
        chip_type = chip.connector_type if chip else None

        # The stored type may be an enum member or a plain value.
        raw_type = getattr(row.connector_type, "value", row.connector_type)
        stored_type = str(raw_type) if raw_type else None

        label, provider = connector_pointer_fields(
            account_name=chip_name,
            connector_type=chip_type or stored_type,
            fallback_name=str(row.name or ""),
        )
        resolved.append(
            ConnectorReference(
                entity_id=connector_id,
                label=label,
                provider=provider,
            )
        )
    return resolved
|
||||
|
||||
|
||||
__all__ = ["connector_pointer_fields", "resolve_connector_references"]
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
"""Resolve ``@document`` references.
|
||||
|
||||
Two concerns, one subject: ``resolver`` turns document ids into pointer
|
||||
references for the model, ``referenced`` turns ``@document`` / ``@folder``
|
||||
mentions into the document ids a retrieval is confined to.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .referenced import referenced_document_ids
|
||||
from .resolver import resolve_document_references
|
||||
|
||||
__all__ = ["referenced_document_ids", "resolve_document_references"]
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
"""Resolve ``@document`` / ``@folder`` mentions to the documents they point at.
|
||||
|
||||
Reference resolution, not retrieval: this answers "which knowledge-base
|
||||
documents did the user point at this turn?". ``@document`` ids pass through;
|
||||
``@folder`` ids expand to the documents directly inside each folder within this
|
||||
search space (direct children only, not nested subfolders). The caller turns the
|
||||
returned ids into a retrieval ``SearchScope``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import Document
|
||||
|
||||
|
||||
async def referenced_document_ids(
    session: AsyncSession,
    *,
    search_space_id: int,
    document_ids: list[int] | None = None,
    folder_ids: list[int] | None = None,
) -> tuple[int, ...]:
    """Collect the ids of every document the user pointed at, sorted ascending.

    ``document_ids`` are taken as given. Each folder in ``folder_ids``
    contributes the documents of this search space whose ``folder_id`` equals
    that folder. The database is queried only when folders were supplied.

    Returns:
        A sorted, duplicate-free tuple; empty when nothing was referenced.
    """
    collected: set[int] = set(document_ids) if document_ids else set()

    folder_list = list(folder_ids) if folder_ids else []
    if folder_list:
        query = select(Document.id).where(
            Document.search_space_id == search_space_id,
            Document.folder_id.in_(folder_list),
        )
        result = await session.execute(query)
        collected.update(result.scalars().all())

    return tuple(sorted(collected))
|
||||
|
||||
|
||||
__all__ = ["referenced_document_ids"]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
"""Resolve ``@document`` ids into references for the pointer block."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.chat.runtime.path_resolver import PathIndex, doc_to_virtual_path
|
||||
from app.db import Document
|
||||
|
||||
from ..models import DocumentReference
|
||||
|
||||
|
||||
async def resolve_document_references(
    session: AsyncSession,
    *,
    search_space_id: int,
    document_ids: list[int],
    index: PathIndex,
) -> list[DocumentReference]:
    """Resolve document ids into references, in de-duplicated input order.

    Only documents found inside ``search_space_id`` produce a reference, so
    an id that matches nothing there is dropped rather than reported. A
    document without a title is labelled ``"untitled"``, and that same label
    is used when computing its virtual path.
    """
    if not document_ids:
        return []

    query = select(Document).where(
        Document.search_space_id == search_space_id,
        Document.id.in_(document_ids),
    )
    result = await session.execute(query)
    found = {doc.id: doc for doc in result.scalars().all()}

    resolved: list[DocumentReference] = []
    for requested_id in dict.fromkeys(document_ids):
        doc = found.get(requested_id)
        if doc is not None:
            label = str(doc.title or "untitled")
            virtual_path = doc_to_virtual_path(
                doc_id=doc.id,
                title=label,
                folder_id=doc.folder_id,
                index=index,
            )
            resolved.append(
                DocumentReference(entity_id=doc.id, label=label, path=virtual_path)
            )
    return resolved
|
||||
|
||||
|
||||
__all__ = ["resolve_document_references"]
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
"""Resolve ``@folder`` ids into references for the pointer block."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.chat.runtime.path_resolver import DOCUMENTS_ROOT, PathIndex
|
||||
from app.db import Folder
|
||||
|
||||
from .models import FolderReference
|
||||
|
||||
|
||||
def folder_pointer_path(folder_id: int, folder_paths: dict[int, str]) -> str:
    """Return the folder's virtual path, always ending in ``/``.

    A folder missing from ``folder_paths`` falls back to ``DOCUMENTS_ROOT``.
    The trailing slash makes the pointer read as a directory.
    """
    path = folder_paths.get(folder_id, DOCUMENTS_ROOT)
    if not path.endswith("/"):
        path = f"{path}/"
    return path
|
||||
|
||||
|
||||
async def resolve_folder_references(
    session: AsyncSession,
    *,
    search_space_id: int,
    folder_ids: list[int],
    index: PathIndex,
) -> list[FolderReference]:
    """Resolve folder ids into references, in de-duplicated input order.

    Only folders found inside ``search_space_id`` produce a reference; other
    ids are dropped. A folder without a name is labelled ``"untitled"``.
    """
    if not folder_ids:
        return []

    query = select(Folder).where(
        Folder.search_space_id == search_space_id,
        Folder.id.in_(folder_ids),
    )
    result = await session.execute(query)
    found = {folder.id: folder for folder in result.scalars().all()}

    resolved: list[FolderReference] = []
    for requested_id in dict.fromkeys(folder_ids):
        if requested_id not in found:
            continue
        folder = found[requested_id]
        resolved.append(
            FolderReference(
                entity_id=folder.id,
                label=str(folder.name or "untitled"),
                path=folder_pointer_path(folder.id, index.folder_paths),
            )
        )
    return resolved
|
||||
|
||||
|
||||
__all__ = ["folder_pointer_path", "resolve_folder_references"]
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
"""Data shapes for resolved ``@``-references.
|
||||
|
||||
One type per kind so each carries exactly the fields it needs: documents and
|
||||
folders have a path, connectors have a provider, chats have neither. ``kind`` is
|
||||
a class-level discriminator used by the renderer and scope builder.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import ClassVar
|
||||
|
||||
|
||||
class ReferenceKind(str, Enum):
    """Kinds of thing a user can point at with ``@``.

    Each member's value is the lowercase word used to label that kind when a
    pointer is shown to the model.
    """

    # Knowledge-base items that carry a virtual path.
    DOCUMENT = "document"
    FOLDER = "folder"
    # Items without a path: a connector account and a chat thread.
    CONNECTOR = "connector"
    CHAT = "chat"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _Reference:
    """Common identity fields inherited by every concrete reference type."""

    # Database id of the referenced entity.
    entity_id: int
    # Display name for the entity.
    label: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DocumentReference(_Reference):
    """Pointer to a referenced document, addressed by its virtual path."""

    # Virtual path of the document.
    path: str
    # Class-level discriminator; not a dataclass field.
    kind: ClassVar[ReferenceKind] = ReferenceKind.DOCUMENT
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FolderReference(_Reference):
    """Pointer to a referenced folder, addressed by its virtual path."""

    # Virtual path of the folder.
    path: str
    # Class-level discriminator; not a dataclass field.
    kind: ClassVar[ReferenceKind] = ReferenceKind.FOLDER
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ConnectorReference(_Reference):
    """Pointer to a referenced connector account."""

    # Connector type label; ``None`` when no type is known.
    provider: str | None = None
    # Class-level discriminator; not a dataclass field.
    kind: ClassVar[ReferenceKind] = ReferenceKind.CONNECTOR
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ChatReference(_Reference):
    """Pointer to a referenced chat thread; it carries no transcript content."""

    # Class-level discriminator; not a dataclass field.
    kind: ClassVar[ReferenceKind] = ReferenceKind.CHAT
|
||||
|
||||
|
||||
Reference = DocumentReference | FolderReference | ConnectorReference | ChatReference
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ChatReference",
|
||||
"ConnectorReference",
|
||||
"DocumentReference",
|
||||
"FolderReference",
|
||||
"Reference",
|
||||
"ReferenceKind",
|
||||
]
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
"""Render resolved references into a ``<referenced_this_turn>`` pointer block.
|
||||
|
||||
Pointers, not content: each line names what the user referenced and how to
|
||||
reach it (a path, a connector handle, a title) so the model knows what to
|
||||
retrieve from. Actual content is pulled later via tools, never injected here.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .models import (
|
||||
ChatReference,
|
||||
ConnectorReference,
|
||||
DocumentReference,
|
||||
FolderReference,
|
||||
Reference,
|
||||
)
|
||||
|
||||
_HEADER = (
|
||||
"The user pointed at these with @ this turn. They are scope, not content "
|
||||
"— when the question is about them, retrieve from them before answering."
|
||||
)
|
||||
|
||||
|
||||
def render_reference_pointers(references: list[Reference]) -> str | None:
    """Render the references as a single ``<referenced_this_turn>`` block.

    The block holds the fixed header followed by one pointer line per
    reference, in the order given.

    Returns:
        The rendered block, or ``None`` when ``references`` is empty so the
        caller can omit the block altogether.
    """
    if not references:
        return None

    opening = "<referenced_this_turn>\n"
    closing = "\n</referenced_this_turn>"
    pointer_lines = "\n".join(_render_pointer(entry) for entry in references)
    return opening + f"{_HEADER}\n" + pointer_lines + closing
|
||||
|
||||
|
||||
def _render_pointer(reference: Reference) -> str:
    """Format one reference as a ``- {kind} {id} — {handle}`` line."""
    return f"- {reference.kind.value} {reference.entity_id} — " + _handle(reference)
|
||||
|
||||
|
||||
def _handle(reference: Reference) -> str:
    """Build the handle that tells the model how to reach the reference.

    Documents and folders give their quoted label plus the virtual path.
    Connectors give ``provider (label)``, or just the label when no provider
    is known. Chats give their quoted title. Labels and providers are
    whitespace-collapsed first.
    """
    label = _clean(reference.label)

    if isinstance(reference, (DocumentReference, FolderReference)):
        return f'"{label}" ({reference.path})'

    if isinstance(reference, ConnectorReference):
        if not reference.provider:
            return label
        provider = _clean(reference.provider)
        # A provider made only of whitespace cleans to "" and is dropped.
        return f"{provider} ({label})" if provider else label

    if isinstance(reference, ChatReference):
        return f'"{label}"'
|
||||
|
||||
|
||||
def _clean(text: str) -> str:
    """Squash every whitespace run to one space and trim both ends.

    This keeps a title containing newlines or tabs on a single pointer line.
    """
    words = text.split()
    return " ".join(words)
|
||||
|
||||
|
||||
__all__ = ["render_reference_pointers"]
|
||||
|
|
@ -11,9 +11,9 @@ MUST live on this context object instead of being captured into a
|
|||
middleware ``__init__`` closure. Middlewares read fields back via
|
||||
``runtime.context.<field>``; tools read them via ``runtime.context``.
|
||||
|
||||
This object is read inside both ``KnowledgePriorityMiddleware`` (for
|
||||
``mentioned_document_ids``) and any future middleware that needs
|
||||
per-request state without invalidating the compiled-agent cache.
|
||||
This object is read by the ``search_knowledge_base`` tool (for
|
||||
``mentioned_document_ids``) and any middleware that needs per-request
|
||||
state without invalidating the compiled-agent cache.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -43,13 +43,12 @@ class SurfSenseContextSchema:
|
|||
Phase 1.5 fields:
|
||||
search_space_id: Search space the request is scoped to.
|
||||
mentioned_document_ids: KB documents the user @-mentioned this turn.
|
||||
Read by ``KnowledgePriorityMiddleware`` to seed its priority
|
||||
list. Stays out of the compiled-agent cache key — that's the
|
||||
whole point of putting it here.
|
||||
Read by the ``search_knowledge_base`` tool to pin these docs
|
||||
into the retrieval scope. Stays out of the compiled-agent cache
|
||||
key — that's the whole point of putting it here.
|
||||
mentioned_folder_ids: KB folders the user @-mentioned this turn
|
||||
(cloud filesystem mode). Surfaced as ``[USER-MENTIONED]``
|
||||
entries in ``<priority_documents>`` so the agent prioritises
|
||||
walking those folders with ``ls`` / ``find_documents``.
|
||||
(cloud filesystem mode). Pinned into the ``search_knowledge_base``
|
||||
retrieval scope so matches from those folders are prioritised.
|
||||
file_operation_contract: One-shot file operation contract for the
|
||||
upcoming turn (reserved; not currently populated).
|
||||
turn_id / request_id: Correlation IDs surfaced by the streaming
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ Extends ``SummarizationMiddleware`` with three SurfSense behaviors:
|
|||
|
||||
1. A structured summary template (:data:`SURFSENSE_SUMMARY_PROMPT`) instead of
|
||||
the base freeform prompt.
|
||||
2. Protected SystemMessages (injected hints like ``<priority_documents>``) are
|
||||
2. Protected SystemMessages (injected hints like ``<workspace_tree>``) are
|
||||
kept verbatim instead of being summarized away.
|
||||
3. ``content=None`` is sanitized before ``get_buffer_string`` (some providers
|
||||
stream tool-only AIMessages with ``None`` content, which would crash it).
|
||||
|
|
@ -77,7 +77,6 @@ Respond ONLY with the structured summary. Do not include any text before or afte
|
|||
# compaction step happens *before* re-injection in some paths, so we
|
||||
# must preserve them verbatim across the cutoff.
|
||||
PROTECTED_SYSTEM_PREFIXES: tuple[str, ...] = (
|
||||
"<priority_documents>", # KnowledgePriorityMiddleware
|
||||
"<workspace_tree>", # KnowledgeTreeMiddleware
|
||||
"<file_operation_contract>", # reserved file-operation contract prefix
|
||||
"<user_memory>", # MemoryInjectionMiddleware
|
||||
|
|
|
|||
|
|
@ -4,20 +4,40 @@ Web search tool for the SurfSense agent.
|
|||
Provides a unified tool for real-time web searches that dispatches to all
|
||||
configured search engines: the platform SearXNG instance (always available)
|
||||
plus any user-configured live-search connectors (Tavily, Linkup, Baidu).
|
||||
|
||||
Each result is registered into the conversation citation registry as a
|
||||
``WEB_RESULT`` and rendered with a server-assigned ``[n]`` label, so the model
|
||||
cites the web exactly like the knowledge base — one ``[n]`` spine, no special
|
||||
web citation form.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
from typing import Any
|
||||
from __future__ import annotations
|
||||
|
||||
from langchain_core.tools import StructuredTool
|
||||
from pydantic import BaseModel, Field
|
||||
import asyncio
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Annotated, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from langchain.tools import ToolRuntime
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langchain_core.tools import BaseTool, StructuredTool
|
||||
from langgraph.types import Command
|
||||
|
||||
from app.db import shielded_async_session
|
||||
from app.services.connector_service import ConnectorService
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.agents.chat.multi_agent_chat.shared.document_render import (
|
||||
RenderableDocument,
|
||||
)
|
||||
|
||||
# NOTE: imports from ``app.agents.chat.multi_agent_chat`` are done lazily inside
|
||||
# the functions below. This module lives under ``app.agents.chat.shared`` but is
|
||||
# imported during the ``multi_agent_chat`` package's own init cascade (via the
|
||||
# research subagent); importing that package at module load would re-enter a
|
||||
# partially-initialized module. Lazy imports break that cycle.
|
||||
|
||||
_LIVE_SEARCH_CONNECTORS: set[str] = {
|
||||
"TAVILY_API",
|
||||
"LINKUP_API",
|
||||
|
|
@ -37,28 +57,29 @@ _CONNECTOR_LABELS: dict[str, str] = {
|
|||
}
|
||||
|
||||
|
||||
class WebSearchInput(BaseModel):
|
||||
"""Input schema for the web_search tool."""
|
||||
|
||||
query: str = Field(
|
||||
description="The search query to look up on the web. Use specific, descriptive terms.",
|
||||
)
|
||||
top_k: int = Field(
|
||||
default=10,
|
||||
description="Number of results to retrieve (default: 10, max: 50).",
|
||||
)
|
||||
def _web_source_label(url: str) -> str:
|
||||
"""A compact, human-readable source for the ``<document source=…>`` attr."""
|
||||
domain = urlparse(url).netloc.removeprefix("www.") if url else ""
|
||||
return f"Web · {domain}" if domain else "Web"
|
||||
|
||||
|
||||
def _format_web_results(
|
||||
def _to_renderable_web_documents(
|
||||
documents: list[dict[str, Any]],
|
||||
*,
|
||||
max_chars: int = 50_000,
|
||||
) -> str:
|
||||
"""Format web search results into XML suitable for the LLM context."""
|
||||
if not documents:
|
||||
return "No web search results found."
|
||||
) -> list[RenderableDocument]:
|
||||
"""Map raw web results to renderable documents, one passage (the snippet) each.
|
||||
|
||||
parts: list[str] = []
|
||||
A result with no URL is skipped: ``url`` is the citation locator, so without
|
||||
it the result cannot be registered or resolved.
|
||||
"""
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationSourceType
|
||||
from app.agents.chat.multi_agent_chat.shared.document_render import (
|
||||
RenderableDocument,
|
||||
RenderablePassage,
|
||||
)
|
||||
|
||||
renderables: list[RenderableDocument] = []
|
||||
total_chars = 0
|
||||
|
||||
for doc in documents:
|
||||
|
|
@ -67,36 +88,28 @@ def _format_web_results(
|
|||
title = doc_info.get("title") or "Web Result"
|
||||
url = metadata.get("url") or ""
|
||||
content = (doc.get("content") or "").strip()
|
||||
source = metadata.get("document_type") or doc.get("source") or "WEB_SEARCH"
|
||||
if not content:
|
||||
if not content or not url:
|
||||
continue
|
||||
|
||||
metadata_json = json.dumps(metadata, ensure_ascii=False)
|
||||
doc_xml = "\n".join(
|
||||
[
|
||||
"<document>",
|
||||
"<document_metadata>",
|
||||
f" <document_type>{source}</document_type>",
|
||||
f" <title><![CDATA[{title}]]></title>",
|
||||
f" <url><![CDATA[{url}]]></url>",
|
||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
|
||||
"</document_metadata>",
|
||||
"<document_content>",
|
||||
f" <chunk id='{url}'><![CDATA[{content}]]></chunk>",
|
||||
"</document_content>",
|
||||
"</document>",
|
||||
"",
|
||||
]
|
||||
)
|
||||
|
||||
if total_chars + len(doc_xml) > max_chars:
|
||||
parts.append("<!-- Output truncated to fit context window -->")
|
||||
total_chars += len(content)
|
||||
if total_chars > max_chars:
|
||||
break
|
||||
|
||||
parts.append(doc_xml)
|
||||
total_chars += len(doc_xml)
|
||||
renderables.append(
|
||||
RenderableDocument(
|
||||
title=title,
|
||||
source=_web_source_label(url),
|
||||
passages=[
|
||||
RenderablePassage(
|
||||
content=content,
|
||||
locator={"url": url},
|
||||
source_type=CitationSourceType.WEB_RESULT,
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
return "\n".join(parts).strip() or "No web search results found."
|
||||
return renderables
|
||||
|
||||
|
||||
async def _search_live_connector(
|
||||
|
|
@ -141,7 +154,7 @@ async def _search_live_connector(
|
|||
def create_web_search_tool(
|
||||
search_space_id: int | None = None,
|
||||
available_connectors: list[str] | None = None,
|
||||
) -> StructuredTool:
|
||||
) -> BaseTool:
|
||||
"""Factory for the ``web_search`` tool.
|
||||
|
||||
Dispatches in parallel to the platform SearXNG instance and any
|
||||
|
|
@ -168,7 +181,17 @@ def create_web_search_tool(
|
|||
_search_space_id = search_space_id
|
||||
_active_live = active_live_connectors
|
||||
|
||||
async def _web_search_impl(query: str, top_k: int = 10) -> str:
|
||||
async def _web_search_impl(
|
||||
query: Annotated[
|
||||
str,
|
||||
"The search query to look up on the web. Use specific, descriptive terms.",
|
||||
],
|
||||
runtime: ToolRuntime,
|
||||
top_k: Annotated[
|
||||
int,
|
||||
"Number of results to retrieve (default: 10, max: 50).",
|
||||
] = 10,
|
||||
) -> Command | str:
|
||||
from app.services import web_search_service
|
||||
|
||||
perf = get_perf_logger()
|
||||
|
|
@ -226,22 +249,39 @@ def create_web_search_tool(
|
|||
seen_urls.add(url)
|
||||
deduplicated.append(doc)
|
||||
|
||||
formatted = _format_web_results(deduplicated)
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import load_registry
|
||||
from app.agents.chat.multi_agent_chat.shared.document_render import (
|
||||
render_web_results,
|
||||
)
|
||||
|
||||
registry = load_registry(getattr(runtime, "state", None))
|
||||
renderables = _to_renderable_web_documents(deduplicated)
|
||||
rendered = render_web_results(renderables, registry)
|
||||
|
||||
perf.info(
|
||||
"[web_search] query=%r engines=%d results=%d deduped=%d chars=%d in %.3fs",
|
||||
"[web_search] query=%r engines=%d results=%d deduped=%d renderable=%d in %.3fs",
|
||||
query[:60],
|
||||
len(tasks),
|
||||
len(all_documents),
|
||||
len(deduplicated),
|
||||
len(formatted),
|
||||
len(renderables),
|
||||
time.perf_counter() - t0,
|
||||
)
|
||||
return formatted
|
||||
|
||||
return StructuredTool(
|
||||
if rendered is None:
|
||||
return "No web search results found."
|
||||
|
||||
return Command(
|
||||
update={
|
||||
"messages": [
|
||||
ToolMessage(content=rendered, tool_call_id=runtime.tool_call_id)
|
||||
],
|
||||
"citation_registry": registry,
|
||||
}
|
||||
)
|
||||
|
||||
return StructuredTool.from_function(
|
||||
name="web_search",
|
||||
description=description,
|
||||
coroutine=_web_search_impl,
|
||||
args_schema=WebSearchInput,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ async def _resolve_mention_context(
|
|||
Automation always runs in cloud filesystem mode, so we mirror the chat
|
||||
``new_chat`` flow: substitute ``@title`` tokens with canonical
|
||||
``/documents/...`` paths, prepend a ``<mentioned_connectors>`` block, and
|
||||
build a ``SurfSenseContextSchema`` that ``KnowledgePriorityMiddleware``
|
||||
build a ``SurfSenseContextSchema`` that the ``search_knowledge_base`` tool
|
||||
reads via ``runtime.context``. Returns ``(query, None)`` unchanged when
|
||||
there are no mentions.
|
||||
"""
|
||||
|
|
@ -210,7 +210,7 @@ async def run_agent_task(
|
|||
runtime_context.turn_id = turn_id
|
||||
|
||||
# The compiled graph declares ``context_schema=SurfSenseContextSchema``;
|
||||
# mentions only reach ``KnowledgePriorityMiddleware`` via ``context=``.
|
||||
# mentions only reach the ``search_knowledge_base`` tool via ``context=``.
|
||||
invoke_kwargs: dict[str, Any] = {"config": config}
|
||||
if runtime_context is not None:
|
||||
invoke_kwargs["context"] = runtime_context
|
||||
|
|
|
|||
|
|
@ -1,135 +0,0 @@
|
|||
"""
|
||||
Thin compatibility wrapper around :mod:`app.prompts.system_prompt_composer.composer`.
|
||||
|
||||
The composer split the previous monolithic prompt string into a fragment
|
||||
tree under ``prompts/`` plus a model-family dispatch step (see the
|
||||
composer module docstring for credits). This module preserves the public
|
||||
function surface (``build_surfsense_system_prompt`` /
|
||||
``build_configurable_system_prompt`` /
|
||||
``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
|
||||
that existing call sites — the multi-agent chat factory, anonymous chat
|
||||
routes, and the configurable-prompt admin path — keep working without churn.
|
||||
|
||||
For new call sites prefer importing ``compose_system_prompt`` directly
|
||||
from :mod:`app.prompts.system_prompt_composer.composer`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from app.db import ChatVisibility
|
||||
|
||||
from .system_prompt_composer.composer import (
|
||||
_read_fragment,
|
||||
compose_system_prompt,
|
||||
detect_provider_variant,
|
||||
)
|
||||
|
||||
# Optional routing fragments under ``prompts/routing/`` (see composer).
|
||||
_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
|
||||
|
||||
# Public re-exports for backwards compatibility (some legacy code reads the
|
||||
# raw default-instructions text directly).
|
||||
SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
|
||||
"<system_instruction>\nDefault SurfSense agent system instructions are now\n"
|
||||
"composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
|
||||
"</system_instruction>"
|
||||
)
|
||||
|
||||
# Citation block re-exposed for legacy importers that referenced this constant
|
||||
# directly. The composer is the canonical source; this is a frozen snapshot
|
||||
# loaded at module-init time.
|
||||
SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
|
||||
SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
|
||||
|
||||
|
||||
def build_surfsense_system_prompt(
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    mcp_connector_tools: dict[str, list[str]] | None = None,
    *,
    model_name: str | None = None,
) -> str:
    """Compose the stock SurfSense system prompt.

    Every argument is forwarded unchanged to
    :func:`app.prompts.system_prompt_composer.composer.compose_system_prompt`
    (which documents the parameters in full); this wrapper only pins
    ``citations_enabled=True`` and the default connector routing.
    """
    # Caller-supplied values, passed straight through to the composer.
    forwarded = {
        "today": today,
        "thread_visibility": thread_visibility,
        "enabled_tool_names": enabled_tool_names,
        "disabled_tool_names": disabled_tool_names,
        "mcp_connector_tools": mcp_connector_tools,
        "model_name": model_name,
    }
    # Settings fixed by this wrapper: citations on, default routing fragments.
    pinned = {
        "citations_enabled": True,
        "connector_routing": _DEFAULT_CONNECTOR_ROUTING,
    }
    return compose_system_prompt(**forwarded, **pinned)
|
||||
|
||||
|
||||
def build_configurable_system_prompt(
    custom_system_instructions: str | None = None,
    use_default_system_instructions: bool = True,
    citations_enabled: bool = True,
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    mcp_connector_tools: dict[str, list[str]] | None = None,
    *,
    model_name: str | None = None,
) -> str:
    """Compose a SurfSense system prompt with caller-chosen instructions.

    All arguments are handed unchanged to
    :func:`app.prompts.system_prompt_composer.composer.compose_system_prompt`
    (see there for parameter details); the only value this wrapper fixes is
    the default connector routing.
    """
    composer_kwargs = {
        "today": today,
        "thread_visibility": thread_visibility,
        "enabled_tool_names": enabled_tool_names,
        "disabled_tool_names": disabled_tool_names,
        "mcp_connector_tools": mcp_connector_tools,
        "custom_system_instructions": custom_system_instructions,
        "use_default_system_instructions": use_default_system_instructions,
        "citations_enabled": citations_enabled,
        "model_name": model_name,
        # Not configurable from this entry point.
        "connector_routing": _DEFAULT_CONNECTOR_ROUTING,
    }
    return compose_system_prompt(**composer_kwargs)
|
||||
|
||||
|
||||
def get_default_system_instructions() -> str:
    """Return the default ``<system_instruction>`` block (no tools / citations).

    Intended for pre-filling the UI when a user edits custom system
    instructions. The text is rebuilt from the current fragment tree on every
    call rather than read from a baked-in constant, using the private
    visibility variant and today's UTC date.
    """
    from .system_prompt_composer.composer import (
        _build_system_instructions,  # local import
    )

    today_iso = datetime.now(UTC).date().isoformat()
    instructions = _build_system_instructions(
        visibility=ChatVisibility.PRIVATE,
        resolved_today=today_iso,
    )
    return instructions.strip()
|
||||
|
||||
|
||||
# Backwards compatibility — some modules import the constant directly.
|
||||
SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SURFSENSE_CITATION_INSTRUCTIONS",
|
||||
"SURFSENSE_NO_CITATION_INSTRUCTIONS",
|
||||
"SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE",
|
||||
"SURFSENSE_SYSTEM_PROMPT",
|
||||
"build_configurable_system_prompt",
|
||||
"build_surfsense_system_prompt",
|
||||
"compose_system_prompt",
|
||||
"detect_provider_variant",
|
||||
"get_default_system_instructions",
|
||||
]
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
"""SurfSense agent prompt fragments.
|
||||
|
||||
The prompt is composed at runtime by :mod:`composer` from the markdown
|
||||
fragments under ``base/``, ``providers/``, ``tools/``, ``examples/``, and
|
||||
``routing/``. ``system_prompt.py`` is now a thin wrapper that delegates
|
||||
to :func:`composer.compose_system_prompt`.
|
||||
"""
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base.
|
||||
|
||||
Today's date (UTC): {resolved_today}
|
||||
|
||||
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
|
||||
|
||||
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
You are SurfSense, a reasoning and acting AI agent designed to answer questions in this team space using the team's shared knowledge base.
|
||||
|
||||
In this team thread, each message is prefixed with **[DisplayName of the author]**. Use this to attribute and reference the author of anything in the discussion (who asked a question, made a suggestion, or contributed an idea) and to cite who said what in your answers.
|
||||
|
||||
Today's date (UTC): {resolved_today}
|
||||
|
||||
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
|
||||
|
||||
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
<citation_instructions>
|
||||
IMPORTANT: Citations are DISABLED for this configuration.
|
||||
|
||||
DO NOT include any citations in your responses. Specifically:
|
||||
1. Do NOT use the [citation:chunk_id] format anywhere in your response.
|
||||
2. Do NOT reference document IDs, chunk IDs, or source IDs.
|
||||
3. Simply provide the information naturally without any citation markers.
|
||||
4. Write your response as if you're having a normal conversation, incorporating the information from your knowledge seamlessly.
|
||||
|
||||
When answering questions based on documents from the knowledge base:
|
||||
- Present the information directly and confidently
|
||||
- Do not mention that information comes from specific documents or chunks
|
||||
- Integrate facts naturally into your response without attribution markers
|
||||
|
||||
Your goal is to provide helpful, informative answers in a clean, readable format without any citation notation.
|
||||
</citation_instructions>
|
||||
|
|
@ -1,89 +0,0 @@
|
|||
<citation_instructions>
|
||||
CRITICAL CITATION REQUIREMENTS:
|
||||
|
||||
1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `<chunk id='...'>` tag inside `<document_content>`.
|
||||
2. Make sure ALL factual statements from the documents have proper citations.
|
||||
3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2].
|
||||
4. You MUST use the exact chunk_id values from the `<chunk id='...'>` attributes. Do not create your own citation numbers.
|
||||
5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value.
|
||||
6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags.
|
||||
7. Do not return citations as clickable links.
|
||||
8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only.
|
||||
9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting.
|
||||
10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `<chunk id='...'>` tags.
|
||||
11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up.
|
||||
|
||||
<document_structure_example>
|
||||
The documents you receive are structured like this:
|
||||
|
||||
**Knowledge base documents (numeric chunk IDs):**
|
||||
<document>
|
||||
<document_metadata>
|
||||
<document_id>42</document_id>
|
||||
<document_type>GITHUB_CONNECTOR</document_type>
|
||||
<title><![CDATA[Some repo / file / issue title]]></title>
|
||||
<url><![CDATA[https://example.com]]></url>
|
||||
<metadata_json><![CDATA[{{"any":"other metadata"}}]]></metadata_json>
|
||||
</document_metadata>
|
||||
|
||||
<document_content>
|
||||
<chunk id='123'><![CDATA[First chunk text...]]></chunk>
|
||||
<chunk id='124'><![CDATA[Second chunk text...]]></chunk>
|
||||
</document_content>
|
||||
</document>
|
||||
|
||||
**Web search results (URL chunk IDs):**
|
||||
<document>
|
||||
<document_metadata>
|
||||
<document_type>WEB_SEARCH</document_type>
|
||||
<title><![CDATA[Some web search result]]></title>
|
||||
<url><![CDATA[https://example.com/article]]></url>
|
||||
</document_metadata>
|
||||
|
||||
<document_content>
|
||||
<chunk id='https://example.com/article'><![CDATA[Content from web search...]]></chunk>
|
||||
</document_content>
|
||||
</document>
|
||||
|
||||
IMPORTANT: You MUST cite using the EXACT chunk ids from the `<chunk id='...'>` tags.
|
||||
- For knowledge base documents, chunk ids are numeric (e.g. 123, 124) or prefixed (e.g. doc-45).
|
||||
- For live web search results, chunk ids are URLs (e.g. https://example.com/article).
|
||||
Do NOT cite document_id. Always use the chunk id.
|
||||
</document_structure_example>
|
||||
|
||||
<citation_format>
|
||||
- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `<chunk id='...'>` tag
|
||||
- Citations should appear at the end of the sentence containing the information they support
|
||||
- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
|
||||
- There is no need to return a references section; just include the citations in the answer.
|
||||
- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
|
||||
- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
|
||||
- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
|
||||
- Copy the EXACT chunk id from the XML - if it says `<chunk id='5'>`, use [citation:5]
|
||||
- If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page]
|
||||
</citation_format>
|
||||
|
||||
<citation_examples>
|
||||
CORRECT citation formats:
|
||||
- [citation:5] (numeric chunk ID from knowledge base)
|
||||
- [citation:https://example.com/article] (URL chunk ID from web search results)
|
||||
- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations)
|
||||
|
||||
INCORRECT citation formats (DO NOT use):
|
||||
- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense))
|
||||
- Using parentheses around brackets: ([citation:5])
|
||||
- Using hyperlinked text: [link to source 5](https://example.com)
|
||||
- Using footnote style: ... library¹
|
||||
- Making up chunk IDs when the chunk_id is unknown
|
||||
- Using old IEEE format: [1], [2], [3]
|
||||
- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5]
|
||||
</citation_examples>
|
||||
|
||||
<citation_output_example>
|
||||
Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5].
|
||||
|
||||
According to web search results, the key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:https://docs.python.org/3/library/asyncio.html]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources.
|
||||
|
||||
However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead.
|
||||
</citation_output_example>
|
||||
</citation_instructions>
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
<knowledge_base_only_policy>
|
||||
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
|
||||
- You MUST answer questions ONLY using information retrieved from the user's knowledge base, web search results, scraped webpages, or other tool outputs.
|
||||
- You MUST NOT answer factual or informational questions from your own training data or general knowledge unless the user explicitly grants permission.
|
||||
- If the knowledge base search returns no relevant results AND no other tool provides the answer, you MUST:
|
||||
1. Inform the user that you could not find relevant information in their knowledge base.
|
||||
2. Ask the user: "Would you like me to answer from my general knowledge instead?"
|
||||
3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes.
|
||||
- This policy does NOT apply to:
|
||||
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
|
||||
* Formatting, summarization, or analysis of content already present in the conversation
|
||||
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
|
||||
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
||||
* Queries about services that have direct tools (Linear, ClickUp, Jira, Slack, Airtable) — see <tool_routing> below
|
||||
</knowledge_base_only_policy>
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
<knowledge_base_only_policy>
|
||||
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
|
||||
- You MUST answer questions ONLY using information retrieved from the team's shared knowledge base, web search results, scraped webpages, or other tool outputs.
|
||||
- You MUST NOT answer factual or informational questions from your own training data or general knowledge unless a team member explicitly grants permission.
|
||||
- If the knowledge base search returns no relevant results AND no other tool provides the answer, you MUST:
|
||||
1. Inform the team that you could not find relevant information in the shared knowledge base.
|
||||
2. Ask: "Would you like me to answer from my general knowledge instead?"
|
||||
3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes.
|
||||
- This policy does NOT apply to:
|
||||
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
|
||||
* Formatting, summarization, or analysis of content already present in the conversation
|
||||
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
|
||||
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
||||
* Queries about services that have direct tools (Linear, ClickUp, Jira, Slack, Airtable) — see <tool_routing> below
|
||||
</knowledge_base_only_policy>
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
<memory_protocol>
|
||||
IMPORTANT — After understanding each user message, ALWAYS check: does this message
|
||||
reveal durable facts about the user (role, interests, preferences, projects,
|
||||
background, or standing instructions)? If yes, you MUST call update_memory
|
||||
alongside your normal response — do not defer this to a later turn.
|
||||
|
||||
Memory is stored as a heading-based markdown document. New entries should be
|
||||
under `##` headings such as `## Facts`, `## Preferences`, or `## Instructions`
|
||||
with bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy
|
||||
`(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write
|
||||
new saves in the heading-based format.
|
||||
</memory_protocol>
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
<memory_protocol>
|
||||
IMPORTANT — After understanding each user message, ALWAYS check: does this message
|
||||
reveal durable facts about the team (decisions, conventions, architecture, processes,
|
||||
or key facts)? If yes, you MUST call update_memory alongside your normal response —
|
||||
do not defer this to a later turn.
|
||||
|
||||
Team memory is stored as a heading-based markdown document. New entries should
|
||||
be under `##` headings such as `## Product Decisions`,
|
||||
`## Engineering Conventions`, `## Project Facts`, or `## Open Questions` with
|
||||
bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy
|
||||
`(YYYY-MM-DD) [fact]` markers, preserve the information but write new saves in
|
||||
the heading-based format. Do not create personal headings such as
|
||||
`## Preferences` or `## Instructions`.
|
||||
</memory_protocol>
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
<parameter_resolution>
|
||||
Some service tools require identifiers or context you do not have (account IDs,
|
||||
workspace names, channel IDs, project keys, etc.). NEVER ask the user for raw
|
||||
IDs or technical identifiers — they cannot memorise them.
|
||||
|
||||
Instead, follow this discovery pattern:
|
||||
1. Call a listing/discovery tool to find available options.
|
||||
2. ONE result → use it silently, no question to the user.
|
||||
3. MULTIPLE results → present the options by their display names and let the
|
||||
user choose. Never show raw UUIDs — always use friendly names.
|
||||
|
||||
Discovery tools by level:
|
||||
- Which account/workspace? → get_connected_accounts("<service>")
|
||||
- Which Jira site (cloudId)? → getAccessibleAtlassianResources
|
||||
- Which Jira project? → getVisibleJiraProjects (after resolving cloudId)
|
||||
- Which Jira issue type? → getJiraProjectIssueTypesMetadata (after resolving project)
|
||||
- Which channel? → slack_search_channels
|
||||
- Which base? → list_bases
|
||||
- Which table? → list_tables_for_base (after resolving baseId)
|
||||
- Which task? → clickup_search
|
||||
- Which issue? → list_issues (Linear) or searchJiraIssuesUsingJql (Jira)
|
||||
|
||||
For Jira specifically: ALWAYS call getAccessibleAtlassianResources first to
|
||||
obtain the cloudId, then pass it to other Jira tools. When creating an issue,
|
||||
chain: getAccessibleAtlassianResources → getVisibleJiraProjects → createJiraIssue.
|
||||
If there is only one option at each step, use it silently. If multiple, present
|
||||
friendly names.
|
||||
|
||||
Chain discovery when needed — e.g. for Airtable records: list_bases → pick
|
||||
base → list_tables_for_base → pick table → list_records_for_table.
|
||||
|
||||
MULTI-ACCOUNT TOOL NAMING: When the user has multiple accounts connected for
|
||||
the same service, tool names are prefixed to avoid collisions — e.g.
|
||||
linear_25_list_issues and linear_30_list_issues instead of two list_issues.
|
||||
Each prefixed tool's description starts with [Account: <display_name>] so you
|
||||
know which account it targets. Use get_connected_accounts("<service>") to see
|
||||
the full list of accounts with their connector IDs and display names.
|
||||
When only one account is connected, tools have their normal unprefixed names.
|
||||
</parameter_resolution>
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
<tool_routing>
|
||||
CRITICAL — You have direct tools for these services: Linear, ClickUp, Jira, Slack, Airtable.
|
||||
Their data is NEVER in the knowledge base. You MUST call their tools immediately — never
|
||||
say "I don't see it in the knowledge base" or ask the user if they want you to check.
|
||||
Ignore any knowledge base results for these services.
|
||||
|
||||
When to use which tool:
|
||||
- Linear (issues, teams, users, projects when MCP exposes them) → hosted Linear MCP read tools (e.g. `list_issues`, `get_issue`, `list_teams`, `list_users`, …) and `save_issue` for create/update; native SurfSense Linear issue tools when present. For **multi-step Linear-only** work (several reads, structured evidence), delegate with the `task` tool to subagent **`linear_specialist`** instead of mixing unrelated tools.
|
||||
- ClickUp (tasks) → clickup_search, clickup_get_task
|
||||
- Jira (issues) → getAccessibleAtlassianResources (cloudId discovery), getVisibleJiraProjects (project discovery), getJiraProjectIssueTypesMetadata (issue type discovery), searchJiraIssuesUsingJql, createJiraIssue, editJiraIssue
|
||||
- Slack (messages, channels) → `slack_search_channels`, `slack_read_channel`, `slack_read_thread`, and other `slack_*` tools when connected. For **multi-step Slack-only** work, delegate with `task` to **`slack_specialist`**.
|
||||
- Airtable (bases, tables, records) → list_bases, list_tables_for_base, list_records_for_table
|
||||
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
|
||||
- Real-time public web data → call web_search
|
||||
- Reading a specific webpage → call scrape_webpage
|
||||
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
|
||||
|
||||
**`task` subagents (when to delegate):**
|
||||
- **`linear_specialist`** — Linear-only investigations and tool use.
|
||||
- **`slack_specialist`** — Slack-only investigations and tool use.
|
||||
- **`connector_negotiator`** — **Cross-connector** chains (e.g. data from Slack then action in Linear).
|
||||
- **`explore`** — Read-only KB + web research with citations.
|
||||
- **`report_writer`** — Single `generate_report` deliverable.
|
||||
</tool_routing>
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
<tool_routing>
|
||||
CRITICAL — You have direct tools for these services: Linear, ClickUp, Jira, Slack, Airtable.
|
||||
Their data is NEVER in the knowledge base. You MUST call their tools immediately — never
|
||||
say "I don't see it in the knowledge base" or ask if they want you to check.
|
||||
Ignore any knowledge base results for these services.
|
||||
|
||||
When to use which tool:
|
||||
- Linear (issues, teams, users, projects when MCP exposes them) → hosted Linear MCP read tools (e.g. `list_issues`, `get_issue`, `list_teams`, `list_users`, …) and `save_issue` for create/update; native SurfSense Linear issue tools when present. For **multi-step Linear-only** work (several reads, structured evidence), delegate with the `task` tool to subagent **`linear_specialist`** instead of mixing unrelated tools.
|
||||
- ClickUp (tasks) → clickup_search, clickup_get_task
|
||||
- Jira (issues) → getAccessibleAtlassianResources (cloudId discovery), getVisibleJiraProjects (project discovery), getJiraProjectIssueTypesMetadata (issue type discovery), searchJiraIssuesUsingJql, createJiraIssue, editJiraIssue
|
||||
- Slack (messages, channels) → `slack_search_channels`, `slack_read_channel`, `slack_read_thread`, and other `slack_*` tools when connected. For **multi-step Slack-only** work, delegate with `task` to **`slack_specialist`**.
|
||||
- Airtable (bases, tables, records) → list_bases, list_tables_for_base, list_records_for_table
|
||||
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
|
||||
- Real-time public web data → call web_search
|
||||
- Reading a specific webpage → call scrape_webpage
|
||||
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
|
||||
|
||||
**`task` subagents (when to delegate):**
|
||||
- **`linear_specialist`** — Linear-only investigations and tool use.
|
||||
- **`slack_specialist`** — Slack-only investigations and tool use.
|
||||
- **`connector_negotiator`** — **Cross-connector** chains (e.g. data from Slack then action in Linear).
|
||||
- **`explore`** — Read-only KB + web research with citations.
|
||||
- **`report_writer`** — Single `generate_report` deliverable.
|
||||
</tool_routing>
|
||||
|
|
@ -1,403 +0,0 @@
|
|||
"""
|
||||
Prompt composer for the SurfSense ``new_chat`` agent.
|
||||
|
||||
This module assembles the agent's system prompt from the markdown fragments
|
||||
under :mod:`app.prompts.system_prompt_composer`. It replaces the monolithic
|
||||
``system_prompt.py`` with a clean, fragment-based composition:
|
||||
|
||||
::
|
||||
|
||||
prompts/
|
||||
base/ # agent identity, KB policy, tool routing, …
|
||||
providers/ # provider-specific tweaks (anthropic, gpt5, …)
|
||||
tools/ # one ``<name>.md`` per tool
|
||||
examples/ # one ``<name>.md`` per tool with call examples
|
||||
routing/ # connector-specific routing notes (linear, slack, …)
|
||||
|
||||
The model-family dispatch step (see :func:`detect_provider_variant`)
|
||||
mirrors OpenCode's ``packages/opencode/src/session/system.ts`` — different
|
||||
model families respond best to differently-styled prompts (Claude likes
|
||||
XML/narrative, GPT-5 wants channel-aware pragmatic, Codex needs
|
||||
terse/file:line, Gemini wants formal numbered steps, etc.). LangChain's
|
||||
``dynamic_prompt`` helper supports per-call prompt swaps but ships no
|
||||
out-of-the-box family classifier, so we keep our own.
|
||||
|
||||
Backwards compatibility
|
||||
=======================
|
||||
|
||||
``system_prompt.py`` re-exports :func:`compose_system_prompt` and wraps it
|
||||
in functions with the same signatures as the legacy
|
||||
``build_surfsense_system_prompt`` / ``build_configurable_system_prompt`` so
|
||||
existing call sites do not change.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections.abc import Iterable
|
||||
from datetime import UTC, datetime
|
||||
from importlib import resources
|
||||
|
||||
from app.db import ChatVisibility
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Provider variant detection
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# String literal alias for the supported provider-specific prompt variants.
|
||||
# When adding a new variant, also drop a matching ``providers/<variant>.md``
|
||||
# file in this package and (if appropriate) extend the regex matchers below.
|
||||
#
|
||||
# Stylistic clusters: each variant is a focused style nudge, NOT a full
|
||||
# system prompt — the main prompt is already assembled from base/ +
|
||||
# tools/ + routing/. The clustering itself (which models map to which
|
||||
# style) follows OpenCode's ``system.ts`` family table; see the module
|
||||
# docstring for credits.
|
||||
# Plain ``str`` alias rather than a ``Literal``: any string type-checks as a
# variant, so unknown values are not rejected statically.
ProviderVariant = str
|
||||
# Known values:
|
||||
# "anthropic" — Claude family (XML-friendly, narrative todos)
|
||||
# "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
|
||||
# "openai_classic" — GPT-4 family (autonomous persistence)
|
||||
# "openai_codex" — gpt-*-codex (code-purist, terse, file:line refs)
|
||||
# "google" — Gemini (formal, <3-line, numbered workflow)
|
||||
# "kimi" — Moonshot Kimi-K* (action-bias, parallel tools)
|
||||
# "grok" — xAI Grok (extreme-terse, one-word ok)
|
||||
# "deepseek" — DeepSeek V3 / R1 (terse, R1-aware reasoning)
|
||||
# "default" — fallback, no provider-specific block emitted
|
||||
|
||||
# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
|
||||
# More specific patterns must come first (e.g. ``codex`` before
|
||||
# ``openai_reasoning`` because codex model ids contain ``gpt``).
|
||||
|
||||
_OPENAI_CODEX_RE = re.compile(
    r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE
)
_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)


def detect_provider_variant(model_name: str | None) -> ProviderVariant:
    """Map a model id string to a provider-specific prompt variant.

    The model id is stripped of surrounding whitespace and tested against
    each family pattern in a fixed order; the first pattern that matches
    anywhere in the id decides the result.

    Args:
        model_name: Model identifier, e.g. ``"gpt-5-codex"``. ``None`` or an
            empty string short-circuits to ``"default"``.

    Returns:
        One of ``"openai_codex"``, ``"openai_reasoning"``,
        ``"openai_classic"``, ``"anthropic"``, ``"google"``, ``"kimi"``,
        ``"grok"``, ``"deepseek"``, or ``"default"`` when nothing matches.
    """
    if not model_name:
        return "default"

    candidate = model_name.strip()

    # Order is significant: the codex pattern must be tried before the
    # reasoning pattern, because ``gpt-5-codex`` also matches ``gpt-5``.
    ordered_matchers = (
        (_OPENAI_CODEX_RE, "openai_codex"),
        (_OPENAI_REASONING_RE, "openai_reasoning"),
        (_OPENAI_CLASSIC_RE, "openai_classic"),
        (_ANTHROPIC_RE, "anthropic"),
        (_GOOGLE_RE, "google"),
        (_KIMI_RE, "kimi"),
        (_GROK_RE, "grok"),
        (_DEEPSEEK_RE, "deepseek"),
    )
    for pattern, variant in ordered_matchers:
        if pattern.search(candidate):
            return variant
    return "default"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Fragment loading
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
_PROMPTS_PACKAGE = "app.prompts.system_prompt_composer"
|
||||
|
||||
|
||||
def _read_fragment(subpath: str) -> str:
|
||||
"""Read a fragment file from the ``prompts/`` resource tree.
|
||||
|
||||
Returns the raw contents stripped of any single trailing newline so
|
||||
composition can append explicit separators without compounding blank
|
||||
lines. Missing files return an empty string so optional fragments
|
||||
(e.g. provider hints) act as no-ops.
|
||||
"""
|
||||
parts = subpath.split("/")
|
||||
try:
|
||||
ref = resources.files(_PROMPTS_PACKAGE).joinpath(*parts)
|
||||
if not ref.is_file():
|
||||
return ""
|
||||
text = ref.read_text(encoding="utf-8")
|
||||
except (FileNotFoundError, ModuleNotFoundError):
|
||||
return ""
|
||||
if text.endswith("\n"):
|
||||
text = text[:-1]
|
||||
return text
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Tool ordering + memory variant resolution
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Ordered for reading flow: fundamentals first, then artifact generators,
|
||||
# then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``).
|
||||
# Iteration order here is the order in which tool fragments are emitted into
# the composed prompt.
ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = (
    "web_search",
    # Artifact generators.
    "generate_podcast",
    "generate_video_presentation",
    "generate_report",
    "generate_resume",
    "generate_image",
    "scrape_webpage",
    # Memory goes last.
    "update_memory",
)
|
||||
|
||||
|
||||
_MEMORY_VARIANT_TOOLS: frozenset[str] = frozenset({"update_memory"})
|
||||
|
||||
|
||||
def _tool_fragment_path(tool_name: str, variant: str) -> str:
|
||||
"""Resolve a tool's instruction fragment path.
|
||||
|
||||
Tools listed in :data:`_MEMORY_VARIANT_TOOLS` switch on the conversation
|
||||
visibility and load ``tools/<name>_<variant>.md``; everything else
|
||||
falls back to ``tools/<name>.md``.
|
||||
"""
|
||||
if tool_name in _MEMORY_VARIANT_TOOLS:
|
||||
return f"tools/{tool_name}_{variant}.md"
|
||||
return f"tools/{tool_name}.md"
|
||||
|
||||
|
||||
def _example_fragment_path(tool_name: str, variant: str) -> str:
    """Return the relative path of a tool's call-examples fragment.

    Tools in ``_MEMORY_VARIANT_TOOLS`` resolve to
    ``examples/<name>_<variant>.md``; every other tool resolves to
    ``examples/<name>.md``.
    """
    uses_variant_file = tool_name in _MEMORY_VARIANT_TOOLS
    return (
        f"examples/{tool_name}_{variant}.md"
        if uses_variant_file
        else f"examples/{tool_name}.md"
    )
|
||||
|
||||
|
||||
def _format_tool_label(tool_name: str) -> str:
|
||||
return tool_name.replace("_", " ").title()
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Section builders
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _build_system_instructions(
    *,
    visibility: ChatVisibility,
    resolved_today: str,
) -> str:
    """Assemble the ``<system_instruction>`` block from the base fragments.

    Args:
        visibility: ``ChatVisibility.SEARCH_SPACE`` selects the ``team``
            fragment variants; any other value selects ``private``.
        resolved_today: Value substituted for ``{resolved_today}``
            placeholders in the assembled text.

    Returns:
        The non-empty fragments joined by blank lines, wrapped in
        ``<system_instruction>`` tags, after ``str.format`` substitution.
    """
    if visibility == ChatVisibility.SEARCH_SPACE:
        variant = "team"
    else:
        variant = "private"

    fragment_paths = (
        f"base/agent_{variant}.md",
        f"base/kb_only_policy_{variant}.md",
        f"base/tool_routing_{variant}.md",
        "base/parameter_resolution.md",
        f"base/memory_protocol_{variant}.md",
    )
    loaded = (_read_fragment(path) for path in fragment_paths)
    body = "\n\n".join(text for text in loaded if text)

    # The whole block is a ``str.format`` template, so fragments are expected
    # to escape any literal braces.
    template = "\n<system_instruction>\n" + body + "\n\n</system_instruction>\n"
    return template.format(resolved_today=resolved_today)
|
||||
|
||||
|
||||
def _build_mcp_routing_block(
|
||||
mcp_connector_tools: dict[str, list[str]] | None,
|
||||
) -> str:
|
||||
"""Emit the ``<mcp_tool_routing>`` block when at least one MCP server is wired."""
|
||||
if not mcp_connector_tools:
|
||||
return ""
|
||||
lines: list[str] = [
|
||||
"\n<mcp_tool_routing>",
|
||||
"You also have direct tools from these user-connected MCP servers.",
|
||||
"Their data is NEVER in the knowledge base — call their tools directly.",
|
||||
"",
|
||||
]
|
||||
for server_name, tool_names in mcp_connector_tools.items():
|
||||
lines.append(f"- {server_name} → {', '.join(tool_names)}")
|
||||
lines.append("</mcp_tool_routing>\n")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_tools_section(
    *,
    visibility: ChatVisibility,
    enabled_tool_names: set[str] | None,
    disabled_tool_names: set[str] | None,
) -> str:
    """Assemble the tools section and the ``<tool_call_examples>`` block.

    Args:
        visibility: ``ChatVisibility.SEARCH_SPACE`` selects the ``team``
            fragment variant; any other value selects ``private``.
        enabled_tool_names: When not ``None``, only tools in this set
            contribute instruction and example fragments.
        disabled_tool_names: Tools the user switched off; names that are also
            in ``ALL_TOOL_NAMES_ORDERED`` are listed in a notice.

    Returns:
        The concatenation of: the preamble fragment (if present), each
        selected tool's instruction fragment, the optional disabled-tools
        notice, the closing ``</tools>`` tag, and the examples block when any
        example fragment was found.
    """
    if visibility == ChatVisibility.SEARCH_SPACE:
        variant = "team"
    else:
        variant = "private"

    if enabled_tool_names is None:
        active_tools = ALL_TOOL_NAMES_ORDERED
    else:
        active_tools = tuple(
            name for name in ALL_TOOL_NAMES_ORDERED if name in enabled_tool_names
        )

    chunks: list[str] = []
    preamble = _read_fragment("tools/_preamble.md")
    if preamble:
        chunks.append(f"{preamble}\n")

    example_chunks: list[str] = []
    for name in active_tools:
        instruction = _read_fragment(_tool_fragment_path(name, variant))
        if instruction:
            chunks.append(f"{instruction}\n")

        example = _read_fragment(_example_fragment_path(name, variant))
        if example:
            example_chunks.append(f"{example}\n")

    # Only disabled names that are known tools are reported, in canonical order.
    disabled_lookup = set(disabled_tool_names) if disabled_tool_names else set()
    disabled_labels = [
        _format_tool_label(name)
        for name in ALL_TOOL_NAMES_ORDERED
        if name in disabled_lookup
    ]
    if disabled_labels:
        disabled_list = ", ".join(disabled_labels)
        chunks.append(
            "\n"
            "DISABLED TOOLS (by user):\n"
            f"The following tools are available in SurfSense but have been disabled by the user for this session: {disabled_list}.\n"
            "You do NOT have access to these tools and MUST NOT claim you can use them.\n"
            "If the user asks about a capability provided by a disabled tool, let them know the relevant tool\n"
            "is currently disabled and they can re-enable it.\n"
        )

    chunks.append("\n</tools>\n")

    if example_chunks:
        chunks += ["<tool_call_examples>", *example_chunks, "</tool_call_examples>\n"]

    return "".join(chunks)
|
||||
|
||||
|
||||
def _build_provider_block(provider_variant: ProviderVariant) -> str:
|
||||
"""Optional provider-tuned hints. Empty for ``"default"``."""
|
||||
if not provider_variant or provider_variant == "default":
|
||||
return ""
|
||||
text = _read_fragment(f"providers/{provider_variant}.md")
|
||||
return f"\n{text}\n" if text else ""
|
||||
|
||||
|
||||
def _build_routing_block(connector_routing: Iterable[str] | None) -> str:
|
||||
if not connector_routing:
|
||||
return ""
|
||||
fragments: list[str] = []
|
||||
for name in connector_routing:
|
||||
text = _read_fragment(f"routing/{name}.md")
|
||||
if text:
|
||||
fragments.append(text)
|
||||
if not fragments:
|
||||
return ""
|
||||
return "\n" + "\n\n".join(fragments) + "\n"
|
||||
|
||||
|
||||
def _build_citation_block(citations_enabled: bool) -> str:
    """Return the citation-policy fragment wrapped in newlines.

    Loads ``base/citations_on.md`` when *citations_enabled* is true and
    ``base/citations_off.md`` otherwise; an empty fragment yields ``""``.
    """
    path = "base/citations_on.md" if citations_enabled else "base/citations_off.md"
    fragment = _read_fragment(path)
    if not fragment:
        return ""
    return f"\n{fragment}\n"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Public API
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compose_system_prompt(
    *,
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    mcp_connector_tools: dict[str, list[str]] | None = None,
    custom_system_instructions: str | None = None,
    use_default_system_instructions: bool = True,
    citations_enabled: bool = True,
    provider_variant: ProviderVariant | None = None,
    model_name: str | None = None,
    connector_routing: Iterable[str] | None = None,
) -> str:
    """Assemble the SurfSense system prompt from disk fragments.

    Args:
        today: Optional clock injection for tests. Converted to UTC and
            rendered as an ISO date; defaults to the current UTC time.
        thread_visibility: Private vs shared (team) — drives memory wording
            and a few base block variants. ``None`` is treated as
            ``ChatVisibility.PRIVATE``.
        enabled_tool_names: When provided, only these tools' instructions
            are included; ``None`` keeps the legacy "include everything"
            behavior.
        disabled_tool_names: User-disabled tools (note appended to prompt).
        mcp_connector_tools: ``{server_name: [tool_names...]}`` to inject
            an explicit MCP routing block.
        custom_system_instructions: Free-form instructions that override
            the default ``<system_instruction>`` block. A
            ``{resolved_today}`` placeholder is replaced with the resolved
            date. Text that is not a valid ``str.format`` template (for
            example because it contains literal braces) is still accepted:
            only the literal ``{resolved_today}`` token is substituted.
        use_default_system_instructions: When ``custom_system_instructions``
            is empty/None, fall back to defaults (legacy semantics). When
            false and no custom text is given, the instruction block is
            empty.
        citations_enabled: Include ``citations_on.md`` (true) or
            ``citations_off.md`` (false).
        provider_variant: Explicit provider variant override
            (``"anthropic" | "openai_reasoning" | "openai_classic" |
            "openai_codex" | "google" | "kimi" | "grok" | "deepseek" |
            "default"``). When ``None``, falls back to
            :func:`detect_provider_variant` on ``model_name``.
        model_name: Used to auto-detect ``provider_variant`` when not
            provided explicitly.
        connector_routing: Optional list of routing fragment names
            (``["linear", "slack", ...]``) to include from
            ``prompts/routing/``.

    Returns:
        The fully composed system prompt string: instruction block (plus MCP,
        provider and routing blocks), then the tools section, then the
        citation block.
    """
    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
    visibility = thread_visibility or ChatVisibility.PRIVATE

    if custom_system_instructions and custom_system_instructions.strip():
        try:
            sys_block = custom_system_instructions.format(resolved_today=resolved_today)
        except (KeyError, IndexError, ValueError, AttributeError, TypeError):
            # Free-form text may contain literal braces (JSON snippets, code
            # samples) that are not valid format fields. Rather than failing
            # the whole prompt build, substitute only the documented token.
            sys_block = custom_system_instructions.replace(
                "{resolved_today}", resolved_today
            )
    elif use_default_system_instructions:
        sys_block = _build_system_instructions(
            visibility=visibility, resolved_today=resolved_today
        )
    else:
        sys_block = ""

    sys_block += _build_mcp_routing_block(mcp_connector_tools)

    # An explicit variant wins; otherwise infer one from the model id.
    if provider_variant is None:
        provider_variant = detect_provider_variant(model_name)
    sys_block += _build_provider_block(provider_variant)
    sys_block += _build_routing_block(connector_routing)

    tools_block = _build_tools_section(
        visibility=visibility,
        enabled_tool_names=enabled_tool_names,
        disabled_tool_names=disabled_tool_names,
    )
    citation_block = _build_citation_block(citations_enabled)

    return sys_block + tools_block + citation_block
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ALL_TOOL_NAMES_ORDERED",
|
||||
"ProviderVariant",
|
||||
"compose_system_prompt",
|
||||
"detect_provider_variant",
|
||||
]
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
|
||||
- User: "Generate an image of a cat"
|
||||
- Call: `generate_image(prompt="A fluffy orange tabby cat sitting on a windowsill, bathed in warm golden sunlight, soft bokeh background with green houseplants, photorealistic style, cozy atmosphere")`
|
||||
- The generated image will automatically be displayed in the chat.
|
||||
- User: "Draw me a logo for a coffee shop called Bean Dream"
|
||||
- Call: `generate_image(prompt="Minimalist modern logo design for a coffee shop called 'Bean Dream', featuring a stylized coffee bean with dream-like swirls of steam, clean vector style, warm brown and cream color palette, white background, professional branding")`
|
||||
- The generated image will automatically be displayed in the chat.
|
||||
- User: "Show me this image: https://example.com/image.png"
|
||||
- Simply include it in your response using markdown: `![Image](https://example.com/image.png)`
|
||||
- User uploads an image file and asks: "What is this image about?"
|
||||
- The user's uploaded image is already visible in the chat.
|
||||
- Simply analyze the image content and respond directly.
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
|
||||
- User: "Give me a podcast about AI trends based on what we discussed"
|
||||
- First search for relevant content, then call: `generate_podcast(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", podcast_title="AI Trends Podcast")`
|
||||
- User: "Create a podcast summary of this conversation"
|
||||
- Call: `generate_podcast(source_content="Complete conversation summary:\n\nUser asked about [topic 1]:\n[Your detailed response]\n\nUser then asked about [topic 2]:\n[Your detailed response]\n\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")`
|
||||
- User: "Make a podcast about quantum computing"
|
||||
- First explore `/documents/` (ls/glob/grep/read_file), then: `generate_podcast(source_content="Key insights about quantum computing from retrieved files:\n\n[Comprehensive summary of findings]", podcast_title="Quantum Computing Explained")`
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
|
||||
- User: "Generate a report about AI trends"
|
||||
- Call: `generate_report(topic="AI Trends Report", source_strategy="kb_search", search_queries=["AI trends recent developments", "artificial intelligence industry trends", "AI market growth and predictions"], report_style="detailed")`
|
||||
- WHY: Has creation verb "generate" → call the tool. No prior discussion → use kb_search.
|
||||
- User: "Write a research report from this conversation"
|
||||
- Call: `generate_report(topic="Research Report", source_strategy="conversation", source_content="Complete conversation summary:\n\n...", report_style="deep_research")`
|
||||
- WHY: Has creation verb "write" → call the tool. Conversation has the content → use source_strategy="conversation".
|
||||
- User: (after a report on Climate Change was generated) "Add a section about carbon capture technologies"
|
||||
- Call: `generate_report(topic="Climate Crisis: Causes, Impacts, and Solutions", source_strategy="conversation", source_content="[summary of conversation context if any]", parent_report_id=<previous_report_id>, user_instructions="Add a new section about carbon capture technologies")`
|
||||
- WHY: Has modification verb "add" + specific deliverable target → call the tool with parent_report_id.
|
||||
- User: (after a report was generated) "What else could we add to have more depth?"
|
||||
- Do NOT call generate_report. Answer in chat with suggestions.
|
||||
- WHY: No creation/modification verb directed at producing a deliverable.
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
|
||||
- User: "Build me a resume. I'm John Doe, engineer at Acme Corp..."
|
||||
- Call: `generate_resume(user_info="John Doe, engineer at Acme Corp...", max_pages=1)`
|
||||
- WHY: Has creation verb "build" + resume → call the tool.
|
||||
- User: "Create my CV with this info: [experience, education, skills]"
|
||||
- Call: `generate_resume(user_info="[experience, education, skills]", max_pages=1)`
|
||||
- User: "Build me a resume" (and there is a resume/CV document in the conversation context)
|
||||
- Extract the FULL content from the document in context, then call:
|
||||
`generate_resume(user_info="Name: John Doe\nEmail: john@example.com\n\nExperience:\n- Senior Engineer at Acme Corp (2020-2024)\n Led team of 5...\n\nEducation:\n- BS Computer Science, MIT (2016-2020)\n\nSkills: Python, TypeScript, AWS...", max_pages=1)`
|
||||
- WHY: Document content is available in context — extract ALL of it into user_info. Do NOT ignore referenced documents.
|
||||
- User: (after resume generated) "Change my title to Senior Engineer"
|
||||
- Call: `generate_resume(user_info="", user_instructions="Change the job title to Senior Engineer", parent_report_id=<previous_report_id>, max_pages=1)`
|
||||
- WHY: Modification verb "change" + refers to existing resume → set parent_report_id.
|
||||
- User: (after resume generated) "Make this 2 pages and expand projects"
|
||||
- Call: `generate_resume(user_info="", user_instructions="Expand projects and keep this to at most 2 pages", parent_report_id=<previous_report_id>, max_pages=2)`
|
||||
- WHY: Explicit page increase request → set max_pages to 2.
|
||||
- User: "How should I structure my resume?"
|
||||
- Do NOT call generate_resume. Answer in chat with advice.
|
||||
- WHY: No creation/modification verb.
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
|
||||
- User: "Give me a presentation about AI trends based on what we discussed"
|
||||
- First search for relevant content, then call: `generate_video_presentation(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", video_title="AI Trends Presentation")`
|
||||
- User: "Create slides summarizing this conversation"
|
||||
- Call: `generate_video_presentation(source_content="Complete conversation summary:\n\nUser asked about [topic 1]:\n[Your detailed response]\n\nUser then asked about [topic 2]:\n[Your detailed response]\n\n[Continue for all exchanges in the conversation]", video_title="Conversation Summary")`
|
||||
- User: "Make a video presentation about quantum computing"
|
||||
- First explore `/documents/` (ls/glob/grep/read_file), then: `generate_video_presentation(source_content="Key insights about quantum computing from retrieved files:\n\n[Comprehensive summary of findings]", video_title="Quantum Computing Explained")`
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
|
||||
- User: "Check out https://dev.to/some-article"
|
||||
- Call: `scrape_webpage(url="https://dev.to/some-article")`
|
||||
- Respond with a structured analysis — key points, takeaways.
|
||||
- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
|
||||
- Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
|
||||
- Respond with a thorough summary using headings and bullet points.
|
||||
- User: (after discussing https://example.com/stats) "Can you get the live data from that page?"
|
||||
- Call: `scrape_webpage(url="https://example.com/stats")`
|
||||
- IMPORTANT: Always attempt scraping first. Never refuse before trying the tool.
|
||||
- User: "https://example.com/blog/weekend-recipes"
|
||||
- Call: `scrape_webpage(url="https://example.com/blog/weekend-recipes")`
|
||||
- When a user sends just a URL with no instructions, scrape it and provide a concise summary of the content.
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
|
||||
- <user_name>Alex</user_name>, <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
|
||||
- The user casually shared a durable fact:
|
||||
update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n")
|
||||
- User: "Remember that I prefer concise answers over detailed explanations"
|
||||
- Durable preference. Merge with existing memory:
|
||||
update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n\n## Preferences\n- 2025-03-15: Alex prefers concise answers over detailed explanations\n")
|
||||
- User: "I actually moved to Tokyo last month"
|
||||
- Updated fact, date prefix reflects when recorded:
|
||||
update_memory(updated_memory="## Facts\n- 2025-03-15: Alex lives in Tokyo (previously London)\n...")
|
||||
- User: "I'm a freelance photographer working on a nature documentary"
|
||||
- Durable background info under a fitting heading:
|
||||
update_memory(updated_memory="...\n\n## Current Focus\n- 2025-03-15: Alex is a freelance photographer\n- 2025-03-15: Alex is working on a nature documentary\n")
|
||||
- User: "Always respond in bullet points"
|
||||
- Standing instruction:
|
||||
update_memory(updated_memory="...\n\n## Instructions\n- 2025-03-15: Always respond to Alex in bullet points\n")
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
|
||||
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
|
||||
- Durable team decision:
|
||||
update_memory(updated_memory="## Product Decisions\n- 2025-03-15: Weekly standup meetings happen on Mondays\n...")
|
||||
- User: "Our office is in downtown Seattle, 5th floor"
|
||||
- Durable team fact:
|
||||
update_memory(updated_memory="## Project Facts\n- 2025-03-15: Office location is downtown Seattle, 5th floor\n...")
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
|
||||
- User: "What's the current USD to INR exchange rate?"
|
||||
- Call: `web_search(query="current USD to INR exchange rate")`
|
||||
- Then answer using the returned web results with citations.
|
||||
- User: "What's the latest news about AI?"
|
||||
- Call: `web_search(query="latest AI news today")`
|
||||
- User: "What's the weather in New York?"
|
||||
- Call: `web_search(query="weather New York today")`
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue