citations: consolidate prompts, retire eager path, refresh ADR

Rewrite the main-agent citation contract to a single [n] channel and sync
the orphaned system_prompt_composer surface to match; drop stale
[citation:chunk_id] / <chunk_index> references from dynamic_context and
provider hints. Reuse the shared hybrid search in the deliverables report
(citations omitted for now) and delete the orphaned report KB helper.
Remove the dead eager KnowledgePriorityMiddleware wiring (knowledge_priority
+ stack) and its legacy browse test. Update ADR 0001 to reflect the cutover.
This commit is contained in:
CREDO23 2026-06-25 15:27:09 +02:00
parent 49d675c065
commit ce15016533
20 changed files with 316 additions and 1127 deletions

View file

@ -1,42 +0,0 @@
"""KB priority planner: <priority_documents> injection."""
from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
KnowledgePriorityMiddleware,
)
from app.services.llm_service import get_planner_llm
def build_knowledge_priority_mw(
    *,
    llm: BaseChatModel,
    search_space_id: int,
    filesystem_mode: FilesystemMode,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    preinjection_enabled: bool = True,
) -> KnowledgePriorityMiddleware:
    """Construct the ``KnowledgePriorityMiddleware`` for the main agent.

    All keyword arguments are forwarded unchanged, except that
    ``preinjection_enabled`` is inverted into the middleware's
    ``mentions_only`` flag. The planner model comes from
    ``get_planner_llm()`` and ``inject_system_message`` is always ``False``.

    With ``preinjection_enabled=False`` (the lazy default) the middleware is
    put in mentions-only mode: the planner LLM, embedding and hybrid search
    are skipped and only explicit @-mentions are surfaced, leaving the main
    agent to fetch KB content on demand through ``search_knowledge_base``.
    """
    # Mentions-only is simply the negation of eager pre-injection.
    mentions_only = not preinjection_enabled
    planner = get_planner_llm()
    return KnowledgePriorityMiddleware(
        llm=llm,
        planner_llm=planner,
        search_space_id=search_space_id,
        filesystem_mode=filesystem_mode,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        inject_system_message=False,
        mentions_only=mentions_only,
    )

View file

@ -1,10 +1,11 @@
"""Main-agent middleware list assembly: one line per slot.
The main agent is a pure router; filesystem reads/writes are owned by the
``knowledge_base`` subagent and delegated via the ``task`` tool. The stack
here only renders KB context (workspace tree + priority docs), projects it
into system messages, and commits any subagent-side staged writes at end of
turn (cloud mode).
``knowledge_base`` subagent and delegated via the ``task`` tool. Knowledge-base
retrieval is pull-based: the ``search_knowledge_base`` tool runs the hybrid
search on demand and renders ``<retrieved_context>`` with ``[n]`` citation
labels. The stack here computes the workspace tree, commits any subagent-side
staged writes at end of turn (cloud mode), and wires the supporting middleware.
"""
from __future__ import annotations
@ -33,9 +34,6 @@ from app.agents.chat.multi_agent_chat.shared.middleware.anthropic_cache import (
from app.agents.chat.multi_agent_chat.shared.middleware.compaction import (
build_compaction_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.kb_context_projection import (
build_kb_context_projection_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.patch_tool_calls import (
build_patch_tool_calls_mw,
)
@ -84,7 +82,6 @@ from .context_editing import build_context_editing_mw
from .dedup_hitl import build_dedup_hitl_mw
from .doom_loop import build_doom_loop_mw
from .kb_persistence import build_kb_persistence_mw
from .knowledge_priority import build_knowledge_priority_mw
from .knowledge_tree import build_knowledge_tree_mw
from .noop_injection import build_noop_injection_mw
from .otel_span import build_otel_mw
@ -237,16 +234,6 @@ def build_main_agent_deepagent_middleware(
search_space_id=search_space_id,
llm=llm,
),
build_knowledge_priority_mw(
llm=llm,
search_space_id=search_space_id,
filesystem_mode=filesystem_mode,
available_connectors=available_connectors,
available_document_types=available_document_types,
mentioned_document_ids=mentioned_document_ids,
preinjection_enabled=flags.enable_kb_priority_preinjection,
),
build_kb_context_projection_mw(),
build_kb_persistence_mw(
filesystem_mode=filesystem_mode,
search_space_id=search_space_id,

View file

@ -15,7 +15,7 @@ allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search
1. Decompose the user's question into 2-4 specific, citation-worthy sub-questions.
2. For each sub-question, run **one** targeted KB search (focused on terms the user would have written, not synonyms). Open the most relevant 2-3 documents fully via `read_file` if their excerpts are too short.
3. Use `grep` to find supporting passages in long files instead of re-reading them end to end.
4. Cite every claim with `[citation:chunk_id]` exactly as the chunk tag specifies.
4. Cite every claim with the `[n]` label shown on the passage you used (search results and `read_file` output both carry them); never write a chunk id, URL, or title yourself.
## What good output looks like
- Short paragraphs with inline citations.

View file

@ -1,12 +1,13 @@
<citations>
Citation markers are **disabled** in this configuration.
Do NOT include `[citation:…]` markers anywhere, even if tool descriptions or
Do NOT include `[n]` citation labels or `[citation:…]` markers anywhere, even if
tool output (`<retrieved_context>`, `<web_results>`), tool descriptions, or
examples reference them. Ignore citation-format reminders elsewhere in this
prompt when they conflict with this block.
1. Answer in plain prose. Optional markdown links to public URLs when
sources are URLs.
2. Do not expose raw chunk ids, document ids, or internal ids to the user.
3. Present KB or docs facts naturally without attribution markers.
3. Present KB, web, or docs facts naturally without attribution markers.
</citations>

View file

@ -1,42 +1,16 @@
<citations>
Citations reach the answer through two channels. Use whichever applies — and
never invent ids you didn't see. Citation ids are resolved by exact-match
lookup; a wrong id silently breaks the link, so when in doubt, omit.
Cite with one token: the bracket label `[n]`. Every citable result —
`search_knowledge_base` passages, `web_search` results, and prose from a
`task` knowledge_base/research specialist — already carries `[n]` labels on a
single shared count. Those labels are the only citation you write; the server
resolves each one back to its source after the turn.
### Channel A — chunk blocks injected this turn
When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
turn:
1. For each factual statement taken from those chunks, add
`[citation:chunk_id]` using the **exact** id from a visible
`<chunk id='…'>` tag. Copy digit-for-digit (or the URL verbatim);
do not retype from memory.
2. `<document_id>` is the parent doc id, **not** a citation source —
only ids inside `<chunk id='…'>` count.
3. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated,
each id copied individually).
4. Never invent, normalise, or guess at adjacent ids; if unsure, omit.
5. Plain brackets only — no markdown links, no footnote numbering.
### Channel B — citations relayed by a `task` specialist
A `task(...)` tool message may contain `[citation:<chunk_id>]` markers
the specialist already attached to its prose. The specialist saw the
underlying `<chunk id='…'>` blocks; you didn't. So:
1. **Preserve those markers verbatim** in your final answer — do not
reformat, renumber, drop, or wrap them in markdown links. When you
paraphrase a specialist sentence, copy the marker character-for-
character; do not regenerate the id from memory (LLMs reliably
corrupt nearby digits).
2. Keep each marker attached to the sentence the specialist attached
it to.
3. Do **not** add new `[citation:…]` markers of your own to a
specialist's prose; if a fact has no marker, the specialist
couldn't tie it to a chunk and neither can you.
4. When a specialist returns JSON, the citation markers live inside
the prose-bearing fields (e.g. a summary or excerpt). Pull them
along with the surrounding sentence when you quote.
If neither channel surfaces citation markers this turn, do not fabricate
them.
1. Put the label right after the claim it supports.
2. Several sources for one claim: stack brackets, `[1][2]`.
3. Copy labels exactly as shown, a specialist's included — never renumber them,
add your own, or write the underlying title, date, id, or URL instead.
4. Write the bare `[n]` and nothing else: no `[citation:...]`, no markdown links,
no footnote marks, no "References" section.
5. Only label claims the sources support. If nothing shown backs a claim — or you
never saw a label — leave it uncited; never invent one.
</citations>

View file

@ -11,17 +11,16 @@ your answer, not as the task itself.
`<priority_documents>` lists the workspace documents most relevant to the
latest user message, ranked by relevance score, with `[USER-MENTIONED]`
flagged on anything the user explicitly referenced. When the task is about
workspace content, read these first; matched passages inside each document
are flagged via `<chunk_index>` so you can jump straight to them.
workspace content, read these first.
`<workspace_tree>` shows the full `/documents/` folder and file layout. Use
it to resolve paths the user describes in natural language ("my Q2 roadmap",
"last week's meeting notes") into concrete document references before
delegating to a specialist.
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
by KB search (backing `<priority_documents>`). Each chunk carries a stable
`id` attribute.
`<retrieved_context>` blocks hold knowledge-base passages from
`search_knowledge_base`; each `<document>` inside is in excerpt view and every
passage is prefixed with an `[n]` citation label.
If a block doesn't appear this turn, work from the conversation alone.
</dynamic_context>

View file

@ -10,18 +10,16 @@ budget. Treat it as background colour for your answer, not as the task itself.
`<priority_documents>` lists the workspace documents most relevant to the
latest user message, ranked by relevance score, with `[USER-MENTIONED]`
flagged on anything someone in the thread explicitly referenced. When the
task is about workspace content, read these first; matched passages inside
each document are flagged via `<chunk_index>` so you can jump straight to
them.
task is about workspace content, read these first.
`<workspace_tree>` shows the full `/documents/` folder and file layout. Use
it to resolve paths described in natural language ("the Q2 roadmap", "last
week's planning notes") into concrete document references before delegating
to a specialist.
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
by KB search (backing `<priority_documents>`). Each chunk carries a stable
`id` attribute.
`<retrieved_context>` blocks hold knowledge-base passages from
`search_knowledge_base`; each `<document>` inside is in excerpt view and every
passage is prefixed with an `[n]` citation label.
If a block doesn't appear this turn, work from the conversation alone.
</dynamic_context>

View file

@ -8,8 +8,8 @@ Tool discipline:
- Typically one investigative tool per turn unless several independent read-only queries are clearly needed; don't repeat identical calls.
Attribution:
- When citations are **enabled** (see citation block above) and you answer from chunk-tagged documents, use `[citation:chunk_id]` exactly as specified there.
- When citations are **disabled**, never emit `[citation:…]` — plain prose and links per tool guidance.
- When citations are **enabled** (see citation block above) and you answer from labelled passages, cite with the bare `[n]` label exactly as specified there.
- When citations are **disabled**, never emit `[n]` or `[citation:…]` — plain prose and links per tool guidance.
Style:
- No emojis unless asked; flat lists for short answers.

View file

@ -3,7 +3,7 @@ You are running on an OpenAI Codex-class model (SurfSense **main agent**).
Output style:
- Concise; don't paste huge fetch blobs — summarize.
- When citations are **enabled** and you rely on chunk-tagged docs, references may use `[citation:chunk_id]` per the citation block above; when **disabled**, use prose and URLs only.
- When citations are **enabled** and you rely on labelled passages, cite with the bare `[n]` label per the citation block above; when **disabled**, use prose and URLs only.
- Numbered lists work well when the user should reply with a single option index.
- No emojis; single-level bullets.

View file

@ -1,762 +0,0 @@
"""
Knowledge base search tool for the SurfSense agent.
This module provides:
- Connector constants and normalization
- Async knowledge base search across multiple connectors
- Document formatting for LLM context
"""
import asyncio
import contextlib
import json
import re
import time
from datetime import datetime
from typing import Any
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import NATIVE_TO_LEGACY_DOCTYPE, shielded_async_session
from app.services.connector_service import ConnectorService
from app.utils.perf import get_perf_logger
# Connectors that call external live-search APIs. These are handled by the
# ``web_search`` tool and must be excluded from knowledge-base searches.
_LIVE_SEARCH_CONNECTORS: set[str] = {
"TAVILY_API",
"LINKUP_API",
"BAIDU_SEARCH_API",
}
# Patterns that indicate the query has no meaningful search signal.
# plainto_tsquery('english', '*') produces an empty tsquery and an embedding
# of '*' is random noise, so both keyword and semantic search degrade to
# arbitrary ordering — large documents (many chunks) dominate by chance.
_DEGENERATE_QUERY_RE = re.compile(
r"^[\s*?_.#@!\-/\\]+$" # only wildcards, punctuation, whitespace
)
# Max chunks per document when doing a recency-based browse instead of
# a real search. We want breadth (many docs) over depth (many chunks).
_BROWSE_MAX_CHUNKS_PER_DOC = 5
def _is_degenerate_query(query: str) -> bool:
    """Tell whether ``query`` is too empty to drive a meaningful search.

    The query is trimmed of surrounding whitespace first. It is considered
    degenerate when nothing is left, or when what is left matches
    ``_DEGENERATE_QUERY_RE`` (only wildcards, punctuation and whitespace).
    Such queries make keyword and semantic search return effectively
    arbitrary results.
    """
    trimmed = query.strip()
    if trimmed:
        return _DEGENERATE_QUERY_RE.match(trimmed) is not None
    # Empty or whitespace-only input carries no signal at all.
    return True
async def _browse_recent_documents(
search_space_id: int,
document_type: str | list[str] | None,
top_k: int,
start_date: datetime | None,
end_date: datetime | None,
) -> list[dict[str, Any]]:
"""Return the most-recent documents (recency-ordered, no search ranking).
Used as a fallback when the search query is degenerate (e.g. ``*``) and
semantic / keyword search would produce arbitrary results. Returns
document-grouped dicts in the same shape as ``_combined_rrf_search``
so the rest of the pipeline works unchanged.

Args:
search_space_id: Only documents in this search space are considered.
document_type: A single type, a list of types, or ``None`` for no type
filter. String entries are looked up by name in ``DocumentType``;
names that are not members are silently skipped.
top_k: Maximum number of documents to return.
start_date: Optional lower bound (inclusive) on ``Document.updated_at``.
end_date: Optional upper bound (inclusive) on ``Document.updated_at``.

Returns:
One dict per document, newest ``updated_at`` first, with the keys
``document_id``, ``content``, ``score`` (always ``0.0``), ``chunks``,
``document`` and ``source``. At most ``_BROWSE_MAX_CHUNKS_PER_DOC``
chunks are kept per document. An empty list is returned when a type
filter was given but no entry resolved to a known type, or when no
document matches.
"""
# Function-scope imports: these names are not imported at module level.
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from app.db import Chunk, Document, DocumentType
perf = get_perf_logger()
t0 = time.perf_counter()
# WHERE conditions are accumulated here and applied together below.
base_conditions = [Document.search_space_id == search_space_id]
if document_type is not None:
# Accept either a single value or a list of values.
type_list = (
document_type if isinstance(document_type, list) else [document_type]
)
doc_type_enums = []
for dt in type_list:
if isinstance(dt, str):
# Unknown type names raise KeyError on lookup; ignore them.
with contextlib.suppress(KeyError):
doc_type_enums.append(DocumentType[dt])
else:
doc_type_enums.append(dt)
# A type filter was requested but nothing resolved: no possible match.
if not doc_type_enums:
return []
# Use a plain equality for one type, IN (...) for several.
if len(doc_type_enums) == 1:
base_conditions.append(Document.document_type == doc_type_enums[0])
else:
base_conditions.append(Document.document_type.in_(doc_type_enums))
if start_date is not None:
base_conditions.append(Document.updated_at >= start_date)
if end_date is not None:
base_conditions.append(Document.updated_at <= end_date)
async with shielded_async_session() as session:
# Query 1: the top_k most recently updated documents.
doc_query = (
select(Document)
.options(joinedload(Document.search_space))
.where(*base_conditions)
.order_by(Document.updated_at.desc())
.limit(top_k)
)
result = await session.execute(doc_query)
documents = result.scalars().unique().all()
if not documents:
return []
doc_ids = [d.id for d in documents]
# Query 2: all chunks of those documents, ordered within each document
# by position and then id.
chunk_query = (
select(Chunk)
.where(Chunk.document_id.in_(doc_ids))
.order_by(Chunk.document_id, Chunk.position, Chunk.id)
)
chunk_result = await session.execute(chunk_query)
raw_chunks = chunk_result.scalars().all()
# Keep only the first _BROWSE_MAX_CHUNKS_PER_DOC chunks of each document.
doc_chunk_counts: dict[int, int] = {}
doc_chunks: dict[int, list[dict]] = {d.id: [] for d in documents}
for chunk in raw_chunks:
did = chunk.document_id
count = doc_chunk_counts.get(did, 0)
if count < _BROWSE_MAX_CHUNKS_PER_DOC:
doc_chunks[did].append({"chunk_id": chunk.id, "content": chunk.content})
doc_chunk_counts[did] = count + 1
# Build one result dict per document, preserving the recency order.
results: list[dict[str, Any]] = []
for doc in documents:
chunks_list = doc_chunks.get(doc.id, [])
results.append(
{
"document_id": doc.id,
# Document-level content is the kept chunks joined by blank lines;
# chunks with empty content are left out of the join.
"content": "\n\n".join(
c["content"] for c in chunks_list if c.get("content")
),
# No relevance ranking in browse mode, so the score is a constant.
"score": 0.0,
"chunks": chunks_list,
"document": {
"id": doc.id,
"title": doc.title,
"document_type": doc.document_type.value
if getattr(doc, "document_type", None)
else None,
"metadata": doc.document_metadata or {},
},
"source": doc.document_type.value
if getattr(doc, "document_type", None)
else None,
}
)
perf.info(
"[kb_browse] recency browse in %.3fs docs=%d space=%d type=%s",
time.perf_counter() - t0,
len(results),
search_space_id,
document_type,
)
return results
# =============================================================================
# Connector Constants and Normalization
# =============================================================================
# Canonical connector values used internally by ConnectorService
# Includes all document types and search source connectors
_ALL_CONNECTORS: list[str] = [
"EXTENSION",
"FILE",
"SLACK_CONNECTOR",
"TEAMS_CONNECTOR",
"NOTION_CONNECTOR",
"YOUTUBE_VIDEO",
"GITHUB_CONNECTOR",
"ELASTICSEARCH_CONNECTOR",
"LINEAR_CONNECTOR",
"JIRA_CONNECTOR",
"CONFLUENCE_CONNECTOR",
"CLICKUP_CONNECTOR",
"GOOGLE_CALENDAR_CONNECTOR",
"GOOGLE_GMAIL_CONNECTOR",
"GOOGLE_DRIVE_FILE",
"DISCORD_CONNECTOR",
"AIRTABLE_CONNECTOR",
"LUMA_CONNECTOR",
"NOTE",
"BOOKSTACK_CONNECTOR",
"CRAWLED_URL",
"CIRCLEBACK",
"OBSIDIAN_CONNECTOR",
"ONEDRIVE_FILE",
"DROPBOX_FILE",
]
# Human-readable descriptions for each connector type
# Used for generating dynamic docstrings and informing the LLM
CONNECTOR_DESCRIPTIONS: dict[str, str] = {
"EXTENSION": "Web content saved via SurfSense browser extension (personal browsing history)",
"FILE": "User-uploaded documents (PDFs, Word, etc.) (personal files)",
"NOTE": "SurfSense Notes (notes created inside SurfSense)",
"SLACK_CONNECTOR": "Slack conversations and shared content (personal workspace communications)",
"TEAMS_CONNECTOR": "Microsoft Teams messages and conversations (personal Teams communications)",
"NOTION_CONNECTOR": "Notion workspace pages and databases (personal knowledge management)",
"YOUTUBE_VIDEO": "YouTube video transcripts and metadata (personally saved videos)",
"GITHUB_CONNECTOR": "GitHub repository content and issues (personal repositories and interactions)",
"ELASTICSEARCH_CONNECTOR": "Elasticsearch indexed documents and data (personal Elasticsearch instances)",
"LINEAR_CONNECTOR": "Linear project issues and discussions (personal project management)",
"JIRA_CONNECTOR": "Jira project issues, tickets, and comments (personal project tracking)",
"CONFLUENCE_CONNECTOR": "Confluence pages and comments (personal project documentation)",
"CLICKUP_CONNECTOR": "ClickUp tasks and project data (personal task management)",
"GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events, meetings, and schedules (personal calendar)",
"GOOGLE_GMAIL_CONNECTOR": "Google Gmail emails and conversations (personal emails)",
"GOOGLE_DRIVE_FILE": "Google Drive files and documents (personal cloud storage)",
"DISCORD_CONNECTOR": "Discord server conversations and shared content (personal community)",
"AIRTABLE_CONNECTOR": "Airtable records, tables, and database content (personal data)",
"LUMA_CONNECTOR": "Luma events and meetings",
"WEBCRAWLER_CONNECTOR": "Webpages indexed by SurfSense (personally selected websites)",
"CRAWLED_URL": "Webpages indexed by SurfSense (personally selected websites)",
"BOOKSTACK_CONNECTOR": "BookStack pages (personal documentation)",
"CIRCLEBACK": "Circleback meeting notes, transcripts, and action items",
"OBSIDIAN_CONNECTOR": "Obsidian vault notes and markdown files (personal notes)",
"ONEDRIVE_FILE": "Microsoft OneDrive files and documents (personal cloud storage)",
"DROPBOX_FILE": "Dropbox files and documents (cloud storage)",
}
def _normalize_connectors(
    connectors_to_search: list[str] | None,
    available_connectors: list[str] | None = None,
) -> list[str]:
    """Turn model-supplied connector names into canonical connector types.

    Each requested name is stripped and upper-cased, the
    ``WEBCRAWLER_CONNECTOR`` alias is mapped to ``CRAWLED_URL``, and the
    result is de-duplicated in first-seen order. A name is kept only when it
    is listed in ``_ALL_CONNECTORS`` and is allowed: allowed names are
    ``available_connectors`` when given (otherwise ``_ALL_CONNECTORS``), minus
    the live-search connectors.

    When nothing is requested, or nothing requested survives filtering, the
    full default selection is returned instead: every available connector
    (or every known one) except the live-search connectors.
    """

    def _default_selection() -> list[str]:
        # Everything available (or known), minus live-search connectors.
        pool = (
            list(available_connectors)
            if available_connectors
            else list(_ALL_CONNECTORS)
        )
        return [name for name in pool if name not in _LIVE_SEARCH_CONNECTORS]

    allowed = (
        set(available_connectors) if available_connectors else set(_ALL_CONNECTORS)
    )
    allowed -= _LIVE_SEARCH_CONNECTORS

    if not connectors_to_search:
        return _default_selection()

    accepted: set[str] = set()
    selected: list[str] = []
    for raw_name in connectors_to_search:
        name = (raw_name or "").strip().upper()
        if not name:
            continue
        if name == "WEBCRAWLER_CONNECTOR":
            name = "CRAWLED_URL"
        if name in accepted:
            continue
        # Must be both a known connector and an allowed one.
        if name in _ALL_CONNECTORS and name in allowed:
            accepted.add(name)
            selected.append(name)

    # Nothing usable was requested: fall back to the default selection.
    return selected if selected else _default_selection()
# =============================================================================
# Document Formatting
# =============================================================================
# Fraction of the model's context window (in characters) that a single tool
# result is allowed to occupy. The remainder is reserved for system prompt,
# conversation history, and model output. With ~4 chars/token this gives a
# tool result ≈ 25 % of the context budget in tokens.
_TOOL_OUTPUT_CONTEXT_FRACTION = 0.25
_CHARS_PER_TOKEN = 4
# Hard-floor / ceiling so the budget is always sensible regardless of what
# the model reports.
_MIN_TOOL_OUTPUT_CHARS = 20_000 # ~5K tokens
_MAX_TOOL_OUTPUT_CHARS = 200_000 # ~50K tokens
_MAX_CHUNK_CHARS = 8_000
# Rank-adaptive per-document budget allocation.
# Top-ranked (most relevant) documents get a larger share of the budget so
# we pack as much high-quality context as possible.
#
# fraction(rank) = _TOP_DOC_BUDGET_FRACTION / (1 + rank * _RANK_DECAY)
#
# Examples (128K budget, 8K chunk cap):
# rank 0 → 40% → 6 chunks | rank 3 → 19% → 3 chunks
# rank 1 → 30% → 4 chunks | rank 10 → 10% → 3 chunks (floor)
# rank 2 → 24% → 3 chunks |
_TOP_DOC_BUDGET_FRACTION = 0.40
_RANK_DECAY = 0.35
_MIN_CHUNKS_PER_DOC = 3
def _compute_tool_output_budget(max_input_tokens: int | None) -> int:
    """Convert a context-window size in tokens into a character budget.

    The budget is ``max_input_tokens * _CHARS_PER_TOKEN *
    _TOOL_OUTPUT_CONTEXT_FRACTION`` truncated to an int, then clamped to the
    range ``[_MIN_TOOL_OUTPUT_CHARS, _MAX_TOOL_OUTPUT_CHARS]``. When
    ``max_input_tokens`` is ``None`` or not positive, the minimum budget is
    returned as a conservative fallback.
    """
    window_known = max_input_tokens is not None and max_input_tokens > 0
    if not window_known:
        return _MIN_TOOL_OUTPUT_CHARS

    scaled = int(max_input_tokens * _CHARS_PER_TOKEN * _TOOL_OUTPUT_CONTEXT_FRACTION)
    # Apply the ceiling first, then the floor.
    capped = min(scaled, _MAX_TOOL_OUTPUT_CHARS)
    return max(_MIN_TOOL_OUTPUT_CHARS, capped)
_INTERNAL_METADATA_KEYS: frozenset[str] = frozenset(
{
"message_id",
"thread_id",
"event_id",
"calendar_id",
"google_drive_file_id",
"onedrive_file_id",
"dropbox_file_id",
"page_id",
"issue_id",
"connector_id",
}
)
def format_documents_for_context(
documents: list[dict[str, Any]],
*,
max_chars: int = _MAX_TOOL_OUTPUT_CHARS,
max_chunk_chars: int = _MAX_CHUNK_CHARS,
max_chunks_per_doc: int = 0,
) -> str:
"""Format retrieved documents into an XML context string for the LLM.
Documents are emitted highest-relevance first until ``max_chars`` is hit.
``max_chunks_per_doc=0`` auto-computes a rank-adaptive cap so top results get
more chunks and no single large document monopolizes the budget.

Args:
documents: Result dicts, either document-grouped (with a ``chunks`` list)
or flat chunk-like dicts carrying ``content`` directly. Input order
is treated as rank order.
max_chars: Character budget for the whole returned string.
max_chunk_chars: Per-chunk character cap; longer chunk content is cut and
suffixed with ``...(truncated)``.
max_chunks_per_doc: Fixed per-document chunk cap when positive; ``0``
selects the rank-adaptive cap.

Returns:
The concatenated ``<document>`` blocks, or an empty string when
``documents`` is empty. The result never exceeds ``max_chars``: it is
hard-truncated with a closing comment if necessary.
"""
if not documents:
return ""
# Group chunks by document id, preserving chunk_id so [citation:123] works.
# ConnectorService returns document-grouped results ({document, chunks, source}).
grouped: dict[str, dict[str, Any]] = {}
for doc in documents:
document_info = (doc.get("document") or {}) if isinstance(doc, dict) else {}
metadata = (
(document_info.get("metadata") or {})
if isinstance(document_info, dict)
else {}
)
if not metadata and isinstance(doc, dict):
# Some result shapes may place metadata at the top level.
metadata = doc.get("metadata") or {}
# Source label: first non-empty of the result's own source, the document
# type on the document info, the document type in metadata, else UNKNOWN.
source = (
(doc.get("source") if isinstance(doc, dict) else None)
or document_info.get("document_type")
or metadata.get("document_type")
or "UNKNOWN"
)
# Identity: prefer document_id, else type+title+url.
document_id_val = document_info.get("id")
title = (
document_info.get("title") or metadata.get("title") or "Untitled Document"
)
url = (
metadata.get("url")
or metadata.get("source")
or metadata.get("page_url")
or ""
)
doc_key = (
str(document_id_val)
if document_id_val is not None
else f"{source}::{title}::{url}"
)
# First time this document is seen: record its header fields. Later
# entries with the same key only contribute chunks.
if doc_key not in grouped:
grouped[doc_key] = {
"document_id": document_id_val
if document_id_val is not None
else doc_key,
"document_type": metadata.get("document_type") or source,
"title": title,
"url": url,
"metadata": metadata,
"chunks": [],
}
# Prefer document-grouped chunks when present.
chunks_list = doc.get("chunks") if isinstance(doc, dict) else None
if isinstance(chunks_list, list) and chunks_list:
for ch in chunks_list:
if not isinstance(ch, dict):
continue
chunk_id = ch.get("chunk_id") or ch.get("id")
content = (ch.get("content") or "").strip()
# Chunks with no text are dropped.
if not content:
continue
grouped[doc_key]["chunks"].append(
{"chunk_id": chunk_id, "content": content}
)
continue
# Fallback: treat this as a flat chunk-like object
if not isinstance(doc, dict):
continue
chunk_id = doc.get("chunk_id") or doc.get("id")
content = (doc.get("content") or "").strip()
if not content:
continue
grouped[doc_key]["chunks"].append({"chunk_id": chunk_id, "content": content})
# Live search connectors whose results should be cited by URL rather than
# a numeric chunk_id (the numeric IDs are meaningless auto-incremented counters).
live_search_connectors = {
"TAVILY_API",
"LINKUP_API",
"BAIDU_SEARCH_API",
}
parts: list[str] = []
total_chars = 0
total_docs = len(grouped)
# Emit documents in first-seen order; doc_idx doubles as the rank used by
# the adaptive chunk cap below.
for doc_idx, g in enumerate(grouped.values()):
# Internal identifier keys are stripped from the metadata shown to the LLM.
metadata_clean = {
k: v for k, v in g["metadata"].items() if k not in _INTERNAL_METADATA_KEYS
}
metadata_json = json.dumps(metadata_clean, ensure_ascii=False)
is_live_search = g["document_type"] in live_search_connectors
doc_lines: list[str] = [
"<document>",
"<document_metadata>",
f" <document_id>{g['document_id']}</document_id>",
f" <document_type>{g['document_type']}</document_type>",
f" <title><![CDATA[{g['title']}]]></title>",
f" <url><![CDATA[{g['url']}]]></url>",
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
"</document_metadata>",
"",
"<document_content>",
]
# Rank-adaptive per-document chunk cap: top results get more chunks.
if max_chunks_per_doc > 0:
chunks_allowed = max_chunks_per_doc
else:
# Share of the budget shrinks with rank; the resulting chunk count
# never drops below _MIN_CHUNKS_PER_DOC.
doc_fraction = _TOP_DOC_BUDGET_FRACTION / (1 + doc_idx * _RANK_DECAY)
max_doc_chars = int(max_chars * doc_fraction)
xml_overhead = 500
chunks_allowed = max(
(max_doc_chars - xml_overhead) // max(max_chunk_chars, 1),
_MIN_CHUNKS_PER_DOC,
)
chunks = g["chunks"]
if len(chunks) > chunks_allowed:
chunks = chunks[:chunks_allowed]
for ch in chunks:
ch_content = ch["content"]
if max_chunk_chars and len(ch_content) > max_chunk_chars:
ch_content = ch_content[:max_chunk_chars] + "\n...(truncated)"
# Live-search results with a URL use that URL as the chunk id.
ch_id = g["url"] if (is_live_search and g["url"]) else ch["chunk_id"]
if ch_id is None:
doc_lines.append(f" <chunk><![CDATA[{ch_content}]]></chunk>")
else:
doc_lines.append(
f" <chunk id='{ch_id}'><![CDATA[{ch_content}]]></chunk>"
)
doc_lines.extend(["</document_content>", "</document>", ""])
doc_xml = "\n".join(doc_lines)
doc_len = len(doc_xml)
# Budget check: stop once adding this document would exceed max_chars.
if total_chars + doc_len > max_chars:
remaining = total_docs - doc_idx
# The first document is emitted even when it alone is over budget; the
# hard truncation after the loop then trims the result.
if doc_idx == 0:
parts.append(doc_xml)
total_chars += doc_len
parts.append(
f"<!-- Output truncated: {remaining} more document(s) omitted "
f"(budget {max_chars} chars). Refine your query or reduce top_k "
f"to retrieve different results. -->"
)
break
parts.append(doc_xml)
total_chars += doc_len
result = "\n".join(parts).strip()
# Hard safety net: if the result is still over budget (e.g. a single massive
# first document), forcibly truncate with a closing comment.
if len(result) > max_chars:
truncation_msg = "\n<!-- ...output forcibly truncated to fit context window -->"
result = result[: max_chars - len(truncation_msg)] + truncation_msg
return result
# =============================================================================
# Knowledge Base Search
# =============================================================================
async def search_knowledge_base_async(
    query: str,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    connectors_to_search: list[str] | None = None,
    top_k: int = 10,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
    available_connectors: list[str] | None = None,
    available_document_types: list[str] | None = None,
    max_input_tokens: int | None = None,
) -> str:
    """Run a knowledge-base search and return the results as formatted text.

    The raw search is delegated to ``search_knowledge_base_raw_async``; its
    documents are then rendered by ``format_documents_for_context`` within a
    character budget derived from ``max_input_tokens``. Degenerate queries
    use the fixed browse chunk cap, all others the rank-adaptive cap.

    ``available_document_types`` lets local connectors with no indexed data
    be skipped (no embedding / DB round-trip).

    Returns:
        The formatted context string, or a fixed "no documents" message when
        the raw search yields nothing.
    """
    perf = get_perf_logger()
    started_at = time.perf_counter()

    found = await search_knowledge_base_raw_async(
        query=query,
        search_space_id=search_space_id,
        db_session=db_session,
        connector_service=connector_service,
        connectors_to_search=connectors_to_search,
        top_k=top_k,
        start_date=start_date,
        end_date=end_date,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
    )
    if not found:
        return "No documents found in the knowledge base. The search space has no indexed content yet."

    # Browse-style (degenerate) queries get a fixed chunk cap; 0 means adaptive.
    if _is_degenerate_query(query):
        chunk_cap = _BROWSE_MAX_CHUNKS_PER_DOC
    else:
        chunk_cap = 0

    char_budget = _compute_tool_output_budget(max_input_tokens)
    formatted = format_documents_for_context(
        found,
        max_chars=char_budget,
        max_chunks_per_doc=chunk_cap,
    )

    formatted_len = len(formatted)
    if formatted_len > char_budget:
        perf.warning(
            "[kb_search] output STILL exceeds budget after format (%d > %d), "
            "hard truncation should have fired",
            formatted_len,
            char_budget,
        )

    doc_count = len(found)
    perf.info(
        "[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d "
        "budget=%d max_input_tokens=%s space=%d",
        time.perf_counter() - started_at,
        doc_count,
        doc_count,
        formatted_len,
        char_budget,
        max_input_tokens,
        search_space_id,
    )
    return formatted
# Collect raw knowledge-base hits for ``query`` across the selected connectors,
# de-duplicate them, and return them ordered by descending ``score``.
#
# - ``connectors_to_search`` / ``available_connectors`` are combined by
#   ``_normalize_connectors``; when ``available_document_types`` is given, the
#   result is narrowed to connectors whose own name, or whose
#   ``NATIVE_TO_LEGACY_DOCTYPE`` counterpart, appears in it.
# - ``start_date`` / ``end_date`` are passed through ``resolve_date_range`` and
#   the resolved values are used for every lookup.
# - ``query_embedding`` lets the caller supply a precomputed embedding; when it
#   is ``None`` and the query is not degenerate, the embedding is computed here.
# - ``db_session`` and ``connector_service`` are accepted but not used.
#
# Returns a list of document dicts (possibly empty).
async def search_knowledge_base_raw_async(
query: str,
search_space_id: int,
db_session: AsyncSession,
connector_service: ConnectorService,
connectors_to_search: list[str] | None = None,
top_k: int = 10,
start_date: datetime | None = None,
end_date: datetime | None = None,
available_connectors: list[str] | None = None,
available_document_types: list[str] | None = None,
query_embedding: list[float] | None = None,
) -> list[dict[str, Any]]:
"""Search knowledge base and return raw document dicts (no XML formatting)."""
perf = get_perf_logger()
t0 = time.perf_counter()
all_documents: list[dict[str, Any]] = []
# Preserve the public signature for compatibility even if values are unused.
_ = (db_session, connector_service)
# NOTE(review): function-scope import — presumably avoids an import cycle; confirm.
from app.agents.chat.multi_agent_chat.shared.date_filters import resolve_date_range
resolved_start_date, resolved_end_date = resolve_date_range(
start_date=start_date,
end_date=end_date,
)
connectors = _normalize_connectors(connectors_to_search, available_connectors)
# Keep only connectors that match an available document type, either directly
# or through their legacy doc-type name.
if available_document_types:
doc_types_set = set(available_document_types)
connectors = [
c
for c in connectors
if c in doc_types_set
or NATIVE_TO_LEGACY_DOCTYPE.get(c, "") in doc_types_set
]
# Nothing left to search: return an empty result.
if not connectors:
return []
# Degenerate queries skip hybrid search and browse recent documents instead.
if _is_degenerate_query(query):
perf.info(
"[kb_search_raw] degenerate query %r detected - recency browse",
query,
)
# NOTE(review): ``None`` is presumably "no document-type filter" — confirm in
# ``_browse_recent_documents``.
browse_connectors = connectors if connectors else [None]  # type: ignore[list-item]
expanded_browse = []
for connector in browse_connectors:
# A connector with a legacy doc-type name is browsed as a [native, legacy]
# pair so documents stored under either name are returned.
if connector is not None and connector in NATIVE_TO_LEGACY_DOCTYPE:
expanded_browse.append([connector, NATIVE_TO_LEGACY_DOCTYPE[connector]])
else:
expanded_browse.append(connector)
# One browse call per (expanded) connector, awaited together.
browse_results = await asyncio.gather(
*[
_browse_recent_documents(
search_space_id=search_space_id,
document_type=connector,
top_k=top_k,
start_date=resolved_start_date,
end_date=resolved_end_date,
)
for connector in expanded_browse
]
)
for docs in browse_results:
all_documents.extend(docs)
else:
# Only embed when the caller did not supply a vector; the same embedding is
# then shared by every per-connector search below.
if query_embedding is None:
from app.config import config as app_config
# NOTE(review): ``embed`` is called synchronously inside this coroutine —
# confirm it cannot stall the event loop for long.
query_embedding = app_config.embedding_model_instance.embed(query)
# At most four connector searches hold the semaphore at the same time.
max_parallel_searches = 4
semaphore = asyncio.Semaphore(max_parallel_searches)
async def _search_one_connector(connector: str) -> list[dict[str, Any]]:
try:
# Each search opens its own session via ``shielded_async_session``.
async with semaphore, shielded_async_session() as isolated_session:
svc = ConnectorService(isolated_session, search_space_id)
return await svc._combined_rrf_search(
query_text=query,
search_space_id=search_space_id,
document_type=connector,
top_k=top_k,
start_date=resolved_start_date,
end_date=resolved_end_date,
query_embedding=query_embedding,
)
# Best-effort: a failing connector is logged and contributes no results
# instead of failing the whole search.
except Exception as exc:
perf.warning("[kb_search_raw] connector=%s FAILED: %s", connector, exc)
return []
connector_results = await asyncio.gather(
*[_search_one_connector(connector) for connector in connectors]
)
for docs in connector_results:
all_documents.extend(docs)
# De-duplication state: document ids seen so far, plus content fingerprints for
# documents that carry no id.
seen_doc_ids: set[Any] = set()
seen_content_hashes: set[int] = set()
deduplicated: list[dict[str, Any]] = []
# Fingerprint a document by its non-empty chunk texts (joined with "||"), falling
# back to its flat ``content``; ``None`` when it has no usable text.
def _content_fingerprint(document: dict[str, Any]) -> int | None:
chunks = document.get("chunks")
if isinstance(chunks, list):
chunk_texts = []
for chunk in chunks:
if not isinstance(chunk, dict):
continue
chunk_content = (chunk.get("content") or "").strip()
if chunk_content:
chunk_texts.append(chunk_content)
if chunk_texts:
# NOTE: ``hash()`` of a str is only stable within one process; the values
# are compared only inside this call.
return hash("||".join(chunk_texts))
flat_content = (document.get("content") or "").strip()
if flat_content:
return hash(flat_content)
return None
# First occurrence wins. Documents with an id are de-duplicated by id, the rest
# by content fingerprint; documents with neither are always kept.
for doc in all_documents:
doc_id = (doc.get("document", {}) or {}).get("id")
if doc_id is not None:
if doc_id in seen_doc_ids:
continue
seen_doc_ids.add(doc_id)
deduplicated.append(doc)
continue
content_hash = _content_fingerprint(doc)
if content_hash is not None and content_hash in seen_content_hashes:
continue
if content_hash is not None:
seen_content_hashes.add(content_hash)
deduplicated.append(doc)
# Highest score first; a document without a ``score`` key sorts as 0.
deduplicated.sort(key=lambda doc: doc.get("score", 0), reverse=True)
perf.info(
"[kb_search_raw] done in %.3fs total=%d deduped=%d",
time.perf_counter() - t0,
len(all_documents),
len(deduplicated),
)
return deduplicated

View file

@ -23,6 +23,45 @@ from app.services.llm_service import get_agent_llm
logger = logging.getLogger(__name__)
def _report_search_types(
available_connectors: list[str] | None,
available_document_types: list[str] | None,
) -> tuple[str, ...] | None:
"""Build the document-type scope for the shared KB search.
``None`` means "search every indexed type"; a tuple narrows the scope to the
connectors/document types the search space actually has.
"""
types: set[str] = set()
if available_document_types:
types.update(available_document_types)
if available_connectors:
types.update(available_connectors)
return tuple(sorted(types)) or None
def _render_kb_hits_for_report(hits: list[Any]) -> str:
    """Turn KB hits into plain, titled source text for the report writer.

    Each hit becomes a ``## <title> (<source label>)`` heading (the label is
    left out when empty) followed by its non-blank chunk texts separated by
    blank lines. Hits without any non-blank chunk text are skipped.

    Reports do not carry citations for now, so neither ``[n]`` labels nor
    chunk ids are emitted — only titled document content for grounding.
    """
    from app.agents.chat.multi_agent_chat.shared.document_render import source_label

    sections: list[str] = []
    for hit in hits:
        origin = source_label(hit.document_type, hit.metadata)
        if origin:
            heading = f"{hit.title} ({origin})"
        else:
            heading = hit.title

        paragraphs = []
        for chunk in hit.chunks:
            text = chunk.content.strip()
            if text:
                paragraphs.append(text)
        content = "\n\n".join(paragraphs)

        if content:
            sections.append(f"## {heading}\n\n{content}")
    return "\n\n".join(sections)
# ─── Shared Formatting Rules ────────────────────────────────────────────────
# Reusable formatting instructions appended to section-level and review prompts.
@ -788,31 +827,46 @@ def create_generate_report_tool(
f"{query_count} queries: {search_queries[:5]}"
)
try:
from .knowledge_base import search_knowledge_base_async
from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
search_chunks,
)
from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
DocumentHit,
SearchScope,
)
scope = SearchScope(
document_types=_report_search_types(
available_connectors, available_document_types
)
)
# Each query gets its own short-lived session.
async def _run_single_query(q: str) -> str:
async def _run_single_query(q: str) -> list[DocumentHit]:
async with shielded_async_session() as kb_session:
kb_connector_svc = ConnectorService(
kb_session, search_space_id
)
return await search_knowledge_base_async(
query=q,
return await search_chunks(
kb_session,
search_space_id=search_space_id,
db_session=kb_session,
connector_service=kb_connector_svc,
query=q,
scope=scope,
top_k=10,
available_connectors=available_connectors,
available_document_types=available_document_types,
)
kb_results = await asyncio.gather(
hits_per_query = await asyncio.gather(
*[_run_single_query(q) for q in search_queries[:5]]
)
kb_text_parts = [r for r in kb_results if r and r.strip()]
if kb_text_parts:
kb_combined = "\n\n---\n\n".join(kb_text_parts)
seen_doc_ids: set[int] = set()
merged_hits: list[DocumentHit] = []
for hits in hits_per_query:
for hit in hits:
if hit.document_id in seen_doc_ids:
continue
seen_doc_ids.add(hit.document_id)
merged_hits.append(hit)
kb_combined = _render_kb_hits_for_report(merged_hits)
if kb_combined.strip():
if effective_source.strip():
effective_source = (
effective_source
@ -822,20 +876,17 @@ def create_generate_report_tool(
else:
effective_source = kb_combined
# Count docs found (rough: count <document> tags)
doc_count = kb_combined.count("<document>")
doc_count = len(merged_hits)
dispatch_custom_event(
"report_progress",
{
"phase": "kb_search_done",
"message": f"Found {doc_count} relevant documents"
if doc_count
else f"Found results from {len(kb_text_parts)} queries",
"message": f"Found {doc_count} relevant documents",
},
)
logger.info(
f"[generate_report] KB search added ~{len(kb_combined)} chars "
f"from {len(kb_text_parts)} queries"
f"from {doc_count} documents"
)
else:
dispatch_custom_event(

View file

@ -1,43 +0,0 @@
<citation_instructions>
You can cite the sources shown to you. Cited material arrives in labeled blocks
such as <retrieved_context> (and some tool results). Inside them, every passage
begins with a bracketed number — that number is its citation label: [1], [2], [3].
How to cite:
- When a statement relies on a passage, put that passage's label right after the
statement: "We pushed the launch to March 10 [1]."
- For several sources behind one statement, write each label in its own brackets
with nothing between them — [1][2]. Never merge them as [1, 2] and never use a
range like [1-3].
- Put the label at the end of the clause or sentence it supports.
Rules:
- Cite ONLY labels that were shown to you. The bracketed number is the single
thing you copy — never cite a title, a date, "chunk 4 of 19", a document id, or
a URL.
- Never invent a label and never renumber. If nothing shown supports a claim,
write it without a citation instead of guessing.
- Attribute only claims drawn from the provided sources; leave your own general
knowledge uncited.
- Plain square brackets only. No parentheses around them, no links or markdown
links like [1](http://...), no footnote marks like ¹.
- Do not add a "References" or "Sources" section; citations stay inline.
Example of context you might receive:
<retrieved_context>
Document: "Q3 Launch Notes" (Slack · #launch · 2026-03-02)
[1] We agreed to push the launch to March 10.
[2] Marketing will be notified next week.
Document: "Release Timeline" (Notion · 2026-02-28)
[3] Dates floated were March 10 and March 17.
</retrieved_context>
Correct:
The launch moved to March 10 [1][3], and marketing is told next week [2].
Incorrect — do not produce any of these:
- The launch moved to March 10 [1, 3]. (merged brackets)
- The launch moved to March 10 ([1]). (parentheses)
- The launch moved to March 10 [citation:1]. (you never write this form)
- The launch moved to March 10 [4]. (label was never shown)
</citation_instructions>

View file

@ -1,16 +1,13 @@
<citation_instructions>
IMPORTANT: Citations are DISABLED for this configuration.
Citation markers are **disabled** in this configuration.
DO NOT include any citations in your responses. Specifically:
1. Do NOT use the [citation:chunk_id] format anywhere in your response.
2. Do NOT reference document IDs, chunk IDs, or source IDs.
3. Simply provide the information naturally without any citation markers.
4. Write your response as if you're having a normal conversation, incorporating the information from your knowledge seamlessly.
Do NOT include `[n]` citation labels or `[citation:…]` markers anywhere, even if
tool output (`<retrieved_context>`) or examples reference them. Ignore
citation-format reminders elsewhere in this prompt when they conflict with this
block.
When answering questions based on documents from the knowledge base:
- Present the information directly and confidently
- Do not mention that information comes from specific documents or chunks
- Integrate facts naturally into your response without attribution markers
Your goal is to provide helpful, informative answers in a clean, readable format without any citation notation.
1. Answer in plain prose. You may add markdown links to public URLs when the
sources are URLs.
2. Do not expose raw chunk ids, document ids, or internal ids to the user.
3. Present knowledge-base or web facts naturally without attribution markers.
</citation_instructions>

View file

@ -1,89 +1,16 @@
<citation_instructions>
CRITICAL CITATION REQUIREMENTS:
Cite with one token: the bracket label `[n]`. Cited material arrives in labeled
blocks such as `<retrieved_context>` (and some tool results); inside them every
passage begins with its `[n]` label on a single shared count. Those labels are
the only citation you write; the server resolves each one back to its source
after the turn.
1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `<chunk id='...'>` tag inside `<document_content>`.
2. Make sure ALL factual statements from the documents have proper citations.
3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2].
4. You MUST use the exact chunk_id values from the `<chunk id='...'>` attributes. Do not create your own citation numbers.
5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value.
6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags.
7. Do not return citations as clickable links.
8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only.
9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting.
10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `<chunk id='...'>` tags.
11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up.
<document_structure_example>
The documents you receive are structured like this:
**Knowledge base documents (numeric chunk IDs):**
<document>
<document_metadata>
<document_id>42</document_id>
<document_type>GITHUB_CONNECTOR</document_type>
<title><![CDATA[Some repo / file / issue title]]></title>
<url><![CDATA[https://example.com]]></url>
<metadata_json><![CDATA[{{"any":"other metadata"}}]]></metadata_json>
</document_metadata>
<document_content>
<chunk id='123'><![CDATA[First chunk text...]]></chunk>
<chunk id='124'><![CDATA[Second chunk text...]]></chunk>
</document_content>
</document>
**Web search results (URL chunk IDs):**
<document>
<document_metadata>
<document_type>WEB_SEARCH</document_type>
<title><![CDATA[Some web search result]]></title>
<url><![CDATA[https://example.com/article]]></url>
</document_metadata>
<document_content>
<chunk id='https://example.com/article'><![CDATA[Content from web search...]]></chunk>
</document_content>
</document>
IMPORTANT: You MUST cite using the EXACT chunk ids from the `<chunk id='...'>` tags.
- For knowledge base documents, chunk ids are numeric (e.g. 123, 124) or prefixed (e.g. doc-45).
- For live web search results, chunk ids are URLs (e.g. https://example.com/article).
Do NOT cite document_id. Always use the chunk id.
</document_structure_example>
<citation_format>
- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `<chunk id='...'>` tag
- Citations should appear at the end of the sentence containing the information they support
- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
- No need to return references section. Just citations in answer.
- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
- Copy the EXACT chunk id from the XML - if it says `<chunk id='5'>`, use [citation:5]
- If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page]
</citation_format>
<citation_examples>
CORRECT citation formats:
- [citation:5] (numeric chunk ID from knowledge base)
- [citation:https://example.com/article] (URL chunk ID from web search results)
- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations)
INCORRECT citation formats (DO NOT use):
- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense))
- Using parentheses around brackets: ([citation:5])
- Using hyperlinked text: [link to source 5](https://example.com)
- Using footnote style: ... library¹
- Making up source IDs when source_id is unknown
- Using old IEEE format: [1], [2], [3]
- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5]
</citation_examples>
<citation_output_example>
Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5].
According to web search results, the key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:https://docs.python.org/3/library/asyncio.html]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources.
However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead.
</citation_output_example>
1. Put the label right after the claim it supports.
2. Several sources for one claim: stack brackets, `[1][2]`.
3. Copy labels exactly as shown — never renumber them, add your own, or write the
underlying title, date, id, or URL instead.
4. Write the bare `[n]` and nothing else: no `[citation:...]`, no markdown links
like `[1](http://…)`, no footnote marks, no "References" section.
5. Only label claims the sources support. If nothing shown backs a claim — or you
never saw a label — leave it uncited; never invent one.
</citation_instructions>

View file

@ -9,7 +9,7 @@ Reasoning hygiene (R1-aware):
Output style:
- Be concise. Default to a one-paragraph answer; expand only when the user asks for detail.
- Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action.
- For factual answers, cite once with `[citation:chunk_id]` and stop.
- For factual answers, cite once with the passage's `[n]` label and stop.
Tool calls:
- Issue independent tool calls in parallel within a single turn.

View file

@ -5,7 +5,7 @@ Maximum terseness:
- Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice.
- No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer.
- Avoid restating the user's question.
- For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop.
- For factual lookups inside the knowledge base, give the answer with a single `[n]` label and stop.
Tool discipline:
- Use exactly ONE tool per assistant turn when investigating; wait for the result before deciding the next call. Do not loop on the same tool with the same arguments — pick a result and act.

View file

@ -3,7 +3,6 @@
from __future__ import annotations
import uuid
from contextlib import asynccontextmanager
from datetime import UTC, datetime
from unittest.mock import MagicMock
@ -227,23 +226,6 @@ def patched_embed(monkeypatch):
return mock
@pytest.fixture
def patched_shielded_session(async_engine, monkeypatch):
    """Swap the knowledge_base module's ``shielded_async_session`` for a
    stand-in whose sessions come from the test engine."""
    session_factory = async_sessionmaker(async_engine, expire_on_commit=False)

    @asynccontextmanager
    async def _engine_backed_session():
        # Same context-manager shape as the real helper, but bound to the
        # test engine's session factory.
        async with session_factory() as db_session:
            yield db_session

    patch_target = "app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.tools.knowledge_base.shielded_async_session"
    monkeypatch.setattr(patch_target, _engine_backed_session)
# ---------------------------------------------------------------------------
# Indexer test helpers
# ---------------------------------------------------------------------------

View file

@ -1,46 +0,0 @@
"""Integration test: _browse_recent_documents returns docs of multiple types.
Exercises the browse path (degenerate-query fallback) with a real PostgreSQL
database. Verifies that passing a list of document types correctly returns
documents of all listed types -- the same ``.in_()`` SQL path used by hybrid
search but through the browse/recency-ordered code path.
"""
from __future__ import annotations
import pytest
pytestmark = pytest.mark.integration
async def test_browse_recent_documents_with_list_type_returns_both(
    committed_google_data, patched_shielded_session
):
    """Browsing with a list of document types yields docs of every listed type."""
    from app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.tools.knowledge_base import (
        _browse_recent_documents,
    )

    requested_types = ["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"]
    results = await _browse_recent_documents(
        search_space_id=committed_google_data["search_space_id"],
        document_type=requested_types,
        top_k=10,
        start_date=None,
        end_date=None,
    )

    # Collect every non-empty document type present in the browse results.
    returned_types = {
        dtype
        for doc in results
        if (dtype := doc.get("document", {}).get("document_type"))
    }

    assert "GOOGLE_DRIVE_FILE" in returned_types, (
        "Native Drive docs should appear in browse results"
    )
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
        "Legacy Composio Drive docs should appear in browse results"
    )

View file

@ -86,9 +86,10 @@ class TestCompose:
# Tools
assert "<tools>" in prompt
assert "</tools>" in prompt
# Citations on by default
# Citations on by default — the [n] / <retrieved_context> contract
assert "<citation_instructions>" in prompt
assert "[citation:chunk_id]" in prompt
assert "<retrieved_context>" in prompt
assert "[1][2]" in prompt
def test_team_visibility_uses_team_variants(self, fixed_today: datetime) -> None:
prompt = compose_system_prompt(
@ -116,9 +117,9 @@ class TestCompose:
def test_citations_disabled_swaps_block(self, fixed_today: datetime) -> None:
prompt_on = compose_system_prompt(today=fixed_today, citations_enabled=True)
prompt_off = compose_system_prompt(today=fixed_today, citations_enabled=False)
assert "Citations are DISABLED" in prompt_off
assert "Citations are DISABLED" not in prompt_on
assert "[citation:chunk_id]" in prompt_on
assert "Citation markers are **disabled**" in prompt_off
assert "Citation markers are **disabled**" not in prompt_on
assert "<retrieved_context>" in prompt_on
def test_enabled_tool_filter_only_includes_listed_tools(
self, fixed_today: datetime