Mirror of https://github.com/MODSetter/SurfSense.git, synced 2026-04-27 09:46:25 +02:00
feat: made agent file system optimized
This commit is contained in:
parent ee0b59c0fa
commit 2cc2d339e6
67 changed files with 8011 additions and 5591 deletions
@@ -5,7 +5,6 @@ This module contains all the tools available to the SurfSense agent.
 To add a new tool, see the documentation in registry.py.
 
 Available tools:
 - search_knowledge_base: Search the user's personal knowledge base
 - search_surfsense_docs: Search Surfsense documentation for usage help
 - generate_podcast: Generate audio podcasts from content
 - generate_video_presentation: Generate video presentations with slides and narration
@@ -20,7 +19,6 @@ Available tools:
 from .generate_image import create_generate_image_tool
 from .knowledge_base import (
     CONNECTOR_DESCRIPTIONS,
     create_search_knowledge_base_tool,
     format_documents_for_context,
     search_knowledge_base_async,
 )
@@ -52,7 +50,6 @@ __all__ = [
     "create_recall_memory_tool",
     "create_save_memory_tool",
     "create_scrape_webpage_tool",
     "create_search_knowledge_base_tool",
     "create_search_surfsense_docs_tool",
     "format_documents_for_context",
     "get_all_tool_names",
@@ -273,9 +273,7 @@ def create_update_calendar_event_tool(
                 final_new_start_datetime, context
             )
         if final_new_end_datetime is not None:
-            update_body["end"] = _build_time_body(
-                final_new_end_datetime, context
-            )
+            update_body["end"] = _build_time_body(final_new_end_datetime, context)
         if final_new_description is not None:
             update_body["description"] = final_new_description
         if final_new_location is not None:
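The hunk above only collapses a `_build_time_body` call onto one line; the helper itself is not shown in this excerpt. As a rough sketch under assumptions (Google Calendar-style event bodies carry start/end as {"dateTime", "timeZone"} objects; the function below and its timezone handling are hypothetical stand-ins, not the repo's actual code):

from datetime import datetime, timezone

# Hypothetical stand-in for the _build_time_body helper referenced above.
def build_time_body(dt: datetime, tz_name: str = "UTC") -> dict[str, str]:
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)  # assumption: treat naive datetimes as UTC
    return {"dateTime": dt.isoformat(), "timeZone": tz_name}

update_body: dict[str, object] = {}
final_new_end_datetime = datetime(2026, 4, 27, 10, 30, tzinfo=timezone.utc)
if final_new_end_datetime is not None:
    update_body["end"] = build_time_body(final_new_end_datetime)
print(update_body)  # {'end': {'dateTime': '2026-04-27T10:30:00+00:00', 'timeZone': 'UTC'}}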
@@ -5,7 +5,6 @@ This module provides:
 - Connector constants and normalization
 - Async knowledge base search across multiple connectors
 - Document formatting for LLM context
 - Tool factory for creating search_knowledge_base tools
 """
 
 import asyncio
@@ -16,8 +15,6 @@ import time
 from datetime import datetime
 from typing import Any
 
 from langchain_core.tools import StructuredTool
 from pydantic import BaseModel, Field
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.db import NATIVE_TO_LEGACY_DOCTYPE, shielded_async_session
@@ -619,9 +616,76 @@ async def search_knowledge_base_async(
     perf = get_perf_logger()
     t0 = time.perf_counter()
 
+    deduplicated = await search_knowledge_base_raw_async(
+        query=query,
+        search_space_id=search_space_id,
+        db_session=db_session,
+        connector_service=connector_service,
+        connectors_to_search=connectors_to_search,
+        top_k=top_k,
+        start_date=start_date,
+        end_date=end_date,
+        available_connectors=available_connectors,
+        available_document_types=available_document_types,
+    )
+
+    if not deduplicated:
+        return "No documents found in the knowledge base. The search space has no indexed content yet."
+
+    # Use browse chunk cap for degenerate queries, otherwise adaptive chunking.
+    max_chunks_per_doc = (
+        _BROWSE_MAX_CHUNKS_PER_DOC if _is_degenerate_query(query) else 0
+    )
+    output_budget = _compute_tool_output_budget(max_input_tokens)
+    result = format_documents_for_context(
+        deduplicated,
+        max_chars=output_budget,
+        max_chunks_per_doc=max_chunks_per_doc,
+    )
+
+    if len(result) > output_budget:
+        perf.warning(
+            "[kb_search] output STILL exceeds budget after format (%d > %d), "
+            "hard truncation should have fired",
+            len(result),
+            output_budget,
+        )
+
+    perf.info(
+        "[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d "
+        "budget=%d max_input_tokens=%s space=%d",
+        time.perf_counter() - t0,
+        len(deduplicated),
+        len(deduplicated),
+        len(result),
+        output_budget,
+        max_input_tokens,
+        search_space_id,
+    )
+    return result
+
+
+async def search_knowledge_base_raw_async(
+    query: str,
+    search_space_id: int,
+    db_session: AsyncSession,
+    connector_service: ConnectorService,
+    connectors_to_search: list[str] | None = None,
+    top_k: int = 10,
+    start_date: datetime | None = None,
+    end_date: datetime | None = None,
+    available_connectors: list[str] | None = None,
+    available_document_types: list[str] | None = None,
+    query_embedding: list[float] | None = None,
+) -> list[dict[str, Any]]:
+    """Search knowledge base and return raw document dicts (no XML formatting)."""
+    perf = get_perf_logger()
+    t0 = time.perf_counter()
+    all_documents: list[dict[str, Any]] = []
 
     # Resolve date range (default last 2 years)
+    # Preserve the public signature for compatibility even if values are unused.
+    _ = (db_session, connector_service)
+
     from app.agents.new_chat.utils import resolve_date_range
 
     resolved_start_date, resolved_end_date = resolve_date_range(
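The hunk above splits retrieval from presentation: search_knowledge_base_async becomes a thin wrapper that formats whatever search_knowledge_base_raw_async returns, so internal callers (for example the report tool) can reuse the raw document dicts without paying for formatting. A minimal sketch of the same wrapper-over-raw pattern, with stand-in functions and data shapes rather than the repo's actual signatures:

import asyncio
from typing import Any

async def search_raw(query: str) -> list[dict[str, Any]]:
    # Stand-in for search_knowledge_base_raw_async: returns raw document dicts.
    return [{"document": {"id": 1, "title": "Example"}, "score": 0.9}]

async def search_formatted(query: str, max_chars: int = 2000) -> str:
    # Stand-in for search_knowledge_base_async: formats raw results for the LLM.
    docs = await search_raw(query)
    if not docs:
        return "No documents found."
    lines = [f"[{d['document']['id']}] {d['document']['title']} (score={d['score']})" for d in docs]
    return "\n".join(lines)[:max_chars]

# Agents get the formatted string; internal tools can consume the raw list directly.
print(asyncio.run(search_formatted("architecture overview")))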
@@ -631,144 +695,76 @@ async def search_knowledge_base_async(
 
     connectors = _normalize_connectors(connectors_to_search, available_connectors)
 
     # --- Optimization 1: skip connectors that have zero indexed documents ---
     if available_document_types:
         doc_types_set = set(available_document_types)
         before_count = len(connectors)
         connectors = [
             c
             for c in connectors
             if c in doc_types_set
             or NATIVE_TO_LEGACY_DOCTYPE.get(c, "") in doc_types_set
         ]
         skipped = before_count - len(connectors)
         if skipped:
             perf.info(
                 "[kb_search] skipped %d empty connectors (had %d, now %d)",
                 skipped,
                 before_count,
                 len(connectors),
             )
 
     perf.info(
         "[kb_search] searching %d connectors: %s (space=%d, top_k=%d)",
         len(connectors),
         connectors[:5],
         search_space_id,
         top_k,
     )
 
     # --- Fast-path: no connectors left after filtering ---
     if not connectors:
         perf.info(
             "[kb_search] TOTAL in %.3fs — no connectors to search, returning empty",
             time.perf_counter() - t0,
         )
-        return "No documents found in the knowledge base. The search space has no indexed content yet."
+        return []
 
     # --- Fast-path: degenerate queries (*, **, empty, etc.) ---
     # Semantic embedding of '*' is noise and plainto_tsquery('english', '*')
     # yields an empty tsquery, so both retrieval signals are useless.
     # Fall back to a recency-ordered browse that returns diverse results.
     if _is_degenerate_query(query):
         perf.info(
-            "[kb_search] degenerate query %r detected - falling back to recency browse",
+            "[kb_search_raw] degenerate query %r detected - recency browse",
             query,
         )
         browse_connectors = connectors if connectors else [None]  # type: ignore[list-item]
 
         expanded_browse = []
-        for c in browse_connectors:
-            if c is not None and c in NATIVE_TO_LEGACY_DOCTYPE:
-                expanded_browse.append([c, NATIVE_TO_LEGACY_DOCTYPE[c]])
+        for connector in browse_connectors:
+            if connector is not None and connector in NATIVE_TO_LEGACY_DOCTYPE:
+                expanded_browse.append([connector, NATIVE_TO_LEGACY_DOCTYPE[connector]])
             else:
-                expanded_browse.append(c)
-
+                expanded_browse.append(connector)
         browse_results = await asyncio.gather(
             *[
                 _browse_recent_documents(
                     search_space_id=search_space_id,
-                    document_type=c,
+                    document_type=connector,
                     top_k=top_k,
                     start_date=resolved_start_date,
                     end_date=resolved_end_date,
                 )
-                for c in expanded_browse
+                for connector in expanded_browse
             ]
         )
         for docs in browse_results:
             all_documents.extend(docs)
-
-        # Skip dedup + formatting below (browse already returns unique docs)
-        # but still cap output budget.
-        output_budget = _compute_tool_output_budget(max_input_tokens)
-        result = format_documents_for_context(
-            all_documents,
-            max_chars=output_budget,
-            max_chunks_per_doc=_BROWSE_MAX_CHUNKS_PER_DOC,
-        )
-        perf.info(
-            "[kb_search] TOTAL (browse) in %.3fs total_docs=%d output_chars=%d "
-            "budget=%d space=%d",
-            time.perf_counter() - t0,
-            len(all_documents),
-            len(result),
-            output_budget,
-            search_space_id,
-        )
-        return result
-
-    # --- Optimization 2: compute the query embedding once, share across all local searches ---
-    from app.config import config as app_config
-
-    t_embed = time.perf_counter()
-    precomputed_embedding = app_config.embedding_model_instance.embed(query)
-    perf.info(
-        "[kb_search] shared embedding computed in %.3fs",
-        time.perf_counter() - t_embed,
-    )
-
-    max_parallel_searches = 4
-    semaphore = asyncio.Semaphore(max_parallel_searches)
-
-    async def _search_one_connector(connector: str) -> list[dict[str, Any]]:
-        try:
-            t_conn = time.perf_counter()
-            async with semaphore, shielded_async_session() as isolated_session:
-                svc = ConnectorService(isolated_session, search_space_id)
-                chunks = await svc._combined_rrf_search(
-                    query_text=query,
-                    search_space_id=search_space_id,
-                    document_type=connector,
-                    top_k=top_k,
-                    start_date=resolved_start_date,
-                    end_date=resolved_end_date,
-                    query_embedding=precomputed_embedding,
-                )
-            perf.info(
-                "[kb_search] connector=%s results=%d in %.3fs",
-                connector,
-                len(chunks),
-                time.perf_counter() - t_conn,
-            )
-            return chunks
-        except Exception as e:
-            perf.warning("[kb_search] connector=%s FAILED: %s", connector, e)
-            return []
-
-    t_gather = time.perf_counter()
-    connector_results = await asyncio.gather(
-        *[_search_one_connector(connector) for connector in connectors]
-    )
-    perf.info(
-        "[kb_search] all connectors gathered in %.3fs",
-        time.perf_counter() - t_gather,
-    )
-    for chunks in connector_results:
-        all_documents.extend(chunks)
+    else:
+        if query_embedding is None:
+            from app.config import config as app_config
+
+            query_embedding = app_config.embedding_model_instance.embed(query)
+
+        max_parallel_searches = 4
+        semaphore = asyncio.Semaphore(max_parallel_searches)
+
+        async def _search_one_connector(connector: str) -> list[dict[str, Any]]:
+            try:
+                async with semaphore, shielded_async_session() as isolated_session:
+                    svc = ConnectorService(isolated_session, search_space_id)
+                    return await svc._combined_rrf_search(
+                        query_text=query,
+                        search_space_id=search_space_id,
+                        document_type=connector,
+                        top_k=top_k,
+                        start_date=resolved_start_date,
+                        end_date=resolved_end_date,
+                        query_embedding=query_embedding,
+                    )
+            except Exception as exc:
+                perf.warning("[kb_search_raw] connector=%s FAILED: %s", connector, exc)
+                return []
+
+        connector_results = await asyncio.gather(
+            *[_search_one_connector(connector) for connector in connectors]
+        )
+        for docs in connector_results:
+            all_documents.extend(docs)
 
     # Deduplicate primarily by document ID. Only fall back to content hashing
     # when a document has no ID.
     seen_doc_ids: set[Any] = set()
     seen_content_hashes: set[int] = set()
     deduplicated: list[dict[str, Any]] = []
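Both the removed and the new code embed the query once and share that vector across all connector searches, and both bound fan-out with a semaphore; the new version additionally accepts a caller-supplied query_embedding. A self-contained sketch of the embed-once, bounded-gather pattern (the embed function and result shapes below are placeholders, not the repo's API):

import asyncio

def embed(query: str) -> list[float]:
    # Placeholder for embedding_model_instance.embed: pay this cost once per query.
    return [float(len(query)), 1.0]

async def search_one(connector: str, emb: list[float], sem: asyncio.Semaphore) -> list[str]:
    async with sem:  # at most 4 connector searches run concurrently
        await asyncio.sleep(0)  # placeholder for the real hybrid/RRF database query
        return [f"{connector}: hit ({len(emb)}-dim query vector reused)"]

async def search_all(query: str, connectors: list[str]) -> list[str]:
    emb = embed(query)  # computed once, shared across every connector search
    sem = asyncio.Semaphore(4)
    results = await asyncio.gather(*[search_one(c, emb, sem) for c in connectors])
    return [hit for chunk in results for hit in chunk]

print(asyncio.run(search_all("api auth design", ["NOTION", "SLACK", "FILE"])))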
@@ -785,7 +781,6 @@ async def search_knowledge_base_async(
             chunk_texts.append(chunk_content)
         if chunk_texts:
             return hash("||".join(chunk_texts))
 
         flat_content = (document.get("content") or "").strip()
         if flat_content:
             return hash(flat_content)
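_content_fingerprint is only partially visible here: it hashes the joined chunk texts and falls back to the flat content field. A runnable approximation under the assumption that documents carry a chunks list of {"content": ...} dicts (the exact chunk shape is not shown in this diff):

from typing import Any

def content_fingerprint(document: dict[str, Any]) -> int | None:
    # Mirrors the visible logic: prefer chunk texts, fall back to flat content.
    chunk_texts = [
        (c.get("content") or "").strip()
        for c in document.get("chunks", [])
        if (c.get("content") or "").strip()
    ]
    if chunk_texts:
        return hash("||".join(chunk_texts))
    flat_content = (document.get("content") or "").strip()
    if flat_content:
        return hash(flat_content)
    return None  # nothing hashable: caller keeps the document

print(content_fingerprint({"chunks": [{"content": "a"}, {"content": "b"}]}))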
@@ -793,216 +788,24 @@ async def search_knowledge_base_async(
 
     for doc in all_documents:
         doc_id = (doc.get("document", {}) or {}).get("id")
 
         if doc_id is not None:
             if doc_id in seen_doc_ids:
                 continue
             seen_doc_ids.add(doc_id)
             deduplicated.append(doc)
             continue
 
         content_hash = _content_fingerprint(doc)
-        if content_hash is not None and content_hash in seen_content_hashes:
-            continue
         if content_hash is not None:
+            if content_hash in seen_content_hashes:
+                continue
             seen_content_hashes.add(content_hash)
 
         deduplicated.append(doc)
 
-    # Sort by RRF score so the most relevant documents from ANY connector
-    # appear first, preventing budget truncation from hiding top results.
-    deduplicated.sort(key=lambda d: d.get("score", 0), reverse=True)
-
-    output_budget = _compute_tool_output_budget(max_input_tokens)
-    result = format_documents_for_context(deduplicated, max_chars=output_budget)
-
-    if len(result) > output_budget:
-        perf.warning(
-            "[kb_search] output STILL exceeds budget after format (%d > %d), "
-            "hard truncation should have fired",
-            len(result),
-            output_budget,
-        )
-
+    deduplicated.sort(key=lambda doc: doc.get("score", 0), reverse=True)
     perf.info(
-        "[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d "
-        "budget=%d max_input_tokens=%s space=%d",
+        "[kb_search_raw] done in %.3fs total=%d deduped=%d",
         time.perf_counter() - t0,
         len(all_documents),
         len(deduplicated),
-        len(result),
-        output_budget,
-        max_input_tokens,
-        search_space_id,
     )
-    return result
-
-
-def _build_connector_docstring(available_connectors: list[str] | None) -> str:
-    """
-    Build the connector documentation section for the tool docstring.
-
-    Args:
-        available_connectors: List of available connector types, or None for all
-
-    Returns:
-        Formatted docstring section listing available connectors
-    """
-    connectors = available_connectors if available_connectors else list(_ALL_CONNECTORS)
-
-    lines = []
-    for connector in connectors:
-        # Skip internal names, prefer user-facing aliases
-        if connector == "CRAWLED_URL":
-            # Show as WEBCRAWLER_CONNECTOR for user-facing docs
-            description = CONNECTOR_DESCRIPTIONS.get(connector, connector)
-            lines.append(f"- WEBCRAWLER_CONNECTOR: {description}")
-        else:
-            description = CONNECTOR_DESCRIPTIONS.get(connector, connector)
-            lines.append(f"- {connector}: {description}")
-
-    return "\n".join(lines)
-
-
-# =============================================================================
-# Tool Input Schema
-# =============================================================================
-
-
-class SearchKnowledgeBaseInput(BaseModel):
-    """Input schema for the search_knowledge_base tool."""
-
-    query: str = Field(
-        description=(
-            "The search query - use specific natural language terms. "
-            "NEVER use wildcards like '*' or '**'; instead describe what you want "
-            "(e.g. 'recent meeting notes' or 'project architecture overview')."
-        ),
-    )
-    top_k: int = Field(
-        default=10,
-        description="Number of results to retrieve (default: 10). Keep ≤20 for focused searches.",
-    )
-    start_date: str | None = Field(
-        default=None,
-        description="Optional ISO date/datetime (e.g. '2025-12-12' or '2025-12-12T00:00:00+00:00')",
-    )
-    end_date: str | None = Field(
-        default=None,
-        description="Optional ISO date/datetime (e.g. '2025-12-19' or '2025-12-19T23:59:59+00:00')",
-    )
-    connectors_to_search: list[str] | None = Field(
-        default=None,
-        description="Optional list of connector enums to search. If omitted, searches all available.",
-    )
-
-
-def create_search_knowledge_base_tool(
-    search_space_id: int,
-    db_session: AsyncSession,
-    connector_service: ConnectorService,
-    available_connectors: list[str] | None = None,
-    available_document_types: list[str] | None = None,
-    max_input_tokens: int | None = None,
-) -> StructuredTool:
-    """
-    Factory function to create the search_knowledge_base tool with injected dependencies.
-
-    Args:
-        search_space_id: The user's search space ID
-        db_session: Database session
-        connector_service: Initialized connector service
-        available_connectors: Optional list of connector types available in the search space.
-            Used to dynamically generate the tool docstring.
-        available_document_types: Optional list of document types that have data in the search space.
-            Used to inform the LLM about what data exists.
-        max_input_tokens: Model context window (tokens) from litellm model info.
-            Used to dynamically size tool output.
-
-    Returns:
-        A configured StructuredTool instance
-    """
-    # Build connector documentation dynamically
-    connector_docs = _build_connector_docstring(available_connectors)
-
-    # Build context about available document types
-    doc_types_info = ""
-    if available_document_types:
-        doc_types_info = f"""
-
-## Document types with indexed content in this search space
-
-The following document types have content available for search:
-{", ".join(available_document_types)}
-
-Focus searches on these types for best results."""
-
-    # Build the dynamic description for the tool
-    # This is what the LLM sees when deciding whether/how to use the tool
-    dynamic_description = f"""Search the user's personal knowledge base for relevant information.
-
-Use this tool to find documents, notes, files, web pages, and other content the user has indexed.
-This searches ONLY local/indexed data (uploaded files, Notion, Slack, browser extension captures, etc.).
-For real-time web search (current events, news, live data), use the `web_search` tool instead.
-
-IMPORTANT:
-- Always craft specific, descriptive search queries using natural language keywords.
-  Good: "quarterly sales report Q3", "Python API authentication design".
-  Bad: "*", "**", "everything", single characters. Wildcard/empty queries yield poor results.
-- Prefer multiple focused searches over a single broad one with high top_k.
-- If the user requests a specific source type (e.g. "my notes", "Slack messages"), pass `connectors_to_search=[...]` using the enums below.
-- If `connectors_to_search` is omitted/empty, the system will search broadly.
-- Only connectors that are enabled/configured for this search space are available.{doc_types_info}
-
-## Available connector enums for `connectors_to_search`
-
-{connector_docs}
-
-NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`."""
-
-    # Capture for closure
-    _available_connectors = available_connectors
-    _available_document_types = available_document_types
-
-    async def _search_knowledge_base_impl(
-        query: str,
-        top_k: int = 10,
-        start_date: str | None = None,
-        end_date: str | None = None,
-        connectors_to_search: list[str] | None = None,
-    ) -> str:
-        """Implementation function for knowledge base search."""
-        from app.agents.new_chat.utils import parse_date_or_datetime
-
-        parsed_start: datetime | None = None
-        parsed_end: datetime | None = None
-
-        if start_date:
-            parsed_start = parse_date_or_datetime(start_date)
-        if end_date:
-            parsed_end = parse_date_or_datetime(end_date)
-
-        return await search_knowledge_base_async(
-            query=query,
-            search_space_id=search_space_id,
-            db_session=db_session,
-            connector_service=connector_service,
-            connectors_to_search=connectors_to_search,
-            top_k=top_k,
-            start_date=parsed_start,
-            end_date=parsed_end,
-            available_connectors=_available_connectors,
-            available_document_types=_available_document_types,
-            max_input_tokens=max_input_tokens,
-        )
-
-    # Create StructuredTool with dynamic description
-    # This properly sets the description that the LLM sees
-    tool = StructuredTool(
-        name="search_knowledge_base",
-        description=dynamic_description,
-        coroutine=_search_knowledge_base_impl,
-        args_schema=SearchKnowledgeBaseInput,
-    )
-
-    return tool
+    return deduplicated
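The tail of the new search_knowledge_base_raw_async deduplicates primarily by document ID, uses a content hash only for ID-less documents, and sorts by RRF score so later output-budget truncation keeps the best hits. A standalone sketch of that dedupe-and-rank step (simplified fingerprint, same ordering logic):

from typing import Any

def dedupe_and_rank(all_documents: list[dict[str, Any]]) -> list[dict[str, Any]]:
    seen_ids: set[Any] = set()
    seen_hashes: set[int] = set()
    deduplicated: list[dict[str, Any]] = []
    for doc in all_documents:
        doc_id = (doc.get("document", {}) or {}).get("id")
        if doc_id is not None:
            if doc_id in seen_ids:
                continue
            seen_ids.add(doc_id)
        else:
            fp = hash(doc.get("content", ""))  # simplified fingerprint fallback
            if fp in seen_hashes:
                continue
            seen_hashes.add(fp)
        deduplicated.append(doc)
    # Highest RRF score first, so truncation cannot hide top results.
    deduplicated.sort(key=lambda d: d.get("score", 0), reverse=True)
    return deduplicated

docs = [
    {"document": {"id": 1}, "score": 0.2},
    {"document": {"id": 1}, "score": 0.2},
    {"document": {}, "content": "x", "score": 0.9},
]
print(dedupe_and_rank(docs))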
@@ -71,7 +71,6 @@ from .jira import (
     create_delete_jira_issue_tool,
     create_update_jira_issue_tool,
 )
 from .knowledge_base import create_search_knowledge_base_tool
 from .linear import (
     create_create_linear_issue_tool,
     create_delete_linear_issue_tool,
@@ -128,23 +127,6 @@ class ToolDefinition:
 # Registry of all built-in tools
 # Contributors: Add your new tools here!
 BUILTIN_TOOLS: list[ToolDefinition] = [
-    # Core tool - searches the user's knowledge base
-    # Now supports dynamic connector/document type discovery
-    ToolDefinition(
-        name="search_knowledge_base",
-        description="Search the user's personal knowledge base for relevant information",
-        factory=lambda deps: create_search_knowledge_base_tool(
-            search_space_id=deps["search_space_id"],
-            db_session=deps["db_session"],
-            connector_service=deps["connector_service"],
-            # Optional: dynamically discovered connectors/document types
-            available_connectors=deps.get("available_connectors"),
-            available_document_types=deps.get("available_document_types"),
-            max_input_tokens=deps.get("max_input_tokens"),
-        ),
-        requires=["search_space_id", "db_session", "connector_service"],
-        # Note: available_connectors and available_document_types are optional
-    ),
     # Podcast generation tool
     ToolDefinition(
         name="generate_podcast",
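Each ToolDefinition pairs a factory closure with the dependency keys it needs, so the registry can instantiate only the tools whose dependencies are present; the hunk above drops the search_knowledge_base entry from that list. A minimal sketch of the registry pattern (class and field names here are illustrative, not the repo's exact definitions):

from dataclasses import dataclass, field
from typing import Any, Callable

@dataclass
class ToolDef:
    name: str
    factory: Callable[[dict[str, Any]], Any]
    requires: list[str] = field(default_factory=list)

REGISTRY = [
    ToolDef(
        name="echo",
        factory=lambda deps: (lambda text: f"{deps['prefix']}{text}"),
        requires=["prefix"],
    ),
]

def build(deps: dict[str, Any]) -> dict[str, Any]:
    # Only instantiate tools whose required dependencies are present.
    return {t.name: t.factory(deps) for t in REGISTRY if all(k in deps for k in t.requires)}

tools = build({"prefix": "> "})
print(tools["echo"]("hello"))  # > hello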
@@ -168,8 +150,8 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
         requires=["search_space_id", "db_session", "thread_id"],
     ),
     # Report generation tool (inline, short-lived sessions for DB ops)
-    # Supports internal KB search via source_strategy so the agent doesn't
-    # need to call search_knowledge_base separately before generating.
+    # Supports internal KB search via source_strategy so the agent does not
+    # need a separate search step before generating.
     ToolDefinition(
         name="generate_report",
         description="Generate a structured report from provided content and export it",
@@ -551,7 +533,7 @@ def build_tools(
     tools = build_tools(deps)
 
     # Use only specific tools
-    tools = build_tools(deps, enabled_tools=["search_knowledge_base"])
+    tools = build_tools(deps, enabled_tools=["generate_report"])
 
     # Use defaults but disable podcast
     tools = build_tools(deps, disabled_tools=["generate_podcast"])
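The docstring examples above suggest enabled_tools acts as an allow-list and disabled_tools as a deny-list over registry names. A sketch of plausible filtering semantics (the real build_tools signature and return type may differ):

def select_tools(
    all_names: list[str],
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
) -> list[str]:
    # Allow-list first (None means "all"), then apply the deny-list.
    names = all_names if enabled_tools is None else [n for n in all_names if n in enabled_tools]
    if disabled_tools:
        names = [n for n in names if n not in disabled_tools]
    return names

names = ["search_knowledge_base", "generate_report", "generate_podcast"]
print(select_tools(names, enabled_tools=["generate_report"]))    # ['generate_report']
print(select_tools(names, disabled_tools=["generate_podcast"]))  # defaults minus podcast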
@@ -584,8 +584,8 @@ def create_generate_report_tool(
         search_space_id: The user's search space ID
         thread_id: The chat thread ID for associating the report
         connector_service: Optional connector service for internal KB search.
-            When provided, the tool can search the knowledge base without the
-            agent having to call search_knowledge_base separately.
+            When provided, the tool can search the knowledge base internally
+            (used by the "kb_search" and "auto" source strategies).
         available_connectors: Optional list of connector types available in the
             search space (used to scope internal KB searches).
@@ -639,12 +639,13 @@ def create_generate_report_tool(
 
     SOURCE STRATEGY (how to collect source material):
     - source_strategy="conversation" — The conversation already has
-      enough context (prior Q&A, pasted text, uploaded files, scraped
-      webpages). Pass a thorough summary as source_content.
-      NEVER call search_knowledge_base separately first.
+      enough context (prior Q&A, filesystem exploration, pasted text,
+      uploaded files, scraped webpages). Pass a thorough summary as
+      source_content.
     - source_strategy="kb_search" — Search the knowledge base
       internally. Provide 1-5 targeted search_queries. The tool
-      handles searching — do NOT call search_knowledge_base first.
+      handles searching internally — do NOT manually read and dump
+      /documents/ files into source_content.
     - source_strategy="provided" — Use only what is in source_content
       (default, backward-compatible).
     - source_strategy="auto" — Use source_content if it has enough
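The four documented strategies reduce to a small dispatch: trust the conversation or provided content, search the knowledge base internally, or pick automatically. A hedged sketch of what that dispatch could look like (collect_sources, its threshold, and the kb_search callable are hypothetical, not the tool's actual code):

from typing import Callable

def collect_sources(
    strategy: str,
    source_content: str,
    search_queries: list[str] | None,
    kb_search: Callable[[list[str]], str],  # stand-in for the internal KB search
) -> str:
    # Hypothetical dispatch over the documented strategies.
    if strategy in ("conversation", "provided"):
        return source_content
    if strategy == "kb_search":
        return kb_search(search_queries or [])
    if strategy == "auto":
        # Use provided content when substantial, otherwise fall back to KB search.
        return source_content if len(source_content) > 500 else kb_search(search_queries or [])
    raise ValueError(f"unknown source_strategy: {strategy}")

print(collect_sources("kb_search", "", ["roadmap"], lambda qs: f"results for {qs}"))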
@@ -1064,6 +1065,7 @@ def create_generate_report_tool(
         "title": topic,
         "word_count": metadata.get("word_count", 0),
         "is_revision": bool(parent_report_content),
+        "report_markdown": report_content,
         "message": f"Report generated successfully: {topic}",
     }