refactor: simplify document title assignment across various connectors by removing the connector-name prefixes (e.g. "Gmail: ", "Slack - ") from titles

This commit is contained in:
Anish Sarkar 2026-02-05 02:30:20 +05:30
parent e615a6478c
commit 04884caeef
18 changed files with 44 additions and 50 deletions

View file

@ -319,7 +319,7 @@ async def _process_gmail_message_batch(
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
existing_document.title = f"Gmail: {subject}" existing_document.title = subject
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -375,7 +375,7 @@ async def _process_gmail_message_batch(
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Gmail: {subject}", title=subject,
document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]), document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
document_metadata={ document_metadata={
"message_id": message_id, "message_id": message_id,

View file

@ -346,7 +346,7 @@ async def index_composio_google_calendar(
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
existing_document.title = f"Calendar: {summary}" existing_document.title = summary
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -422,7 +422,7 @@ async def index_composio_google_calendar(
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Calendar: {summary}", title=summary,
document_type=DocumentType( document_type=DocumentType(
TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"] TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]
), ),

View file

@ -1276,7 +1276,7 @@ async def _process_single_drive_file(
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
existing_document.title = f"Drive: {file_name}" existing_document.title = file_name
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -1329,7 +1329,7 @@ async def _process_single_drive_file(
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Drive: {file_name}", title=file_name,
document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]), document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
document_metadata={ document_metadata={
"file_id": file_id, "file_id": file_id,

View file

@ -982,7 +982,7 @@ class ConnectorService:
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
channel_name = metadata.get("channel_name", "Unknown Channel") channel_name = metadata.get("channel_name", "Unknown Channel")
message_date = metadata.get("start_date", "") message_date = metadata.get("start_date", "")
title = f"Slack: {channel_name}" title = channel_name
if message_date: if message_date:
title += f" ({message_date})" title += f" ({message_date})"
return title return title
@ -1056,7 +1056,7 @@ class ConnectorService:
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
page_title = metadata.get("page_title", "Untitled Page") page_title = metadata.get("page_title", "Untitled Page")
indexed_at = metadata.get("indexed_at", "") indexed_at = metadata.get("indexed_at", "")
title = f"Notion: {page_title}" title = page_title
if indexed_at: if indexed_at:
title += f" (indexed: {indexed_at})" title += f" (indexed: {indexed_at})"
return title return title
@ -1366,9 +1366,9 @@ class ConnectorService:
issue_title = metadata.get("issue_title", "Untitled Issue") issue_title = metadata.get("issue_title", "Untitled Issue")
issue_state = metadata.get("state", "") issue_state = metadata.get("state", "")
title = ( title = (
f"Linear: {issue_identifier} - {issue_title}" f"{issue_identifier} - {issue_title}"
if issue_identifier if issue_identifier
else f"Linear: {issue_title}" else issue_title
) )
if issue_state: if issue_state:
title += f" ({issue_state})" title += f" ({issue_state})"
@ -1466,9 +1466,9 @@ class ConnectorService:
issue_title = metadata.get("issue_title", "Untitled Issue") issue_title = metadata.get("issue_title", "Untitled Issue")
status = metadata.get("status", "") status = metadata.get("status", "")
title = ( title = (
f"Jira: {issue_key} - {issue_title}" f"{issue_key} - {issue_title}"
if issue_key if issue_key
else f"Jira: {issue_title}" else issue_title
) )
if status: if status:
title += f" ({status})" title += f" ({status})"
@ -1570,7 +1570,7 @@ class ConnectorService:
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
event_summary = metadata.get("event_summary", "Untitled Event") event_summary = metadata.get("event_summary", "Untitled Event")
start_time = metadata.get("start_time", "") start_time = metadata.get("start_time", "")
title = f"Calendar: {event_summary}" title = event_summary
if start_time: if start_time:
title += f" ({start_time})" title += f" ({start_time})"
return title return title
@ -1675,7 +1675,7 @@ class ConnectorService:
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
record_id = metadata.get("record_id", "") record_id = metadata.get("record_id", "")
return f"Airtable Record: {record_id}" if record_id else "Airtable Record" return record_id if record_id else "Airtable Record"
def _description_fn( def _description_fn(
_chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any] _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
@ -1952,7 +1952,7 @@ class ConnectorService:
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
page_title = metadata.get("page_title", "Untitled Page") page_title = metadata.get("page_title", "Untitled Page")
space_key = metadata.get("space_key", "") space_key = metadata.get("space_key", "")
title = f"Confluence: {page_title}" title = page_title
if space_key: if space_key:
title += f" ({space_key})" title += f" ({space_key})"
return title return title
@ -2238,7 +2238,7 @@ class ConnectorService:
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
channel_name = metadata.get("channel_name", "Unknown Channel") channel_name = metadata.get("channel_name", "Unknown Channel")
message_date = metadata.get("start_date", "") message_date = metadata.get("start_date", "")
title = f"Discord: {channel_name}" title = channel_name
if message_date: if message_date:
title += f" ({message_date})" title += f" ({message_date})"
return title return title
@ -2314,7 +2314,7 @@ class ConnectorService:
team_name = metadata.get("team_name", "Unknown Team") team_name = metadata.get("team_name", "Unknown Team")
channel_name = metadata.get("channel_name", "Unknown Channel") channel_name = metadata.get("channel_name", "Unknown Channel")
message_date = metadata.get("start_date", "") message_date = metadata.get("start_date", "")
title = f"Teams: {team_name} - {channel_name}" title = f"{team_name} - {channel_name}"
if message_date: if message_date:
title += f" ({message_date})" title += f" ({message_date})"
return title return title
@ -2388,9 +2388,9 @@ class ConnectorService:
event_name = metadata.get("event_name", "Untitled Event") event_name = metadata.get("event_name", "Untitled Event")
start_time = metadata.get("start_time", "") start_time = metadata.get("start_time", "")
return ( return (
f"Luma: {event_name} ({start_time})" f"{event_name} ({start_time})"
if start_time if start_time
else f"Luma: {event_name}" else event_name
) )
def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
@ -2651,7 +2651,7 @@ class ConnectorService:
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
page_name = metadata.get("page_name", "Untitled Page") page_name = metadata.get("page_name", "Untitled Page")
return f"BookStack: {page_name}" return page_name
def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
page_slug = metadata.get("page_slug", "") page_slug = metadata.get("page_slug", "")

View file

@ -327,9 +327,7 @@ async def index_airtable_records(
) )
# Update existing document # Update existing document
existing_document.title = ( existing_document.title = record_id
f"Airtable Record: {record_id}"
)
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -405,7 +403,7 @@ async def index_airtable_records(
) )
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Airtable Record: {record_id}", title=record_id,
document_type=DocumentType.AIRTABLE_CONNECTOR, document_type=DocumentType.AIRTABLE_CONNECTOR,
document_metadata={ document_metadata={
"record_id": record_id, "record_id": record_id,

View file

@ -317,7 +317,7 @@ async def index_bookstack_pages(
chunks = await create_document_chunks(full_content) chunks = await create_document_chunks(full_content)
# Update existing document # Update existing document
existing_document.title = f"BookStack - {page_name}" existing_document.title = page_name
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -387,7 +387,7 @@ async def index_bookstack_pages(
logger.info(f"Creating new document for page {page_name}") logger.info(f"Creating new document for page {page_name}")
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"BookStack - {page_name}", title=page_name,
document_type=DocumentType.BOOKSTACK_CONNECTOR, document_type=DocumentType.BOOKSTACK_CONNECTOR,
document_metadata=doc_metadata, document_metadata=doc_metadata,
content=summary_content, content=summary_content,

View file

@ -299,7 +299,7 @@ async def index_clickup_tasks(
chunks = await create_document_chunks(task_content) chunks = await create_document_chunks(task_content)
# Update existing document # Update existing document
existing_document.title = f"Task - {task_name}" existing_document.title = task_name
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -376,7 +376,7 @@ async def index_clickup_tasks(
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Task - {task_name}", title=task_name,
document_type=DocumentType.CLICKUP_CONNECTOR, document_type=DocumentType.CLICKUP_CONNECTOR,
document_metadata={ document_metadata={
"task_id": task_id, "task_id": task_id,

View file

@ -307,7 +307,7 @@ async def index_confluence_pages(
chunks = await create_document_chunks(full_content) chunks = await create_document_chunks(full_content)
# Update existing document # Update existing document
existing_document.title = f"Confluence - {page_title}" existing_document.title = page_title
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -387,7 +387,7 @@ async def index_confluence_pages(
logger.info(f"Creating new document for page {page_title}") logger.info(f"Creating new document for page {page_title}")
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Confluence - {page_title}", title=page_title,
document_type=DocumentType.CONFLUENCE_CONNECTOR, document_type=DocumentType.CONFLUENCE_CONNECTOR,
document_metadata={ document_metadata={
"page_id": page_id, "page_id": page_id,

View file

@ -507,7 +507,7 @@ async def index_discord_messages(
# Create and store new document # Create and store new document
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Discord - {guild_name}#{channel_name}", title=f"{guild_name}#{channel_name}",
document_type=DocumentType.DISCORD_CONNECTOR, document_type=DocumentType.DISCORD_CONNECTOR,
document_metadata={ document_metadata={
"guild_name": guild_name, "guild_name": guild_name,

View file

@ -418,7 +418,7 @@ async def _process_repository_digest(
} }
document = Document( document = Document(
title=f"GitHub Repository: {repo_full_name}", title=repo_full_name,
document_type=DocumentType.GITHUB_CONNECTOR, document_type=DocumentType.GITHUB_CONNECTOR,
document_metadata=doc_metadata, document_metadata=doc_metadata,
content=summary_text, content=summary_text,

View file

@ -396,7 +396,7 @@ async def index_google_calendar_events(
chunks = await create_document_chunks(event_markdown) chunks = await create_document_chunks(event_markdown)
# Update existing document # Update existing document
existing_document.title = f"Calendar Event - {event_summary}" existing_document.title = event_summary
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -482,7 +482,7 @@ async def index_google_calendar_events(
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Calendar Event - {event_summary}", title=event_summary,
document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR, document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR,
document_metadata={ document_metadata={
"event_id": event_id, "event_id": event_id,

View file

@ -332,7 +332,7 @@ async def index_google_gmail_messages(
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
# Update existing document # Update existing document
existing_document.title = f"Gmail: {subject}" existing_document.title = subject
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -405,7 +405,7 @@ async def index_google_gmail_messages(
logger.info(f"Creating new document for Gmail message: {subject}") logger.info(f"Creating new document for Gmail message: {subject}")
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Gmail: {subject}", title=subject,
document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
document_metadata={ document_metadata={
"message_id": message_id, "message_id": message_id,

View file

@ -282,9 +282,7 @@ async def index_jira_issues(
chunks = await create_document_chunks(issue_content) chunks = await create_document_chunks(issue_content)
# Update existing document # Update existing document
existing_document.title = ( existing_document.title = f"{issue_identifier}: {issue_title}"
f"Jira - {issue_identifier}: {issue_title}"
)
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -364,7 +362,7 @@ async def index_jira_issues(
) )
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Jira - {issue_identifier}: {issue_title}", title=f"{issue_identifier}: {issue_title}",
document_type=DocumentType.JIRA_CONNECTOR, document_type=DocumentType.JIRA_CONNECTOR,
document_metadata={ document_metadata={
"issue_id": issue_id, "issue_id": issue_id,

View file

@ -314,9 +314,7 @@ async def index_linear_issues(
chunks = await create_document_chunks(issue_content) chunks = await create_document_chunks(issue_content)
# Update existing document # Update existing document
existing_document.title = ( existing_document.title = f"{issue_identifier}: {issue_title}"
f"Linear - {issue_identifier}: {issue_title}"
)
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -397,7 +395,7 @@ async def index_linear_issues(
) )
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Linear - {issue_identifier}: {issue_title}", title=f"{issue_identifier}: {issue_title}",
document_type=DocumentType.LINEAR_CONNECTOR, document_type=DocumentType.LINEAR_CONNECTOR,
document_metadata={ document_metadata={
"issue_id": issue_id, "issue_id": issue_id,

View file

@ -361,7 +361,7 @@ async def index_luma_events(
chunks = await create_document_chunks(event_markdown) chunks = await create_document_chunks(event_markdown)
# Update existing document # Update existing document
existing_document.title = f"Luma Event - {event_name}" existing_document.title = event_name
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -455,7 +455,7 @@ async def index_luma_events(
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Luma Event - {event_name}", title=event_name,
document_type=DocumentType.LUMA_CONNECTOR, document_type=DocumentType.LUMA_CONNECTOR,
document_metadata={ document_metadata={
"event_id": event_id, "event_id": event_id,

View file

@ -387,7 +387,7 @@ async def index_notion_pages(
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
# Update existing document # Update existing document
existing_document.title = f"Notion - {page_title}" existing_document.title = page_title
existing_document.content = summary_content existing_document.content = summary_content
existing_document.content_hash = content_hash existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding existing_document.embedding = summary_embedding
@ -458,7 +458,7 @@ async def index_notion_pages(
# Create and store new document # Create and store new document
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Notion - {page_title}", title=page_title,
document_type=DocumentType.NOTION_CONNECTOR, document_type=DocumentType.NOTION_CONNECTOR,
document_metadata={ document_metadata={
"page_title": page_title, "page_title": page_title,

View file

@ -373,7 +373,7 @@ async def index_slack_messages(
# Create and store new document # Create and store new document
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Slack - {channel_name}", title=channel_name,
document_type=DocumentType.SLACK_CONNECTOR, document_type=DocumentType.SLACK_CONNECTOR,
document_metadata={ document_metadata={
"channel_name": channel_name, "channel_name": channel_name,

View file

@ -410,7 +410,7 @@ async def index_teams_messages(
# Create and store new document # Create and store new document
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
title=f"Teams - {team_name} - {channel_name}", title=f"{team_name} - {channel_name}",
document_type=DocumentType.TEAMS_CONNECTOR, document_type=DocumentType.TEAMS_CONNECTOR,
document_metadata={ document_metadata={
"team_name": team_name, "team_name": team_name,