From 04884caeef2b36d86c6b7680a43848536f224d4d Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 5 Feb 2026 02:30:20 +0530 Subject: [PATCH] refactor: simplify document title assignment across various connectors by removing prefix formatting --- .../connectors/composio_gmail_connector.py | 4 +-- .../composio_google_calendar_connector.py | 4 +-- .../composio_google_drive_connector.py | 4 +-- .../app/services/connector_service.py | 28 +++++++++---------- .../connector_indexers/airtable_indexer.py | 6 ++-- .../connector_indexers/bookstack_indexer.py | 4 +-- .../connector_indexers/clickup_indexer.py | 4 +-- .../connector_indexers/confluence_indexer.py | 4 +-- .../connector_indexers/discord_indexer.py | 2 +- .../connector_indexers/github_indexer.py | 2 +- .../google_calendar_indexer.py | 4 +-- .../google_gmail_indexer.py | 4 +-- .../tasks/connector_indexers/jira_indexer.py | 6 ++-- .../connector_indexers/linear_indexer.py | 6 ++-- .../tasks/connector_indexers/luma_indexer.py | 4 +-- .../connector_indexers/notion_indexer.py | 4 +-- .../tasks/connector_indexers/slack_indexer.py | 2 +- .../tasks/connector_indexers/teams_indexer.py | 2 +- 18 files changed, 44 insertions(+), 50 deletions(-) diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py index 1964a4d45..05395bfba 100644 --- a/surfsense_backend/app/connectors/composio_gmail_connector.py +++ b/surfsense_backend/app/connectors/composio_gmail_connector.py @@ -319,7 +319,7 @@ async def _process_gmail_message_batch( chunks = await create_document_chunks(markdown_content) - existing_document.title = f"Gmail: {subject}" + existing_document.title = subject existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -375,7 +375,7 @@ async def _process_gmail_message_batch( document = Document( search_space_id=search_space_id, - title=f"Gmail: {subject}", + title=subject, document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]), document_metadata={ "message_id": message_id, diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py index 78ff360ca..4b09f0790 100644 --- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py +++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py @@ -346,7 +346,7 @@ async def index_composio_google_calendar( chunks = await create_document_chunks(markdown_content) - existing_document.title = f"Calendar: {summary}" + existing_document.title = summary existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -422,7 +422,7 @@ async def index_composio_google_calendar( document = Document( search_space_id=search_space_id, - title=f"Calendar: {summary}", + title=summary, document_type=DocumentType( TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"] ), diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index 66669e4e0..d7299fbfe 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -1276,7 +1276,7 @@ async def _process_single_drive_file( chunks = await create_document_chunks(markdown_content) - existing_document.title = f"Drive: {file_name}" + existing_document.title = file_name existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -1329,7 +1329,7 @@ async def _process_single_drive_file( document = Document( search_space_id=search_space_id, - title=f"Drive: {file_name}", + title=file_name, document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]), document_metadata={ "file_id": file_id, diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index 4c5599815..6967902d1 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -982,7 +982,7 @@ class ConnectorService: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: channel_name = metadata.get("channel_name", "Unknown Channel") message_date = metadata.get("start_date", "") - title = f"Slack: {channel_name}" + title = channel_name if message_date: title += f" ({message_date})" return title @@ -1056,7 +1056,7 @@ class ConnectorService: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: page_title = metadata.get("page_title", "Untitled Page") indexed_at = metadata.get("indexed_at", "") - title = f"Notion: {page_title}" + title = page_title if indexed_at: title += f" (indexed: {indexed_at})" return title @@ -1366,9 +1366,9 @@ class ConnectorService: issue_title = metadata.get("issue_title", "Untitled Issue") issue_state = metadata.get("state", "") title = ( - f"Linear: {issue_identifier} - {issue_title}" + f"{issue_identifier} - {issue_title}" if issue_identifier - else f"Linear: {issue_title}" + else issue_title ) if issue_state: title += f" ({issue_state})" @@ -1466,9 +1466,9 @@ class ConnectorService: issue_title = metadata.get("issue_title", "Untitled Issue") status = metadata.get("status", "") title = ( - f"Jira: {issue_key} - {issue_title}" + f"{issue_key} - {issue_title}" if issue_key - else f"Jira: {issue_title}" + else issue_title ) if status: title += f" ({status})" @@ -1570,7 +1570,7 @@ class ConnectorService: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: event_summary = metadata.get("event_summary", "Untitled Event") start_time = metadata.get("start_time", "") - title = f"Calendar: {event_summary}" + title = event_summary if start_time: title += f" ({start_time})" return title @@ -1675,7 +1675,7 @@ class ConnectorService: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: record_id = metadata.get("record_id", "") - return f"Airtable Record: {record_id}" if record_id else "Airtable Record" + return record_id if record_id else "Airtable Record" def _description_fn( _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any] @@ -1952,7 +1952,7 @@ class ConnectorService: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: page_title = metadata.get("page_title", "Untitled Page") space_key = metadata.get("space_key", "") - title = f"Confluence: {page_title}" + title = page_title if space_key: title += f" ({space_key})" return title @@ -2238,7 +2238,7 @@ class ConnectorService: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: channel_name = metadata.get("channel_name", "Unknown Channel") message_date = metadata.get("start_date", "") - title = f"Discord: {channel_name}" + title = channel_name if message_date: title += f" ({message_date})" return title @@ -2314,7 +2314,7 @@ class ConnectorService: team_name = metadata.get("team_name", "Unknown Team") channel_name = metadata.get("channel_name", "Unknown Channel") message_date = metadata.get("start_date", "") - title = f"Teams: {team_name} - {channel_name}" + title = f"{team_name} - {channel_name}" if message_date: title += f" ({message_date})" return title @@ -2388,9 +2388,9 @@ class ConnectorService: event_name = metadata.get("event_name", "Untitled Event") start_time = metadata.get("start_time", "") return ( - f"Luma: {event_name} ({start_time})" + f"{event_name} ({start_time})" if start_time - else f"Luma: {event_name}" + else event_name ) def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: @@ -2651,7 +2651,7 @@ class ConnectorService: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: page_name = metadata.get("page_name", "Untitled Page") - return f"BookStack: {page_name}" + return page_name def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: page_slug = metadata.get("page_slug", "") diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index 029c4a87c..54b1afd26 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -327,9 +327,7 @@ async def index_airtable_records( ) # Update existing document - existing_document.title = ( - f"Airtable Record: {record_id}" - ) + existing_document.title = record_id existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -405,7 +403,7 @@ async def index_airtable_records( ) document = Document( search_space_id=search_space_id, - title=f"Airtable Record: {record_id}", + title=record_id, document_type=DocumentType.AIRTABLE_CONNECTOR, document_metadata={ "record_id": record_id, diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py index fe608a8c9..f1338564e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py @@ -317,7 +317,7 @@ async def index_bookstack_pages( chunks = await create_document_chunks(full_content) # Update existing document - existing_document.title = f"BookStack - {page_name}" + existing_document.title = page_name existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -387,7 +387,7 @@ async def index_bookstack_pages( logger.info(f"Creating new document for page {page_name}") document = Document( search_space_id=search_space_id, - title=f"BookStack - {page_name}", + title=page_name, document_type=DocumentType.BOOKSTACK_CONNECTOR, document_metadata=doc_metadata, content=summary_content, diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index a8991647c..2b8789e0c 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -299,7 +299,7 @@ async def index_clickup_tasks( chunks = await create_document_chunks(task_content) # Update existing document - existing_document.title = f"Task - {task_name}" + existing_document.title = task_name existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -376,7 +376,7 @@ async def index_clickup_tasks( document = Document( search_space_id=search_space_id, - title=f"Task - {task_name}", + title=task_name, document_type=DocumentType.CLICKUP_CONNECTOR, document_metadata={ "task_id": task_id, diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index 24859e685..74b4cc23d 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -307,7 +307,7 @@ async def index_confluence_pages( chunks = await create_document_chunks(full_content) # Update existing document - existing_document.title = f"Confluence - {page_title}" + existing_document.title = page_title existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -387,7 +387,7 @@ async def index_confluence_pages( logger.info(f"Creating new document for page {page_title}") document = Document( search_space_id=search_space_id, - title=f"Confluence - {page_title}", + title=page_title, document_type=DocumentType.CONFLUENCE_CONNECTOR, document_metadata={ "page_id": page_id, diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index 4999ba6d4..f9a6918a7 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -507,7 +507,7 @@ async def index_discord_messages( # Create and store new document document = Document( search_space_id=search_space_id, - title=f"Discord - {guild_name}#{channel_name}", + title=f"{guild_name}#{channel_name}", document_type=DocumentType.DISCORD_CONNECTOR, document_metadata={ "guild_name": guild_name, diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py index d82f18944..848db7623 100644 --- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py @@ -418,7 +418,7 @@ async def _process_repository_digest( } document = Document( - title=f"GitHub Repository: {repo_full_name}", + title=repo_full_name, document_type=DocumentType.GITHUB_CONNECTOR, document_metadata=doc_metadata, content=summary_text, diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index 386c9de43..8d7b8b045 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -396,7 +396,7 @@ async def index_google_calendar_events( chunks = await create_document_chunks(event_markdown) # Update existing document - existing_document.title = f"Calendar Event - {event_summary}" + existing_document.title = event_summary existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -482,7 +482,7 @@ async def index_google_calendar_events( document = Document( search_space_id=search_space_id, - title=f"Calendar Event - {event_summary}", + title=event_summary, document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR, document_metadata={ "event_id": event_id, diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index 34d06d796..805be5781 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -332,7 +332,7 @@ async def index_google_gmail_messages( chunks = await create_document_chunks(markdown_content) # Update existing document - existing_document.title = f"Gmail: {subject}" + existing_document.title = subject existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -405,7 +405,7 @@ async def index_google_gmail_messages( logger.info(f"Creating new document for Gmail message: {subject}") document = Document( search_space_id=search_space_id, - title=f"Gmail: {subject}", + title=subject, document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, document_metadata={ "message_id": message_id, diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index 6971703c1..508834b4f 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -282,9 +282,7 @@ async def index_jira_issues( chunks = await create_document_chunks(issue_content) # Update existing document - existing_document.title = ( - f"Jira - {issue_identifier}: {issue_title}" - ) + existing_document.title = f"{issue_identifier}: {issue_title}" existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -364,7 +362,7 @@ async def index_jira_issues( ) document = Document( search_space_id=search_space_id, - title=f"Jira - {issue_identifier}: {issue_title}", + title=f"{issue_identifier}: {issue_title}", document_type=DocumentType.JIRA_CONNECTOR, document_metadata={ "issue_id": issue_id, diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index a94420bc2..c28f151ca 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -314,9 +314,7 @@ async def index_linear_issues( chunks = await create_document_chunks(issue_content) # Update existing document - existing_document.title = ( - f"Linear - {issue_identifier}: {issue_title}" - ) + existing_document.title = f"{issue_identifier}: {issue_title}" existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -397,7 +395,7 @@ async def index_linear_issues( ) document = Document( search_space_id=search_space_id, - title=f"Linear - {issue_identifier}: {issue_title}", + title=f"{issue_identifier}: {issue_title}", document_type=DocumentType.LINEAR_CONNECTOR, document_metadata={ "issue_id": issue_id, diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index c0eb58d1d..f4527843c 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -361,7 +361,7 @@ async def index_luma_events( chunks = await create_document_chunks(event_markdown) # Update existing document - existing_document.title = f"Luma Event - {event_name}" + existing_document.title = event_name existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -455,7 +455,7 @@ async def index_luma_events( document = Document( search_space_id=search_space_id, - title=f"Luma Event - {event_name}", + title=event_name, document_type=DocumentType.LUMA_CONNECTOR, document_metadata={ "event_id": event_id, diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index b1adeb035..8d4d7650a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -387,7 +387,7 @@ async def index_notion_pages( chunks = await create_document_chunks(markdown_content) # Update existing document - existing_document.title = f"Notion - {page_title}" + existing_document.title = page_title existing_document.content = summary_content existing_document.content_hash = content_hash existing_document.embedding = summary_embedding @@ -458,7 +458,7 @@ async def index_notion_pages( # Create and store new document document = Document( search_space_id=search_space_id, - title=f"Notion - {page_title}", + title=page_title, document_type=DocumentType.NOTION_CONNECTOR, document_metadata={ "page_title": page_title, diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index 3cb4e3c85..010d1eff4 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -373,7 +373,7 @@ async def index_slack_messages( # Create and store new document document = Document( search_space_id=search_space_id, - title=f"Slack - {channel_name}", + title=channel_name, document_type=DocumentType.SLACK_CONNECTOR, document_metadata={ "channel_name": channel_name, diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py index 1e26fbc42..d42c5b7f1 100644 --- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py @@ -410,7 +410,7 @@ async def index_teams_messages( # Create and store new document document = Document( search_space_id=search_space_id, - title=f"Teams - {team_name} - {channel_name}", + title=f"{team_name} - {channel_name}", document_type=DocumentType.TEAMS_CONNECTOR, document_metadata={ "team_name": team_name,