refactor: simplify document title assignment across various connectors by removing prefix formatting

2026-05-29 19:35:20 +02:00 · 2026-02-05 02:30:20 +05:30 · 2026-02-05 02:30:20 +05:30 · 04884caeef
commit 04884caeef
parent e615a6478c
18 changed files with 44 additions and 50 deletions
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@ -319,7 +319,7 @@ async def _process_gmail_message_batch(

                chunks = await create_document_chunks(markdown_content)

-                existing_document.title = f"Gmail: {subject}"
+                existing_document.title = subject
                existing_document.content = summary_content
                existing_document.content_hash = content_hash
                existing_document.embedding = summary_embedding
@ -375,7 +375,7 @@ async def _process_gmail_message_batch(

            document = Document(
                search_space_id=search_space_id,
-                title=f"Gmail: {subject}",
+                title=subject,
                document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
                document_metadata={
                    "message_id": message_id,
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@ -346,7 +346,7 @@ async def index_composio_google_calendar(

                    chunks = await create_document_chunks(markdown_content)

-                    existing_document.title = f"Calendar: {summary}"
+                    existing_document.title = summary
                    existing_document.content = summary_content
                    existing_document.content_hash = content_hash
                    existing_document.embedding = summary_embedding
@ -422,7 +422,7 @@ async def index_composio_google_calendar(

                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Calendar: {summary}",
+                    title=summary,
                    document_type=DocumentType(
                        TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]
                    ),
--- a/surfsense_backend/app/connectors/composio_google_drive_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
@ -1276,7 +1276,7 @@ async def _process_single_drive_file(

        chunks = await create_document_chunks(markdown_content)

-        existing_document.title = f"Drive: {file_name}"
+        existing_document.title = file_name
        existing_document.content = summary_content
        existing_document.content_hash = content_hash
        existing_document.embedding = summary_embedding
@ -1329,7 +1329,7 @@ async def _process_single_drive_file(

    document = Document(
        search_space_id=search_space_id,
-        title=f"Drive: {file_name}",
+        title=file_name,
        document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
        document_metadata={
            "file_id": file_id,
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@ -982,7 +982,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            channel_name = metadata.get("channel_name", "Unknown Channel")
            message_date = metadata.get("start_date", "")
-            title = f"Slack: {channel_name}"
+            title = channel_name
            if message_date:
                title += f" ({message_date})"
            return title
@ -1056,7 +1056,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            page_title = metadata.get("page_title", "Untitled Page")
            indexed_at = metadata.get("indexed_at", "")
-            title = f"Notion: {page_title}"
+            title = page_title
            if indexed_at:
                title += f" (indexed: {indexed_at})"
            return title
@ -1366,9 +1366,9 @@ class ConnectorService:
            issue_title = metadata.get("issue_title", "Untitled Issue")
            issue_state = metadata.get("state", "")
            title = (
-                f"Linear: {issue_identifier} - {issue_title}"
+                f"{issue_identifier} - {issue_title}"
                if issue_identifier
-                else f"Linear: {issue_title}"
+                else issue_title
            )
            if issue_state:
                title += f" ({issue_state})"
@ -1466,9 +1466,9 @@ class ConnectorService:
            issue_title = metadata.get("issue_title", "Untitled Issue")
            status = metadata.get("status", "")
            title = (
-                f"Jira: {issue_key} - {issue_title}"
+                f"{issue_key} - {issue_title}"
                if issue_key
-                else f"Jira: {issue_title}"
+                else issue_title
            )
            if status:
                title += f" ({status})"
@ -1570,7 +1570,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            event_summary = metadata.get("event_summary", "Untitled Event")
            start_time = metadata.get("start_time", "")
-            title = f"Calendar: {event_summary}"
+            title = event_summary
            if start_time:
                title += f" ({start_time})"
            return title
@ -1675,7 +1675,7 @@ class ConnectorService:

        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            record_id = metadata.get("record_id", "")
-            return f"Airtable Record: {record_id}" if record_id else "Airtable Record"
+            return record_id if record_id else "Airtable Record"

        def _description_fn(
            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
@ -1952,7 +1952,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            page_title = metadata.get("page_title", "Untitled Page")
            space_key = metadata.get("space_key", "")
-            title = f"Confluence: {page_title}"
+            title = page_title
            if space_key:
                title += f" ({space_key})"
            return title
@ -2238,7 +2238,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            channel_name = metadata.get("channel_name", "Unknown Channel")
            message_date = metadata.get("start_date", "")
-            title = f"Discord: {channel_name}"
+            title = channel_name
            if message_date:
                title += f" ({message_date})"
            return title
@ -2314,7 +2314,7 @@ class ConnectorService:
            team_name = metadata.get("team_name", "Unknown Team")
            channel_name = metadata.get("channel_name", "Unknown Channel")
            message_date = metadata.get("start_date", "")
-            title = f"Teams: {team_name} - {channel_name}"
+            title = f"{team_name} - {channel_name}"
            if message_date:
                title += f" ({message_date})"
            return title
@ -2388,9 +2388,9 @@ class ConnectorService:
            event_name = metadata.get("event_name", "Untitled Event")
            start_time = metadata.get("start_time", "")
            return (
-                f"Luma: {event_name} ({start_time})"
+                f"{event_name} ({start_time})"
                if start_time
-                else f"Luma: {event_name}"
+                else event_name
            )

        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
@ -2651,7 +2651,7 @@ class ConnectorService:

        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            page_name = metadata.get("page_name", "Untitled Page")
-            return f"BookStack: {page_name}"
+            return page_name

        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            page_slug = metadata.get("page_slug", "")
--- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
@ -327,9 +327,7 @@ async def index_airtable_records(
                                    )

                                    # Update existing document
-                                    existing_document.title = (
-                                        f"Airtable Record: {record_id}"
-                                    )
+                                    existing_document.title = record_id
                                    existing_document.content = summary_content
                                    existing_document.content_hash = content_hash
                                    existing_document.embedding = summary_embedding
@ -405,7 +403,7 @@ async def index_airtable_records(
                            )
                            document = Document(
                                search_space_id=search_space_id,
-                                title=f"Airtable Record: {record_id}",
+                                title=record_id,
                                document_type=DocumentType.AIRTABLE_CONNECTOR,
                                document_metadata={
                                    "record_id": record_id,
--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@ -317,7 +317,7 @@ async def index_bookstack_pages(
                        chunks = await create_document_chunks(full_content)

                        # Update existing document
-                        existing_document.title = f"BookStack - {page_name}"
+                        existing_document.title = page_name
                        existing_document.content = summary_content
                        existing_document.content_hash = content_hash
                        existing_document.embedding = summary_embedding
@ -387,7 +387,7 @@ async def index_bookstack_pages(
                logger.info(f"Creating new document for page {page_name}")
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"BookStack - {page_name}",
+                    title=page_name,
                    document_type=DocumentType.BOOKSTACK_CONNECTOR,
                    document_metadata=doc_metadata,
                    content=summary_content,
--- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
@ -299,7 +299,7 @@ async def index_clickup_tasks(
                            chunks = await create_document_chunks(task_content)

                            # Update existing document
-                            existing_document.title = f"Task - {task_name}"
+                            existing_document.title = task_name
                            existing_document.content = summary_content
                            existing_document.content_hash = content_hash
                            existing_document.embedding = summary_embedding
@ -376,7 +376,7 @@ async def index_clickup_tasks(

                    document = Document(
                        search_space_id=search_space_id,
-                        title=f"Task - {task_name}",
+                        title=task_name,
                        document_type=DocumentType.CLICKUP_CONNECTOR,
                        document_metadata={
                            "task_id": task_id,
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@ -307,7 +307,7 @@ async def index_confluence_pages(
                        chunks = await create_document_chunks(full_content)

                        # Update existing document
-                        existing_document.title = f"Confluence - {page_title}"
+                        existing_document.title = page_title
                        existing_document.content = summary_content
                        existing_document.content_hash = content_hash
                        existing_document.embedding = summary_embedding
@ -387,7 +387,7 @@ async def index_confluence_pages(
                logger.info(f"Creating new document for page {page_title}")
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Confluence - {page_title}",
+                    title=page_title,
                    document_type=DocumentType.CONFLUENCE_CONNECTOR,
                    document_metadata={
                        "page_id": page_id,
--- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
@ -507,7 +507,7 @@ async def index_discord_messages(
                            # Create and store new document
                            document = Document(
                                search_space_id=search_space_id,
-                                title=f"Discord - {guild_name}#{channel_name}",
+                                title=f"{guild_name}#{channel_name}",
                                document_type=DocumentType.DISCORD_CONNECTOR,
                                document_metadata={
                                    "guild_name": guild_name,
--- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
@ -418,7 +418,7 @@ async def _process_repository_digest(
    }

    document = Document(
-        title=f"GitHub Repository: {repo_full_name}",
+        title=repo_full_name,
        document_type=DocumentType.GITHUB_CONNECTOR,
        document_metadata=doc_metadata,
        content=summary_text,
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@ -396,7 +396,7 @@ async def index_google_calendar_events(
                        chunks = await create_document_chunks(event_markdown)

                        # Update existing document
-                        existing_document.title = f"Calendar Event - {event_summary}"
+                        existing_document.title = event_summary
                        existing_document.content = summary_content
                        existing_document.content_hash = content_hash
                        existing_document.embedding = summary_embedding
@ -482,7 +482,7 @@ async def index_google_calendar_events(

                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Calendar Event - {event_summary}",
+                    title=event_summary,
                    document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR,
                    document_metadata={
                        "event_id": event_id,
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@ -332,7 +332,7 @@ async def index_google_gmail_messages(
                        chunks = await create_document_chunks(markdown_content)

                        # Update existing document
-                        existing_document.title = f"Gmail: {subject}"
+                        existing_document.title = subject
                        existing_document.content = summary_content
                        existing_document.content_hash = content_hash
                        existing_document.embedding = summary_embedding
@ -405,7 +405,7 @@ async def index_google_gmail_messages(
                logger.info(f"Creating new document for Gmail message: {subject}")
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Gmail: {subject}",
+                    title=subject,
                    document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
                    document_metadata={
                        "message_id": message_id,
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@ -282,9 +282,7 @@ async def index_jira_issues(
                        chunks = await create_document_chunks(issue_content)

                        # Update existing document
-                        existing_document.title = (
-                            f"Jira - {issue_identifier}: {issue_title}"
-                        )
+                        existing_document.title = f"{issue_identifier}: {issue_title}"
                        existing_document.content = summary_content
                        existing_document.content_hash = content_hash
                        existing_document.embedding = summary_embedding
@ -364,7 +362,7 @@ async def index_jira_issues(
                )
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Jira - {issue_identifier}: {issue_title}",
+                    title=f"{issue_identifier}: {issue_title}",
                    document_type=DocumentType.JIRA_CONNECTOR,
                    document_metadata={
                        "issue_id": issue_id,
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@ -314,9 +314,7 @@ async def index_linear_issues(
                        chunks = await create_document_chunks(issue_content)

                        # Update existing document
-                        existing_document.title = (
-                            f"Linear - {issue_identifier}: {issue_title}"
-                        )
+                        existing_document.title = f"{issue_identifier}: {issue_title}"
                        existing_document.content = summary_content
                        existing_document.content_hash = content_hash
                        existing_document.embedding = summary_embedding
@ -397,7 +395,7 @@ async def index_linear_issues(
                )
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Linear - {issue_identifier}: {issue_title}",
+                    title=f"{issue_identifier}: {issue_title}",
                    document_type=DocumentType.LINEAR_CONNECTOR,
                    document_metadata={
                        "issue_id": issue_id,
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@ -361,7 +361,7 @@ async def index_luma_events(
                        chunks = await create_document_chunks(event_markdown)

                        # Update existing document
-                        existing_document.title = f"Luma Event - {event_name}"
+                        existing_document.title = event_name
                        existing_document.content = summary_content
                        existing_document.content_hash = content_hash
                        existing_document.embedding = summary_embedding
@ -455,7 +455,7 @@ async def index_luma_events(

                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Luma Event - {event_name}",
+                    title=event_name,
                    document_type=DocumentType.LUMA_CONNECTOR,
                    document_metadata={
                        "event_id": event_id,
--- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
@ -387,7 +387,7 @@ async def index_notion_pages(
                        chunks = await create_document_chunks(markdown_content)

                        # Update existing document
-                        existing_document.title = f"Notion - {page_title}"
+                        existing_document.title = page_title
                        existing_document.content = summary_content
                        existing_document.content_hash = content_hash
                        existing_document.embedding = summary_embedding
@ -458,7 +458,7 @@ async def index_notion_pages(
                # Create and store new document
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Notion - {page_title}",
+                    title=page_title,
                    document_type=DocumentType.NOTION_CONNECTOR,
                    document_metadata={
                        "page_title": page_title,
--- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
@ -373,7 +373,7 @@ async def index_slack_messages(
                    # Create and store new document
                    document = Document(
                        search_space_id=search_space_id,
-                        title=f"Slack - {channel_name}",
+                        title=channel_name,
                        document_type=DocumentType.SLACK_CONNECTOR,
                        document_metadata={
                            "channel_name": channel_name,
--- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
@ -410,7 +410,7 @@ async def index_teams_messages(
                            # Create and store new document
                            document = Document(
                                search_space_id=search_space_id,
-                                title=f"Teams - {team_name} - {channel_name}",
+                                title=f"{team_name} - {channel_name}",
                                document_type=DocumentType.TEAMS_CONNECTOR,
                                document_metadata={
                                    "team_name": team_name,