From aac04320236db523a98f6e70235eb90ff08afdca Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk" <vermarohanfinal@gmail.com>
Date: Mon, 5 Jan 2026 22:18:25 -0800
Subject: [PATCH] refactor: update Discord message indexing logic

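- Changed Discord indexing to store each message as its own document (identified by channel ID and message ID) instead of one LLM-summarized document per channel; this removes the long-context LLM summarization step and records per-message metadata (message ID, timestamp, author).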
- Removed the announcement banner component and its dismissal-state atom, deleting the now-unused files and simplifying the dashboard layout.
- Updated log and result messages to report processed messages rather than channels, and renamed the task-log field `channels_processed` to `messages_processed`.
---
 .../connector_indexers/discord_indexer.py     | 312 ++++++++----------
 surfsense_web/app/dashboard/layout.tsx        |   2 -
 surfsense_web/atoms/announcement.atom.ts      |   5 -
 .../components/announcement-banner.tsx        |  47 ---
 4 files changed, 129 insertions(+), 237 deletions(-)
 delete mode 100644 surfsense_web/atoms/announcement.atom.ts
 delete mode 100644 surfsense_web/components/announcement-banner.tsx

diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
index b3de1f4b5..5c92d2601 100644
--- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
@@ -11,17 +11,15 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.config import config
 from app.connectors.discord_connector import DiscordConnector
 from app.db import Document, DocumentType, SearchSourceConnectorType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
     create_document_chunks,
     generate_content_hash,
-    generate_document_summary,
     generate_unique_identifier_hash,
 )
 
 from .base import (
-    build_document_metadata_string,
+    build_document_metadata_markdown,
     check_document_by_unique_identifier,
     get_connector_by_id,
     get_current_timestamp,
@@ -336,207 +334,155 @@ async def index_discord_messages(
                             documents_skipped += 1
                             continue
 
-                        # Convert messages to markdown format
-                        channel_content = (
-                            f"# Discord Channel: {guild_name} / {channel_name}\n\n"
-                        )
+                        # Process each message as an individual document (like Slack)
                         for msg in formatted_messages:
-                            user_name = msg.get("author_name", "Unknown User")
-                            timestamp = msg.get("created_at", "Unknown Time")
-                            text = msg.get("content", "")
-                            channel_content += (
-                                f"## {user_name} ({timestamp})\n\n{text}\n\n---\n\n"
+                            msg_id = msg.get("id", "")
+                            msg_user_name = msg.get("author_name", "Unknown User")
+                            msg_timestamp = msg.get("created_at", "Unknown Time")
+                            msg_text = msg.get("content", "")
+
+                            # Format document metadata (similar to Slack)
+                            metadata_sections = [
+                                (
+                                    "METADATA",
+                                    [
+                                        f"GUILD_NAME: {guild_name}",
+                                        f"GUILD_ID: {guild_id}",
+                                        f"CHANNEL_NAME: {channel_name}",
+                                        f"CHANNEL_ID: {channel_id}",
+                                        f"MESSAGE_TIMESTAMP: {msg_timestamp}",
+                                        f"MESSAGE_USER_NAME: {msg_user_name}",
+                                    ],
+                                ),
+                                (
+                                    "CONTENT",
+                                    [
+                                        "FORMAT: markdown",
+                                        "TEXT_START",
+                                        msg_text,
+                                        "TEXT_END",
+                                    ],
+                                ),
+                            ]
+
+                            # Build the document string
+                            combined_document_string = build_document_metadata_markdown(
+                                metadata_sections
                             )
 
-                        # Metadata sections
-                        metadata_sections = [
-                            (
-                                "METADATA",
-                                [
-                                    f"GUILD_NAME: {guild_name}",
-                                    f"GUILD_ID: {guild_id}",
-                                    f"CHANNEL_NAME: {channel_name}",
-                                    f"CHANNEL_ID: {channel_id}",
-                                    f"MESSAGE_COUNT: {len(formatted_messages)}",
-                                ],
-                            ),
-                            (
-                                "CONTENT",
-                                [
-                                    "FORMAT: markdown",
-                                    "TEXT_START",
-                                    channel_content,
-                                    "TEXT_END",
-                                ],
-                            ),
-                        ]
+                            # Generate unique identifier hash for this Discord message
+                            unique_identifier = f"{channel_id}_{msg_id}"
+                            unique_identifier_hash = generate_unique_identifier_hash(
+                                DocumentType.DISCORD_CONNECTOR,
+                                unique_identifier,
+                                search_space_id,
+                            )
 
-                        combined_document_string = build_document_metadata_string(
-                            metadata_sections
-                        )
+                            # Generate content hash
+                            content_hash = generate_content_hash(
+                                combined_document_string, search_space_id
+                            )
 
-                        # Generate unique identifier hash for this Discord channel
-                        unique_identifier_hash = generate_unique_identifier_hash(
-                            DocumentType.DISCORD_CONNECTOR, channel_id, search_space_id
-                        )
+                            # Check if document with this unique identifier already exists
+                            existing_document = await check_document_by_unique_identifier(
+                                session, unique_identifier_hash
+                            )
 
-                        # Generate content hash
-                        content_hash = generate_content_hash(
-                            combined_document_string, search_space_id
-                        )
-
-                        # Check if document with this unique identifier already exists
-                        existing_document = await check_document_by_unique_identifier(
-                            session, unique_identifier_hash
-                        )
-
-                        if existing_document:
-                            # Document exists - check if content has changed
-                            if existing_document.content_hash == content_hash:
-                                logger.info(
-                                    f"Document for Discord channel {guild_name}#{channel_name} unchanged. Skipping."
-                                )
-                                documents_skipped += 1
-                                continue
-                            else:
-                                # Content has changed - update the existing document
-                                logger.info(
-                                    f"Content changed for Discord channel {guild_name}#{channel_name}. Updating document."
-                                )
-
-                                # Get user's long context LLM
-                                user_llm = await get_user_long_context_llm(
-                                    session, user_id, search_space_id
-                                )
-                                if not user_llm:
-                                    logger.error(
-                                        f"No long context LLM configured for user {user_id}"
-                                    )
-                                    skipped_channels.append(
-                                        f"{guild_name}#{channel_name} (no LLM configured)"
+                            if existing_document:
+                                # Document exists - check if content has changed
+                                if existing_document.content_hash == content_hash:
+                                    logger.info(
+                                        f"Document for Discord message {msg_id} in {guild_name}#{channel_name} unchanged. Skipping."
                                     )
                                     documents_skipped += 1
                                     continue
+                                else:
+                                    # Content has changed - update the existing document
+                                    logger.info(
+                                        f"Content changed for Discord message {msg_id} in {guild_name}#{channel_name}. Updating document."
+                                    )
 
-                                # Generate summary with metadata
-                                document_metadata = {
-                                    "guild_name": guild_name,
-                                    "channel_name": channel_name,
-                                    "message_count": len(formatted_messages),
-                                    "document_type": "Discord Channel Messages",
-                                    "connector_type": "Discord",
-                                }
-                                (
-                                    summary_content,
-                                    summary_embedding,
-                                ) = await generate_document_summary(
-                                    combined_document_string,
-                                    user_llm,
-                                    document_metadata,
-                                )
+                                    # Update chunks and embedding
+                                    chunks = await create_document_chunks(
+                                        combined_document_string
+                                    )
+                                    doc_embedding = config.embedding_model_instance.embed(
+                                        combined_document_string
+                                    )
 
-                                # Chunks from channel content
-                                chunks = await create_document_chunks(channel_content)
+                                    # Update existing document
+                                    existing_document.content = combined_document_string
+                                    existing_document.content_hash = content_hash
+                                    existing_document.embedding = doc_embedding
+                                    existing_document.document_metadata = {
+                                        "guild_name": guild_name,
+                                        "guild_id": guild_id,
+                                        "channel_name": channel_name,
+                                        "channel_id": channel_id,
+                                        "message_id": msg_id,
+                                        "message_timestamp": msg_timestamp,
+                                        "message_user_name": msg_user_name,
+                                        "indexed_at": datetime.now(UTC).strftime(
+                                            "%Y-%m-%d %H:%M:%S"
+                                        ),
+                                    }
 
-                                # Update existing document
-                                existing_document.title = (
-                                    f"Discord - {guild_name}#{channel_name}"
-                                )
-                                existing_document.content = summary_content
-                                existing_document.content_hash = content_hash
-                                existing_document.embedding = summary_embedding
-                                existing_document.document_metadata = {
+                                    # Delete old chunks and add new ones
+                                    existing_document.chunks = chunks
+                                    existing_document.updated_at = get_current_timestamp()
+
+                                    documents_indexed += 1
+                                    logger.info(
+                                        f"Successfully updated Discord message {msg_id}"
+                                    )
+                                    continue
+
+                            # Document doesn't exist - create new one
+                            # Process chunks
+                            chunks = await create_document_chunks(combined_document_string)
+                            doc_embedding = config.embedding_model_instance.embed(
+                                combined_document_string
+                            )
+
+                            # Create and store new document
+                            document = Document(
+                                search_space_id=search_space_id,
+                                title=f"Discord - {guild_name}#{channel_name}",
+                                document_type=DocumentType.DISCORD_CONNECTOR,
+                                document_metadata={
                                     "guild_name": guild_name,
                                     "guild_id": guild_id,
                                     "channel_name": channel_name,
                                     "channel_id": channel_id,
-                                    "message_count": len(formatted_messages),
-                                    "start_date": start_date_iso,
-                                    "end_date": end_date_iso,
+                                    "message_id": msg_id,
+                                    "message_timestamp": msg_timestamp,
+                                    "message_user_name": msg_user_name,
                                     "indexed_at": datetime.now(UTC).strftime(
                                         "%Y-%m-%d %H:%M:%S"
                                     ),
-                                }
-                                existing_document.chunks = chunks
-                                existing_document.updated_at = get_current_timestamp()
+                                },
+                                content=combined_document_string,
+                                embedding=doc_embedding,
+                                chunks=chunks,
+                                content_hash=content_hash,
+                                unique_identifier_hash=unique_identifier_hash,
+                                updated_at=get_current_timestamp(),
+                            )
 
-                                documents_indexed += 1
+                            session.add(document)
+                            documents_indexed += 1
+
+                            # Batch commit every 10 documents
+                            if documents_indexed % 10 == 0:
                                 logger.info(
-                                    f"Successfully updated Discord channel {guild_name}#{channel_name}"
+                                    f"Committing batch: {documents_indexed} Discord messages processed so far"
                                 )
-                                continue
+                                await session.commit()
 
-                        # Document doesn't exist - create new one
-                        # Get user's long context LLM
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-                        if not user_llm:
-                            logger.error(
-                                f"No long context LLM configured for user {user_id}"
-                            )
-                            skipped_channels.append(
-                                f"{guild_name}#{channel_name} (no LLM configured)"
-                            )
-                            documents_skipped += 1
-                            continue
-
-                        # Generate summary with metadata
-                        document_metadata = {
-                            "guild_name": guild_name,
-                            "channel_name": channel_name,
-                            "message_count": len(formatted_messages),
-                            "document_type": "Discord Channel Messages",
-                            "connector_type": "Discord",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            combined_document_string, user_llm, document_metadata
-                        )
-
-                        # Chunks from channel content
-                        chunks = await create_document_chunks(channel_content)
-
-                        # Create and store new document
-                        document = Document(
-                            search_space_id=search_space_id,
-                            title=f"Discord - {guild_name}#{channel_name}",
-                            document_type=DocumentType.DISCORD_CONNECTOR,
-                            document_metadata={
-                                "guild_name": guild_name,
-                                "guild_id": guild_id,
-                                "channel_name": channel_name,
-                                "channel_id": channel_id,
-                                "message_count": len(formatted_messages),
-                                "start_date": start_date_iso,
-                                "end_date": end_date_iso,
-                                "indexed_at": datetime.now(UTC).strftime(
-                                    "%Y-%m-%d %H:%M:%S"
-                                ),
-                            },
-                            content=summary_content,
-                            content_hash=content_hash,
-                            unique_identifier_hash=unique_identifier_hash,
-                            embedding=summary_embedding,
-                            chunks=chunks,
-                            updated_at=get_current_timestamp(),
-                        )
-
-                        session.add(document)
-                        documents_indexed += 1
                         logger.info(
-                            f"Successfully indexed new channel {guild_name}#{channel_name} with {len(formatted_messages)} messages"
+                            f"Successfully indexed channel {guild_name}#{channel_name} with {len(formatted_messages)} messages"
                         )
 
-                        # Batch commit every 10 documents
-                        if documents_indexed % 10 == 0:
-                            logger.info(
-                                f"Committing batch: {documents_indexed} Discord channels processed so far"
-                            )
-                            await session.commit()
-
                 except Exception as e:
                     logger.error(
                         f"Error processing guild {guild_name}: {e!s}", exc_info=True
@@ -553,7 +499,7 @@ async def index_discord_messages(
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(
-            f"Final commit: Total {documents_indexed} Discord channels processed"
+            f"Final commit: Total {documents_indexed} Discord messages processed"
         )
         await session.commit()
 
@@ -561,18 +507,18 @@ async def index_discord_messages(
         result_message = None
         if skipped_channels:
             result_message = (
-                f"Processed {documents_indexed} channels. Skipped {len(skipped_channels)} channels: "
+                f"Processed {documents_indexed} messages. Skipped {len(skipped_channels)} channels: "
                 + ", ".join(skipped_channels)
             )
         else:
-            result_message = f"Processed {documents_indexed} channels."
+            result_message = f"Processed {documents_indexed} messages."
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed Discord indexing for connector {connector_id}",
             {
-                "channels_processed": documents_indexed,
+                "messages_processed": documents_indexed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
                 "skipped_channels_count": len(skipped_channels),
@@ -582,7 +528,7 @@ async def index_discord_messages(
         )
 
         logger.info(
-            f"Discord indexing completed: {documents_indexed} new channels, {documents_skipped} skipped"
+            f"Discord indexing completed: {documents_indexed} new messages, {documents_skipped} skipped"
         )
         return documents_indexed, result_message
 
diff --git a/surfsense_web/app/dashboard/layout.tsx b/surfsense_web/app/dashboard/layout.tsx
index 8763a622f..71cd6275f 100644
--- a/surfsense_web/app/dashboard/layout.tsx
+++ b/surfsense_web/app/dashboard/layout.tsx
@@ -2,7 +2,6 @@
 
 import { Loader2 } from "lucide-react";
 import { useEffect, useState } from "react";
-import { AnnouncementBanner } from "@/components/announcement-banner";
 import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
 import { getBearerToken, redirectToLogin } from "@/lib/auth-utils";
 
@@ -43,7 +42,6 @@ export default function DashboardLayout({ children }: DashboardLayoutProps) {
 
 	return (
 		<div className="h-full flex flex-col ">
-			<AnnouncementBanner />
 			<div className="flex-1 min-h-0">{children}</div>
 		</div>
 	);
diff --git a/surfsense_web/atoms/announcement.atom.ts b/surfsense_web/atoms/announcement.atom.ts
deleted file mode 100644
index 31e032978..000000000
--- a/surfsense_web/atoms/announcement.atom.ts
+++ /dev/null
@@ -1,5 +0,0 @@
-import { atomWithStorage } from "jotai/utils";
-
-// Atom to track whether the announcement banner has been dismissed
-// Persists to localStorage automatically
-export const announcementDismissedAtom = atomWithStorage("surfsense_announcement_dismissed", false);
diff --git a/surfsense_web/components/announcement-banner.tsx b/surfsense_web/components/announcement-banner.tsx
deleted file mode 100644
index 537aa6da7..000000000
--- a/surfsense_web/components/announcement-banner.tsx
+++ /dev/null
@@ -1,47 +0,0 @@
-"use client";
-
-import { useAtom } from "jotai";
-import { ExternalLink, Info, X } from "lucide-react";
-import { announcementDismissedAtom } from "@/atoms/announcement.atom";
-import { Button } from "@/components/ui/button";
-
-export function AnnouncementBanner() {
-	const [isDismissed, setIsDismissed] = useAtom(announcementDismissedAtom);
-
-	const handleDismiss = () => {
-		setIsDismissed(true);
-	};
-
-	if (isDismissed) return null;
-
-	return (
-		<div className="relative h-[3rem] flex items-center justify-center  border  bg-gradient-to-r from-blue-600 to-blue-500 dark:from-blue-700 dark:to-blue-600 border-b border-blue-700 dark:border-blue-800">
-			<div className="container mx-auto px-4">
-				<div className="flex items-center justify-center gap-3 py-2.5">
-					<Info className="h-4 w-4 text-blue-50 flex-shrink-0" />
-					<p className="text-sm text-blue-50 text-center font-medium">
-						SurfSense is a work in progress.{" "}
-						<a
-							href="https://github.com/MODSetter/SurfSense/issues"
-							target="_blank"
-							rel="noopener noreferrer"
-							className="inline-flex items-center gap-1 underline decoration-blue-200 underline-offset-2 hover:decoration-white transition-colors"
-						>
-							Report issues on GitHub
-							<ExternalLink className="h-3 w-3" />
-						</a>
-					</p>
-					<Button
-						variant="ghost"
-						size="sm"
-						className="h-7 w-7 p-0 shrink-0 text-blue-100 hover:text-white hover:bg-blue-700/50 dark:hover:bg-blue-800/50 absolute right-4"
-						onClick={handleDismiss}
-					>
-						<X className="h-3.5 w-3.5" />
-						<span className="sr-only">Dismiss</span>
-					</Button>
-				</div>
-			</div>
-		</div>
-	);
-}