Merge upstream/dev

2026-06-06 20:15:17 +02:00 · 2026-06-05 19:18:12 +02:00 · 2026-06-05 19:18:12 +02:00 · 8bdfd00a15
commit 8bdfd00a15
parent 52ff304d64 c2b8b3ac5e
191 changed files with 3301 additions and 4079 deletions
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
@ -16,7 +16,7 @@ from app.agents.chat.multi_agent_chat.shared.receipts.command import with_receip
 from app.agents.chat.multi_agent_chat.shared.receipts.receipt import make_receipt
 from app.db import Report, shielded_async_session
 from app.services.connector_service import ConnectorService
-from app.services.llm_service import get_document_summary_llm
+from app.services.llm_service import get_agent_llm

 logger = logging.getLogger(__name__)

@ -727,7 +727,7 @@ def create_generate_report_tool(
                            "creating standalone report"
                        )

-                llm = await get_document_summary_llm(read_session, search_space_id)
+                llm = await get_agent_llm(read_session, search_space_id)

            if not llm:
                error_msg = (
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/resume.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/resume.py
@ -17,7 +17,7 @@ from langgraph.types import Command
 from app.agents.chat.multi_agent_chat.shared.receipts.command import with_receipt
 from app.agents.chat.multi_agent_chat.shared.receipts.receipt import make_receipt
 from app.db import Report, shielded_async_session
-from app.services.llm_service import get_document_summary_llm
+from app.services.llm_service import get_agent_llm

 logger = logging.getLogger(__name__)

@ -578,7 +578,7 @@ def create_generate_resume_tool(
                            f"(group {report_group_id})"
                        )

-                llm = await get_document_summary_llm(read_session, search_space_id)
+                llm = await get_agent_llm(read_session, search_space_id)

            if not llm:
                error_msg = (
--- a/surfsense_backend/app/agents/podcaster/nodes.py
+++ b/surfsense_backend/app/agents/podcaster/nodes.py
@ -31,9 +31,7 @@ async def create_podcast_transcript(

    llm = await get_agent_llm(state.db_session, search_space_id)
    if not llm:
-        error_message = (
-            f"No document summary LLM configured for search space {search_space_id}"
-        )
+        error_message = f"No agent LLM configured for search space {search_space_id}"
        print(error_message)
        raise RuntimeError(error_message)

--- a/surfsense_backend/app/celery_app.py
+++ b/surfsense_backend/app/celery_app.py
@ -103,7 +103,7 @@ def init_worker(**kwargs):
    """Initialize the LLM Router and Image Gen Router when a Celery worker process starts.

    This ensures the Auto mode (LiteLLM Router) is available for background tasks
-    like document summarization and image generation.
+    like agent workflows and image generation.
    """
    from app.observability.bootstrap import init_otel

--- a/surfsense_backend/app/connectors/google_drive/content_extractor.py
+++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py
@ -141,7 +141,6 @@ async def download_and_process_file(
    task_logger: TaskLoggingService,
    log_entry: Log,
    connector_id: int | None = None,
-    enable_summary: bool = True,
 ) -> tuple[Any, str | None, dict[str, Any] | None]:
    """
    Download Google Drive file and process using Surfsense file processors.
@ -215,8 +214,6 @@ async def download_and_process_file(
                "source_connector": "google_drive",
            },
        }
-        # Include connector_id for de-indexing support
-        connector_info["enable_summary"] = enable_summary
        if connector_id is not None:
            connector_info["connector_id"] = connector_id

--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -1781,9 +1781,6 @@ class SearchSpace(BaseModel, TimestampMixin):
    agent_llm_id = Column(
        Integer, nullable=True, default=0
    )  # For agent/chat operations, defaults to Auto mode
-    document_summary_llm_id = Column(
-        Integer, nullable=True, default=0
-    )  # For document summarization, defaults to Auto mode
    image_generation_config_id = Column(
        Integer, nullable=True, default=0
    )  # For image generation, defaults to Auto mode
@ -1951,12 +1948,6 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
    last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
    config = Column(JSON, nullable=False)

-    # Summary generation (LLM-based) - disabled by default to save resources.
-    # When enabled, improves hybrid search quality at the cost of LLM calls.
-    enable_summary = Column(
-        Boolean, nullable=False, default=False, server_default="false"
-    )
-
    # Vision LLM for image files - disabled by default to save cost/time.
    # When enabled, images are described via a vision language model instead
    # of falling back to the document parser.
@ -2919,7 +2910,7 @@ async def shielded_async_session():
 async def setup_indexes():
    async with engine.begin() as conn:
        # Create indexes
-        # Document Summary Indexes
+        # Document embedding indexes
        await conn.execute(
            text(
                "CREATE INDEX IF NOT EXISTS document_vector_index ON documents USING hnsw (embedding public.vector_cosine_ops)"
--- a/surfsense_backend/app/indexing_pipeline/adapters/file_upload_adapter.py
+++ b/surfsense_backend/app/indexing_pipeline/adapters/file_upload_adapter.py
@ -18,8 +18,6 @@ class UploadDocumentAdapter:
        etl_service: str,
        search_space_id: int,
        user_id: str,
-        llm,
-        should_summarize: bool = False,
    ) -> None:
        connector_doc = ConnectorDocument(
            title=filename,
@ -29,9 +27,7 @@ class UploadDocumentAdapter:
            search_space_id=search_space_id,
            created_by_id=user_id,
            connector_id=None,
-            should_summarize=should_summarize,
            should_use_code_chunker=False,
-            fallback_summary=markdown_content[:4000],
            metadata={
                "FILE_NAME": filename,
                "ETL_SERVICE": etl_service,
@ -43,7 +39,7 @@ class UploadDocumentAdapter:
        if not documents:
            raise RuntimeError("prepare_for_indexing returned no documents")

-        indexed = await self._service.index(documents[0], connector_doc, llm)
+        indexed = await self._service.index(documents[0], connector_doc)

        if not DocumentStatus.is_state(indexed.status, DocumentStatus.READY):
            raise RuntimeError(indexed.status.get("reason", "Indexing failed"))
@ -51,7 +47,7 @@ class UploadDocumentAdapter:
        indexed.content_needs_reindexing = False
        await self._session.commit()

-    async def reindex(self, document: Document, llm) -> None:
+    async def reindex(self, document: Document) -> None:
        """Re-index an existing document after its source_markdown has been updated."""
        if not document.source_markdown:
            raise RuntimeError("Document has no source_markdown to reindex")
@ -66,15 +62,13 @@ class UploadDocumentAdapter:
            search_space_id=document.search_space_id,
            created_by_id=str(document.created_by_id),
            connector_id=document.connector_id,
-            should_summarize=True,
            should_use_code_chunker=False,
-            fallback_summary=document.source_markdown[:4000],
            metadata=metadata,
        )

        document.content_hash = compute_content_hash(connector_doc)

-        indexed = await self._service.index(document, connector_doc, llm)
+        indexed = await self._service.index(document, connector_doc)

        if not DocumentStatus.is_state(indexed.status, DocumentStatus.READY):
            raise RuntimeError(indexed.status.get("reason", "Reindexing failed"))
--- a/surfsense_backend/app/indexing_pipeline/connector_document.py
+++ b/surfsense_backend/app/indexing_pipeline/connector_document.py
@ -11,9 +11,7 @@ class ConnectorDocument(BaseModel):
    unique_id: str
    document_type: DocumentType
    search_space_id: int = Field(gt=0)
-    should_summarize: bool = True
    should_use_code_chunker: bool = False
-    fallback_summary: str | None = None
    metadata: dict = {}
    connector_id: int | None = None
    created_by_id: str
--- a/surfsense_backend/app/indexing_pipeline/document_summarizer.py
+++ b/surfsense_backend/app/indexing_pipeline/document_summarizer.py
@ -1,30 +0,0 @@
-from app.prompts import SUMMARY_PROMPT_TEMPLATE
-from app.utils.document_converters import optimize_content_for_context_window
-
-
-async def summarize_document(
-    source_markdown: str, llm, metadata: dict | None = None
-) -> str:
-    """Generate a text summary of a document using an LLM, prefixed with metadata when provided."""
-    model_name = getattr(llm, "model", "gpt-3.5-turbo")
-    optimized_content = optimize_content_for_context_window(
-        source_markdown, metadata, model_name
-    )
-
-    summary_chain = SUMMARY_PROMPT_TEMPLATE | llm
-    content_with_metadata = (
-        f"<DOCUMENT><DOCUMENT_METADATA>\n\n{metadata}\n\n</DOCUMENT_METADATA>"
-        f"\n\n<DOCUMENT_CONTENT>\n\n{optimized_content}\n\n</DOCUMENT_CONTENT></DOCUMENT>"
-    )
-    summary_result = await summary_chain.ainvoke({"document": content_with_metadata})
-    summary_content = summary_result.content
-
-    if metadata:
-        metadata_parts = ["# DOCUMENT METADATA"]
-        for key, value in metadata.items():
-            if value:
-                metadata_parts.append(f"**{key.replace('_', ' ').title()}:** {value}")
-        metadata_section = "\n".join(metadata_parts)
-        return f"{metadata_section}\n\n# DOCUMENT SUMMARY\n\n{summary_content}"
-
-    return summary_content
--- a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py
+++ b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py
@ -31,7 +31,6 @@ from app.indexing_pipeline.document_persistence import (
    attach_chunks_to_document,
    rollback_and_persist_failure,
 )
-from app.indexing_pipeline.document_summarizer import summarize_document
 from app.indexing_pipeline.exceptions import (
    EMBEDDING_ERRORS,
    PERMANENT_LLM_ERRORS,
@ -203,9 +202,7 @@ class IndexingPipelineService:

        await self.session.commit()

-    async def index_batch(
-        self, connector_docs: list[ConnectorDocument], llm
-    ) -> list[Document]:
+    async def index_batch(self, connector_docs: list[ConnectorDocument]) -> list[Document]:
        """Convenience method: prepare_for_indexing then index each document.

        Indexers that need heartbeat callbacks or custom per-document logic
@ -218,7 +215,7 @@ class IndexingPipelineService:
            connector_doc = doc_map.get(document.unique_identifier_hash)
            if connector_doc is None:
                continue
-            result = await self.index(document, connector_doc, llm)
+            result = await self.index(document, connector_doc)
            results.append(result)
        return results

@ -350,11 +347,9 @@ class IndexingPipelineService:
            await self.session.rollback()
            return []

-    async def index(
-        self, document: Document, connector_doc: ConnectorDocument, llm
-    ) -> Document:
+    async def index(self, document: Document, connector_doc: ConnectorDocument) -> Document:
        """
-        Run summarization, embedding, and chunking for a document and persist the results.
+        Run deterministic content storage, embedding, and chunking for a document.
        """
        ctx = PipelineLogContext(
            connector_id=connector_doc.connector_id,
@ -379,20 +374,7 @@ class IndexingPipelineService:
            document.status = DocumentStatus.processing()
            await self.session.commit()

-            t_step = time.perf_counter()
-            if connector_doc.should_summarize and llm is not None:
-                content = await summarize_document(
-                    connector_doc.source_markdown, llm, connector_doc.metadata
-                )
-                perf.info(
-                    "[indexing] summarize_document doc=%d in %.3fs",
-                    document.id,
-                    time.perf_counter() - t_step,
-                )
-            elif connector_doc.should_summarize and connector_doc.fallback_summary:
-                content = connector_doc.fallback_summary
-            else:
-                content = connector_doc.source_markdown
+            content = connector_doc.source_markdown

            await self.session.execute(
                delete(Chunk).where(Chunk.document_id == document.id)
@ -523,7 +505,6 @@ class IndexingPipelineService:
    async def index_batch_parallel(
        self,
        connector_docs: list[ConnectorDocument],
-        get_llm: Callable[[AsyncSession], Awaitable],
        *,
        max_concurrency: int = 4,
        on_heartbeat: Callable[[int], Awaitable[None]] | None = None,
@ -532,8 +513,8 @@ class IndexingPipelineService:
        """Index documents in parallel with bounded concurrency.

        Phase 1 (serial): prepare_for_indexing using self.session.
-        Phase 2 (parallel): index each document in an isolated session,
-        bounded by a semaphore to avoid overwhelming APIs/DB.
+        Phase 2 (parallel): index each document in an isolated session, bounded
+        by a semaphore to avoid overwhelming embedding APIs/DB.
        """
        logger = logging.getLogger(__name__)
        perf = get_perf_logger()
@ -577,9 +558,8 @@ class IndexingPipelineService:
                                failed_count += 1
                            return document

-                        llm = await get_llm(isolated_session)
                        iso_pipeline = IndexingPipelineService(isolated_session)
-                        result = await iso_pipeline.index(refetched, connector_doc, llm)
+                        result = await iso_pipeline.index(refetched, connector_doc)

                        async with lock:
                            if DocumentStatus.is_state(
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@ -125,7 +125,6 @@ async def create_documents(
 async def create_documents_file_upload(
    files: list[UploadFile],
    search_space_id: int = Form(...),
-    should_summarize: bool = Form(False),
    use_vision_llm: bool = Form(False),
    processing_mode: str = Form("basic"),
    session: AsyncSession = Depends(get_async_session),
@ -309,7 +308,6 @@ async def create_documents_file_upload(
                filename=filename,
                search_space_id=search_space_id,
                user_id=str(user.id),
-                should_summarize=should_summarize,
                use_vision_llm=use_vision_llm,
                processing_mode=validated_mode.value,
            )
@ -1586,7 +1584,6 @@ async def folder_upload(
    search_space_id: int = Form(...),
    relative_paths: str = Form(...),
    root_folder_id: int | None = Form(None),
-    enable_summary: bool = Form(False),
    use_vision_llm: bool = Form(False),
    processing_mode: str = Form("basic"),
    session: AsyncSession = Depends(get_async_session),
@ -1719,7 +1716,6 @@ async def folder_upload(
        user_id=str(user.id),
        folder_name=folder_name,
        root_folder_id=root_folder_id,
-        enable_summary=enable_summary,
        use_vision_llm=use_vision_llm,
        file_mappings=list(file_mappings),
        processing_mode=validated_mode.value,
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@ -617,9 +617,6 @@ async def get_llm_preferences(

        # Get full config objects for each role
        agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
-        document_summary_llm = await _get_llm_config_by_id(
-            session, search_space.document_summary_llm_id
-        )
        image_generation_config = await _get_image_gen_config_by_id(
            session, search_space.image_generation_config_id
        )
@ -629,11 +626,9 @@ async def get_llm_preferences(

        return LLMPreferencesRead(
            agent_llm_id=search_space.agent_llm_id,
-            document_summary_llm_id=search_space.document_summary_llm_id,
            image_generation_config_id=search_space.image_generation_config_id,
            vision_llm_config_id=search_space.vision_llm_config_id,
            agent_llm=agent_llm,
-            document_summary_llm=document_summary_llm,
            image_generation_config=image_generation_config,
            vision_llm_config=vision_llm_config,
        )
@ -707,9 +702,6 @@ async def update_llm_preferences(

        # Get full config objects for response
        agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
-        document_summary_llm = await _get_llm_config_by_id(
-            session, search_space.document_summary_llm_id
-        )
        image_generation_config = await _get_image_gen_config_by_id(
            session, search_space.image_generation_config_id
        )
@ -719,11 +711,9 @@ async def update_llm_preferences(

        return LLMPreferencesRead(
            agent_llm_id=search_space.agent_llm_id,
-            document_summary_llm_id=search_space.document_summary_llm_id,
            image_generation_config_id=search_space.image_generation_config_id,
            vision_llm_config_id=search_space.vision_llm_config_id,
            agent_llm=agent_llm,
-            document_summary_llm=document_summary_llm,
            image_generation_config=image_generation_config,
            vision_llm_config=vision_llm_config,
        )
--- a/surfsense_backend/app/schemas/new_llm_config.py
+++ b/surfsense_backend/app/schemas/new_llm_config.py
@ -221,9 +221,6 @@ class LLMPreferencesRead(BaseModel):
    agent_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for agent/chat tasks"
    )
-    document_summary_llm_id: int | None = Field(
-        None, description="ID of the LLM config to use for document summarization"
-    )
    image_generation_config_id: int | None = Field(
        None, description="ID of the image generation config to use"
    )
@ -234,9 +231,6 @@ class LLMPreferencesRead(BaseModel):
    agent_llm: dict[str, Any] | None = Field(
        None, description="Full config for agent LLM"
    )
-    document_summary_llm: dict[str, Any] | None = Field(
-        None, description="Full config for document summary LLM"
-    )
    image_generation_config: dict[str, Any] | None = Field(
        None, description="Full config for image generation"
    )
@ -253,9 +247,6 @@ class LLMPreferencesUpdate(BaseModel):
    agent_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for agent/chat tasks"
    )
-    document_summary_llm_id: int | None = Field(
-        None, description="ID of the LLM config to use for document summarization"
-    )
    image_generation_config_id: int | None = Field(
        None, description="ID of the image generation config to use"
    )
--- a/surfsense_backend/app/schemas/search_source_connector.py
+++ b/surfsense_backend/app/schemas/search_source_connector.py
@ -16,7 +16,6 @@ class SearchSourceConnectorBase(BaseModel):
    is_indexable: bool
    last_indexed_at: datetime | None = None
    config: dict[str, Any]
-    enable_summary: bool = False
    enable_vision_llm: bool = False
    periodic_indexing_enabled: bool = False
    indexing_frequency_minutes: int | None = None
@ -67,7 +66,6 @@ class SearchSourceConnectorUpdate(BaseModel):
    is_indexable: bool | None = None
    last_indexed_at: datetime | None = None
    config: dict[str, Any] | None = None
-    enable_summary: bool | None = None
    enable_vision_llm: bool | None = None
    periodic_indexing_enabled: bool | None = None
    indexing_frequency_minutes: int | None = None
--- a/surfsense_backend/app/services/confluence/kb_sync_service.py
+++ b/surfsense_backend/app/services/confluence/kb_sync_service.py
@ -9,7 +9,6 @@ from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -65,29 +64,11 @@ class ConfluenceKBSyncService:
            if dup:
                content_hash = unique_hash

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
-            )

-            doc_metadata_for_summary = {
-                "page_title": page_title,
-                "space_id": space_id,
-                "document_type": "Confluence Page",
-                "connector_type": "Confluence",
-            }

-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    page_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                summary_content = f"Confluence Page: {page_title}\n\n{page_content}"
-                summary_embedding = embed_text(summary_content)
+            summary_content = f"Confluence Page: {page_title}\n\n{page_content}"
+            summary_embedding = embed_text(summary_content)

            chunks = await create_document_chunks(page_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@ -185,25 +166,10 @@ class ConfluenceKBSyncService:

            space_id = (document.document_metadata or {}).get("space_id", "")

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session, user_id, search_space_id, disable_streaming=True
-            )

-            if user_llm:
-                doc_meta = {
-                    "page_title": page_title,
-                    "space_id": space_id,
-                    "document_type": "Confluence Page",
-                    "connector_type": "Confluence",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    page_content, user_llm, doc_meta
-                )
-            else:
-                summary_content = f"Confluence Page: {page_title}\n\n{page_content}"
-                summary_embedding = embed_text(summary_content)
+            summary_content = f"Confluence Page: {page_title}\n\n{page_content}"
+            summary_embedding = embed_text(summary_content)

            chunks = await create_document_chunks(page_content)

--- a/surfsense_backend/app/services/docling_service.py
+++ b/surfsense_backend/app/services/docling_service.py
@ -191,149 +191,6 @@ class DoclingService:
            logger.error(f"Full traceback: {traceback.format_exc()}")
            raise RuntimeError(f"Docling processing failed: {e}") from e

-    async def process_large_document_summary(
-        self, content: str, llm, document_title: str = "Document"
-    ) -> str:
-        """
-        Process large documents using chunked LLM summarization.
-
-        Args:
-            content: The full document content
-            llm: The language model to use for summarization
-            document_title: Title of the document for context
-
-        Returns:
-            Final summary of the document
-        """
-        # Large document threshold (100K characters ≈ 25K tokens)
-        large_document_threshold = 100_000
-
-        if len(content) <= large_document_threshold:
-            # For smaller documents, use direct processing
-            logger.info(
-                f"📄 Document size: {len(content)} chars - using direct processing"
-            )
-            from app.prompts import SUMMARY_PROMPT_TEMPLATE
-
-            summary_chain = SUMMARY_PROMPT_TEMPLATE | llm
-            result = await summary_chain.ainvoke({"document": content})
-            return result.content
-
-        logger.info(
-            f"📚 Large document detected: {len(content)} chars - using chunked processing"
-        )
-
-        # Import chunker from config
-        # Create LLM-optimized chunks (8K tokens max for safety)
-        from chonkie import OverlapRefinery, RecursiveChunker
-        from langchain_core.prompts import PromptTemplate
-
-        llm_chunker = RecursiveChunker(
-            chunk_size=8000  # Conservative for most LLMs
-        )
-
-        # Apply overlap refinery for context preservation (10% overlap = 800 tokens)
-        overlap_refinery = OverlapRefinery(
-            context_size=0.1,  # 10% overlap for context preservation
-            method="suffix",  # Add next chunk context to current chunk
-        )
-
-        # First chunk the content, then apply overlap refinery
-        initial_chunks = llm_chunker.chunk(content)
-        chunks = overlap_refinery.refine(initial_chunks)
-        total_chunks = len(chunks)
-
-        logger.info(f"📄 Split into {total_chunks} chunks for LLM processing")
-
-        # Template for chunk processing
-        chunk_template = PromptTemplate(
-            input_variables=["chunk", "chunk_number", "total_chunks"],
-            template="""<INSTRUCTIONS>
-You are summarizing chunk {chunk_number} of {total_chunks} from a large document.
-
-Create a comprehensive summary of this document chunk. Focus on:
- Key concepts, facts, and information
- Important details and context
- Main topics and themes
-
-Provide a clear, structured summary that captures the essential content.
-
-Chunk {chunk_number}/{total_chunks}:
-<document_chunk>
-{chunk}
-</document_chunk>
-</INSTRUCTIONS>""",
-        )
-
-        # Process each chunk individually
-        chunk_summaries = []
-        for i, chunk in enumerate(chunks, 1):
-            try:
-                logger.info(
-                    f"🔄 Processing chunk {i}/{total_chunks} ({len(chunk.text)} chars)"
-                )
-
-                chunk_chain = chunk_template | llm
-                chunk_result = await chunk_chain.ainvoke(
-                    {
-                        "chunk": chunk.text,
-                        "chunk_number": i,
-                        "total_chunks": total_chunks,
-                    }
-                )
-
-                chunk_summary = chunk_result.content
-                chunk_summaries.append(f"=== Section {i} ===\n{chunk_summary}")
-
-                logger.info(f"✅ Completed chunk {i}/{total_chunks}")
-
-            except Exception as e:
-                logger.error(f"❌ Failed to process chunk {i}/{total_chunks}: {e}")
-                chunk_summaries.append(f"=== Section {i} ===\n[Processing failed]")
-
-        # Combine summaries into final document summary
-        logger.info(f"🔄 Combining {len(chunk_summaries)} chunk summaries")
-
-        try:
-            combine_template = PromptTemplate(
-                input_variables=["summaries", "document_title"],
-                template="""<INSTRUCTIONS>
-You are combining multiple section summaries into a final comprehensive document summary.
-
-Create a unified, coherent summary from the following section summaries of "{document_title}".
-Ensure:
- Logical flow and organization
- No redundancy or repetition  
- Comprehensive coverage of all key points
- Professional, objective tone
-
-<section_summaries>
-{summaries}
-</section_summaries>
-</INSTRUCTIONS>""",
-            )
-
-            combined_summaries = "\n\n".join(chunk_summaries)
-            combine_chain = combine_template | llm
-
-            final_result = await combine_chain.ainvoke(
-                {"summaries": combined_summaries, "document_title": document_title}
-            )
-
-            final_summary = final_result.content
-            logger.info(
-                f"✅ Large document processing complete: {len(final_summary)} chars summary"
-            )
-
-            return final_summary
-
-        except Exception as e:
-            logger.error(f"❌ Failed to combine summaries: {e}")
-            # Fallback: return concatenated chunk summaries
-            fallback_summary = "\n\n".join(chunk_summaries)
-            logger.warning("⚠️ Using fallback combined summary")
-            return fallback_summary
-

 def create_docling_service() -> DoclingService:
    """Create a Docling service instance."""
--- a/surfsense_backend/app/services/dropbox/kb_sync_service.py
+++ b/surfsense_backend/app/services/dropbox/kb_sync_service.py
@ -9,7 +9,6 @@ from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
 )

 logger = logging.getLogger(__name__)
@ -72,29 +71,11 @@ class DropboxKBSyncService:
                )
                content_hash = unique_hash

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
-            )

-            doc_metadata_for_summary = {
-                "file_name": file_name,
-                "document_type": "Dropbox File",
-                "connector_type": "Dropbox",
-            }

-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    indexable_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                logger.warning("No LLM configured — using fallback summary")
-                summary_content = f"Dropbox File: {file_name}\n\n{indexable_content}"
-                summary_embedding = embed_text(summary_content)
+            summary_content = f"Dropbox File: {file_name}\n\n{indexable_content}"
+            summary_embedding = embed_text(summary_content)

            chunks = await create_document_chunks(indexable_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
--- a/surfsense_backend/app/services/gmail/kb_sync_service.py
+++ b/surfsense_backend/app/services/gmail/kb_sync_service.py
@ -9,7 +9,6 @@ from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -78,30 +77,11 @@ class GmailKBSyncService:
                )
                content_hash = unique_hash

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
-            )

-            doc_metadata_for_summary = {
-                "subject": subject,
-                "sender": sender,
-                "document_type": "Gmail Message",
-                "connector_type": "Gmail",
-            }

-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    indexable_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                logger.warning("No LLM configured -- using fallback summary")
-                summary_content = f"Gmail Message: {subject}\n\n{indexable_content}"
-                summary_embedding = await asyncio.to_thread(embed_text, summary_content)
+            summary_content = f"Gmail Message: {subject}\n\n{indexable_content}"
+            summary_embedding = await asyncio.to_thread(embed_text, summary_content)

            chunks = await create_document_chunks(indexable_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
--- a/surfsense_backend/app/services/google_calendar/kb_sync_service.py
+++ b/surfsense_backend/app/services/google_calendar/kb_sync_service.py
@ -19,7 +19,6 @@ from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -90,33 +89,13 @@ class GoogleCalendarKBSyncService:
                )
                content_hash = unique_hash

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
+
+
+            summary_content = (
+                f"Google Calendar Event: {event_summary}\n\n{indexable_content}"
            )
-
-            doc_metadata_for_summary = {
-                "event_summary": event_summary,
-                "start_time": start_time,
-                "end_time": end_time,
-                "document_type": "Google Calendar Event",
-                "connector_type": "Google Calendar",
-            }
-
-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    indexable_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                logger.warning("No LLM configured -- using fallback summary")
-                summary_content = (
-                    f"Google Calendar Event: {event_summary}\n\n{indexable_content}"
-                )
-                summary_embedding = await asyncio.to_thread(embed_text, summary_content)
+            summary_embedding = await asyncio.to_thread(embed_text, summary_content)

            chunks = await create_document_chunks(indexable_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@ -273,29 +252,13 @@ class GoogleCalendarKBSyncService:
            if not indexable_content:
                return {"status": "error", "message": "Event produced empty content"}

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session, user_id, search_space_id, disable_streaming=True
+
+
+            summary_content = (
+                f"Google Calendar Event: {event_summary}\n\n{indexable_content}"
            )
-
-            doc_metadata_for_summary = {
-                "event_summary": event_summary,
-                "start_time": start_time,
-                "end_time": end_time,
-                "document_type": "Google Calendar Event",
-                "connector_type": "Google Calendar",
-            }
-
-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    indexable_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                summary_content = (
-                    f"Google Calendar Event: {event_summary}\n\n{indexable_content}"
-                )
-                summary_embedding = await asyncio.to_thread(embed_text, summary_content)
+            summary_embedding = await asyncio.to_thread(embed_text, summary_content)

            chunks = await create_document_chunks(indexable_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
--- a/surfsense_backend/app/services/google_drive/kb_sync_service.py
+++ b/surfsense_backend/app/services/google_drive/kb_sync_service.py
@ -8,7 +8,6 @@ from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -74,32 +73,13 @@ class GoogleDriveKBSyncService:
                )
                content_hash = unique_hash

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
+
+
+            summary_content = (
+                f"Google Drive File: {file_name}\n\n{indexable_content}"
            )
-
-            doc_metadata_for_summary = {
-                "file_name": file_name,
-                "mime_type": mime_type,
-                "document_type": "Google Drive File",
-                "connector_type": "Google Drive",
-            }
-
-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    indexable_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                logger.warning("No LLM configured — using fallback summary")
-                summary_content = (
-                    f"Google Drive File: {file_name}\n\n{indexable_content}"
-                )
-                summary_embedding = embed_text(summary_content)
+            summary_embedding = embed_text(summary_content)

            chunks = await create_document_chunks(indexable_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
--- a/surfsense_backend/app/services/linear/kb_sync_service.py
+++ b/surfsense_backend/app/services/linear/kb_sync_service.py
@ -9,7 +9,6 @@ from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -84,32 +83,13 @@ class LinearKBSyncService:
                )
                content_hash = unique_hash

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
+
+
+            summary_content = (
+                f"Linear Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
            )
-
-            doc_metadata_for_summary = {
-                "issue_id": issue_identifier,
-                "issue_title": issue_title,
-                "document_type": "Linear Issue",
-                "connector_type": "Linear",
-            }
-
-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    issue_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                logger.warning("No LLM configured — using fallback summary")
-                summary_content = (
-                    f"Linear Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
-                )
-                summary_embedding = embed_text(summary_content)
+            summary_embedding = embed_text(summary_content)

            chunks = await create_document_chunks(issue_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@ -227,30 +207,12 @@ class LinearKBSyncService:
            comment_count = len(formatted_issue.get("comments", []))
            formatted_issue.get("description", "")

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session, user_id, search_space_id, disable_streaming=True
+
+            summary_content = (
+                f"Linear Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
            )
-
-            if user_llm:
-                document_metadata_for_summary = {
-                    "issue_id": issue_identifier,
-                    "issue_title": issue_title,
-                    "state": state,
-                    "priority": priority,
-                    "comment_count": comment_count,
-                    "document_type": "Linear Issue",
-                    "connector_type": "Linear",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    issue_content, user_llm, document_metadata_for_summary
-                )
-            else:
-                summary_content = (
-                    f"Linear Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
-                )
-                summary_embedding = embed_text(summary_content)
+            summary_embedding = embed_text(summary_content)

            chunks = await create_document_chunks(issue_content)

--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@ -68,7 +68,6 @@ def _is_interactive_auth_provider(

 class LLMRole:
    AGENT = "agent"  # For agent/chat operations
-    DOCUMENT_SUMMARY = "document_summary"  # For document summarization


 def get_global_llm_config(llm_config_id: int) -> dict | None:
@ -268,7 +267,7 @@ async def get_search_space_llm_instance(
    Args:
        session: Database session
        search_space_id: Search Space ID
-        role: LLM role ('agent' or 'document_summary')
+        role: LLM role ('agent')

    Returns:
        ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found
@ -285,11 +284,8 @@ async def get_search_space_llm_instance(
            return None

        # Get the appropriate LLM config ID based on role
-        llm_config_id = None
        if role == LLMRole.AGENT:
            llm_config_id = search_space.agent_llm_id
-        elif role == LLMRole.DOCUMENT_SUMMARY:
-            llm_config_id = search_space.document_summary_llm_id
        else:
            logger.error(f"Invalid LLM role: {role}")
            return None
@ -476,20 +472,13 @@ async def get_search_space_llm_instance(


 async def get_agent_llm(
-    session: AsyncSession, search_space_id: int
-) -> ChatLiteLLM | ChatLiteLLMRouter | None:
-    """Get the search space's agent LLM instance for chat operations."""
-    return await get_search_space_llm_instance(session, search_space_id, LLMRole.AGENT)
-
-
-async def get_document_summary_llm(
    session: AsyncSession, search_space_id: int, disable_streaming: bool = False
 ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
-    """Get the search space's document summary LLM instance."""
+    """Get the search space's agent LLM instance for chat operations."""
    return await get_search_space_llm_instance(
        session,
        search_space_id,
-        LLMRole.DOCUMENT_SUMMARY,
+        LLMRole.AGENT,
        disable_streaming=disable_streaming,
    )

@ -655,22 +644,6 @@ async def get_vision_llm(
        return None


-# Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
-async def get_user_long_context_llm(
-    session: AsyncSession,
-    user_id: str,
-    search_space_id: int,
-    disable_streaming: bool = False,
-) -> ChatLiteLLM | ChatLiteLLMRouter | None:
-    """
-    Deprecated: Use get_document_summary_llm instead.
-    The user_id parameter is ignored as LLM preferences are now per-search-space.
-    """
-    return await get_document_summary_llm(
-        session, search_space_id, disable_streaming=disable_streaming
-    )
-
-
 def get_planner_llm() -> ChatLiteLLM | None:
    """Return a planner LLM instance from the first global config marked
    ``is_planner: true``, or ``None`` if no planner config is defined.
--- a/surfsense_backend/app/services/notion/kb_sync_service.py
+++ b/surfsense_backend/app/services/notion/kb_sync_service.py
@ -8,7 +8,6 @@ from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -73,30 +72,11 @@ class NotionKBSyncService:
                )
                content_hash = unique_hash

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
-            )

-            doc_metadata_for_summary = {
-                "page_title": page_title,
-                "page_id": page_id,
-                "document_type": "Notion Page",
-                "connector_type": "Notion",
-            }

-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    markdown_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                logger.warning("No LLM configured — using fallback summary")
-                summary_content = f"Notion Page: {page_title}\n\n{markdown_content}"
-                summary_embedding = embed_text(summary_content)
+            summary_content = f"Notion Page: {page_title}\n\n{markdown_content}"
+            summary_embedding = embed_text(summary_content)

            chunks = await create_document_chunks(markdown_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@ -245,31 +225,11 @@ class NotionKBSyncService:
                f"Final content length: {len(full_content)} chars, verified={content_verified}"
            )

-            from app.services.llm_service import get_user_long_context_llm

            logger.debug("Generating summary and embeddings")
-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,  # disable streaming to avoid leaking into the chat
-            )

-            if user_llm:
-                document_metadata_for_summary = {
-                    "page_title": document.document_metadata.get("page_title"),
-                    "page_id": document.document_metadata.get("page_id"),
-                    "document_type": "Notion Page",
-                    "connector_type": "Notion",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    full_content, user_llm, document_metadata_for_summary
-                )
-                logger.debug(f"Generated summary length: {len(summary_content)} chars")
-            else:
-                logger.warning("No LLM configured - using fallback summary")
-                summary_content = f"Notion Page: {document.document_metadata.get('page_title')}\n\n{full_content}"
-                summary_embedding = embed_text(summary_content)
+            summary_content = f"Notion Page: {document.document_metadata.get('page_title')}\n\n{full_content}"
+            summary_embedding = embed_text(summary_content)

            logger.debug("Creating new chunks")
            chunks = await create_document_chunks(full_content)
--- a/surfsense_backend/app/services/obsidian_plugin_indexer.py
+++ b/surfsense_backend/app/services/obsidian_plugin_indexer.py
@ -233,18 +233,6 @@ async def _resolve_attachment_vision_llm(
    return await get_vision_llm(session, search_space_id)


-async def _resolve_summary_llm(
-    session: AsyncSession, *, user_id: str, search_space_id: int, should_summarize: bool
-):
-    """Fetch summary LLM only when indexing summary is enabled."""
-    if not should_summarize:
-        return None
-
-    from app.services.llm_service import get_user_long_context_llm
-
-    return await get_user_long_context_llm(session, user_id, search_space_id)
-
-
 def _require_extracted_attachment_content(
    *, content: str, etl_meta: dict[str, Any], path: str
 ) -> str:
@ -349,13 +337,6 @@ async def upsert_note(
            path=payload.path,
        )

-    llm = await _resolve_summary_llm(
-        session,
-        user_id=str(user_id),
-        search_space_id=search_space_id,
-        should_summarize=connector.enable_summary,
-    )
-
    document_string = _build_document_string(
        payload, vault_name, content_override=content_for_index
    )
@ -374,8 +355,6 @@ async def upsert_note(
        search_space_id=search_space_id,
        connector_id=connector.id,
        created_by_id=str(user_id),
-        should_summarize=connector.enable_summary,
-        fallback_summary=f"Obsidian Note: {payload.name}\n\n{content_for_index}",
        metadata=metadata,
    )

@ -388,7 +367,7 @@ async def upsert_note(

    document = prepared[0]

-    return await pipeline.index(document, connector_doc, llm)
+    return await pipeline.index(document, connector_doc)


 async def rename_note(
--- a/surfsense_backend/app/services/onedrive/kb_sync_service.py
+++ b/surfsense_backend/app/services/onedrive/kb_sync_service.py
@ -10,7 +10,6 @@ from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
 )

 logger = logging.getLogger(__name__)
@ -73,30 +72,11 @@ class OneDriveKBSyncService:
                )
                content_hash = unique_hash

-            from app.services.llm_service import get_user_long_context_llm

-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
-            )

-            doc_metadata_for_summary = {
-                "file_name": file_name,
-                "mime_type": mime_type,
-                "document_type": "OneDrive File",
-                "connector_type": "OneDrive",
-            }

-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    indexable_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                logger.warning("No LLM configured — using fallback summary")
-                summary_content = f"OneDrive File: {file_name}\n\n{indexable_content}"
-                summary_embedding = await asyncio.to_thread(embed_text, summary_content)
+            summary_content = f"OneDrive File: {file_name}\n\n{indexable_content}"
+            summary_embedding = await asyncio.to_thread(embed_text, summary_content)

            chunks = await create_document_chunks(indexable_content)
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
--- a/surfsense_backend/app/services/task_dispatcher.py
+++ b/surfsense_backend/app/services/task_dispatcher.py
@ -18,7 +18,6 @@ class TaskDispatcher(Protocol):
        filename: str,
        search_space_id: int,
        user_id: str,
-        should_summarize: bool = False,
        use_vision_llm: bool = False,
        processing_mode: str = "basic",
    ) -> None: ...
@ -35,7 +34,6 @@ class CeleryTaskDispatcher:
        filename: str,
        search_space_id: int,
        user_id: str,
-        should_summarize: bool = False,
        use_vision_llm: bool = False,
        processing_mode: str = "basic",
    ) -> None:
@ -49,7 +47,6 @@ class CeleryTaskDispatcher:
            filename=filename,
            search_space_id=search_space_id,
            user_id=user_id,
-            should_summarize=should_summarize,
            use_vision_llm=use_vision_llm,
            processing_mode=processing_mode,
        )
--- a/surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py
@ -9,7 +9,6 @@ from sqlalchemy.orm import selectinload
 from app.celery_app import celery_app
 from app.db import Document
 from app.indexing_pipeline.adapters.file_upload_adapter import UploadDocumentAdapter
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task

@ -68,12 +67,8 @@ async def _reindex_document(document_id: int, user_id: str):

            logger.info(f"Reindexing document {document_id} ({document.title})")

-            user_llm = await get_user_long_context_llm(
-                session, user_id, document.search_space_id
-            )
-
            adapter = UploadDocumentAdapter(session)
-            await adapter.reindex(document=document, llm=user_llm)
+            await adapter.reindex(document=document)

            await task_logger.log_task_success(
                log_entry,
--- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
@ -765,7 +765,6 @@ def process_file_upload_with_document_task(
    filename: str,
    search_space_id: int,
    user_id: str,
-    should_summarize: bool = False,
    use_vision_llm: bool = False,
    processing_mode: str = "basic",
 ):
@ -782,7 +781,6 @@ def process_file_upload_with_document_task(
        filename: Original filename
        search_space_id: ID of the search space
        user_id: ID of the user
-        should_summarize: Whether to generate an LLM summary
    """
    import traceback

@ -814,7 +812,6 @@ def process_file_upload_with_document_task(
                filename,
                search_space_id,
                user_id,
-                should_summarize=should_summarize,
                use_vision_llm=use_vision_llm,
                processing_mode=processing_mode,
            )
@ -850,7 +847,6 @@ async def _process_file_with_document(
    filename: str,
    search_space_id: int,
    user_id: str,
-    should_summarize: bool = False,
    use_vision_llm: bool = False,
    processing_mode: str = "basic",
 ):
@ -954,7 +950,6 @@ async def _process_file_with_document(
                task_logger=task_logger,
                log_entry=log_entry,
                notification=notification,
-                should_summarize=should_summarize,
                use_vision_llm=use_vision_llm,
                processing_mode=processing_mode,
            )
@ -1258,7 +1253,6 @@ def index_local_folder_task(
    exclude_patterns: list[str] | None = None,
    file_extensions: list[str] | None = None,
    root_folder_id: int | None = None,
-    enable_summary: bool = False,
    target_file_paths: list[str] | None = None,
 ):
    """Celery task to index a local folder. Config is passed directly — no connector row."""
@ -1271,7 +1265,6 @@ def index_local_folder_task(
            exclude_patterns=exclude_patterns,
            file_extensions=file_extensions,
            root_folder_id=root_folder_id,
-            enable_summary=enable_summary,
            target_file_paths=target_file_paths,
        )
    )
@ -1285,7 +1278,6 @@ async def _index_local_folder_async(
    exclude_patterns: list[str] | None = None,
    file_extensions: list[str] | None = None,
    root_folder_id: int | None = None,
-    enable_summary: bool = False,
    target_file_paths: list[str] | None = None,
 ):
    """Run local folder indexing with notification + heartbeat."""
@ -1343,8 +1335,7 @@ async def _index_local_folder_async(
                exclude_patterns=exclude_patterns,
                file_extensions=file_extensions,
                root_folder_id=root_folder_id,
-                enable_summary=enable_summary,
-                target_file_paths=target_file_paths,
+                    target_file_paths=target_file_paths,
                on_heartbeat_callback=_heartbeat_progress
                if (is_batch or is_full_scan)
                else None,
@ -1400,7 +1391,6 @@ def index_uploaded_folder_files_task(
    user_id: str,
    folder_name: str,
    root_folder_id: int,
-    enable_summary: bool,
    file_mappings: list[dict],
    use_vision_llm: bool = False,
    processing_mode: str = "basic",
@ -1412,7 +1402,6 @@ def index_uploaded_folder_files_task(
            user_id=user_id,
            folder_name=folder_name,
            root_folder_id=root_folder_id,
-            enable_summary=enable_summary,
            file_mappings=file_mappings,
            use_vision_llm=use_vision_llm,
            processing_mode=processing_mode,
@ -1425,7 +1414,6 @@ async def _index_uploaded_folder_files_async(
    user_id: str,
    folder_name: str,
    root_folder_id: int,
-    enable_summary: bool,
    file_mappings: list[dict],
    use_vision_llm: bool = False,
    processing_mode: str = "basic",
@ -1475,8 +1463,7 @@ async def _index_uploaded_folder_files_async(
                user_id=user_id,
                folder_name=folder_name,
                root_folder_id=root_folder_id,
-                enable_summary=enable_summary,
-                file_mappings=file_mappings,
+                    file_mappings=file_mappings,
                on_heartbeat_callback=_heartbeat_progress,
                use_vision_llm=use_vision_llm,
                processing_mode=processing_mode,
@ -1563,12 +1550,10 @@ async def _ai_sort_search_space_async(search_space_id: int, user_id: str):
    t_start = time.perf_counter()
    try:
        from app.services.ai_file_sort_service import ai_sort_all_documents
-        from app.services.llm_service import get_document_summary_llm
+        from app.services.llm_service import get_agent_llm

        async with get_celery_session_maker()() as session:
-            llm = await get_document_summary_llm(
-                session, search_space_id, disable_streaming=True
-            )
+            llm = await get_agent_llm(session, search_space_id, disable_streaming=True)
            if llm is None:
                logger.warning(
                    "No LLM configured for search_space=%d, skipping AI sort",
@ -1604,7 +1589,7 @@ def ai_sort_document_task(self, search_space_id: int, user_id: str, document_id:
 async def _ai_sort_document_async(search_space_id: int, user_id: str, document_id: int):
    from app.db import Document
    from app.services.ai_file_sort_service import ai_sort_document
-    from app.services.llm_service import get_document_summary_llm
+    from app.services.llm_service import get_agent_llm

    async with get_celery_session_maker()() as session:
        document = await session.get(Document, document_id)
@ -1612,9 +1597,7 @@ async def _ai_sort_document_async(search_space_id: int, user_id: str, document_i
            logger.warning("Document %d not found, skipping AI sort", document_id)
            return

-        llm = await get_document_summary_llm(
-            session, search_space_id, disable_streaming=True
-        )
+        llm = await get_agent_llm(session, search_space_id, disable_streaming=True)
        if llm is None:
            logger.warning(
                "No LLM for search_space=%d, skipping AI sort of doc=%d",
--- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py
@ -114,8 +114,8 @@ async def build_new_chat_input_state(

    final_query = _render_query_with_context(
        agent_user_query=agent_user_query,
-        recent_reports=recent_reports,
        mentioned_connectors=mentioned_connectors,
+        recent_reports=recent_reports,
    )

    if thread_visibility == ChatVisibility.SEARCH_SPACE and current_user_display_name:
@ -198,44 +198,11 @@ async def _resolve_mentions_for_query(
    return agent_user_query, accepted_folder_ids


-def _render_connector_block(mentioned_connectors: list[dict[str, Any]]) -> str | None:
-    """Render the ``<mentioned_connectors>`` block, or ``None`` when empty.
-
-    Malformed entries (non-dict, or missing id/type) are skipped.
-    """
-    connector_lines: list[str] = []
-    for connector in mentioned_connectors:
-        if not isinstance(connector, dict):
-            continue
-        connector_id = connector.get("id")
-        connector_type = connector.get("connector_type") or connector.get(
-            "document_type"
-        )
-        account_name = connector.get("account_name") or connector.get("title")
-        if connector_id is None or connector_type is None:
-            continue
-        connector_lines.append(
-            f'  - connector_id={connector_id}, connector_type="{connector_type}", '
-            f'account_name="{account_name or ""}"'
-        )
-    if not connector_lines:
-        return None
-    return (
-        "<mentioned_connectors>\n"
-        "The user selected these exact connector accounts with @. "
-        "These entries are selection metadata, not retrieved connector content. "
-        "When a connector-backed tool needs an account, use the matching "
-        "connector_id from this list if the tool supports connector_id:\n"
-        + "\n".join(connector_lines)
-        + "\n</mentioned_connectors>"
-    )
-
-
 def _render_query_with_context(
    *,
    agent_user_query: str,
-    recent_reports: list[Report],
    mentioned_connectors: list[dict[str, Any]] | None,
+    recent_reports: list[Report],
 ) -> str:
    """Prepend the ``<mentioned_connectors>`` then ``<report_context>`` blocks.

@ -243,10 +210,9 @@ def _render_query_with_context(
    """
    context_parts: list[str] = []

-    if mentioned_connectors:
-        connector_block = _render_connector_block(mentioned_connectors)
-        if connector_block:
-            context_parts.append(connector_block)
+    connector_context = _render_mentioned_connectors(mentioned_connectors)
+    if connector_context:
+        context_parts.append(connector_context)

    if recent_reports:
        report_lines: list[str] = []
@ -272,3 +238,40 @@ def _render_query_with_context(
        return f"{context}\n\n<user_query>{agent_user_query}</user_query>"

    return agent_user_query
+
+
+def _render_mentioned_connectors(
+    mentioned_connectors: list[dict[str, Any]] | None,
+) -> str | None:
+    """Render selected connector account metadata for connector-backed tools."""
+    if not mentioned_connectors:
+        return None  # None or an empty list: nothing was selected
+
+    connector_lines: list[str] = []
+    for connector in mentioned_connectors:
+        if not isinstance(connector, dict):
+            continue  # ignore malformed (non-dict) entries
+        connector_id = connector.get("id")
+        connector_type = connector.get("connector_type") or connector.get(
+            "document_type"  # fallback key when "connector_type" is missing/falsy
+        )
+        account_name = connector.get("account_name") or connector.get("title")
+        if connector_id is None or connector_type is None:
+            continue  # both id and type are required; skip incomplete entries
+        connector_lines.append(
+            f'  - connector_id={connector_id}, connector_type="{connector_type}", '
+            f'account_name="{account_name or ""}"'  # missing name renders as ""
+        )
+
+    if not connector_lines:
+        return None  # every entry was skipped, so emit no block at all
+
+    return (
+        "<mentioned_connectors>\n"
+        "The user selected these exact connector accounts with @. "
+        "These entries are selection metadata, not retrieved connector content. "
+        "When a connector-backed tool needs an account, use the matching "
+        "connector_id from this list if the tool supports connector_id:\n"
+        + "\n".join(connector_lines)
+        + "\n</mentioned_connectors>"
+    )
--- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
@ -14,13 +14,11 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.connectors.airtable_history import AirtableHistoryConnector
 from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -394,29 +392,10 @@ async def index_airtable_records(
                    document.status = DocumentStatus.processing()
                    await session.commit()

-                    # Heavy processing (LLM, embeddings, chunks)
-                    user_llm = await get_user_long_context_llm(
-                        session, user_id, search_space_id
-                    )
+                    # Heavy processing (embeddings, chunks)

-                    if user_llm and connector.enable_summary:
-                        document_metadata_for_summary = {
-                            "record_id": item["record_id"],
-                            "created_time": item["record"].get("CREATED_TIME()", ""),
-                            "document_type": "Airtable Record",
-                            "connector_type": "Airtable",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            item["markdown_content"],
-                            user_llm,
-                            document_metadata_for_summary,
-                        )
-                    else:
-                        summary_content = f"Airtable Record: {item['record_id']}\n\n{item['markdown_content']}"
-                        summary_embedding = embed_text(summary_content)
+                    summary_content = f"Airtable Record: {item['record_id']}\n\n{item['markdown_content']}"
+                    summary_embedding = embed_text(summary_content)

                    chunks = await create_document_chunks(item["markdown_content"])

--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@ -15,13 +15,11 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.connectors.bookstack_connector import BookStackConnector
 from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -384,10 +382,7 @@ async def index_bookstack_pages(
                document.status = DocumentStatus.processing()
                await session.commit()

-                # Heavy processing (LLM, embeddings, chunks)
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
+                # Heavy processing (embeddings, chunks)

                # Build document metadata
                doc_metadata = {
@ -403,23 +398,8 @@ async def index_bookstack_pages(
                    "connector_id": connector_id,
                }

-                if user_llm and connector.enable_summary:
-                    summary_metadata = {
-                        "page_name": item["page_name"],
-                        "page_id": item["page_id"],
-                        "book_id": item["book_id"],
-                        "document_type": "BookStack Page",
-                        "connector_type": "BookStack",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        item["full_content"], user_llm, summary_metadata
-                    )
-                else:
-                    summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n{item['full_content']}"
-                    summary_embedding = embed_text(summary_content)
+                summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n{item['full_content']}"
+                summary_embedding = embed_text(summary_content)

                # Process chunks - using the full page content
                chunks = await create_document_chunks(item["full_content"])
--- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
@ -16,13 +16,11 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.connectors.clickup_history import ClickUpHistoryConnector
 from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -393,32 +391,10 @@ async def index_clickup_tasks(
                document.status = DocumentStatus.processing()
                await session.commit()

-                # Heavy processing (LLM, embeddings, chunks)
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
+                # Heavy processing (embeddings, chunks)

-                if user_llm and connector.enable_summary:
-                    document_metadata_for_summary = {
-                        "task_id": item["task_id"],
-                        "task_name": item["task_name"],
-                        "task_status": item["task_status"],
-                        "task_priority": item["task_priority"],
-                        "task_list": item["task_list_name"],
-                        "task_space": item["task_space_name"],
-                        "assignees": len(item["task_assignees"]),
-                        "document_type": "ClickUp Task",
-                        "connector_type": "ClickUp",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        item["task_content"], user_llm, document_metadata_for_summary
-                    )
-                else:
-                    summary_content = item["task_content"]
-                    summary_embedding = embed_text(item["task_content"])
+                summary_content = item["task_content"]
+                summary_embedding = embed_text(item["task_content"])

                chunks = await create_document_chunks(item["task_content"])

--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@ -14,7 +14,6 @@ from app.indexing_pipeline.indexing_pipeline_service import (
    IndexingPipelineService,
    PlaceholderInfo,
 )
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService

 from .base import (
@ -36,7 +35,6 @@ def _build_connector_doc(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    """Map a raw Confluence page dict to a ConnectorDocument."""
    page_id = page.get("id", "")
@ -54,10 +52,6 @@ def _build_connector_doc(
        "connector_type": "Confluence",
    }

-    fallback_summary = (
-        f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n{full_content}"
-    )
-
    return ConnectorDocument(
        title=page_title,
        source_markdown=full_content,
@ -66,8 +60,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -268,8 +260,7 @@ async def index_confluence_pages(
                    connector_id=connector_id,
                    search_space_id=search_space_id,
                    user_id=user_id,
-                    enable_summary=connector.enable_summary,
-                )
+                    )

                with session.no_autoflush:
                    duplicate_by_content = await check_duplicate_document_by_hash(
@ -297,12 +288,8 @@ async def index_confluence_pages(

        await pipeline.migrate_legacy_docs(connector_docs)

-        async def _get_llm(s: AsyncSession):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
        _, documents_indexed, documents_failed = await pipeline.index_batch_parallel(
            connector_docs,
-            _get_llm,
            max_concurrency=3,
            on_heartbeat=on_heartbeat_callback,
            heartbeat_interval=HEARTBEAT_INTERVAL_SECONDS,
--- a/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py
@ -27,7 +27,6 @@ from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnector
 from app.indexing_pipeline.connector_document import ConnectorDocument
 from app.indexing_pipeline.document_hashing import compute_identifier_hash
 from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService
-from app.services.llm_service import get_user_long_context_llm
 from app.services.page_limit_service import PageLimitService
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.connector_indexers.base import (
@ -126,7 +125,6 @@ def _build_connector_doc(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    file_id = file.get("id", "")
    file_name = file.get("name", "Unknown")
@ -138,8 +136,6 @@ def _build_connector_doc(
        "connector_type": "Dropbox",
    }

-    fallback_summary = f"File: {file_name}\n\n{markdown[:4000]}"
-
    return ConnectorDocument(
        title=file_name,
        source_markdown=markdown,
@ -148,8 +144,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -161,7 +155,6 @@ async def _download_files_parallel(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    max_concurrency: int = 3,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
@ -191,7 +184,6 @@ async def _download_files_parallel(
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
-                enable_summary=enable_summary,
            )
            async with hb_lock:
                completed_count += 1
@ -223,7 +215,6 @@ async def _download_and_index(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
 ) -> tuple[int, int]:
@ -234,7 +225,6 @@ async def _download_and_index(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat,
        vision_llm=vision_llm,
    )
@ -243,13 +233,8 @@ async def _download_and_index(
    batch_failed = 0
    if connector_docs:
        pipeline = IndexingPipelineService(session)
-
-        async def _get_llm(s):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
        _, batch_indexed, batch_failed = await pipeline.index_batch_parallel(
            connector_docs,
-            _get_llm,
            max_concurrency=3,
            on_heartbeat=on_heartbeat,
        )
@ -289,7 +274,6 @@ async def _index_with_delta_sync(
    log_entry: object,
    max_files: int,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
-    enable_summary: bool = True,
    vision_llm=None,
 ) -> tuple[int, int, int, str]:
    """Delta sync using Dropbox cursor-based change tracking.
@ -361,7 +345,6 @@ async def _index_with_delta_sync(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat_callback,
        vision_llm=vision_llm,
    )
@ -388,7 +371,6 @@ async def _index_full_scan(
    include_subfolders: bool = True,
    incremental_sync: bool = True,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
-    enable_summary: bool = True,
    vision_llm=None,
 ) -> tuple[int, int, int]:
    """Full scan indexing of a folder.
@ -473,7 +455,6 @@ async def _index_full_scan(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat_callback,
        vision_llm=vision_llm,
    )
@ -502,7 +483,6 @@ async def _index_selected_files(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    incremental_sync: bool = True,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
@ -563,7 +543,6 @@ async def _index_selected_files(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat,
        vision_llm=vision_llm,
    )
@ -629,7 +608,6 @@ async def index_dropbox_files(
            )
            return 0, 0, error_msg, 0

-        connector_enable_summary = getattr(connector, "enable_summary", True)
        connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
        vision_llm = None
        if connector_enable_vision_llm:
@ -664,7 +642,6 @@ async def index_dropbox_files(
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
-                enable_summary=connector_enable_summary,
                incremental_sync=incremental_sync,
                vision_llm=vision_llm,
            )
@ -700,7 +677,6 @@ async def index_dropbox_files(
                    task_logger,
                    log_entry,
                    max_files,
-                    enable_summary=connector_enable_summary,
                    vision_llm=vision_llm,
                )
                folder_cursors[folder_path] = new_cursor
@ -720,7 +696,6 @@ async def index_dropbox_files(
                    max_files,
                    include_subfolders,
                    incremental_sync=incremental_sync,
-                    enable_summary=connector_enable_summary,
                    vision_llm=vision_llm,
                )
                total_unsupported += unsup
--- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
@ -18,13 +18,11 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.connectors.github_connector import GitHubConnector
 from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -351,42 +349,14 @@ async def index_github_repos(
                document.status = DocumentStatus.processing()
                await session.commit()

-                # Heavy processing (LLM, embeddings, chunks)
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
+                # Heavy processing (embeddings, chunks)
+
+                summary_text = (
+                    f"# GitHub Repository: {repo_full_name}\n\n"
+                    f"## Summary\n{digest.summary}\n\n"
+                    f"## File Structure\n{digest.tree}"
                )
-
-                document_metadata_for_summary = {
-                    "repository": repo_full_name,
-                    "document_type": "GitHub Repository",
-                    "connector_type": "GitHub",
-                    "ingestion_method": "gitingest",
-                    "file_tree": digest.tree[:2000]
-                    if len(digest.tree) > 2000
-                    else digest.tree,
-                    "estimated_tokens": digest.estimated_tokens,
-                }
-
-                if user_llm and connector.enable_summary:
-                    # Prepare content for summarization
-                    summary_content = digest.full_digest
-                    if len(summary_content) > MAX_DIGEST_CHARS:
-                        summary_content = (
-                            f"# Repository: {repo_full_name}\n\n"
-                            f"## File Structure\n\n{digest.tree}\n\n"
-                            f"## File Contents (truncated)\n\n{digest.content[: MAX_DIGEST_CHARS - len(digest.tree) - 200]}..."
-                        )
-
-                    summary_text, summary_embedding = await generate_document_summary(
-                        summary_content, user_llm, document_metadata_for_summary
-                    )
-                else:
-                    summary_text = (
-                        f"# GitHub Repository: {repo_full_name}\n\n"
-                        f"## Summary\n{digest.summary}\n\n"
-                        f"## File Structure\n{digest.tree}"
-                    )
-                    summary_embedding = embed_text(summary_text)
+                summary_embedding = embed_text(summary_text)

                # Chunk the full digest content for granular search
                try:
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@ -2,7 +2,7 @@
 Google Calendar connector indexer.

 Uses the shared IndexingPipelineService for document deduplication,
-summarization, chunking, and embedding.
+chunking, and embedding.
 """

 from collections.abc import Awaitable, Callable
@ -21,7 +21,6 @@ from app.indexing_pipeline.indexing_pipeline_service import (
    PlaceholderInfo,
 )
 from app.services.composio_service import ComposioService
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.google_credentials import COMPOSIO_GOOGLE_CONNECTOR_TYPES

@ -53,7 +52,6 @@ def _build_connector_doc(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    """Map a raw Google Calendar API event dict to a ConnectorDocument."""
    event_id = event.get("id", "")
@ -78,8 +76,6 @@ def _build_connector_doc(
        "connector_type": "Google Calendar",
    }

-    fallback_summary = f"Google Calendar Event: {event_summary}\n\n{event_markdown}"
-
    return ConnectorDocument(
        title=event_summary,
        source_markdown=event_markdown,
@ -88,8 +84,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -420,8 +414,7 @@ async def index_google_calendar_events(
                    connector_id=connector_id,
                    search_space_id=search_space_id,
                    user_id=user_id,
-                    enable_summary=connector.enable_summary,
-                )
+                    )

                with session.no_autoflush:
                    duplicate = await check_duplicate_document_by_hash(
@ -448,13 +441,8 @@ async def index_google_calendar_events(

        # ── Pipeline: migrate legacy docs + parallel index ─────────────
        await pipeline.migrate_legacy_docs(connector_docs)
-
-        async def _get_llm(s):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
        _, documents_indexed, documents_failed = await pipeline.index_batch_parallel(
            connector_docs,
-            _get_llm,
            max_concurrency=3,
            on_heartbeat=on_heartbeat_callback,
            heartbeat_interval=HEARTBEAT_INTERVAL_SECONDS,
--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@ -40,7 +40,6 @@ from app.indexing_pipeline.indexing_pipeline_service import (
    PlaceholderInfo,
 )
 from app.services.composio_service import ComposioService
-from app.services.llm_service import get_user_long_context_llm
 from app.services.page_limit_service import PageLimitService
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.connector_indexers.base import (
@ -381,7 +380,6 @@ def _build_connector_doc(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    """Build a ConnectorDocument from Drive file metadata + extracted markdown."""
    file_id = file.get("id", "")
@ -394,8 +392,6 @@ def _build_connector_doc(
        "connector_type": "Google Drive",
    }

-    fallback_summary = f"File: {file_name}\n\n{markdown[:4000]}"
-
    return ConnectorDocument(
        title=file_name,
        source_markdown=markdown,
@ -404,8 +400,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -461,7 +455,6 @@ async def _download_files_parallel(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    max_concurrency: int = 3,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
@ -494,7 +487,6 @@ async def _download_files_parallel(
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
-                enable_summary=enable_summary,
            )
            async with hb_lock:
                completed_count += 1
@ -525,7 +517,6 @@ async def _process_single_file(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool = True,
    vision_llm=None,
 ) -> tuple[int, int, int]:
    """Download, extract, and index a single Drive file via the pipeline.
@ -561,8 +552,7 @@ async def _process_single_file(
            connector_id=connector_id,
            search_space_id=search_space_id,
            user_id=user_id,
-            enable_summary=enable_summary,
-        )
+            )

        pipeline = IndexingPipelineService(session)
        documents = await pipeline.prepare_for_indexing([doc])
@ -578,10 +568,7 @@ async def _process_single_file(
            connector_doc = doc_map.get(document.unique_identifier_hash)
            if not connector_doc:
                continue
-            user_llm = await get_user_long_context_llm(
-                session, user_id, search_space_id
-            )
-            await pipeline.index(document, connector_doc, user_llm)
+            await pipeline.index(document, connector_doc)

        await page_limit_service.update_page_usage(
            user_id, estimated_pages, allow_exceed=True
@ -636,7 +623,6 @@ async def _download_and_index(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
 ) -> tuple[int, int]:
@ -650,7 +636,6 @@ async def _download_and_index(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat,
        vision_llm=vision_llm,
    )
@ -659,13 +644,8 @@ async def _download_and_index(
    batch_failed = 0
    if connector_docs:
        pipeline = IndexingPipelineService(session)
-
-        async def _get_llm(s):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
        _, batch_indexed, batch_failed = await pipeline.index_batch_parallel(
            connector_docs,
-            _get_llm,
            max_concurrency=3,
            on_heartbeat=on_heartbeat,
        )
@ -681,7 +661,6 @@ async def _index_selected_files(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
 ) -> tuple[int, int, int, list[str]]:
@ -746,7 +725,6 @@ async def _index_selected_files(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat,
        vision_llm=vision_llm,
    )
@ -781,7 +759,6 @@ async def _index_full_scan(
    max_files: int,
    include_subfolders: bool = False,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
-    enable_summary: bool = True,
    vision_llm=None,
 ) -> tuple[int, int, int]:
    """Full scan indexing of a folder.
@ -911,7 +888,6 @@ async def _index_full_scan(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat_callback,
        vision_llm=vision_llm,
    )
@ -946,7 +922,6 @@ async def _index_with_delta_sync(
    max_files: int,
    include_subfolders: bool = False,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
-    enable_summary: bool = True,
    vision_llm=None,
 ) -> tuple[int, int, int]:
    """Delta sync using change tracking.
@ -1054,7 +1029,6 @@ async def _index_with_delta_sync(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat_callback,
        vision_llm=vision_llm,
    )
@ -1142,7 +1116,6 @@ async def index_google_drive_files(
            )
            return 0, 0, client_error, 0

-        connector_enable_summary = getattr(connector, "enable_summary", True)
        connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
        vision_llm = None
        if connector_enable_vision_llm:
@ -1189,7 +1162,6 @@ async def index_google_drive_files(
                max_files,
                include_subfolders,
                on_heartbeat_callback,
-                connector_enable_summary,
                vision_llm=vision_llm,
            )
            documents_unsupported += du
@ -1208,7 +1180,6 @@ async def index_google_drive_files(
                max_files,
                include_subfolders,
                on_heartbeat_callback,
-                connector_enable_summary,
                vision_llm=vision_llm,
            )
            documents_indexed += ri
@ -1234,7 +1205,6 @@ async def index_google_drive_files(
                max_files,
                include_subfolders,
                on_heartbeat_callback,
-                connector_enable_summary,
                vision_llm=vision_llm,
            )

@ -1346,7 +1316,6 @@ async def index_google_drive_single_file(
            )
            return 0, client_error

-        connector_enable_summary = getattr(connector, "enable_summary", True)
        connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
        vision_llm = None
        if connector_enable_vision_llm:
@ -1370,7 +1339,6 @@ async def index_google_drive_single_file(
            connector_id,
            search_space_id,
            user_id,
-            connector_enable_summary,
            vision_llm=vision_llm,
        )
        await session.commit()
@ -1467,7 +1435,6 @@ async def index_google_drive_selected_files(
            )
            return 0, 0, [error_msg]

-        connector_enable_summary = getattr(connector, "enable_summary", True)
        connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
        vision_llm = None
        if connector_enable_vision_llm:
@ -1481,7 +1448,6 @@ async def index_google_drive_selected_files(
            connector_id=connector_id,
            search_space_id=search_space_id,
            user_id=user_id,
-            enable_summary=connector_enable_summary,
            on_heartbeat=on_heartbeat_callback,
            vision_llm=vision_llm,
        )
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@ -2,7 +2,7 @@
 Google Gmail connector indexer.

 Uses the shared IndexingPipelineService for document deduplication,
-summarization, chunking, and embedding.
+chunking, and embedding.
 """

 from collections.abc import Awaitable, Callable
@ -21,7 +21,6 @@ from app.indexing_pipeline.indexing_pipeline_service import (
    PlaceholderInfo,
 )
 from app.services.composio_service import ComposioService
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.google_credentials import COMPOSIO_GOOGLE_CONNECTOR_TYPES

@ -105,7 +104,6 @@ def _build_connector_doc(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    """Map a raw Gmail API message dict to a ConnectorDocument."""
    message_id = message.get("id", "")
@ -138,12 +136,6 @@ def _build_connector_doc(
        "connector_type": "Google Gmail",
    }

-    fallback_summary = (
-        f"Google Gmail Message: {subject}\n\n"
-        f"From: {sender}\nDate: {date_str}\n\n"
-        f"{markdown_content}"
-    )
-
    return ConnectorDocument(
        title=subject,
        source_markdown=markdown_content,
@ -152,8 +144,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -454,8 +444,7 @@ async def index_google_gmail_messages(
                    connector_id=connector_id,
                    search_space_id=search_space_id,
                    user_id=user_id,
-                    enable_summary=connector.enable_summary,
-                )
+                    )

                with session.no_autoflush:
                    duplicate = await check_duplicate_document_by_hash(
@ -483,13 +472,8 @@ async def index_google_gmail_messages(

        # ── Pipeline: migrate legacy docs + parallel index ─────────────
        await pipeline.migrate_legacy_docs(connector_docs)
-
-        async def _get_llm(s):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
        _, documents_indexed, documents_failed = await pipeline.index_batch_parallel(
            connector_docs,
-            _get_llm,
            max_concurrency=3,
            on_heartbeat=on_heartbeat_callback,
            heartbeat_interval=HEARTBEAT_INTERVAL_SECONDS,
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@ -2,7 +2,7 @@
 Linear connector indexer.

 Uses the shared IndexingPipelineService for document deduplication,
-summarization, chunking, and embedding with bounded parallel indexing.
+chunking, and embedding with bounded parallel indexing.
 """

 from collections.abc import Awaitable, Callable
@ -18,7 +18,6 @@ from app.indexing_pipeline.indexing_pipeline_service import (
    IndexingPipelineService,
    PlaceholderInfo,
 )
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService

 from .base import (
@ -41,7 +40,6 @@ def _build_connector_doc(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    """Map a raw Linear issue dict to a ConnectorDocument."""
    issue_id = issue.get("id", "")
@ -63,11 +61,6 @@ def _build_connector_doc(
        "connector_type": "Linear",
    }

-    fallback_summary = (
-        f"Linear Issue {issue_identifier}: {issue_title}\n\n"
-        f"Status: {state}\n\n{issue_content}"
-    )
-
    return ConnectorDocument(
        title=f"{issue_identifier}: {issue_title}",
        source_markdown=issue_content,
@ -76,8 +69,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -277,8 +268,7 @@ async def index_linear_issues(
                    connector_id=connector_id,
                    search_space_id=search_space_id,
                    user_id=user_id,
-                    enable_summary=connector.enable_summary,
-                )
+                    )

                with session.no_autoflush:
                    duplicate = await check_duplicate_document_by_hash(
@ -306,13 +296,8 @@ async def index_linear_issues(

        # ── Pipeline: migrate legacy docs + parallel index ────────────
        await pipeline.migrate_legacy_docs(connector_docs)
-
-        async def _get_llm(s):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
        _, documents_indexed, documents_failed = await pipeline.index_batch_parallel(
            connector_docs,
-            _get_llm,
            max_concurrency=3,
            on_heartbeat=on_heartbeat_callback,
            heartbeat_interval=HEARTBEAT_INTERVAL_SECONDS,
--- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py
@ -33,7 +33,6 @@ from app.db import (
 from app.indexing_pipeline.connector_document import ConnectorDocument
 from app.indexing_pipeline.document_hashing import compute_identifier_hash
 from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService
-from app.services.llm_service import get_user_long_context_llm
 from app.services.page_limit_service import PageLimitExceededError, PageLimitService
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.celery_tasks import get_celery_session_maker
@ -478,7 +477,6 @@ def _build_connector_doc(
    *,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    """Build a ConnectorDocument from a local file's extracted content."""
    unique_id = f"{folder_name}:{relative_path}"
@ -488,7 +486,6 @@ def _build_connector_doc(
        "document_type": "Local Folder File",
        "connector_type": "Local Folder",
    }
-    fallback_summary = f"File: {title}\n\n{content[:4000]}"

    return ConnectorDocument(
        title=title,
@ -498,8 +495,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=None,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -513,7 +508,6 @@ async def index_local_folder(
    exclude_patterns: list[str] | None = None,
    file_extensions: list[str] | None = None,
    root_folder_id: int | None = None,
-    enable_summary: bool = False,
    target_file_paths: list[str] | None = None,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int, int | None, str | None]:
@ -574,8 +568,7 @@ async def index_local_folder(
                        folder_path=folder_path,
                        folder_name=folder_name,
                        target_file_path=target_file_paths[0],
-                        enable_summary=enable_summary,
-                        root_folder_id=root_folder_id,
+                                root_folder_id=root_folder_id,
                        task_logger=task_logger,
                        log_entry=log_entry,
                    )
@ -587,8 +580,7 @@ async def index_local_folder(
                    folder_path=folder_path,
                    folder_name=folder_name,
                    target_file_paths=target_file_paths,
-                    enable_summary=enable_summary,
-                    root_folder_id=root_folder_id,
+                        root_folder_id=root_folder_id,
                    on_progress_callback=on_heartbeat_callback,
                )
                if err:
@ -774,8 +766,7 @@ async def index_local_folder(
                    folder_name=folder_name,
                    search_space_id=search_space_id,
                    user_id=user_id,
-                    enable_summary=enable_summary,
-                )
+                    )
                connector_docs.append(doc)
                file_meta_map[unique_identifier] = {
                    "relative_path": relative_path,
@ -845,15 +836,13 @@ async def index_local_folder(
            doc_map = {compute_unique_identifier_hash(cd): cd for cd in connector_docs}
            documents = await pipeline.prepare_for_indexing(connector_docs)

-            llm = await get_user_long_context_llm(session, user_id, search_space_id)
-
            for document in documents:
                connector_doc = doc_map.get(document.unique_identifier_hash)
                if connector_doc is None:
                    failed_count += 1
                    continue

-                result = await pipeline.index(document, connector_doc, llm)
+                result = await pipeline.index(document, connector_doc)

                if DocumentStatus.is_state(result.status, DocumentStatus.READY):
                    indexed_count += 1
@ -960,7 +949,6 @@ async def _index_batch_files(
    folder_path: str,
    folder_name: str,
    target_file_paths: list[str],
-    enable_summary: bool,
    root_folder_id: int | None,
    on_progress_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int, str | None]:
@ -995,8 +983,7 @@ async def _index_batch_files(
                        folder_path=folder_path,
                        folder_name=folder_name,
                        target_file_path=file_path,
-                        enable_summary=enable_summary,
-                        root_folder_id=root_folder_id,
+                                root_folder_id=root_folder_id,
                        task_logger=task_logger,
                        log_entry=log_entry,
                    )
@ -1036,7 +1023,6 @@ async def _index_single_file(
    folder_path: str,
    folder_name: str,
    target_file_path: str,
-    enable_summary: bool,
    root_folder_id: int | None,
    task_logger,
    log_entry,
@ -1125,8 +1111,7 @@ async def _index_single_file(
            folder_name=folder_name,
            search_space_id=search_space_id,
            user_id=user_id,
-            enable_summary=enable_summary,
-        )
+            )

        if root_folder_id:
            connector_doc.folder_id = await _resolve_folder_for_file(
@ -1134,7 +1119,6 @@ async def _index_single_file(
            )

        pipeline = IndexingPipelineService(session)
-        llm = await get_user_long_context_llm(session, user_id, search_space_id)
        documents = await pipeline.prepare_for_indexing([connector_doc])

        if not documents:
@ -1142,7 +1126,7 @@ async def _index_single_file(

        db_doc = documents[0]

-        await pipeline.index(db_doc, connector_doc, llm)
+        await pipeline.index(db_doc, connector_doc)

        await session.refresh(db_doc)
        doc_meta = dict(db_doc.document_metadata or {})
@ -1275,7 +1259,6 @@ async def index_uploaded_files(
    user_id: str,
    folder_name: str,
    root_folder_id: int,
-    enable_summary: bool,
    file_mappings: list[dict],
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
    use_vision_llm: bool = False,
@ -1318,7 +1301,6 @@ async def index_uploaded_files(

        page_limit_service = PageLimitService(session)
        pipeline = IndexingPipelineService(session)
-        llm = await get_user_long_context_llm(session, user_id, search_space_id)

        vision_llm_instance = None
        if use_vision_llm:
@ -1414,8 +1396,7 @@ async def index_uploaded_files(
                    folder_name=folder_name,
                    search_space_id=search_space_id,
                    user_id=user_id,
-                    enable_summary=enable_summary,
-                )
+                    )

                connector_doc.folder_id = await _resolve_folder_for_file(
                    session,
@ -1432,7 +1413,7 @@ async def index_uploaded_files(

                db_doc = documents[0]

-                await pipeline.index(db_doc, connector_doc, llm)
+                await pipeline.index(db_doc, connector_doc)

                await session.refresh(db_doc)
                doc_meta = dict(db_doc.document_metadata or {})
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@ -16,13 +16,11 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.connectors.luma_connector import LumaConnector
 from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -437,38 +435,14 @@ async def index_luma_events(
                document.status = DocumentStatus.processing()
                await session.commit()

-                # Heavy processing (LLM, embeddings, chunks)
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
+                # Heavy processing (embeddings, chunks)

-                if user_llm and connector.enable_summary:
-                    document_metadata_for_summary = {
-                        "event_id": item["event_id"],
-                        "event_name": item["event_name"],
-                        "event_url": item["event_url"],
-                        "start_at": item["start_at"],
-                        "end_at": item["end_at"],
-                        "timezone": item["timezone"],
-                        "location": item["location"] or "No location",
-                        "city": item["city"],
-                        "hosts": item["host_names"],
-                        "document_type": "Luma Event",
-                        "connector_type": "Luma",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        item["event_markdown"], user_llm, document_metadata_for_summary
-                    )
-                else:
-                    summary_content = (
-                        f"Luma Event: {item['event_name']}\n\n{item['event_markdown']}"
-                    )
-                    summary_embedding = await asyncio.to_thread(
-                        embed_text, summary_content
-                    )
+                summary_content = (
+                    f"Luma Event: {item['event_name']}\n\n{item['event_markdown']}"
+                )
+                summary_embedding = await asyncio.to_thread(
+                    embed_text, summary_content
+                )

                chunks = await create_document_chunks(item["event_markdown"])

--- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
@ -2,7 +2,7 @@
 Notion connector indexer.

 Uses the shared IndexingPipelineService for document deduplication,
-summarization, chunking, and embedding with bounded parallel indexing.
+chunking, and embedding with bounded parallel indexing.
 """

 from collections.abc import Awaitable, Callable
@ -19,7 +19,6 @@ from app.indexing_pipeline.indexing_pipeline_service import (
    IndexingPipelineService,
    PlaceholderInfo,
 )
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.notion_utils import process_blocks

@ -43,7 +42,6 @@ def _build_connector_doc(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    """Map a raw Notion page dict to a ConnectorDocument."""
    page_id = page.get("page_id", "")
@ -57,8 +55,6 @@ def _build_connector_doc(
        "connector_type": "Notion",
    }

-    fallback_summary = f"Notion Page: {page_title}\n\n{markdown_content}"
-
    return ConnectorDocument(
        title=page_title,
        source_markdown=markdown_content,
@ -67,8 +63,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -314,8 +308,7 @@ async def index_notion_pages(
                    connector_id=connector_id,
                    search_space_id=search_space_id,
                    user_id=user_id,
-                    enable_summary=connector.enable_summary,
-                )
+                    )

                with session.no_autoflush:
                    duplicate = await check_duplicate_document_by_hash(
@ -343,13 +336,8 @@ async def index_notion_pages(

        # ── Pipeline: migrate legacy docs + parallel index ────────────
        await pipeline.migrate_legacy_docs(connector_docs)
-
-        async def _get_llm(s):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
        _, documents_indexed, documents_failed = await pipeline.index_batch_parallel(
            connector_docs,
-            _get_llm,
            max_concurrency=3,
            on_heartbeat=on_heartbeat_callback,
            heartbeat_interval=HEARTBEAT_INTERVAL_SECONDS,
--- a/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py
@ -27,7 +27,6 @@ from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnector
 from app.indexing_pipeline.connector_document import ConnectorDocument
 from app.indexing_pipeline.document_hashing import compute_identifier_hash
 from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService
-from app.services.llm_service import get_user_long_context_llm
 from app.services.page_limit_service import PageLimitService
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.connector_indexers.base import (
@ -133,7 +132,6 @@ def _build_connector_doc(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
 ) -> ConnectorDocument:
    file_id = file.get("id", "")
    file_name = file.get("name", "Unknown")
@ -145,8 +143,6 @@ def _build_connector_doc(
        "connector_type": "OneDrive",
    }

-    fallback_summary = f"File: {file_name}\n\n{markdown[:4000]}"
-
    return ConnectorDocument(
        title=file_name,
        source_markdown=markdown,
@ -155,8 +151,6 @@ def _build_connector_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
        metadata=metadata,
    )

@ -168,7 +162,6 @@ async def _download_files_parallel(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    max_concurrency: int = 3,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
@ -198,7 +191,6 @@ async def _download_files_parallel(
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
-                enable_summary=enable_summary,
            )
            async with hb_lock:
                completed_count += 1
@ -230,7 +222,6 @@ async def _download_and_index(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
 ) -> tuple[int, int]:
@ -241,7 +232,6 @@ async def _download_and_index(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat,
        vision_llm=vision_llm,
    )
@ -250,13 +240,8 @@ async def _download_and_index(
    batch_failed = 0
    if connector_docs:
        pipeline = IndexingPipelineService(session)
-
-        async def _get_llm(s):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
        _, batch_indexed, batch_failed = await pipeline.index_batch_parallel(
            connector_docs,
-            _get_llm,
            max_concurrency=3,
            on_heartbeat=on_heartbeat,
        )
@ -294,7 +279,6 @@ async def _index_selected_files(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    enable_summary: bool,
    on_heartbeat: HeartbeatCallbackType | None = None,
    vision_llm=None,
 ) -> tuple[int, int, int, list[str]]:
@ -345,7 +329,6 @@ async def _index_selected_files(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat,
        vision_llm=vision_llm,
    )
@ -379,7 +362,6 @@ async def _index_full_scan(
    max_files: int,
    include_subfolders: bool = True,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
-    enable_summary: bool = True,
    vision_llm=None,
 ) -> tuple[int, int, int]:
    """Full scan indexing of a folder.
@ -454,7 +436,6 @@ async def _index_full_scan(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat_callback,
        vision_llm=vision_llm,
    )
@ -487,7 +468,6 @@ async def _index_with_delta_sync(
    log_entry: object,
    max_files: int,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
-    enable_summary: bool = True,
    vision_llm=None,
 ) -> tuple[int, int, int, str | None]:
    """Delta sync using OneDrive change tracking.
@ -579,7 +559,6 @@ async def _index_with_delta_sync(
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
-        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat_callback,
        vision_llm=vision_llm,
    )
@ -651,7 +630,6 @@ async def index_onedrive_files(
            )
            return 0, 0, error_msg, 0

-        connector_enable_summary = getattr(connector, "enable_summary", True)
        connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
        vision_llm = None
        if connector_enable_vision_llm:
@ -681,7 +659,6 @@ async def index_onedrive_files(
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
-                enable_summary=connector_enable_summary,
                vision_llm=vision_llm,
            )
            total_indexed += indexed
@ -711,7 +688,6 @@ async def index_onedrive_files(
                    task_logger,
                    log_entry,
                    max_files,
-                    enable_summary=connector_enable_summary,
                    vision_llm=vision_llm,
                )
                total_indexed += indexed
@ -738,7 +714,6 @@ async def index_onedrive_files(
                    log_entry,
                    max_files,
                    include_subfolders,
-                    enable_summary=connector_enable_summary,
                    vision_llm=vision_llm,
                )
                total_indexed += ri
@ -758,7 +733,6 @@ async def index_onedrive_files(
                    log_entry,
                    max_files,
                    include_subfolders,
-                    enable_summary=connector_enable_summary,
                    vision_llm=vision_llm,
                )
                total_indexed += indexed
--- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
@ -15,13 +15,11 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.connectors.webcrawler_connector import WebCrawlerConnector
 from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )
 from app.utils.webcrawler_utils import parse_webcrawler_urls
@ -372,29 +370,10 @@ async def index_crawled_urls(
                        documents_skipped += 1
                        continue

-                # Generate summary with LLM
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
+                # Select deterministic document content

-                if user_llm and connector.enable_summary:
-                    document_metadata_for_summary = {
-                        "url": url,
-                        "title": title,
-                        "description": description,
-                        "language": language,
-                        "document_type": "Crawled URL",
-                        "crawler_type": crawler_type,
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        structured_document, user_llm, document_metadata_for_summary
-                    )
-                else:
-                    summary_content = f"Crawled URL: {title}\n\nURL: {url}\n\n{content}"
-                    summary_embedding = embed_text(summary_content)
+                summary_content = f"Crawled URL: {title}\n\nURL: {url}\n\n{content}"
+                summary_embedding = embed_text(summary_content)

                # Process chunks
                chunks = await create_document_chunks(content)
--- a/surfsense_backend/app/tasks/document_processors/_save.py
+++ b/surfsense_backend/app/tasks/document_processors/_save.py
@ -1,20 +1,15 @@
-"""
-Unified document save/update logic for file processors.
-"""
+"""Unified document save/update logic for file processors."""

-import asyncio
 import logging

 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import Document, DocumentStatus, DocumentType
-from app.services.llm_service import get_user_long_context_llm
 from app.utils.document_converters import (
    create_document_chunks,
    embed_text,
    generate_content_hash,
-    generate_document_summary,
 )

 from ._helpers import (
@ -24,59 +19,6 @@ from ._helpers import (
 )
 from .base import get_current_timestamp, safe_set_chunks

-# ---------------------------------------------------------------------------
-# Summary generation
-# ---------------------------------------------------------------------------
-
-
-async def _generate_summary(
-    markdown_content: str,
-    file_name: str,
-    etl_service: str,
-    user_llm,
-    enable_summary: bool,
-) -> tuple[str, list[float]]:
-    """
-    Generate a document summary and embedding.
-
-    Docling uses its own large-document summary strategy; other ETL services
-    use the standard ``generate_document_summary`` helper.
-    """
-    if not enable_summary:
-        summary = f"File: {file_name}\n\n{markdown_content[:4000]}"
-        return summary, await asyncio.to_thread(embed_text, summary)
-
-    if etl_service == "DOCLING":
-        from app.services.docling_service import create_docling_service
-
-        docling_service = create_docling_service()
-        summary_text = await docling_service.process_large_document_summary(
-            content=markdown_content, llm=user_llm, document_title=file_name
-        )
-
-        meta = {
-            "file_name": file_name,
-            "etl_service": etl_service,
-            "document_type": "File Document",
-        }
-        parts = ["# DOCUMENT METADATA"]
-        for key, value in meta.items():
-            if value:
-                formatted_key = key.replace("_", " ").title()
-                parts.append(f"**{formatted_key}:** {value}")
-
-        enhanced = "\n".join(parts) + "\n\n# DOCUMENT SUMMARY\n\n" + summary_text
-        return enhanced, await asyncio.to_thread(embed_text, enhanced)
-
-    # Standard summary (Unstructured / LlamaCloud / others)
-    meta = {
-        "file_name": file_name,
-        "etl_service": etl_service,
-        "document_type": "File Document",
-    }
-    return await generate_document_summary(markdown_content, user_llm, meta)
-
-
 # ---------------------------------------------------------------------------
 # Unified save function
 # ---------------------------------------------------------------------------
@ -90,7 +32,6 @@ async def save_file_document(
    user_id: str,
    etl_service: str,
    connector: dict | None = None,
-    enable_summary: bool = True,
 ) -> Document | None:
    """
    Process and store a file document with deduplication and migration support.
@ -106,7 +47,6 @@ async def save_file_document(
        user_id: ID of the user
        etl_service: Name of the ETL service (UNSTRUCTURED, LLAMACLOUD, DOCLING)
        connector: Optional connector info for Google Drive files
-        enable_summary: Whether to generate an AI summary

    Returns:
        Document object if successful, None if duplicate detected
@ -133,24 +73,16 @@ async def save_file_document(
            if should_skip:
                return doc

-        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-        if not user_llm:
-            raise RuntimeError(
-                f"No long context LLM configured for user {user_id} "
-                f"in search space {search_space_id}"
-            )
-
-        summary_content, summary_embedding = await _generate_summary(
-            markdown_content, file_name, etl_service, user_llm, enable_summary
-        )
+        document_content = f"File: {file_name}\n\n{markdown_content[:4000]}"
+        document_embedding = embed_text(document_content)
        chunks = await create_document_chunks(markdown_content)
        doc_metadata = {"FILE_NAME": file_name, "ETL_SERVICE": etl_service}

        if existing_document:
            existing_document.title = file_name
-            existing_document.content = summary_content
+            existing_document.content = document_content
            existing_document.content_hash = content_hash
-            existing_document.embedding = summary_embedding
+            existing_document.embedding = document_embedding
            existing_document.document_metadata = doc_metadata
            await safe_set_chunks(session, existing_document, chunks)
            existing_document.source_markdown = markdown_content
@ -171,8 +103,8 @@ async def save_file_document(
            title=file_name,
            document_type=doc_type,
            document_metadata=doc_metadata,
-            content=summary_content,
-            embedding=summary_embedding,
+            content=document_content,
+            embedding=document_embedding,
            chunks=chunks,
            content_hash=content_hash,
            unique_identifier_hash=primary_hash,
--- a/surfsense_backend/app/tasks/document_processors/circleback_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/circleback_processor.py
@ -25,11 +25,10 @@ from app.db import (
    SearchSourceConnectorType,
    SearchSpace,
 )
-from app.services.llm_service import get_document_summary_llm
 from app.utils.document_converters import (
    create_document_chunks,
+    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -176,34 +175,8 @@ async def add_circleback_meeting_document(
        # PHASE 3: Process the document content
        # =======================================================================

-        # Get LLM for generating summary
-        llm = await get_document_summary_llm(session, search_space_id)
-        if not llm:
-            logger.warning(
-                f"No LLM configured for search space {search_space_id}. Using content as summary."
-            )
-            # Use first 1000 chars as summary if no LLM available
-            summary_content = (
-                markdown_content[:1000] + "..."
-                if len(markdown_content) > 1000
-                else markdown_content
-            )
-            summary_embedding = None
-        else:
-            # Generate summary with metadata
-            summary_metadata = {
-                "meeting_name": meeting_name,
-                "meeting_id": meeting_id,
-                "document_type": "Circleback Meeting",
-                **{
-                    k: v
-                    for k, v in metadata.items()
-                    if isinstance(v, str | int | float | bool)
-                },
-            }
-            summary_content, summary_embedding = await generate_document_summary(
-                markdown_content, llm, summary_metadata
-            )
+        summary_content = markdown_content
+        summary_embedding = embed_text(summary_content)

        # Process chunks
        chunks = await create_document_chunks(markdown_content)
@ -224,8 +197,7 @@ async def add_circleback_meeting_document(
        document.title = meeting_name
        document.content = summary_content
        document.content_hash = content_hash
-        if summary_embedding is not None:
-            document.embedding = summary_embedding
+        document.embedding = summary_embedding
        document.document_metadata = document_metadata
        await safe_set_chunks(session, document, chunks)
        document.source_markdown = markdown_content
--- a/surfsense_backend/app/tasks/document_processors/extension_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/extension_processor.py
@ -9,12 +9,11 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import Document, DocumentType
 from app.schemas import ExtensionDocumentContent
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
+    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )

@ -123,26 +122,8 @@ async def add_extension_received_document(
                    f"Content changed for URL {content.metadata.VisitedWebPageURL}. Updating document."
                )

-        # Get user's long context LLM (needed for both create and update)
-        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-        if not user_llm:
-            raise RuntimeError(
-                f"No long context LLM configured for user {user_id} in search space {search_space_id}"
-            )
-
-        # Generate summary with metadata
-        document_metadata = {
-            "session_id": content.metadata.BrowsingSessionId,
-            "url": content.metadata.VisitedWebPageURL,
-            "title": content.metadata.VisitedWebPageTitle,
-            "referrer": content.metadata.VisitedWebPageReffererURL,
-            "timestamp": content.metadata.VisitedWebPageDateWithTimeInISOString,
-            "duration_ms": content.metadata.VisitedWebPageVisitDurationInMilliseconds,
-            "document_type": "Browser Extension Capture",
-        }
-        summary_content, summary_embedding = await generate_document_summary(
-            combined_document_string, user_llm, document_metadata
-        )
+        summary_content = combined_document_string
+        summary_embedding = embed_text(summary_content)

        # Process chunks
        chunks = await create_document_chunks(content.pageContent)
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@ -10,7 +10,7 @@ from __future__ import annotations
 import contextlib
 import logging
 import os
-from dataclasses import dataclass, field
+from dataclasses import dataclass

 from fastapi import HTTPException
 from sqlalchemy.ext.asyncio import AsyncSession
@ -49,12 +49,6 @@ class _ProcessingContext:
    notification: Notification | None = None
    use_vision_llm: bool = False
    processing_mode: str = "basic"
-    enable_summary: bool = field(init=False)
-
-    def __post_init__(self) -> None:
-        self.enable_summary = (
-            self.connector.get("enable_summary", True) if self.connector else True
-        )


 # ---------------------------------------------------------------------------
@ -262,7 +256,6 @@ async def _process_document_upload(ctx: _ProcessingContext) -> Document | None:
        ctx.user_id,
        etl_result.etl_service,
        ctx.connector,
-        enable_summary=ctx.enable_summary,
    )

    if result:
@ -467,7 +460,6 @@ async def process_file_in_background_with_document(
    log_entry: Log,
    connector: dict | None = None,
    notification: Notification | None = None,
-    should_summarize: bool = False,
    use_vision_llm: bool = False,
    processing_mode: str = "basic",
 ) -> Document | None:
@ -483,7 +475,6 @@ async def process_file_in_background_with_document(
    from app.indexing_pipeline.adapters.file_upload_adapter import (
        UploadDocumentAdapter,
    )
-    from app.services.llm_service import get_user_long_context_llm
    from app.utils.document_converters import generate_content_hash

    from .base import check_duplicate_document
@ -523,8 +514,6 @@ async def process_file_in_background_with_document(
                stage="chunking",
            )

-        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-
        adapter = UploadDocumentAdapter(session)
        await adapter.index(
            markdown_content=markdown_content,
@ -532,8 +521,6 @@ async def process_file_in_background_with_document(
            etl_service=etl_service,
            search_space_id=search_space_id,
            user_id=user_id,
-            llm=user_llm,
-            should_summarize=should_summarize,
        )

        if billable_pages > 0:
--- a/surfsense_backend/app/tasks/document_processors/markdown_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/markdown_processor.py
@ -8,12 +8,11 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import Document, DocumentStatus, DocumentType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
+    embed_text,
    generate_content_hash,
-    generate_document_summary,
 )

 from ._helpers import (
@ -183,21 +182,8 @@ async def add_received_markdown_file_document(
                return doc
            # Content changed - continue to update

-        # Get user's long context LLM (needed for both create and update)
-        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-        if not user_llm:
-            raise RuntimeError(
-                f"No long context LLM configured for user {user_id} in search space {search_space_id}"
-            )
-
-        # Generate summary with metadata
-        document_metadata = {
-            "file_name": file_name,
-            "document_type": "Markdown File Document",
-        }
-        summary_content, summary_embedding = await generate_document_summary(
-            file_in_markdown, user_llm, document_metadata
-        )
+        summary_content = f"File: {file_name}\n\n{file_in_markdown[:4000]}"
+        summary_embedding = embed_text(summary_content)

        # Process chunks
        chunks = await create_document_chunks(file_in_markdown)
--- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py
@ -17,12 +17,11 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from youtube_transcript_api import YouTubeTranscriptApi

 from app.db import Document, DocumentStatus, DocumentType
-from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
+    embed_text,
    generate_content_hash,
-    generate_document_summary,
    generate_unique_identifier_hash,
 )
 from app.utils.proxy_config import get_requests_proxies
@ -355,40 +354,8 @@ async def add_youtube_video_document(
            await session.commit()
            return document

-        # Get LLM for summary generation
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Preparing for summary generation: {video_data.get('title', 'YouTube Video')}",
-            {"stage": "llm_setup"},
-        )
-
-        # Get user's long context LLM
-        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-        if not user_llm:
-            raise RuntimeError(
-                f"No long context LLM configured for user {user_id} in search space {search_space_id}"
-            )
-
-        # Generate summary
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Generating summary for video: {video_data.get('title', 'YouTube Video')}",
-            {"stage": "summary_generation"},
-        )
-
-        # Generate summary with metadata
-        document_metadata_for_summary = {
-            "url": url,
-            "video_id": video_id,
-            "title": video_data.get("title", "YouTube Video"),
-            "author": video_data.get("author_name", "Unknown"),
-            "thumbnail": video_data.get("thumbnail_url", ""),
-            "document_type": "YouTube Video Document",
-            "has_transcript": "No captions available" not in transcript_text,
-        }
-        summary_content, summary_embedding = await generate_document_summary(
-            combined_document_string, user_llm, document_metadata_for_summary
-        )
+        summary_content = combined_document_string
+        summary_embedding = embed_text(summary_content)

        # Process chunks
        await task_logger.log_task_progress(
--- a/surfsense_backend/app/utils/document_converters.py
+++ b/surfsense_backend/app/utils/document_converters.py
@ -9,7 +9,6 @@ from litellm import get_model_info, token_counter

 from app.config import config
 from app.db import Chunk, DocumentType
-from app.prompts import SUMMARY_PROMPT_TEMPLATE

 logger = logging.getLogger(__name__)

@ -176,57 +175,6 @@ def optimize_content_for_context_window(
    return optimized_content


-async def generate_document_summary(
-    content: str,
-    user_llm,
-    document_metadata: dict | None = None,
-) -> tuple[str, list[float]]:
-    """
-    Generate summary and embedding for document content with metadata.
-
-    Args:
-        content: Document content
-        user_llm: User's LLM instance
-        document_metadata: Optional metadata dictionary to include in summary
-
-    Returns:
-        Tuple of (enhanced_summary_content, summary_embedding)
-    """
-    # Get model name from user_llm for token counting
-    model_name = getattr(user_llm, "model", "gpt-3.5-turbo")  # Fallback to default
-
-    # Optimize content to fit within context window
-    optimized_content = optimize_content_for_context_window(
-        content, document_metadata, model_name
-    )
-
-    summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm
-    content_with_metadata = f"<DOCUMENT><DOCUMENT_METADATA>\n\n{document_metadata}\n\n</DOCUMENT_METADATA>\n\n<DOCUMENT_CONTENT>\n\n{optimized_content}\n\n</DOCUMENT_CONTENT></DOCUMENT>"
-    summary_result = await summary_chain.ainvoke({"document": content_with_metadata})
-    summary_content = summary_result.content
-
-    # Combine summary with metadata if provided
-    if document_metadata:
-        metadata_parts = []
-        metadata_parts.append("# DOCUMENT METADATA")
-
-        for key, value in document_metadata.items():
-            if value:  # Only include non-empty values
-                formatted_key = key.replace("_", " ").title()
-                metadata_parts.append(f"**{formatted_key}:** {value}")
-
-        metadata_section = "\n".join(metadata_parts)
-        enhanced_summary_content = (
-            f"{metadata_section}\n\n# DOCUMENT SUMMARY\n\n{summary_content}"
-        )
-    else:
-        enhanced_summary_content = summary_content
-
-    summary_embedding = await asyncio.to_thread(embed_text, enhanced_summary_content)
-
-    return enhanced_summary_content, summary_embedding
-
-
 async def create_document_chunks(content: str) -> list[Chunk]:
    """
    Create chunks from document content.