Make Vision LLM opt-in for uploads and connectors

This commit is contained in:
CREDO23 2026-04-10 16:45:51 +02:00
parent 0aefcbd504
commit a95bf58c8f
24 changed files with 276 additions and 20 deletions

View file

@ -0,0 +1,45 @@
"""121_add_enable_vision_llm_to_connectors
Revision ID: 121
Revises: 120
Create Date: 2026-04-09
Adds enable_vision_llm boolean column to search_source_connectors.
Defaults to False so vision LLM image processing is opt-in.
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "121"
down_revision: str | None = "120"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add the opt-in ``enable_vision_llm`` flag to ``search_source_connectors``.

    The column is a non-nullable boolean with a server-side default of
    ``false`` so existing rows opt out of vision LLM image processing.
    The add is guarded by an inspector lookup, making the migration
    idempotent if the column was already created.
    """
    bind = op.get_bind()
    inspector = sa.inspect(bind)
    column_names = {
        column["name"]
        for column in inspector.get_columns("search_source_connectors")
    }
    # Guard clause: nothing to do when a prior run already added the column.
    if "enable_vision_llm" in column_names:
        return
    op.add_column(
        "search_source_connectors",
        sa.Column(
            "enable_vision_llm",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
    )
def downgrade() -> None:
    """Remove the ``enable_vision_llm`` column from ``search_source_connectors``.

    Mirrors the existence guard used by ``upgrade()`` so the migration is
    idempotent in both directions: if the column is already absent (for
    example after a partially applied downgrade), the drop is skipped
    instead of raising a database error.
    """
    conn = op.get_bind()
    existing_columns = [
        col["name"] for col in sa.inspect(conn).get_columns("search_source_connectors")
    ]
    if "enable_vision_llm" in existing_columns:
        op.drop_column("search_source_connectors", "enable_vision_llm")

View file

@ -44,6 +44,8 @@ async def _export_paper_content(
async def download_and_extract_content( async def download_and_extract_content(
client: DropboxClient, client: DropboxClient,
file: dict[str, Any], file: dict[str, Any],
*,
vision_llm=None,
) -> tuple[str | None, dict[str, Any], str | None]: ) -> tuple[str | None, dict[str, Any], str | None]:
"""Download a Dropbox file and extract its content as markdown. """Download a Dropbox file and extract its content as markdown.
@ -91,7 +93,7 @@ async def download_and_extract_content(
from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_document import EtlRequest
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
result = await EtlPipelineService().extract( result = await EtlPipelineService(vision_llm=vision_llm).extract(
EtlRequest(file_path=temp_file_path, filename=file_name) EtlRequest(file_path=temp_file_path, filename=file_name)
) )
markdown = result.markdown_content markdown = result.markdown_content

View file

@ -27,6 +27,8 @@ logger = logging.getLogger(__name__)
async def download_and_extract_content( async def download_and_extract_content(
client: GoogleDriveClient, client: GoogleDriveClient,
file: dict[str, Any], file: dict[str, Any],
*,
vision_llm=None,
) -> tuple[str | None, dict[str, Any], str | None]: ) -> tuple[str | None, dict[str, Any], str | None]:
"""Download a Google Drive file and extract its content as markdown. """Download a Google Drive file and extract its content as markdown.
@ -103,7 +105,9 @@ async def download_and_extract_content(
etl_filename = ( etl_filename = (
file_name + extension if is_google_workspace_file(mime_type) else file_name file_name + extension if is_google_workspace_file(mime_type) else file_name
) )
markdown = await _parse_file_to_markdown(temp_file_path, etl_filename) markdown = await _parse_file_to_markdown(
temp_file_path, etl_filename, vision_llm=vision_llm
)
return markdown, drive_metadata, None return markdown, drive_metadata, None
except Exception as e: except Exception as e:
@ -115,12 +119,14 @@ async def download_and_extract_content(
os.unlink(temp_file_path) os.unlink(temp_file_path)
async def _parse_file_to_markdown(file_path: str, filename: str) -> str: async def _parse_file_to_markdown(
file_path: str, filename: str, *, vision_llm=None
) -> str:
"""Parse a local file to markdown using the unified ETL pipeline.""" """Parse a local file to markdown using the unified ETL pipeline."""
from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_document import EtlRequest
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
result = await EtlPipelineService().extract( result = await EtlPipelineService(vision_llm=vision_llm).extract(
EtlRequest(file_path=file_path, filename=filename) EtlRequest(file_path=file_path, filename=filename)
) )
return result.markdown_content return result.markdown_content

View file

@ -16,6 +16,8 @@ logger = logging.getLogger(__name__)
async def download_and_extract_content( async def download_and_extract_content(
client: OneDriveClient, client: OneDriveClient,
file: dict[str, Any], file: dict[str, Any],
*,
vision_llm=None,
) -> tuple[str | None, dict[str, Any], str | None]: ) -> tuple[str | None, dict[str, Any], str | None]:
"""Download a OneDrive file and extract its content as markdown. """Download a OneDrive file and extract its content as markdown.
@ -65,7 +67,9 @@ async def download_and_extract_content(
if error: if error:
return None, metadata, error return None, metadata, error
markdown = await _parse_file_to_markdown(temp_file_path, file_name) markdown = await _parse_file_to_markdown(
temp_file_path, file_name, vision_llm=vision_llm
)
return markdown, metadata, None return markdown, metadata, None
except Exception as e: except Exception as e:
@ -77,12 +81,14 @@ async def download_and_extract_content(
os.unlink(temp_file_path) os.unlink(temp_file_path)
async def _parse_file_to_markdown(file_path: str, filename: str) -> str: async def _parse_file_to_markdown(
file_path: str, filename: str, *, vision_llm=None
) -> str:
"""Parse a local file to markdown using the unified ETL pipeline.""" """Parse a local file to markdown using the unified ETL pipeline."""
from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_document import EtlRequest
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
result = await EtlPipelineService().extract( result = await EtlPipelineService(vision_llm=vision_llm).extract(
EtlRequest(file_path=file_path, filename=filename) EtlRequest(file_path=file_path, filename=filename)
) )
return result.markdown_content return result.markdown_content

View file

@ -1555,6 +1555,13 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
Boolean, nullable=False, default=False, server_default="false" Boolean, nullable=False, default=False, server_default="false"
) )
# Vision LLM for image files - disabled by default to save cost/time.
# When enabled, images are described via a vision language model instead
# of falling back to the document parser.
enable_vision_llm = Column(
Boolean, nullable=False, default=False, server_default="false"
)
# Periodic indexing fields # Periodic indexing fields
periodic_indexing_enabled = Column(Boolean, nullable=False, default=False) periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
indexing_frequency_minutes = Column(Integer, nullable=True) indexing_frequency_minutes = Column(Integer, nullable=True)

View file

@ -123,6 +123,7 @@ async def create_documents_file_upload(
files: list[UploadFile], files: list[UploadFile],
search_space_id: int = Form(...), search_space_id: int = Form(...),
should_summarize: bool = Form(False), should_summarize: bool = Form(False),
use_vision_llm: bool = Form(False),
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user), user: User = Depends(current_active_user),
dispatcher: TaskDispatcher = Depends(get_task_dispatcher), dispatcher: TaskDispatcher = Depends(get_task_dispatcher),
@ -272,6 +273,7 @@ async def create_documents_file_upload(
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=str(user.id), user_id=str(user.id),
should_summarize=should_summarize, should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
) )
return { return {
@ -1490,6 +1492,7 @@ async def folder_upload(
relative_paths: str = Form(...), relative_paths: str = Form(...),
root_folder_id: int | None = Form(None), root_folder_id: int | None = Form(None),
enable_summary: bool = Form(False), enable_summary: bool = Form(False),
use_vision_llm: bool = Form(False),
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user), user: User = Depends(current_active_user),
): ):
@ -1616,6 +1619,7 @@ async def folder_upload(
folder_name=folder_name, folder_name=folder_name,
root_folder_id=root_folder_id, root_folder_id=root_folder_id,
enable_summary=enable_summary, enable_summary=enable_summary,
use_vision_llm=use_vision_llm,
file_mappings=list(file_mappings), file_mappings=list(file_mappings),
) )

View file

@ -17,6 +17,7 @@ class SearchSourceConnectorBase(BaseModel):
last_indexed_at: datetime | None = None last_indexed_at: datetime | None = None
config: dict[str, Any] config: dict[str, Any]
enable_summary: bool = False enable_summary: bool = False
enable_vision_llm: bool = False
periodic_indexing_enabled: bool = False periodic_indexing_enabled: bool = False
indexing_frequency_minutes: int | None = None indexing_frequency_minutes: int | None = None
next_scheduled_at: datetime | None = None next_scheduled_at: datetime | None = None
@ -67,6 +68,7 @@ class SearchSourceConnectorUpdate(BaseModel):
last_indexed_at: datetime | None = None last_indexed_at: datetime | None = None
config: dict[str, Any] | None = None config: dict[str, Any] | None = None
enable_summary: bool | None = None enable_summary: bool | None = None
enable_vision_llm: bool | None = None
periodic_indexing_enabled: bool | None = None periodic_indexing_enabled: bool | None = None
indexing_frequency_minutes: int | None = None indexing_frequency_minutes: int | None = None
next_scheduled_at: datetime | None = None next_scheduled_at: datetime | None = None

View file

@ -19,6 +19,7 @@ class TaskDispatcher(Protocol):
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
should_summarize: bool = False, should_summarize: bool = False,
use_vision_llm: bool = False,
) -> None: ... ) -> None: ...
@ -34,6 +35,7 @@ class CeleryTaskDispatcher:
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
should_summarize: bool = False, should_summarize: bool = False,
use_vision_llm: bool = False,
) -> None: ) -> None:
from app.tasks.celery_tasks.document_tasks import ( from app.tasks.celery_tasks.document_tasks import (
process_file_upload_with_document_task, process_file_upload_with_document_task,
@ -46,6 +48,7 @@ class CeleryTaskDispatcher:
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=user_id, user_id=user_id,
should_summarize=should_summarize, should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
) )

View file

@ -778,6 +778,7 @@ def process_file_upload_with_document_task(
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
should_summarize: bool = False, should_summarize: bool = False,
use_vision_llm: bool = False,
): ):
""" """
Celery task to process uploaded file with existing pending document. Celery task to process uploaded file with existing pending document.
@ -833,6 +834,7 @@ def process_file_upload_with_document_task(
search_space_id, search_space_id,
user_id, user_id,
should_summarize=should_summarize, should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
) )
) )
logger.info( logger.info(
@ -869,6 +871,7 @@ async def _process_file_with_document(
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
should_summarize: bool = False, should_summarize: bool = False,
use_vision_llm: bool = False,
): ):
""" """
Process file and update existing pending document status. Process file and update existing pending document status.
@ -971,6 +974,7 @@ async def _process_file_with_document(
log_entry=log_entry, log_entry=log_entry,
notification=notification, notification=notification,
should_summarize=should_summarize, should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
) )
# Update notification on success # Update notification on success
@ -1428,6 +1432,7 @@ def index_uploaded_folder_files_task(
root_folder_id: int, root_folder_id: int,
enable_summary: bool, enable_summary: bool,
file_mappings: list[dict], file_mappings: list[dict],
use_vision_llm: bool = False,
): ):
"""Celery task to index files uploaded from the desktop app.""" """Celery task to index files uploaded from the desktop app."""
loop = asyncio.new_event_loop() loop = asyncio.new_event_loop()
@ -1441,6 +1446,7 @@ def index_uploaded_folder_files_task(
root_folder_id=root_folder_id, root_folder_id=root_folder_id,
enable_summary=enable_summary, enable_summary=enable_summary,
file_mappings=file_mappings, file_mappings=file_mappings,
use_vision_llm=use_vision_llm,
) )
) )
finally: finally:
@ -1454,6 +1460,7 @@ async def _index_uploaded_folder_files_async(
root_folder_id: int, root_folder_id: int,
enable_summary: bool, enable_summary: bool,
file_mappings: list[dict], file_mappings: list[dict],
use_vision_llm: bool = False,
): ):
"""Run upload-based folder indexing with notification + heartbeat.""" """Run upload-based folder indexing with notification + heartbeat."""
file_count = len(file_mappings) file_count = len(file_mappings)
@ -1503,6 +1510,7 @@ async def _index_uploaded_folder_files_async(
enable_summary=enable_summary, enable_summary=enable_summary,
file_mappings=file_mappings, file_mappings=file_mappings,
on_heartbeat_callback=_heartbeat_progress, on_heartbeat_callback=_heartbeat_progress,
use_vision_llm=use_vision_llm,
) )
if notification: if notification:

View file

@ -164,6 +164,7 @@ async def _download_files_parallel(
enable_summary: bool, enable_summary: bool,
max_concurrency: int = 3, max_concurrency: int = 3,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[list[ConnectorDocument], int]: ) -> tuple[list[ConnectorDocument], int]:
"""Download and ETL files in parallel. Returns (docs, failed_count).""" """Download and ETL files in parallel. Returns (docs, failed_count)."""
results: list[ConnectorDocument] = [] results: list[ConnectorDocument] = []
@ -176,7 +177,7 @@ async def _download_files_parallel(
nonlocal last_heartbeat, completed_count nonlocal last_heartbeat, completed_count
async with sem: async with sem:
markdown, db_metadata, error = await download_and_extract_content( markdown, db_metadata, error = await download_and_extract_content(
dropbox_client, file dropbox_client, file, vision_llm=vision_llm
) )
if error or not markdown: if error or not markdown:
file_name = file.get("name", "Unknown") file_name = file.get("name", "Unknown")
@ -224,6 +225,7 @@ async def _download_and_index(
user_id: str, user_id: str,
enable_summary: bool, enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int]: ) -> tuple[int, int]:
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed).""" """Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
connector_docs, download_failed = await _download_files_parallel( connector_docs, download_failed = await _download_files_parallel(
@ -234,6 +236,7 @@ async def _download_and_index(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat, on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
) )
batch_indexed = 0 batch_indexed = 0
@ -287,6 +290,7 @@ async def _index_with_delta_sync(
max_files: int, max_files: int,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True, enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int, str]: ) -> tuple[int, int, int, str]:
"""Delta sync using Dropbox cursor-based change tracking. """Delta sync using Dropbox cursor-based change tracking.
@ -359,6 +363,7 @@ async def _index_with_delta_sync(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback, on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
) )
indexed = renamed_count + batch_indexed indexed = renamed_count + batch_indexed
@ -384,6 +389,7 @@ async def _index_full_scan(
incremental_sync: bool = True, incremental_sync: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True, enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]: ) -> tuple[int, int, int]:
"""Full scan indexing of a folder. """Full scan indexing of a folder.
@ -469,6 +475,7 @@ async def _index_full_scan(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback, on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
) )
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -498,6 +505,7 @@ async def _index_selected_files(
enable_summary: bool, enable_summary: bool,
incremental_sync: bool = True, incremental_sync: bool = True,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int, int, list[str]]: ) -> tuple[int, int, int, list[str]]:
"""Index user-selected files using the parallel pipeline.""" """Index user-selected files using the parallel pipeline."""
page_limit_service = PageLimitService(session) page_limit_service = PageLimitService(session)
@ -557,6 +565,7 @@ async def _index_selected_files(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat, on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
) )
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -621,6 +630,13 @@ async def index_dropbox_files(
return 0, 0, error_msg, 0 return 0, 0, error_msg, 0
connector_enable_summary = getattr(connector, "enable_summary", True) connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
dropbox_client = DropboxClient(session, connector_id) dropbox_client = DropboxClient(session, connector_id)
indexing_options = items_dict.get("indexing_options", {}) indexing_options = items_dict.get("indexing_options", {})
@ -650,6 +666,7 @@ async def index_dropbox_files(
user_id=user_id, user_id=user_id,
enable_summary=connector_enable_summary, enable_summary=connector_enable_summary,
incremental_sync=incremental_sync, incremental_sync=incremental_sync,
vision_llm=vision_llm,
) )
total_indexed += indexed total_indexed += indexed
total_skipped += skipped total_skipped += skipped
@ -684,6 +701,7 @@ async def index_dropbox_files(
log_entry, log_entry,
max_files, max_files,
enable_summary=connector_enable_summary, enable_summary=connector_enable_summary,
vision_llm=vision_llm,
) )
folder_cursors[folder_path] = new_cursor folder_cursors[folder_path] = new_cursor
total_unsupported += unsup total_unsupported += unsup
@ -703,6 +721,7 @@ async def index_dropbox_files(
include_subfolders, include_subfolders,
incremental_sync=incremental_sync, incremental_sync=incremental_sync,
enable_summary=connector_enable_summary, enable_summary=connector_enable_summary,
vision_llm=vision_llm,
) )
total_unsupported += unsup total_unsupported += unsup

View file

@ -261,6 +261,7 @@ async def _download_files_parallel(
enable_summary: bool, enable_summary: bool,
max_concurrency: int = 3, max_concurrency: int = 3,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[list[ConnectorDocument], int]: ) -> tuple[list[ConnectorDocument], int]:
"""Download and ETL files in parallel, returning ConnectorDocuments. """Download and ETL files in parallel, returning ConnectorDocuments.
@ -276,7 +277,7 @@ async def _download_files_parallel(
nonlocal last_heartbeat, completed_count nonlocal last_heartbeat, completed_count
async with sem: async with sem:
markdown, drive_metadata, error = await download_and_extract_content( markdown, drive_metadata, error = await download_and_extract_content(
drive_client, file drive_client, file, vision_llm=vision_llm
) )
if error or not markdown: if error or not markdown:
file_name = file.get("name", "Unknown") file_name = file.get("name", "Unknown")
@ -322,6 +323,7 @@ async def _process_single_file(
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
enable_summary: bool = True, enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]: ) -> tuple[int, int, int]:
"""Download, extract, and index a single Drive file via the pipeline. """Download, extract, and index a single Drive file via the pipeline.
@ -343,7 +345,7 @@ async def _process_single_file(
await page_limit_service.check_page_limit(user_id, estimated_pages) await page_limit_service.check_page_limit(user_id, estimated_pages)
markdown, drive_metadata, error = await download_and_extract_content( markdown, drive_metadata, error = await download_and_extract_content(
drive_client, file drive_client, file, vision_llm=vision_llm
) )
if error or not markdown: if error or not markdown:
logger.warning(f"ETL failed for {file_name}: {error}") logger.warning(f"ETL failed for {file_name}: {error}")
@ -433,6 +435,7 @@ async def _download_and_index(
user_id: str, user_id: str,
enable_summary: bool, enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int]: ) -> tuple[int, int]:
"""Phase 2+3: parallel download then parallel indexing. """Phase 2+3: parallel download then parallel indexing.
@ -446,6 +449,7 @@ async def _download_and_index(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat, on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
) )
batch_indexed = 0 batch_indexed = 0
@ -476,6 +480,7 @@ async def _index_selected_files(
user_id: str, user_id: str,
enable_summary: bool, enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int, int, list[str]]: ) -> tuple[int, int, int, list[str]]:
"""Index user-selected files using the parallel pipeline. """Index user-selected files using the parallel pipeline.
@ -540,6 +545,7 @@ async def _index_selected_files(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat, on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
) )
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -573,6 +579,7 @@ async def _index_full_scan(
include_subfolders: bool = False, include_subfolders: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True, enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]: ) -> tuple[int, int, int]:
"""Full scan indexing of a folder. """Full scan indexing of a folder.
@ -703,6 +710,7 @@ async def _index_full_scan(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback, on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
) )
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -736,6 +744,7 @@ async def _index_with_delta_sync(
include_subfolders: bool = False, include_subfolders: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True, enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]: ) -> tuple[int, int, int]:
"""Delta sync using change tracking. """Delta sync using change tracking.
@ -844,6 +853,7 @@ async def _index_with_delta_sync(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback, on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
) )
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -947,6 +957,11 @@ async def index_google_drive_files(
) )
connector_enable_summary = getattr(connector, "enable_summary", True) connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
drive_client = GoogleDriveClient( drive_client = GoogleDriveClient(
session, connector_id, credentials=pre_built_credentials session, connector_id, credentials=pre_built_credentials
) )
@ -986,6 +1001,7 @@ async def index_google_drive_files(
include_subfolders, include_subfolders,
on_heartbeat_callback, on_heartbeat_callback,
connector_enable_summary, connector_enable_summary,
vision_llm=vision_llm,
) )
documents_unsupported += du documents_unsupported += du
logger.info("Running reconciliation scan after delta sync") logger.info("Running reconciliation scan after delta sync")
@ -1004,6 +1020,7 @@ async def index_google_drive_files(
include_subfolders, include_subfolders,
on_heartbeat_callback, on_heartbeat_callback,
connector_enable_summary, connector_enable_summary,
vision_llm=vision_llm,
) )
documents_indexed += ri documents_indexed += ri
documents_skipped += rs documents_skipped += rs
@ -1029,6 +1046,7 @@ async def index_google_drive_files(
include_subfolders, include_subfolders,
on_heartbeat_callback, on_heartbeat_callback,
connector_enable_summary, connector_enable_summary,
vision_llm=vision_llm,
) )
if documents_indexed > 0 or can_use_delta: if documents_indexed > 0 or can_use_delta:
@ -1146,6 +1164,11 @@ async def index_google_drive_single_file(
) )
connector_enable_summary = getattr(connector, "enable_summary", True) connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
drive_client = GoogleDriveClient( drive_client = GoogleDriveClient(
session, connector_id, credentials=pre_built_credentials session, connector_id, credentials=pre_built_credentials
) )
@ -1168,6 +1191,7 @@ async def index_google_drive_single_file(
search_space_id, search_space_id,
user_id, user_id,
connector_enable_summary, connector_enable_summary,
vision_llm=vision_llm,
) )
await session.commit() await session.commit()
@ -1278,6 +1302,11 @@ async def index_google_drive_selected_files(
return 0, 0, [error_msg] return 0, 0, [error_msg]
connector_enable_summary = getattr(connector, "enable_summary", True) connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
drive_client = GoogleDriveClient( drive_client = GoogleDriveClient(
session, connector_id, credentials=pre_built_credentials session, connector_id, credentials=pre_built_credentials
) )
@ -1291,6 +1320,7 @@ async def index_google_drive_selected_files(
user_id=user_id, user_id=user_id,
enable_summary=connector_enable_summary, enable_summary=connector_enable_summary,
on_heartbeat=on_heartbeat_callback, on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
) )
if unsupported > 0: if unsupported > 0:

View file

@ -153,7 +153,7 @@ def scan_folder(
return files return files
async def _read_file_content(file_path: str, filename: str) -> str: async def _read_file_content(file_path: str, filename: str, *, vision_llm=None) -> str:
"""Read file content via the unified ETL pipeline. """Read file content via the unified ETL pipeline.
All file types (plaintext, audio, direct-convert, document, image) are All file types (plaintext, audio, direct-convert, document, image) are
@ -162,7 +162,7 @@ async def _read_file_content(file_path: str, filename: str) -> str:
from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_document import EtlRequest
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
result = await EtlPipelineService().extract( result = await EtlPipelineService(vision_llm=vision_llm).extract(
EtlRequest(file_path=file_path, filename=filename) EtlRequest(file_path=file_path, filename=filename)
) )
return result.markdown_content return result.markdown_content
@ -199,12 +199,14 @@ async def _compute_file_content_hash(
file_path: str, file_path: str,
filename: str, filename: str,
search_space_id: int, search_space_id: int,
*,
vision_llm=None,
) -> tuple[str, str]: ) -> tuple[str, str]:
"""Read a file (via ETL if needed) and compute its content hash. """Read a file (via ETL if needed) and compute its content hash.
Returns (content_text, content_hash). Returns (content_text, content_hash).
""" """
content = await _read_file_content(file_path, filename) content = await _read_file_content(file_path, filename, vision_llm=vision_llm)
return content, _content_hash(content, search_space_id) return content, _content_hash(content, search_space_id)
@ -1268,6 +1270,7 @@ async def index_uploaded_files(
enable_summary: bool, enable_summary: bool,
file_mappings: list[dict], file_mappings: list[dict],
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
use_vision_llm: bool = False,
) -> tuple[int, int, str | None]: ) -> tuple[int, int, str | None]:
"""Index files uploaded from the desktop app via temp paths. """Index files uploaded from the desktop app via temp paths.
@ -1304,6 +1307,12 @@ async def index_uploaded_files(
pipeline = IndexingPipelineService(session) pipeline = IndexingPipelineService(session)
llm = await get_user_long_context_llm(session, user_id, search_space_id) llm = await get_user_long_context_llm(session, user_id, search_space_id)
vision_llm_instance = None
if use_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm_instance = await get_vision_llm(session, search_space_id)
indexed_count = 0 indexed_count = 0
failed_count = 0 failed_count = 0
errors: list[str] = [] errors: list[str] = []
@ -1351,7 +1360,8 @@ async def index_uploaded_files(
try: try:
content, content_hash = await _compute_file_content_hash( content, content_hash = await _compute_file_content_hash(
temp_path, filename, search_space_id temp_path, filename, search_space_id,
vision_llm=vision_llm_instance,
) )
except Exception as e: except Exception as e:
logger.warning(f"Could not read {relative_path}: {e}") logger.warning(f"Could not read {relative_path}: {e}")

View file

@ -171,6 +171,7 @@ async def _download_files_parallel(
enable_summary: bool, enable_summary: bool,
max_concurrency: int = 3, max_concurrency: int = 3,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[list[ConnectorDocument], int]: ) -> tuple[list[ConnectorDocument], int]:
"""Download and ETL files in parallel. Returns (docs, failed_count).""" """Download and ETL files in parallel. Returns (docs, failed_count)."""
results: list[ConnectorDocument] = [] results: list[ConnectorDocument] = []
@ -183,7 +184,7 @@ async def _download_files_parallel(
nonlocal last_heartbeat, completed_count nonlocal last_heartbeat, completed_count
async with sem: async with sem:
markdown, od_metadata, error = await download_and_extract_content( markdown, od_metadata, error = await download_and_extract_content(
onedrive_client, file onedrive_client, file, vision_llm=vision_llm
) )
if error or not markdown: if error or not markdown:
file_name = file.get("name", "Unknown") file_name = file.get("name", "Unknown")
@ -231,6 +232,7 @@ async def _download_and_index(
user_id: str, user_id: str,
enable_summary: bool, enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int]: ) -> tuple[int, int]:
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed).""" """Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
connector_docs, download_failed = await _download_files_parallel( connector_docs, download_failed = await _download_files_parallel(
@ -241,6 +243,7 @@ async def _download_and_index(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat, on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
) )
batch_indexed = 0 batch_indexed = 0
@ -293,6 +296,7 @@ async def _index_selected_files(
user_id: str, user_id: str,
enable_summary: bool, enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None, on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int, int, list[str]]: ) -> tuple[int, int, int, list[str]]:
"""Index user-selected files using the parallel pipeline.""" """Index user-selected files using the parallel pipeline."""
page_limit_service = PageLimitService(session) page_limit_service = PageLimitService(session)
@ -343,6 +347,7 @@ async def _index_selected_files(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat, on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
) )
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -375,6 +380,7 @@ async def _index_full_scan(
include_subfolders: bool = True, include_subfolders: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True, enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]: ) -> tuple[int, int, int]:
"""Full scan indexing of a folder. """Full scan indexing of a folder.
@ -450,6 +456,7 @@ async def _index_full_scan(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback, on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
) )
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -481,6 +488,7 @@ async def _index_with_delta_sync(
max_files: int, max_files: int,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True, enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int, str | None]: ) -> tuple[int, int, int, str | None]:
"""Delta sync using OneDrive change tracking. """Delta sync using OneDrive change tracking.
@ -573,6 +581,7 @@ async def _index_with_delta_sync(
user_id=user_id, user_id=user_id,
enable_summary=enable_summary, enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback, on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
) )
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -643,6 +652,12 @@ async def index_onedrive_files(
return 0, 0, error_msg, 0 return 0, 0, error_msg, 0
connector_enable_summary = getattr(connector, "enable_summary", True) connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
onedrive_client = OneDriveClient(session, connector_id) onedrive_client = OneDriveClient(session, connector_id)
indexing_options = items_dict.get("indexing_options", {}) indexing_options = items_dict.get("indexing_options", {})
@ -666,6 +681,7 @@ async def index_onedrive_files(
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=user_id, user_id=user_id,
enable_summary=connector_enable_summary, enable_summary=connector_enable_summary,
vision_llm=vision_llm,
) )
total_indexed += indexed total_indexed += indexed
total_skipped += skipped total_skipped += skipped
@ -695,6 +711,7 @@ async def index_onedrive_files(
log_entry, log_entry,
max_files, max_files,
enable_summary=connector_enable_summary, enable_summary=connector_enable_summary,
vision_llm=vision_llm,
) )
total_indexed += indexed total_indexed += indexed
total_skipped += skipped total_skipped += skipped
@ -721,6 +738,7 @@ async def index_onedrive_files(
max_files, max_files,
include_subfolders, include_subfolders,
enable_summary=connector_enable_summary, enable_summary=connector_enable_summary,
vision_llm=vision_llm,
) )
total_indexed += ri total_indexed += ri
total_skipped += rs total_skipped += rs
@ -740,6 +758,7 @@ async def index_onedrive_files(
max_files, max_files,
include_subfolders, include_subfolders,
enable_summary=connector_enable_summary, enable_summary=connector_enable_summary,
vision_llm=vision_llm,
) )
total_indexed += indexed total_indexed += indexed
total_skipped += skipped total_skipped += skipped

View file

@ -46,6 +46,7 @@ class _ProcessingContext:
log_entry: Log log_entry: Log
connector: dict | None = None connector: dict | None = None
notification: Notification | None = None notification: Notification | None = None
use_vision_llm: bool = False
enable_summary: bool = field(init=False) enable_summary: bool = field(init=False)
def __post_init__(self) -> None: def __post_init__(self) -> None:
@ -134,7 +135,7 @@ async def _process_non_document_upload(ctx: _ProcessingContext) -> Document | No
) )
vision_llm = None vision_llm = None
if etl_classify(ctx.filename) == FileCategory.IMAGE: if ctx.use_vision_llm and etl_classify(ctx.filename) == FileCategory.IMAGE:
from app.services.llm_service import get_vision_llm from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(ctx.session, ctx.search_space_id) vision_llm = await get_vision_llm(ctx.session, ctx.search_space_id)
@ -288,6 +289,7 @@ async def process_file_in_background(
log_entry: Log, log_entry: Log,
connector: dict | None = None, connector: dict | None = None,
notification: Notification | None = None, notification: Notification | None = None,
use_vision_llm: bool = False,
) -> Document | None: ) -> Document | None:
ctx = _ProcessingContext( ctx = _ProcessingContext(
session=session, session=session,
@ -299,6 +301,7 @@ async def process_file_in_background(
log_entry=log_entry, log_entry=log_entry,
connector=connector, connector=connector,
notification=notification, notification=notification,
use_vision_llm=use_vision_llm,
) )
try: try:
@ -349,6 +352,7 @@ async def _extract_file_content(
task_logger: TaskLoggingService, task_logger: TaskLoggingService,
log_entry: Log, log_entry: Log,
notification: Notification | None, notification: Notification | None,
use_vision_llm: bool = False,
) -> tuple[str, str]: ) -> tuple[str, str]:
""" """
Extract markdown content from a file regardless of type. Extract markdown content from a file regardless of type.
@ -396,7 +400,7 @@ async def _extract_file_content(
await page_limit_service.check_page_limit(user_id, estimated_pages) await page_limit_service.check_page_limit(user_id, estimated_pages)
vision_llm = None vision_llm = None
if category == FileCategory.IMAGE: if use_vision_llm and category == FileCategory.IMAGE:
from app.services.llm_service import get_vision_llm from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id) vision_llm = await get_vision_llm(session, search_space_id)
@ -435,6 +439,7 @@ async def process_file_in_background_with_document(
connector: dict | None = None, connector: dict | None = None,
notification: Notification | None = None, notification: Notification | None = None,
should_summarize: bool = False, should_summarize: bool = False,
use_vision_llm: bool = False,
) -> Document | None: ) -> Document | None:
""" """
Process file and update existing pending document (2-phase pattern). Process file and update existing pending document (2-phase pattern).
@ -463,6 +468,7 @@ async def process_file_in_background_with_document(
task_logger, task_logger,
log_entry, log_entry,
notification, notification,
use_vision_llm=use_vision_llm,
) )
if not markdown_content: if not markdown_content:

View file

@ -69,6 +69,7 @@ class InlineTaskDispatcher:
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
should_summarize: bool = False, should_summarize: bool = False,
use_vision_llm: bool = False,
) -> None: ) -> None:
from app.tasks.celery_tasks.document_tasks import ( from app.tasks.celery_tasks.document_tasks import (
_process_file_with_document, _process_file_with_document,
@ -82,6 +83,7 @@ class InlineTaskDispatcher:
search_space_id, search_space_id,
user_id, user_id,
should_summarize=should_summarize, should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
) )

View file

@ -98,6 +98,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary, enableSummary,
enableVisionLlm,
allConnectors, allConnectors,
viewingAccountsType, viewingAccountsType,
viewingMCPList, viewingMCPList,
@ -109,6 +110,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
setPeriodicEnabled, setPeriodicEnabled,
setFrequencyMinutes, setFrequencyMinutes,
setEnableSummary, setEnableSummary,
setEnableVisionLlm,
handleOpenChange, handleOpenChange,
handleTabChange, handleTabChange,
handleScroll, handleScroll,
@ -279,6 +281,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
periodicEnabled={periodicEnabled} periodicEnabled={periodicEnabled}
frequencyMinutes={frequencyMinutes} frequencyMinutes={frequencyMinutes}
enableSummary={enableSummary} enableSummary={enableSummary}
enableVisionLlm={enableVisionLlm}
isSaving={isSaving} isSaving={isSaving}
isDisconnecting={isDisconnecting} isDisconnecting={isDisconnecting}
isIndexing={indexingConnectorIds.has(editingConnector.id)} isIndexing={indexingConnectorIds.has(editingConnector.id)}
@ -288,6 +291,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
onPeriodicEnabledChange={setPeriodicEnabled} onPeriodicEnabledChange={setPeriodicEnabled}
onFrequencyChange={setFrequencyMinutes} onFrequencyChange={setFrequencyMinutes}
onEnableSummaryChange={setEnableSummary} onEnableSummaryChange={setEnableSummary}
onEnableVisionLlmChange={setEnableVisionLlm}
onSave={() => { onSave={() => {
startIndexing(editingConnector.id); startIndexing(editingConnector.id);
handleSaveConnector(() => refreshConnectors()); handleSaveConnector(() => refreshConnectors());
@ -336,6 +340,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
periodicEnabled={periodicEnabled} periodicEnabled={periodicEnabled}
frequencyMinutes={frequencyMinutes} frequencyMinutes={frequencyMinutes}
enableSummary={enableSummary} enableSummary={enableSummary}
enableVisionLlm={enableVisionLlm}
isStartingIndexing={isStartingIndexing} isStartingIndexing={isStartingIndexing}
isFromOAuth={isFromOAuth} isFromOAuth={isFromOAuth}
onStartDateChange={setStartDate} onStartDateChange={setStartDate}
@ -343,6 +348,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
onPeriodicEnabledChange={setPeriodicEnabled} onPeriodicEnabledChange={setPeriodicEnabled}
onFrequencyChange={setFrequencyMinutes} onFrequencyChange={setFrequencyMinutes}
onEnableSummaryChange={setEnableSummary} onEnableSummaryChange={setEnableSummary}
onEnableVisionLlmChange={setEnableVisionLlm}
onConfigChange={setIndexingConnectorConfig} onConfigChange={setIndexingConnectorConfig}
onStartIndexing={() => { onStartIndexing={() => {
if (indexingConfig.connectorId) { if (indexingConfig.connectorId) {

View file

@ -0,0 +1,25 @@
"use client";
import type { FC } from "react";
import { Switch } from "@/components/ui/switch";
interface VisionLLMConfigProps {
enabled: boolean;
onEnabledChange: (enabled: boolean) => void;
}
export const VisionLLMConfig: FC<VisionLLMConfigProps> = ({ enabled, onEnabledChange }) => {
return (
<div className="rounded-xl bg-slate-400/5 dark:bg-white/5 p-3 sm:p-6">
<div className="flex items-center justify-between">
<div className="space-y-1">
<h3 className="font-medium text-sm sm:text-base">Enable Vision LLM</h3>
<p className="text-xs sm:text-sm text-muted-foreground">
Describes images using AI vision (costly, slower)
</p>
</div>
<Switch checked={enabled} onCheckedChange={onEnabledChange} />
</div>
</div>
);
};

View file

@ -15,6 +15,7 @@ import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector"; import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config"; import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
import { SummaryConfig } from "../../components/summary-config"; import { SummaryConfig } from "../../components/summary-config";
import { VisionLLMConfig } from "../../components/vision-llm-config";
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab"; import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
import { getConnectorConfigComponent } from "../index"; import { getConnectorConfigComponent } from "../index";
@ -38,6 +39,7 @@ interface ConnectorEditViewProps {
periodicEnabled: boolean; periodicEnabled: boolean;
frequencyMinutes: string; frequencyMinutes: string;
enableSummary: boolean; enableSummary: boolean;
enableVisionLlm: boolean;
isSaving: boolean; isSaving: boolean;
isDisconnecting: boolean; isDisconnecting: boolean;
isIndexing?: boolean; isIndexing?: boolean;
@ -47,6 +49,7 @@ interface ConnectorEditViewProps {
onPeriodicEnabledChange: (enabled: boolean) => void; onPeriodicEnabledChange: (enabled: boolean) => void;
onFrequencyChange: (frequency: string) => void; onFrequencyChange: (frequency: string) => void;
onEnableSummaryChange: (enabled: boolean) => void; onEnableSummaryChange: (enabled: boolean) => void;
onEnableVisionLlmChange: (enabled: boolean) => void;
onSave: () => void; onSave: () => void;
onDisconnect: () => void; onDisconnect: () => void;
onBack: () => void; onBack: () => void;
@ -62,6 +65,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary, enableSummary,
enableVisionLlm,
isSaving, isSaving,
isDisconnecting, isDisconnecting,
isIndexing = false, isIndexing = false,
@ -71,6 +75,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
onPeriodicEnabledChange, onPeriodicEnabledChange,
onFrequencyChange, onFrequencyChange,
onEnableSummaryChange, onEnableSummaryChange,
onEnableVisionLlmChange,
onSave, onSave,
onDisconnect, onDisconnect,
onBack, onBack,
@ -272,6 +277,14 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
{/* AI Summary toggle */} {/* AI Summary toggle */}
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} /> <SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
{/* Vision LLM toggle - only for file-based connectors */}
{(connector.connector_type === "GOOGLE_DRIVE_CONNECTOR" ||
connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
connector.connector_type === "DROPBOX_CONNECTOR" ||
connector.connector_type === "ONEDRIVE_CONNECTOR") && (
<VisionLLMConfig enabled={enableVisionLlm} onEnabledChange={onEnableVisionLlmChange} />
)}
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */} {/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" && {connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&

View file

@ -10,6 +10,7 @@ import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector"; import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config"; import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
import { SummaryConfig } from "../../components/summary-config"; import { SummaryConfig } from "../../components/summary-config";
import { VisionLLMConfig } from "../../components/vision-llm-config";
import type { IndexingConfigState } from "../../constants/connector-constants"; import type { IndexingConfigState } from "../../constants/connector-constants";
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab"; import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
import { getConnectorConfigComponent } from "../index"; import { getConnectorConfigComponent } from "../index";
@ -22,6 +23,7 @@ interface IndexingConfigurationViewProps {
periodicEnabled: boolean; periodicEnabled: boolean;
frequencyMinutes: string; frequencyMinutes: string;
enableSummary: boolean; enableSummary: boolean;
enableVisionLlm: boolean;
isStartingIndexing: boolean; isStartingIndexing: boolean;
isFromOAuth?: boolean; isFromOAuth?: boolean;
onStartDateChange: (date: Date | undefined) => void; onStartDateChange: (date: Date | undefined) => void;
@ -29,6 +31,7 @@ interface IndexingConfigurationViewProps {
onPeriodicEnabledChange: (enabled: boolean) => void; onPeriodicEnabledChange: (enabled: boolean) => void;
onFrequencyChange: (frequency: string) => void; onFrequencyChange: (frequency: string) => void;
onEnableSummaryChange: (enabled: boolean) => void; onEnableSummaryChange: (enabled: boolean) => void;
onEnableVisionLlmChange: (enabled: boolean) => void;
onConfigChange?: (config: Record<string, unknown>) => void; onConfigChange?: (config: Record<string, unknown>) => void;
onStartIndexing: () => void; onStartIndexing: () => void;
onSkip: () => void; onSkip: () => void;
@ -42,6 +45,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary, enableSummary,
enableVisionLlm,
isStartingIndexing, isStartingIndexing,
isFromOAuth = false, isFromOAuth = false,
onStartDateChange, onStartDateChange,
@ -49,6 +53,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
onPeriodicEnabledChange, onPeriodicEnabledChange,
onFrequencyChange, onFrequencyChange,
onEnableSummaryChange, onEnableSummaryChange,
onEnableVisionLlmChange,
onConfigChange, onConfigChange,
onStartIndexing, onStartIndexing,
onSkip, onSkip,
@ -158,6 +163,14 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
{/* AI Summary toggle */} {/* AI Summary toggle */}
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} /> <SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
{/* Vision LLM toggle - only for file-based connectors */}
{(config.connectorType === "GOOGLE_DRIVE_CONNECTOR" ||
config.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
config.connectorType === "DROPBOX_CONNECTOR" ||
config.connectorType === "ONEDRIVE_CONNECTOR") && (
<VisionLLMConfig enabled={enableVisionLlm} onEnabledChange={onEnableVisionLlmChange} />
)}
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */} {/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" && {config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&

View file

@ -80,6 +80,7 @@ export const useConnectorDialog = () => {
const [periodicEnabled, setPeriodicEnabled] = useState(false); const [periodicEnabled, setPeriodicEnabled] = useState(false);
const [frequencyMinutes, setFrequencyMinutes] = useState("1440"); const [frequencyMinutes, setFrequencyMinutes] = useState("1440");
const [enableSummary, setEnableSummary] = useState(false); const [enableSummary, setEnableSummary] = useState(false);
const [enableVisionLlm, setEnableVisionLlm] = useState(false);
// Edit mode state // Edit mode state
const [editingConnector, setEditingConnector] = useState<SearchSourceConnector | null>(null); const [editingConnector, setEditingConnector] = useState<SearchSourceConnector | null>(null);
@ -621,6 +622,7 @@ export const useConnectorDialog = () => {
setPeriodicEnabled(false); setPeriodicEnabled(false);
setFrequencyMinutes("1440"); setFrequencyMinutes("1440");
setEnableSummary(connector.enable_summary ?? false); setEnableSummary(connector.enable_summary ?? false);
setEnableVisionLlm(connector.enable_vision_llm ?? false);
setStartDate(undefined); setStartDate(undefined);
setEndDate(undefined); setEndDate(undefined);
@ -763,12 +765,13 @@ export const useConnectorDialog = () => {
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined; const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
// Update connector with summary, periodic sync settings, and config changes // Update connector with summary, periodic sync settings, and config changes
if (enableSummary || periodicEnabled || indexingConnectorConfig) { if (enableSummary || enableVisionLlm || periodicEnabled || indexingConnectorConfig) {
const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined; const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined;
await updateConnector({ await updateConnector({
id: indexingConfig.connectorId, id: indexingConfig.connectorId,
data: { data: {
enable_summary: enableSummary, enable_summary: enableSummary,
enable_vision_llm: enableVisionLlm,
...(periodicEnabled && { ...(periodicEnabled && {
periodic_indexing_enabled: true, periodic_indexing_enabled: true,
indexing_frequency_minutes: frequency, indexing_frequency_minutes: frequency,
@ -896,6 +899,7 @@ export const useConnectorDialog = () => {
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary, enableSummary,
enableVisionLlm,
indexingConnectorConfig, indexingConnectorConfig,
setIsOpen, setIsOpen,
] ]
@ -960,6 +964,7 @@ export const useConnectorDialog = () => {
setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled); setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled);
setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440"); setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440");
setEnableSummary(connector.enable_summary ?? false); setEnableSummary(connector.enable_summary ?? false);
setEnableVisionLlm(connector.enable_vision_llm ?? false);
setStartDate(undefined); setStartDate(undefined);
setEndDate(undefined); setEndDate(undefined);
}, },
@ -1038,6 +1043,7 @@ export const useConnectorDialog = () => {
data: { data: {
name: connectorName || editingConnector.name, name: connectorName || editingConnector.name,
enable_summary: enableSummary, enable_summary: enableSummary,
enable_vision_llm: enableVisionLlm,
periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled, periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled,
indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency, indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency,
config: connectorConfig || editingConnector.config, config: connectorConfig || editingConnector.config,
@ -1172,6 +1178,7 @@ export const useConnectorDialog = () => {
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary, enableSummary,
enableVisionLlm,
getFrequencyLabel, getFrequencyLabel,
connectorConfig, connectorConfig,
connectorName, connectorName,
@ -1332,6 +1339,7 @@ export const useConnectorDialog = () => {
setPeriodicEnabled(false); setPeriodicEnabled(false);
setFrequencyMinutes("1440"); setFrequencyMinutes("1440");
setEnableSummary(false); setEnableSummary(false);
setEnableVisionLlm(false);
} }
} }
}, },
@ -1368,6 +1376,7 @@ export const useConnectorDialog = () => {
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary, enableSummary,
enableVisionLlm,
searchSpaceId, searchSpaceId,
allConnectors, allConnectors,
viewingAccountsType, viewingAccountsType,
@ -1382,6 +1391,7 @@ export const useConnectorDialog = () => {
setPeriodicEnabled, setPeriodicEnabled,
setFrequencyMinutes, setFrequencyMinutes,
setEnableSummary, setEnableSummary,
setEnableVisionLlm,
setConnectorName, setConnectorName,
// Handlers // Handlers

View file

@ -136,6 +136,7 @@ export function DocumentUploadTab({
const [uploadProgress, setUploadProgress] = useState(0); const [uploadProgress, setUploadProgress] = useState(0);
const [accordionValue, setAccordionValue] = useState<string>(""); const [accordionValue, setAccordionValue] = useState<string>("");
const [shouldSummarize, setShouldSummarize] = useState(false); const [shouldSummarize, setShouldSummarize] = useState(false);
const [useVisionLlm, setUseVisionLlm] = useState(false);
const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom); const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom);
const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation; const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation;
const fileInputRef = useRef<HTMLInputElement>(null); const fileInputRef = useRef<HTMLInputElement>(null);
@ -361,6 +362,7 @@ export function DocumentUploadTab({
relative_paths: batch.map((e) => e.relativePath), relative_paths: batch.map((e) => e.relativePath),
root_folder_id: rootFolderId, root_folder_id: rootFolderId,
enable_summary: shouldSummarize, enable_summary: shouldSummarize,
use_vision_llm: useVisionLlm,
} }
); );
@ -407,6 +409,7 @@ export function DocumentUploadTab({
files: rawFiles, files: rawFiles,
search_space_id: Number(searchSpaceId), search_space_id: Number(searchSpaceId),
should_summarize: shouldSummarize, should_summarize: shouldSummarize,
use_vision_llm: useVisionLlm,
}, },
{ {
onSuccess: () => { onSuccess: () => {
@ -696,6 +699,16 @@ export function DocumentUploadTab({
<Switch checked={shouldSummarize} onCheckedChange={setShouldSummarize} /> <Switch checked={shouldSummarize} onCheckedChange={setShouldSummarize} />
</div> </div>
<div className={toggleRowClass}>
<div className="space-y-0.5">
<p className="font-medium text-sm">Enable Vision LLM</p>
<p className="text-xs text-muted-foreground">
Describes images using AI vision (costly, slower)
</p>
</div>
<Switch checked={useVisionLlm} onCheckedChange={setUseVisionLlm} />
</div>
<Button <Button
className="w-full" className="w-full"
onClick={handleUpload} onClick={handleUpload}

View file

@ -44,6 +44,7 @@ export const searchSourceConnector = z.object({
last_indexed_at: z.string().nullable(), last_indexed_at: z.string().nullable(),
config: z.record(z.string(), z.any()), config: z.record(z.string(), z.any()),
enable_summary: z.boolean().default(false), enable_summary: z.boolean().default(false),
enable_vision_llm: z.boolean().default(false),
periodic_indexing_enabled: z.boolean(), periodic_indexing_enabled: z.boolean(),
indexing_frequency_minutes: z.number().nullable(), indexing_frequency_minutes: z.number().nullable(),
next_scheduled_at: z.string().nullable(), next_scheduled_at: z.string().nullable(),
@ -98,6 +99,7 @@ export const createConnectorRequest = z.object({
last_indexed_at: true, last_indexed_at: true,
config: true, config: true,
enable_summary: true, enable_summary: true,
enable_vision_llm: true,
periodic_indexing_enabled: true, periodic_indexing_enabled: true,
indexing_frequency_minutes: true, indexing_frequency_minutes: true,
next_scheduled_at: true, next_scheduled_at: true,
@ -123,6 +125,7 @@ export const updateConnectorRequest = z.object({
last_indexed_at: true, last_indexed_at: true,
config: true, config: true,
enable_summary: true, enable_summary: true,
enable_vision_llm: true,
periodic_indexing_enabled: true, periodic_indexing_enabled: true,
indexing_frequency_minutes: true, indexing_frequency_minutes: true,
next_scheduled_at: true, next_scheduled_at: true,

View file

@ -148,6 +148,7 @@ export const uploadDocumentRequest = z.object({
files: z.array(z.instanceof(File)), files: z.array(z.instanceof(File)),
search_space_id: z.number(), search_space_id: z.number(),
should_summarize: z.boolean().default(false), should_summarize: z.boolean().default(false),
use_vision_llm: z.boolean().default(false),
}); });
export const uploadDocumentResponse = z.object({ export const uploadDocumentResponse = z.object({

View file

@ -127,7 +127,7 @@ class DocumentsApiService {
throw new ValidationError(`Invalid request: ${errorMessage}`); throw new ValidationError(`Invalid request: ${errorMessage}`);
} }
const { files, search_space_id, should_summarize } = parsedRequest.data; const { files, search_space_id, should_summarize, use_vision_llm } = parsedRequest.data;
const UPLOAD_BATCH_SIZE = 5; const UPLOAD_BATCH_SIZE = 5;
const batches: File[][] = []; const batches: File[][] = [];
@ -146,6 +146,7 @@ class DocumentsApiService {
for (const file of batch) formData.append("files", file); for (const file of batch) formData.append("files", file);
formData.append("search_space_id", String(search_space_id)); formData.append("search_space_id", String(search_space_id));
formData.append("should_summarize", String(should_summarize)); formData.append("should_summarize", String(should_summarize));
formData.append("use_vision_llm", String(use_vision_llm));
const controller = new AbortController(); const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 120_000); const timeoutId = setTimeout(() => controller.abort(), 120_000);
@ -442,6 +443,7 @@ class DocumentsApiService {
relative_paths: string[]; relative_paths: string[];
root_folder_id?: number | null; root_folder_id?: number | null;
enable_summary?: boolean; enable_summary?: boolean;
use_vision_llm?: boolean;
}, },
signal?: AbortSignal signal?: AbortSignal
): Promise<{ message: string; status: string; root_folder_id: number; file_count: number }> => { ): Promise<{ message: string; status: string; root_folder_id: number; file_count: number }> => {
@ -456,6 +458,7 @@ class DocumentsApiService {
formData.append("root_folder_id", String(metadata.root_folder_id)); formData.append("root_folder_id", String(metadata.root_folder_id));
} }
formData.append("enable_summary", String(metadata.enable_summary ?? false)); formData.append("enable_summary", String(metadata.enable_summary ?? false));
formData.append("use_vision_llm", String(metadata.use_vision_llm ?? false));
const totalSize = files.reduce((acc, f) => acc + f.size, 0); const totalSize = files.reduce((acc, f) => acc + f.size, 0);
const timeoutMs = Math.min(Math.max((totalSize / (1024 * 1024)) * 5000, 30_000), 600_000); const timeoutMs = Math.min(Math.max((totalSize / (1024 * 1024)) * 5000, 30_000), 600_000);