From a95bf58c8f4e5ae593ca0eb49354c9668b8c3a51 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 10 Apr 2026 16:45:51 +0200 Subject: [PATCH] Make Vision LLM opt-in for uploads and connectors --- ...121_add_enable_vision_llm_to_connectors.py | 45 +++++++++++++++++++ .../connectors/dropbox/content_extractor.py | 4 +- .../google_drive/content_extractor.py | 12 +++-- .../connectors/onedrive/content_extractor.py | 12 +++-- surfsense_backend/app/db.py | 7 +++ .../app/routes/documents_routes.py | 4 ++ .../app/schemas/search_source_connector.py | 2 + .../app/services/task_dispatcher.py | 3 ++ .../app/tasks/celery_tasks/document_tasks.py | 8 ++++ .../connector_indexers/dropbox_indexer.py | 21 ++++++++- .../google_drive_indexer.py | 34 +++++++++++++- .../local_folder_indexer.py | 18 ++++++-- .../connector_indexers/onedrive_indexer.py | 21 ++++++++- .../document_processors/file_processors.py | 10 ++++- .../integration/document_upload/conftest.py | 2 + .../assistant-ui/connector-popup.tsx | 6 +++ .../components/vision-llm-config.tsx | 25 +++++++++++ .../views/connector-edit-view.tsx | 13 ++++++ .../views/indexing-configuration-view.tsx | 13 ++++++ .../hooks/use-connector-dialog.ts | 14 +++++- .../components/sources/DocumentUploadTab.tsx | 13 ++++++ .../contracts/types/connector.types.ts | 3 ++ .../contracts/types/document.types.ts | 1 + .../lib/apis/documents-api.service.ts | 5 ++- 24 files changed, 276 insertions(+), 20 deletions(-) create mode 100644 surfsense_backend/alembic/versions/121_add_enable_vision_llm_to_connectors.py create mode 100644 surfsense_web/components/assistant-ui/connector-popup/components/vision-llm-config.tsx diff --git a/surfsense_backend/alembic/versions/121_add_enable_vision_llm_to_connectors.py b/surfsense_backend/alembic/versions/121_add_enable_vision_llm_to_connectors.py new file mode 100644 index 000000000..659545645 --- /dev/null +++ b/surfsense_backend/alembic/versions/121_add_enable_vision_llm_to_connectors.py @@ -0,0 +1,45 @@ +"""121_add_enable_vision_llm_to_connectors + +Revision ID: 121 +Revises: 120 +Create Date: 2026-04-09 + +Adds enable_vision_llm boolean column to search_source_connectors. +Defaults to False so vision LLM image processing is opt-in. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "121" +down_revision: str | None = "120" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + conn = op.get_bind() + existing_columns = [ + col["name"] for col in sa.inspect(conn).get_columns("search_source_connectors") + ] + + if "enable_vision_llm" not in existing_columns: + op.add_column( + "search_source_connectors", + sa.Column( + "enable_vision_llm", + sa.Boolean(), + nullable=False, + server_default=sa.text("false"), + ), + ) + + +def downgrade() -> None: + op.drop_column("search_source_connectors", "enable_vision_llm") diff --git a/surfsense_backend/app/connectors/dropbox/content_extractor.py b/surfsense_backend/app/connectors/dropbox/content_extractor.py index 8cbc3e417..372d2fc82 100644 --- a/surfsense_backend/app/connectors/dropbox/content_extractor.py +++ b/surfsense_backend/app/connectors/dropbox/content_extractor.py @@ -44,6 +44,8 @@ async def _export_paper_content( async def download_and_extract_content( client: DropboxClient, file: dict[str, Any], + *, + vision_llm=None, ) -> tuple[str | None, dict[str, Any], str | None]: """Download a Dropbox file and extract its content as markdown. @@ -91,7 +93,7 @@ async def download_and_extract_content( from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_pipeline_service import EtlPipelineService - result = await EtlPipelineService().extract( + result = await EtlPipelineService(vision_llm=vision_llm).extract( EtlRequest(file_path=temp_file_path, filename=file_name) ) markdown = result.markdown_content diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 83ff32e82..86c789b97 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -27,6 +27,8 @@ logger = logging.getLogger(__name__) async def download_and_extract_content( client: GoogleDriveClient, file: dict[str, Any], + *, + vision_llm=None, ) -> tuple[str | None, dict[str, Any], str | None]: """Download a Google Drive file and extract its content as markdown. @@ -103,7 +105,9 @@ async def download_and_extract_content( etl_filename = ( file_name + extension if is_google_workspace_file(mime_type) else file_name ) - markdown = await _parse_file_to_markdown(temp_file_path, etl_filename) + markdown = await _parse_file_to_markdown( + temp_file_path, etl_filename, vision_llm=vision_llm + ) return markdown, drive_metadata, None except Exception as e: @@ -115,12 +119,14 @@ async def download_and_extract_content( os.unlink(temp_file_path) -async def _parse_file_to_markdown(file_path: str, filename: str) -> str: +async def _parse_file_to_markdown( + file_path: str, filename: str, *, vision_llm=None +) -> str: """Parse a local file to markdown using the unified ETL pipeline.""" from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_pipeline_service import EtlPipelineService - result = await EtlPipelineService().extract( + result = await EtlPipelineService(vision_llm=vision_llm).extract( EtlRequest(file_path=file_path, filename=filename) ) return result.markdown_content diff --git a/surfsense_backend/app/connectors/onedrive/content_extractor.py b/surfsense_backend/app/connectors/onedrive/content_extractor.py index 2238b8603..3154f2eca 100644 --- a/surfsense_backend/app/connectors/onedrive/content_extractor.py +++ b/surfsense_backend/app/connectors/onedrive/content_extractor.py @@ -16,6 +16,8 @@ logger = logging.getLogger(__name__) async def download_and_extract_content( client: OneDriveClient, file: dict[str, Any], + *, + vision_llm=None, ) -> tuple[str | None, dict[str, Any], str | None]: """Download a OneDrive file and extract its content as markdown. @@ -65,7 +67,9 @@ async def download_and_extract_content( if error: return None, metadata, error - markdown = await _parse_file_to_markdown(temp_file_path, file_name) + markdown = await _parse_file_to_markdown( + temp_file_path, file_name, vision_llm=vision_llm + ) return markdown, metadata, None except Exception as e: @@ -77,12 +81,14 @@ async def download_and_extract_content( os.unlink(temp_file_path) -async def _parse_file_to_markdown(file_path: str, filename: str) -> str: +async def _parse_file_to_markdown( + file_path: str, filename: str, *, vision_llm=None +) -> str: """Parse a local file to markdown using the unified ETL pipeline.""" from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_pipeline_service import EtlPipelineService - result = await EtlPipelineService().extract( + result = await EtlPipelineService(vision_llm=vision_llm).extract( EtlRequest(file_path=file_path, filename=filename) ) return result.markdown_content diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 01a6bbda0..cbcb5efa5 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -1555,6 +1555,13 @@ class SearchSourceConnector(BaseModel, TimestampMixin): Boolean, nullable=False, default=False, server_default="false" ) + # Vision LLM for image files - disabled by default to save cost/time. + # When enabled, images are described via a vision language model instead + # of falling back to the document parser. + enable_vision_llm = Column( + Boolean, nullable=False, default=False, server_default="false" + ) + # Periodic indexing fields periodic_indexing_enabled = Column(Boolean, nullable=False, default=False) indexing_frequency_minutes = Column(Integer, nullable=True) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 25841a107..aa7f98294 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -123,6 +123,7 @@ async def create_documents_file_upload( files: list[UploadFile], search_space_id: int = Form(...), should_summarize: bool = Form(False), + use_vision_llm: bool = Form(False), session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), dispatcher: TaskDispatcher = Depends(get_task_dispatcher), @@ -272,6 +273,7 @@ async def create_documents_file_upload( search_space_id=search_space_id, user_id=str(user.id), should_summarize=should_summarize, + use_vision_llm=use_vision_llm, ) return { @@ -1490,6 +1492,7 @@ async def folder_upload( relative_paths: str = Form(...), root_folder_id: int | None = Form(None), enable_summary: bool = Form(False), + use_vision_llm: bool = Form(False), session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): @@ -1616,6 +1619,7 @@ async def folder_upload( folder_name=folder_name, root_folder_id=root_folder_id, enable_summary=enable_summary, + use_vision_llm=use_vision_llm, file_mappings=list(file_mappings), ) diff --git a/surfsense_backend/app/schemas/search_source_connector.py b/surfsense_backend/app/schemas/search_source_connector.py index 1b0ed0b13..aac7b92d5 100644 --- a/surfsense_backend/app/schemas/search_source_connector.py +++ b/surfsense_backend/app/schemas/search_source_connector.py @@ -17,6 +17,7 @@ class SearchSourceConnectorBase(BaseModel): last_indexed_at: datetime | None = None config: dict[str, Any] enable_summary: bool = False + enable_vision_llm: bool = False periodic_indexing_enabled: bool = False indexing_frequency_minutes: int | None = None next_scheduled_at: datetime | None = None @@ -67,6 +68,7 @@ class SearchSourceConnectorUpdate(BaseModel): last_indexed_at: datetime | None = None config: dict[str, Any] | None = None enable_summary: bool | None = None + enable_vision_llm: bool | None = None periodic_indexing_enabled: bool | None = None indexing_frequency_minutes: int | None = None next_scheduled_at: datetime | None = None diff --git a/surfsense_backend/app/services/task_dispatcher.py b/surfsense_backend/app/services/task_dispatcher.py index 9a6fc7d63..7bb70b406 100644 --- a/surfsense_backend/app/services/task_dispatcher.py +++ b/surfsense_backend/app/services/task_dispatcher.py @@ -19,6 +19,7 @@ class TaskDispatcher(Protocol): search_space_id: int, user_id: str, should_summarize: bool = False, + use_vision_llm: bool = False, ) -> None: ... @@ -34,6 +35,7 @@ class CeleryTaskDispatcher: search_space_id: int, user_id: str, should_summarize: bool = False, + use_vision_llm: bool = False, ) -> None: from app.tasks.celery_tasks.document_tasks import ( process_file_upload_with_document_task, @@ -46,6 +48,7 @@ class CeleryTaskDispatcher: search_space_id=search_space_id, user_id=user_id, should_summarize=should_summarize, + use_vision_llm=use_vision_llm, ) diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py index 62720826f..fc946b4bc 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py @@ -778,6 +778,7 @@ def process_file_upload_with_document_task( search_space_id: int, user_id: str, should_summarize: bool = False, + use_vision_llm: bool = False, ): """ Celery task to process uploaded file with existing pending document. @@ -833,6 +834,7 @@ def process_file_upload_with_document_task( search_space_id, user_id, should_summarize=should_summarize, + use_vision_llm=use_vision_llm, ) ) logger.info( @@ -869,6 +871,7 @@ async def _process_file_with_document( search_space_id: int, user_id: str, should_summarize: bool = False, + use_vision_llm: bool = False, ): """ Process file and update existing pending document status. @@ -971,6 +974,7 @@ async def _process_file_with_document( log_entry=log_entry, notification=notification, should_summarize=should_summarize, + use_vision_llm=use_vision_llm, ) # Update notification on success @@ -1428,6 +1432,7 @@ def index_uploaded_folder_files_task( root_folder_id: int, enable_summary: bool, file_mappings: list[dict], + use_vision_llm: bool = False, ): """Celery task to index files uploaded from the desktop app.""" loop = asyncio.new_event_loop() @@ -1441,6 +1446,7 @@ def index_uploaded_folder_files_task( root_folder_id=root_folder_id, enable_summary=enable_summary, file_mappings=file_mappings, + use_vision_llm=use_vision_llm, ) ) finally: @@ -1454,6 +1460,7 @@ async def _index_uploaded_folder_files_async( root_folder_id: int, enable_summary: bool, file_mappings: list[dict], + use_vision_llm: bool = False, ): """Run upload-based folder indexing with notification + heartbeat.""" file_count = len(file_mappings) @@ -1503,6 +1510,7 @@ async def _index_uploaded_folder_files_async( enable_summary=enable_summary, file_mappings=file_mappings, on_heartbeat_callback=_heartbeat_progress, + use_vision_llm=use_vision_llm, ) if notification: diff --git a/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py b/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py index 4a49944c2..9f8c1a33a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py @@ -164,6 +164,7 @@ async def _download_files_parallel( enable_summary: bool, max_concurrency: int = 3, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[list[ConnectorDocument], int]: """Download and ETL files in parallel. Returns (docs, failed_count).""" results: list[ConnectorDocument] = [] @@ -176,7 +177,7 @@ async def _download_files_parallel( nonlocal last_heartbeat, completed_count async with sem: markdown, db_metadata, error = await download_and_extract_content( - dropbox_client, file + dropbox_client, file, vision_llm=vision_llm ) if error or not markdown: file_name = file.get("name", "Unknown") @@ -224,6 +225,7 @@ async def _download_and_index( user_id: str, enable_summary: bool, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[int, int]: """Parallel download then parallel indexing. Returns (batch_indexed, total_failed).""" connector_docs, download_failed = await _download_files_parallel( @@ -234,6 +236,7 @@ async def _download_and_index( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat, + vision_llm=vision_llm, ) batch_indexed = 0 @@ -287,6 +290,7 @@ async def _index_with_delta_sync( max_files: int, on_heartbeat_callback: HeartbeatCallbackType | None = None, enable_summary: bool = True, + vision_llm=None, ) -> tuple[int, int, int, str]: """Delta sync using Dropbox cursor-based change tracking. @@ -359,6 +363,7 @@ async def _index_with_delta_sync( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat_callback, + vision_llm=vision_llm, ) indexed = renamed_count + batch_indexed @@ -384,6 +389,7 @@ async def _index_full_scan( incremental_sync: bool = True, on_heartbeat_callback: HeartbeatCallbackType | None = None, enable_summary: bool = True, + vision_llm=None, ) -> tuple[int, int, int]: """Full scan indexing of a folder. @@ -469,6 +475,7 @@ async def _index_full_scan( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat_callback, + vision_llm=vision_llm, ) if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: @@ -498,6 +505,7 @@ async def _index_selected_files( enable_summary: bool, incremental_sync: bool = True, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[int, int, int, list[str]]: """Index user-selected files using the parallel pipeline.""" page_limit_service = PageLimitService(session) @@ -557,6 +565,7 @@ async def _index_selected_files( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat, + vision_llm=vision_llm, ) if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: @@ -621,6 +630,13 @@ async def index_dropbox_files( return 0, 0, error_msg, 0 connector_enable_summary = getattr(connector, "enable_summary", True) + connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False) + vision_llm = None + if connector_enable_vision_llm: + from app.services.llm_service import get_vision_llm + + vision_llm = await get_vision_llm(session, search_space_id) + dropbox_client = DropboxClient(session, connector_id) indexing_options = items_dict.get("indexing_options", {}) @@ -650,6 +666,7 @@ async def index_dropbox_files( user_id=user_id, enable_summary=connector_enable_summary, incremental_sync=incremental_sync, + vision_llm=vision_llm, ) total_indexed += indexed total_skipped += skipped @@ -684,6 +701,7 @@ async def index_dropbox_files( log_entry, max_files, enable_summary=connector_enable_summary, + vision_llm=vision_llm, ) folder_cursors[folder_path] = new_cursor total_unsupported += unsup @@ -703,6 +721,7 @@ async def index_dropbox_files( include_subfolders, incremental_sync=incremental_sync, enable_summary=connector_enable_summary, + vision_llm=vision_llm, ) total_unsupported += unsup diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index b11087fe6..d8f95da63 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -261,6 +261,7 @@ async def _download_files_parallel( enable_summary: bool, max_concurrency: int = 3, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[list[ConnectorDocument], int]: """Download and ETL files in parallel, returning ConnectorDocuments. @@ -276,7 +277,7 @@ async def _download_files_parallel( nonlocal last_heartbeat, completed_count async with sem: markdown, drive_metadata, error = await download_and_extract_content( - drive_client, file + drive_client, file, vision_llm=vision_llm ) if error or not markdown: file_name = file.get("name", "Unknown") @@ -322,6 +323,7 @@ async def _process_single_file( search_space_id: int, user_id: str, enable_summary: bool = True, + vision_llm=None, ) -> tuple[int, int, int]: """Download, extract, and index a single Drive file via the pipeline. @@ -343,7 +345,7 @@ async def _process_single_file( await page_limit_service.check_page_limit(user_id, estimated_pages) markdown, drive_metadata, error = await download_and_extract_content( - drive_client, file + drive_client, file, vision_llm=vision_llm ) if error or not markdown: logger.warning(f"ETL failed for {file_name}: {error}") @@ -433,6 +435,7 @@ async def _download_and_index( user_id: str, enable_summary: bool, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[int, int]: """Phase 2+3: parallel download then parallel indexing. @@ -446,6 +449,7 @@ async def _download_and_index( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat, + vision_llm=vision_llm, ) batch_indexed = 0 @@ -476,6 +480,7 @@ async def _index_selected_files( user_id: str, enable_summary: bool, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[int, int, int, list[str]]: """Index user-selected files using the parallel pipeline. @@ -540,6 +545,7 @@ async def _index_selected_files( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat, + vision_llm=vision_llm, ) if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: @@ -573,6 +579,7 @@ async def _index_full_scan( include_subfolders: bool = False, on_heartbeat_callback: HeartbeatCallbackType | None = None, enable_summary: bool = True, + vision_llm=None, ) -> tuple[int, int, int]: """Full scan indexing of a folder. @@ -703,6 +710,7 @@ async def _index_full_scan( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat_callback, + vision_llm=vision_llm, ) if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: @@ -736,6 +744,7 @@ async def _index_with_delta_sync( include_subfolders: bool = False, on_heartbeat_callback: HeartbeatCallbackType | None = None, enable_summary: bool = True, + vision_llm=None, ) -> tuple[int, int, int]: """Delta sync using change tracking. @@ -844,6 +853,7 @@ async def _index_with_delta_sync( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat_callback, + vision_llm=vision_llm, ) if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: @@ -947,6 +957,11 @@ async def index_google_drive_files( ) connector_enable_summary = getattr(connector, "enable_summary", True) + connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False) + vision_llm = None + if connector_enable_vision_llm: + from app.services.llm_service import get_vision_llm + vision_llm = await get_vision_llm(session, search_space_id) drive_client = GoogleDriveClient( session, connector_id, credentials=pre_built_credentials ) @@ -986,6 +1001,7 @@ async def index_google_drive_files( include_subfolders, on_heartbeat_callback, connector_enable_summary, + vision_llm=vision_llm, ) documents_unsupported += du logger.info("Running reconciliation scan after delta sync") @@ -1004,6 +1020,7 @@ async def index_google_drive_files( include_subfolders, on_heartbeat_callback, connector_enable_summary, + vision_llm=vision_llm, ) documents_indexed += ri documents_skipped += rs @@ -1029,6 +1046,7 @@ async def index_google_drive_files( include_subfolders, on_heartbeat_callback, connector_enable_summary, + vision_llm=vision_llm, ) if documents_indexed > 0 or can_use_delta: @@ -1146,6 +1164,11 @@ async def index_google_drive_single_file( ) connector_enable_summary = getattr(connector, "enable_summary", True) + connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False) + vision_llm = None + if connector_enable_vision_llm: + from app.services.llm_service import get_vision_llm + vision_llm = await get_vision_llm(session, search_space_id) drive_client = GoogleDriveClient( session, connector_id, credentials=pre_built_credentials ) @@ -1168,6 +1191,7 @@ async def index_google_drive_single_file( search_space_id, user_id, connector_enable_summary, + vision_llm=vision_llm, ) await session.commit() @@ -1278,6 +1302,11 @@ async def index_google_drive_selected_files( return 0, 0, [error_msg] connector_enable_summary = getattr(connector, "enable_summary", True) + connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False) + vision_llm = None + if connector_enable_vision_llm: + from app.services.llm_service import get_vision_llm + vision_llm = await get_vision_llm(session, search_space_id) drive_client = GoogleDriveClient( session, connector_id, credentials=pre_built_credentials ) @@ -1291,6 +1320,7 @@ async def index_google_drive_selected_files( user_id=user_id, enable_summary=connector_enable_summary, on_heartbeat=on_heartbeat_callback, + vision_llm=vision_llm, ) if unsupported > 0: diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index a531916e1..2d5f9648d 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -153,7 +153,7 @@ def scan_folder( return files -async def _read_file_content(file_path: str, filename: str) -> str: +async def _read_file_content(file_path: str, filename: str, *, vision_llm=None) -> str: """Read file content via the unified ETL pipeline. All file types (plaintext, audio, direct-convert, document, image) are @@ -162,7 +162,7 @@ async def _read_file_content(file_path: str, filename: str) -> str: from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_pipeline_service import EtlPipelineService - result = await EtlPipelineService().extract( + result = await EtlPipelineService(vision_llm=vision_llm).extract( EtlRequest(file_path=file_path, filename=filename) ) return result.markdown_content @@ -199,12 +199,14 @@ async def _compute_file_content_hash( file_path: str, filename: str, search_space_id: int, + *, + vision_llm=None, ) -> tuple[str, str]: """Read a file (via ETL if needed) and compute its content hash. Returns (content_text, content_hash). """ - content = await _read_file_content(file_path, filename) + content = await _read_file_content(file_path, filename, vision_llm=vision_llm) return content, _content_hash(content, search_space_id) @@ -1268,6 +1270,7 @@ async def index_uploaded_files( enable_summary: bool, file_mappings: list[dict], on_heartbeat_callback: HeartbeatCallbackType | None = None, + use_vision_llm: bool = False, ) -> tuple[int, int, str | None]: """Index files uploaded from the desktop app via temp paths. @@ -1304,6 +1307,12 @@ async def index_uploaded_files( pipeline = IndexingPipelineService(session) llm = await get_user_long_context_llm(session, user_id, search_space_id) + vision_llm_instance = None + if use_vision_llm: + from app.services.llm_service import get_vision_llm + + vision_llm_instance = await get_vision_llm(session, search_space_id) + indexed_count = 0 failed_count = 0 errors: list[str] = [] @@ -1351,7 +1360,8 @@ async def index_uploaded_files( try: content, content_hash = await _compute_file_content_hash( - temp_path, filename, search_space_id + temp_path, filename, search_space_id, + vision_llm=vision_llm_instance, ) except Exception as e: logger.warning(f"Could not read {relative_path}: {e}") diff --git a/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py index 06517f542..aa654a9a9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py @@ -171,6 +171,7 @@ async def _download_files_parallel( enable_summary: bool, max_concurrency: int = 3, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[list[ConnectorDocument], int]: """Download and ETL files in parallel. Returns (docs, failed_count).""" results: list[ConnectorDocument] = [] @@ -183,7 +184,7 @@ async def _download_files_parallel( nonlocal last_heartbeat, completed_count async with sem: markdown, od_metadata, error = await download_and_extract_content( - onedrive_client, file + onedrive_client, file, vision_llm=vision_llm ) if error or not markdown: file_name = file.get("name", "Unknown") @@ -231,6 +232,7 @@ async def _download_and_index( user_id: str, enable_summary: bool, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[int, int]: """Parallel download then parallel indexing. Returns (batch_indexed, total_failed).""" connector_docs, download_failed = await _download_files_parallel( @@ -241,6 +243,7 @@ async def _download_and_index( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat, + vision_llm=vision_llm, ) batch_indexed = 0 @@ -293,6 +296,7 @@ async def _index_selected_files( user_id: str, enable_summary: bool, on_heartbeat: HeartbeatCallbackType | None = None, + vision_llm=None, ) -> tuple[int, int, int, list[str]]: """Index user-selected files using the parallel pipeline.""" page_limit_service = PageLimitService(session) @@ -343,6 +347,7 @@ async def _index_selected_files( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat, + vision_llm=vision_llm, ) if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: @@ -375,6 +380,7 @@ async def _index_full_scan( include_subfolders: bool = True, on_heartbeat_callback: HeartbeatCallbackType | None = None, enable_summary: bool = True, + vision_llm=None, ) -> tuple[int, int, int]: """Full scan indexing of a folder. @@ -450,6 +456,7 @@ async def _index_full_scan( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat_callback, + vision_llm=vision_llm, ) if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: @@ -481,6 +488,7 @@ async def _index_with_delta_sync( max_files: int, on_heartbeat_callback: HeartbeatCallbackType | None = None, enable_summary: bool = True, + vision_llm=None, ) -> tuple[int, int, int, str | None]: """Delta sync using OneDrive change tracking. @@ -573,6 +581,7 @@ async def _index_with_delta_sync( user_id=user_id, enable_summary=enable_summary, on_heartbeat=on_heartbeat_callback, + vision_llm=vision_llm, ) if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0: @@ -643,6 +652,12 @@ async def index_onedrive_files( return 0, 0, error_msg, 0 connector_enable_summary = getattr(connector, "enable_summary", True) + connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False) + vision_llm = None + if connector_enable_vision_llm: + from app.services.llm_service import get_vision_llm + vision_llm = await get_vision_llm(session, search_space_id) + onedrive_client = OneDriveClient(session, connector_id) indexing_options = items_dict.get("indexing_options", {}) @@ -666,6 +681,7 @@ async def index_onedrive_files( search_space_id=search_space_id, user_id=user_id, enable_summary=connector_enable_summary, + vision_llm=vision_llm, ) total_indexed += indexed total_skipped += skipped @@ -695,6 +711,7 @@ async def index_onedrive_files( log_entry, max_files, enable_summary=connector_enable_summary, + vision_llm=vision_llm, ) total_indexed += indexed total_skipped += skipped @@ -721,6 +738,7 @@ async def index_onedrive_files( max_files, include_subfolders, enable_summary=connector_enable_summary, + vision_llm=vision_llm, ) total_indexed += ri total_skipped += rs @@ -740,6 +758,7 @@ async def index_onedrive_files( max_files, include_subfolders, enable_summary=connector_enable_summary, + vision_llm=vision_llm, ) total_indexed += indexed total_skipped += skipped diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index cd06657dc..9364fa1cb 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -46,6 +46,7 @@ class _ProcessingContext: log_entry: Log connector: dict | None = None notification: Notification | None = None + use_vision_llm: bool = False enable_summary: bool = field(init=False) def __post_init__(self) -> None: @@ -134,7 +135,7 @@ async def _process_non_document_upload(ctx: _ProcessingContext) -> Document | No ) vision_llm = None - if etl_classify(ctx.filename) == FileCategory.IMAGE: + if ctx.use_vision_llm and etl_classify(ctx.filename) == FileCategory.IMAGE: from app.services.llm_service import get_vision_llm vision_llm = await get_vision_llm(ctx.session, ctx.search_space_id) @@ -288,6 +289,7 @@ async def process_file_in_background( log_entry: Log, connector: dict | None = None, notification: Notification | None = None, + use_vision_llm: bool = False, ) -> Document | None: ctx = _ProcessingContext( session=session, @@ -299,6 +301,7 @@ async def process_file_in_background( log_entry=log_entry, connector=connector, notification=notification, + use_vision_llm=use_vision_llm, ) try: @@ -349,6 +352,7 @@ async def _extract_file_content( task_logger: TaskLoggingService, log_entry: Log, notification: Notification | None, + use_vision_llm: bool = False, ) -> tuple[str, str]: """ Extract markdown content from a file regardless of type. @@ -396,7 +400,7 @@ async def _extract_file_content( await page_limit_service.check_page_limit(user_id, estimated_pages) vision_llm = None - if category == FileCategory.IMAGE: + if use_vision_llm and category == FileCategory.IMAGE: from app.services.llm_service import get_vision_llm vision_llm = await get_vision_llm(session, search_space_id) @@ -435,6 +439,7 @@ async def process_file_in_background_with_document( connector: dict | None = None, notification: Notification | None = None, should_summarize: bool = False, + use_vision_llm: bool = False, ) -> Document | None: """ Process file and update existing pending document (2-phase pattern). @@ -463,6 +468,7 @@ async def process_file_in_background_with_document( task_logger, log_entry, notification, + use_vision_llm=use_vision_llm, ) if not markdown_content: diff --git a/surfsense_backend/tests/integration/document_upload/conftest.py b/surfsense_backend/tests/integration/document_upload/conftest.py index 62f4f6b47..f35d2e605 100644 --- a/surfsense_backend/tests/integration/document_upload/conftest.py +++ b/surfsense_backend/tests/integration/document_upload/conftest.py @@ -69,6 +69,7 @@ class InlineTaskDispatcher: search_space_id: int, user_id: str, should_summarize: bool = False, + use_vision_llm: bool = False, ) -> None: from app.tasks.celery_tasks.document_tasks import ( _process_file_with_document, @@ -82,6 +83,7 @@ class InlineTaskDispatcher: search_space_id, user_id, should_summarize=should_summarize, + use_vision_llm=use_vision_llm, ) diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx index c41e986d4..84361e25b 100644 --- a/surfsense_web/components/assistant-ui/connector-popup.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup.tsx @@ -98,6 +98,7 @@ export const ConnectorIndicator = forwardRef { startIndexing(editingConnector.id); handleSaveConnector(() => refreshConnectors()); @@ -336,6 +340,7 @@ export const ConnectorIndicator = forwardRef { if (indexingConfig.connectorId) { diff --git a/surfsense_web/components/assistant-ui/connector-popup/components/vision-llm-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/components/vision-llm-config.tsx new file mode 100644 index 000000000..e5ebdbd06 --- /dev/null +++ b/surfsense_web/components/assistant-ui/connector-popup/components/vision-llm-config.tsx @@ -0,0 +1,25 @@ +"use client"; + +import type { FC } from "react"; +import { Switch } from "@/components/ui/switch"; + +interface VisionLLMConfigProps { + enabled: boolean; + onEnabledChange: (enabled: boolean) => void; +} + +export const VisionLLMConfig: FC = ({ enabled, onEnabledChange }) => { + return ( +
+
+
+

Enable Vision LLM

+

+ Describes images using AI vision (costly, slower) +

+
+ +
+
+ ); +}; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx index 7308e1e26..bea5d12e8 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx @@ -15,6 +15,7 @@ import { cn } from "@/lib/utils"; import { DateRangeSelector } from "../../components/date-range-selector"; import { PeriodicSyncConfig } from "../../components/periodic-sync-config"; import { SummaryConfig } from "../../components/summary-config"; +import { VisionLLMConfig } from "../../components/vision-llm-config"; import { getConnectorDisplayName } from "../../tabs/all-connectors-tab"; import { getConnectorConfigComponent } from "../index"; @@ -38,6 +39,7 @@ interface ConnectorEditViewProps { periodicEnabled: boolean; frequencyMinutes: string; enableSummary: boolean; + enableVisionLlm: boolean; isSaving: boolean; isDisconnecting: boolean; isIndexing?: boolean; @@ -47,6 +49,7 @@ interface ConnectorEditViewProps { onPeriodicEnabledChange: (enabled: boolean) => void; onFrequencyChange: (frequency: string) => void; onEnableSummaryChange: (enabled: boolean) => void; + onEnableVisionLlmChange: (enabled: boolean) => void; onSave: () => void; onDisconnect: () => void; onBack: () => void; @@ -62,6 +65,7 @@ export const ConnectorEditView: FC = ({ periodicEnabled, frequencyMinutes, enableSummary, + enableVisionLlm, isSaving, isDisconnecting, isIndexing = false, @@ -71,6 +75,7 @@ export const ConnectorEditView: FC = ({ onPeriodicEnabledChange, onFrequencyChange, onEnableSummaryChange, + onEnableVisionLlmChange, onSave, onDisconnect, onBack, @@ -272,6 +277,14 @@ export const ConnectorEditView: FC = ({ {/* AI Summary toggle */} + {/* Vision LLM toggle - only for file-based connectors */} + {(connector.connector_type === "GOOGLE_DRIVE_CONNECTOR" || + connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" || + connector.connector_type === "DROPBOX_CONNECTOR" || + connector.connector_type === "ONEDRIVE_CONNECTOR") && ( + + )} + {/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */} {connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" && connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx index e583cbe17..cb7438cde 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx @@ -10,6 +10,7 @@ import { cn } from "@/lib/utils"; import { DateRangeSelector } from "../../components/date-range-selector"; import { PeriodicSyncConfig } from "../../components/periodic-sync-config"; import { SummaryConfig } from "../../components/summary-config"; +import { VisionLLMConfig } from "../../components/vision-llm-config"; import type { IndexingConfigState } from "../../constants/connector-constants"; import { getConnectorDisplayName } from "../../tabs/all-connectors-tab"; import { getConnectorConfigComponent } from "../index"; @@ -22,6 +23,7 @@ interface IndexingConfigurationViewProps { periodicEnabled: boolean; frequencyMinutes: string; enableSummary: boolean; + enableVisionLlm: boolean; isStartingIndexing: boolean; isFromOAuth?: boolean; onStartDateChange: (date: Date | undefined) => void; @@ -29,6 +31,7 @@ interface IndexingConfigurationViewProps { onPeriodicEnabledChange: (enabled: boolean) => void; onFrequencyChange: (frequency: string) => void; onEnableSummaryChange: (enabled: boolean) => void; + onEnableVisionLlmChange: (enabled: boolean) => void; onConfigChange?: (config: Record) => void; onStartIndexing: () => void; onSkip: () => void; @@ -42,6 +45,7 @@ export const IndexingConfigurationView: FC = ({ periodicEnabled, frequencyMinutes, enableSummary, + enableVisionLlm, isStartingIndexing, isFromOAuth = false, onStartDateChange, @@ -49,6 +53,7 @@ export const IndexingConfigurationView: FC = ({ onPeriodicEnabledChange, onFrequencyChange, onEnableSummaryChange, + onEnableVisionLlmChange, onConfigChange, onStartIndexing, onSkip, @@ -158,6 +163,14 @@ export const IndexingConfigurationView: FC = ({ {/* AI Summary toggle */} + {/* Vision LLM toggle - only for file-based connectors */} + {(config.connectorType === "GOOGLE_DRIVE_CONNECTOR" || + config.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" || + config.connectorType === "DROPBOX_CONNECTOR" || + config.connectorType === "ONEDRIVE_CONNECTOR") && ( + + )} + {/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */} {config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" && config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts index 6543bbd72..7331549b5 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts @@ -80,6 +80,7 @@ export const useConnectorDialog = () => { const [periodicEnabled, setPeriodicEnabled] = useState(false); const [frequencyMinutes, setFrequencyMinutes] = useState("1440"); const [enableSummary, setEnableSummary] = useState(false); + const [enableVisionLlm, setEnableVisionLlm] = useState(false); // Edit mode state const [editingConnector, setEditingConnector] = useState(null); @@ -621,6 +622,7 @@ export const useConnectorDialog = () => { setPeriodicEnabled(false); setFrequencyMinutes("1440"); setEnableSummary(connector.enable_summary ?? false); + setEnableVisionLlm(connector.enable_vision_llm ?? false); setStartDate(undefined); setEndDate(undefined); @@ -763,12 +765,13 @@ export const useConnectorDialog = () => { const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined; // Update connector with summary, periodic sync settings, and config changes - if (enableSummary || periodicEnabled || indexingConnectorConfig) { - const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined; + if (enableSummary || enableVisionLlm || periodicEnabled || indexingConnectorConfig) { + const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined; await updateConnector({ id: indexingConfig.connectorId, data: { enable_summary: enableSummary, + enable_vision_llm: enableVisionLlm, ...(periodicEnabled && { periodic_indexing_enabled: true, indexing_frequency_minutes: frequency, @@ -896,6 +899,7 @@ export const useConnectorDialog = () => { periodicEnabled, frequencyMinutes, enableSummary, + enableVisionLlm, indexingConnectorConfig, setIsOpen, ] @@ -960,6 +964,7 @@ export const useConnectorDialog = () => { setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled); setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440"); setEnableSummary(connector.enable_summary ?? false); + setEnableVisionLlm(connector.enable_vision_llm ?? false); setStartDate(undefined); setEndDate(undefined); }, @@ -1038,6 +1043,7 @@ export const useConnectorDialog = () => { data: { name: connectorName || editingConnector.name, enable_summary: enableSummary, + enable_vision_llm: enableVisionLlm, periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled, indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency, config: connectorConfig || editingConnector.config, @@ -1172,6 +1178,7 @@ export const useConnectorDialog = () => { periodicEnabled, frequencyMinutes, enableSummary, + enableVisionLlm, getFrequencyLabel, connectorConfig, connectorName, @@ -1332,6 +1339,7 @@ export const useConnectorDialog = () => { setPeriodicEnabled(false); setFrequencyMinutes("1440"); setEnableSummary(false); + setEnableVisionLlm(false); } } }, @@ -1368,6 +1376,7 @@ export const useConnectorDialog = () => { periodicEnabled, frequencyMinutes, enableSummary, + enableVisionLlm, searchSpaceId, allConnectors, viewingAccountsType, @@ -1382,6 +1391,7 @@ export const useConnectorDialog = () => { setPeriodicEnabled, setFrequencyMinutes, setEnableSummary, + setEnableVisionLlm, setConnectorName, // Handlers diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index 117d376ec..e7f4451b8 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -136,6 +136,7 @@ export function DocumentUploadTab({ const [uploadProgress, setUploadProgress] = useState(0); const [accordionValue, setAccordionValue] = useState(""); const [shouldSummarize, setShouldSummarize] = useState(false); + const [useVisionLlm, setUseVisionLlm] = useState(false); const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom); const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation; const fileInputRef = useRef(null); @@ -361,6 +362,7 @@ export function DocumentUploadTab({ relative_paths: batch.map((e) => e.relativePath), root_folder_id: rootFolderId, enable_summary: shouldSummarize, + use_vision_llm: useVisionLlm, } ); @@ -407,6 +409,7 @@ export function DocumentUploadTab({ files: rawFiles, search_space_id: Number(searchSpaceId), should_summarize: shouldSummarize, + use_vision_llm: useVisionLlm, }, { onSuccess: () => { @@ -696,6 +699,16 @@ export function DocumentUploadTab({ +
+
+

Enable Vision LLM

+

+ Describes images using AI vision (costly, slower) +

+
+ +
+