Make Vision LLM opt-in for uploads and connectors

This commit is contained in:
CREDO23 2026-04-10 16:45:51 +02:00
parent 0aefcbd504
commit a95bf58c8f
24 changed files with 276 additions and 20 deletions

View file

@ -0,0 +1,45 @@
"""121_add_enable_vision_llm_to_connectors

Revision ID: 121
Revises: 120
Create Date: 2026-04-09

Adds enable_vision_llm boolean column to search_source_connectors.
Defaults to False so vision LLM image processing is opt-in.
"""

from __future__ import annotations

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = "121"
down_revision: str | None = "120"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def _column_exists(column_name: str) -> bool:
    """Return True if *column_name* exists on search_source_connectors."""
    conn = op.get_bind()
    columns = sa.inspect(conn).get_columns("search_source_connectors")
    return any(col["name"] == column_name for col in columns)


def upgrade() -> None:
    """Add the enable_vision_llm column (idempotent).

    The existence guard makes re-running the migration a no-op instead of
    failing with a duplicate-column error.
    """
    if not _column_exists("enable_vision_llm"):
        op.add_column(
            "search_source_connectors",
            sa.Column(
                "enable_vision_llm",
                sa.Boolean(),
                nullable=False,
                # server_default keeps pre-existing rows valid under NOT NULL
                # and makes the feature opt-in (False) for all connectors.
                server_default=sa.text("false"),
            ),
        )


def downgrade() -> None:
    """Drop the enable_vision_llm column (idempotent, mirroring upgrade).

    upgrade() only adds the column when it is missing, so downgrade() must
    tolerate the column being absent as well; an unconditional drop would
    raise in that case.
    """
    if _column_exists("enable_vision_llm"):
        op.drop_column("search_source_connectors", "enable_vision_llm")

View file

@ -44,6 +44,8 @@ async def _export_paper_content(
async def download_and_extract_content(
client: DropboxClient,
file: dict[str, Any],
*,
vision_llm=None,
) -> tuple[str | None, dict[str, Any], str | None]:
"""Download a Dropbox file and extract its content as markdown.
@ -91,7 +93,7 @@ async def download_and_extract_content(
from app.etl_pipeline.etl_document import EtlRequest
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
result = await EtlPipelineService().extract(
result = await EtlPipelineService(vision_llm=vision_llm).extract(
EtlRequest(file_path=temp_file_path, filename=file_name)
)
markdown = result.markdown_content

View file

@ -27,6 +27,8 @@ logger = logging.getLogger(__name__)
async def download_and_extract_content(
client: GoogleDriveClient,
file: dict[str, Any],
*,
vision_llm=None,
) -> tuple[str | None, dict[str, Any], str | None]:
"""Download a Google Drive file and extract its content as markdown.
@ -103,7 +105,9 @@ async def download_and_extract_content(
etl_filename = (
file_name + extension if is_google_workspace_file(mime_type) else file_name
)
markdown = await _parse_file_to_markdown(temp_file_path, etl_filename)
markdown = await _parse_file_to_markdown(
temp_file_path, etl_filename, vision_llm=vision_llm
)
return markdown, drive_metadata, None
except Exception as e:
@ -115,12 +119,14 @@ async def download_and_extract_content(
os.unlink(temp_file_path)
async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
async def _parse_file_to_markdown(
file_path: str, filename: str, *, vision_llm=None
) -> str:
"""Parse a local file to markdown using the unified ETL pipeline."""
from app.etl_pipeline.etl_document import EtlRequest
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
result = await EtlPipelineService().extract(
result = await EtlPipelineService(vision_llm=vision_llm).extract(
EtlRequest(file_path=file_path, filename=filename)
)
return result.markdown_content

View file

@ -16,6 +16,8 @@ logger = logging.getLogger(__name__)
async def download_and_extract_content(
client: OneDriveClient,
file: dict[str, Any],
*,
vision_llm=None,
) -> tuple[str | None, dict[str, Any], str | None]:
"""Download a OneDrive file and extract its content as markdown.
@ -65,7 +67,9 @@ async def download_and_extract_content(
if error:
return None, metadata, error
markdown = await _parse_file_to_markdown(temp_file_path, file_name)
markdown = await _parse_file_to_markdown(
temp_file_path, file_name, vision_llm=vision_llm
)
return markdown, metadata, None
except Exception as e:
@ -77,12 +81,14 @@ async def download_and_extract_content(
os.unlink(temp_file_path)
async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
async def _parse_file_to_markdown(
file_path: str, filename: str, *, vision_llm=None
) -> str:
"""Parse a local file to markdown using the unified ETL pipeline."""
from app.etl_pipeline.etl_document import EtlRequest
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
result = await EtlPipelineService().extract(
result = await EtlPipelineService(vision_llm=vision_llm).extract(
EtlRequest(file_path=file_path, filename=filename)
)
return result.markdown_content

View file

@ -1555,6 +1555,13 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
Boolean, nullable=False, default=False, server_default="false"
)
# Vision LLM for image files - disabled by default to save cost/time.
# When enabled, images are described via a vision language model instead
# of falling back to the document parser.
enable_vision_llm = Column(
Boolean, nullable=False, default=False, server_default="false"
)
# Periodic indexing fields
periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
indexing_frequency_minutes = Column(Integer, nullable=True)

View file

@ -123,6 +123,7 @@ async def create_documents_file_upload(
files: list[UploadFile],
search_space_id: int = Form(...),
should_summarize: bool = Form(False),
use_vision_llm: bool = Form(False),
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
dispatcher: TaskDispatcher = Depends(get_task_dispatcher),
@ -272,6 +273,7 @@ async def create_documents_file_upload(
search_space_id=search_space_id,
user_id=str(user.id),
should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
)
return {
@ -1490,6 +1492,7 @@ async def folder_upload(
relative_paths: str = Form(...),
root_folder_id: int | None = Form(None),
enable_summary: bool = Form(False),
use_vision_llm: bool = Form(False),
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
@ -1616,6 +1619,7 @@ async def folder_upload(
folder_name=folder_name,
root_folder_id=root_folder_id,
enable_summary=enable_summary,
use_vision_llm=use_vision_llm,
file_mappings=list(file_mappings),
)

View file

@ -17,6 +17,7 @@ class SearchSourceConnectorBase(BaseModel):
last_indexed_at: datetime | None = None
config: dict[str, Any]
enable_summary: bool = False
enable_vision_llm: bool = False
periodic_indexing_enabled: bool = False
indexing_frequency_minutes: int | None = None
next_scheduled_at: datetime | None = None
@ -67,6 +68,7 @@ class SearchSourceConnectorUpdate(BaseModel):
last_indexed_at: datetime | None = None
config: dict[str, Any] | None = None
enable_summary: bool | None = None
enable_vision_llm: bool | None = None
periodic_indexing_enabled: bool | None = None
indexing_frequency_minutes: int | None = None
next_scheduled_at: datetime | None = None

View file

@ -19,6 +19,7 @@ class TaskDispatcher(Protocol):
search_space_id: int,
user_id: str,
should_summarize: bool = False,
use_vision_llm: bool = False,
) -> None: ...
@ -34,6 +35,7 @@ class CeleryTaskDispatcher:
search_space_id: int,
user_id: str,
should_summarize: bool = False,
use_vision_llm: bool = False,
) -> None:
from app.tasks.celery_tasks.document_tasks import (
process_file_upload_with_document_task,
@ -46,6 +48,7 @@ class CeleryTaskDispatcher:
search_space_id=search_space_id,
user_id=user_id,
should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
)

View file

@ -778,6 +778,7 @@ def process_file_upload_with_document_task(
search_space_id: int,
user_id: str,
should_summarize: bool = False,
use_vision_llm: bool = False,
):
"""
Celery task to process uploaded file with existing pending document.
@ -833,6 +834,7 @@ def process_file_upload_with_document_task(
search_space_id,
user_id,
should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
)
)
logger.info(
@ -869,6 +871,7 @@ async def _process_file_with_document(
search_space_id: int,
user_id: str,
should_summarize: bool = False,
use_vision_llm: bool = False,
):
"""
Process file and update existing pending document status.
@ -971,6 +974,7 @@ async def _process_file_with_document(
log_entry=log_entry,
notification=notification,
should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
)
# Update notification on success
@ -1428,6 +1432,7 @@ def index_uploaded_folder_files_task(
root_folder_id: int,
enable_summary: bool,
file_mappings: list[dict],
use_vision_llm: bool = False,
):
"""Celery task to index files uploaded from the desktop app."""
loop = asyncio.new_event_loop()
@ -1441,6 +1446,7 @@ def index_uploaded_folder_files_task(
root_folder_id=root_folder_id,
enable_summary=enable_summary,
file_mappings=file_mappings,
use_vision_llm=use_vision_llm,
)
)
finally:
@ -1454,6 +1460,7 @@ async def _index_uploaded_folder_files_async(
root_folder_id: int,
enable_summary: bool,
file_mappings: list[dict],
use_vision_llm: bool = False,
):
"""Run upload-based folder indexing with notification + heartbeat."""
file_count = len(file_mappings)
@ -1503,6 +1510,7 @@ async def _index_uploaded_folder_files_async(
enable_summary=enable_summary,
file_mappings=file_mappings,
on_heartbeat_callback=_heartbeat_progress,
use_vision_llm=use_vision_llm,
)
if notification:

View file

@ -164,6 +164,7 @@ async def _download_files_parallel(
enable_summary: bool,
max_concurrency: int = 3,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[list[ConnectorDocument], int]:
"""Download and ETL files in parallel. Returns (docs, failed_count)."""
results: list[ConnectorDocument] = []
@ -176,7 +177,7 @@ async def _download_files_parallel(
nonlocal last_heartbeat, completed_count
async with sem:
markdown, db_metadata, error = await download_and_extract_content(
dropbox_client, file
dropbox_client, file, vision_llm=vision_llm
)
if error or not markdown:
file_name = file.get("name", "Unknown")
@ -224,6 +225,7 @@ async def _download_and_index(
user_id: str,
enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int]:
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
connector_docs, download_failed = await _download_files_parallel(
@ -234,6 +236,7 @@ async def _download_and_index(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
)
batch_indexed = 0
@ -287,6 +290,7 @@ async def _index_with_delta_sync(
max_files: int,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int, str]:
"""Delta sync using Dropbox cursor-based change tracking.
@ -359,6 +363,7 @@ async def _index_with_delta_sync(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
)
indexed = renamed_count + batch_indexed
@ -384,6 +389,7 @@ async def _index_full_scan(
incremental_sync: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]:
"""Full scan indexing of a folder.
@ -469,6 +475,7 @@ async def _index_full_scan(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
)
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -498,6 +505,7 @@ async def _index_selected_files(
enable_summary: bool,
incremental_sync: bool = True,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int, int, list[str]]:
"""Index user-selected files using the parallel pipeline."""
page_limit_service = PageLimitService(session)
@ -557,6 +565,7 @@ async def _index_selected_files(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
)
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -621,6 +630,13 @@ async def index_dropbox_files(
return 0, 0, error_msg, 0
connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
dropbox_client = DropboxClient(session, connector_id)
indexing_options = items_dict.get("indexing_options", {})
@ -650,6 +666,7 @@ async def index_dropbox_files(
user_id=user_id,
enable_summary=connector_enable_summary,
incremental_sync=incremental_sync,
vision_llm=vision_llm,
)
total_indexed += indexed
total_skipped += skipped
@ -684,6 +701,7 @@ async def index_dropbox_files(
log_entry,
max_files,
enable_summary=connector_enable_summary,
vision_llm=vision_llm,
)
folder_cursors[folder_path] = new_cursor
total_unsupported += unsup
@ -703,6 +721,7 @@ async def index_dropbox_files(
include_subfolders,
incremental_sync=incremental_sync,
enable_summary=connector_enable_summary,
vision_llm=vision_llm,
)
total_unsupported += unsup

View file

@ -261,6 +261,7 @@ async def _download_files_parallel(
enable_summary: bool,
max_concurrency: int = 3,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[list[ConnectorDocument], int]:
"""Download and ETL files in parallel, returning ConnectorDocuments.
@ -276,7 +277,7 @@ async def _download_files_parallel(
nonlocal last_heartbeat, completed_count
async with sem:
markdown, drive_metadata, error = await download_and_extract_content(
drive_client, file
drive_client, file, vision_llm=vision_llm
)
if error or not markdown:
file_name = file.get("name", "Unknown")
@ -322,6 +323,7 @@ async def _process_single_file(
search_space_id: int,
user_id: str,
enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]:
"""Download, extract, and index a single Drive file via the pipeline.
@ -343,7 +345,7 @@ async def _process_single_file(
await page_limit_service.check_page_limit(user_id, estimated_pages)
markdown, drive_metadata, error = await download_and_extract_content(
drive_client, file
drive_client, file, vision_llm=vision_llm
)
if error or not markdown:
logger.warning(f"ETL failed for {file_name}: {error}")
@ -433,6 +435,7 @@ async def _download_and_index(
user_id: str,
enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int]:
"""Phase 2+3: parallel download then parallel indexing.
@ -446,6 +449,7 @@ async def _download_and_index(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
)
batch_indexed = 0
@ -476,6 +480,7 @@ async def _index_selected_files(
user_id: str,
enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int, int, list[str]]:
"""Index user-selected files using the parallel pipeline.
@ -540,6 +545,7 @@ async def _index_selected_files(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
)
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -573,6 +579,7 @@ async def _index_full_scan(
include_subfolders: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]:
"""Full scan indexing of a folder.
@ -703,6 +710,7 @@ async def _index_full_scan(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
)
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -736,6 +744,7 @@ async def _index_with_delta_sync(
include_subfolders: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]:
"""Delta sync using change tracking.
@ -844,6 +853,7 @@ async def _index_with_delta_sync(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
)
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -947,6 +957,11 @@ async def index_google_drive_files(
)
connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
drive_client = GoogleDriveClient(
session, connector_id, credentials=pre_built_credentials
)
@ -986,6 +1001,7 @@ async def index_google_drive_files(
include_subfolders,
on_heartbeat_callback,
connector_enable_summary,
vision_llm=vision_llm,
)
documents_unsupported += du
logger.info("Running reconciliation scan after delta sync")
@ -1004,6 +1020,7 @@ async def index_google_drive_files(
include_subfolders,
on_heartbeat_callback,
connector_enable_summary,
vision_llm=vision_llm,
)
documents_indexed += ri
documents_skipped += rs
@ -1029,6 +1046,7 @@ async def index_google_drive_files(
include_subfolders,
on_heartbeat_callback,
connector_enable_summary,
vision_llm=vision_llm,
)
if documents_indexed > 0 or can_use_delta:
@ -1146,6 +1164,11 @@ async def index_google_drive_single_file(
)
connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
drive_client = GoogleDriveClient(
session, connector_id, credentials=pre_built_credentials
)
@ -1168,6 +1191,7 @@ async def index_google_drive_single_file(
search_space_id,
user_id,
connector_enable_summary,
vision_llm=vision_llm,
)
await session.commit()
@ -1278,6 +1302,11 @@ async def index_google_drive_selected_files(
return 0, 0, [error_msg]
connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
drive_client = GoogleDriveClient(
session, connector_id, credentials=pre_built_credentials
)
@ -1291,6 +1320,7 @@ async def index_google_drive_selected_files(
user_id=user_id,
enable_summary=connector_enable_summary,
on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
)
if unsupported > 0:

View file

@ -153,7 +153,7 @@ def scan_folder(
return files
async def _read_file_content(file_path: str, filename: str) -> str:
async def _read_file_content(file_path: str, filename: str, *, vision_llm=None) -> str:
"""Read file content via the unified ETL pipeline.
All file types (plaintext, audio, direct-convert, document, image) are
@ -162,7 +162,7 @@ async def _read_file_content(file_path: str, filename: str) -> str:
from app.etl_pipeline.etl_document import EtlRequest
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
result = await EtlPipelineService().extract(
result = await EtlPipelineService(vision_llm=vision_llm).extract(
EtlRequest(file_path=file_path, filename=filename)
)
return result.markdown_content
@ -199,12 +199,14 @@ async def _compute_file_content_hash(
file_path: str,
filename: str,
search_space_id: int,
*,
vision_llm=None,
) -> tuple[str, str]:
"""Read a file (via ETL if needed) and compute its content hash.
Returns (content_text, content_hash).
"""
content = await _read_file_content(file_path, filename)
content = await _read_file_content(file_path, filename, vision_llm=vision_llm)
return content, _content_hash(content, search_space_id)
@ -1268,6 +1270,7 @@ async def index_uploaded_files(
enable_summary: bool,
file_mappings: list[dict],
on_heartbeat_callback: HeartbeatCallbackType | None = None,
use_vision_llm: bool = False,
) -> tuple[int, int, str | None]:
"""Index files uploaded from the desktop app via temp paths.
@ -1304,6 +1307,12 @@ async def index_uploaded_files(
pipeline = IndexingPipelineService(session)
llm = await get_user_long_context_llm(session, user_id, search_space_id)
vision_llm_instance = None
if use_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm_instance = await get_vision_llm(session, search_space_id)
indexed_count = 0
failed_count = 0
errors: list[str] = []
@ -1351,7 +1360,8 @@ async def index_uploaded_files(
try:
content, content_hash = await _compute_file_content_hash(
temp_path, filename, search_space_id
temp_path, filename, search_space_id,
vision_llm=vision_llm_instance,
)
except Exception as e:
logger.warning(f"Could not read {relative_path}: {e}")

View file

@ -171,6 +171,7 @@ async def _download_files_parallel(
enable_summary: bool,
max_concurrency: int = 3,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[list[ConnectorDocument], int]:
"""Download and ETL files in parallel. Returns (docs, failed_count)."""
results: list[ConnectorDocument] = []
@ -183,7 +184,7 @@ async def _download_files_parallel(
nonlocal last_heartbeat, completed_count
async with sem:
markdown, od_metadata, error = await download_and_extract_content(
onedrive_client, file
onedrive_client, file, vision_llm=vision_llm
)
if error or not markdown:
file_name = file.get("name", "Unknown")
@ -231,6 +232,7 @@ async def _download_and_index(
user_id: str,
enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int]:
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
connector_docs, download_failed = await _download_files_parallel(
@ -241,6 +243,7 @@ async def _download_and_index(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
)
batch_indexed = 0
@ -293,6 +296,7 @@ async def _index_selected_files(
user_id: str,
enable_summary: bool,
on_heartbeat: HeartbeatCallbackType | None = None,
vision_llm=None,
) -> tuple[int, int, int, list[str]]:
"""Index user-selected files using the parallel pipeline."""
page_limit_service = PageLimitService(session)
@ -343,6 +347,7 @@ async def _index_selected_files(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat,
vision_llm=vision_llm,
)
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -375,6 +380,7 @@ async def _index_full_scan(
include_subfolders: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int]:
"""Full scan indexing of a folder.
@ -450,6 +456,7 @@ async def _index_full_scan(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
)
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -481,6 +488,7 @@ async def _index_with_delta_sync(
max_files: int,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
enable_summary: bool = True,
vision_llm=None,
) -> tuple[int, int, int, str | None]:
"""Delta sync using OneDrive change tracking.
@ -573,6 +581,7 @@ async def _index_with_delta_sync(
user_id=user_id,
enable_summary=enable_summary,
on_heartbeat=on_heartbeat_callback,
vision_llm=vision_llm,
)
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
@ -643,6 +652,12 @@ async def index_onedrive_files(
return 0, 0, error_msg, 0
connector_enable_summary = getattr(connector, "enable_summary", True)
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
vision_llm = None
if connector_enable_vision_llm:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
onedrive_client = OneDriveClient(session, connector_id)
indexing_options = items_dict.get("indexing_options", {})
@ -666,6 +681,7 @@ async def index_onedrive_files(
search_space_id=search_space_id,
user_id=user_id,
enable_summary=connector_enable_summary,
vision_llm=vision_llm,
)
total_indexed += indexed
total_skipped += skipped
@ -695,6 +711,7 @@ async def index_onedrive_files(
log_entry,
max_files,
enable_summary=connector_enable_summary,
vision_llm=vision_llm,
)
total_indexed += indexed
total_skipped += skipped
@ -721,6 +738,7 @@ async def index_onedrive_files(
max_files,
include_subfolders,
enable_summary=connector_enable_summary,
vision_llm=vision_llm,
)
total_indexed += ri
total_skipped += rs
@ -740,6 +758,7 @@ async def index_onedrive_files(
max_files,
include_subfolders,
enable_summary=connector_enable_summary,
vision_llm=vision_llm,
)
total_indexed += indexed
total_skipped += skipped

View file

@ -46,6 +46,7 @@ class _ProcessingContext:
log_entry: Log
connector: dict | None = None
notification: Notification | None = None
use_vision_llm: bool = False
enable_summary: bool = field(init=False)
def __post_init__(self) -> None:
@ -134,7 +135,7 @@ async def _process_non_document_upload(ctx: _ProcessingContext) -> Document | No
)
vision_llm = None
if etl_classify(ctx.filename) == FileCategory.IMAGE:
if ctx.use_vision_llm and etl_classify(ctx.filename) == FileCategory.IMAGE:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(ctx.session, ctx.search_space_id)
@ -288,6 +289,7 @@ async def process_file_in_background(
log_entry: Log,
connector: dict | None = None,
notification: Notification | None = None,
use_vision_llm: bool = False,
) -> Document | None:
ctx = _ProcessingContext(
session=session,
@ -299,6 +301,7 @@ async def process_file_in_background(
log_entry=log_entry,
connector=connector,
notification=notification,
use_vision_llm=use_vision_llm,
)
try:
@ -349,6 +352,7 @@ async def _extract_file_content(
task_logger: TaskLoggingService,
log_entry: Log,
notification: Notification | None,
use_vision_llm: bool = False,
) -> tuple[str, str]:
"""
Extract markdown content from a file regardless of type.
@ -396,7 +400,7 @@ async def _extract_file_content(
await page_limit_service.check_page_limit(user_id, estimated_pages)
vision_llm = None
if category == FileCategory.IMAGE:
if use_vision_llm and category == FileCategory.IMAGE:
from app.services.llm_service import get_vision_llm
vision_llm = await get_vision_llm(session, search_space_id)
@ -435,6 +439,7 @@ async def process_file_in_background_with_document(
connector: dict | None = None,
notification: Notification | None = None,
should_summarize: bool = False,
use_vision_llm: bool = False,
) -> Document | None:
"""
Process file and update existing pending document (2-phase pattern).
@ -463,6 +468,7 @@ async def process_file_in_background_with_document(
task_logger,
log_entry,
notification,
use_vision_llm=use_vision_llm,
)
if not markdown_content:

View file

@ -69,6 +69,7 @@ class InlineTaskDispatcher:
search_space_id: int,
user_id: str,
should_summarize: bool = False,
use_vision_llm: bool = False,
) -> None:
from app.tasks.celery_tasks.document_tasks import (
_process_file_with_document,
@ -82,6 +83,7 @@ class InlineTaskDispatcher:
search_space_id,
user_id,
should_summarize=should_summarize,
use_vision_llm=use_vision_llm,
)

View file

@ -98,6 +98,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
periodicEnabled,
frequencyMinutes,
enableSummary,
enableVisionLlm,
allConnectors,
viewingAccountsType,
viewingMCPList,
@ -109,6 +110,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
setPeriodicEnabled,
setFrequencyMinutes,
setEnableSummary,
setEnableVisionLlm,
handleOpenChange,
handleTabChange,
handleScroll,
@ -279,6 +281,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
periodicEnabled={periodicEnabled}
frequencyMinutes={frequencyMinutes}
enableSummary={enableSummary}
enableVisionLlm={enableVisionLlm}
isSaving={isSaving}
isDisconnecting={isDisconnecting}
isIndexing={indexingConnectorIds.has(editingConnector.id)}
@ -288,6 +291,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
onPeriodicEnabledChange={setPeriodicEnabled}
onFrequencyChange={setFrequencyMinutes}
onEnableSummaryChange={setEnableSummary}
onEnableVisionLlmChange={setEnableVisionLlm}
onSave={() => {
startIndexing(editingConnector.id);
handleSaveConnector(() => refreshConnectors());
@ -336,6 +340,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
periodicEnabled={periodicEnabled}
frequencyMinutes={frequencyMinutes}
enableSummary={enableSummary}
enableVisionLlm={enableVisionLlm}
isStartingIndexing={isStartingIndexing}
isFromOAuth={isFromOAuth}
onStartDateChange={setStartDate}
@ -343,6 +348,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
onPeriodicEnabledChange={setPeriodicEnabled}
onFrequencyChange={setFrequencyMinutes}
onEnableSummaryChange={setEnableSummary}
onEnableVisionLlmChange={setEnableVisionLlm}
onConfigChange={setIndexingConnectorConfig}
onStartIndexing={() => {
if (indexingConfig.connectorId) {

View file

@ -0,0 +1,25 @@
"use client";
import type { FC } from "react";
import { Switch } from "@/components/ui/switch";
interface VisionLLMConfigProps {
enabled: boolean;
onEnabledChange: (enabled: boolean) => void;
}
export const VisionLLMConfig: FC<VisionLLMConfigProps> = ({ enabled, onEnabledChange }) => {
return (
<div className="rounded-xl bg-slate-400/5 dark:bg-white/5 p-3 sm:p-6">
<div className="flex items-center justify-between">
<div className="space-y-1">
<h3 className="font-medium text-sm sm:text-base">Enable Vision LLM</h3>
<p className="text-xs sm:text-sm text-muted-foreground">
Describes images using AI vision (costly, slower)
</p>
</div>
<Switch checked={enabled} onCheckedChange={onEnabledChange} />
</div>
</div>
);
};

View file

@ -15,6 +15,7 @@ import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
import { SummaryConfig } from "../../components/summary-config";
import { VisionLLMConfig } from "../../components/vision-llm-config";
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
import { getConnectorConfigComponent } from "../index";
@ -38,6 +39,7 @@ interface ConnectorEditViewProps {
periodicEnabled: boolean;
frequencyMinutes: string;
enableSummary: boolean;
enableVisionLlm: boolean;
isSaving: boolean;
isDisconnecting: boolean;
isIndexing?: boolean;
@ -47,6 +49,7 @@ interface ConnectorEditViewProps {
onPeriodicEnabledChange: (enabled: boolean) => void;
onFrequencyChange: (frequency: string) => void;
onEnableSummaryChange: (enabled: boolean) => void;
onEnableVisionLlmChange: (enabled: boolean) => void;
onSave: () => void;
onDisconnect: () => void;
onBack: () => void;
@ -62,6 +65,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
periodicEnabled,
frequencyMinutes,
enableSummary,
enableVisionLlm,
isSaving,
isDisconnecting,
isIndexing = false,
@ -71,6 +75,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
onPeriodicEnabledChange,
onFrequencyChange,
onEnableSummaryChange,
onEnableVisionLlmChange,
onSave,
onDisconnect,
onBack,
@ -272,6 +277,14 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
{/* AI Summary toggle */}
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
{/* Vision LLM toggle - only for file-based connectors */}
{(connector.connector_type === "GOOGLE_DRIVE_CONNECTOR" ||
connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
connector.connector_type === "DROPBOX_CONNECTOR" ||
connector.connector_type === "ONEDRIVE_CONNECTOR") && (
<VisionLLMConfig enabled={enableVisionLlm} onEnabledChange={onEnableVisionLlmChange} />
)}
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&

View file

@ -10,6 +10,7 @@ import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
import { SummaryConfig } from "../../components/summary-config";
import { VisionLLMConfig } from "../../components/vision-llm-config";
import type { IndexingConfigState } from "../../constants/connector-constants";
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
import { getConnectorConfigComponent } from "../index";
@ -22,6 +23,7 @@ interface IndexingConfigurationViewProps {
periodicEnabled: boolean;
frequencyMinutes: string;
enableSummary: boolean;
enableVisionLlm: boolean;
isStartingIndexing: boolean;
isFromOAuth?: boolean;
onStartDateChange: (date: Date | undefined) => void;
@ -29,6 +31,7 @@ interface IndexingConfigurationViewProps {
onPeriodicEnabledChange: (enabled: boolean) => void;
onFrequencyChange: (frequency: string) => void;
onEnableSummaryChange: (enabled: boolean) => void;
onEnableVisionLlmChange: (enabled: boolean) => void;
onConfigChange?: (config: Record<string, unknown>) => void;
onStartIndexing: () => void;
onSkip: () => void;
@ -42,6 +45,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
periodicEnabled,
frequencyMinutes,
enableSummary,
enableVisionLlm,
isStartingIndexing,
isFromOAuth = false,
onStartDateChange,
@ -49,6 +53,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
onPeriodicEnabledChange,
onFrequencyChange,
onEnableSummaryChange,
onEnableVisionLlmChange,
onConfigChange,
onStartIndexing,
onSkip,
@ -158,6 +163,14 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
{/* AI Summary toggle */}
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
{/* Vision LLM toggle - only for file-based connectors */}
{(config.connectorType === "GOOGLE_DRIVE_CONNECTOR" ||
config.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
config.connectorType === "DROPBOX_CONNECTOR" ||
config.connectorType === "ONEDRIVE_CONNECTOR") && (
<VisionLLMConfig enabled={enableVisionLlm} onEnabledChange={onEnableVisionLlmChange} />
)}
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&

View file

@ -80,6 +80,7 @@ export const useConnectorDialog = () => {
const [periodicEnabled, setPeriodicEnabled] = useState(false);
const [frequencyMinutes, setFrequencyMinutes] = useState("1440");
const [enableSummary, setEnableSummary] = useState(false);
const [enableVisionLlm, setEnableVisionLlm] = useState(false);
// Edit mode state
const [editingConnector, setEditingConnector] = useState<SearchSourceConnector | null>(null);
@ -621,6 +622,7 @@ export const useConnectorDialog = () => {
setPeriodicEnabled(false);
setFrequencyMinutes("1440");
setEnableSummary(connector.enable_summary ?? false);
setEnableVisionLlm(connector.enable_vision_llm ?? false);
setStartDate(undefined);
setEndDate(undefined);
@ -763,12 +765,13 @@ export const useConnectorDialog = () => {
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
// Update connector with summary, periodic sync settings, and config changes
if (enableSummary || periodicEnabled || indexingConnectorConfig) {
const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined;
if (enableSummary || enableVisionLlm || periodicEnabled || indexingConnectorConfig) {
const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined;
await updateConnector({
id: indexingConfig.connectorId,
data: {
enable_summary: enableSummary,
enable_vision_llm: enableVisionLlm,
...(periodicEnabled && {
periodic_indexing_enabled: true,
indexing_frequency_minutes: frequency,
@ -896,6 +899,7 @@ export const useConnectorDialog = () => {
periodicEnabled,
frequencyMinutes,
enableSummary,
enableVisionLlm,
indexingConnectorConfig,
setIsOpen,
]
@ -960,6 +964,7 @@ export const useConnectorDialog = () => {
setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled);
setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440");
setEnableSummary(connector.enable_summary ?? false);
setEnableVisionLlm(connector.enable_vision_llm ?? false);
setStartDate(undefined);
setEndDate(undefined);
},
@ -1038,6 +1043,7 @@ export const useConnectorDialog = () => {
data: {
name: connectorName || editingConnector.name,
enable_summary: enableSummary,
enable_vision_llm: enableVisionLlm,
periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled,
indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency,
config: connectorConfig || editingConnector.config,
@ -1172,6 +1178,7 @@ export const useConnectorDialog = () => {
periodicEnabled,
frequencyMinutes,
enableSummary,
enableVisionLlm,
getFrequencyLabel,
connectorConfig,
connectorName,
@ -1332,6 +1339,7 @@ export const useConnectorDialog = () => {
setPeriodicEnabled(false);
setFrequencyMinutes("1440");
setEnableSummary(false);
setEnableVisionLlm(false);
}
}
},
@ -1368,6 +1376,7 @@ export const useConnectorDialog = () => {
periodicEnabled,
frequencyMinutes,
enableSummary,
enableVisionLlm,
searchSpaceId,
allConnectors,
viewingAccountsType,
@ -1382,6 +1391,7 @@ export const useConnectorDialog = () => {
setPeriodicEnabled,
setFrequencyMinutes,
setEnableSummary,
setEnableVisionLlm,
setConnectorName,
// Handlers

View file

@ -136,6 +136,7 @@ export function DocumentUploadTab({
const [uploadProgress, setUploadProgress] = useState(0);
const [accordionValue, setAccordionValue] = useState<string>("");
const [shouldSummarize, setShouldSummarize] = useState(false);
const [useVisionLlm, setUseVisionLlm] = useState(false);
const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom);
const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation;
const fileInputRef = useRef<HTMLInputElement>(null);
@ -361,6 +362,7 @@ export function DocumentUploadTab({
relative_paths: batch.map((e) => e.relativePath),
root_folder_id: rootFolderId,
enable_summary: shouldSummarize,
use_vision_llm: useVisionLlm,
}
);
@ -407,6 +409,7 @@ export function DocumentUploadTab({
files: rawFiles,
search_space_id: Number(searchSpaceId),
should_summarize: shouldSummarize,
use_vision_llm: useVisionLlm,
},
{
onSuccess: () => {
@ -696,6 +699,16 @@ export function DocumentUploadTab({
<Switch checked={shouldSummarize} onCheckedChange={setShouldSummarize} />
</div>
<div className={toggleRowClass}>
<div className="space-y-0.5">
<p className="font-medium text-sm">Enable Vision LLM</p>
<p className="text-xs text-muted-foreground">
Describes images using AI vision (costly, slower)
</p>
</div>
<Switch checked={useVisionLlm} onCheckedChange={setUseVisionLlm} />
</div>
<Button
className="w-full"
onClick={handleUpload}

View file

@ -44,6 +44,7 @@ export const searchSourceConnector = z.object({
last_indexed_at: z.string().nullable(),
config: z.record(z.string(), z.any()),
enable_summary: z.boolean().default(false),
enable_vision_llm: z.boolean().default(false),
periodic_indexing_enabled: z.boolean(),
indexing_frequency_minutes: z.number().nullable(),
next_scheduled_at: z.string().nullable(),
@ -98,6 +99,7 @@ export const createConnectorRequest = z.object({
last_indexed_at: true,
config: true,
enable_summary: true,
enable_vision_llm: true,
periodic_indexing_enabled: true,
indexing_frequency_minutes: true,
next_scheduled_at: true,
@ -123,6 +125,7 @@ export const updateConnectorRequest = z.object({
last_indexed_at: true,
config: true,
enable_summary: true,
enable_vision_llm: true,
periodic_indexing_enabled: true,
indexing_frequency_minutes: true,
next_scheduled_at: true,

View file

@ -148,6 +148,7 @@ export const uploadDocumentRequest = z.object({
files: z.array(z.instanceof(File)),
search_space_id: z.number(),
should_summarize: z.boolean().default(false),
use_vision_llm: z.boolean().default(false),
});
export const uploadDocumentResponse = z.object({

View file

@ -127,7 +127,7 @@ class DocumentsApiService {
throw new ValidationError(`Invalid request: ${errorMessage}`);
}
const { files, search_space_id, should_summarize } = parsedRequest.data;
const { files, search_space_id, should_summarize, use_vision_llm } = parsedRequest.data;
const UPLOAD_BATCH_SIZE = 5;
const batches: File[][] = [];
@ -146,6 +146,7 @@ class DocumentsApiService {
for (const file of batch) formData.append("files", file);
formData.append("search_space_id", String(search_space_id));
formData.append("should_summarize", String(should_summarize));
formData.append("use_vision_llm", String(use_vision_llm));
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 120_000);
@ -442,6 +443,7 @@ class DocumentsApiService {
relative_paths: string[];
root_folder_id?: number | null;
enable_summary?: boolean;
use_vision_llm?: boolean;
},
signal?: AbortSignal
): Promise<{ message: string; status: string; root_folder_id: number; file_count: number }> => {
@ -456,6 +458,7 @@ class DocumentsApiService {
formData.append("root_folder_id", String(metadata.root_folder_id));
}
formData.append("enable_summary", String(metadata.enable_summary ?? false));
formData.append("use_vision_llm", String(metadata.use_vision_llm ?? false));
const totalSize = files.reduce((acc, f) => acc + f.size, 0);
const timeoutMs = Math.min(Math.max((totalSize / (1024 * 1024)) * 5000, 30_000), 600_000);