mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
Make Vision LLM opt-in for uploads and connectors
This commit is contained in:
parent
0aefcbd504
commit
a95bf58c8f
24 changed files with 276 additions and 20 deletions
|
|
@ -0,0 +1,45 @@
|
|||
"""121_add_enable_vision_llm_to_connectors
|
||||
|
||||
Revision ID: 121
|
||||
Revises: 120
|
||||
Create Date: 2026-04-09
|
||||
|
||||
Adds enable_vision_llm boolean column to search_source_connectors.
|
||||
Defaults to False so vision LLM image processing is opt-in.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "121"
|
||||
down_revision: str | None = "120"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the opt-in ``enable_vision_llm`` flag to search_source_connectors.

    The column is a non-nullable boolean with a server-side default of
    ``false`` so that existing rows stay valid and vision LLM image
    processing remains disabled until a user explicitly enables it.
    """
    bind = op.get_bind()
    inspector = sa.inspect(bind)
    column_names = {
        column["name"]
        for column in inspector.get_columns("search_source_connectors")
    }

    # Idempotent guard: skip the DDL when the column already exists
    # (e.g. the migration was partially applied or re-run).
    if "enable_vision_llm" in column_names:
        return

    op.add_column(
        "search_source_connectors",
        sa.Column(
            "enable_vision_llm",
            sa.Boolean(),
            nullable=False,
            # Opt-in by default: both existing and newly inserted rows
            # get vision LLM processing turned off.
            server_default=sa.text("false"),
        ),
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Remove the ``enable_vision_llm`` column added by this revision."""
    op.drop_column("search_source_connectors", "enable_vision_llm")
|
||||
|
|
@ -44,6 +44,8 @@ async def _export_paper_content(
|
|||
async def download_and_extract_content(
|
||||
client: DropboxClient,
|
||||
file: dict[str, Any],
|
||||
*,
|
||||
vision_llm=None,
|
||||
) -> tuple[str | None, dict[str, Any], str | None]:
|
||||
"""Download a Dropbox file and extract its content as markdown.
|
||||
|
||||
|
|
@ -91,7 +93,7 @@ async def download_and_extract_content(
|
|||
from app.etl_pipeline.etl_document import EtlRequest
|
||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
||||
|
||||
result = await EtlPipelineService().extract(
|
||||
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
||||
EtlRequest(file_path=temp_file_path, filename=file_name)
|
||||
)
|
||||
markdown = result.markdown_content
|
||||
|
|
|
|||
|
|
@ -27,6 +27,8 @@ logger = logging.getLogger(__name__)
|
|||
async def download_and_extract_content(
|
||||
client: GoogleDriveClient,
|
||||
file: dict[str, Any],
|
||||
*,
|
||||
vision_llm=None,
|
||||
) -> tuple[str | None, dict[str, Any], str | None]:
|
||||
"""Download a Google Drive file and extract its content as markdown.
|
||||
|
||||
|
|
@ -103,7 +105,9 @@ async def download_and_extract_content(
|
|||
etl_filename = (
|
||||
file_name + extension if is_google_workspace_file(mime_type) else file_name
|
||||
)
|
||||
markdown = await _parse_file_to_markdown(temp_file_path, etl_filename)
|
||||
markdown = await _parse_file_to_markdown(
|
||||
temp_file_path, etl_filename, vision_llm=vision_llm
|
||||
)
|
||||
return markdown, drive_metadata, None
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -115,12 +119,14 @@ async def download_and_extract_content(
|
|||
os.unlink(temp_file_path)
|
||||
|
||||
|
||||
async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
||||
async def _parse_file_to_markdown(
|
||||
file_path: str, filename: str, *, vision_llm=None
|
||||
) -> str:
|
||||
"""Parse a local file to markdown using the unified ETL pipeline."""
|
||||
from app.etl_pipeline.etl_document import EtlRequest
|
||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
||||
|
||||
result = await EtlPipelineService().extract(
|
||||
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
||||
EtlRequest(file_path=file_path, filename=filename)
|
||||
)
|
||||
return result.markdown_content
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ logger = logging.getLogger(__name__)
|
|||
async def download_and_extract_content(
|
||||
client: OneDriveClient,
|
||||
file: dict[str, Any],
|
||||
*,
|
||||
vision_llm=None,
|
||||
) -> tuple[str | None, dict[str, Any], str | None]:
|
||||
"""Download a OneDrive file and extract its content as markdown.
|
||||
|
||||
|
|
@ -65,7 +67,9 @@ async def download_and_extract_content(
|
|||
if error:
|
||||
return None, metadata, error
|
||||
|
||||
markdown = await _parse_file_to_markdown(temp_file_path, file_name)
|
||||
markdown = await _parse_file_to_markdown(
|
||||
temp_file_path, file_name, vision_llm=vision_llm
|
||||
)
|
||||
return markdown, metadata, None
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -77,12 +81,14 @@ async def download_and_extract_content(
|
|||
os.unlink(temp_file_path)
|
||||
|
||||
|
||||
async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
||||
async def _parse_file_to_markdown(
|
||||
file_path: str, filename: str, *, vision_llm=None
|
||||
) -> str:
|
||||
"""Parse a local file to markdown using the unified ETL pipeline."""
|
||||
from app.etl_pipeline.etl_document import EtlRequest
|
||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
||||
|
||||
result = await EtlPipelineService().extract(
|
||||
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
||||
EtlRequest(file_path=file_path, filename=filename)
|
||||
)
|
||||
return result.markdown_content
|
||||
|
|
|
|||
|
|
@ -1555,6 +1555,13 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
|
|||
Boolean, nullable=False, default=False, server_default="false"
|
||||
)
|
||||
|
||||
# Vision LLM for image files - disabled by default to save cost/time.
|
||||
# When enabled, images are described via a vision language model instead
|
||||
# of falling back to the document parser.
|
||||
enable_vision_llm = Column(
|
||||
Boolean, nullable=False, default=False, server_default="false"
|
||||
)
|
||||
|
||||
# Periodic indexing fields
|
||||
periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
|
||||
indexing_frequency_minutes = Column(Integer, nullable=True)
|
||||
|
|
|
|||
|
|
@ -123,6 +123,7 @@ async def create_documents_file_upload(
|
|||
files: list[UploadFile],
|
||||
search_space_id: int = Form(...),
|
||||
should_summarize: bool = Form(False),
|
||||
use_vision_llm: bool = Form(False),
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
dispatcher: TaskDispatcher = Depends(get_task_dispatcher),
|
||||
|
|
@ -272,6 +273,7 @@ async def create_documents_file_upload(
|
|||
search_space_id=search_space_id,
|
||||
user_id=str(user.id),
|
||||
should_summarize=should_summarize,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
|
||||
return {
|
||||
|
|
@ -1490,6 +1492,7 @@ async def folder_upload(
|
|||
relative_paths: str = Form(...),
|
||||
root_folder_id: int | None = Form(None),
|
||||
enable_summary: bool = Form(False),
|
||||
use_vision_llm: bool = Form(False),
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
|
|
@ -1616,6 +1619,7 @@ async def folder_upload(
|
|||
folder_name=folder_name,
|
||||
root_folder_id=root_folder_id,
|
||||
enable_summary=enable_summary,
|
||||
use_vision_llm=use_vision_llm,
|
||||
file_mappings=list(file_mappings),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ class SearchSourceConnectorBase(BaseModel):
|
|||
last_indexed_at: datetime | None = None
|
||||
config: dict[str, Any]
|
||||
enable_summary: bool = False
|
||||
enable_vision_llm: bool = False
|
||||
periodic_indexing_enabled: bool = False
|
||||
indexing_frequency_minutes: int | None = None
|
||||
next_scheduled_at: datetime | None = None
|
||||
|
|
@ -67,6 +68,7 @@ class SearchSourceConnectorUpdate(BaseModel):
|
|||
last_indexed_at: datetime | None = None
|
||||
config: dict[str, Any] | None = None
|
||||
enable_summary: bool | None = None
|
||||
enable_vision_llm: bool | None = None
|
||||
periodic_indexing_enabled: bool | None = None
|
||||
indexing_frequency_minutes: int | None = None
|
||||
next_scheduled_at: datetime | None = None
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ class TaskDispatcher(Protocol):
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
should_summarize: bool = False,
|
||||
use_vision_llm: bool = False,
|
||||
) -> None: ...
|
||||
|
||||
|
||||
|
|
@ -34,6 +35,7 @@ class CeleryTaskDispatcher:
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
should_summarize: bool = False,
|
||||
use_vision_llm: bool = False,
|
||||
) -> None:
|
||||
from app.tasks.celery_tasks.document_tasks import (
|
||||
process_file_upload_with_document_task,
|
||||
|
|
@ -46,6 +48,7 @@ class CeleryTaskDispatcher:
|
|||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
should_summarize=should_summarize,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -778,6 +778,7 @@ def process_file_upload_with_document_task(
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
should_summarize: bool = False,
|
||||
use_vision_llm: bool = False,
|
||||
):
|
||||
"""
|
||||
Celery task to process uploaded file with existing pending document.
|
||||
|
|
@ -833,6 +834,7 @@ def process_file_upload_with_document_task(
|
|||
search_space_id,
|
||||
user_id,
|
||||
should_summarize=should_summarize,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
)
|
||||
logger.info(
|
||||
|
|
@ -869,6 +871,7 @@ async def _process_file_with_document(
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
should_summarize: bool = False,
|
||||
use_vision_llm: bool = False,
|
||||
):
|
||||
"""
|
||||
Process file and update existing pending document status.
|
||||
|
|
@ -971,6 +974,7 @@ async def _process_file_with_document(
|
|||
log_entry=log_entry,
|
||||
notification=notification,
|
||||
should_summarize=should_summarize,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
|
||||
# Update notification on success
|
||||
|
|
@ -1428,6 +1432,7 @@ def index_uploaded_folder_files_task(
|
|||
root_folder_id: int,
|
||||
enable_summary: bool,
|
||||
file_mappings: list[dict],
|
||||
use_vision_llm: bool = False,
|
||||
):
|
||||
"""Celery task to index files uploaded from the desktop app."""
|
||||
loop = asyncio.new_event_loop()
|
||||
|
|
@ -1441,6 +1446,7 @@ def index_uploaded_folder_files_task(
|
|||
root_folder_id=root_folder_id,
|
||||
enable_summary=enable_summary,
|
||||
file_mappings=file_mappings,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
|
|
@ -1454,6 +1460,7 @@ async def _index_uploaded_folder_files_async(
|
|||
root_folder_id: int,
|
||||
enable_summary: bool,
|
||||
file_mappings: list[dict],
|
||||
use_vision_llm: bool = False,
|
||||
):
|
||||
"""Run upload-based folder indexing with notification + heartbeat."""
|
||||
file_count = len(file_mappings)
|
||||
|
|
@ -1503,6 +1510,7 @@ async def _index_uploaded_folder_files_async(
|
|||
enable_summary=enable_summary,
|
||||
file_mappings=file_mappings,
|
||||
on_heartbeat_callback=_heartbeat_progress,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
|
||||
if notification:
|
||||
|
|
|
|||
|
|
@ -164,6 +164,7 @@ async def _download_files_parallel(
|
|||
enable_summary: bool,
|
||||
max_concurrency: int = 3,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[list[ConnectorDocument], int]:
|
||||
"""Download and ETL files in parallel. Returns (docs, failed_count)."""
|
||||
results: list[ConnectorDocument] = []
|
||||
|
|
@ -176,7 +177,7 @@ async def _download_files_parallel(
|
|||
nonlocal last_heartbeat, completed_count
|
||||
async with sem:
|
||||
markdown, db_metadata, error = await download_and_extract_content(
|
||||
dropbox_client, file
|
||||
dropbox_client, file, vision_llm=vision_llm
|
||||
)
|
||||
if error or not markdown:
|
||||
file_name = file.get("name", "Unknown")
|
||||
|
|
@ -224,6 +225,7 @@ async def _download_and_index(
|
|||
user_id: str,
|
||||
enable_summary: bool,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int]:
|
||||
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
|
||||
connector_docs, download_failed = await _download_files_parallel(
|
||||
|
|
@ -234,6 +236,7 @@ async def _download_and_index(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
batch_indexed = 0
|
||||
|
|
@ -287,6 +290,7 @@ async def _index_with_delta_sync(
|
|||
max_files: int,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int, str]:
|
||||
"""Delta sync using Dropbox cursor-based change tracking.
|
||||
|
||||
|
|
@ -359,6 +363,7 @@ async def _index_with_delta_sync(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat_callback,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
indexed = renamed_count + batch_indexed
|
||||
|
|
@ -384,6 +389,7 @@ async def _index_full_scan(
|
|||
incremental_sync: bool = True,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int]:
|
||||
"""Full scan indexing of a folder.
|
||||
|
||||
|
|
@ -469,6 +475,7 @@ async def _index_full_scan(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat_callback,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||
|
|
@ -498,6 +505,7 @@ async def _index_selected_files(
|
|||
enable_summary: bool,
|
||||
incremental_sync: bool = True,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int, list[str]]:
|
||||
"""Index user-selected files using the parallel pipeline."""
|
||||
page_limit_service = PageLimitService(session)
|
||||
|
|
@ -557,6 +565,7 @@ async def _index_selected_files(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||
|
|
@ -621,6 +630,13 @@ async def index_dropbox_files(
|
|||
return 0, 0, error_msg, 0
|
||||
|
||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||
vision_llm = None
|
||||
if connector_enable_vision_llm:
|
||||
from app.services.llm_service import get_vision_llm
|
||||
|
||||
vision_llm = await get_vision_llm(session, search_space_id)
|
||||
|
||||
dropbox_client = DropboxClient(session, connector_id)
|
||||
|
||||
indexing_options = items_dict.get("indexing_options", {})
|
||||
|
|
@ -650,6 +666,7 @@ async def index_dropbox_files(
|
|||
user_id=user_id,
|
||||
enable_summary=connector_enable_summary,
|
||||
incremental_sync=incremental_sync,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
total_indexed += indexed
|
||||
total_skipped += skipped
|
||||
|
|
@ -684,6 +701,7 @@ async def index_dropbox_files(
|
|||
log_entry,
|
||||
max_files,
|
||||
enable_summary=connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
folder_cursors[folder_path] = new_cursor
|
||||
total_unsupported += unsup
|
||||
|
|
@ -703,6 +721,7 @@ async def index_dropbox_files(
|
|||
include_subfolders,
|
||||
incremental_sync=incremental_sync,
|
||||
enable_summary=connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
total_unsupported += unsup
|
||||
|
||||
|
|
|
|||
|
|
@ -261,6 +261,7 @@ async def _download_files_parallel(
|
|||
enable_summary: bool,
|
||||
max_concurrency: int = 3,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[list[ConnectorDocument], int]:
|
||||
"""Download and ETL files in parallel, returning ConnectorDocuments.
|
||||
|
||||
|
|
@ -276,7 +277,7 @@ async def _download_files_parallel(
|
|||
nonlocal last_heartbeat, completed_count
|
||||
async with sem:
|
||||
markdown, drive_metadata, error = await download_and_extract_content(
|
||||
drive_client, file
|
||||
drive_client, file, vision_llm=vision_llm
|
||||
)
|
||||
if error or not markdown:
|
||||
file_name = file.get("name", "Unknown")
|
||||
|
|
@ -322,6 +323,7 @@ async def _process_single_file(
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
enable_summary: bool = True,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int]:
|
||||
"""Download, extract, and index a single Drive file via the pipeline.
|
||||
|
||||
|
|
@ -343,7 +345,7 @@ async def _process_single_file(
|
|||
await page_limit_service.check_page_limit(user_id, estimated_pages)
|
||||
|
||||
markdown, drive_metadata, error = await download_and_extract_content(
|
||||
drive_client, file
|
||||
drive_client, file, vision_llm=vision_llm
|
||||
)
|
||||
if error or not markdown:
|
||||
logger.warning(f"ETL failed for {file_name}: {error}")
|
||||
|
|
@ -433,6 +435,7 @@ async def _download_and_index(
|
|||
user_id: str,
|
||||
enable_summary: bool,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int]:
|
||||
"""Phase 2+3: parallel download then parallel indexing.
|
||||
|
||||
|
|
@ -446,6 +449,7 @@ async def _download_and_index(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
batch_indexed = 0
|
||||
|
|
@ -476,6 +480,7 @@ async def _index_selected_files(
|
|||
user_id: str,
|
||||
enable_summary: bool,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int, list[str]]:
|
||||
"""Index user-selected files using the parallel pipeline.
|
||||
|
||||
|
|
@ -540,6 +545,7 @@ async def _index_selected_files(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||
|
|
@ -573,6 +579,7 @@ async def _index_full_scan(
|
|||
include_subfolders: bool = False,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int]:
|
||||
"""Full scan indexing of a folder.
|
||||
|
||||
|
|
@ -703,6 +710,7 @@ async def _index_full_scan(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat_callback,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||
|
|
@ -736,6 +744,7 @@ async def _index_with_delta_sync(
|
|||
include_subfolders: bool = False,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int]:
|
||||
"""Delta sync using change tracking.
|
||||
|
||||
|
|
@ -844,6 +853,7 @@ async def _index_with_delta_sync(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat_callback,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||
|
|
@ -947,6 +957,11 @@ async def index_google_drive_files(
|
|||
)
|
||||
|
||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||
vision_llm = None
|
||||
if connector_enable_vision_llm:
|
||||
from app.services.llm_service import get_vision_llm
|
||||
vision_llm = await get_vision_llm(session, search_space_id)
|
||||
drive_client = GoogleDriveClient(
|
||||
session, connector_id, credentials=pre_built_credentials
|
||||
)
|
||||
|
|
@ -986,6 +1001,7 @@ async def index_google_drive_files(
|
|||
include_subfolders,
|
||||
on_heartbeat_callback,
|
||||
connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
documents_unsupported += du
|
||||
logger.info("Running reconciliation scan after delta sync")
|
||||
|
|
@ -1004,6 +1020,7 @@ async def index_google_drive_files(
|
|||
include_subfolders,
|
||||
on_heartbeat_callback,
|
||||
connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
documents_indexed += ri
|
||||
documents_skipped += rs
|
||||
|
|
@ -1029,6 +1046,7 @@ async def index_google_drive_files(
|
|||
include_subfolders,
|
||||
on_heartbeat_callback,
|
||||
connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if documents_indexed > 0 or can_use_delta:
|
||||
|
|
@ -1146,6 +1164,11 @@ async def index_google_drive_single_file(
|
|||
)
|
||||
|
||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||
vision_llm = None
|
||||
if connector_enable_vision_llm:
|
||||
from app.services.llm_service import get_vision_llm
|
||||
vision_llm = await get_vision_llm(session, search_space_id)
|
||||
drive_client = GoogleDriveClient(
|
||||
session, connector_id, credentials=pre_built_credentials
|
||||
)
|
||||
|
|
@ -1168,6 +1191,7 @@ async def index_google_drive_single_file(
|
|||
search_space_id,
|
||||
user_id,
|
||||
connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
|
|
@ -1278,6 +1302,11 @@ async def index_google_drive_selected_files(
|
|||
return 0, 0, [error_msg]
|
||||
|
||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||
vision_llm = None
|
||||
if connector_enable_vision_llm:
|
||||
from app.services.llm_service import get_vision_llm
|
||||
vision_llm = await get_vision_llm(session, search_space_id)
|
||||
drive_client = GoogleDriveClient(
|
||||
session, connector_id, credentials=pre_built_credentials
|
||||
)
|
||||
|
|
@ -1291,6 +1320,7 @@ async def index_google_drive_selected_files(
|
|||
user_id=user_id,
|
||||
enable_summary=connector_enable_summary,
|
||||
on_heartbeat=on_heartbeat_callback,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if unsupported > 0:
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ def scan_folder(
|
|||
return files
|
||||
|
||||
|
||||
async def _read_file_content(file_path: str, filename: str) -> str:
|
||||
async def _read_file_content(file_path: str, filename: str, *, vision_llm=None) -> str:
|
||||
"""Read file content via the unified ETL pipeline.
|
||||
|
||||
All file types (plaintext, audio, direct-convert, document, image) are
|
||||
|
|
@ -162,7 +162,7 @@ async def _read_file_content(file_path: str, filename: str) -> str:
|
|||
from app.etl_pipeline.etl_document import EtlRequest
|
||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
||||
|
||||
result = await EtlPipelineService().extract(
|
||||
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
||||
EtlRequest(file_path=file_path, filename=filename)
|
||||
)
|
||||
return result.markdown_content
|
||||
|
|
@ -199,12 +199,14 @@ async def _compute_file_content_hash(
|
|||
file_path: str,
|
||||
filename: str,
|
||||
search_space_id: int,
|
||||
*,
|
||||
vision_llm=None,
|
||||
) -> tuple[str, str]:
|
||||
"""Read a file (via ETL if needed) and compute its content hash.
|
||||
|
||||
Returns (content_text, content_hash).
|
||||
"""
|
||||
content = await _read_file_content(file_path, filename)
|
||||
content = await _read_file_content(file_path, filename, vision_llm=vision_llm)
|
||||
return content, _content_hash(content, search_space_id)
|
||||
|
||||
|
||||
|
|
@ -1268,6 +1270,7 @@ async def index_uploaded_files(
|
|||
enable_summary: bool,
|
||||
file_mappings: list[dict],
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
use_vision_llm: bool = False,
|
||||
) -> tuple[int, int, str | None]:
|
||||
"""Index files uploaded from the desktop app via temp paths.
|
||||
|
||||
|
|
@ -1304,6 +1307,12 @@ async def index_uploaded_files(
|
|||
pipeline = IndexingPipelineService(session)
|
||||
llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
||||
|
||||
vision_llm_instance = None
|
||||
if use_vision_llm:
|
||||
from app.services.llm_service import get_vision_llm
|
||||
|
||||
vision_llm_instance = await get_vision_llm(session, search_space_id)
|
||||
|
||||
indexed_count = 0
|
||||
failed_count = 0
|
||||
errors: list[str] = []
|
||||
|
|
@ -1351,7 +1360,8 @@ async def index_uploaded_files(
|
|||
|
||||
try:
|
||||
content, content_hash = await _compute_file_content_hash(
|
||||
temp_path, filename, search_space_id
|
||||
temp_path, filename, search_space_id,
|
||||
vision_llm=vision_llm_instance,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not read {relative_path}: {e}")
|
||||
|
|
|
|||
|
|
@ -171,6 +171,7 @@ async def _download_files_parallel(
|
|||
enable_summary: bool,
|
||||
max_concurrency: int = 3,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[list[ConnectorDocument], int]:
|
||||
"""Download and ETL files in parallel. Returns (docs, failed_count)."""
|
||||
results: list[ConnectorDocument] = []
|
||||
|
|
@ -183,7 +184,7 @@ async def _download_files_parallel(
|
|||
nonlocal last_heartbeat, completed_count
|
||||
async with sem:
|
||||
markdown, od_metadata, error = await download_and_extract_content(
|
||||
onedrive_client, file
|
||||
onedrive_client, file, vision_llm=vision_llm
|
||||
)
|
||||
if error or not markdown:
|
||||
file_name = file.get("name", "Unknown")
|
||||
|
|
@ -231,6 +232,7 @@ async def _download_and_index(
|
|||
user_id: str,
|
||||
enable_summary: bool,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int]:
|
||||
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
|
||||
connector_docs, download_failed = await _download_files_parallel(
|
||||
|
|
@ -241,6 +243,7 @@ async def _download_and_index(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
batch_indexed = 0
|
||||
|
|
@ -293,6 +296,7 @@ async def _index_selected_files(
|
|||
user_id: str,
|
||||
enable_summary: bool,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int, list[str]]:
|
||||
"""Index user-selected files using the parallel pipeline."""
|
||||
page_limit_service = PageLimitService(session)
|
||||
|
|
@ -343,6 +347,7 @@ async def _index_selected_files(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||
|
|
@ -375,6 +380,7 @@ async def _index_full_scan(
|
|||
include_subfolders: bool = True,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int]:
|
||||
"""Full scan indexing of a folder.
|
||||
|
||||
|
|
@ -450,6 +456,7 @@ async def _index_full_scan(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat_callback,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||
|
|
@ -481,6 +488,7 @@ async def _index_with_delta_sync(
|
|||
max_files: int,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
vision_llm=None,
|
||||
) -> tuple[int, int, int, str | None]:
|
||||
"""Delta sync using OneDrive change tracking.
|
||||
|
||||
|
|
@ -573,6 +581,7 @@ async def _index_with_delta_sync(
|
|||
user_id=user_id,
|
||||
enable_summary=enable_summary,
|
||||
on_heartbeat=on_heartbeat_callback,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||
|
|
@ -643,6 +652,12 @@ async def index_onedrive_files(
|
|||
return 0, 0, error_msg, 0
|
||||
|
||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||
vision_llm = None
|
||||
if connector_enable_vision_llm:
|
||||
from app.services.llm_service import get_vision_llm
|
||||
vision_llm = await get_vision_llm(session, search_space_id)
|
||||
|
||||
onedrive_client = OneDriveClient(session, connector_id)
|
||||
|
||||
indexing_options = items_dict.get("indexing_options", {})
|
||||
|
|
@ -666,6 +681,7 @@ async def index_onedrive_files(
|
|||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
enable_summary=connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
total_indexed += indexed
|
||||
total_skipped += skipped
|
||||
|
|
@ -695,6 +711,7 @@ async def index_onedrive_files(
|
|||
log_entry,
|
||||
max_files,
|
||||
enable_summary=connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
total_indexed += indexed
|
||||
total_skipped += skipped
|
||||
|
|
@ -721,6 +738,7 @@ async def index_onedrive_files(
|
|||
max_files,
|
||||
include_subfolders,
|
||||
enable_summary=connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
total_indexed += ri
|
||||
total_skipped += rs
|
||||
|
|
@ -740,6 +758,7 @@ async def index_onedrive_files(
|
|||
max_files,
|
||||
include_subfolders,
|
||||
enable_summary=connector_enable_summary,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
total_indexed += indexed
|
||||
total_skipped += skipped
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ class _ProcessingContext:
|
|||
log_entry: Log
|
||||
connector: dict | None = None
|
||||
notification: Notification | None = None
|
||||
use_vision_llm: bool = False
|
||||
enable_summary: bool = field(init=False)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
|
|
@ -134,7 +135,7 @@ async def _process_non_document_upload(ctx: _ProcessingContext) -> Document | No
|
|||
)
|
||||
|
||||
vision_llm = None
|
||||
if etl_classify(ctx.filename) == FileCategory.IMAGE:
|
||||
if ctx.use_vision_llm and etl_classify(ctx.filename) == FileCategory.IMAGE:
|
||||
from app.services.llm_service import get_vision_llm
|
||||
|
||||
vision_llm = await get_vision_llm(ctx.session, ctx.search_space_id)
|
||||
|
|
@ -288,6 +289,7 @@ async def process_file_in_background(
|
|||
log_entry: Log,
|
||||
connector: dict | None = None,
|
||||
notification: Notification | None = None,
|
||||
use_vision_llm: bool = False,
|
||||
) -> Document | None:
|
||||
ctx = _ProcessingContext(
|
||||
session=session,
|
||||
|
|
@ -299,6 +301,7 @@ async def process_file_in_background(
|
|||
log_entry=log_entry,
|
||||
connector=connector,
|
||||
notification=notification,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
@ -349,6 +352,7 @@ async def _extract_file_content(
|
|||
task_logger: TaskLoggingService,
|
||||
log_entry: Log,
|
||||
notification: Notification | None,
|
||||
use_vision_llm: bool = False,
|
||||
) -> tuple[str, str]:
|
||||
"""
|
||||
Extract markdown content from a file regardless of type.
|
||||
|
|
@ -396,7 +400,7 @@ async def _extract_file_content(
|
|||
await page_limit_service.check_page_limit(user_id, estimated_pages)
|
||||
|
||||
vision_llm = None
|
||||
if category == FileCategory.IMAGE:
|
||||
if use_vision_llm and category == FileCategory.IMAGE:
|
||||
from app.services.llm_service import get_vision_llm
|
||||
|
||||
vision_llm = await get_vision_llm(session, search_space_id)
|
||||
|
|
@ -435,6 +439,7 @@ async def process_file_in_background_with_document(
|
|||
connector: dict | None = None,
|
||||
notification: Notification | None = None,
|
||||
should_summarize: bool = False,
|
||||
use_vision_llm: bool = False,
|
||||
) -> Document | None:
|
||||
"""
|
||||
Process file and update existing pending document (2-phase pattern).
|
||||
|
|
@ -463,6 +468,7 @@ async def process_file_in_background_with_document(
|
|||
task_logger,
|
||||
log_entry,
|
||||
notification,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
|
||||
if not markdown_content:
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@ class InlineTaskDispatcher:
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
should_summarize: bool = False,
|
||||
use_vision_llm: bool = False,
|
||||
) -> None:
|
||||
from app.tasks.celery_tasks.document_tasks import (
|
||||
_process_file_with_document,
|
||||
|
|
@ -82,6 +83,7 @@ class InlineTaskDispatcher:
|
|||
search_space_id,
|
||||
user_id,
|
||||
should_summarize=should_summarize,
|
||||
use_vision_llm=use_vision_llm,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -98,6 +98,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
|||
periodicEnabled,
|
||||
frequencyMinutes,
|
||||
enableSummary,
|
||||
enableVisionLlm,
|
||||
allConnectors,
|
||||
viewingAccountsType,
|
||||
viewingMCPList,
|
||||
|
|
@ -109,6 +110,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
|||
setPeriodicEnabled,
|
||||
setFrequencyMinutes,
|
||||
setEnableSummary,
|
||||
setEnableVisionLlm,
|
||||
handleOpenChange,
|
||||
handleTabChange,
|
||||
handleScroll,
|
||||
|
|
@ -279,6 +281,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
|||
periodicEnabled={periodicEnabled}
|
||||
frequencyMinutes={frequencyMinutes}
|
||||
enableSummary={enableSummary}
|
||||
enableVisionLlm={enableVisionLlm}
|
||||
isSaving={isSaving}
|
||||
isDisconnecting={isDisconnecting}
|
||||
isIndexing={indexingConnectorIds.has(editingConnector.id)}
|
||||
|
|
@ -288,6 +291,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
|||
onPeriodicEnabledChange={setPeriodicEnabled}
|
||||
onFrequencyChange={setFrequencyMinutes}
|
||||
onEnableSummaryChange={setEnableSummary}
|
||||
onEnableVisionLlmChange={setEnableVisionLlm}
|
||||
onSave={() => {
|
||||
startIndexing(editingConnector.id);
|
||||
handleSaveConnector(() => refreshConnectors());
|
||||
|
|
@ -336,6 +340,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
|||
periodicEnabled={periodicEnabled}
|
||||
frequencyMinutes={frequencyMinutes}
|
||||
enableSummary={enableSummary}
|
||||
enableVisionLlm={enableVisionLlm}
|
||||
isStartingIndexing={isStartingIndexing}
|
||||
isFromOAuth={isFromOAuth}
|
||||
onStartDateChange={setStartDate}
|
||||
|
|
@ -343,6 +348,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
|||
onPeriodicEnabledChange={setPeriodicEnabled}
|
||||
onFrequencyChange={setFrequencyMinutes}
|
||||
onEnableSummaryChange={setEnableSummary}
|
||||
onEnableVisionLlmChange={setEnableVisionLlm}
|
||||
onConfigChange={setIndexingConnectorConfig}
|
||||
onStartIndexing={() => {
|
||||
if (indexingConfig.connectorId) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,25 @@
|
|||
"use client";
|
||||
|
||||
import type { FC } from "react";
|
||||
import { Switch } from "@/components/ui/switch";
|
||||
|
||||
interface VisionLLMConfigProps {
|
||||
enabled: boolean;
|
||||
onEnabledChange: (enabled: boolean) => void;
|
||||
}
|
||||
|
||||
export const VisionLLMConfig: FC<VisionLLMConfigProps> = ({ enabled, onEnabledChange }) => {
|
||||
return (
|
||||
<div className="rounded-xl bg-slate-400/5 dark:bg-white/5 p-3 sm:p-6">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="space-y-1">
|
||||
<h3 className="font-medium text-sm sm:text-base">Enable Vision LLM</h3>
|
||||
<p className="text-xs sm:text-sm text-muted-foreground">
|
||||
Describes images using AI vision (costly, slower)
|
||||
</p>
|
||||
</div>
|
||||
<Switch checked={enabled} onCheckedChange={onEnabledChange} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
|
@ -15,6 +15,7 @@ import { cn } from "@/lib/utils";
|
|||
import { DateRangeSelector } from "../../components/date-range-selector";
|
||||
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
|
||||
import { SummaryConfig } from "../../components/summary-config";
|
||||
import { VisionLLMConfig } from "../../components/vision-llm-config";
|
||||
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
|
||||
import { getConnectorConfigComponent } from "../index";
|
||||
|
||||
|
|
@ -38,6 +39,7 @@ interface ConnectorEditViewProps {
|
|||
periodicEnabled: boolean;
|
||||
frequencyMinutes: string;
|
||||
enableSummary: boolean;
|
||||
enableVisionLlm: boolean;
|
||||
isSaving: boolean;
|
||||
isDisconnecting: boolean;
|
||||
isIndexing?: boolean;
|
||||
|
|
@ -47,6 +49,7 @@ interface ConnectorEditViewProps {
|
|||
onPeriodicEnabledChange: (enabled: boolean) => void;
|
||||
onFrequencyChange: (frequency: string) => void;
|
||||
onEnableSummaryChange: (enabled: boolean) => void;
|
||||
onEnableVisionLlmChange: (enabled: boolean) => void;
|
||||
onSave: () => void;
|
||||
onDisconnect: () => void;
|
||||
onBack: () => void;
|
||||
|
|
@ -62,6 +65,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
|||
periodicEnabled,
|
||||
frequencyMinutes,
|
||||
enableSummary,
|
||||
enableVisionLlm,
|
||||
isSaving,
|
||||
isDisconnecting,
|
||||
isIndexing = false,
|
||||
|
|
@ -71,6 +75,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
|||
onPeriodicEnabledChange,
|
||||
onFrequencyChange,
|
||||
onEnableSummaryChange,
|
||||
onEnableVisionLlmChange,
|
||||
onSave,
|
||||
onDisconnect,
|
||||
onBack,
|
||||
|
|
@ -272,6 +277,14 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
|||
{/* AI Summary toggle */}
|
||||
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
|
||||
|
||||
{/* Vision LLM toggle - only for file-based connectors */}
|
||||
{(connector.connector_type === "GOOGLE_DRIVE_CONNECTOR" ||
|
||||
connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
|
||||
connector.connector_type === "DROPBOX_CONNECTOR" ||
|
||||
connector.connector_type === "ONEDRIVE_CONNECTOR") && (
|
||||
<VisionLLMConfig enabled={enableVisionLlm} onEnabledChange={onEnableVisionLlmChange} />
|
||||
)}
|
||||
|
||||
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
|
||||
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||
connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import { cn } from "@/lib/utils";
|
|||
import { DateRangeSelector } from "../../components/date-range-selector";
|
||||
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
|
||||
import { SummaryConfig } from "../../components/summary-config";
|
||||
import { VisionLLMConfig } from "../../components/vision-llm-config";
|
||||
import type { IndexingConfigState } from "../../constants/connector-constants";
|
||||
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
|
||||
import { getConnectorConfigComponent } from "../index";
|
||||
|
|
@ -22,6 +23,7 @@ interface IndexingConfigurationViewProps {
|
|||
periodicEnabled: boolean;
|
||||
frequencyMinutes: string;
|
||||
enableSummary: boolean;
|
||||
enableVisionLlm: boolean;
|
||||
isStartingIndexing: boolean;
|
||||
isFromOAuth?: boolean;
|
||||
onStartDateChange: (date: Date | undefined) => void;
|
||||
|
|
@ -29,6 +31,7 @@ interface IndexingConfigurationViewProps {
|
|||
onPeriodicEnabledChange: (enabled: boolean) => void;
|
||||
onFrequencyChange: (frequency: string) => void;
|
||||
onEnableSummaryChange: (enabled: boolean) => void;
|
||||
onEnableVisionLlmChange: (enabled: boolean) => void;
|
||||
onConfigChange?: (config: Record<string, unknown>) => void;
|
||||
onStartIndexing: () => void;
|
||||
onSkip: () => void;
|
||||
|
|
@ -42,6 +45,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
|||
periodicEnabled,
|
||||
frequencyMinutes,
|
||||
enableSummary,
|
||||
enableVisionLlm,
|
||||
isStartingIndexing,
|
||||
isFromOAuth = false,
|
||||
onStartDateChange,
|
||||
|
|
@ -49,6 +53,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
|||
onPeriodicEnabledChange,
|
||||
onFrequencyChange,
|
||||
onEnableSummaryChange,
|
||||
onEnableVisionLlmChange,
|
||||
onConfigChange,
|
||||
onStartIndexing,
|
||||
onSkip,
|
||||
|
|
@ -158,6 +163,14 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
|||
{/* AI Summary toggle */}
|
||||
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
|
||||
|
||||
{/* Vision LLM toggle - only for file-based connectors */}
|
||||
{(config.connectorType === "GOOGLE_DRIVE_CONNECTOR" ||
|
||||
config.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
|
||||
config.connectorType === "DROPBOX_CONNECTOR" ||
|
||||
config.connectorType === "ONEDRIVE_CONNECTOR") && (
|
||||
<VisionLLMConfig enabled={enableVisionLlm} onEnabledChange={onEnableVisionLlmChange} />
|
||||
)}
|
||||
|
||||
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
|
||||
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ export const useConnectorDialog = () => {
|
|||
const [periodicEnabled, setPeriodicEnabled] = useState(false);
|
||||
const [frequencyMinutes, setFrequencyMinutes] = useState("1440");
|
||||
const [enableSummary, setEnableSummary] = useState(false);
|
||||
const [enableVisionLlm, setEnableVisionLlm] = useState(false);
|
||||
|
||||
// Edit mode state
|
||||
const [editingConnector, setEditingConnector] = useState<SearchSourceConnector | null>(null);
|
||||
|
|
@ -621,6 +622,7 @@ export const useConnectorDialog = () => {
|
|||
setPeriodicEnabled(false);
|
||||
setFrequencyMinutes("1440");
|
||||
setEnableSummary(connector.enable_summary ?? false);
|
||||
setEnableVisionLlm(connector.enable_vision_llm ?? false);
|
||||
setStartDate(undefined);
|
||||
setEndDate(undefined);
|
||||
|
||||
|
|
@ -763,12 +765,13 @@ export const useConnectorDialog = () => {
|
|||
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
|
||||
|
||||
// Update connector with summary, periodic sync settings, and config changes
|
||||
if (enableSummary || periodicEnabled || indexingConnectorConfig) {
|
||||
const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined;
|
||||
if (enableSummary || enableVisionLlm || periodicEnabled || indexingConnectorConfig) {
|
||||
const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined;
|
||||
await updateConnector({
|
||||
id: indexingConfig.connectorId,
|
||||
data: {
|
||||
enable_summary: enableSummary,
|
||||
enable_vision_llm: enableVisionLlm,
|
||||
...(periodicEnabled && {
|
||||
periodic_indexing_enabled: true,
|
||||
indexing_frequency_minutes: frequency,
|
||||
|
|
@ -896,6 +899,7 @@ export const useConnectorDialog = () => {
|
|||
periodicEnabled,
|
||||
frequencyMinutes,
|
||||
enableSummary,
|
||||
enableVisionLlm,
|
||||
indexingConnectorConfig,
|
||||
setIsOpen,
|
||||
]
|
||||
|
|
@ -960,6 +964,7 @@ export const useConnectorDialog = () => {
|
|||
setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled);
|
||||
setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440");
|
||||
setEnableSummary(connector.enable_summary ?? false);
|
||||
setEnableVisionLlm(connector.enable_vision_llm ?? false);
|
||||
setStartDate(undefined);
|
||||
setEndDate(undefined);
|
||||
},
|
||||
|
|
@ -1038,6 +1043,7 @@ export const useConnectorDialog = () => {
|
|||
data: {
|
||||
name: connectorName || editingConnector.name,
|
||||
enable_summary: enableSummary,
|
||||
enable_vision_llm: enableVisionLlm,
|
||||
periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled,
|
||||
indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency,
|
||||
config: connectorConfig || editingConnector.config,
|
||||
|
|
@ -1172,6 +1178,7 @@ export const useConnectorDialog = () => {
|
|||
periodicEnabled,
|
||||
frequencyMinutes,
|
||||
enableSummary,
|
||||
enableVisionLlm,
|
||||
getFrequencyLabel,
|
||||
connectorConfig,
|
||||
connectorName,
|
||||
|
|
@ -1332,6 +1339,7 @@ export const useConnectorDialog = () => {
|
|||
setPeriodicEnabled(false);
|
||||
setFrequencyMinutes("1440");
|
||||
setEnableSummary(false);
|
||||
setEnableVisionLlm(false);
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -1368,6 +1376,7 @@ export const useConnectorDialog = () => {
|
|||
periodicEnabled,
|
||||
frequencyMinutes,
|
||||
enableSummary,
|
||||
enableVisionLlm,
|
||||
searchSpaceId,
|
||||
allConnectors,
|
||||
viewingAccountsType,
|
||||
|
|
@ -1382,6 +1391,7 @@ export const useConnectorDialog = () => {
|
|||
setPeriodicEnabled,
|
||||
setFrequencyMinutes,
|
||||
setEnableSummary,
|
||||
setEnableVisionLlm,
|
||||
setConnectorName,
|
||||
|
||||
// Handlers
|
||||
|
|
|
|||
|
|
@ -136,6 +136,7 @@ export function DocumentUploadTab({
|
|||
const [uploadProgress, setUploadProgress] = useState(0);
|
||||
const [accordionValue, setAccordionValue] = useState<string>("");
|
||||
const [shouldSummarize, setShouldSummarize] = useState(false);
|
||||
const [useVisionLlm, setUseVisionLlm] = useState(false);
|
||||
const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom);
|
||||
const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation;
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
|
|
@ -361,6 +362,7 @@ export function DocumentUploadTab({
|
|||
relative_paths: batch.map((e) => e.relativePath),
|
||||
root_folder_id: rootFolderId,
|
||||
enable_summary: shouldSummarize,
|
||||
use_vision_llm: useVisionLlm,
|
||||
}
|
||||
);
|
||||
|
||||
|
|
@ -407,6 +409,7 @@ export function DocumentUploadTab({
|
|||
files: rawFiles,
|
||||
search_space_id: Number(searchSpaceId),
|
||||
should_summarize: shouldSummarize,
|
||||
use_vision_llm: useVisionLlm,
|
||||
},
|
||||
{
|
||||
onSuccess: () => {
|
||||
|
|
@ -696,6 +699,16 @@ export function DocumentUploadTab({
|
|||
<Switch checked={shouldSummarize} onCheckedChange={setShouldSummarize} />
|
||||
</div>
|
||||
|
||||
<div className={toggleRowClass}>
|
||||
<div className="space-y-0.5">
|
||||
<p className="font-medium text-sm">Enable Vision LLM</p>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Describes images using AI vision (costly, slower)
|
||||
</p>
|
||||
</div>
|
||||
<Switch checked={useVisionLlm} onCheckedChange={setUseVisionLlm} />
|
||||
</div>
|
||||
|
||||
<Button
|
||||
className="w-full"
|
||||
onClick={handleUpload}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ export const searchSourceConnector = z.object({
|
|||
last_indexed_at: z.string().nullable(),
|
||||
config: z.record(z.string(), z.any()),
|
||||
enable_summary: z.boolean().default(false),
|
||||
enable_vision_llm: z.boolean().default(false),
|
||||
periodic_indexing_enabled: z.boolean(),
|
||||
indexing_frequency_minutes: z.number().nullable(),
|
||||
next_scheduled_at: z.string().nullable(),
|
||||
|
|
@ -98,6 +99,7 @@ export const createConnectorRequest = z.object({
|
|||
last_indexed_at: true,
|
||||
config: true,
|
||||
enable_summary: true,
|
||||
enable_vision_llm: true,
|
||||
periodic_indexing_enabled: true,
|
||||
indexing_frequency_minutes: true,
|
||||
next_scheduled_at: true,
|
||||
|
|
@ -123,6 +125,7 @@ export const updateConnectorRequest = z.object({
|
|||
last_indexed_at: true,
|
||||
config: true,
|
||||
enable_summary: true,
|
||||
enable_vision_llm: true,
|
||||
periodic_indexing_enabled: true,
|
||||
indexing_frequency_minutes: true,
|
||||
next_scheduled_at: true,
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ export const uploadDocumentRequest = z.object({
|
|||
files: z.array(z.instanceof(File)),
|
||||
search_space_id: z.number(),
|
||||
should_summarize: z.boolean().default(false),
|
||||
use_vision_llm: z.boolean().default(false),
|
||||
});
|
||||
|
||||
export const uploadDocumentResponse = z.object({
|
||||
|
|
|
|||
|
|
@ -127,7 +127,7 @@ class DocumentsApiService {
|
|||
throw new ValidationError(`Invalid request: ${errorMessage}`);
|
||||
}
|
||||
|
||||
const { files, search_space_id, should_summarize } = parsedRequest.data;
|
||||
const { files, search_space_id, should_summarize, use_vision_llm } = parsedRequest.data;
|
||||
const UPLOAD_BATCH_SIZE = 5;
|
||||
|
||||
const batches: File[][] = [];
|
||||
|
|
@ -146,6 +146,7 @@ class DocumentsApiService {
|
|||
for (const file of batch) formData.append("files", file);
|
||||
formData.append("search_space_id", String(search_space_id));
|
||||
formData.append("should_summarize", String(should_summarize));
|
||||
formData.append("use_vision_llm", String(use_vision_llm));
|
||||
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), 120_000);
|
||||
|
|
@ -442,6 +443,7 @@ class DocumentsApiService {
|
|||
relative_paths: string[];
|
||||
root_folder_id?: number | null;
|
||||
enable_summary?: boolean;
|
||||
use_vision_llm?: boolean;
|
||||
},
|
||||
signal?: AbortSignal
|
||||
): Promise<{ message: string; status: string; root_folder_id: number; file_count: number }> => {
|
||||
|
|
@ -456,6 +458,7 @@ class DocumentsApiService {
|
|||
formData.append("root_folder_id", String(metadata.root_folder_id));
|
||||
}
|
||||
formData.append("enable_summary", String(metadata.enable_summary ?? false));
|
||||
formData.append("use_vision_llm", String(metadata.use_vision_llm ?? false));
|
||||
|
||||
const totalSize = files.reduce((acc, f) => acc + f.size, 0);
|
||||
const timeoutMs = Math.min(Math.max((totalSize / (1024 * 1024)) * 5000, 30_000), 600_000);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue