mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-29 10:56:24 +02:00
Make Vision LLM opt-in for uploads and connectors
This commit is contained in:
parent
0aefcbd504
commit
a95bf58c8f
24 changed files with 276 additions and 20 deletions
|
|
@ -0,0 +1,45 @@
|
||||||
|
"""121_add_enable_vision_llm_to_connectors
|
||||||
|
|
||||||
|
Revision ID: 121
|
||||||
|
Revises: 120
|
||||||
|
Create Date: 2026-04-09
|
||||||
|
|
||||||
|
Adds enable_vision_llm boolean column to search_source_connectors.
|
||||||
|
Defaults to False so vision LLM image processing is opt-in.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
revision: str = "121"
# The migration this one builds on; alembic walks this chain to order upgrades.
down_revision: str | None = "120"
# No branch labels or cross-branch dependencies for this linear revision.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the ``enable_vision_llm`` column to ``search_source_connectors``.

    The column is only created when it does not already exist, so re-running
    the migration is safe. It is ``NOT NULL`` with a ``false`` server default,
    keeping vision-LLM image processing opt-in for existing connectors.
    """
    bind = op.get_bind()
    inspector = sa.inspect(bind)
    column_names = {
        column["name"]
        for column in inspector.get_columns("search_source_connectors")
    }

    if "enable_vision_llm" in column_names:
        # Column already present (e.g. created out-of-band); nothing to do.
        return

    op.add_column(
        "search_source_connectors",
        sa.Column(
            "enable_vision_llm",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Remove the ``enable_vision_llm`` column from ``search_source_connectors``.

    Mirrors the existence check performed in ``upgrade()`` so the downgrade is
    idempotent as well: the drop is skipped when the column is absent (for
    example, when ``upgrade()`` found it pre-existing and did not add it, or
    when the downgrade is re-run).
    """
    conn = op.get_bind()
    existing_columns = [
        col["name"] for col in sa.inspect(conn).get_columns("search_source_connectors")
    ]

    if "enable_vision_llm" in existing_columns:
        op.drop_column("search_source_connectors", "enable_vision_llm")
|
||||||
|
|
@ -44,6 +44,8 @@ async def _export_paper_content(
|
||||||
async def download_and_extract_content(
|
async def download_and_extract_content(
|
||||||
client: DropboxClient,
|
client: DropboxClient,
|
||||||
file: dict[str, Any],
|
file: dict[str, Any],
|
||||||
|
*,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[str | None, dict[str, Any], str | None]:
|
) -> tuple[str | None, dict[str, Any], str | None]:
|
||||||
"""Download a Dropbox file and extract its content as markdown.
|
"""Download a Dropbox file and extract its content as markdown.
|
||||||
|
|
||||||
|
|
@ -91,7 +93,7 @@ async def download_and_extract_content(
|
||||||
from app.etl_pipeline.etl_document import EtlRequest
|
from app.etl_pipeline.etl_document import EtlRequest
|
||||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
||||||
|
|
||||||
result = await EtlPipelineService().extract(
|
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
||||||
EtlRequest(file_path=temp_file_path, filename=file_name)
|
EtlRequest(file_path=temp_file_path, filename=file_name)
|
||||||
)
|
)
|
||||||
markdown = result.markdown_content
|
markdown = result.markdown_content
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,8 @@ logger = logging.getLogger(__name__)
|
||||||
async def download_and_extract_content(
|
async def download_and_extract_content(
|
||||||
client: GoogleDriveClient,
|
client: GoogleDriveClient,
|
||||||
file: dict[str, Any],
|
file: dict[str, Any],
|
||||||
|
*,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[str | None, dict[str, Any], str | None]:
|
) -> tuple[str | None, dict[str, Any], str | None]:
|
||||||
"""Download a Google Drive file and extract its content as markdown.
|
"""Download a Google Drive file and extract its content as markdown.
|
||||||
|
|
||||||
|
|
@ -103,7 +105,9 @@ async def download_and_extract_content(
|
||||||
etl_filename = (
|
etl_filename = (
|
||||||
file_name + extension if is_google_workspace_file(mime_type) else file_name
|
file_name + extension if is_google_workspace_file(mime_type) else file_name
|
||||||
)
|
)
|
||||||
markdown = await _parse_file_to_markdown(temp_file_path, etl_filename)
|
markdown = await _parse_file_to_markdown(
|
||||||
|
temp_file_path, etl_filename, vision_llm=vision_llm
|
||||||
|
)
|
||||||
return markdown, drive_metadata, None
|
return markdown, drive_metadata, None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -115,12 +119,14 @@ async def download_and_extract_content(
|
||||||
os.unlink(temp_file_path)
|
os.unlink(temp_file_path)
|
||||||
|
|
||||||
|
|
||||||
async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
async def _parse_file_to_markdown(
|
||||||
|
file_path: str, filename: str, *, vision_llm=None
|
||||||
|
) -> str:
|
||||||
"""Parse a local file to markdown using the unified ETL pipeline."""
|
"""Parse a local file to markdown using the unified ETL pipeline."""
|
||||||
from app.etl_pipeline.etl_document import EtlRequest
|
from app.etl_pipeline.etl_document import EtlRequest
|
||||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
||||||
|
|
||||||
result = await EtlPipelineService().extract(
|
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
||||||
EtlRequest(file_path=file_path, filename=filename)
|
EtlRequest(file_path=file_path, filename=filename)
|
||||||
)
|
)
|
||||||
return result.markdown_content
|
return result.markdown_content
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,8 @@ logger = logging.getLogger(__name__)
|
||||||
async def download_and_extract_content(
|
async def download_and_extract_content(
|
||||||
client: OneDriveClient,
|
client: OneDriveClient,
|
||||||
file: dict[str, Any],
|
file: dict[str, Any],
|
||||||
|
*,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[str | None, dict[str, Any], str | None]:
|
) -> tuple[str | None, dict[str, Any], str | None]:
|
||||||
"""Download a OneDrive file and extract its content as markdown.
|
"""Download a OneDrive file and extract its content as markdown.
|
||||||
|
|
||||||
|
|
@ -65,7 +67,9 @@ async def download_and_extract_content(
|
||||||
if error:
|
if error:
|
||||||
return None, metadata, error
|
return None, metadata, error
|
||||||
|
|
||||||
markdown = await _parse_file_to_markdown(temp_file_path, file_name)
|
markdown = await _parse_file_to_markdown(
|
||||||
|
temp_file_path, file_name, vision_llm=vision_llm
|
||||||
|
)
|
||||||
return markdown, metadata, None
|
return markdown, metadata, None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -77,12 +81,14 @@ async def download_and_extract_content(
|
||||||
os.unlink(temp_file_path)
|
os.unlink(temp_file_path)
|
||||||
|
|
||||||
|
|
||||||
async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
async def _parse_file_to_markdown(
|
||||||
|
file_path: str, filename: str, *, vision_llm=None
|
||||||
|
) -> str:
|
||||||
"""Parse a local file to markdown using the unified ETL pipeline."""
|
"""Parse a local file to markdown using the unified ETL pipeline."""
|
||||||
from app.etl_pipeline.etl_document import EtlRequest
|
from app.etl_pipeline.etl_document import EtlRequest
|
||||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
||||||
|
|
||||||
result = await EtlPipelineService().extract(
|
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
||||||
EtlRequest(file_path=file_path, filename=filename)
|
EtlRequest(file_path=file_path, filename=filename)
|
||||||
)
|
)
|
||||||
return result.markdown_content
|
return result.markdown_content
|
||||||
|
|
|
||||||
|
|
@ -1555,6 +1555,13 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
|
||||||
Boolean, nullable=False, default=False, server_default="false"
|
Boolean, nullable=False, default=False, server_default="false"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Vision LLM for image files - disabled by default to save cost/time.
|
||||||
|
# When enabled, images are described via a vision language model instead
|
||||||
|
# of falling back to the document parser.
|
||||||
|
enable_vision_llm = Column(
|
||||||
|
Boolean, nullable=False, default=False, server_default="false"
|
||||||
|
)
|
||||||
|
|
||||||
# Periodic indexing fields
|
# Periodic indexing fields
|
||||||
periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
|
periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
|
||||||
indexing_frequency_minutes = Column(Integer, nullable=True)
|
indexing_frequency_minutes = Column(Integer, nullable=True)
|
||||||
|
|
|
||||||
|
|
@ -123,6 +123,7 @@ async def create_documents_file_upload(
|
||||||
files: list[UploadFile],
|
files: list[UploadFile],
|
||||||
search_space_id: int = Form(...),
|
search_space_id: int = Form(...),
|
||||||
should_summarize: bool = Form(False),
|
should_summarize: bool = Form(False),
|
||||||
|
use_vision_llm: bool = Form(False),
|
||||||
session: AsyncSession = Depends(get_async_session),
|
session: AsyncSession = Depends(get_async_session),
|
||||||
user: User = Depends(current_active_user),
|
user: User = Depends(current_active_user),
|
||||||
dispatcher: TaskDispatcher = Depends(get_task_dispatcher),
|
dispatcher: TaskDispatcher = Depends(get_task_dispatcher),
|
||||||
|
|
@ -272,6 +273,7 @@ async def create_documents_file_upload(
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
user_id=str(user.id),
|
user_id=str(user.id),
|
||||||
should_summarize=should_summarize,
|
should_summarize=should_summarize,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
@ -1490,6 +1492,7 @@ async def folder_upload(
|
||||||
relative_paths: str = Form(...),
|
relative_paths: str = Form(...),
|
||||||
root_folder_id: int | None = Form(None),
|
root_folder_id: int | None = Form(None),
|
||||||
enable_summary: bool = Form(False),
|
enable_summary: bool = Form(False),
|
||||||
|
use_vision_llm: bool = Form(False),
|
||||||
session: AsyncSession = Depends(get_async_session),
|
session: AsyncSession = Depends(get_async_session),
|
||||||
user: User = Depends(current_active_user),
|
user: User = Depends(current_active_user),
|
||||||
):
|
):
|
||||||
|
|
@ -1616,6 +1619,7 @@ async def folder_upload(
|
||||||
folder_name=folder_name,
|
folder_name=folder_name,
|
||||||
root_folder_id=root_folder_id,
|
root_folder_id=root_folder_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
file_mappings=list(file_mappings),
|
file_mappings=list(file_mappings),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ class SearchSourceConnectorBase(BaseModel):
|
||||||
last_indexed_at: datetime | None = None
|
last_indexed_at: datetime | None = None
|
||||||
config: dict[str, Any]
|
config: dict[str, Any]
|
||||||
enable_summary: bool = False
|
enable_summary: bool = False
|
||||||
|
enable_vision_llm: bool = False
|
||||||
periodic_indexing_enabled: bool = False
|
periodic_indexing_enabled: bool = False
|
||||||
indexing_frequency_minutes: int | None = None
|
indexing_frequency_minutes: int | None = None
|
||||||
next_scheduled_at: datetime | None = None
|
next_scheduled_at: datetime | None = None
|
||||||
|
|
@ -67,6 +68,7 @@ class SearchSourceConnectorUpdate(BaseModel):
|
||||||
last_indexed_at: datetime | None = None
|
last_indexed_at: datetime | None = None
|
||||||
config: dict[str, Any] | None = None
|
config: dict[str, Any] | None = None
|
||||||
enable_summary: bool | None = None
|
enable_summary: bool | None = None
|
||||||
|
enable_vision_llm: bool | None = None
|
||||||
periodic_indexing_enabled: bool | None = None
|
periodic_indexing_enabled: bool | None = None
|
||||||
indexing_frequency_minutes: int | None = None
|
indexing_frequency_minutes: int | None = None
|
||||||
next_scheduled_at: datetime | None = None
|
next_scheduled_at: datetime | None = None
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ class TaskDispatcher(Protocol):
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
user_id: str,
|
user_id: str,
|
||||||
should_summarize: bool = False,
|
should_summarize: bool = False,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
) -> None: ...
|
) -> None: ...
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -34,6 +35,7 @@ class CeleryTaskDispatcher:
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
user_id: str,
|
user_id: str,
|
||||||
should_summarize: bool = False,
|
should_summarize: bool = False,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
from app.tasks.celery_tasks.document_tasks import (
|
from app.tasks.celery_tasks.document_tasks import (
|
||||||
process_file_upload_with_document_task,
|
process_file_upload_with_document_task,
|
||||||
|
|
@ -46,6 +48,7 @@ class CeleryTaskDispatcher:
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
should_summarize=should_summarize,
|
should_summarize=should_summarize,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -778,6 +778,7 @@ def process_file_upload_with_document_task(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
user_id: str,
|
user_id: str,
|
||||||
should_summarize: bool = False,
|
should_summarize: bool = False,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Celery task to process uploaded file with existing pending document.
|
Celery task to process uploaded file with existing pending document.
|
||||||
|
|
@ -833,6 +834,7 @@ def process_file_upload_with_document_task(
|
||||||
search_space_id,
|
search_space_id,
|
||||||
user_id,
|
user_id,
|
||||||
should_summarize=should_summarize,
|
should_summarize=should_summarize,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -869,6 +871,7 @@ async def _process_file_with_document(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
user_id: str,
|
user_id: str,
|
||||||
should_summarize: bool = False,
|
should_summarize: bool = False,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Process file and update existing pending document status.
|
Process file and update existing pending document status.
|
||||||
|
|
@ -971,6 +974,7 @@ async def _process_file_with_document(
|
||||||
log_entry=log_entry,
|
log_entry=log_entry,
|
||||||
notification=notification,
|
notification=notification,
|
||||||
should_summarize=should_summarize,
|
should_summarize=should_summarize,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Update notification on success
|
# Update notification on success
|
||||||
|
|
@ -1428,6 +1432,7 @@ def index_uploaded_folder_files_task(
|
||||||
root_folder_id: int,
|
root_folder_id: int,
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
file_mappings: list[dict],
|
file_mappings: list[dict],
|
||||||
|
use_vision_llm: bool = False,
|
||||||
):
|
):
|
||||||
"""Celery task to index files uploaded from the desktop app."""
|
"""Celery task to index files uploaded from the desktop app."""
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
|
|
@ -1441,6 +1446,7 @@ def index_uploaded_folder_files_task(
|
||||||
root_folder_id=root_folder_id,
|
root_folder_id=root_folder_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
file_mappings=file_mappings,
|
file_mappings=file_mappings,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
|
|
@ -1454,6 +1460,7 @@ async def _index_uploaded_folder_files_async(
|
||||||
root_folder_id: int,
|
root_folder_id: int,
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
file_mappings: list[dict],
|
file_mappings: list[dict],
|
||||||
|
use_vision_llm: bool = False,
|
||||||
):
|
):
|
||||||
"""Run upload-based folder indexing with notification + heartbeat."""
|
"""Run upload-based folder indexing with notification + heartbeat."""
|
||||||
file_count = len(file_mappings)
|
file_count = len(file_mappings)
|
||||||
|
|
@ -1503,6 +1510,7 @@ async def _index_uploaded_folder_files_async(
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
file_mappings=file_mappings,
|
file_mappings=file_mappings,
|
||||||
on_heartbeat_callback=_heartbeat_progress,
|
on_heartbeat_callback=_heartbeat_progress,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if notification:
|
if notification:
|
||||||
|
|
|
||||||
|
|
@ -164,6 +164,7 @@ async def _download_files_parallel(
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
max_concurrency: int = 3,
|
max_concurrency: int = 3,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[list[ConnectorDocument], int]:
|
) -> tuple[list[ConnectorDocument], int]:
|
||||||
"""Download and ETL files in parallel. Returns (docs, failed_count)."""
|
"""Download and ETL files in parallel. Returns (docs, failed_count)."""
|
||||||
results: list[ConnectorDocument] = []
|
results: list[ConnectorDocument] = []
|
||||||
|
|
@ -176,7 +177,7 @@ async def _download_files_parallel(
|
||||||
nonlocal last_heartbeat, completed_count
|
nonlocal last_heartbeat, completed_count
|
||||||
async with sem:
|
async with sem:
|
||||||
markdown, db_metadata, error = await download_and_extract_content(
|
markdown, db_metadata, error = await download_and_extract_content(
|
||||||
dropbox_client, file
|
dropbox_client, file, vision_llm=vision_llm
|
||||||
)
|
)
|
||||||
if error or not markdown:
|
if error or not markdown:
|
||||||
file_name = file.get("name", "Unknown")
|
file_name = file.get("name", "Unknown")
|
||||||
|
|
@ -224,6 +225,7 @@ async def _download_and_index(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int]:
|
) -> tuple[int, int]:
|
||||||
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
|
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
|
||||||
connector_docs, download_failed = await _download_files_parallel(
|
connector_docs, download_failed = await _download_files_parallel(
|
||||||
|
|
@ -234,6 +236,7 @@ async def _download_and_index(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat,
|
on_heartbeat=on_heartbeat,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
batch_indexed = 0
|
batch_indexed = 0
|
||||||
|
|
@ -287,6 +290,7 @@ async def _index_with_delta_sync(
|
||||||
max_files: int,
|
max_files: int,
|
||||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
enable_summary: bool = True,
|
enable_summary: bool = True,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int, str]:
|
) -> tuple[int, int, int, str]:
|
||||||
"""Delta sync using Dropbox cursor-based change tracking.
|
"""Delta sync using Dropbox cursor-based change tracking.
|
||||||
|
|
||||||
|
|
@ -359,6 +363,7 @@ async def _index_with_delta_sync(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat_callback,
|
on_heartbeat=on_heartbeat_callback,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
indexed = renamed_count + batch_indexed
|
indexed = renamed_count + batch_indexed
|
||||||
|
|
@ -384,6 +389,7 @@ async def _index_full_scan(
|
||||||
incremental_sync: bool = True,
|
incremental_sync: bool = True,
|
||||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
enable_summary: bool = True,
|
enable_summary: bool = True,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int]:
|
) -> tuple[int, int, int]:
|
||||||
"""Full scan indexing of a folder.
|
"""Full scan indexing of a folder.
|
||||||
|
|
||||||
|
|
@ -469,6 +475,7 @@ async def _index_full_scan(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat_callback,
|
on_heartbeat=on_heartbeat_callback,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||||
|
|
@ -498,6 +505,7 @@ async def _index_selected_files(
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
incremental_sync: bool = True,
|
incremental_sync: bool = True,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int, list[str]]:
|
) -> tuple[int, int, int, list[str]]:
|
||||||
"""Index user-selected files using the parallel pipeline."""
|
"""Index user-selected files using the parallel pipeline."""
|
||||||
page_limit_service = PageLimitService(session)
|
page_limit_service = PageLimitService(session)
|
||||||
|
|
@ -557,6 +565,7 @@ async def _index_selected_files(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat,
|
on_heartbeat=on_heartbeat,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||||
|
|
@ -621,6 +630,13 @@ async def index_dropbox_files(
|
||||||
return 0, 0, error_msg, 0
|
return 0, 0, error_msg, 0
|
||||||
|
|
||||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||||
|
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||||
|
vision_llm = None
|
||||||
|
if connector_enable_vision_llm:
|
||||||
|
from app.services.llm_service import get_vision_llm
|
||||||
|
|
||||||
|
vision_llm = await get_vision_llm(session, search_space_id)
|
||||||
|
|
||||||
dropbox_client = DropboxClient(session, connector_id)
|
dropbox_client = DropboxClient(session, connector_id)
|
||||||
|
|
||||||
indexing_options = items_dict.get("indexing_options", {})
|
indexing_options = items_dict.get("indexing_options", {})
|
||||||
|
|
@ -650,6 +666,7 @@ async def index_dropbox_files(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=connector_enable_summary,
|
enable_summary=connector_enable_summary,
|
||||||
incremental_sync=incremental_sync,
|
incremental_sync=incremental_sync,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
total_indexed += indexed
|
total_indexed += indexed
|
||||||
total_skipped += skipped
|
total_skipped += skipped
|
||||||
|
|
@ -684,6 +701,7 @@ async def index_dropbox_files(
|
||||||
log_entry,
|
log_entry,
|
||||||
max_files,
|
max_files,
|
||||||
enable_summary=connector_enable_summary,
|
enable_summary=connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
folder_cursors[folder_path] = new_cursor
|
folder_cursors[folder_path] = new_cursor
|
||||||
total_unsupported += unsup
|
total_unsupported += unsup
|
||||||
|
|
@ -703,6 +721,7 @@ async def index_dropbox_files(
|
||||||
include_subfolders,
|
include_subfolders,
|
||||||
incremental_sync=incremental_sync,
|
incremental_sync=incremental_sync,
|
||||||
enable_summary=connector_enable_summary,
|
enable_summary=connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
total_unsupported += unsup
|
total_unsupported += unsup
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -261,6 +261,7 @@ async def _download_files_parallel(
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
max_concurrency: int = 3,
|
max_concurrency: int = 3,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[list[ConnectorDocument], int]:
|
) -> tuple[list[ConnectorDocument], int]:
|
||||||
"""Download and ETL files in parallel, returning ConnectorDocuments.
|
"""Download and ETL files in parallel, returning ConnectorDocuments.
|
||||||
|
|
||||||
|
|
@ -276,7 +277,7 @@ async def _download_files_parallel(
|
||||||
nonlocal last_heartbeat, completed_count
|
nonlocal last_heartbeat, completed_count
|
||||||
async with sem:
|
async with sem:
|
||||||
markdown, drive_metadata, error = await download_and_extract_content(
|
markdown, drive_metadata, error = await download_and_extract_content(
|
||||||
drive_client, file
|
drive_client, file, vision_llm=vision_llm
|
||||||
)
|
)
|
||||||
if error or not markdown:
|
if error or not markdown:
|
||||||
file_name = file.get("name", "Unknown")
|
file_name = file.get("name", "Unknown")
|
||||||
|
|
@ -322,6 +323,7 @@ async def _process_single_file(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
user_id: str,
|
user_id: str,
|
||||||
enable_summary: bool = True,
|
enable_summary: bool = True,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int]:
|
) -> tuple[int, int, int]:
|
||||||
"""Download, extract, and index a single Drive file via the pipeline.
|
"""Download, extract, and index a single Drive file via the pipeline.
|
||||||
|
|
||||||
|
|
@ -343,7 +345,7 @@ async def _process_single_file(
|
||||||
await page_limit_service.check_page_limit(user_id, estimated_pages)
|
await page_limit_service.check_page_limit(user_id, estimated_pages)
|
||||||
|
|
||||||
markdown, drive_metadata, error = await download_and_extract_content(
|
markdown, drive_metadata, error = await download_and_extract_content(
|
||||||
drive_client, file
|
drive_client, file, vision_llm=vision_llm
|
||||||
)
|
)
|
||||||
if error or not markdown:
|
if error or not markdown:
|
||||||
logger.warning(f"ETL failed for {file_name}: {error}")
|
logger.warning(f"ETL failed for {file_name}: {error}")
|
||||||
|
|
@ -433,6 +435,7 @@ async def _download_and_index(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int]:
|
) -> tuple[int, int]:
|
||||||
"""Phase 2+3: parallel download then parallel indexing.
|
"""Phase 2+3: parallel download then parallel indexing.
|
||||||
|
|
||||||
|
|
@ -446,6 +449,7 @@ async def _download_and_index(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat,
|
on_heartbeat=on_heartbeat,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
batch_indexed = 0
|
batch_indexed = 0
|
||||||
|
|
@ -476,6 +480,7 @@ async def _index_selected_files(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int, list[str]]:
|
) -> tuple[int, int, int, list[str]]:
|
||||||
"""Index user-selected files using the parallel pipeline.
|
"""Index user-selected files using the parallel pipeline.
|
||||||
|
|
||||||
|
|
@ -540,6 +545,7 @@ async def _index_selected_files(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat,
|
on_heartbeat=on_heartbeat,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||||
|
|
@ -573,6 +579,7 @@ async def _index_full_scan(
|
||||||
include_subfolders: bool = False,
|
include_subfolders: bool = False,
|
||||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
enable_summary: bool = True,
|
enable_summary: bool = True,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int]:
|
) -> tuple[int, int, int]:
|
||||||
"""Full scan indexing of a folder.
|
"""Full scan indexing of a folder.
|
||||||
|
|
||||||
|
|
@ -703,6 +710,7 @@ async def _index_full_scan(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat_callback,
|
on_heartbeat=on_heartbeat_callback,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||||
|
|
@ -736,6 +744,7 @@ async def _index_with_delta_sync(
|
||||||
include_subfolders: bool = False,
|
include_subfolders: bool = False,
|
||||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
enable_summary: bool = True,
|
enable_summary: bool = True,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int]:
|
) -> tuple[int, int, int]:
|
||||||
"""Delta sync using change tracking.
|
"""Delta sync using change tracking.
|
||||||
|
|
||||||
|
|
@ -844,6 +853,7 @@ async def _index_with_delta_sync(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat_callback,
|
on_heartbeat=on_heartbeat_callback,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||||
|
|
@ -947,6 +957,11 @@ async def index_google_drive_files(
|
||||||
)
|
)
|
||||||
|
|
||||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||||
|
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||||
|
vision_llm = None
|
||||||
|
if connector_enable_vision_llm:
|
||||||
|
from app.services.llm_service import get_vision_llm
|
||||||
|
vision_llm = await get_vision_llm(session, search_space_id)
|
||||||
drive_client = GoogleDriveClient(
|
drive_client = GoogleDriveClient(
|
||||||
session, connector_id, credentials=pre_built_credentials
|
session, connector_id, credentials=pre_built_credentials
|
||||||
)
|
)
|
||||||
|
|
@ -986,6 +1001,7 @@ async def index_google_drive_files(
|
||||||
include_subfolders,
|
include_subfolders,
|
||||||
on_heartbeat_callback,
|
on_heartbeat_callback,
|
||||||
connector_enable_summary,
|
connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
documents_unsupported += du
|
documents_unsupported += du
|
||||||
logger.info("Running reconciliation scan after delta sync")
|
logger.info("Running reconciliation scan after delta sync")
|
||||||
|
|
@ -1004,6 +1020,7 @@ async def index_google_drive_files(
|
||||||
include_subfolders,
|
include_subfolders,
|
||||||
on_heartbeat_callback,
|
on_heartbeat_callback,
|
||||||
connector_enable_summary,
|
connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
documents_indexed += ri
|
documents_indexed += ri
|
||||||
documents_skipped += rs
|
documents_skipped += rs
|
||||||
|
|
@ -1029,6 +1046,7 @@ async def index_google_drive_files(
|
||||||
include_subfolders,
|
include_subfolders,
|
||||||
on_heartbeat_callback,
|
on_heartbeat_callback,
|
||||||
connector_enable_summary,
|
connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if documents_indexed > 0 or can_use_delta:
|
if documents_indexed > 0 or can_use_delta:
|
||||||
|
|
@ -1146,6 +1164,11 @@ async def index_google_drive_single_file(
|
||||||
)
|
)
|
||||||
|
|
||||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||||
|
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||||
|
vision_llm = None
|
||||||
|
if connector_enable_vision_llm:
|
||||||
|
from app.services.llm_service import get_vision_llm
|
||||||
|
vision_llm = await get_vision_llm(session, search_space_id)
|
||||||
drive_client = GoogleDriveClient(
|
drive_client = GoogleDriveClient(
|
||||||
session, connector_id, credentials=pre_built_credentials
|
session, connector_id, credentials=pre_built_credentials
|
||||||
)
|
)
|
||||||
|
|
@ -1168,6 +1191,7 @@ async def index_google_drive_single_file(
|
||||||
search_space_id,
|
search_space_id,
|
||||||
user_id,
|
user_id,
|
||||||
connector_enable_summary,
|
connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
|
|
@ -1278,6 +1302,11 @@ async def index_google_drive_selected_files(
|
||||||
return 0, 0, [error_msg]
|
return 0, 0, [error_msg]
|
||||||
|
|
||||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||||
|
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||||
|
vision_llm = None
|
||||||
|
if connector_enable_vision_llm:
|
||||||
|
from app.services.llm_service import get_vision_llm
|
||||||
|
vision_llm = await get_vision_llm(session, search_space_id)
|
||||||
drive_client = GoogleDriveClient(
|
drive_client = GoogleDriveClient(
|
||||||
session, connector_id, credentials=pre_built_credentials
|
session, connector_id, credentials=pre_built_credentials
|
||||||
)
|
)
|
||||||
|
|
@ -1291,6 +1320,7 @@ async def index_google_drive_selected_files(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=connector_enable_summary,
|
enable_summary=connector_enable_summary,
|
||||||
on_heartbeat=on_heartbeat_callback,
|
on_heartbeat=on_heartbeat_callback,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if unsupported > 0:
|
if unsupported > 0:
|
||||||
|
|
|
||||||
|
|
@ -153,7 +153,7 @@ def scan_folder(
|
||||||
return files
|
return files
|
||||||
|
|
||||||
|
|
||||||
async def _read_file_content(file_path: str, filename: str) -> str:
|
async def _read_file_content(file_path: str, filename: str, *, vision_llm=None) -> str:
|
||||||
"""Read file content via the unified ETL pipeline.
|
"""Read file content via the unified ETL pipeline.
|
||||||
|
|
||||||
All file types (plaintext, audio, direct-convert, document, image) are
|
All file types (plaintext, audio, direct-convert, document, image) are
|
||||||
|
|
@ -162,7 +162,7 @@ async def _read_file_content(file_path: str, filename: str) -> str:
|
||||||
from app.etl_pipeline.etl_document import EtlRequest
|
from app.etl_pipeline.etl_document import EtlRequest
|
||||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
||||||
|
|
||||||
result = await EtlPipelineService().extract(
|
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
||||||
EtlRequest(file_path=file_path, filename=filename)
|
EtlRequest(file_path=file_path, filename=filename)
|
||||||
)
|
)
|
||||||
return result.markdown_content
|
return result.markdown_content
|
||||||
|
|
@ -199,12 +199,14 @@ async def _compute_file_content_hash(
|
||||||
file_path: str,
|
file_path: str,
|
||||||
filename: str,
|
filename: str,
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
|
*,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[str, str]:
|
) -> tuple[str, str]:
|
||||||
"""Read a file (via ETL if needed) and compute its content hash.
|
"""Read a file (via ETL if needed) and compute its content hash.
|
||||||
|
|
||||||
Returns (content_text, content_hash).
|
Returns (content_text, content_hash).
|
||||||
"""
|
"""
|
||||||
content = await _read_file_content(file_path, filename)
|
content = await _read_file_content(file_path, filename, vision_llm=vision_llm)
|
||||||
return content, _content_hash(content, search_space_id)
|
return content, _content_hash(content, search_space_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1268,6 +1270,7 @@ async def index_uploaded_files(
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
file_mappings: list[dict],
|
file_mappings: list[dict],
|
||||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
) -> tuple[int, int, str | None]:
|
) -> tuple[int, int, str | None]:
|
||||||
"""Index files uploaded from the desktop app via temp paths.
|
"""Index files uploaded from the desktop app via temp paths.
|
||||||
|
|
||||||
|
|
@ -1304,6 +1307,12 @@ async def index_uploaded_files(
|
||||||
pipeline = IndexingPipelineService(session)
|
pipeline = IndexingPipelineService(session)
|
||||||
llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
||||||
|
|
||||||
|
vision_llm_instance = None
|
||||||
|
if use_vision_llm:
|
||||||
|
from app.services.llm_service import get_vision_llm
|
||||||
|
|
||||||
|
vision_llm_instance = await get_vision_llm(session, search_space_id)
|
||||||
|
|
||||||
indexed_count = 0
|
indexed_count = 0
|
||||||
failed_count = 0
|
failed_count = 0
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
|
|
@ -1351,7 +1360,8 @@ async def index_uploaded_files(
|
||||||
|
|
||||||
try:
|
try:
|
||||||
content, content_hash = await _compute_file_content_hash(
|
content, content_hash = await _compute_file_content_hash(
|
||||||
temp_path, filename, search_space_id
|
temp_path, filename, search_space_id,
|
||||||
|
vision_llm=vision_llm_instance,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not read {relative_path}: {e}")
|
logger.warning(f"Could not read {relative_path}: {e}")
|
||||||
|
|
|
||||||
|
|
@ -171,6 +171,7 @@ async def _download_files_parallel(
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
max_concurrency: int = 3,
|
max_concurrency: int = 3,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[list[ConnectorDocument], int]:
|
) -> tuple[list[ConnectorDocument], int]:
|
||||||
"""Download and ETL files in parallel. Returns (docs, failed_count)."""
|
"""Download and ETL files in parallel. Returns (docs, failed_count)."""
|
||||||
results: list[ConnectorDocument] = []
|
results: list[ConnectorDocument] = []
|
||||||
|
|
@ -183,7 +184,7 @@ async def _download_files_parallel(
|
||||||
nonlocal last_heartbeat, completed_count
|
nonlocal last_heartbeat, completed_count
|
||||||
async with sem:
|
async with sem:
|
||||||
markdown, od_metadata, error = await download_and_extract_content(
|
markdown, od_metadata, error = await download_and_extract_content(
|
||||||
onedrive_client, file
|
onedrive_client, file, vision_llm=vision_llm
|
||||||
)
|
)
|
||||||
if error or not markdown:
|
if error or not markdown:
|
||||||
file_name = file.get("name", "Unknown")
|
file_name = file.get("name", "Unknown")
|
||||||
|
|
@ -231,6 +232,7 @@ async def _download_and_index(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int]:
|
) -> tuple[int, int]:
|
||||||
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
|
"""Parallel download then parallel indexing. Returns (batch_indexed, total_failed)."""
|
||||||
connector_docs, download_failed = await _download_files_parallel(
|
connector_docs, download_failed = await _download_files_parallel(
|
||||||
|
|
@ -241,6 +243,7 @@ async def _download_and_index(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat,
|
on_heartbeat=on_heartbeat,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
batch_indexed = 0
|
batch_indexed = 0
|
||||||
|
|
@ -293,6 +296,7 @@ async def _index_selected_files(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
enable_summary: bool,
|
enable_summary: bool,
|
||||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int, list[str]]:
|
) -> tuple[int, int, int, list[str]]:
|
||||||
"""Index user-selected files using the parallel pipeline."""
|
"""Index user-selected files using the parallel pipeline."""
|
||||||
page_limit_service = PageLimitService(session)
|
page_limit_service = PageLimitService(session)
|
||||||
|
|
@ -343,6 +347,7 @@ async def _index_selected_files(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat,
|
on_heartbeat=on_heartbeat,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||||
|
|
@ -375,6 +380,7 @@ async def _index_full_scan(
|
||||||
include_subfolders: bool = True,
|
include_subfolders: bool = True,
|
||||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
enable_summary: bool = True,
|
enable_summary: bool = True,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int]:
|
) -> tuple[int, int, int]:
|
||||||
"""Full scan indexing of a folder.
|
"""Full scan indexing of a folder.
|
||||||
|
|
||||||
|
|
@ -450,6 +456,7 @@ async def _index_full_scan(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat_callback,
|
on_heartbeat=on_heartbeat_callback,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||||
|
|
@ -481,6 +488,7 @@ async def _index_with_delta_sync(
|
||||||
max_files: int,
|
max_files: int,
|
||||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
enable_summary: bool = True,
|
enable_summary: bool = True,
|
||||||
|
vision_llm=None,
|
||||||
) -> tuple[int, int, int, str | None]:
|
) -> tuple[int, int, int, str | None]:
|
||||||
"""Delta sync using OneDrive change tracking.
|
"""Delta sync using OneDrive change tracking.
|
||||||
|
|
||||||
|
|
@ -573,6 +581,7 @@ async def _index_with_delta_sync(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=enable_summary,
|
enable_summary=enable_summary,
|
||||||
on_heartbeat=on_heartbeat_callback,
|
on_heartbeat=on_heartbeat_callback,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
if batch_indexed > 0 and files_to_download and batch_estimated_pages > 0:
|
||||||
|
|
@ -643,6 +652,12 @@ async def index_onedrive_files(
|
||||||
return 0, 0, error_msg, 0
|
return 0, 0, error_msg, 0
|
||||||
|
|
||||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||||
|
connector_enable_vision_llm = getattr(connector, "enable_vision_llm", False)
|
||||||
|
vision_llm = None
|
||||||
|
if connector_enable_vision_llm:
|
||||||
|
from app.services.llm_service import get_vision_llm
|
||||||
|
vision_llm = await get_vision_llm(session, search_space_id)
|
||||||
|
|
||||||
onedrive_client = OneDriveClient(session, connector_id)
|
onedrive_client = OneDriveClient(session, connector_id)
|
||||||
|
|
||||||
indexing_options = items_dict.get("indexing_options", {})
|
indexing_options = items_dict.get("indexing_options", {})
|
||||||
|
|
@ -666,6 +681,7 @@ async def index_onedrive_files(
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
enable_summary=connector_enable_summary,
|
enable_summary=connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
total_indexed += indexed
|
total_indexed += indexed
|
||||||
total_skipped += skipped
|
total_skipped += skipped
|
||||||
|
|
@ -695,6 +711,7 @@ async def index_onedrive_files(
|
||||||
log_entry,
|
log_entry,
|
||||||
max_files,
|
max_files,
|
||||||
enable_summary=connector_enable_summary,
|
enable_summary=connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
total_indexed += indexed
|
total_indexed += indexed
|
||||||
total_skipped += skipped
|
total_skipped += skipped
|
||||||
|
|
@ -721,6 +738,7 @@ async def index_onedrive_files(
|
||||||
max_files,
|
max_files,
|
||||||
include_subfolders,
|
include_subfolders,
|
||||||
enable_summary=connector_enable_summary,
|
enable_summary=connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
total_indexed += ri
|
total_indexed += ri
|
||||||
total_skipped += rs
|
total_skipped += rs
|
||||||
|
|
@ -740,6 +758,7 @@ async def index_onedrive_files(
|
||||||
max_files,
|
max_files,
|
||||||
include_subfolders,
|
include_subfolders,
|
||||||
enable_summary=connector_enable_summary,
|
enable_summary=connector_enable_summary,
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
total_indexed += indexed
|
total_indexed += indexed
|
||||||
total_skipped += skipped
|
total_skipped += skipped
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,7 @@ class _ProcessingContext:
|
||||||
log_entry: Log
|
log_entry: Log
|
||||||
connector: dict | None = None
|
connector: dict | None = None
|
||||||
notification: Notification | None = None
|
notification: Notification | None = None
|
||||||
|
use_vision_llm: bool = False
|
||||||
enable_summary: bool = field(init=False)
|
enable_summary: bool = field(init=False)
|
||||||
|
|
||||||
def __post_init__(self) -> None:
|
def __post_init__(self) -> None:
|
||||||
|
|
@ -134,7 +135,7 @@ async def _process_non_document_upload(ctx: _ProcessingContext) -> Document | No
|
||||||
)
|
)
|
||||||
|
|
||||||
vision_llm = None
|
vision_llm = None
|
||||||
if etl_classify(ctx.filename) == FileCategory.IMAGE:
|
if ctx.use_vision_llm and etl_classify(ctx.filename) == FileCategory.IMAGE:
|
||||||
from app.services.llm_service import get_vision_llm
|
from app.services.llm_service import get_vision_llm
|
||||||
|
|
||||||
vision_llm = await get_vision_llm(ctx.session, ctx.search_space_id)
|
vision_llm = await get_vision_llm(ctx.session, ctx.search_space_id)
|
||||||
|
|
@ -288,6 +289,7 @@ async def process_file_in_background(
|
||||||
log_entry: Log,
|
log_entry: Log,
|
||||||
connector: dict | None = None,
|
connector: dict | None = None,
|
||||||
notification: Notification | None = None,
|
notification: Notification | None = None,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
) -> Document | None:
|
) -> Document | None:
|
||||||
ctx = _ProcessingContext(
|
ctx = _ProcessingContext(
|
||||||
session=session,
|
session=session,
|
||||||
|
|
@ -299,6 +301,7 @@ async def process_file_in_background(
|
||||||
log_entry=log_entry,
|
log_entry=log_entry,
|
||||||
connector=connector,
|
connector=connector,
|
||||||
notification=notification,
|
notification=notification,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -349,6 +352,7 @@ async def _extract_file_content(
|
||||||
task_logger: TaskLoggingService,
|
task_logger: TaskLoggingService,
|
||||||
log_entry: Log,
|
log_entry: Log,
|
||||||
notification: Notification | None,
|
notification: Notification | None,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
) -> tuple[str, str]:
|
) -> tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Extract markdown content from a file regardless of type.
|
Extract markdown content from a file regardless of type.
|
||||||
|
|
@ -396,7 +400,7 @@ async def _extract_file_content(
|
||||||
await page_limit_service.check_page_limit(user_id, estimated_pages)
|
await page_limit_service.check_page_limit(user_id, estimated_pages)
|
||||||
|
|
||||||
vision_llm = None
|
vision_llm = None
|
||||||
if category == FileCategory.IMAGE:
|
if use_vision_llm and category == FileCategory.IMAGE:
|
||||||
from app.services.llm_service import get_vision_llm
|
from app.services.llm_service import get_vision_llm
|
||||||
|
|
||||||
vision_llm = await get_vision_llm(session, search_space_id)
|
vision_llm = await get_vision_llm(session, search_space_id)
|
||||||
|
|
@ -435,6 +439,7 @@ async def process_file_in_background_with_document(
|
||||||
connector: dict | None = None,
|
connector: dict | None = None,
|
||||||
notification: Notification | None = None,
|
notification: Notification | None = None,
|
||||||
should_summarize: bool = False,
|
should_summarize: bool = False,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
) -> Document | None:
|
) -> Document | None:
|
||||||
"""
|
"""
|
||||||
Process file and update existing pending document (2-phase pattern).
|
Process file and update existing pending document (2-phase pattern).
|
||||||
|
|
@ -463,6 +468,7 @@ async def process_file_in_background_with_document(
|
||||||
task_logger,
|
task_logger,
|
||||||
log_entry,
|
log_entry,
|
||||||
notification,
|
notification,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not markdown_content:
|
if not markdown_content:
|
||||||
|
|
|
||||||
|
|
@ -69,6 +69,7 @@ class InlineTaskDispatcher:
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
user_id: str,
|
user_id: str,
|
||||||
should_summarize: bool = False,
|
should_summarize: bool = False,
|
||||||
|
use_vision_llm: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
from app.tasks.celery_tasks.document_tasks import (
|
from app.tasks.celery_tasks.document_tasks import (
|
||||||
_process_file_with_document,
|
_process_file_with_document,
|
||||||
|
|
@ -82,6 +83,7 @@ class InlineTaskDispatcher:
|
||||||
search_space_id,
|
search_space_id,
|
||||||
user_id,
|
user_id,
|
||||||
should_summarize=should_summarize,
|
should_summarize=should_summarize,
|
||||||
|
use_vision_llm=use_vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
||||||
periodicEnabled,
|
periodicEnabled,
|
||||||
frequencyMinutes,
|
frequencyMinutes,
|
||||||
enableSummary,
|
enableSummary,
|
||||||
|
enableVisionLlm,
|
||||||
allConnectors,
|
allConnectors,
|
||||||
viewingAccountsType,
|
viewingAccountsType,
|
||||||
viewingMCPList,
|
viewingMCPList,
|
||||||
|
|
@ -109,6 +110,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
||||||
setPeriodicEnabled,
|
setPeriodicEnabled,
|
||||||
setFrequencyMinutes,
|
setFrequencyMinutes,
|
||||||
setEnableSummary,
|
setEnableSummary,
|
||||||
|
setEnableVisionLlm,
|
||||||
handleOpenChange,
|
handleOpenChange,
|
||||||
handleTabChange,
|
handleTabChange,
|
||||||
handleScroll,
|
handleScroll,
|
||||||
|
|
@ -279,6 +281,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
||||||
periodicEnabled={periodicEnabled}
|
periodicEnabled={periodicEnabled}
|
||||||
frequencyMinutes={frequencyMinutes}
|
frequencyMinutes={frequencyMinutes}
|
||||||
enableSummary={enableSummary}
|
enableSummary={enableSummary}
|
||||||
|
enableVisionLlm={enableVisionLlm}
|
||||||
isSaving={isSaving}
|
isSaving={isSaving}
|
||||||
isDisconnecting={isDisconnecting}
|
isDisconnecting={isDisconnecting}
|
||||||
isIndexing={indexingConnectorIds.has(editingConnector.id)}
|
isIndexing={indexingConnectorIds.has(editingConnector.id)}
|
||||||
|
|
@ -288,6 +291,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
||||||
onPeriodicEnabledChange={setPeriodicEnabled}
|
onPeriodicEnabledChange={setPeriodicEnabled}
|
||||||
onFrequencyChange={setFrequencyMinutes}
|
onFrequencyChange={setFrequencyMinutes}
|
||||||
onEnableSummaryChange={setEnableSummary}
|
onEnableSummaryChange={setEnableSummary}
|
||||||
|
onEnableVisionLlmChange={setEnableVisionLlm}
|
||||||
onSave={() => {
|
onSave={() => {
|
||||||
startIndexing(editingConnector.id);
|
startIndexing(editingConnector.id);
|
||||||
handleSaveConnector(() => refreshConnectors());
|
handleSaveConnector(() => refreshConnectors());
|
||||||
|
|
@ -336,6 +340,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
||||||
periodicEnabled={periodicEnabled}
|
periodicEnabled={periodicEnabled}
|
||||||
frequencyMinutes={frequencyMinutes}
|
frequencyMinutes={frequencyMinutes}
|
||||||
enableSummary={enableSummary}
|
enableSummary={enableSummary}
|
||||||
|
enableVisionLlm={enableVisionLlm}
|
||||||
isStartingIndexing={isStartingIndexing}
|
isStartingIndexing={isStartingIndexing}
|
||||||
isFromOAuth={isFromOAuth}
|
isFromOAuth={isFromOAuth}
|
||||||
onStartDateChange={setStartDate}
|
onStartDateChange={setStartDate}
|
||||||
|
|
@ -343,6 +348,7 @@ export const ConnectorIndicator = forwardRef<ConnectorIndicatorHandle, Connector
|
||||||
onPeriodicEnabledChange={setPeriodicEnabled}
|
onPeriodicEnabledChange={setPeriodicEnabled}
|
||||||
onFrequencyChange={setFrequencyMinutes}
|
onFrequencyChange={setFrequencyMinutes}
|
||||||
onEnableSummaryChange={setEnableSummary}
|
onEnableSummaryChange={setEnableSummary}
|
||||||
|
onEnableVisionLlmChange={setEnableVisionLlm}
|
||||||
onConfigChange={setIndexingConnectorConfig}
|
onConfigChange={setIndexingConnectorConfig}
|
||||||
onStartIndexing={() => {
|
onStartIndexing={() => {
|
||||||
if (indexingConfig.connectorId) {
|
if (indexingConfig.connectorId) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
"use client";
|
||||||
|
|
||||||
|
import type { FC } from "react";
|
||||||
|
import { Switch } from "@/components/ui/switch";
|
||||||
|
|
||||||
|
interface VisionLLMConfigProps {
|
||||||
|
enabled: boolean;
|
||||||
|
onEnabledChange: (enabled: boolean) => void;
|
||||||
|
}
|
||||||
|
|
||||||
|
export const VisionLLMConfig: FC<VisionLLMConfigProps> = ({ enabled, onEnabledChange }) => {
|
||||||
|
return (
|
||||||
|
<div className="rounded-xl bg-slate-400/5 dark:bg-white/5 p-3 sm:p-6">
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div className="space-y-1">
|
||||||
|
<h3 className="font-medium text-sm sm:text-base">Enable Vision LLM</h3>
|
||||||
|
<p className="text-xs sm:text-sm text-muted-foreground">
|
||||||
|
Describes images using AI vision (costly, slower)
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<Switch checked={enabled} onCheckedChange={onEnabledChange} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
@ -15,6 +15,7 @@ import { cn } from "@/lib/utils";
|
||||||
import { DateRangeSelector } from "../../components/date-range-selector";
|
import { DateRangeSelector } from "../../components/date-range-selector";
|
||||||
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
|
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
|
||||||
import { SummaryConfig } from "../../components/summary-config";
|
import { SummaryConfig } from "../../components/summary-config";
|
||||||
|
import { VisionLLMConfig } from "../../components/vision-llm-config";
|
||||||
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
|
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
|
||||||
import { getConnectorConfigComponent } from "../index";
|
import { getConnectorConfigComponent } from "../index";
|
||||||
|
|
||||||
|
|
@ -38,6 +39,7 @@ interface ConnectorEditViewProps {
|
||||||
periodicEnabled: boolean;
|
periodicEnabled: boolean;
|
||||||
frequencyMinutes: string;
|
frequencyMinutes: string;
|
||||||
enableSummary: boolean;
|
enableSummary: boolean;
|
||||||
|
enableVisionLlm: boolean;
|
||||||
isSaving: boolean;
|
isSaving: boolean;
|
||||||
isDisconnecting: boolean;
|
isDisconnecting: boolean;
|
||||||
isIndexing?: boolean;
|
isIndexing?: boolean;
|
||||||
|
|
@ -47,6 +49,7 @@ interface ConnectorEditViewProps {
|
||||||
onPeriodicEnabledChange: (enabled: boolean) => void;
|
onPeriodicEnabledChange: (enabled: boolean) => void;
|
||||||
onFrequencyChange: (frequency: string) => void;
|
onFrequencyChange: (frequency: string) => void;
|
||||||
onEnableSummaryChange: (enabled: boolean) => void;
|
onEnableSummaryChange: (enabled: boolean) => void;
|
||||||
|
onEnableVisionLlmChange: (enabled: boolean) => void;
|
||||||
onSave: () => void;
|
onSave: () => void;
|
||||||
onDisconnect: () => void;
|
onDisconnect: () => void;
|
||||||
onBack: () => void;
|
onBack: () => void;
|
||||||
|
|
@ -62,6 +65,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
||||||
periodicEnabled,
|
periodicEnabled,
|
||||||
frequencyMinutes,
|
frequencyMinutes,
|
||||||
enableSummary,
|
enableSummary,
|
||||||
|
enableVisionLlm,
|
||||||
isSaving,
|
isSaving,
|
||||||
isDisconnecting,
|
isDisconnecting,
|
||||||
isIndexing = false,
|
isIndexing = false,
|
||||||
|
|
@ -71,6 +75,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
||||||
onPeriodicEnabledChange,
|
onPeriodicEnabledChange,
|
||||||
onFrequencyChange,
|
onFrequencyChange,
|
||||||
onEnableSummaryChange,
|
onEnableSummaryChange,
|
||||||
|
onEnableVisionLlmChange,
|
||||||
onSave,
|
onSave,
|
||||||
onDisconnect,
|
onDisconnect,
|
||||||
onBack,
|
onBack,
|
||||||
|
|
@ -272,6 +277,14 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
||||||
{/* AI Summary toggle */}
|
{/* AI Summary toggle */}
|
||||||
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
|
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
|
||||||
|
|
||||||
|
{/* Vision LLM toggle - only for file-based connectors */}
|
||||||
|
{(connector.connector_type === "GOOGLE_DRIVE_CONNECTOR" ||
|
||||||
|
connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
|
||||||
|
connector.connector_type === "DROPBOX_CONNECTOR" ||
|
||||||
|
connector.connector_type === "ONEDRIVE_CONNECTOR") && (
|
||||||
|
<VisionLLMConfig enabled={enableVisionLlm} onEnabledChange={onEnableVisionLlmChange} />
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
|
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
|
||||||
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
|
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||||
connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import { cn } from "@/lib/utils";
|
||||||
import { DateRangeSelector } from "../../components/date-range-selector";
|
import { DateRangeSelector } from "../../components/date-range-selector";
|
||||||
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
|
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
|
||||||
import { SummaryConfig } from "../../components/summary-config";
|
import { SummaryConfig } from "../../components/summary-config";
|
||||||
|
import { VisionLLMConfig } from "../../components/vision-llm-config";
|
||||||
import type { IndexingConfigState } from "../../constants/connector-constants";
|
import type { IndexingConfigState } from "../../constants/connector-constants";
|
||||||
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
|
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
|
||||||
import { getConnectorConfigComponent } from "../index";
|
import { getConnectorConfigComponent } from "../index";
|
||||||
|
|
@ -22,6 +23,7 @@ interface IndexingConfigurationViewProps {
|
||||||
periodicEnabled: boolean;
|
periodicEnabled: boolean;
|
||||||
frequencyMinutes: string;
|
frequencyMinutes: string;
|
||||||
enableSummary: boolean;
|
enableSummary: boolean;
|
||||||
|
enableVisionLlm: boolean;
|
||||||
isStartingIndexing: boolean;
|
isStartingIndexing: boolean;
|
||||||
isFromOAuth?: boolean;
|
isFromOAuth?: boolean;
|
||||||
onStartDateChange: (date: Date | undefined) => void;
|
onStartDateChange: (date: Date | undefined) => void;
|
||||||
|
|
@ -29,6 +31,7 @@ interface IndexingConfigurationViewProps {
|
||||||
onPeriodicEnabledChange: (enabled: boolean) => void;
|
onPeriodicEnabledChange: (enabled: boolean) => void;
|
||||||
onFrequencyChange: (frequency: string) => void;
|
onFrequencyChange: (frequency: string) => void;
|
||||||
onEnableSummaryChange: (enabled: boolean) => void;
|
onEnableSummaryChange: (enabled: boolean) => void;
|
||||||
|
onEnableVisionLlmChange: (enabled: boolean) => void;
|
||||||
onConfigChange?: (config: Record<string, unknown>) => void;
|
onConfigChange?: (config: Record<string, unknown>) => void;
|
||||||
onStartIndexing: () => void;
|
onStartIndexing: () => void;
|
||||||
onSkip: () => void;
|
onSkip: () => void;
|
||||||
|
|
@ -42,6 +45,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
||||||
periodicEnabled,
|
periodicEnabled,
|
||||||
frequencyMinutes,
|
frequencyMinutes,
|
||||||
enableSummary,
|
enableSummary,
|
||||||
|
enableVisionLlm,
|
||||||
isStartingIndexing,
|
isStartingIndexing,
|
||||||
isFromOAuth = false,
|
isFromOAuth = false,
|
||||||
onStartDateChange,
|
onStartDateChange,
|
||||||
|
|
@ -49,6 +53,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
||||||
onPeriodicEnabledChange,
|
onPeriodicEnabledChange,
|
||||||
onFrequencyChange,
|
onFrequencyChange,
|
||||||
onEnableSummaryChange,
|
onEnableSummaryChange,
|
||||||
|
onEnableVisionLlmChange,
|
||||||
onConfigChange,
|
onConfigChange,
|
||||||
onStartIndexing,
|
onStartIndexing,
|
||||||
onSkip,
|
onSkip,
|
||||||
|
|
@ -158,6 +163,14 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
||||||
{/* AI Summary toggle */}
|
{/* AI Summary toggle */}
|
||||||
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
|
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
|
||||||
|
|
||||||
|
{/* Vision LLM toggle - only for file-based connectors */}
|
||||||
|
{(config.connectorType === "GOOGLE_DRIVE_CONNECTOR" ||
|
||||||
|
config.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
|
||||||
|
config.connectorType === "DROPBOX_CONNECTOR" ||
|
||||||
|
config.connectorType === "ONEDRIVE_CONNECTOR") && (
|
||||||
|
<VisionLLMConfig enabled={enableVisionLlm} onEnabledChange={onEnableVisionLlmChange} />
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
|
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
|
||||||
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
|
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||||
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,7 @@ export const useConnectorDialog = () => {
|
||||||
const [periodicEnabled, setPeriodicEnabled] = useState(false);
|
const [periodicEnabled, setPeriodicEnabled] = useState(false);
|
||||||
const [frequencyMinutes, setFrequencyMinutes] = useState("1440");
|
const [frequencyMinutes, setFrequencyMinutes] = useState("1440");
|
||||||
const [enableSummary, setEnableSummary] = useState(false);
|
const [enableSummary, setEnableSummary] = useState(false);
|
||||||
|
const [enableVisionLlm, setEnableVisionLlm] = useState(false);
|
||||||
|
|
||||||
// Edit mode state
|
// Edit mode state
|
||||||
const [editingConnector, setEditingConnector] = useState<SearchSourceConnector | null>(null);
|
const [editingConnector, setEditingConnector] = useState<SearchSourceConnector | null>(null);
|
||||||
|
|
@ -621,6 +622,7 @@ export const useConnectorDialog = () => {
|
||||||
setPeriodicEnabled(false);
|
setPeriodicEnabled(false);
|
||||||
setFrequencyMinutes("1440");
|
setFrequencyMinutes("1440");
|
||||||
setEnableSummary(connector.enable_summary ?? false);
|
setEnableSummary(connector.enable_summary ?? false);
|
||||||
|
setEnableVisionLlm(connector.enable_vision_llm ?? false);
|
||||||
setStartDate(undefined);
|
setStartDate(undefined);
|
||||||
setEndDate(undefined);
|
setEndDate(undefined);
|
||||||
|
|
||||||
|
|
@ -763,12 +765,13 @@ export const useConnectorDialog = () => {
|
||||||
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
|
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
|
||||||
|
|
||||||
// Update connector with summary, periodic sync settings, and config changes
|
// Update connector with summary, periodic sync settings, and config changes
|
||||||
if (enableSummary || periodicEnabled || indexingConnectorConfig) {
|
if (enableSummary || enableVisionLlm || periodicEnabled || indexingConnectorConfig) {
|
||||||
const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined;
|
const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined;
|
||||||
await updateConnector({
|
await updateConnector({
|
||||||
id: indexingConfig.connectorId,
|
id: indexingConfig.connectorId,
|
||||||
data: {
|
data: {
|
||||||
enable_summary: enableSummary,
|
enable_summary: enableSummary,
|
||||||
|
enable_vision_llm: enableVisionLlm,
|
||||||
...(periodicEnabled && {
|
...(periodicEnabled && {
|
||||||
periodic_indexing_enabled: true,
|
periodic_indexing_enabled: true,
|
||||||
indexing_frequency_minutes: frequency,
|
indexing_frequency_minutes: frequency,
|
||||||
|
|
@ -896,6 +899,7 @@ export const useConnectorDialog = () => {
|
||||||
periodicEnabled,
|
periodicEnabled,
|
||||||
frequencyMinutes,
|
frequencyMinutes,
|
||||||
enableSummary,
|
enableSummary,
|
||||||
|
enableVisionLlm,
|
||||||
indexingConnectorConfig,
|
indexingConnectorConfig,
|
||||||
setIsOpen,
|
setIsOpen,
|
||||||
]
|
]
|
||||||
|
|
@ -960,6 +964,7 @@ export const useConnectorDialog = () => {
|
||||||
setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled);
|
setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled);
|
||||||
setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440");
|
setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440");
|
||||||
setEnableSummary(connector.enable_summary ?? false);
|
setEnableSummary(connector.enable_summary ?? false);
|
||||||
|
setEnableVisionLlm(connector.enable_vision_llm ?? false);
|
||||||
setStartDate(undefined);
|
setStartDate(undefined);
|
||||||
setEndDate(undefined);
|
setEndDate(undefined);
|
||||||
},
|
},
|
||||||
|
|
@ -1038,6 +1043,7 @@ export const useConnectorDialog = () => {
|
||||||
data: {
|
data: {
|
||||||
name: connectorName || editingConnector.name,
|
name: connectorName || editingConnector.name,
|
||||||
enable_summary: enableSummary,
|
enable_summary: enableSummary,
|
||||||
|
enable_vision_llm: enableVisionLlm,
|
||||||
periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled,
|
periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled,
|
||||||
indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency,
|
indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency,
|
||||||
config: connectorConfig || editingConnector.config,
|
config: connectorConfig || editingConnector.config,
|
||||||
|
|
@ -1172,6 +1178,7 @@ export const useConnectorDialog = () => {
|
||||||
periodicEnabled,
|
periodicEnabled,
|
||||||
frequencyMinutes,
|
frequencyMinutes,
|
||||||
enableSummary,
|
enableSummary,
|
||||||
|
enableVisionLlm,
|
||||||
getFrequencyLabel,
|
getFrequencyLabel,
|
||||||
connectorConfig,
|
connectorConfig,
|
||||||
connectorName,
|
connectorName,
|
||||||
|
|
@ -1332,6 +1339,7 @@ export const useConnectorDialog = () => {
|
||||||
setPeriodicEnabled(false);
|
setPeriodicEnabled(false);
|
||||||
setFrequencyMinutes("1440");
|
setFrequencyMinutes("1440");
|
||||||
setEnableSummary(false);
|
setEnableSummary(false);
|
||||||
|
setEnableVisionLlm(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -1368,6 +1376,7 @@ export const useConnectorDialog = () => {
|
||||||
periodicEnabled,
|
periodicEnabled,
|
||||||
frequencyMinutes,
|
frequencyMinutes,
|
||||||
enableSummary,
|
enableSummary,
|
||||||
|
enableVisionLlm,
|
||||||
searchSpaceId,
|
searchSpaceId,
|
||||||
allConnectors,
|
allConnectors,
|
||||||
viewingAccountsType,
|
viewingAccountsType,
|
||||||
|
|
@ -1382,6 +1391,7 @@ export const useConnectorDialog = () => {
|
||||||
setPeriodicEnabled,
|
setPeriodicEnabled,
|
||||||
setFrequencyMinutes,
|
setFrequencyMinutes,
|
||||||
setEnableSummary,
|
setEnableSummary,
|
||||||
|
setEnableVisionLlm,
|
||||||
setConnectorName,
|
setConnectorName,
|
||||||
|
|
||||||
// Handlers
|
// Handlers
|
||||||
|
|
|
||||||
|
|
@ -136,6 +136,7 @@ export function DocumentUploadTab({
|
||||||
const [uploadProgress, setUploadProgress] = useState(0);
|
const [uploadProgress, setUploadProgress] = useState(0);
|
||||||
const [accordionValue, setAccordionValue] = useState<string>("");
|
const [accordionValue, setAccordionValue] = useState<string>("");
|
||||||
const [shouldSummarize, setShouldSummarize] = useState(false);
|
const [shouldSummarize, setShouldSummarize] = useState(false);
|
||||||
|
const [useVisionLlm, setUseVisionLlm] = useState(false);
|
||||||
const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom);
|
const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom);
|
||||||
const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation;
|
const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation;
|
||||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||||
|
|
@ -361,6 +362,7 @@ export function DocumentUploadTab({
|
||||||
relative_paths: batch.map((e) => e.relativePath),
|
relative_paths: batch.map((e) => e.relativePath),
|
||||||
root_folder_id: rootFolderId,
|
root_folder_id: rootFolderId,
|
||||||
enable_summary: shouldSummarize,
|
enable_summary: shouldSummarize,
|
||||||
|
use_vision_llm: useVisionLlm,
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -407,6 +409,7 @@ export function DocumentUploadTab({
|
||||||
files: rawFiles,
|
files: rawFiles,
|
||||||
search_space_id: Number(searchSpaceId),
|
search_space_id: Number(searchSpaceId),
|
||||||
should_summarize: shouldSummarize,
|
should_summarize: shouldSummarize,
|
||||||
|
use_vision_llm: useVisionLlm,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
onSuccess: () => {
|
onSuccess: () => {
|
||||||
|
|
@ -696,6 +699,16 @@ export function DocumentUploadTab({
|
||||||
<Switch checked={shouldSummarize} onCheckedChange={setShouldSummarize} />
|
<Switch checked={shouldSummarize} onCheckedChange={setShouldSummarize} />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div className={toggleRowClass}>
|
||||||
|
<div className="space-y-0.5">
|
||||||
|
<p className="font-medium text-sm">Enable Vision LLM</p>
|
||||||
|
<p className="text-xs text-muted-foreground">
|
||||||
|
Describes images using AI vision (costly, slower)
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<Switch checked={useVisionLlm} onCheckedChange={setUseVisionLlm} />
|
||||||
|
</div>
|
||||||
|
|
||||||
<Button
|
<Button
|
||||||
className="w-full"
|
className="w-full"
|
||||||
onClick={handleUpload}
|
onClick={handleUpload}
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@ export const searchSourceConnector = z.object({
|
||||||
last_indexed_at: z.string().nullable(),
|
last_indexed_at: z.string().nullable(),
|
||||||
config: z.record(z.string(), z.any()),
|
config: z.record(z.string(), z.any()),
|
||||||
enable_summary: z.boolean().default(false),
|
enable_summary: z.boolean().default(false),
|
||||||
|
enable_vision_llm: z.boolean().default(false),
|
||||||
periodic_indexing_enabled: z.boolean(),
|
periodic_indexing_enabled: z.boolean(),
|
||||||
indexing_frequency_minutes: z.number().nullable(),
|
indexing_frequency_minutes: z.number().nullable(),
|
||||||
next_scheduled_at: z.string().nullable(),
|
next_scheduled_at: z.string().nullable(),
|
||||||
|
|
@ -98,6 +99,7 @@ export const createConnectorRequest = z.object({
|
||||||
last_indexed_at: true,
|
last_indexed_at: true,
|
||||||
config: true,
|
config: true,
|
||||||
enable_summary: true,
|
enable_summary: true,
|
||||||
|
enable_vision_llm: true,
|
||||||
periodic_indexing_enabled: true,
|
periodic_indexing_enabled: true,
|
||||||
indexing_frequency_minutes: true,
|
indexing_frequency_minutes: true,
|
||||||
next_scheduled_at: true,
|
next_scheduled_at: true,
|
||||||
|
|
@ -123,6 +125,7 @@ export const updateConnectorRequest = z.object({
|
||||||
last_indexed_at: true,
|
last_indexed_at: true,
|
||||||
config: true,
|
config: true,
|
||||||
enable_summary: true,
|
enable_summary: true,
|
||||||
|
enable_vision_llm: true,
|
||||||
periodic_indexing_enabled: true,
|
periodic_indexing_enabled: true,
|
||||||
indexing_frequency_minutes: true,
|
indexing_frequency_minutes: true,
|
||||||
next_scheduled_at: true,
|
next_scheduled_at: true,
|
||||||
|
|
|
||||||
|
|
@ -148,6 +148,7 @@ export const uploadDocumentRequest = z.object({
|
||||||
files: z.array(z.instanceof(File)),
|
files: z.array(z.instanceof(File)),
|
||||||
search_space_id: z.number(),
|
search_space_id: z.number(),
|
||||||
should_summarize: z.boolean().default(false),
|
should_summarize: z.boolean().default(false),
|
||||||
|
use_vision_llm: z.boolean().default(false),
|
||||||
});
|
});
|
||||||
|
|
||||||
export const uploadDocumentResponse = z.object({
|
export const uploadDocumentResponse = z.object({
|
||||||
|
|
|
||||||
|
|
@ -127,7 +127,7 @@ class DocumentsApiService {
|
||||||
throw new ValidationError(`Invalid request: ${errorMessage}`);
|
throw new ValidationError(`Invalid request: ${errorMessage}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const { files, search_space_id, should_summarize } = parsedRequest.data;
|
const { files, search_space_id, should_summarize, use_vision_llm } = parsedRequest.data;
|
||||||
const UPLOAD_BATCH_SIZE = 5;
|
const UPLOAD_BATCH_SIZE = 5;
|
||||||
|
|
||||||
const batches: File[][] = [];
|
const batches: File[][] = [];
|
||||||
|
|
@ -146,6 +146,7 @@ class DocumentsApiService {
|
||||||
for (const file of batch) formData.append("files", file);
|
for (const file of batch) formData.append("files", file);
|
||||||
formData.append("search_space_id", String(search_space_id));
|
formData.append("search_space_id", String(search_space_id));
|
||||||
formData.append("should_summarize", String(should_summarize));
|
formData.append("should_summarize", String(should_summarize));
|
||||||
|
formData.append("use_vision_llm", String(use_vision_llm));
|
||||||
|
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timeoutId = setTimeout(() => controller.abort(), 120_000);
|
const timeoutId = setTimeout(() => controller.abort(), 120_000);
|
||||||
|
|
@ -442,6 +443,7 @@ class DocumentsApiService {
|
||||||
relative_paths: string[];
|
relative_paths: string[];
|
||||||
root_folder_id?: number | null;
|
root_folder_id?: number | null;
|
||||||
enable_summary?: boolean;
|
enable_summary?: boolean;
|
||||||
|
use_vision_llm?: boolean;
|
||||||
},
|
},
|
||||||
signal?: AbortSignal
|
signal?: AbortSignal
|
||||||
): Promise<{ message: string; status: string; root_folder_id: number; file_count: number }> => {
|
): Promise<{ message: string; status: string; root_folder_id: number; file_count: number }> => {
|
||||||
|
|
@ -456,6 +458,7 @@ class DocumentsApiService {
|
||||||
formData.append("root_folder_id", String(metadata.root_folder_id));
|
formData.append("root_folder_id", String(metadata.root_folder_id));
|
||||||
}
|
}
|
||||||
formData.append("enable_summary", String(metadata.enable_summary ?? false));
|
formData.append("enable_summary", String(metadata.enable_summary ?? false));
|
||||||
|
formData.append("use_vision_llm", String(metadata.use_vision_llm ?? false));
|
||||||
|
|
||||||
const totalSize = files.reduce((acc, f) => acc + f.size, 0);
|
const totalSize = files.reduce((acc, f) => acc + f.size, 0);
|
||||||
const timeoutMs = Math.min(Math.max((totalSize / (1024 * 1024)) * 5000, 30_000), 600_000);
|
const timeoutMs = Math.min(Math.max((totalSize / (1024 * 1024)) * 5000, 30_000), 600_000);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue