Make Vision LLM opt-in for uploads and connectors

2026-05-15 18:25:18 +02:00 · 2026-04-10 16:45:51 +02:00 · 2026-04-10 16:45:51 +02:00 · a95bf58c8f
commit a95bf58c8f
parent 0aefcbd504
24 changed files with 276 additions and 20 deletions
--- a/surfsense_backend/app/connectors/google_drive/content_extractor.py
+++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py
@@ -27,6 +27,8 @@ logger = logging.getLogger(__name__)
 async def download_and_extract_content(
    client: GoogleDriveClient,
    file: dict[str, Any],
+    *,
+    vision_llm=None,
 ) -> tuple[str | None, dict[str, Any], str | None]:
    """Download a Google Drive file and extract its content as markdown.

@@ -103,7 +105,9 @@ async def download_and_extract_content(
        etl_filename = (
            file_name + extension if is_google_workspace_file(mime_type) else file_name
        )
-        markdown = await _parse_file_to_markdown(temp_file_path, etl_filename)
+        markdown = await _parse_file_to_markdown(
+            temp_file_path, etl_filename, vision_llm=vision_llm
+        )
        return markdown, drive_metadata, None

    except Exception as e:
@@ -115,12 +119,14 @@ async def download_and_extract_content(
                os.unlink(temp_file_path)


-async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
+async def _parse_file_to_markdown(
+    file_path: str, filename: str, *, vision_llm=None
+) -> str:
    """Parse a local file to markdown using the unified ETL pipeline."""
    from app.etl_pipeline.etl_document import EtlRequest
    from app.etl_pipeline.etl_pipeline_service import EtlPipelineService

-    result = await EtlPipelineService().extract(
+    result = await EtlPipelineService(vision_llm=vision_llm).extract(
        EtlRequest(file_path=file_path, filename=filename)
    )
    return result.markdown_content