Make Vision LLM opt-in for uploads and connectors

This commit is contained in:
CREDO23 2026-04-10 16:45:51 +02:00
parent 0aefcbd504
commit a95bf58c8f
24 changed files with 276 additions and 20 deletions

View file

@ -27,6 +27,8 @@ logger = logging.getLogger(__name__)
async def download_and_extract_content(
client: GoogleDriveClient,
file: dict[str, Any],
*,
vision_llm=None,
) -> tuple[str | None, dict[str, Any], str | None]:
"""Download a Google Drive file and extract its content as markdown.
@ -103,7 +105,9 @@ async def download_and_extract_content(
etl_filename = (
file_name + extension if is_google_workspace_file(mime_type) else file_name
)
markdown = await _parse_file_to_markdown(temp_file_path, etl_filename)
markdown = await _parse_file_to_markdown(
temp_file_path, etl_filename, vision_llm=vision_llm
)
return markdown, drive_metadata, None
except Exception as e:
@ -115,12 +119,14 @@ async def download_and_extract_content(
os.unlink(temp_file_path)
async def _parse_file_to_markdown(
    file_path: str, filename: str, *, vision_llm=None
) -> str:
    """Parse a local file to markdown using the unified ETL pipeline.

    Args:
        file_path: Path of the local file handed to the ETL pipeline.
        filename: File name passed to the pipeline alongside the path.
        vision_llm: Optional, keyword-only. Forwarded unchanged to
            ``EtlPipelineService``; defaults to ``None`` so that callers
            which do not opt in pass no vision model.

    Returns:
        The ``markdown_content`` attribute of the extraction result.
    """
    # Imported at call time rather than at module level, as in the original.
    # NOTE(review): presumably to avoid an import cycle or import cost - confirm.
    from app.etl_pipeline.etl_document import EtlRequest
    from app.etl_pipeline.etl_pipeline_service import EtlPipelineService

    result = await EtlPipelineService(vision_llm=vision_llm).extract(
        EtlRequest(file_path=file_path, filename=filename)
    )
    return result.markdown_content