mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-24 21:38:09 +02:00
feat(tasks): route extraction through etl cache
This commit is contained in:
parent
1c05980ffb
commit
0dc2ccc003
1 changed file with 5 additions and 3 deletions
|
|
@@ -381,7 +381,6 @@ async def _extract_file_content(
|
||||||
Tuple of (markdown_content, etl_service_name, billable_pages).
|
Tuple of (markdown_content, etl_service_name, billable_pages).
|
||||||
"""
|
"""
|
||||||
from app.etl_pipeline.etl_document import EtlRequest, ProcessingMode
|
from app.etl_pipeline.etl_document import EtlRequest, ProcessingMode
|
||||||
from app.etl_pipeline.etl_pipeline_service import EtlPipelineService
|
|
||||||
from app.etl_pipeline.file_classifier import (
|
from app.etl_pipeline.file_classifier import (
|
||||||
FileCategory,
|
FileCategory,
|
||||||
classify_file as etl_classify,
|
classify_file as etl_classify,
|
||||||
|
|
@@ -432,13 +431,16 @@ async def _extract_file_content(
|
||||||
|
|
||||||
vision_llm = await get_vision_llm(session, search_space_id)
|
vision_llm = await get_vision_llm(session, search_space_id)
|
||||||
|
|
||||||
result = await EtlPipelineService(vision_llm=vision_llm).extract(
|
from app.etl_pipeline.cache import extract_with_cache
|
||||||
|
|
||||||
|
result = await extract_with_cache(
|
||||||
EtlRequest(
|
EtlRequest(
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
filename=filename,
|
filename=filename,
|
||||||
estimated_pages=estimated_pages,
|
estimated_pages=estimated_pages,
|
||||||
processing_mode=mode,
|
processing_mode=mode,
|
||||||
)
|
),
|
||||||
|
vision_llm=vision_llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue