use trafilatura to extract page content from the chromium result

2026-07-22 23:31:12 +02:00 · 2025-12-19 08:51:09 +02:00 · 2025-12-19 08:51:09 +02:00 · f524cf5322
commit f524cf5322
parent 64cd65bc1f
1 changed files with 0 additions and 4 deletions
--- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
@@ -197,10 +197,6 @@ async def index_crawled_urls(
                    structured_document_for_hash, search_space_id
                )

-                logger.info(
-                    f"structured_document_for_hash {structured_document_for_hash} ========="
-                )
-
                # Check if document with this unique identifier already exists
                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash