From f524cf53228c5465e477265c9c651102fc8b9c53 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 19 Dec 2025 08:51:09 +0200 Subject: [PATCH] use trafilatura to extract page content from the chromium result --- .../app/tasks/connector_indexers/webcrawler_indexer.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index 57cf819fe..3d5fc77b0 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -197,10 +197,6 @@ async def index_crawled_urls( structured_document_for_hash, search_space_id ) - logger.info( - f"structured_document_for_hash {structured_document_for_hash} =========" - ) - # Check if document with this unique identifier already exists existing_document = await check_document_by_unique_identifier( session, unique_identifier_hash