mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
fix: ran formatter as per coderrabbitai
This commit is contained in:
parent
0ff1b586a2
commit
bbb2abfc02
2 changed files with 29 additions and 8 deletions
|
|
@ -119,7 +119,11 @@ class ElasticsearchConnector:
|
|||
|
||||
total_hits = response.get("hits", {}).get("total", {})
|
||||
# normalize total value (could be dict or int depending on server)
|
||||
total_val = total_hits.get("value", total_hits) if isinstance(total_hits, dict) else total_hits
|
||||
total_val = (
|
||||
total_hits.get("value", total_hits)
|
||||
if isinstance(total_hits, dict)
|
||||
else total_hits
|
||||
)
|
||||
logger.info(
|
||||
f"Successfully searched index '{index}', found {total_val} results"
|
||||
)
|
||||
|
|
@ -213,7 +217,9 @@ class ElasticsearchConnector:
|
|||
|
||||
# Continue scrolling
|
||||
if scroll_id:
|
||||
response = await self.client.scroll(scroll_id=scroll_id, scroll=scroll_timeout)
|
||||
response = await self.client.scroll(
|
||||
scroll_id=scroll_id, scroll=scroll_timeout
|
||||
)
|
||||
scroll_id = response.get("_scroll_id")
|
||||
hits = response.get("hits", {}).get("hits", [])
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ from .base import check_document_by_unique_identifier, check_duplicate_document_
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def index_elasticsearch_documents(
|
||||
session: AsyncSession,
|
||||
connector_id: int,
|
||||
|
|
@ -75,7 +76,10 @@ async def index_elasticsearch_documents(
|
|||
error_msg = f"Elasticsearch connector with ID {connector_id} not found"
|
||||
logger.error(error_msg)
|
||||
await task_logger.log_task_failure(
|
||||
log_entry, "Connector not found", error_msg, {"connector_id": connector_id}
|
||||
log_entry,
|
||||
"Connector not found",
|
||||
error_msg,
|
||||
{"connector_id": connector_id},
|
||||
)
|
||||
return 0, error_msg
|
||||
|
||||
|
|
@ -152,7 +156,11 @@ async def index_elasticsearch_documents(
|
|||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
"Starting scroll search",
|
||||
{"index": index_name, "stage": "scroll_start", "max_documents": max_documents},
|
||||
{
|
||||
"index": index_name,
|
||||
"stage": "scroll_start",
|
||||
"max_documents": max_documents,
|
||||
},
|
||||
)
|
||||
# Use scroll search for large result sets
|
||||
async for hit in es_connector.scroll_search(
|
||||
|
|
@ -210,7 +218,9 @@ async def index_elasticsearch_documents(
|
|||
# Build source-unique identifier and hash (prefer source id dedupe)
|
||||
source_identifier = f"{hit.get('_index', index_name)}:{doc_id}"
|
||||
unique_identifier_hash = generate_unique_identifier_hash(
|
||||
DocumentType.ELASTICSEARCH_CONNECTOR, source_identifier, search_space_id
|
||||
DocumentType.ELASTICSEARCH_CONNECTOR,
|
||||
source_identifier,
|
||||
search_space_id,
|
||||
)
|
||||
|
||||
# Two-step duplicate detection: first by source-unique id, then by content hash
|
||||
|
|
@ -245,7 +255,7 @@ async def index_elasticsearch_documents(
|
|||
if documents_processed % 10 == 0:
|
||||
await session.commit()
|
||||
continue
|
||||
|
||||
|
||||
# Create document
|
||||
document = Document(
|
||||
title=title,
|
||||
|
|
@ -278,7 +288,10 @@ async def index_elasticsearch_documents(
|
|||
log_entry,
|
||||
"Document processing error",
|
||||
msg,
|
||||
{"document_id": hit.get("_id", "unknown"), "error_type": type(e).__name__},
|
||||
{
|
||||
"document_id": hit.get("_id", "unknown"),
|
||||
"error_type": type(e).__name__,
|
||||
},
|
||||
)
|
||||
continue
|
||||
|
||||
|
|
@ -297,7 +310,9 @@ async def index_elasticsearch_documents(
|
|||
# Update last indexed timestamp if requested
|
||||
if update_last_indexed and documents_processed > 0:
|
||||
# connector.last_indexed_at = datetime.now()
|
||||
connector.last_indexed_at = datetime.now(UTC).isoformat().replace("+00:00", "Z")
|
||||
connector.last_indexed_at = (
|
||||
datetime.now(UTC).isoformat().replace("+00:00", "Z")
|
||||
)
|
||||
await session.commit()
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue