mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-21 18:55:16 +02:00
feat: enhance performance logging in indexing pipeline
- Added performance logging to the `index_batch_parallel` method, capturing metrics for document indexing duration and concurrency. - Introduced timing measurements for both the overall indexing process and the parallel document gathering phase, improving observability of the indexing workflow. - Updated logging statements to provide detailed insights into the number of documents processed, indexed, and failed during the indexing operation.
This commit is contained in:
parent
4fd776e7ef
commit
bd6e335cb3
1 changed files with 21 additions and 0 deletions
|
|
@ -346,6 +346,8 @@ class IndexingPipelineService:
|
|||
bounded by a semaphore to avoid overwhelming APIs/DB.
|
||||
"""
|
||||
logger = logging.getLogger(__name__)
|
||||
perf = get_perf_logger()
|
||||
t_total = time.perf_counter()
|
||||
|
||||
doc_map = {
|
||||
compute_unique_identifier_hash(cd): cd for cd in connector_docs
|
||||
|
|
@ -422,7 +424,17 @@ class IndexingPipelineService:
|
|||
return exc
|
||||
|
||||
tasks = [_index_one(doc) for doc in documents]
|
||||
t_parallel = time.perf_counter()
|
||||
outcomes = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
perf.info(
|
||||
"[indexing] index_batch_parallel gather docs=%d concurrency=%d "
|
||||
"indexed=%d failed=%d in %.3fs",
|
||||
len(documents),
|
||||
max_concurrency,
|
||||
indexed_count,
|
||||
failed_count,
|
||||
time.perf_counter() - t_parallel,
|
||||
)
|
||||
|
||||
for outcome in outcomes:
|
||||
if isinstance(outcome, Document):
|
||||
|
|
@ -430,4 +442,13 @@ class IndexingPipelineService:
|
|||
elif isinstance(outcome, Exception):
|
||||
pass
|
||||
|
||||
perf.info(
|
||||
"[indexing] index_batch_parallel TOTAL input=%d prepared=%d "
|
||||
"indexed=%d failed=%d in %.3fs",
|
||||
len(connector_docs),
|
||||
len(documents),
|
||||
indexed_count,
|
||||
failed_count,
|
||||
time.perf_counter() - t_total,
|
||||
)
|
||||
return results, indexed_count, failed_count
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue