mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-21 18:55:16 +02:00
feat: enhance performance logging in indexing pipeline
- Added performance logging to the `index_batch_parallel` method, capturing metrics for document indexing duration and concurrency. - Introduced timing measurements for both the overall indexing process and the parallel document gathering phase, improving observability of the indexing workflow. - Updated logging statements to provide detailed insights into the number of documents processed, indexed, and failed during the indexing operation.
This commit is contained in:
parent
4fd776e7ef
commit
bd6e335cb3
1 changed files with 21 additions and 0 deletions
|
|
@ -346,6 +346,8 @@ class IndexingPipelineService:
|
||||||
bounded by a semaphore to avoid overwhelming APIs/DB.
|
bounded by a semaphore to avoid overwhelming APIs/DB.
|
||||||
"""
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
perf = get_perf_logger()
|
||||||
|
t_total = time.perf_counter()
|
||||||
|
|
||||||
doc_map = {
|
doc_map = {
|
||||||
compute_unique_identifier_hash(cd): cd for cd in connector_docs
|
compute_unique_identifier_hash(cd): cd for cd in connector_docs
|
||||||
|
|
@ -422,7 +424,17 @@ class IndexingPipelineService:
|
||||||
return exc
|
return exc
|
||||||
|
|
||||||
tasks = [_index_one(doc) for doc in documents]
|
tasks = [_index_one(doc) for doc in documents]
|
||||||
|
t_parallel = time.perf_counter()
|
||||||
outcomes = await asyncio.gather(*tasks, return_exceptions=True)
|
outcomes = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
perf.info(
|
||||||
|
"[indexing] index_batch_parallel gather docs=%d concurrency=%d "
|
||||||
|
"indexed=%d failed=%d in %.3fs",
|
||||||
|
len(documents),
|
||||||
|
max_concurrency,
|
||||||
|
indexed_count,
|
||||||
|
failed_count,
|
||||||
|
time.perf_counter() - t_parallel,
|
||||||
|
)
|
||||||
|
|
||||||
for outcome in outcomes:
|
for outcome in outcomes:
|
||||||
if isinstance(outcome, Document):
|
if isinstance(outcome, Document):
|
||||||
|
|
@ -430,4 +442,13 @@ class IndexingPipelineService:
|
||||||
elif isinstance(outcome, Exception):
|
elif isinstance(outcome, Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
perf.info(
|
||||||
|
"[indexing] index_batch_parallel TOTAL input=%d prepared=%d "
|
||||||
|
"indexed=%d failed=%d in %.3fs",
|
||||||
|
len(connector_docs),
|
||||||
|
len(documents),
|
||||||
|
indexed_count,
|
||||||
|
failed_count,
|
||||||
|
time.perf_counter() - t_total,
|
||||||
|
)
|
||||||
return results, indexed_count, failed_count
|
return results, indexed_count, failed_count
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue