consolidate async calls

This commit is contained in:
Ray 2025-06-25 15:41:29 +08:00
parent 3f174796fa
commit 6d6e92d56e

View file

@@ -1071,28 +1071,31 @@ def page_index_main(doc, opt=None):
logger.info({'total_page_number': len(page_list)}) logger.info({'total_page_number': len(page_list)})
logger.info({'total_token': sum([page[1] for page in page_list])}) logger.info({'total_token': sum([page[1] for page in page_list])})
structure = asyncio.run(tree_parser(page_list, opt, doc=doc, logger=logger)) async def page_index_builder():
if opt.if_add_node_id == 'yes': structure = await tree_parser(page_list, opt, doc=doc, logger=logger)
write_node_id(structure) if opt.if_add_node_id == 'yes':
if opt.if_add_node_text == 'yes': write_node_id(structure)
add_node_text(structure, page_list) if opt.if_add_node_text == 'yes':
if opt.if_add_node_summary == 'yes':
if opt.if_add_node_text == 'no':
add_node_text(structure, page_list) add_node_text(structure, page_list)
asyncio.run(generate_summaries_for_structure(structure, model=opt.model)) if opt.if_add_node_summary == 'yes':
if opt.if_add_node_text == 'no': if opt.if_add_node_text == 'no':
remove_structure_text(structure) add_node_text(structure, page_list)
if opt.if_add_doc_description == 'yes': await generate_summaries_for_structure(structure, model=opt.model)
doc_description = generate_doc_description(structure, model=opt.model) if opt.if_add_node_text == 'no':
return { remove_structure_text(structure)
'doc_name': get_pdf_name(doc), if opt.if_add_doc_description == 'yes':
'doc_description': doc_description, doc_description = generate_doc_description(structure, model=opt.model)
'structure': structure, return {
} 'doc_name': get_pdf_name(doc),
return { 'doc_description': doc_description,
'doc_name': get_pdf_name(doc), 'structure': structure,
'structure': structure, }
} return {
'doc_name': get_pdf_name(doc),
'structure': structure,
}
return asyncio.run(page_index_builder())
def page_index(doc, model=None, toc_check_page_num=None, max_page_num_each_node=None, max_token_num_each_node=None, def page_index(doc, model=None, toc_check_page_num=None, max_page_num_each_node=None, max_token_num_each_node=None,