fix option for adding node text

This commit is contained in:
Ray 2025-05-30 14:02:38 +08:00
parent 36b22e3bc7
commit ad65e3f19c
2 changed files with 10 additions and 5 deletions

View file

@@ -1022,12 +1022,14 @@ def page_index_main(doc, opt=None):
structure = asyncio.run(tree_parser(page_list, opt, doc=doc, logger=logger))
if opt.if_add_node_id == 'yes':
write_node_id(structure)
if opt.if_add_node_text == 'yes':
add_node_text(structure, page_list)
if opt.if_add_node_summary == 'yes':
if opt.if_add_node_text == 'no':
add_node_text(structure, page_list)
asyncio.run(generate_summaries_for_structure(structure, model=opt.model))
if opt.if_add_node_text == 'no':
remove_structure_text(structure)
if opt.if_add_node_text == 'yes':
add_node_text_with_labels(structure, page_list)
if opt.if_add_doc_description == 'yes':
doc_description = generate_doc_description(structure, model=opt.model)
return {

View file

@@ -18,6 +18,8 @@ if __name__ == "__main__":
help='Whether to add summary to the node')
parser.add_argument('--if-add-doc-description', type=str, default='yes',
help='Whether to add doc description to the doc')
parser.add_argument('--if-add-node-text', type=str, default='no',
help='Whether to add text to the node')
args = parser.parse_args()
# Configure options
@@ -28,7 +30,8 @@ if __name__ == "__main__":
max_token_num_each_node=args.max_tokens_per_node,
if_add_node_id=args.if_add_node_id,
if_add_node_summary=args.if_add_node_summary,
if_add_doc_description=args.if_add_doc_description
if_add_doc_description=args.if_add_doc_description,
if_add_node_text=args.if_add_node_text
)
# Process the PDF