From ad65e3f19cd7f4a464d12ee46f3c8daf741282c1 Mon Sep 17 00:00:00 2001 From: Ray Date: Fri, 30 May 2025 14:02:38 +0800 Subject: [PATCH] fix option for adding node text --- pageindex/page_index.py | 10 ++++++---- run_pageindex.py | 5 ++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pageindex/page_index.py b/pageindex/page_index.py index dedc0bd..210e6c7 100644 --- a/pageindex/page_index.py +++ b/pageindex/page_index.py @@ -1022,12 +1022,14 @@ def page_index_main(doc, opt=None): structure = asyncio.run(tree_parser(page_list, opt, doc=doc, logger=logger)) if opt.if_add_node_id == 'yes': write_node_id(structure) - if opt.if_add_node_summary == 'yes': + if opt.if_add_node_text == 'yes': add_node_text(structure, page_list) + if opt.if_add_node_summary == 'yes': + if opt.if_add_node_text == 'no': + add_node_text(structure, page_list) asyncio.run(generate_summaries_for_structure(structure, model=opt.model)) - remove_structure_text(structure) - if opt.if_add_node_text == 'yes': - add_node_text_with_labels(structure, page_list) + if opt.if_add_node_text == 'no': + remove_structure_text(structure) if opt.if_add_doc_description == 'yes': doc_description = generate_doc_description(structure, model=opt.model) return { diff --git a/run_pageindex.py b/run_pageindex.py index 718af95..63b2206 100644 --- a/run_pageindex.py +++ b/run_pageindex.py @@ -18,6 +18,8 @@ if __name__ == "__main__": help='Whether to add summary to the node') parser.add_argument('--if-add-doc-description', type=str, default='yes', help='Whether to add doc description to the doc') + parser.add_argument('--if-add-node-text', type=str, default='no', + help='Whether to add text to the node') args = parser.parse_args() # Configure options @@ -28,7 +30,8 @@ if __name__ == "__main__": max_token_num_each_node=args.max_tokens_per_node, if_add_node_id=args.if_add_node_id, if_add_node_summary=args.if_add_node_summary, - if_add_doc_description=args.if_add_doc_description + if_add_doc_description=args.if_add_doc_description, + if_add_node_text=args.if_add_node_text ) # Process the PDF