From e7715d8902e81aec653bc5ba4c945efd1ee4b294 Mon Sep 17 00:00:00 2001 From: Ray Date: Wed, 13 May 2026 02:16:55 +0800 Subject: [PATCH] Validate PDF parser CLI option --- run_pageindex.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/run_pageindex.py b/run_pageindex.py index 3548747..bd5efa4 100644 --- a/run_pageindex.py +++ b/run_pageindex.py @@ -29,7 +29,8 @@ if __name__ == "__main__": parser.add_argument('--if-add-node-text', type=str, default=None, help='Whether to add text to the node') parser.add_argument('--pdf-parser', type=str, default=None, - help='PDF text extractor: PyPDF2 (default), pypdfium2 (requires `pip install pypdfium2`), or PyMuPDF') + choices=pageindex_utils.SUPPORTED_PDF_PARSERS, + help='PDF text extractor to use') # Markdown specific arguments parser.add_argument('--if-thinning', type=str, default='no', @@ -136,4 +137,4 @@ if __name__ == "__main__": with open(output_file, 'w', encoding='utf-8') as f: json.dump(toc_with_page_number, f, indent=2, ensure_ascii=False) - print(f'Tree structure saved to: {output_file}') \ No newline at end of file + print(f'Tree structure saved to: {output_file}')