mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-07-03 20:41:02 +02:00
Validate PDF parser CLI option
This commit is contained in:
parent
966a6be4d6
commit
e7715d8902
1 changed file with 3 additions and 2 deletions
|
|
@@ -29,7 +29,8 @@ if __name__ == "__main__":
|
||||||
parser.add_argument('--if-add-node-text', type=str, default=None,
|
parser.add_argument('--if-add-node-text', type=str, default=None,
|
||||||
help='Whether to add text to the node')
|
help='Whether to add text to the node')
|
||||||
parser.add_argument('--pdf-parser', type=str, default=None,
|
parser.add_argument('--pdf-parser', type=str, default=None,
|
||||||
help='PDF text extractor: PyPDF2 (default), pypdfium2 (requires `pip install pypdfium2`), or PyMuPDF')
|
choices=pageindex_utils.SUPPORTED_PDF_PARSERS,
|
||||||
|
help='PDF text extractor to use')
|
||||||
|
|
||||||
# Markdown specific arguments
|
# Markdown specific arguments
|
||||||
parser.add_argument('--if-thinning', type=str, default='no',
|
parser.add_argument('--if-thinning', type=str, default='no',
|
||||||
|
|
@@ -136,4 +137,4 @@ if __name__ == "__main__":
|
||||||
with open(output_file, 'w', encoding='utf-8') as f:
|
with open(output_file, 'w', encoding='utf-8') as f:
|
||||||
json.dump(toc_with_page_number, f, indent=2, ensure_ascii=False)
|
json.dump(toc_with_page_number, f, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
print(f'Tree structure saved to: {output_file}')
|
print(f'Tree structure saved to: {output_file}')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue