mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-30 20:39:43 +02:00
Replace pdf_parser plumbing with mutable DEFAULT_PDF_PARSER global
This commit is contained in:
parent
63e11ef152
commit
4dec4d66a9
5 changed files with 37 additions and 39 deletions
|
|
@@ -2,8 +2,9 @@ import argparse
|
|||
import os
|
||||
import json
|
||||
from pageindex import *
|
||||
import pageindex.utils as pageindex_utils
|
||||
from pageindex.page_index_md import md_to_tree
|
||||
from pageindex.utils import ConfigLoader, DEFAULT_PDF_PARSER
|
||||
from pageindex.utils import ConfigLoader
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Set up argument parser
|
||||
|
|
@@ -66,8 +67,12 @@ if __name__ == "__main__":
|
|||
}
|
||||
opt = ConfigLoader().load({k: v for k, v in user_opt.items() if v is not None})
|
||||
|
||||
# CLI flag overrides the module-level default (and env var PAGEINDEX_PDF_PARSER).
|
||||
if args.pdf_parser:
|
||||
pageindex_utils.DEFAULT_PDF_PARSER = args.pdf_parser
|
||||
|
||||
# Process the PDF
|
||||
toc_with_page_number = page_index_main(args.pdf_path, opt, pdf_parser=args.pdf_parser or DEFAULT_PDF_PARSER)
|
||||
toc_with_page_number = page_index_main(args.pdf_path, opt)
|
||||
print('Parsing done, saving to file...')
|
||||
|
||||
# Save results
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue