Replace pdf_parser plumbing with mutable DEFAULT_PDF_PARSER global

This commit is contained in:
Ray 2026-05-11 18:43:51 +08:00
parent 63e11ef152
commit 4dec4d66a9
5 changed files with 37 additions and 39 deletions

View file

@ -2,8 +2,9 @@ import argparse
import os
import json
from pageindex import *
import pageindex.utils as pageindex_utils
from pageindex.page_index_md import md_to_tree
from pageindex.utils import ConfigLoader, DEFAULT_PDF_PARSER
from pageindex.utils import ConfigLoader
if __name__ == "__main__":
# Set up argument parser
@ -66,8 +67,12 @@ if __name__ == "__main__":
}
opt = ConfigLoader().load({k: v for k, v in user_opt.items() if v is not None})
# A parser given on the command line takes precedence over both the module-level default and the PAGEINDEX_PDF_PARSER environment variable.
if args.pdf_parser:
pageindex_utils.DEFAULT_PDF_PARSER = args.pdf_parser
# Process the PDF
toc_with_page_number = page_index_main(args.pdf_path, opt, pdf_parser=args.pdf_parser or DEFAULT_PDF_PARSER)
toc_with_page_number = page_index_main(args.pdf_path, opt)
print('Parsing done, saving to file...')
# Save results