Integrate LiteLLM for multi-provider LLM support (#168)

* Integrate litellm for multi-provider LLM support

* recover the default config yaml

* Use litellm.acompletion for native async support

* fix toc

* Rename llm_complete/allm_complete to llm_completion/llm_acompletion, remove unused llm_complete_stream

* Pin litellm to version 1.82.0

* resolve comments

* args from CLI are used to override config.yaml

* Fix get_page_tokens hardcoded model default

Pass opt.model to get_page_tokens so tokenization respects the
configured model instead of always using gpt-4o-2024-11-20.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Remove explicit openai dependency from requirements.txt

openai is no longer directly imported; it comes in as a transitive
dependency of litellm. Pinning it explicitly risks version conflicts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Restore openai==1.101.0 pin in requirements.txt

litellm==1.82.0 and openai-agents have conflicting openai version
requirements, but openai==1.101.0 works at runtime for both.
The pin is necessary to prevent litellm from pulling in openai>=2.x
which would break openai-agents.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Remove explicit openai dependency from requirements.txt

openai is not directly used; it comes in as a transitive dependency
of litellm. No openai-agents in this branch so no pin needed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix a litellm error log

* resolve comments

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Kylin 2026-03-20 18:47:07 +08:00 committed by GitHub
parent 4b4b20f9c4
commit 2403be8f27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 78 additions and 104 deletions

View file

@ -1,4 +1,5 @@
model: "gpt-4o-2024-11-20" model: "gpt-4o-2024-11-20"
# model: "anthropic/claude-sonnet-4-6"
toc_check_page_num: 20 toc_check_page_num: 20
max_page_num_each_node: 10 max_page_num_each_node: 10
max_token_num_each_node: 20000 max_token_num_each_node: 20000

View file

@ -36,7 +36,7 @@ async def check_title_appearance(item, page_list, start_index=1, model=None):
}} }}
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
response = await ChatGPT_API_async(model=model, prompt=prompt) response = await llm_acompletion(model=model, prompt=prompt)
response = extract_json(response) response = extract_json(response)
if 'answer' in response: if 'answer' in response:
answer = response['answer'] answer = response['answer']
@ -64,7 +64,7 @@ async def check_title_appearance_in_start(title, page_text, model=None, logger=N
}} }}
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
response = await ChatGPT_API_async(model=model, prompt=prompt) response = await llm_acompletion(model=model, prompt=prompt)
response = extract_json(response) response = extract_json(response)
if logger: if logger:
logger.info(f"Response: {response}") logger.info(f"Response: {response}")
@ -116,7 +116,7 @@ def toc_detector_single_page(content, model=None):
Directly return the final JSON structure. Do not output anything else. Directly return the final JSON structure. Do not output anything else.
Please note: abstract,summary, notation list, figure list, table list, etc. are not table of contents.""" Please note: abstract,summary, notation list, figure list, table list, etc. are not table of contents."""
response = ChatGPT_API(model=model, prompt=prompt) response = llm_completion(model=model, prompt=prompt)
# print('response', response) # print('response', response)
json_content = extract_json(response) json_content = extract_json(response)
return json_content['toc_detected'] return json_content['toc_detected']
@ -135,7 +135,7 @@ def check_if_toc_extraction_is_complete(content, toc, model=None):
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\n Document:\n' + content + '\n Table of contents:\n' + toc prompt = prompt + '\n Document:\n' + content + '\n Table of contents:\n' + toc
response = ChatGPT_API(model=model, prompt=prompt) response = llm_completion(model=model, prompt=prompt)
json_content = extract_json(response) json_content = extract_json(response)
return json_content['completed'] return json_content['completed']
@ -153,7 +153,7 @@ def check_if_toc_transformation_is_complete(content, toc, model=None):
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\n Raw Table of contents:\n' + content + '\n Cleaned Table of contents:\n' + toc prompt = prompt + '\n Raw Table of contents:\n' + content + '\n Cleaned Table of contents:\n' + toc
response = ChatGPT_API(model=model, prompt=prompt) response = llm_completion(model=model, prompt=prompt)
json_content = extract_json(response) json_content = extract_json(response)
return json_content['completed'] return json_content['completed']
@ -165,7 +165,7 @@ def extract_toc_content(content, model=None):
Directly return the full table of contents content. Do not output anything else.""" Directly return the full table of contents content. Do not output anything else."""
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt) response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if_complete = check_if_toc_transformation_is_complete(content, response, model) if_complete = check_if_toc_transformation_is_complete(content, response, model)
if if_complete == "yes" and finish_reason == "finished": if if_complete == "yes" and finish_reason == "finished":
@ -176,7 +176,7 @@ def extract_toc_content(content, model=None):
{"role": "assistant", "content": response}, {"role": "assistant", "content": response},
] ]
prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure""" prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history) new_response, finish_reason = llm_completion(model=model, prompt=prompt, chat_history=chat_history, return_finish_reason=True)
response = response + new_response response = response + new_response
if_complete = check_if_toc_transformation_is_complete(content, response, model) if_complete = check_if_toc_transformation_is_complete(content, response, model)
@ -193,7 +193,7 @@ def extract_toc_content(content, model=None):
{"role": "assistant", "content": response}, {"role": "assistant", "content": response},
] ]
prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure""" prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history) new_response, finish_reason = llm_completion(model=model, prompt=prompt, chat_history=chat_history, return_finish_reason=True)
response = response + new_response response = response + new_response
if_complete = check_if_toc_transformation_is_complete(content, response, model) if_complete = check_if_toc_transformation_is_complete(content, response, model)
@ -215,7 +215,7 @@ def detect_page_index(toc_content, model=None):
}} }}
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
response = ChatGPT_API(model=model, prompt=prompt) response = llm_completion(model=model, prompt=prompt)
json_content = extract_json(response) json_content = extract_json(response)
return json_content['page_index_given_in_toc'] return json_content['page_index_given_in_toc']
@ -264,7 +264,7 @@ def toc_index_extractor(toc, content, model=None):
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
prompt = toc_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content prompt = toc_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
response = ChatGPT_API(model=model, prompt=prompt) response = llm_completion(model=model, prompt=prompt)
json_content = extract_json(response) json_content = extract_json(response)
return json_content return json_content
@ -292,7 +292,7 @@ def toc_transformer(toc_content, model=None):
Directly return the final JSON structure, do not output anything else. """ Directly return the final JSON structure, do not output anything else. """
prompt = init_prompt + '\n Given table of contents\n:' + toc_content prompt = init_prompt + '\n Given table of contents\n:' + toc_content
last_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt) last_complete, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete, model) if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete, model)
if if_complete == "yes" and finish_reason == "finished": if if_complete == "yes" and finish_reason == "finished":
last_complete = extract_json(last_complete) last_complete = extract_json(last_complete)
@ -300,7 +300,12 @@ def toc_transformer(toc_content, model=None):
return cleaned_response return cleaned_response
last_complete = get_json_content(last_complete) last_complete = get_json_content(last_complete)
attempt = 0
max_attempts = 5
while not (if_complete == "yes" and finish_reason == "finished"): while not (if_complete == "yes" and finish_reason == "finished"):
attempt += 1
if attempt > max_attempts:
raise Exception('Failed to complete toc transformation after maximum retries')
position = last_complete.rfind('}') position = last_complete.rfind('}')
if position != -1: if position != -1:
last_complete = last_complete[:position+2] last_complete = last_complete[:position+2]
@ -316,7 +321,7 @@ def toc_transformer(toc_content, model=None):
Please continue the json structure, directly output the remaining part of the json structure.""" Please continue the json structure, directly output the remaining part of the json structure."""
new_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt) new_complete, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if new_complete.startswith('```json'): if new_complete.startswith('```json'):
new_complete = get_json_content(new_complete) new_complete = get_json_content(new_complete)
@ -477,7 +482,7 @@ def add_page_number_to_toc(part, structure, model=None):
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
prompt = fill_prompt_seq + f"\n\nCurrent Partial Document:\n{part}\n\nGiven Structure\n{json.dumps(structure, indent=2)}\n" prompt = fill_prompt_seq + f"\n\nCurrent Partial Document:\n{part}\n\nGiven Structure\n{json.dumps(structure, indent=2)}\n"
current_json_raw = ChatGPT_API(model=model, prompt=prompt) current_json_raw = llm_completion(model=model, prompt=prompt)
json_result = extract_json(current_json_raw) json_result = extract_json(current_json_raw)
for item in json_result: for item in json_result:
@ -499,7 +504,7 @@ def remove_first_physical_index_section(text):
return text return text
### add verify completeness ### add verify completeness
def generate_toc_continue(toc_content, part, model="gpt-4o-2024-11-20"): def generate_toc_continue(toc_content, part, model=None):
print('start generate_toc_continue') print('start generate_toc_continue')
prompt = """ prompt = """
You are an expert in extracting hierarchical tree structure. You are an expert in extracting hierarchical tree structure.
@ -527,7 +532,7 @@ def generate_toc_continue(toc_content, part, model="gpt-4o-2024-11-20"):
Directly return the additional part of the final JSON structure. Do not output anything else.""" Directly return the additional part of the final JSON structure. Do not output anything else."""
prompt = prompt + '\nGiven text\n:' + part + '\nPrevious tree structure\n:' + json.dumps(toc_content, indent=2) prompt = prompt + '\nGiven text\n:' + part + '\nPrevious tree structure\n:' + json.dumps(toc_content, indent=2)
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt) response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if finish_reason == 'finished': if finish_reason == 'finished':
return extract_json(response) return extract_json(response)
else: else:
@ -561,7 +566,7 @@ def generate_toc_init(part, model=None):
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\nGiven text\n:' + part prompt = prompt + '\nGiven text\n:' + part
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt) response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if finish_reason == 'finished': if finish_reason == 'finished':
return extract_json(response) return extract_json(response)
@ -732,7 +737,7 @@ def check_toc(page_list, opt=None):
################### fix incorrect toc ######################################################### ################### fix incorrect toc #########################################################
def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20"): async def single_toc_item_index_fixer(section_title, content, model=None):
toc_extractor_prompt = """ toc_extractor_prompt = """
You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document. You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document.
@ -746,7 +751,7 @@ def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20
Directly return the final JSON structure. Do not output anything else.""" Directly return the final JSON structure. Do not output anything else."""
prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
response = ChatGPT_API(model=model, prompt=prompt) response = await llm_acompletion(model=model, prompt=prompt)
json_content = extract_json(response) json_content = extract_json(response)
return convert_physical_index_to_int(json_content['physical_index']) return convert_physical_index_to_int(json_content['physical_index'])
@ -815,7 +820,7 @@ async def fix_incorrect_toc(toc_with_page_number, page_list, incorrect_results,
continue continue
content_range = ''.join(page_contents) content_range = ''.join(page_contents)
physical_index_int = single_toc_item_index_fixer(incorrect_item['title'], content_range, model) physical_index_int = await single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
# Check if the result is correct # Check if the result is correct
check_item = incorrect_item.copy() check_item = incorrect_item.copy()
@ -1069,7 +1074,7 @@ def page_index_main(doc, opt=None):
raise ValueError("Unsupported input type. Expected a PDF file path or BytesIO object.") raise ValueError("Unsupported input type. Expected a PDF file path or BytesIO object.")
print('Parsing PDF...') print('Parsing PDF...')
page_list = get_page_tokens(doc) page_list = get_page_tokens(doc, model=opt.model)
logger.info({'total_page_number': len(page_list)}) logger.info({'total_page_number': len(page_list)})
logger.info({'total_token': sum([page[1] for page in page_list])}) logger.info({'total_token': sum([page[1] for page in page_list])})

View file

@ -1,5 +1,4 @@
import tiktoken import litellm
import openai
import logging import logging
import os import os
from datetime import datetime from datetime import datetime
@ -17,95 +16,65 @@ import yaml
from pathlib import Path from pathlib import Path
from types import SimpleNamespace as config from types import SimpleNamespace as config
CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY") # Backward compatibility: support CHATGPT_API_KEY as alias for OPENAI_API_KEY
if not os.getenv("OPENAI_API_KEY") and os.getenv("CHATGPT_API_KEY"):
os.environ["OPENAI_API_KEY"] = os.getenv("CHATGPT_API_KEY")
litellm.drop_params = True
def count_tokens(text, model=None): def count_tokens(text, model=None):
if not text: if not text:
return 0 return 0
enc = tiktoken.encoding_for_model(model) return litellm.token_counter(model=model, text=text)
tokens = enc.encode(text)
return len(tokens)
def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):
def llm_completion(model, prompt, chat_history=None, return_finish_reason=False):
max_retries = 10 max_retries = 10
client = openai.OpenAI(api_key=api_key) messages = list(chat_history) + [{"role": "user", "content": prompt}] if chat_history else [{"role": "user", "content": prompt}]
for i in range(max_retries): for i in range(max_retries):
try: try:
if chat_history: response = litellm.completion(
messages = chat_history
messages.append({"role": "user", "content": prompt})
else:
messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(
model=model, model=model,
messages=messages, messages=messages,
temperature=0, temperature=0,
) )
if response.choices[0].finish_reason == "length": content = response.choices[0].message.content
return response.choices[0].message.content, "max_output_reached" if return_finish_reason:
else: finish_reason = "max_output_reached" if response.choices[0].finish_reason == "length" else "finished"
return response.choices[0].message.content, "finished" return content, finish_reason
return content
except Exception as e: except Exception as e:
print('************* Retrying *************') print('************* Retrying *************')
logging.error(f"Error: {e}") logging.error(f"Error: {e}")
if i < max_retries - 1: if i < max_retries - 1:
time.sleep(1) # Wait for 1秒 before retrying time.sleep(1)
else: else:
logging.error('Max retries reached for prompt: ' + prompt) logging.error('Max retries reached for prompt: ' + prompt)
return "", "error" if return_finish_reason:
return "", "error"
return ""
def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None): async def llm_acompletion(model, prompt):
max_retries = 10 max_retries = 10
client = openai.OpenAI(api_key=api_key) messages = [{"role": "user", "content": prompt}]
for i in range(max_retries): for i in range(max_retries):
try: try:
if chat_history: response = await litellm.acompletion(
messages = chat_history
messages.append({"role": "user", "content": prompt})
else:
messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(
model=model, model=model,
messages=messages, messages=messages,
temperature=0, temperature=0,
) )
return response.choices[0].message.content return response.choices[0].message.content
except Exception as e: except Exception as e:
print('************* Retrying *************') print('************* Retrying *************')
logging.error(f"Error: {e}") logging.error(f"Error: {e}")
if i < max_retries - 1: if i < max_retries - 1:
time.sleep(1) # Wait for 1秒 before retrying await asyncio.sleep(1)
else: else:
logging.error('Max retries reached for prompt: ' + prompt) logging.error('Max retries reached for prompt: ' + prompt)
return "Error" return ""
async def ChatGPT_API_async(model, prompt, api_key=CHATGPT_API_KEY):
max_retries = 10
messages = [{"role": "user", "content": prompt}]
for i in range(max_retries):
try:
async with openai.AsyncOpenAI(api_key=api_key) as client:
response = await client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
)
return response.choices[0].message.content
except Exception as e:
print('************* Retrying *************')
logging.error(f"Error: {e}")
if i < max_retries - 1:
await asyncio.sleep(1) # Wait for 1s before retrying
else:
logging.error('Max retries reached for prompt: ' + prompt)
return "Error"
def get_json_content(response): def get_json_content(response):
@ -410,15 +379,14 @@ def add_preface_if_needed(data):
def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"): def get_page_tokens(pdf_path, model=None, pdf_parser="PyPDF2"):
enc = tiktoken.encoding_for_model(model)
if pdf_parser == "PyPDF2": if pdf_parser == "PyPDF2":
pdf_reader = PyPDF2.PdfReader(pdf_path) pdf_reader = PyPDF2.PdfReader(pdf_path)
page_list = [] page_list = []
for page_num in range(len(pdf_reader.pages)): for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num] page = pdf_reader.pages[page_num]
page_text = page.extract_text() page_text = page.extract_text()
token_length = len(enc.encode(page_text)) token_length = litellm.token_counter(model=model, text=page_text)
page_list.append((page_text, token_length)) page_list.append((page_text, token_length))
return page_list return page_list
elif pdf_parser == "PyMuPDF": elif pdf_parser == "PyMuPDF":
@ -430,7 +398,7 @@ def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
page_list = [] page_list = []
for page in doc: for page in doc:
page_text = page.get_text() page_text = page.get_text()
token_length = len(enc.encode(page_text)) token_length = litellm.token_counter(model=model, text=page_text)
page_list.append((page_text, token_length)) page_list.append((page_text, token_length))
return page_list return page_list
else: else:
@ -533,7 +501,7 @@ def remove_structure_text(data):
def check_token_limit(structure, limit=110000): def check_token_limit(structure, limit=110000):
list = structure_to_list(structure) list = structure_to_list(structure)
for node in list: for node in list:
num_tokens = count_tokens(node['text'], model='gpt-4o') num_tokens = count_tokens(node['text'], model=None)
if num_tokens > limit: if num_tokens > limit:
print(f"Node ID: {node['node_id']} has {num_tokens} tokens") print(f"Node ID: {node['node_id']} has {num_tokens} tokens")
print("Start Index:", node['start_index']) print("Start Index:", node['start_index'])
@ -609,7 +577,7 @@ async def generate_node_summary(node, model=None):
Directly return the description, do not include any other text. Directly return the description, do not include any other text.
""" """
response = await ChatGPT_API_async(model, prompt) response = await llm_acompletion(model, prompt)
return response return response
@ -654,7 +622,7 @@ def generate_doc_description(structure, model=None):
Directly return the description, do not include any other text. Directly return the description, do not include any other text.
""" """
response = ChatGPT_API(model, prompt) response = llm_completion(model, prompt)
return response return response

View file

@ -1,6 +1,5 @@
openai==1.101.0 litellm==1.82.0
pymupdf==1.26.4 pymupdf==1.26.4
PyPDF2==3.0.1 PyPDF2==3.0.1
python-dotenv==1.1.0 python-dotenv==1.1.0
tiktoken==0.11.0
pyyaml==6.0.2 pyyaml==6.0.2

View file

@ -3,6 +3,7 @@ import os
import json import json
from pageindex import * from pageindex import *
from pageindex.page_index_md import md_to_tree from pageindex.page_index_md import md_to_tree
from pageindex.utils import ConfigLoader
if __name__ == "__main__": if __name__ == "__main__":
# Set up argument parser # Set up argument parser
@ -10,22 +11,22 @@ if __name__ == "__main__":
parser.add_argument('--pdf_path', type=str, help='Path to the PDF file') parser.add_argument('--pdf_path', type=str, help='Path to the PDF file')
parser.add_argument('--md_path', type=str, help='Path to the Markdown file') parser.add_argument('--md_path', type=str, help='Path to the Markdown file')
parser.add_argument('--model', type=str, default='gpt-4o-2024-11-20', help='Model to use') parser.add_argument('--model', type=str, default=None, help='Model to use (overrides config.yaml)')
parser.add_argument('--toc-check-pages', type=int, default=20, parser.add_argument('--toc-check-pages', type=int, default=None,
help='Number of pages to check for table of contents (PDF only)') help='Number of pages to check for table of contents (PDF only)')
parser.add_argument('--max-pages-per-node', type=int, default=10, parser.add_argument('--max-pages-per-node', type=int, default=None,
help='Maximum number of pages per node (PDF only)') help='Maximum number of pages per node (PDF only)')
parser.add_argument('--max-tokens-per-node', type=int, default=20000, parser.add_argument('--max-tokens-per-node', type=int, default=None,
help='Maximum number of tokens per node (PDF only)') help='Maximum number of tokens per node (PDF only)')
parser.add_argument('--if-add-node-id', type=str, default='yes', parser.add_argument('--if-add-node-id', type=str, default=None,
help='Whether to add node id to the node') help='Whether to add node id to the node')
parser.add_argument('--if-add-node-summary', type=str, default='yes', parser.add_argument('--if-add-node-summary', type=str, default=None,
help='Whether to add summary to the node') help='Whether to add summary to the node')
parser.add_argument('--if-add-doc-description', type=str, default='no', parser.add_argument('--if-add-doc-description', type=str, default=None,
help='Whether to add doc description to the doc') help='Whether to add doc description to the doc')
parser.add_argument('--if-add-node-text', type=str, default='no', parser.add_argument('--if-add-node-text', type=str, default=None,
help='Whether to add text to the node') help='Whether to add text to the node')
# Markdown specific arguments # Markdown specific arguments
@ -51,17 +52,17 @@ if __name__ == "__main__":
raise ValueError(f"PDF file not found: {args.pdf_path}") raise ValueError(f"PDF file not found: {args.pdf_path}")
# Process PDF file # Process PDF file
# Configure options user_opt = {
opt = config( 'model': args.model,
model=args.model, 'toc_check_page_num': args.toc_check_pages,
toc_check_page_num=args.toc_check_pages, 'max_page_num_each_node': args.max_pages_per_node,
max_page_num_each_node=args.max_pages_per_node, 'max_token_num_each_node': args.max_tokens_per_node,
max_token_num_each_node=args.max_tokens_per_node, 'if_add_node_id': args.if_add_node_id,
if_add_node_id=args.if_add_node_id, 'if_add_node_summary': args.if_add_node_summary,
if_add_node_summary=args.if_add_node_summary, 'if_add_doc_description': args.if_add_doc_description,
if_add_doc_description=args.if_add_doc_description, 'if_add_node_text': args.if_add_node_text,
if_add_node_text=args.if_add_node_text }
) opt = ConfigLoader().load({k: v for k, v in user_opt.items() if v is not None})
# Process the PDF # Process the PDF
toc_with_page_number = page_index_main(args.pdf_path, opt) toc_with_page_number = page_index_main(args.pdf_path, opt)