Integrate LiteLLM for multi-provider LLM support (#168)

* Integrate litellm for multi-provider LLM support

* recover the default config yaml

* Use litellm.acompletion for native async support

* fix toc

* Rename llm_complete/allm_complete to llm_completion/llm_acompletion, remove unused llm_complete_stream

* Pin litellm to version 1.82.0

* resolve comments

* Args from the CLI are used to override config.yaml

* Fix get_page_tokens hardcoded model default

Pass opt.model to get_page_tokens so tokenization respects the
configured model instead of always using gpt-4o-2024-11-20.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Remove explicit openai dependency from requirements.txt

openai is no longer directly imported; it comes in as a transitive
dependency of litellm. Pinning it explicitly risks version conflicts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Restore openai==1.101.0 pin in requirements.txt

litellm==1.82.0 and openai-agents have conflicting openai version
requirements, but openai==1.101.0 works at runtime for both.
The pin is necessary to prevent litellm from pulling in openai>=2.x
which would break openai-agents.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Remove explicit openai dependency from requirements.txt

openai is not directly used; it comes in as a transitive dependency
of litellm. No openai-agents in this branch so no pin needed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix a litellm error log

* resolve comments

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Kylin 2026-03-20 18:47:07 +08:00 committed by GitHub
parent 4b4b20f9c4
commit 2403be8f27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 78 additions and 104 deletions

View file

@ -36,7 +36,7 @@ async def check_title_appearance(item, page_list, start_index=1, model=None):
}}
Directly return the final JSON structure. Do not output anything else."""
response = await ChatGPT_API_async(model=model, prompt=prompt)
response = await llm_acompletion(model=model, prompt=prompt)
response = extract_json(response)
if 'answer' in response:
answer = response['answer']
@ -64,7 +64,7 @@ async def check_title_appearance_in_start(title, page_text, model=None, logger=N
}}
Directly return the final JSON structure. Do not output anything else."""
response = await ChatGPT_API_async(model=model, prompt=prompt)
response = await llm_acompletion(model=model, prompt=prompt)
response = extract_json(response)
if logger:
logger.info(f"Response: {response}")
@ -116,7 +116,7 @@ def toc_detector_single_page(content, model=None):
Directly return the final JSON structure. Do not output anything else.
Please note: abstract,summary, notation list, figure list, table list, etc. are not table of contents."""
response = ChatGPT_API(model=model, prompt=prompt)
response = llm_completion(model=model, prompt=prompt)
# print('response', response)
json_content = extract_json(response)
return json_content['toc_detected']
@ -135,7 +135,7 @@ def check_if_toc_extraction_is_complete(content, toc, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\n Document:\n' + content + '\n Table of contents:\n' + toc
response = ChatGPT_API(model=model, prompt=prompt)
response = llm_completion(model=model, prompt=prompt)
json_content = extract_json(response)
return json_content['completed']
@ -153,7 +153,7 @@ def check_if_toc_transformation_is_complete(content, toc, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\n Raw Table of contents:\n' + content + '\n Cleaned Table of contents:\n' + toc
response = ChatGPT_API(model=model, prompt=prompt)
response = llm_completion(model=model, prompt=prompt)
json_content = extract_json(response)
return json_content['completed']
@ -165,7 +165,7 @@ def extract_toc_content(content, model=None):
Directly return the full table of contents content. Do not output anything else."""
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if_complete = check_if_toc_transformation_is_complete(content, response, model)
if if_complete == "yes" and finish_reason == "finished":
@ -176,7 +176,7 @@ def extract_toc_content(content, model=None):
{"role": "assistant", "content": response},
]
prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
new_response, finish_reason = llm_completion(model=model, prompt=prompt, chat_history=chat_history, return_finish_reason=True)
response = response + new_response
if_complete = check_if_toc_transformation_is_complete(content, response, model)
@ -193,7 +193,7 @@ def extract_toc_content(content, model=None):
{"role": "assistant", "content": response},
]
prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
new_response, finish_reason = llm_completion(model=model, prompt=prompt, chat_history=chat_history, return_finish_reason=True)
response = response + new_response
if_complete = check_if_toc_transformation_is_complete(content, response, model)
@ -215,7 +215,7 @@ def detect_page_index(toc_content, model=None):
}}
Directly return the final JSON structure. Do not output anything else."""
response = ChatGPT_API(model=model, prompt=prompt)
response = llm_completion(model=model, prompt=prompt)
json_content = extract_json(response)
return json_content['page_index_given_in_toc']
@ -264,7 +264,7 @@ def toc_index_extractor(toc, content, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = toc_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
response = ChatGPT_API(model=model, prompt=prompt)
response = llm_completion(model=model, prompt=prompt)
json_content = extract_json(response)
return json_content
@ -292,7 +292,7 @@ def toc_transformer(toc_content, model=None):
Directly return the final JSON structure, do not output anything else. """
prompt = init_prompt + '\n Given table of contents\n:' + toc_content
last_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
last_complete, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete, model)
if if_complete == "yes" and finish_reason == "finished":
last_complete = extract_json(last_complete)
@ -300,7 +300,12 @@ def toc_transformer(toc_content, model=None):
return cleaned_response
last_complete = get_json_content(last_complete)
attempt = 0
max_attempts = 5
while not (if_complete == "yes" and finish_reason == "finished"):
attempt += 1
if attempt > max_attempts:
raise Exception('Failed to complete toc transformation after maximum retries')
position = last_complete.rfind('}')
if position != -1:
last_complete = last_complete[:position+2]
@ -316,7 +321,7 @@ def toc_transformer(toc_content, model=None):
Please continue the json structure, directly output the remaining part of the json structure."""
new_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
new_complete, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if new_complete.startswith('```json'):
new_complete = get_json_content(new_complete)
@ -477,7 +482,7 @@ def add_page_number_to_toc(part, structure, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = fill_prompt_seq + f"\n\nCurrent Partial Document:\n{part}\n\nGiven Structure\n{json.dumps(structure, indent=2)}\n"
current_json_raw = ChatGPT_API(model=model, prompt=prompt)
current_json_raw = llm_completion(model=model, prompt=prompt)
json_result = extract_json(current_json_raw)
for item in json_result:
@ -499,7 +504,7 @@ def remove_first_physical_index_section(text):
return text
### add verify completeness
def generate_toc_continue(toc_content, part, model="gpt-4o-2024-11-20"):
def generate_toc_continue(toc_content, part, model=None):
print('start generate_toc_continue')
prompt = """
You are an expert in extracting hierarchical tree structure.
@ -527,7 +532,7 @@ def generate_toc_continue(toc_content, part, model="gpt-4o-2024-11-20"):
Directly return the additional part of the final JSON structure. Do not output anything else."""
prompt = prompt + '\nGiven text\n:' + part + '\nPrevious tree structure\n:' + json.dumps(toc_content, indent=2)
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if finish_reason == 'finished':
return extract_json(response)
else:
@ -561,7 +566,7 @@ def generate_toc_init(part, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\nGiven text\n:' + part
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
if finish_reason == 'finished':
return extract_json(response)
@ -732,7 +737,7 @@ def check_toc(page_list, opt=None):
################### fix incorrect toc #########################################################
def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20"):
async def single_toc_item_index_fixer(section_title, content, model=None):
toc_extractor_prompt = """
You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document.
@ -746,7 +751,7 @@ def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20
Directly return the final JSON structure. Do not output anything else."""
prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
response = ChatGPT_API(model=model, prompt=prompt)
response = await llm_acompletion(model=model, prompt=prompt)
json_content = extract_json(response)
return convert_physical_index_to_int(json_content['physical_index'])
@ -815,7 +820,7 @@ async def fix_incorrect_toc(toc_with_page_number, page_list, incorrect_results,
continue
content_range = ''.join(page_contents)
physical_index_int = single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
physical_index_int = await single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
# Check if the result is correct
check_item = incorrect_item.copy()
@ -1069,7 +1074,7 @@ def page_index_main(doc, opt=None):
raise ValueError("Unsupported input type. Expected a PDF file path or BytesIO object.")
print('Parsing PDF...')
page_list = get_page_tokens(doc)
page_list = get_page_tokens(doc, model=opt.model)
logger.info({'total_page_number': len(page_list)})
logger.info({'total_token': sum([page[1] for page in page_list])})