fix model

2026-07-18 21:21:05 +02:00 · 2025-08-28 13:07:15 +08:00 · 2025-08-28 13:07:15 +08:00 · 4b4ae4d51d
commit 4b4ae4d51d
parent 6d1b505541
5 changed files with 6 additions and 6 deletions
--- a/pageindex/config.yaml
+++ b/pageindex/config.yaml
@@ -1,4 +1,4 @@
-model: "gpt-4.1"
+model: "gpt-4o-2024-11-20"
 toc_check_page_num: 20
 max_page_num_each_node: 10
 max_token_num_each_node: 20000
--- a/pageindex/page_index.py
+++ b/pageindex/page_index.py
@@ -496,7 +496,7 @@ def remove_first_physical_index_section(text):
    return text

 ### add verify completeness
-def generate_toc_continue(toc_content, part, model="gpt-4.1"):
+def generate_toc_continue(toc_content, part, model="gpt-4o-2024-11-20"):
    print('start generate_toc_continue')
    prompt = """
    You are an expert in extracting hierarchical tree structure.
@@ -729,7 +729,7 @@ def check_toc(page_list, opt=None):


 ################### fix incorrect toc #########################################################
-def single_toc_item_index_fixer(section_title, content, model="gpt-4.1"):
+def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20"):
    tob_extractor_prompt = """
    You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document.

--- a/pageindex/utils.py
+++ b/pageindex/utils.py
@@ -410,7 +410,7 @@ def add_preface_if_needed(data):



-def get_page_tokens(pdf_path, model="gpt-4.1", pdf_parser="PyPDF2"):
+def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
    enc = tiktoken.encoding_for_model(model)
    if pdf_parser == "PyPDF2":
        pdf_reader = PyPDF2.PdfReader(pdf_path)