From cf52a678a332d82ebbd98ac99342c263b8ef7b4e Mon Sep 17 00:00:00 2001 From: Matias Insaurralde Date: Fri, 27 Feb 2026 04:16:19 -0300 Subject: [PATCH] fix: rename tob_extractor_prompt typo to toc_extractor_prompt (#109) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Matías Insaurralde --- pageindex/page_index.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pageindex/page_index.py b/pageindex/page_index.py index 882fb5d..39018c4 100644 --- a/pageindex/page_index.py +++ b/pageindex/page_index.py @@ -239,7 +239,7 @@ def toc_extractor(page_list, toc_page_list, model): def toc_index_extractor(toc, content, model=None): print('start toc_index_extractor') - tob_extractor_prompt = """ + toc_extractor_prompt = """ You are given a table of contents in a json format and several pages of a document, your job is to add the physical_index to the table of contents in the json format. The provided pages contains tags like and to indicate the physical location of the page X. @@ -260,7 +260,7 @@ def toc_index_extractor(toc, content, model=None): If the section is not in the provided pages, do not add the physical_index to it. Directly return the final JSON structure. Do not output anything else.""" - prompt = tob_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content + prompt = toc_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content response = ChatGPT_API(model=model, prompt=prompt) json_content = extract_json(response) return json_content @@ -730,7 +730,7 @@ def check_toc(page_list, opt=None): ################### fix incorrect toc ######################################################### def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20"): - tob_extractor_prompt = """ + toc_extractor_prompt = """ You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document. The provided pages contains tags like and to indicate the physical location of the page X. @@ -742,7 +742,7 @@ def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20 } Directly return the final JSON structure. Do not output anything else.""" - prompt = tob_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content + prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content response = ChatGPT_API(model=model, prompt=prompt) json_content = extract_json(response) return convert_physical_index_to_int(json_content['physical_index'])