mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-04-24 23:56:21 +02:00
fix: rename tob_extractor_prompt typo to toc_extractor_prompt (#109)
Signed-off-by: Matías Insaurralde <matias@insaurral.de>
This commit is contained in:
parent
89bcb9240a
commit
cf52a678a3
1 changed file with 4 additions and 4 deletions
|
|
@@ -239,7 +239,7 @@ def toc_extractor(page_list, toc_page_list, model):
|
||||||
|
|
||||||
def toc_index_extractor(toc, content, model=None):
|
def toc_index_extractor(toc, content, model=None):
|
||||||
print('start toc_index_extractor')
|
print('start toc_index_extractor')
|
||||||
tob_extractor_prompt = """
|
toc_extractor_prompt = """
|
||||||
You are given a table of contents in a json format and several pages of a document, your job is to add the physical_index to the table of contents in the json format.
|
You are given a table of contents in a json format and several pages of a document, your job is to add the physical_index to the table of contents in the json format.
|
||||||
|
|
||||||
The provided pages contains tags like <physical_index_X> and <physical_index_X> to indicate the physical location of the page X.
|
The provided pages contains tags like <physical_index_X> and <physical_index_X> to indicate the physical location of the page X.
|
||||||
|
|
@@ -260,7 +260,7 @@ def toc_index_extractor(toc, content, model=None):
|
||||||
If the section is not in the provided pages, do not add the physical_index to it.
|
If the section is not in the provided pages, do not add the physical_index to it.
|
||||||
Directly return the final JSON structure. Do not output anything else."""
|
Directly return the final JSON structure. Do not output anything else."""
|
||||||
|
|
||||||
prompt = tob_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
|
prompt = toc_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
|
||||||
response = ChatGPT_API(model=model, prompt=prompt)
|
response = ChatGPT_API(model=model, prompt=prompt)
|
||||||
json_content = extract_json(response)
|
json_content = extract_json(response)
|
||||||
return json_content
|
return json_content
|
||||||
|
|
@@ -730,7 +730,7 @@ def check_toc(page_list, opt=None):
|
||||||
|
|
||||||
################### fix incorrect toc #########################################################
|
################### fix incorrect toc #########################################################
|
||||||
def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20"):
|
def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20"):
|
||||||
tob_extractor_prompt = """
|
toc_extractor_prompt = """
|
||||||
You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document.
|
You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document.
|
||||||
|
|
||||||
The provided pages contains tags like <physical_index_X> and <physical_index_X> to indicate the physical location of the page X.
|
The provided pages contains tags like <physical_index_X> and <physical_index_X> to indicate the physical location of the page X.
|
||||||
|
|
@@ -742,7 +742,7 @@ def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20
|
||||||
}
|
}
|
||||||
Directly return the final JSON structure. Do not output anything else."""
|
Directly return the final JSON structure. Do not output anything else."""
|
||||||
|
|
||||||
prompt = tob_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
|
prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
|
||||||
response = ChatGPT_API(model=model, prompt=prompt)
|
response = ChatGPT_API(model=model, prompt=prompt)
|
||||||
json_content = extract_json(response)
|
json_content = extract_json(response)
|
||||||
return convert_physical_index_to_int(json_content['physical_index'])
|
return convert_physical_index_to_int(json_content['physical_index'])
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue