mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-04-27 09:06:21 +02:00
fix params
This commit is contained in:
parent
480f7583f7
commit
6d1b505541
6 changed files with 126 additions and 42 deletions
|
|
@ -410,7 +410,7 @@ def add_preface_if_needed(data):
|
|||
|
||||
|
||||
|
||||
def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
|
||||
def get_page_tokens(pdf_path, model="gpt-4.1", pdf_parser="PyPDF2"):
|
||||
enc = tiktoken.encoding_for_model(model)
|
||||
if pdf_parser == "PyPDF2":
|
||||
pdf_reader = PyPDF2.PdfReader(pdf_path)
|
||||
|
|
@ -623,6 +623,29 @@ async def generate_summaries_for_structure(structure, model=None):
|
|||
return structure
|
||||
|
||||
|
||||
def create_clean_structure_for_description(structure):
    """
    Return a slimmed-down copy of a document structure for description generation.

    Dict nodes keep only the 'title', 'node_id', 'summary' and
    'prefix_summary' entries that are present (bulky fields such as 'text'
    are dropped). A non-empty 'nodes' entry is cleaned recursively; an empty
    or missing one is omitted. Lists are cleaned element by element, and any
    other value is returned unchanged.
    """
    # Lists: clean every element, preserving order.
    if isinstance(structure, list):
        return [create_clean_structure_for_description(child) for child in structure]

    # Anything that is neither a list nor a dict passes through untouched.
    if not isinstance(structure, dict):
        return structure

    # Keep only the fields that matter for a description, in a fixed order.
    kept_fields = ('title', 'node_id', 'summary', 'prefix_summary')
    slim_node = {field: structure[field] for field in kept_fields if field in structure}

    # Descend into children only when there actually are some.
    children = structure.get('nodes')
    if children:
        slim_node['nodes'] = create_clean_structure_for_description(children)

    return slim_node
|
||||
|
||||
|
||||
def generate_doc_description(structure, model=None):
|
||||
prompt = f"""Your are an expert in generating descriptions for a document.
|
||||
You are given a structure of a document. Your task is to generate a one-sentence description for the document, which makes it easy to distinguish the document from other documents.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue