add node id, node summary and doc summary

This commit is contained in:
mingtian 2025-04-03 13:35:38 +08:00
parent 1de05d8675
commit db746f35e0
9 changed files with 3663 additions and 2810 deletions

22
CHANGELOG.md Normal file
View file

@ -0,0 +1,22 @@
# Change Log
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased] - yyyy-mm-dd
Here we write upgrade notes for brands. It's a team effort to make them as
straightforward as possible.
### Added
- [PROJECTNAME-XXXX](http://tickets.projectname.com/browse/PROJECTNAME-XXXX)
MINOR Ticket title goes here.
- [PROJECTNAME-YYYY](http://tickets.projectname.com/browse/PROJECTNAME-YYYY)
PATCH Ticket title goes here.
### Changed
### Fixed
## [1.2.4] - 2017-03-15

View file

@ -1,12 +1,13 @@
# PageIndex
### **Document Index System for Reasoning-Based RAG**
Frustrated with the retrieval accuracy of vector databases on long professional documents? You need a reasoning-based native index for your RAG system.
Traditional vector-based retrieval relies heavily on semantic similarity. But when working with professional documents that require domain expertise and multi-step reasoning, similarity search often falls short.
**Reasoning-Based RAG** offers a better alternative: enabling LLMs to *think* and *reason* their way to the most relevant document sections. Inspired by **AlphaGo**, we leverage **tree search** to perform structured document retrieval.
**PageIndex** is an indexing system that builds search trees from long documents, making them ready for reasoning-based RAG.
**[PageIndex](https://vectify.ai/pageindex)** is an indexing system that builds search trees from long documents, making them ready for reasoning-based RAG.
Built by [Vectify AI](https://vectify.ai/pageindex)
@ -44,7 +45,7 @@ Here is an example output. See more [example documents](https://github.com/Vecti
"start_index": 21,
"end_index": 22,
"summary": "The Federal Reserve ...",
"child_nodes": [
"nodes": [
{
"title": "Monitoring Financial Vulnerabilities",
"node_id": "0007",
@ -111,12 +112,21 @@ CHATGPT_API_KEY=your_openai_key_here
```bash
python3 page_index.py --pdf_path /path/to/your/document.pdf
```
You can customize processing with the following optional arguments:
The results will be saved in the `./results/` directory.
```bash
--model OpenAI model to use (default: gpt-4o-2024-11-20)
--toc-check-pages Pages to check for table of contents (default: 20)
--max-pages-per-node Max pages per node (default: 10)
--max-tokens-per-node Max tokens per node (default: 20000)
--if-add-node-id Add node ID (yes/no, default: yes)
--if-add-node-summary Add node summary (yes/no, default: no)
--if-add-doc-description Add doc description (yes/no, default: yes)
```
## 🛤 Roadmap
- [ ] Add node summary and document selection
- [ ] Document-level retrieval
- [ ] Technical report on PageIndex design
- [ ] Efficient tree search algorithms for large documents
- [ ] Integration with vector-based semantic retrieval

View file

@ -9,12 +9,9 @@ import re
from utils import *
import os
from types import SimpleNamespace as config
from dotenv import load_dotenv
load_dotenv()
from concurrent.futures import ThreadPoolExecutor, as_completed
import argparse
CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY")
################### check title in page #########################################################
def check_title_appearance(item, page_list, start_index=1, model=None):
@ -43,7 +40,7 @@ def check_title_appearance(item, page_list, start_index=1, model=None):
}}
Directly return the final JSON structure. Do not output anything else."""
response = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response = ChatGPT_API(model=model, prompt=prompt)
response = extract_json(response)
if 'answer' in response:
answer = response['answer']
@ -71,7 +68,7 @@ def check_title_appearance_in_start(title, page_text, model=None, logger=None):
}}
Directly return the final JSON structure. Do not output anything else."""
response = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response = ChatGPT_API(model=model, prompt=prompt)
response = extract_json(response)
if logger:
logger.info(f"Response: {response}")
@ -119,7 +116,7 @@ def toc_detector_single_page(content, model=None):
Directly return the final JSON structure. Do not output anything else.
Please note: abstract,summary, notation list, figure list, table list, etc. are not table of contents."""
response = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response = ChatGPT_API(model=model, prompt=prompt)
# print('response', response)
json_content = extract_json(response)
return json_content['toc_detected']
@ -138,7 +135,7 @@ def check_if_toc_extraction_is_complete(content, toc, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\n Document:\n' + content + '\n Table of contents:\n' + toc
response = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response = ChatGPT_API(model=model, prompt=prompt)
json_content = extract_json(response)
return json_content['completed']
@ -156,7 +153,7 @@ def check_if_toc_transformation_is_complete(content, toc, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\n Raw Table of contents:\n' + content + '\n Cleaned Table of contents:\n' + toc
response = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response = ChatGPT_API(model=model, prompt=prompt)
json_content = extract_json(response)
return json_content['completed']
@ -168,7 +165,7 @@ def extract_toc_content(content, model=None):
Directly return the full table of contents content. Do not output anything else."""
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
if_complete = check_if_toc_transformation_is_complete(content, response, model)
if if_complete == "yes" and finish_reason == "finished":
@ -179,7 +176,7 @@ def extract_toc_content(content, model=None):
{"role": "assistant", "content": response},
]
prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, api_key=CHATGPT_API_KEY, chat_history=chat_history)
new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
response = response + new_response
if_complete = check_if_toc_transformation_is_complete(content, response)
@ -189,7 +186,7 @@ def extract_toc_content(content, model=None):
{"role": "assistant", "content": response},
]
prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, api_key=CHATGPT_API_KEY, chat_history=chat_history)
new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
response = response + new_response
if_complete = check_if_toc_transformation_is_complete(content, response)
@ -214,7 +211,7 @@ def detect_page_index(toc_content, model=None):
}}
Directly return the final JSON structure. Do not output anything else."""
response = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response = ChatGPT_API(model=model, prompt=prompt)
json_content = extract_json(response)
return json_content['page_index_given_in_toc']
@ -263,7 +260,7 @@ def toc_index_extractor(toc, content, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = tob_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
response = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response = ChatGPT_API(model=model, prompt=prompt)
json_content = extract_json(response)
return json_content
@ -291,7 +288,7 @@ def toc_transformer(toc_content, model=None):
Directly return the final JSON structure, do not output anything else. """
prompt = init_prompt + '\n Given table of contents\n:' + toc_content
last_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
last_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete, model)
if if_complete == "yes" and finish_reason == "finished":
last_complete = extract_json(last_complete)
@ -315,7 +312,7 @@ def toc_transformer(toc_content, model=None):
Please continue the json structure, directly output the remaining part of the json structure."""
new_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
new_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
if new_complete.startswith('```json'):
new_complete = get_json_content(new_complete)
@ -363,7 +360,7 @@ def remove_page_number(data):
if isinstance(data, dict):
data.pop('page_number', None)
for key in list(data.keys()):
if 'child_nodes' in key:
if 'nodes' in key:
remove_page_number(data[key])
elif isinstance(data, list):
for item in data:
@ -476,7 +473,7 @@ def add_page_number_to_toc(part, structure, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = fill_prompt_seq + f"\n\nCurrent Partial Document:\n{part}\n\nGiven Structure\n{json.dumps(structure, indent=2)}\n"
current_json_raw = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
current_json_raw = ChatGPT_API(model=model, prompt=prompt)
json_result = extract_json(current_json_raw)
for item in json_result:
@ -525,7 +522,7 @@ def generate_toc_continue(toc_content, part, model="gpt-4o-2024-11-20"):
Directly return the additional part of the final JSON structure. Do not output anything else."""
prompt = prompt + '\nGiven text\n:' + part + '\nPrevious tree structure\n:' + json.dumps(toc_content, indent=2)
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
if finish_reason == 'finished':
return extract_json(response)
else:
@ -557,7 +554,7 @@ def generate_toc_init(part, model=None):
Directly return the final JSON structure. Do not output anything else."""
prompt = prompt + '\nGiven text\n:' + part
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
if finish_reason == 'finished':
return extract_json(response)
@ -738,7 +735,7 @@ def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20
Directly return the final JSON structure. Do not output anything else."""
prompt = tob_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
response = ChatGPT_API(model=model, prompt=prompt, api_key=CHATGPT_API_KEY)
response = ChatGPT_API(model=model, prompt=prompt)
json_content = extract_json(response)
return convert_physical_index_to_int(json_content['physical_index'])
@ -965,14 +962,14 @@ def process_large_node_recursively(node, page_list, opt=None, logger=None):
node_toc_tree = check_title_appearance_in_start_parallel(node_toc_tree, page_list, model=opt.model, logger=logger)
if node['title'].strip() == node_toc_tree[0]['title'].strip():
node['child_nodes'] = post_processing(node_toc_tree[1:], node['end_index'])
node['nodes'] = post_processing(node_toc_tree[1:], node['end_index'])
node['end_index'] = node_toc_tree[1]['start_index']
else:
node['child_nodes'] = post_processing(node_toc_tree, node['end_index'])
node['nodes'] = post_processing(node_toc_tree, node['end_index'])
node['end_index'] = node_toc_tree[0]['start_index']
if 'child_nodes' in node and node['child_nodes']:
for child_node in node['child_nodes']:
if 'nodes' in node and node['nodes']:
for child_node in node['nodes']:
process_large_node_recursively(child_node, page_list, opt, logger=logger)
return node
@ -1033,7 +1030,23 @@ def page_index_main(doc, opt=None):
logger.info({'total_token': sum([page[1] for page in page_list])})
structure = tree_parser(page_list, opt, logger=logger)
return structure
if opt.if_add_node_id == 'yes':
write_node_id(structure)
if opt.if_add_node_summary == 'yes':
add_node_text(structure, page_list)
asyncio.run(generate_summaries_for_structure(structure, model=opt.model))
remove_structure_text(structure)
if opt.if_add_doc_description == 'yes':
doc_description = generate_doc_description(structure, model=opt.model)
return {
'doc_name': os.path.basename(doc),
'doc_description': doc_description,
'structure': structure,
}
return {
'doc_name': os.path.basename(doc),
'structure': structure,
}
@ -1048,15 +1061,23 @@ if __name__ == "__main__":
help='Maximum number of pages per node')
parser.add_argument('--max-tokens-per-node', type=int, default=20000,
help='Maximum number of tokens per node')
parser.add_argument('--if-add-node-id', type=str, default='yes',
help='Whether to add node id to the node')
parser.add_argument('--if-add-node-summary', type=str, default='no',
help='Whether to add summary to the node')
parser.add_argument('--if-add-doc-description', type=str, default='yes',
help='Whether to add doc description to the doc')
args = parser.parse_args()
# Configure options
# Configure options
opt = config(
model=args.model,
toc_check_page_num=args.toc_check_pages,
max_page_num_each_node=args.max_pages_per_node,
max_token_num_each_node=args.max_tokens_per_node,
if_add_node_id=args.if_add_node_id,
if_add_node_summary=args.if_add_node_summary,
if_add_doc_description=args.if_add_doc_description
)
# Process the PDF

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,51 +1,73 @@
[
{
"title": "Preface",
"start_index": 1,
"end_index": 2
},
{
"title": "Introduction",
"start_index": 2,
"end_index": 6
},
{
"title": "Interpretation and Application",
"start_index": 6,
"end_index": 8,
"child_nodes": [
{
"title": "Historical Context and Legislative History",
"start_index": 8,
"end_index": 10
},
{
"title": "Scope of the Solely Incidental Prong of the Broker-Dealer Exclusion",
"start_index": 10,
"end_index": 14
},
{
"title": "Guidance on Applying the Interpretation of the Solely Incidental Prong",
"start_index": 14,
"end_index": 22
}
]
},
{
"title": "Economic Considerations",
"start_index": 22,
"end_index": 22,
"child_nodes": [
{
"title": "Background",
"start_index": 22,
"end_index": 23
},
{
"title": "Potential Economic Effects",
"start_index": 23,
"end_index": 28
}
]
}
]
{
"doc_name": "Regulation Best Interest_Interpretive release.pdf",
"doc_description": "A detailed analysis of the SEC's interpretation of the \"solely incidental\" prong of the broker-dealer exclusion under the Investment Advisers Act of 1940, including its historical context, application guidance, economic implications, and regulatory considerations.",
"structure": [
{
"title": "Preface",
"start_index": 1,
"end_index": 2,
"node_id": "0000",
"summary": "The partial document outlines an interpretation by the Securities and Exchange Commission (SEC) regarding the \"solely incidental\" prong of the broker-dealer exclusion under the Investment Advisers Act of 1940. It clarifies that brokers or dealers providing advisory services that are incidental to their primary business and for which they receive no special compensation are excluded from the definition of \"investment adviser\" under the Act. The document includes a historical and legislative context, the scope of the \"solely incidental\" prong, guidance on its application, and economic considerations related to the interpretation. It also provides contact information for further inquiries and specifies the effective date of the interpretation as July 12, 2019."
},
{
"title": "Introduction",
"start_index": 2,
"end_index": 6,
"node_id": "0001",
"summary": "The partial document discusses the regulation of investment advisers under the Advisers Act, specifically focusing on the \"broker-dealer exclusion,\" which exempts brokers and dealers from being classified as investment advisers under certain conditions. Key points include:\n\n1. **Introduction to the Advisers Act**: Overview of the regulation of investment advisers and the broker-dealer exclusion, which applies when advisory services are \"solely incidental\" to brokerage business and no special compensation is received.\n\n2. **Historical Context and Legislative History**: Examination of the historical practices of broker-dealers providing investment advice, distinguishing between auxiliary advice as part of brokerage services and separate advisory services.\n\n3. **Interpretation of the Solely Incidental Prong**: Clarification of the \"solely incidental\" condition of the broker-dealer exclusion, including its application to activities like investment discretion and account monitoring.\n\n4. **Economic Considerations**: Discussion of the potential economic effects of the interpretation and application of the broker-dealer exclusion.\n\n5. **Regulatory Developments**: Reference to the Commission's 2018 proposals, including Regulation Best Interest (Reg. BI), the Proposed Fiduciary Interpretation, and the Relationship Summary Proposal, aimed at enhancing standards of conduct and investor understanding.\n\n6. **Public Comments and Feedback**: Summary of public comments on the scope and interpretation of the broker-dealer exclusion, highlighting disagreements and requests for clarification on the \"solely incidental\" prong.\n\n7. **Adoption of Interpretation**: The Commission's adoption of an interpretation to confirm and clarify its position on the \"solely incidental\" prong, complementing related rules and forms to improve investor understanding of broker-dealer and adviser relationships."
},
{
"title": "Interpretation and Application",
"start_index": 6,
"end_index": 8,
"nodes": [
{
"title": "Historical Context and Legislative History",
"start_index": 8,
"end_index": 10,
"node_id": "0003",
"summary": "The partial document discusses the historical context and legislative development of the Investment Advisers Act of 1940. It highlights the findings of a congressional study conducted by the SEC between 1935 and 1939, which identified issues with distinguishing legitimate investment counselors from unregulated \"tipster\" organizations and problems in the organization and operation of investment counsel institutions. The document explains how these findings led to the passage of the Advisers Act, which broadly defined \"investment adviser\" and established regulatory oversight for those providing investment advice for compensation. It also addresses the exclusion of certain professionals, such as broker-dealers, from the definition of \"investment adviser\" if their advice is incidental to their primary business and not specially compensated. Additionally, the document explores the scope of the \"solely incidental\" prong of the broker-dealer exclusion, referencing interpretations and rules by the SEC, including a 2005 rule regarding fee-based brokerage accounts."
},
{
"title": "Scope of the Solely Incidental Prong of the Broker-Dealer Exclusion",
"start_index": 10,
"end_index": 14,
"node_id": "0004",
"summary": "The partial document discusses the \"broker-dealer exclusion\" under the Investment Advisers Act, specifically focusing on the \"solely incidental\" prong. It examines the scope of this exclusion, emphasizing that investment advice provided by broker-dealers is considered \"solely incidental\" if it is connected to and reasonably related to their primary business of effecting securities transactions. The document references historical interpretations, court rulings (e.g., Financial Planning Association v. SEC and Thomas v. Metropolitan Life Insurance Company), and legislative history to clarify this standard. It highlights that the frequency or importance of advice does not determine whether it meets the \"solely incidental\" standard, but rather its relationship to the broker-dealer's primary business. The document also provides guidance on applying this interpretation to specific practices, such as exercising investment discretion and account monitoring, noting that certain discretionary activities may fall outside the scope of the exclusion."
},
{
"title": "Guidance on Applying the Interpretation of the Solely Incidental Prong",
"start_index": 14,
"end_index": 22,
"node_id": "0005",
"summary": "The partial document provides guidance on the application of the \"solely incidental\" prong of the broker-dealer exclusion under the Advisers Act. It focuses on two key areas: (1) the exercise of investment discretion by broker-dealers over customer accounts and (2) account monitoring. The document discusses the Commission's interpretation that unlimited investment discretion is not \"solely incidental\" to a broker-dealer's business, as it indicates a primarily advisory relationship. However, temporary or limited discretion in specific scenarios (e.g., cash management, tax-loss sales, or margin requirements) may be consistent with the \"solely incidental\" prong. It also addresses account monitoring, stating that agreed-upon periodic monitoring for buy, sell, or hold recommendations may align with the broker-dealer exclusion, while continuous monitoring or advisory-like services would not. The document includes examples, refinements to prior interpretations, and considerations for broker-dealers to adopt policies ensuring compliance. It concludes with economic considerations, highlighting the potential impact on broker-dealers, customers, and the financial advice market."
}
],
"node_id": "0002",
"summary": "The partial document discusses the historical context and legislative history of the Advisers Act of 1940, focusing on the roles of broker-dealers in providing investment advice. It highlights two distinct ways broker-dealers offered advice: as part of traditional brokerage services with fixed commissions and as separate advisory services for a fee. The document examines the concept of \"brokerage house advice,\" detailing the types of information and services provided, such as market analyses, tax information, and investment recommendations. It also references a congressional study conducted between 1935 and 1939, which identified issues with distinguishing legitimate investment counselors from \"tipster\" organizations and problems in the organization and operation of investment counsel institutions. These findings led to the enactment of the Advisers Act, which broadly defined \"investment adviser\" to regulate those providing investment advice for compensation. The document also references various reports, hearings, and literature that informed the development of the Act."
},
{
"title": "Economic Considerations",
"start_index": 22,
"end_index": 22,
"nodes": [
{
"title": "Background",
"start_index": 22,
"end_index": 23,
"node_id": "0007",
"summary": "The partial document discusses the U.S. Securities and Exchange Commission's (SEC) interpretation of the \"solely incidental\" prong of the broker-dealer exclusion, clarifying its understanding without creating new legal obligations. It examines the potential economic effects of this interpretation on broker-dealers, their associated persons, customers, and the broader financial advice market. The document provides background data on broker-dealers, including their assets, customer accounts, and dual registration as investment advisers. It highlights compliance costs for broker-dealers to align with the interpretation and notes the limited circumstances under which broker-dealers exercise temporary or limited investment discretion. The document also references the lack of data received during the Reg. BI Proposal to analyze the economic impact further."
},
{
"title": "Potential Economic Effects",
"start_index": 23,
"end_index": 28,
"node_id": "0008",
"summary": "The partial document discusses the economic effects and regulatory implications of the SEC's interpretation of the \"solely incidental\" prong of the broker-dealer exclusion from the definition of an investment adviser. Key points include:\n\n1. **Compliance Costs**: Broker-dealers currently incur costs to align their practices with the \"solely incidental\" prong, and the interpretation may lead to additional costs for evaluating and adjusting practices.\n\n2. **Impact on Broker-Dealer Practices**: Broker-dealers providing advisory services beyond the scope of the interpretation may need to adjust their practices, potentially resulting in reduced services, loss of customers, or a shift to advisory accounts.\n\n3. **Market Effects**: The interpretation could lead to decreased competition, increased fees, and a diminished number of broker-dealers offering commission-based services. It may also shift demand from broker-dealers to investment advisers.\n\n4. **Regulatory Adjustments**: Broker-dealers may choose to register as investment advisers, incurring new compliance costs, or migrate customers to advisory accounts of affiliates.\n\n5. **Potential Benefits**: Some broker-dealers may expand limited discretionary services or monitoring activities, benefiting investors with more efficient access to these services.\n\n6. **Regulatory Arbitrage Risks**: The interpretation raises concerns about regulatory arbitrage, though these risks may be mitigated by enhanced standards of conduct for broker-dealers.\n\n7. **Amendments to Regulations**: The document includes amendments to the Code of Federal Regulations, adding an interpretive release regarding the \"solely incidental\" prong, dated June 5, 2019."
}
],
"node_id": "0006",
"summary": "The partial document discusses the SEC's interpretation of the \"solely incidental\" prong of the broker-dealer exclusion, clarifying that it does not impose new legal obligations but may have economic implications if broker-dealer practices deviate from this interpretation. It provides background on the potential effects on broker-dealers, their associated persons, customers, and the broader financial advice market. The document includes data on the number of registered broker-dealers, their customer accounts, total assets, and the prevalence of dual registrants (firms registered as both broker-dealers and investment advisers) as of December 2018."
}
]
}

File diff suppressed because it is too large Load diff

View file

@ -1,220 +1,311 @@
[
{
"title": "THE WALT DISNEY COMPANY REPORTS FIRST QUARTER EARNINGS FOR FISCAL 2025",
"start_index": 1,
"end_index": 1,
"child_nodes": [
{
"title": "Financial Results for the Quarter",
"start_index": 1,
"end_index": 1,
"child_nodes": [
{
"title": "Key Points",
"start_index": 1,
"end_index": 1
}
]
},
{
"title": "Guidance and Outlook",
"start_index": 2,
"end_index": 2,
"child_nodes": [
{
"title": "Star India deconsolidated in Q1",
"start_index": 2,
"end_index": 2
},
{
"title": "Q2 Fiscal 2025",
"start_index": 2,
"end_index": 2
},
{
"title": "Fiscal Year 2025",
"start_index": 2,
"end_index": 2
}
]
},
{
"title": "Message From Our CEO",
"start_index": 2,
"end_index": 2
},
{
"title": "SUMMARIZED FINANCIAL RESULTS",
"start_index": 3,
"end_index": 3,
"child_nodes": [
{
"title": "SUMMARIZED SEGMENT FINANCIAL RESULTS",
"start_index": 3,
"end_index": 3
}
]
},
{
"title": "DISCUSSION OF FIRST QUARTER SEGMENT RESULTS",
"start_index": 4,
"end_index": 4,
"child_nodes": [
{
"title": "Star India",
"start_index": 4,
"end_index": 4
},
{
"title": "Entertainment",
"start_index": 4,
"end_index": 4,
"child_nodes": [
{
"title": "Linear Networks",
"start_index": 5,
"end_index": 5
},
{
"title": "Direct-to-Consumer",
"start_index": 5,
"end_index": 7
},
{
"title": "Content Sales/Licensing and Other",
"start_index": 7,
"end_index": 7
}
]
},
{
"title": "Sports",
"start_index": 7,
"end_index": 7,
"child_nodes": [
{
"title": "Domestic ESPN",
"start_index": 8,
"end_index": 8
},
{
"title": "International ESPN",
"start_index": 8,
"end_index": 8
},
{
"title": "Star India",
"start_index": 8,
"end_index": 8
}
]
},
{
"title": "Experiences",
"start_index": 9,
"end_index": 9,
"child_nodes": [
{
"title": "Domestic Parks and Experiences",
"start_index": 9,
"end_index": 9
},
{
"title": "International Parks and Experiences",
"start_index": 9,
"end_index": 9
}
]
}
]
},
{
"title": "OTHER FINANCIAL INFORMATION",
"start_index": 9,
"end_index": 9,
"child_nodes": [
{
"title": "Corporate and Unallocated Shared Expenses",
"start_index": 9,
"end_index": 9
},
{
"title": "Restructuring and Impairment Charges",
"start_index": 9,
"end_index": 9
},
{
"title": "Interest Expense, net",
"start_index": 10,
"end_index": 10
},
{
"title": "Equity in the Income of Investees",
"start_index": 10,
"end_index": 10
},
{
"title": "Income Taxes",
"start_index": 10,
"end_index": 10
},
{
"title": "Noncontrolling Interests",
"start_index": 11,
"end_index": 11
},
{
"title": "Cash from Operations",
"start_index": 11,
"end_index": 11
},
{
"title": "Capital Expenditures",
"start_index": 12,
"end_index": 12
},
{
"title": "Depreciation Expense",
"start_index": 12,
"end_index": 12
}
]
},
{
"title": "THE WALT DISNEY COMPANY CONDENSED CONSOLIDATED STATEMENTS OF INCOME",
"start_index": 13,
"end_index": 13
},
{
"title": "THE WALT DISNEY COMPANY CONDENSED CONSOLIDATED BALANCE SHEETS",
"start_index": 14,
"end_index": 14
},
{
"title": "THE WALT DISNEY COMPANY CONDENSED CONSOLIDATED STATEMENTS OF CASH FLOWS",
"start_index": 15,
"end_index": 15
},
{
"title": "DTC PRODUCT DESCRIPTIONS AND KEY DEFINITIONS",
"start_index": 16,
"end_index": 16
},
{
"title": "NON-GAAP FINANCIAL MEASURES",
"start_index": 17,
"end_index": 20
},
{
"title": "FORWARD-LOOKING STATEMENTS",
"start_index": 21,
"end_index": 21
},
{
"title": "PREPARED EARNINGS REMARKS AND CONFERENCE CALL INFORMATION",
"start_index": 22,
"end_index": 22
}
]
}
]
{
"doc_name": "q1-fy25-earnings.pdf",
"doc_description": "A comprehensive financial report detailing The Walt Disney Company's first-quarter fiscal 2025 performance, including revenue growth, segment highlights, guidance for fiscal 2025, and key financial metrics such as adjusted EPS, operating income, and cash flow.",
"structure": [
{
"title": "THE WALT DISNEY COMPANY REPORTS FIRST QUARTER EARNINGS FOR FISCAL 2025",
"start_index": 1,
"end_index": 1,
"nodes": [
{
"title": "Financial Results for the Quarter",
"start_index": 1,
"end_index": 1,
"nodes": [
{
"title": "Key Points",
"start_index": 1,
"end_index": 1,
"node_id": "0002",
"summary": "The partial document outlines The Walt Disney Company's financial performance for the first fiscal quarter of 2025, ending December 28, 2024. Key points include:\n\n1. **Financial Results**: \n - Revenue increased by 5% to $24.7 billion.\n - Income before taxes rose by 27% to $3.7 billion.\n - Diluted EPS grew by 35% to $1.40.\n - Total segment operating income increased by 31% to $5.1 billion, with adjusted EPS up 44% to $1.76.\n\n2. **Entertainment Segment**:\n - Operating income increased by $0.8 billion to $1.7 billion.\n - Direct-to-Consumer operating income rose by $431 million to $293 million, with advertising revenue (excluding Disney+ Hotstar in India) up 16%.\n - Disney+ and Hulu subscriptions increased by 0.9 million, while Disney+ subscribers decreased by 0.7 million.\n - Content sales/licensing income grew by $536 million, driven by the success of *Moana 2*.\n\n3. **Sports Segment**:\n - Operating income increased by $350 million to $247 million.\n - Domestic ESPN advertising revenue grew by 15%.\n\n4. **Experiences Segment**:\n - Operating income remained at $3.1 billion, with a 6 percentage-point adverse impact due to Hurricanes Milton and Helene and pre-opening expenses for the Disney Treasure.\n - Domestic Parks & Experiences income declined by 5%, while International Parks & Experiences income increased by 28%."
}
],
"node_id": "0001",
"summary": "The partial document is a report from The Walt Disney Company detailing its financial performance for the first fiscal quarter of 2025, ending December 28, 2024. Key points include:\n\n1. **Financial Performance**:\n - Revenue increased by 5% to $24.7 billion.\n - Income before taxes rose by 27% to $3.7 billion.\n - Diluted EPS grew by 35% to $1.40.\n - Total segment operating income increased by 31% to $5.1 billion, with adjusted EPS up 44% to $1.76.\n\n2. **Segment Highlights**:\n - **Entertainment**: Operating income increased by $0.8 billion to $1.7 billion. Direct-to-Consumer income rose by $431 million, though advertising revenue declined 2% (up 16% excluding Disney+ Hotstar in India). Disney+ and Hulu subscriptions increased slightly, while Disney+ subscribers decreased by 0.7 million. Content sales/licensing income grew, driven by the success of *Moana 2*.\n - **Sports**: Operating income increased by $350 million to $247 million, with ESPN domestic advertising revenue up 15%.\n - **Experiences**: Operating income remained at $3.1 billion, with adverse impacts from hurricanes and pre-opening expenses for the Disney Treasure. Domestic Parks & Experiences income declined by 5%, while International Parks & Experiences income rose by 28%.\n\n3. **Additional Notes**:\n - Non-GAAP financial measures are used for certain metrics.\n - Disney+ Hotstar in India saw a significant decline in advertising revenue compared to the previous year."
},
{
"title": "Guidance and Outlook",
"start_index": 2,
"end_index": 2,
"nodes": [
{
"title": "Star India deconsolidated in Q1",
"start_index": 2,
"end_index": 2,
"node_id": "0004",
"summary": "The partial document outlines Disney's financial guidance and outlook for fiscal 2025, including the deconsolidation of Star India and its impact on operating income for the Entertainment and Sports segments. It highlights expectations for Q2 fiscal 2025, such as a modest decline in Disney+ subscribers, adverse impacts on Sports segment income, and pre-opening expenses for Disney Cruise Line. For fiscal 2025, the company projects high-single-digit adjusted EPS growth, $15 billion in cash from operations, and segment operating income growth across Entertainment, Sports, and Experiences. The CEO emphasizes strong Q1 results, including box office success, improved profitability in streaming, advancements in ESPN\u2019s digital strategy, and continued investments in the Experiences segment, expressing confidence in Disney's growth strategy."
},
{
"title": "Q2 Fiscal 2025",
"start_index": 2,
"end_index": 2,
"node_id": "0005",
"summary": "The partial document outlines Disney's financial guidance and outlook for fiscal 2025, including the deconsolidation of Star India and its impact on operating income for the Entertainment and Sports segments. It highlights expectations for Q2 fiscal 2025, such as a modest decline in Disney+ subscribers, adverse impacts on Sports segment income, and pre-opening expenses for Disney Cruise Line. For the full fiscal year 2025, it projects high-single-digit adjusted EPS growth, $15 billion in cash from operations, and segment operating income growth across Entertainment, Sports, and Experiences. The CEO emphasizes Disney's strong start to the fiscal year, citing achievements in box office performance, improved streaming profitability, ESPN's digital strategy, and the enduring appeal of the Experiences segment."
},
{
"title": "Fiscal Year 2025",
"start_index": 2,
"end_index": 2,
"node_id": "0006",
"summary": "The partial document outlines Disney's financial guidance and outlook for fiscal 2025, including the deconsolidation of Star India and its impact on operating income for the Entertainment and Sports segments. It highlights expectations for Q2 fiscal 2025, such as a modest decline in Disney+ subscribers, adverse impacts on Sports segment income, and pre-opening expenses for Disney Cruise Line. For the full fiscal year 2025, it projects high-single-digit adjusted EPS growth, $15 billion in cash from operations, and segment operating income growth across Entertainment, Sports, and Experiences. The CEO emphasizes Disney's creative and financial strength, strong box office performance, improved streaming profitability, advancements in ESPN's digital strategy, and continued global investments in the Experiences segment."
}
],
"node_id": "0003",
"summary": "The partial document outlines Disney's financial guidance and outlook for fiscal 2025, including the deconsolidation of Star India and its impact on operating income for the Entertainment and Sports segments. It highlights expectations for Q2 fiscal 2025, such as a modest decline in Disney+ subscribers, adverse impacts on Sports segment income, and pre-opening expenses for Disney Cruise Line. For the full fiscal year 2025, it projects high-single-digit adjusted EPS growth, $15 billion in cash from operations, and segment operating income growth across Entertainment, Sports, and Experiences. The CEO emphasizes strong Q1 results, including box office success, improved profitability in streaming, advancements in ESPN\u2019s digital strategy, and continued investment in global experiences."
},
{
"title": "Message From Our CEO",
"start_index": 2,
"end_index": 2,
"node_id": "0007",
"summary": "The partial document outlines Disney's financial guidance and outlook for fiscal 2025, including the deconsolidation of Star India and its impact on operating income for the Entertainment and Sports segments. It highlights expectations for Q2 fiscal 2025, such as a modest decline in Disney+ subscribers, adverse impacts on Sports segment income, and pre-opening expenses for Disney Cruise Line. For the full fiscal year 2025, it projects high-single-digit adjusted EPS growth, $15 billion in cash from operations, and segment operating income growth across Entertainment, Sports, and Experiences. The CEO emphasizes strong Q1 results, including box office success, improved profitability in streaming, advancements in ESPN\u2019s digital strategy, and continued investment in global experiences."
}
],
"node_id": "0000",
"summary": "The partial document is a report from The Walt Disney Company detailing its financial performance for the first fiscal quarter of 2025, ending December 28, 2024. Key points include:\n\n1. **Financial Results**: \n - Revenue increased by 5% to $24.7 billion. \n - Income before taxes rose by 27% to $3.7 billion. \n - Diluted EPS grew by 35% to $1.40. \n - Total segment operating income increased by 31% to $5.1 billion, and adjusted EPS rose by 44% to $1.76. \n\n2. **Entertainment Segment**: \n - Operating income increased by $0.8 billion to $1.7 billion. \n - Direct-to-Consumer operating income rose by $431 million to $293 million, with advertising revenue up 16% (excluding Disney+ Hotstar in India). \n - Disney+ and Hulu subscriptions increased by 0.9 million, while Disney+ subscribers decreased by 0.7 million. \n - Content sales/licensing income grew by $536 million, driven by the success of *Moana 2*. \n\n3. **Sports Segment**: \n - Operating income increased by $350 million to $247 million. \n - Domestic ESPN advertising revenue grew by 15%. \n\n4. **Experiences Segment**: \n - Operating income remained at $3.1 billion, with a 6 percentage-point adverse impact due to Hurricanes Milton and Helene and pre-opening expenses for the Disney Treasure. \n - Domestic Parks & Experiences income declined by 5%, while International Parks & Experiences income increased by 28%. \n\nThe report also includes non-GAAP financial measures and notes the impact of Disney+ Hotstar's advertising revenue in India."
},
{
"title": "SUMMARIZED FINANCIAL RESULTS",
"start_index": 3,
"end_index": 3,
"nodes": [
{
"title": "SUMMARIZED SEGMENT FINANCIAL RESULTS",
"start_index": 3,
"end_index": 3,
"node_id": "0009",
"summary": "The partial document provides a summarized overview of financial results for the first quarter of fiscal years 2025 and 2024. Key points include:\n\n1. **Overall Financial Performance**:\n - Revenues increased by 5% from $23,549 million in 2024 to $24,690 million in 2025.\n - Income before income taxes rose by 27%.\n - Total segment operating income grew by 31%.\n - Diluted EPS increased by 35%, and diluted EPS excluding certain items rose by 44%.\n - Cash provided by operations increased by 47%, while free cash flow decreased by 17%.\n\n2. **Segment Financial Results**:\n - Revenue growth was observed in the Entertainment segment (9%) and Experiences segment (3%), while Sports revenue remained flat.\n - Segment operating income for Entertainment increased significantly by 95%, while Sports shifted from a loss to a positive income. Experiences segment operating income remained stable.\n\n3. **Non-GAAP Measures**:\n - The document highlights the use of non-GAAP financial measures such as total segment operating income, diluted EPS excluding certain items, and free cash flow, with references to further details and reconciliations provided elsewhere in the report."
}
],
"node_id": "0008",
"summary": "The partial document provides a summarized overview of financial results for the first quarter of fiscal years 2025 and 2024. Key points include:\n\n1. **Overall Financial Performance**:\n - Revenues increased by 5% from $23,549 million in 2024 to $24,690 million in 2025.\n - Income before income taxes rose by 27%.\n - Total segment operating income grew by 31%.\n - Diluted EPS increased by 35%, and diluted EPS excluding certain items rose by 44%.\n - Cash provided by operations increased by 47%, while free cash flow decreased by 17%.\n\n2. **Segment Financial Results**:\n - Revenue growth was observed in the Entertainment segment (9%) and Experiences segment (3%), while Sports revenue remained flat.\n - Segment operating income for Entertainment increased significantly by 95%, while Sports shifted from a loss to a positive income. Experiences segment operating income remained stable.\n\n3. **Non-GAAP Measures**:\n - The document highlights the use of non-GAAP financial measures such as total segment operating income, diluted EPS excluding certain items, and free cash flow, with references to further details and reconciliations provided in later sections."
},
{
"title": "DISCUSSION OF FIRST QUARTER SEGMENT RESULTS",
"start_index": 4,
"end_index": 4,
"nodes": [
{
"title": "Star India",
"start_index": 4,
"end_index": 4,
"node_id": "0011",
"summary": "The partial document discusses the first-quarter segment results, focusing on the Star India joint venture formed between the Company and Reliance Industries Limited (RIL) on November 14, 2024. The joint venture combines Star-branded entertainment and sports television channels, Disney+ Hotstar, and certain RIL-controlled media businesses, with RIL holding a 56% controlling interest, the Company holding 37%, and a third-party investment company holding 7%. The Company now recognizes its 37% share of the joint venture\u2019s results under \"Equity in the income of investees.\" Additionally, the document provides financial results for the Entertainment segment, showing a 9% increase in total revenues and a 95% increase in operating income compared to the prior-year quarter. The growth in operating income is attributed to improved results in Content Sales/Licensing and Direct-to-Consumer, partially offset by a decline in Linear Networks."
},
{
"title": "Entertainment",
"start_index": 4,
"end_index": 4,
"nodes": [
{
"title": "Linear Networks",
"start_index": 5,
"end_index": 5,
"node_id": "0013",
"summary": "The partial document provides financial performance details for Linear Networks and Direct-to-Consumer segments for the quarters ending December 28, 2024, and December 30, 2023. Key points include:\n\n1. **Linear Networks**:\n - Revenue decreased by 7%, with domestic revenue remaining flat and international revenue declining by 31%.\n - Operating income decreased by 11%, with domestic income stable and international income dropping by 39%.\n - Domestic operating income was impacted by higher programming costs (due to the 2023 guild strikes), lower affiliate revenue (fewer subscribers), lower technology costs, and higher advertising revenue (driven by political advertising but offset by lower viewership).\n - International operating income decline was attributed to the Star India Transaction.\n - Equity income from investees decreased due to lower income from A+E Television Networks, reduced advertising and affiliate revenue, and the absence of a prior-year gain from an investment sale.\n\n2. **Direct-to-Consumer**:\n - Revenue increased by 9%, driven by higher subscription revenue due to increased pricing and more subscribers, partially offset by unfavorable foreign exchange impacts.\n - Operating income improved significantly, moving from a loss in the prior year to a profit, reflecting subscription revenue growth."
},
{
"title": "Direct-to-Consumer",
"start_index": 5,
"end_index": 7,
"node_id": "0014",
"summary": "The partial document provides a financial performance overview of various segments for the quarter ended December 28, 2024, compared to the prior-year quarter. Key points include:\n\n1. **Linear Networks**:\n - Revenue decreased by 7%, with domestic revenue flat and international revenue down 31%.\n - Operating income decreased by 11%, with domestic income flat and international income down 39%, primarily due to the Star India transaction.\n - Equity income from investees declined by 29%, driven by lower income from A+E Television Networks and the absence of a prior-year gain on an investment sale.\n\n2. **Direct-to-Consumer (DTC)**:\n - Revenue increased by 9%, and operating income improved significantly from a loss of $138 million to a profit of $293 million.\n - Growth was driven by higher subscription revenue due to pricing increases and more subscribers, partially offset by higher costs and lower advertising revenue.\n - Key metrics showed slight changes in Disney+ and Hulu subscriber numbers, with increases in average monthly revenue per paid subscriber due to pricing adjustments.\n\n3. **Content Sales/Licensing and Other**:\n - Revenue increased by 34%, and operating income improved significantly, driven by strong theatrical performance, particularly from \"Moana 2,\" and contributions from \"Mufasa: The Lion King.\"\n\n4. **Sports**:\n - ESPN revenue grew by 8%, with domestic and international segments showing increases, while Star India revenue dropped by 90%.\n - Operating income for ESPN improved by 15%, while Star India shifted from a loss to a small profit.\n\nThe document highlights revenue trends, operating income changes, and key drivers for each segment, including programming costs, subscriber growth, pricing adjustments, and content performance."
},
{
"title": "Content Sales/Licensing and Other",
"start_index": 7,
"end_index": 7,
"node_id": "0015",
"summary": "The partial document discusses the financial performance of Disney's streaming services, content sales, and sports segment. Key points include:\n\n1. **Disney+ Revenue**: Domestic and international Disney+ average monthly revenue per paid subscriber increased due to pricing hikes, partially offset by promotional offerings. International revenue also benefited from higher advertising revenue.\n\n2. **Hulu Revenue**: Hulu SVOD Only revenue remained stable, with pricing increases offsetting lower advertising revenue. Hulu Live TV + SVOD revenue increased due to pricing hikes.\n\n3. **Content Sales/Licensing**: Revenue and operating income improved significantly, driven by strong theatrical distribution results, particularly from \"Moana 2,\" and contributions from \"Mufasa: The Lion King.\"\n\n4. **Sports Revenue**: ESPN domestic and international revenues grew, while Star India revenue declined sharply. Operating income for ESPN improved, with domestic income slightly down and international losses reduced. Star India showed a notable recovery in operating income."
}
],
"node_id": "0012",
"summary": "The partial document discusses the first-quarter segment results, focusing on the Star India joint venture formed between the Company and Reliance Industries Limited (RIL) on November 14, 2024. The joint venture combines Star-branded entertainment and sports television channels and the Disney+ Hotstar service in India, with RIL holding a 56% controlling interest, the Company holding 37%, and a third-party investment company holding 7%. The Company now recognizes its 37% share of the joint venture\u2019s results under \u201cEquity in the income of investees.\u201d Additionally, the document provides financial results for the Entertainment segment, showing a 9% increase in total revenues compared to the prior year, driven by growth in Direct-to-Consumer and Content Sales/Licensing and Other, despite a decline in Linear Networks. Operating income increased by 95%, primarily due to improved results in Content Sales/Licensing and Other and Direct-to-Consumer, partially offset by a decrease in Linear Networks."
},
{
"title": "Sports",
"start_index": 7,
"end_index": 7,
"nodes": [
{
"title": "Domestic ESPN",
"start_index": 8,
"end_index": 8,
"node_id": "0017",
"summary": "The partial document discusses the financial performance of ESPN, including domestic and international operations, as well as Star India, for the current quarter compared to the prior-year quarter. Key points include:\n\n1. **Domestic ESPN**: \n - Decrease in operating results due to higher programming and production costs, primarily from expanded college football programming rights and changes in the College Football Playoff (CFP) format.\n - Increase in advertising revenue due to higher rates.\n - Revenue from sub-licensing CFP programming rights.\n - Affiliate revenue remained comparable, with rate increases offset by fewer subscribers.\n\n2. **International ESPN**: \n - Decrease in operating loss driven by higher fees from the Entertainment segment for Disney+ sports content.\n - Increased programming and production costs due to higher soccer rights costs.\n - Lower affiliate revenue due to fewer subscribers.\n\n3. **Star India**: \n - Improved operating results due to the absence of significant cricket events in the current quarter compared to the prior-year quarter, which included the ICC Cricket World Cup.\n\n4. **Key Metrics for ESPN+**:\n - Paid subscribers decreased from 25.6 million to 24.9 million.\n - Average monthly revenue per paid subscriber increased from $5.94 to $6.36, driven by pricing increases and higher advertising revenue."
},
{
"title": "International ESPN",
"start_index": 8,
"end_index": 8,
"node_id": "0018",
"summary": "The partial document discusses the financial performance of ESPN, including domestic and international operations, as well as Star India, for the current quarter compared to the prior-year quarter. Key points include:\n\n1. **Domestic ESPN**: \n - Decrease in operating results due to higher programming and production costs, primarily from expanded college football programming rights and changes in the College Football Playoff (CFP) format.\n - Increase in advertising revenue due to higher rates.\n - Revenue from sub-licensing CFP programming rights.\n - Affiliate revenue remained comparable, with rate increases offset by fewer subscribers.\n\n2. **International ESPN**: \n - Decrease in operating loss driven by higher fees from the Entertainment segment for Disney+ sports content.\n - Increased programming and production costs due to higher soccer rights costs.\n - Lower affiliate revenue due to fewer subscribers.\n\n3. **Star India**: \n - Improved operating results due to the absence of significant cricket events in the current quarter compared to the ICC Cricket World Cup in the prior-year quarter.\n\n4. **Key Metrics for ESPN+**:\n - Paid subscribers decreased from 25.6 million to 24.9 million.\n - Average monthly revenue per paid subscriber increased from $5.94 to $6.36, driven by pricing increases and higher advertising revenue."
},
{
"title": "Star India",
"start_index": 8,
"end_index": 8,
"node_id": "0019",
"summary": "The partial document discusses the financial performance of ESPN, including domestic and international operations, as well as Star India, for a specific quarter. Key points include:\n\n1. **Domestic ESPN**: \n - Decrease in operating results due to higher programming and production costs, primarily from expanded college football programming rights, including additional College Football Playoff (CFP) games under a revised format.\n - Increase in advertising revenue due to higher rates.\n - Revenue from sub-licensing CFP programming rights.\n - Affiliate revenue remained comparable to the prior year due to effective rate increases offset by fewer subscribers.\n\n2. **International ESPN**: \n - Decrease in operating loss driven by higher fees from the Entertainment segment for sports content on Disney+.\n - Increased programming and production costs due to higher soccer rights costs.\n - Lower affiliate revenue due to fewer subscribers.\n\n3. **Star India**: \n - Improvement in operating results due to the absence of significant cricket events in the current quarter compared to the prior year, which included the ICC Cricket World Cup.\n\n4. **Key Metrics for ESPN+**:\n - Paid subscribers decreased from 25.6 million to 24.9 million.\n - Average monthly revenue per paid subscriber increased from $5.94 to $6.36, driven by pricing increases and higher advertising revenue."
}
],
"node_id": "0016",
"summary": "The partial document discusses the financial performance of Disney's streaming services, content sales, and sports segment. Key points include:\n\n1. **Disney+ Revenue**: Domestic and international Disney+ average monthly revenue per paid subscriber increased due to pricing hikes, partially offset by promotional offerings. International revenue also benefited from higher advertising revenue.\n\n2. **Hulu Revenue**: Hulu SVOD Only revenue remained stable, with pricing increases offsetting lower advertising revenue. Hulu Live TV + SVOD revenue increased due to pricing hikes.\n\n3. **Content Sales/Licensing**: Revenue and operating income improved significantly, driven by strong theatrical performance, particularly from \"Moana 2,\" and contributions from \"Mufasa: The Lion King.\"\n\n4. **Sports Revenue**: ESPN domestic and international revenues grew, while Star India revenue declined sharply. Operating income for ESPN improved, with domestic income slightly down and international income showing significant recovery. Star India showed a notable turnaround in operating income."
},
{
"title": "Experiences",
"start_index": 9,
"end_index": 9,
"node_id": "0020",
"summary": "The partial document provides financial performance details for the Parks & Experiences segment, including revenues and operating income for domestic and international operations, as well as consumer products. It highlights a 3% increase in total revenue and stable operating income compared to the prior year. Domestic parks and experiences were negatively impacted by hurricanes, leading to lower volumes and higher costs, despite increased guest spending. International parks and experiences saw growth in operating income due to higher guest spending, increased attendance, and new offerings. The document also notes increased corporate expenses due to a legal settlement and a $143 million loss related to the Star India Transaction."
}
],
"node_id": "0010",
"summary": "The partial document discusses the first-quarter segment results, focusing on the Star India joint venture formed between the Company and Reliance Industries Limited (RIL) on November 14, 2024. The joint venture combines Star-branded entertainment and sports television channels, Disney+ Hotstar, and certain RIL-controlled media businesses, with RIL holding a 56% controlling interest, the Company holding 37%, and a third-party investment company holding 7%. The Company now recognizes its 37% share of the joint venture\u2019s results under \"Equity in the income of investees.\" Additionally, the document provides financial results for the Entertainment segment, showing a 9% increase in total revenues and a 95% increase in operating income compared to the prior-year quarter. The growth in operating income is attributed to improved results in Content Sales/Licensing and Direct-to-Consumer, partially offset by a decline in Linear Networks."
},
{
"title": "OTHER FINANCIAL INFORMATION",
"start_index": 9,
"end_index": 9,
"nodes": [
{
"title": "Corporate and Unallocated Shared Expenses",
"start_index": 9,
"end_index": 9,
"node_id": "0022",
"summary": "The partial document provides a financial overview of revenues and operating income for Parks & Experiences, including Domestic, International, and Consumer Products segments, comparing the quarters ending December 28, 2024, and December 30, 2023. It highlights a 3% increase in overall revenue and stable operating income. Domestic Parks and Experiences were negatively impacted by Hurricanes Milton and Helene, leading to closures, cancellations, higher costs, and lower attendance, despite increased guest spending. International Parks and Experiences saw growth in operating income due to higher guest spending, increased attendance, and new offerings, offset by higher costs. The document also notes a $152 million increase in corporate and unallocated shared expenses due to a legal settlement and a $143 million loss related to the Star India Transaction."
},
{
"title": "Restructuring and Impairment Charges",
"start_index": 9,
"end_index": 9,
"node_id": "0023",
"summary": "The partial document provides financial performance details for the Parks & Experiences segment, including revenues and operating income for domestic and international operations, as well as consumer products. It highlights a 3% increase in overall revenue and stable operating income compared to the prior year. Domestic parks and experiences were negatively impacted by hurricanes, leading to lower volumes and higher costs, despite increased guest spending. International parks and experiences saw growth in operating income due to higher guest spending, increased attendance, and new offerings, though costs also rose. Additionally, corporate and unallocated shared expenses increased due to a legal settlement, and a $143 million loss was recorded related to the Star India Transaction."
},
{
"title": "Interest Expense, net",
"start_index": 10,
"end_index": 10,
"node_id": "0024",
"summary": "The partial document provides a financial analysis of interest expense, net, equity in the income of investees, and income taxes for the quarters ending December 28, 2024, and December 30, 2023. Key points include:\n\n1. **Interest Expense, Net**: A decrease in interest expense due to lower average rates and debt balances, partially offset by reduced capitalized interest. Interest income and investment income declined due to lower cash balances, pension-related costs, and investment losses compared to prior-year gains.\n\n2. **Equity in the Income of Investees**: A $89 million decrease in income from investees, primarily due to lower income from A+E and losses from the India joint venture.\n\n3. **Income Taxes**: An increase in the effective income tax rate from 25.1% to 27.8%, driven by a non-cash tax charge related to the Star India Transaction, partially offset by favorable adjustments related to prior years, lower foreign tax rates, and a comparison to unfavorable prior-year effects of employee share-based awards."
},
{
"title": "Equity in the Income of Investees",
"start_index": 10,
"end_index": 10,
"node_id": "0025",
"summary": "The partial document provides a financial analysis of interest expense, net, equity in the income of investees, and income taxes for the quarters ended December 28, 2024, and December 30, 2023. It highlights a decrease in net interest expense due to lower average rates and debt balances, offset by reduced capitalized interest. Interest income and investment income declined due to lower cash balances, pension-related costs, and investment losses. Equity income from investees decreased significantly, driven by lower income from A+E and losses from the India joint venture. The effective income tax rate increased due to a non-cash tax charge related to the Star India Transaction, partially offset by favorable adjustments related to prior years, lower foreign tax rates, and a comparison to unfavorable prior-year effects."
},
{
"title": "Income Taxes",
"start_index": 10,
"end_index": 10,
"node_id": "0026",
"summary": "The partial document provides a financial analysis of interest expense, net, equity in the income of investees, and income taxes for the quarters ended December 28, 2024, and December 30, 2023. It highlights a decrease in net interest expense due to lower average rates and debt balances, offset by reduced capitalized interest. Interest income and investment income declined due to lower cash balances, pension-related costs, and investment losses. Equity income from investees dropped significantly, driven by lower income from A+E and losses from the India joint venture. The effective income tax rate increased due to a non-cash tax charge related to the Star India Transaction, partially offset by favorable adjustments related to prior years, lower foreign tax rates, and a comparison to unfavorable prior-year effects."
},
{
"title": "Noncontrolling Interests",
"start_index": 11,
"end_index": 11,
"node_id": "0027",
"summary": "The partial document covers two main points:\n\n1. **Noncontrolling Interests**: It discusses the net income attributable to noncontrolling interests, which decreased by 63% compared to the prior-year quarter. The decrease is attributed to the prior-year accretion of NBC Universal\u2019s interest in Hulu. The calculation of net income attributable to noncontrolling interests is based on income after royalties, management fees, financing costs, and income taxes.\n\n2. **Cash from Operations**: It details cash provided by operations and free cash flow, showing an increase in cash provided by operations by $1.0 billion to $3.2 billion in the current quarter. The increase is driven by lower tax payments, higher operating income at Entertainment, and higher film and television production spending, along with the timing of payments for sports rights. Free cash flow decreased by $147 million compared to the prior-year quarter."
},
{
"title": "Cash from Operations",
"start_index": 11,
"end_index": 11,
"node_id": "0028",
"summary": "The partial document covers two main points:\n\n1. **Noncontrolling Interests**: It discusses the net income attributable to noncontrolling interests, which decreased by 63% in the quarter ended December 28, 2024, compared to the prior-year quarter. The decrease is attributed to the prior-year accretion of NBC Universal\u2019s interest in Hulu. The calculation of net income attributable to noncontrolling interests includes royalties, management fees, financing costs, and income taxes.\n\n2. **Cash from Operations**: It details cash provided by operations and free cash flow for the quarter ended December 28, 2024, compared to the prior-year quarter. Cash provided by operations increased by $1.0 billion, driven by lower tax payments, higher operating income at Entertainment, and higher film and television production spending, along with the timing of payments for sports rights. Free cash flow decreased by $147 million due to increased investments in parks, resorts, and other property."
},
{
"title": "Capital Expenditures",
"start_index": 12,
"end_index": 12,
"node_id": "0029",
"summary": "The partial document provides details on capital expenditures and depreciation expenses for parks, resorts, and other properties. It highlights an increase in capital expenditures from $1.3 billion to $2.5 billion, primarily due to higher spending on cruise ship fleet expansion in the Experiences segment. The document also breaks down investments and depreciation expenses by category (Entertainment, Sports, Domestic and International Experiences, and Corporate) for the quarters ending December 28, 2024, and December 30, 2023. Depreciation expenses increased from $823 million to $909 million, with detailed figures provided for each segment."
},
{
"title": "Depreciation Expense",
"start_index": 12,
"end_index": 12,
"node_id": "0030",
"summary": "The partial document provides details on capital expenditures and depreciation expenses for parks, resorts, and other properties. It highlights an increase in capital expenditures from $1.3 billion to $2.5 billion, primarily due to higher spending on cruise ship fleet expansion in the Experiences segment. The breakdown of investments and depreciation expenses is provided for Entertainment, Sports, Domestic and International Experiences, and Corporate segments for the quarters ending December 28, 2024, and December 30, 2023. Depreciation expenses also increased from $823 million to $909 million, with detailed segment-wise allocations."
}
],
"node_id": "0021",
"summary": "The partial document provides a financial overview of revenues and operating income for Parks & Experiences, including Domestic, International, and Consumer Products segments, comparing the quarters ending December 28, 2024, and December 30, 2023. It highlights a 3% increase in total revenue and stable operating income. Domestic Parks and Experiences were negatively impacted by Hurricanes Milton and Helene, leading to closures, cancellations, higher costs, and lower attendance, despite increased guest spending. International Parks and Experiences saw growth in operating income due to higher guest spending, increased attendance, and new offerings, offset by increased costs. The document also notes a rise in corporate and unallocated shared expenses due to a legal settlement and a $143 million loss related to the Star India Transaction."
},
{
"title": "THE WALT DISNEY COMPANY CONDENSED CONSOLIDATED STATEMENTS OF INCOME",
"start_index": 13,
"end_index": 13,
"node_id": "0031",
"summary": "The partial document provides a condensed consolidated statement of income for The Walt Disney Company for the quarters ended December 28, 2024, and December 30, 2023. It includes details on revenues, costs and expenses, restructuring and impairment charges, net interest expense, equity in the income of investees, income before income taxes, income taxes, and net income. It also breaks down net income attributable to noncontrolling interests and The Walt Disney Company. Additionally, it provides earnings per share (diluted and basic) and the weighted average number of shares outstanding (diluted and basic) for both periods."
},
{
"title": "THE WALT DISNEY COMPANY CONDENSED CONSOLIDATED BALANCE SHEETS",
"start_index": 14,
"end_index": 14,
"node_id": "0032",
"summary": "The partial document is a condensed consolidated balance sheet for The Walt Disney Company, comparing financial data as of December 28, 2024, and September 28, 2024. It details the company's assets, liabilities, and equity. Key points include:\n\n1. **Assets**: Breakdown of current assets (cash, receivables, inventories, content advances, and other assets), produced and licensed content costs, investments, property (attractions, buildings, equipment, projects in progress, and land), intangible assets, goodwill, and other assets. Total assets increased slightly from $196.2 billion to $197 billion.\n\n2. **Liabilities**: Includes current liabilities (accounts payable, borrowings, deferred revenue), long-term borrowings, deferred income taxes, and other long-term liabilities. Total liabilities remained relatively stable.\n\n3. **Equity**: Details Disney shareholders' equity, including common stock, retained earnings, accumulated other comprehensive loss, and treasury stock. Noncontrolling interests are also included. Total equity increased from $105.5 billion to $106.7 billion.\n\n4. **Overall Financial Position**: The balance sheet reflects a stable financial position with slight changes in assets, liabilities, and equity over the period."
},
{
"title": "THE WALT DISNEY COMPANY CONDENSED CONSOLIDATED STATEMENTS OF CASH FLOWS",
"start_index": 15,
"end_index": 15,
"node_id": "0033",
"summary": "The partial document provides a condensed consolidated statement of cash flows for The Walt Disney Company for the quarters ended December 28, 2024, and December 30, 2023. It details cash flow activities categorized into operating, investing, and financing activities. Key points include:\n\n1. **Operating Activities**: Net income increased from $2,151 million in 2023 to $2,644 million in 2024. Other significant changes include variations in depreciation, deferred taxes, equity income, content costs, and changes in operating assets and liabilities, resulting in cash provided by operations of $3,205 million in 2024 compared to $2,185 million in 2023.\n\n2. **Investing Activities**: Investments in parks, resorts, and other properties increased significantly in 2024 ($2,466 million) compared to 2023 ($1,299 million), leading to higher cash used in investing activities.\n\n3. **Financing Activities**: The company saw a net cash outflow in financing activities, including commercial paper borrowings, stock repurchases, and debt reduction. In 2024, cash used in financing activities was $997 million, a significant improvement from $8,006 million in 2023.\n\n4. **Exchange Rate Impact**: Exchange rates negatively impacted cash in 2024 by $153 million, compared to a positive impact of $79 million in 2023.\n\n5. **Overall Cash Position**: The company\u2019s cash, cash equivalents, and restricted cash decreased from $14,235 million at the beginning of the 2023 period to $5,582 million at the end of the 2024 period."
},
{
"title": "DTC PRODUCT DESCRIPTIONS AND KEY DEFINITIONS",
"start_index": 16,
"end_index": 16,
"node_id": "0034",
"summary": "The partial document provides an overview of Disney's Direct-to-Consumer (DTC) product offerings, key definitions, and metrics. It details the availability of Disney+, ESPN+, and Hulu as standalone services or bundled offerings in the U.S., including Hulu Live TV + SVOD, which incorporates Disney+ and ESPN+. It explains the global reach of Disney+ in over 150 countries and the various purchase channels, including websites, third-party platforms, and wholesale arrangements. The document defines \"paid subscribers\" as those generating subscription revenue, excluding extra member add-ons, and outlines how subscribers are counted for multi-product offerings. It also describes the calculation of average monthly revenue per paid subscriber for Hulu, ESPN+, and Disney+, including revenue components like subscription fees, advertising, and add-ons, while noting differences in revenue allocation and the impact of wholesale arrangements on average revenue."
},
{
"title": "NON-GAAP FINANCIAL MEASURES",
"start_index": 17,
"end_index": 17,
"nodes": [
{
"title": "Diluted EPS excluding certain items",
"start_index": 17,
"end_index": 18,
"node_id": "0036",
"summary": "The partial document discusses the use of non-GAAP financial measures, specifically diluted EPS excluding certain items (adjusted EPS), total segment operating income, and free cash flow. It explains that these measures are not defined by GAAP but are important for evaluating the company's performance. The document highlights that these measures should be reviewed alongside comparable GAAP measures and may not be directly comparable to similar measures from other companies. It provides details on the adjustments made to diluted EPS, including the exclusion of certain items affecting comparability and amortization of TFCF and Hulu intangible assets, to better reflect operational performance. The document also includes a reconciliation table comparing reported diluted EPS to adjusted EPS for specific quarters, showing the impact of excluded items such as restructuring charges and intangible asset amortization. Additionally, it notes the challenges in providing forward-looking GAAP measures due to unpredictable factors."
},
{
"title": "Total segment operating income",
"start_index": 19,
"end_index": 20,
"node_id": "0037",
"summary": "The partial document focuses on the evaluation of the company's performance through two key financial metrics: total segment operating income and free cash flow. It explains that total segment operating income is used to assess the performance of operating segments separately from non-operational factors, providing insights into operational results. A reconciliation table is provided, showing the calculation of total segment operating income for two quarters, highlighting changes in various components such as corporate expenses, restructuring charges, and interest expenses. Additionally, the document discusses free cash flow as a measure of cash available for purposes beyond capital expenditures, such as debt servicing, acquisitions, and shareholder returns. A summary of consolidated cash flows and a reconciliation of cash provided by operations to free cash flow are presented, comparing figures for two quarters and highlighting changes in cash flow components."
},
{
"title": "Free cash flow",
"start_index": 20,
"end_index": 20,
"node_id": "0038",
"summary": "The partial document provides a reconciliation of the company's consolidated cash provided by operations to free cash flow for the quarters ended December 28, 2024, and December 30, 2023. It highlights a $1,020 million increase in cash provided by operations, a $1,167 million increase in investments in parks, resorts, and other property, and a $147 million decrease in free cash flow."
}
],
"node_id": "0035",
"summary": "The partial document discusses the use of non-GAAP financial measures by the company, including diluted EPS excluding certain items (adjusted EPS), total segment operating income, and free cash flow. It explains that these measures are not defined by GAAP but are important for evaluating the company's performance. The document emphasizes that these measures should be reviewed alongside comparable GAAP measures and may not be directly comparable to similar measures from other companies. It highlights the company's inability to provide forward-looking GAAP measures or reconciliations due to uncertainties in predicting significant items. Additionally, the document details the rationale for excluding certain items and amortization of TFCF and Hulu intangible assets from diluted EPS to enhance comparability and provide a clearer evaluation of operational performance, particularly given the significant impact of the 2019 TFCF and Hulu acquisition."
},
{
"title": "FORWARD-LOOKING STATEMENTS",
"start_index": 21,
"end_index": 21,
"node_id": "0039",
"summary": "The partial document outlines the inclusion of forward-looking statements in an earnings release, emphasizing that these statements are based on management's views and assumptions about future events and business performance. It highlights that actual results may differ materially due to various factors, including company actions (e.g., restructuring, strategic initiatives, cost rationalization), external developments (e.g., economic conditions, competition, consumer behavior, regulatory changes, technological advancements, labor market activities, and natural disasters), and their potential impacts on operations, profitability, content performance, advertising markets, and taxation. The document also references additional risk factors and analyses detailed in the company's filings with the SEC, such as annual and quarterly reports."
},
{
"title": "PREPARED EARNINGS REMARKS AND CONFERENCE CALL INFORMATION",
"start_index": 22,
"end_index": 22,
"node_id": "0040",
"summary": "The partial document provides information about The Walt Disney Company's prepared management remarks and a conference call scheduled for February 5, 2025, at 8:30 AM EST/5:30 AM PST, accessible via a live webcast on their investor website. It also mentions that a replay of the webcast will be available on the site. Additionally, contact details for Corporate Communications (David Jefferson) and Investor Relations (Carlos Gomez) are provided."
}
]
}

107
utils.py
View file

@ -10,15 +10,19 @@ import copy
import asyncio
import pymupdf
from io import BytesIO
from dotenv import load_dotenv
# Load variables from a .env file into the process environment so the lookup below can see them.
load_dotenv()
import logging
# Read once at import time (None when the variable is unset); the ChatGPT_API* helpers
# use this value as the default for their `api_key` parameter.
CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY")
def count_tokens(text, model):
    """Return how many tokens `text` occupies under `model`'s tokenizer.

    Args:
        text: String to measure. ``None`` or an empty string counts as zero
            tokens and is answered without consulting the tokenizer.
        model: Model name passed to ``tiktoken.encoding_for_model``.

    Returns:
        The length of the encoded token sequence as an int.
    """
    # Nothing to encode; this also avoids handing None to the encoder.
    if not text:
        return 0
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))
def ChatGPT_API_with_finish_reason(model, prompt, api_key, chat_history=None):
def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):
max_retries = 10
client = openai.OpenAI(api_key=api_key)
for i in range(max_retries):
@ -50,7 +54,7 @@ def ChatGPT_API_with_finish_reason(model, prompt, api_key, chat_history=None):
def ChatGPT_API(model, prompt, api_key, chat_history=None):
def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):
max_retries = 10
client = openai.OpenAI(api_key=api_key)
for i in range(max_retries):
@ -78,7 +82,7 @@ def ChatGPT_API(model, prompt, api_key, chat_history=None):
return "Error"
async def ChatGPT_API_async(model, prompt, api_key):
async def ChatGPT_API_async(model, prompt, api_key=CHATGPT_API_KEY):
max_retries = 10
client = openai.AsyncOpenAI(api_key=api_key)
for i in range(max_retries):
@ -151,7 +155,7 @@ def write_node_id(data, node_id=0):
data['node_id'] = str(node_id).zfill(4)
node_id += 1
for key in list(data.keys()):
if 'child_nodes' in key:
if 'nodes' in key:
node_id = write_node_id(data[key], node_id)
elif isinstance(data, list):
for index in range(len(data)):
@ -161,10 +165,10 @@ def write_node_id(data, node_id=0):
def get_nodes(structure):
if isinstance(structure, dict):
structure_node = copy.deepcopy(structure)
structure_node.pop('child_nodes', None)
structure_node.pop('nodes', None)
nodes = [structure_node]
for key in list(structure.keys()):
if 'child_nodes' in key:
if 'nodes' in key:
nodes.extend(get_nodes(structure[key]))
return nodes
elif isinstance(structure, list):
@ -177,8 +181,8 @@ def structure_to_list(structure):
if isinstance(structure, dict):
nodes = []
nodes.append(structure)
if 'child_nodes' in structure:
nodes.extend(structure_to_list(structure['child_nodes']))
if 'nodes' in structure:
nodes.extend(structure_to_list(structure['nodes']))
return nodes
elif isinstance(structure, list):
nodes = []
@ -189,14 +193,14 @@ def structure_to_list(structure):
def get_leaf_nodes(structure):
if isinstance(structure, dict):
if not structure['child_nodes']:
if not structure['nodes']:
structure_node = copy.deepcopy(structure)
structure_node.pop('child_nodes', None)
structure_node.pop('nodes', None)
return [structure_node]
else:
leaf_nodes = []
for key in list(structure.keys()):
if 'child_nodes' in key:
if 'nodes' in key:
leaf_nodes.extend(get_leaf_nodes(structure[key]))
return leaf_nodes
elif isinstance(structure, list):
@ -212,7 +216,7 @@ def is_leaf_node(data, node_id):
if data.get('node_id') == node_id:
return data
for key in data.keys():
if 'child_nodes' in key:
if 'nodes' in key:
result = find_node(data[key], node_id)
if result:
return result
@ -227,7 +231,7 @@ def is_leaf_node(data, node_id):
node = find_node(data, node_id)
# Check if the node is a leaf node
if node and not node.get('child_nodes'):
if node and not node.get('nodes'):
return True
return False
@ -353,7 +357,7 @@ def list_to_tree(data):
'title': item.get('title'),
'start_index': item.get('start_index'),
'end_index': item.get('end_index'),
'child_nodes': []
'nodes': []
}
nodes[structure] = node
@ -364,7 +368,7 @@ def list_to_tree(data):
if parent_structure:
# Add as child to parent if parent exists
if parent_structure in nodes:
nodes[parent_structure]['child_nodes'].append(node)
nodes[parent_structure]['nodes'].append(node)
else:
root_nodes.append(node)
else:
@ -373,10 +377,10 @@ def list_to_tree(data):
# Helper function to clean empty children arrays
def clean_node(node):
if not node['child_nodes']:
del node['child_nodes']
if not node['nodes']:
del node['nodes']
else:
for child in node['child_nodes']:
for child in node['nodes']:
clean_node(child)
return node
@ -424,7 +428,7 @@ def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
def get_text_of_pdf_pages(pdf_pages, start_page, end_page):
    """Concatenate the text of pages ``start_page``..``end_page`` (1-based, inclusive).

    Args:
        pdf_pages: Sequence of per-page records whose element 0 is the page
            text (presumably the records built by get_page_tokens - confirm
            against that function).
        start_page: 1-based number of the first page to include.
        end_page: 1-based number of the last page to include.

    Returns:
        The selected pages' text joined with no separator; an empty string
        when the range selects no pages.
    """
    # Only element 0 of each record is appended. The superseded
    # `text += pdf_pages[page_num]` accumulation is intentionally dropped:
    # run alongside the `[0]` form it fails on non-string records.
    return "".join(
        pdf_pages[page_num][0] for page_num in range(start_page - 1, end_page)
    )
def get_number_of_pages(pdf_path):
@ -460,8 +464,8 @@ def clean_structure_post(data):
data.pop('page_number', None)
data.pop('start_index', None)
data.pop('end_index', None)
if 'child_nodes' in data:
clean_structure_post(data['child_nodes'])
if 'nodes' in data:
clean_structure_post(data['nodes'])
elif isinstance(data, list):
for section in data:
clean_structure_post(section)
@ -471,8 +475,8 @@ def clean_structure_post(data):
def remove_structure_text(data):
if isinstance(data, dict):
data.pop('text', None)
if 'child_nodes' in data:
remove_structure_text(data['child_nodes'])
if 'nodes' in data:
remove_structure_text(data['nodes'])
elif isinstance(data, list):
for item in data:
remove_structure_text(item)
@ -522,3 +526,60 @@ def convert_page_to_int(data):
# Keep original value if conversion fails
pass
return data
def write_node_id(data, node_id=0):
    """Assign sequential, zero-padded ``node_id`` strings to a tree, parents first.

    Args:
        data: A node dict, a list of nodes, or any other value (left untouched).
            Children are looked up under every key whose name contains the
            substring ``'nodes'``.
        node_id: Integer id given to the first node visited.

    Returns:
        The next unused integer id, so recursive calls and callers can keep
        counting from where this call stopped.
    """
    # NOTE(review): a write_node_id with the same signature appears to be
    # defined earlier in this file; this later definition would shadow it -
    # confirm and keep only one.
    if isinstance(data, dict):
        # Number the parent before descending so ids follow pre-order.
        data['node_id'] = str(node_id).zfill(4)
        node_id += 1
        for key, child in data.items():
            if 'nodes' in key:
                node_id = write_node_id(child, node_id)
    elif isinstance(data, list):
        for item in data:
            node_id = write_node_id(item, node_id)
    return node_id
def add_node_text(node, pdf_pages):
    """Store each node's page text under its ``'text'`` key, modifying the tree in place.

    Args:
        node: A node dict or a list of nodes; any other value is ignored. For
            a dict, ``'start_index'`` and ``'end_index'`` are read with
            ``.get`` and children are taken from the ``'nodes'`` key.
        pdf_pages: Page data handed unchanged to ``get_text_of_pdf_pages``.

    Returns:
        None.
    """
    if isinstance(node, dict):
        node['text'] = get_text_of_pdf_pages(
            pdf_pages, node.get('start_index'), node.get('end_index')
        )
        if 'nodes' in node:
            add_node_text(node['nodes'], pdf_pages)
    elif isinstance(node, list):
        for child in node:
            add_node_text(child, pdf_pages)
async def generate_node_summary(node, model=None):
    """Ask the chat model to describe the main points of one node's text.

    Args:
        node: Mapping with a ``'text'`` entry; its value is embedded in the prompt.
        model: Model identifier forwarded to ``ChatGPT_API_async``.

    Returns:
        Whatever ``ChatGPT_API_async`` returns for the prompt.
    """
    # The prompt lines stay at column 0 so the text sent to the model is unchanged.
    prompt = f"""You are given a part of a document, your task is to generate a description of the partial document about what are main points covered in the partial document.
Partial Document Text: {node['text']}
Directly return the description, do not include any other text.
"""
    return await ChatGPT_API_async(model, prompt)
async def generate_summaries_for_structure(structure, model=None):
    """Summarize every node of `structure` concurrently and store the results in place.

    Args:
        structure: Tree flattened with ``structure_to_list``; each resulting
            node receives a ``'summary'`` entry.
        model: Model identifier forwarded to ``generate_node_summary``.

    Returns:
        The same `structure` object that was passed in.
    """
    flat_nodes = structure_to_list(structure)
    # gather() returns results in the order the awaitables were passed,
    # so zip() pairs each summary with the node it was generated for.
    summaries = await asyncio.gather(
        *[generate_node_summary(item, model=model) for item in flat_nodes]
    )
    for item, text in zip(flat_nodes, summaries):
        item['summary'] = text
    return structure
def generate_doc_description(structure, model=None):
    """Ask the chat model for a one-sentence description of the whole document.

    Args:
        structure: Document structure; its string form is embedded in the prompt.
        model: Model identifier forwarded to ``ChatGPT_API``.

    Returns:
        Whatever ``ChatGPT_API`` returns for the prompt.
    """
    # The prompt lines stay at column 0 so the text sent to the model is unchanged.
    prompt = f"""Your are an expert in generating descriptions for a document.
You are given a structure of a document. Your task is to generate a one-sentence description for the document, which makes it easy to distinguish the document from other documents.
Document Structure: {structure}
Directly return the description, do not include any other text.
"""
    return ChatGPT_API(model, prompt)