add summary

This commit is contained in:
Ray 2025-08-26 15:49:03 +08:00
parent 04bbdae647
commit 802f149dd1

View file

@@ -2,8 +2,9 @@ import asyncio
import json import json
import re import re
import tiktoken import tiktoken
from utils import generate_summaries_for_structure
def count_tokens(text, model): def count_tokens(text, model='gpt-4o'):
enc = tiktoken.encoding_for_model(model) enc = tiktoken.encoding_for_model(model)
tokens = enc.encode(text) tokens = enc.encode(text)
return len(tokens) return len(tokens)
@@ -27,7 +28,7 @@ def extract_nodes_from_markdown(markdown_content):
return node_list,lines return node_list,lines
def extract_node_text_content(node_list, markdown_lines, model="gpt-4o"): def extract_node_text_content(node_list, markdown_lines):
all_nodes = [] all_nodes = []
for node in node_list: for node in node_list:
processed_node = { processed_node = {
@@ -45,7 +46,7 @@ def extract_node_text_content(node_list, markdown_lines, model="gpt-4o"):
end_line = len(markdown_lines) end_line = len(markdown_lines)
node['text'] = '\n'.join(markdown_lines[start_line:end_line]).strip() node['text'] = '\n'.join(markdown_lines[start_line:end_line]).strip()
node['text_token_count'] = count_tokens(node['text'], model) node['text_token_count'] = count_tokens(node['text'])
return all_nodes return all_nodes
@@ -157,7 +158,8 @@ def clean_tree_for_output(tree_nodes):
return cleaned_nodes return cleaned_nodes
def md_to_tree(md_path, if_thinning=True, min_token_threshold=None):
async def md_to_tree(md_path, if_thinning=True, min_token_threshold=None, if_summary=True, model="gpt-4.1"):
with open(md_path, 'r', encoding='utf-8') as f: with open(md_path, 'r', encoding='utf-8') as f:
markdown_content = f.read() markdown_content = f.read()
@@ -170,6 +172,10 @@ def md_to_tree(md_path, if_thinning=True, min_token_threshold=None):
thinned_nodes = nodes_with_content thinned_nodes = nodes_with_content
tree_structure = build_tree_from_nodes(thinned_nodes) tree_structure = build_tree_from_nodes(thinned_nodes)
if if_summary:
tree_structure = await generate_summaries_for_structure(tree_structure, model=model)
return tree_structure return tree_structure
@@ -179,9 +185,8 @@ if __name__ == "__main__":
# Path to the Welcome.md file # Path to the Welcome.md file
md_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'Welcome.md') md_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'Welcome.md')
tree_structure = md_to_tree(md_path, if_thinning=True, min_token_threshold=100) tree_structure = asyncio.run(md_to_tree(md_path, if_thinning=True, min_token_threshold=100, if_summary=True))
def print_tree(nodes, indent=0): def print_tree(nodes, indent=0):
for node in nodes: for node in nodes: