fix structure

This commit is contained in:
Ray 2025-08-26 16:14:29 +08:00
parent 802f149dd1
commit 34ed3fbc60
2 changed files with 36 additions and 3 deletions

View file

@ -2,7 +2,7 @@ import asyncio
import json
import re
import tiktoken
from utils import generate_summaries_for_structure
from utils import *
def count_tokens(text, model='gpt-4o'):
enc = tiktoken.encoding_for_model(model)
@ -10,6 +10,19 @@ def count_tokens(text, model='gpt-4o'):
return len(tokens)
async def generate_summaries_for_structure_md(structure, model="gpt-4.1"):
    """Generate a summary for every node of ``structure`` and record it.

    The structure is flattened with ``structure_to_list``; one
    ``generate_node_summary(node, model=model)`` call per flattened node is
    awaited concurrently through ``asyncio.gather``. Each result is written
    onto the corresponding flattened node: under ``'summary'`` when the node
    has no (or an empty) ``'nodes'`` entry, otherwise under
    ``'prefix_summary'``.

    Returns the ``structure`` object that was passed in.

    NOTE(review): the summaries only show up in the returned structure if
    ``structure_to_list`` yields the tree's own node dicts rather than
    copies -- confirm against its definition.
    """
    flat_nodes = structure_to_list(structure)

    pending = []
    for entry in flat_nodes:
        pending.append(generate_node_summary(entry, model=model))
    results = await asyncio.gather(*pending)

    for entry, text in zip(flat_nodes, results):
        # Nodes with children get a "prefix" summary; leaves get a plain one.
        target_key = 'prefix_summary' if entry.get('nodes') else 'summary'
        entry[target_key] = text
    return structure
def extract_nodes_from_markdown(markdown_content):
header_pattern = r'^(#{1,6})\s+(.+)$'
node_list = []
@ -159,7 +172,7 @@ def clean_tree_for_output(tree_nodes):
return cleaned_nodes
async def md_to_tree(md_path, if_thinning=True, min_token_threshold=None, if_summary=True, model="gpt-4.1"):
async def md_to_tree(md_path, if_thinning=True, min_token_threshold=None, if_summary=True):
with open(md_path, 'r', encoding='utf-8') as f:
markdown_content = f.read()
@ -174,7 +187,9 @@ async def md_to_tree(md_path, if_thinning=True, min_token_threshold=None, if_sum
tree_structure = build_tree_from_nodes(thinned_nodes)
if if_summary:
tree_structure = await generate_summaries_for_structure(tree_structure, model=model)
tree_structure = await generate_summaries_for_structure_md(tree_structure)
tree_structure = format_structure(tree_structure, order = ['title', 'node_id', 'summary', 'prefix_summary', 'text', 'line_num', 'nodes'])
return tree_structure

View file

@ -606,6 +606,24 @@ def generate_doc_description(structure, model=None):
return response
def reorder_dict(data, key_order):
    """Return ``data`` limited to, and ordered by, ``key_order``.

    When ``key_order`` is empty or ``None`` the original ``data`` object is
    returned unchanged. Otherwise a new dict is built by walking
    ``key_order``: names missing from ``data`` are skipped, and keys of
    ``data`` that are not listed in ``key_order`` are left out of the result.
    """
    if key_order:
        reordered = {}
        for name in key_order:
            if name in data:
                reordered[name] = data[name]
        return reordered
    return data
def format_structure(structure, order=None):
    """Recursively tidy a tree of dicts/lists and reorder each dict's keys.

    * A list is mapped element by element into a new list.
    * A dict has its ``'nodes'`` entry (if present) formatted recursively;
      an empty or falsy ``'nodes'`` entry is removed. The dict is then
      passed through ``reorder_dict(structure, order)`` and that result is
      returned.
    * Any other value is returned as-is.

    Note that a dict argument is modified in place (its ``'nodes'`` entry is
    replaced or removed) before it is handed to ``reorder_dict``.
    """
    if isinstance(structure, list):
        return [format_structure(child, order) for child in structure]
    if not isinstance(structure, dict):
        return structure

    if 'nodes' in structure:
        children = format_structure(structure['nodes'], order)
        if children:
            structure['nodes'] = children
        else:
            # Drop childless containers so leaves carry no empty 'nodes' key.
            del structure['nodes']
    return reorder_dict(structure, order)
class ConfigLoader:
def __init__(self, default_path: str = None):
if default_path is None: