feat: compatible with PageIndex SDK (#238)

* feat: compatible with PageIndex SDK

* corner cases fixed

* fix: mock behavior of old SDK

* fix: close streaming response and warn on empty api_key

- LegacyCloudAPI: close response in `finally` for both _stream_chat_response
  variants so abandoned iterators no longer leak the TCP connection.
- PageIndexClient: emit a warning instead of silently falling back to local
  when api_key is the empty string, surfacing typical env-var-unset misconfig.
- FakeResponse: add close()/closed to match the real requests.Response API.
- Add unit coverage for stream close (both paths) and the empty-api_key warning.
- Add scripts/e2e_legacy_sdk.py to smoke-test the legacy SDK contract end-to-end
  against api.pageindex.ai.

* chore: mark legacy SDK methods with @deprecated and docstring pointers

- Decorate the 12 PageIndexClient cloud-SDK compat methods with
  @typing_extensions.deprecated(..., category=PendingDeprecationWarning):
  - IDE/type-checkers render them with a strikethrough hint
  - runtime warnings stay silent by default (no spam for existing callers),
    surfaceable via `python -W default::PendingDeprecationWarning`
- Add a one-line docstring on each pointing to the Collection-based equivalent.
- Promote typing-extensions to a direct dependency (was transitive via litellm).

---------

Co-authored-by: XinyanZhou <xinyanzhou@XinyanZhoudeMacBook-Pro.local>
Co-authored-by: saccharin98 <xinyanzhou938@gmail.com>
Co-authored-by: mountain <kose2livs@gmail.com>
This commit is contained in:
Xinyan Zhou 2026-05-11 21:06:23 +08:00 committed by GitHub
parent 6d29886892
commit 595895cf28
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 1030 additions and 20 deletions

View file

@ -15,6 +15,7 @@ load_dotenv()
import logging
import yaml
from pathlib import Path
from pprint import pprint
from types import SimpleNamespace as config
# Backward compatibility: support CHATGPT_API_KEY as alias for OPENAI_API_KEY
@ -23,6 +24,22 @@ if not os.getenv("OPENAI_API_KEY") and os.getenv("CHATGPT_API_KEY"):
litellm.drop_params = True
async def call_llm(prompt, api_key, model="gpt-4.1", temperature=0):
    """Send a single-turn prompt to an OpenAI chat model and return its reply.

    Kept for compatibility with the pageindex 0.2.x SDK utility API.

    Args:
        prompt: Text sent as the only ``user`` message.
        api_key: API key handed to ``openai.AsyncOpenAI``.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the message has no text content.
    """
    # Imported inside the function, as this shim is the only user visible here.
    import openai

    # The context manager closes the client's underlying HTTP connections
    # once the request finishes instead of leaving them to garbage collection.
    async with openai.AsyncOpenAI(api_key=api_key) as client:
        response = await client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )

    # ``content`` can be None (e.g. a response without text); avoid calling
    # ``.strip()`` on None.
    content = response.choices[0].message.content
    return (content or "").strip()
def count_tokens(text, model=None):
if not text:
return 0
@ -463,12 +480,14 @@ def clean_structure_post(data):
clean_structure_post(section)
return data
def remove_fields(data, fields=('text',), max_len=None):
    """Return a copy of ``data`` without the given keys, optionally truncating strings.

    Dicts and lists are rebuilt recursively; any other value is returned as is.

    Args:
        data: Arbitrarily nested structure of dicts, lists and scalar values.
        fields: Dict keys to drop at every nesting level. The default is an
            immutable tuple so it cannot be modified between calls.
        max_len: When not None, every string value longer than ``max_len``
            characters is cut to ``max_len`` characters and suffixed with
            ``'...'``. Dict keys are never truncated.

    Returns:
        The filtered (and possibly truncated) structure.
    """
    if isinstance(data, dict):
        return {
            key: remove_fields(value, fields, max_len)
            for key, value in data.items()
            if key not in fields
        }
    if isinstance(data, list):
        return [remove_fields(item, fields, max_len) for item in data]
    if isinstance(data, str) and max_len is not None and len(data) > max_len:
        return data[:max_len] + '...'
    return data
def print_toc(tree, indent=0):
@ -684,27 +703,72 @@ class ConfigLoader:
merged = {**self._default_dict, **user_dict}
return config(**merged)
def create_node_mapping(tree, include_page_ranges=False, max_page=None):
    """Create a mapping of node_id to node for quick lookup.

    The optional page-range arguments are kept for compatibility with the
    pageindex 0.2.x SDK utility API.

    Args:
        tree: A node dict or a list of node dicts; children are read from
            each node's ``'nodes'`` key.
        include_page_ranges: When False (default) each value is the node
            itself. When True each value is a dict with keys ``'node'``,
            ``'start_index'`` and ``'end_index'``.
        max_page: End page used for the last node when it has no
            ``'end_index'`` of its own.

    Returns:
        A dict keyed by ``node_id``. Nodes without a truthy ``node_id`` are
        left out.
    """
    def _walk(nodes):
        # Depth-first, parent before children, so the "next node" used for
        # page ranges below is the next one in document order.
        if isinstance(nodes, dict):
            yield nodes
            # ``or []`` tolerates nodes whose 'nodes' key is present but None.
            for child in nodes.get('nodes') or []:
                yield from _walk(child)
        elif isinstance(nodes, list):
            for item in nodes:
                yield from _walk(item)

    all_nodes = list(_walk(tree))

    if not include_page_ranges:
        return {node["node_id"]: node for node in all_nodes if node.get("node_id")}

    mapping = {}
    # Pair every node with its successor; the last node is paired with None.
    for node, following in zip(all_nodes, all_nodes[1:] + [None]):
        if not node.get("node_id"):
            continue

        start_page = node.get("page_index", node.get("start_index"))

        end_page = node.get("end_index")
        if end_page is None:
            if following is not None:
                # No explicit end: fall back to the start of the next node.
                end_page = following.get("page_index", following.get("start_index"))
            else:
                end_page = max_page

        mapping[node["node_id"]] = {
            "node": node,
            "start_index": start_page,
            "end_index": end_page,
        }

    return mapping
def print_tree(tree, exclude_fields=None, indent=None):
    """Print a document tree, either pretty-printed or as an indented outline.

    Two call styles are supported:

    * ``print_tree(tree)`` / ``print_tree(tree, exclude_fields=[...])``
      pretty-prints a copy of the tree with ``exclude_fields`` removed
      (default ``['text', 'page_index']``) and strings cut to 40 characters.
    * ``print_tree(tree, indent)`` or ``print_tree(tree, indent=n)`` prints
      one ``[node_id] title`` line per node, indented by nesting depth. An
      integer passed in the ``exclude_fields`` position is treated as
      ``indent``.

    Args:
        tree: A list of node dicts, or a single node dict.
        exclude_fields: Keys to drop in pretty-print mode, or an int indent.
        indent: Starting indentation level for outline mode.
    """
    if isinstance(exclude_fields, int):
        # Positional-indent call style: the second argument is the indent.
        indent = exclude_fields
        exclude_fields = None
    elif exclude_fields is None:
        exclude_fields = ['text', 'page_index']

    if indent is None:
        cleaned_tree = remove_fields(copy.deepcopy(tree), exclude_fields, max_len=40)
        pprint(cleaned_tree, sort_dicts=False, width=100)
        return

    indent = indent or 0
    # Accept a single root node as well as a list of nodes.
    if isinstance(tree, dict):
        tree = [tree]

    for node in tree:
        summary = node.get('summary') or node.get('prefix_summary', '')
        if summary:
            # Only mark the summary as shortened when it actually was.
            summary_str = f"{summary[:60]}..." if len(summary) > 60 else f"{summary}"
        else:
            summary_str = ""
        print(' ' * indent + f"[{node.get('node_id', '?')}] {node.get('title', '')}{summary_str}")
        if node.get('nodes'):
            print_tree(node['nodes'], exclude_fields=exclude_fields, indent=indent + 1)
def print_wrapped(text, width=100):
for line in text.splitlines():
print(textwrap.fill(line, width=width))