mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-05-13 00:32:36 +02:00
* feat: compatible with PageIndex SDK
* corner cases fixed
* fix: mock behavior of old SDK
* fix: close streaming response and warn on empty api_key
- LegacyCloudAPI: close response in `finally` for both _stream_chat_response
variants so abandoned iterators no longer leak the TCP connection.
- PageIndexClient: emit a warning instead of silently falling back to local
when api_key is the empty string, surfacing typical env-var-unset misconfig.
- FakeResponse: add close()/closed to match the real requests.Response API.
- Add unit coverage for stream close (both paths) and the empty-api_key warning.
- Add scripts/e2e_legacy_sdk.py to smoke-test the legacy SDK contract end-to-end
against api.pageindex.ai.
* chore: mark legacy SDK methods with @deprecated and docstring pointers
- Decorate the 12 PageIndexClient cloud-SDK compat methods with
@typing_extensions.deprecated(..., category=PendingDeprecationWarning):
- IDE/type-checkers render them with a strikethrough hint
- runtime warnings stay silent by default (no spam for existing callers),
surfaceable via `python -W default::PendingDeprecationWarning`
- Add a one-line docstring on each pointing to the Collection-based equivalent.
- Promote typing-extensions to a direct dependency (was transitive via litellm).
---------
Co-authored-by: XinyanZhou <xinyanzhou@XinyanZhoudeMacBook-Pro.local>
Co-authored-by: saccharin98 <xinyanzhou938@gmail.com>
Co-authored-by: mountain <kose2livs@gmail.com>
94 lines
3 KiB
Python
94 lines
3 KiB
Python
"""End-to-end smoke test of the legacy SDK compatibility layer against the real cloud API.

Run: PAGEINDEX_API_KEY=... uv run python scripts/e2e_legacy_sdk.py
"""
|
|
from __future__ import annotations
|
|
import os
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
load_dotenv()
|
|
|
|
from pageindex import PageIndexClient
|
|
|
|
|
|
def log(step: str, detail: str = "") -> None:
    """Emit one flushed progress line, e.g. ``[e2e] step — detail``.

    The detail suffix is omitted entirely when *detail* is empty.
    """
    suffix = f" — {detail}" if detail else ""
    print(f"[e2e] {step}{suffix}", flush=True)
|
|
|
|
|
|
def main() -> int:
    """Drive the legacy SDK surface end-to-end against the live cloud API.

    Steps: submit a PDF, poll readiness, fetch tree and metadata, run a
    non-streaming and a streaming chat completion, then delete the document
    (always, via ``finally``).

    Returns:
        0 on success, 1 on a missing prerequisite (API key or test PDF),
        2 if the document never became retrieval-ready within the deadline.
    """
    api_key = os.environ.get("PAGEINDEX_API_KEY")
    if not api_key:
        print("PAGEINDEX_API_KEY not set", file=sys.stderr)
        return 1

    pdf_path = Path("examples/documents/attention-residuals.pdf")
    if not pdf_path.exists():
        print(f"Test PDF missing: {pdf_path}", file=sys.stderr)
        return 1

    client = PageIndexClient(api_key=api_key)
    log("init", f"cloud mode (key={api_key[:6]}…)")

    # Step 1: submit via the legacy fire-and-forget signature.
    doc_id = client.submit_document(file_path=str(pdf_path))["doc_id"]
    log("submit_document", f"doc_id={doc_id}")

    try:
        # Step 2: poll readiness with a hard 10-minute deadline.
        ready = False
        deadline = time.time() + 600
        while time.time() < deadline:
            if client.is_retrieval_ready(doc_id):
                ready = True
                break
            time.sleep(8)
        if not ready:
            log("is_retrieval_ready", "TIMEOUT")
            return 2
        log("is_retrieval_ready", "True")

        # Step 3: fetch the generated tree.
        tree = client.get_tree(doc_id)
        top_nodes = len(tree.get("result") or tree.get("tree") or [])
        log("get_tree", f"top-level nodes={top_nodes}, status={tree.get('status')}")

        # Step 4: fetch document metadata.
        meta = client.get_document(doc_id)
        log("get_document", f"name={meta.get('name')!r} pages={meta.get('pageNum')} status={meta.get('status')}")

        # Step 5: non-streaming chat completion.
        chat = client.chat_completions(
            messages=[{"role": "user", "content": "What is this paper about? Answer in one sentence."}],
            doc_id=doc_id,
        )
        first_choice = (chat.get("choices") or [{}])[0]
        answer = first_choice.get("message", {}).get("content", "")
        log("chat_completions", f"answer={answer[:120]!r}")

        # Step 6: streaming chat completion, consumed to exhaustion.
        log("chat_completions stream", "starting…")
        print("[stream] ", end="", flush=True)
        n_chunks = 0
        stream = client.chat_completions(
            messages=[{"role": "user", "content": "List 3 keywords from this paper."}],
            doc_id=doc_id,
            stream=True,
        )
        for piece in stream:
            print(piece, end="", flush=True)
            n_chunks += 1
        print()  # terminate the streamed line
        log("chat_completions stream", f"chunks received={n_chunks}")
    finally:
        # Step 7: always clean up the uploaded document, even on timeout/failure.
        log("delete_document", f"resp={client.delete_document(doc_id)}")

    log("done", "all steps OK")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: propagate main()'s exit code to the shell.
    raise SystemExit(main())
|