mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-05-13 16:52:37 +02:00
feat: compatible with PageIndex SDK (#238)
* feat:compatible with Pageindex SDK
* corner cases fixed
* fix: mock behavior of old SDK
* fix: close streaming response and warn on empty api_key
- LegacyCloudAPI: close response in `finally` for both _stream_chat_response
variants so abandoned iterators no longer leak the TCP connection.
- PageIndexClient: emit a warning instead of silently falling back to local
when api_key is the empty string, surfacing typical env-var-unset misconfig.
- FakeResponse: add close()/closed to match the real requests.Response API.
- Add unit coverage for stream close (both paths) and the empty-api_key warning.
- Add scripts/e2e_legacy_sdk.py to smoke-test the legacy SDK contract end-to-end
against api.pageindex.ai.
* chore: mark legacy SDK methods with @deprecated and docstring pointers
- Decorate the 12 PageIndexClient cloud-SDK compat methods with
@typing_extensions.deprecated(..., category=PendingDeprecationWarning):
- IDE/type-checkers render them with a strikethrough hint
- runtime warnings stay silent by default (no spam for existing callers),
surfaceable via `python -W default::PendingDeprecationWarning`
- Add a one-line docstring on each pointing to the Collection-based equivalent.
- Promote typing-extensions to a direct dependency (was transitive via litellm).
---------
Co-authored-by: XinyanZhou <xinyanzhou@XinyanZhoudeMacBook-Pro.local>
Co-authored-by: saccharin98 <xinyanzhou938@gmail.com>
Co-authored-by: mountain <kose2livs@gmail.com>
This commit is contained in:
parent
6d29886892
commit
595895cf28
10 changed files with 1030 additions and 20 deletions
94
scripts/e2e_legacy_sdk.py
Normal file
94
scripts/e2e_legacy_sdk.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
"""End-to-end smoke test of the legacy SDK compatibility layer against the real cloud API.
|
||||
|
||||
Run: PAGEINDEX_API_KEY=... uv run python scripts/e2e_legacy_sdk.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from pageindex import PageIndexClient
|
||||
|
||||
|
||||
def log(step: str, detail: str = "") -> None:
    """Emit one flushed progress line for a smoke-test step.

    Prints ``[e2e] <step>`` and appends `` — <detail>`` only when a
    non-empty detail string is given.
    """
    if detail:
        line = f"[e2e] {step}" + f" — {detail}"
    else:
        line = f"[e2e] {step}"
    print(line, flush=True)
|
||||
|
||||
|
||||
def main() -> int:
    """Drive the full legacy-SDK workflow against the live cloud API.

    Steps: submit a sample PDF, poll until retrieval-ready, fetch the
    tree and document metadata, run one non-streaming and one streaming
    chat completion, and always delete the document afterwards.

    Returns:
        0 on success, 1 when a prerequisite is missing (API key or test
        PDF), 2 when the document never became retrieval-ready in time.
    """
    api_key = os.environ.get("PAGEINDEX_API_KEY")
    if not api_key:
        print("PAGEINDEX_API_KEY not set", file=sys.stderr)
        return 1

    pdf = Path("examples/documents/attention-residuals.pdf")
    if not pdf.exists():
        print(f"Test PDF missing: {pdf}", file=sys.stderr)
        return 1

    client = PageIndexClient(api_key=api_key)
    log("init", f"cloud mode (key={api_key[:6]}…)")

    # 1) submit_document (legacy SDK signature — fire-and-forget)
    submit_resp = client.submit_document(file_path=str(pdf))
    doc_id = submit_resp["doc_id"]
    log("submit_document", f"doc_id={doc_id}")

    try:
        # 2) poll is_retrieval_ready (with hard timeout)
        deadline = time.time() + 600  # 10 min
        became_ready = False
        while time.time() < deadline:
            if client.is_retrieval_ready(doc_id):
                became_ready = True
                break
            time.sleep(8)
        if not became_ready:
            log("is_retrieval_ready", "TIMEOUT")
            return 2
        log("is_retrieval_ready", "True")

        # 3) get_tree — newer responses use "result", older ones "tree"
        tree = client.get_tree(doc_id)
        top_nodes = tree.get("result") or tree.get("tree") or []
        node_count = len(top_nodes)
        log("get_tree", f"top-level nodes={node_count}, status={tree.get('status')}")

        # 4) get_document (metadata)
        meta = client.get_document(doc_id)
        log("get_document", f"name={meta.get('name')!r} pages={meta.get('pageNum')} status={meta.get('status')}")

        # 5) chat_completions (non-stream)
        chat = client.chat_completions(
            messages=[{"role": "user", "content": "What is this paper about? Answer in one sentence."}],
            doc_id=doc_id,
        )
        choices = chat.get("choices") or [{}]
        answer = choices[0].get("message", {}).get("content", "")
        log("chat_completions", f"answer={answer[:120]!r}")

        # 6) chat_completions (stream) — full consumption
        log("chat_completions stream", "starting…")
        print("[stream] ", end="", flush=True)
        chunk_count = 0
        stream = client.chat_completions(
            messages=[{"role": "user", "content": "List 3 keywords from this paper."}],
            doc_id=doc_id,
            stream=True,
        )
        for piece in stream:
            print(piece, end="", flush=True)
            chunk_count += 1
        print()  # newline after streaming
        log("chat_completions stream", f"chunks received={chunk_count}")

    finally:
        # 7) delete_document — clean up even on failure or timeout
        del_resp = client.delete_document(doc_id)
        log("delete_document", f"resp={del_resp}")

    log("done", "all steps OK")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell.
    raise SystemExit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue