# mirror of https://github.com/VectifyAI/PageIndex.git
# synced 2026-05-13 00:32:36 +02:00
"""End-to-end smoke test of the legacy SDK compatibility layer against the real cloud API.

Run: PAGEINDEX_API_KEY=... uv run python scripts/e2e_legacy_sdk.py
"""

from __future__ import annotations

import os
import sys
import time
from pathlib import Path

from dotenv import load_dotenv

# Populate os.environ from a local .env file before the SDK is imported —
# presumably so PAGEINDEX_API_KEY is visible to anything pageindex reads at
# import time; confirm before reordering these statements.
load_dotenv()

from pageindex import PageIndexClient
def log(step: str, detail: str = "") -> None:
    """Print a flushed progress line: ``[e2e] <step>``, plus `` — <detail>`` when given."""
    suffix = f" — {detail}" if detail else ""
    print(f"[e2e] {step}{suffix}", flush=True)
def main() -> int:
    """Exercise each legacy-SDK call once against the real cloud API.

    Returns a process exit status: 0 on full success, 1 for missing
    prerequisites (API key or test PDF), 2 if processing never became
    retrieval-ready within the timeout. The uploaded document is deleted
    in all cases once it has been submitted.
    """
    # Prerequisite: API key from the environment.
    api_key = os.getenv("PAGEINDEX_API_KEY")
    if not api_key:
        print("PAGEINDEX_API_KEY not set", file=sys.stderr)
        return 1

    # Prerequisite: the fixture PDF checked into the repo.
    pdf = Path("examples/documents/attention-residuals.pdf")
    if not pdf.exists():
        print(f"Test PDF missing: {pdf}", file=sys.stderr)
        return 1

    client = PageIndexClient(api_key=api_key)
    log("init", f"cloud mode (key={api_key[:6]}…)")

    # 1) submit_document (legacy SDK signature — fire-and-forget)
    submitted = client.submit_document(file_path=str(pdf))
    doc_id = submitted["doc_id"]
    log("submit_document", f"doc_id={doc_id}")

    try:
        # 2) poll is_retrieval_ready (with hard timeout)
        deadline = time.time() + 600  # 10 min
        while time.time() < deadline:
            if not client.is_retrieval_ready(doc_id):
                time.sleep(8)
                continue
            log("is_retrieval_ready", "True")
            break
        else:  # deadline elapsed without a successful readiness check
            log("is_retrieval_ready", "TIMEOUT")
            return 2

        # 3) get_tree — the payload key differs between API versions
        tree = client.get_tree(doc_id)
        top_level = tree.get("result") or tree.get("tree") or []
        node_count = len(top_level)
        log("get_tree", f"top-level nodes={node_count}, status={tree.get('status')}")

        # 4) get_document (metadata)
        meta = client.get_document(doc_id)
        log("get_document", f"name={meta.get('name')!r} pages={meta.get('pageNum')} status={meta.get('status')}")

        # 5) chat_completions (non-stream)
        chat = client.chat_completions(
            messages=[{"role": "user", "content": "What is this paper about? Answer in one sentence."}],
            doc_id=doc_id,
        )
        first_choice = (chat.get("choices") or [{}])[0]
        answer = first_choice.get("message", {}).get("content", "")
        log("chat_completions", f"answer={answer[:120]!r}")

        # 6) chat_completions (stream) — full consumption
        log("chat_completions stream", "starting…")
        print("[stream] ", end="", flush=True)
        chunk_count = 0
        stream = client.chat_completions(
            messages=[{"role": "user", "content": "List 3 keywords from this paper."}],
            doc_id=doc_id,
            stream=True,
        )
        for chunk in stream:
            print(chunk, end="", flush=True)
            chunk_count += 1
        print()  # newline after streaming
        log("chat_completions stream", f"chunks received={chunk_count}")
    finally:
        # 7) delete_document — always clean up the uploaded document,
        # even when a step above failed or timed out.
        del_resp = client.delete_document(doc_id)
        log("delete_document", f"resp={del_resp}")

    log("done", "all steps OK")
    return 0
if __name__ == "__main__":
    # Propagate main()'s status code to the shell (equivalent to sys.exit).
    raise SystemExit(main())