""" Agentic Vectorless RAG with PageIndex SDK - Cloud Demo Uses CloudClient for fully-managed document indexing and QA. No LLM API key needed — the cloud service handles everything. Steps: 1 — Upload and index a PDF via PageIndex cloud 2 — Stream a question with tool call visibility Requirements: pip install pageindex export PAGEINDEX_API_KEY=your-api-key """ import asyncio import os from pathlib import Path import requests from pageindex import CloudClient _EXAMPLES_DIR = Path(__file__).parent PDF_URL = "https://arxiv.org/pdf/1706.03762.pdf" PDF_PATH = _EXAMPLES_DIR / "documents" / "attention.pdf" # Download PDF if needed if not PDF_PATH.exists(): print(f"Downloading {PDF_URL} ...") PDF_PATH.parent.mkdir(parents=True, exist_ok=True) with requests.get(PDF_URL, stream=True, timeout=30) as r: r.raise_for_status() with open(PDF_PATH, "wb") as f: for chunk in r.iter_content(chunk_size=8192): if chunk: f.write(chunk) print("Download complete.\n") client = CloudClient(api_key=os.environ["PAGEINDEX_API_KEY"]) col = client.collection() doc_id = col.add(str(PDF_PATH)) print(f"Indexed: {doc_id}\n") # Streaming query stream = col.query("What is the main contribution of this paper?", stream=True) async def main(): streamed_text = False async for event in stream: if event.type == "answer_delta": print(event.data, end="", flush=True) streamed_text = True elif event.type == "tool_call": if streamed_text: print() streamed_text = False args = event.data.get("args", "") print(f"[tool call] {event.data['name']}({args})") elif event.type == "answer_done": print() streamed_text = False asyncio.run(main())