mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-04-24 23:56:21 +02:00
62 lines
1.8 KiB
Python
62 lines
1.8 KiB
Python
"""
|
|
Agentic Vectorless RAG with PageIndex SDK - Cloud Demo
|
|
|
|
Uses CloudClient for fully-managed document indexing and QA.
|
|
No LLM API key needed — the cloud service handles everything.
|
|
|
|
Steps:
|
|
1 — Upload and index a PDF via PageIndex cloud
|
|
2 — Stream a question with tool call visibility
|
|
|
|
Requirements:
|
|
pip install pageindex
|
|
export PAGEINDEX_API_KEY=your-api-key
|
|
"""
|
|
import asyncio
|
|
import os
|
|
from pathlib import Path
|
|
import requests
|
|
from pageindex import CloudClient
|
|
|
|
# Location of this script; the sample document is stored beneath it.
_EXAMPLES_DIR = Path(__file__).parent
PDF_URL = "https://arxiv.org/pdf/1706.03762.pdf"
PDF_PATH = _EXAMPLES_DIR / "documents" / "attention.pdf"

# Download PDF if needed
if not PDF_PATH.exists():
    print(f"Downloading {PDF_URL} ...")
    PDF_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Stream into a sibling ".part" file and only rename it to PDF_PATH once
    # the whole body has been written. Writing straight to PDF_PATH would
    # leave a truncated file behind on a failed/interrupted transfer, and the
    # exists() check above would then skip the download on every later run.
    _partial_path = PDF_PATH.with_name(PDF_PATH.name + ".part")
    try:
        with requests.get(PDF_URL, stream=True, timeout=30) as r:
            # Fail on HTTP error statuses instead of saving an error page.
            r.raise_for_status()
            with open(_partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        # Same-directory rename, so the final file appears all at once.
        _partial_path.replace(PDF_PATH)
    finally:
        # After a successful rename the partial file no longer exists; on any
        # failure this removes the incomplete download.
        if _partial_path.exists():
            _partial_path.unlink()
    print("Download complete.\n")
|
|
|
|
# The API key is read from the environment; a missing PAGEINDEX_API_KEY
# raises KeyError at this point.
_api_key = os.environ["PAGEINDEX_API_KEY"]
client = CloudClient(api_key=_api_key)
col = client.collection()

# Step 1: hand the local PDF to the collection and report the returned id.
doc_id = col.add(str(PDF_PATH))
print(f"Indexed: {doc_id}\n")

# Streaming query
# Step 2: request a streamed answer; the resulting object is consumed by
# main() below with `async for`.
_question = "What is the main contribution of this paper?"
stream = col.query(_question, stream=True)
|
|
|
|
async def main():
    """Print the events of the module-level ``stream`` as they arrive.

    Handles three event types:

    * ``answer_delta`` - ``event.data`` is printed without a trailing
      newline and flushed, so the answer appears incrementally.
    * ``tool_call`` - printed on a line of its own as
      ``[tool call] name(args)``; an unfinished answer line is ended first.
    * ``answer_done`` - ends the current output line.

    Events of any other type are ignored.
    """
    # True while the cursor sits at the end of partially printed answer text.
    mid_line = False
    async for event in stream:
        kind = event.type

        if kind == "answer_delta":
            print(event.data, end="", flush=True)
            mid_line = True
            continue

        if kind == "answer_done":
            print()
            mid_line = False
            continue

        if kind != "tool_call":
            continue

        # Tool call: make sure it starts on a fresh line.
        if mid_line:
            print()
            mid_line = False
        payload = event.data
        # "args" is optional; "name" is looked up unconditionally.
        call_args = payload.get("args", "")
        print(f"[tool call] {payload['name']}({call_args})")
|
|
|
|
# Run the async consumer to completion on a fresh event loop.
asyncio.run(main())
|