mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-04-26 00:26:21 +02:00
feat: add PageIndex SDK with local/cloud dual-mode support (#207)
This commit is contained in:
parent
f2dcffc0b7
commit
c7fe93bb56
45 changed files with 4225 additions and 274 deletions
62
examples/cloud_demo.py
Normal file
62
examples/cloud_demo.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
"""
|
||||
Agentic Vectorless RAG with PageIndex SDK - Cloud Demo
|
||||
|
||||
Uses CloudClient for fully-managed document indexing and QA.
|
||||
No LLM API key needed — the cloud service handles everything.
|
||||
|
||||
Steps:
|
||||
1 — Upload and index a PDF via PageIndex cloud
|
||||
2 — Stream a question with tool call visibility
|
||||
|
||||
Requirements:
|
||||
pip install pageindex
|
||||
export PAGEINDEX_API_KEY=your-api-key
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from pageindex import CloudClient
|
||||
|
||||
_EXAMPLES_DIR = Path(__file__).parent
|
||||
PDF_URL = "https://arxiv.org/pdf/1706.03762.pdf"
|
||||
PDF_PATH = _EXAMPLES_DIR / "documents" / "attention.pdf"
|
||||
|
||||
# Download PDF if needed
if not PDF_PATH.exists():
    print(f"Downloading {PDF_URL} ...")
    PDF_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Stream into a sibling temp file and rename only on success. Writing
    # straight to PDF_PATH would leave a truncated file behind on a network
    # error, and the exists() check above would then accept it on every
    # later run.
    tmp_path = PDF_PATH.with_name(PDF_PATH.name + ".part")
    try:
        with requests.get(PDF_URL, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        tmp_path.replace(PDF_PATH)
    finally:
        # No-op after a successful rename; removes the partial file otherwise.
        tmp_path.unlink(missing_ok=True)
    print("Download complete.\n")
|
||||
|
||||
# The API key is read from the environment; a missing variable raises KeyError.
api_key = os.environ["PAGEINDEX_API_KEY"]
client = CloudClient(api_key=api_key)
col = client.collection()

# Add the local PDF to the collection and report the value it returns.
doc_id = col.add(str(PDF_PATH))
print(f"Indexed: {doc_id}\n")

# Streaming query
question = "What is the main contribution of this paper?"
stream = col.query(question, stream=True)
|
||||
|
||||
async def main():
    """Print events from the module-level ``stream`` as they arrive.

    ``answer_delta`` events are written without a newline so consecutive
    deltas join into one line. ``tool_call`` events are printed on a line of
    their own, after first terminating any partial answer line.
    ``answer_done`` ends the current line. Other event types are ignored.
    """
    # True while the last thing written was answer text with no newline yet.
    streamed_text = False
    async for event in stream:
        if event.type == "answer_delta":
            print(event.data, end="", flush=True)
            streamed_text = True
        elif event.type == "tool_call":
            if streamed_text:
                print()
                streamed_text = False
            args = event.data.get("args", "")
            print(f"[tool call] {event.data['name']}({args})")
        elif event.type == "answer_done":
            print()
            streamed_text = False
    # The stream may end without an ``answer_done`` event; terminate the
    # dangling line so whatever prints next starts on a fresh one.
    if streamed_text:
        print()
|
||||
|
||||
# Run the async consumer to completion on a new event loop.
asyncio.run(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue