diff --git a/examples/agentic_vectorless_rag_demo.py b/examples/agentic_vectorless_rag_demo.py index e754e79..bb11cfd 100644 --- a/examples/agentic_vectorless_rag_demo.py +++ b/examples/agentic_vectorless_rag_demo.py @@ -14,7 +14,6 @@ Steps: 1 — Index PDF and inspect tree structure 2 — Inspect document metadata 3 — Ask a question (agent auto-calls tools) - 4 — Reload from workspace and verify persistence """ import os import sys @@ -25,15 +24,17 @@ import requests sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from agents import Agent, ItemHelpers, Runner, function_tool +from agents import Agent, Runner, function_tool +from agents.model_settings import ModelSettings from agents.stream_events import RawResponsesStreamEvent, RunItemStreamEvent -from openai.types.responses import ResponseTextDeltaEvent, ResponseReasoningSummaryTextDeltaEvent # noqa: F401 +from openai.types.responses import ResponseTextDeltaEvent, ResponseReasoningSummaryTextDeltaEvent from pageindex import PageIndexClient import pageindex.utils as utils -_EXAMPLES_DIR = os.path.dirname(os.path.abspath(__file__)) PDF_URL = "https://arxiv.org/pdf/2603.15031" + +_EXAMPLES_DIR = os.path.dirname(os.path.abspath(__file__)) PDF_PATH = os.path.join(_EXAMPLES_DIR, "documents", "attention-residuals.pdf") WORKSPACE = os.path.join(_EXAMPLES_DIR, "workspace") @@ -48,12 +49,7 @@ ANSWERING: Answer based only on tool output. Be concise. """ -def query_agent( - client: PageIndexClient, - doc_id: str, - prompt: str, - verbose: bool = False, -) -> str: +def query_agent(client: PageIndexClient, doc_id: str, prompt: str, verbose: bool = False) -> str: """Run a document QA agent using the OpenAI Agents SDK. Streams text output token-by-token and returns the full answer string. @@ -84,42 +80,50 @@ def query_agent( instructions=AGENT_SYSTEM_PROMPT, tools=[get_document, get_document_structure, get_page_content], model=client.retrieve_model, + # model_settings=ModelSettings(reasoning={"effort": "low", "summary": "auto"}), # Uncomment to enable reasoning ) async def _run(): - collected = [] - streamed_this_turn = False streamed_run = Runner.run_streamed(agent, prompt) + current_stream_kind = None async for event in streamed_run.stream_events(): if isinstance(event, RawResponsesStreamEvent): if isinstance(event.data, ResponseReasoningSummaryTextDeltaEvent): - print(event.data.delta, end="", flush=True) - elif isinstance(event.data, ResponseTextDeltaEvent): + if current_stream_kind != "reasoning": + if current_stream_kind is not None: + print() + print("\n[reasoning]: ", end="", flush=True) delta = event.data.delta print(delta, end="", flush=True) - collected.append(delta) - streamed_this_turn = True + current_stream_kind = "reasoning" + elif isinstance(event.data, ResponseTextDeltaEvent): + if current_stream_kind != "text": + if current_stream_kind is not None: + print() + print("\n[text]: ", end="", flush=True) + delta = event.data.delta + print(delta, end="", flush=True) + current_stream_kind = "text" elif isinstance(event, RunItemStreamEvent): item = event.item - if item.type == "message_output_item": - if not streamed_this_turn: - text = ItemHelpers.text_message_output(item) - if text: - print(f"{text}") - streamed_this_turn = False - collected.clear() - elif item.type == "tool_call_item": - if streamed_this_turn: - print() # end streaming line before tool call + if item.type == "tool_call_item": + if current_stream_kind is not None: + print() raw = item.raw_item args = getattr(raw, "arguments", "{}") args_str = f"({args})" if verbose else "" - print(f"[tool call]: {raw.name}{args_str}") + print(f"\n[tool call]: {raw.name}{args_str}", flush=True) + current_stream_kind = None elif item.type == "tool_call_output_item" and verbose: + if current_stream_kind is not None: + print() output = str(item.output) preview = output[:200] + "..." if len(output) > 200 else output - print(f"[tool output]: {preview}\n") - return "".join(collected) + print(f"\n[tool call output]: {preview}", flush=True) + current_stream_kind = None + if current_stream_kind is not None: + print() + return "" if not streamed_run.final_output else str(streamed_run.final_output) try: asyncio.get_running_loop() @@ -129,46 +133,51 @@ def query_agent( return asyncio.run(_run()) -# ── Download PDF if needed ───────────────────────────────────────────────────── -if not os.path.exists(PDF_PATH): - print(f"Downloading {PDF_URL} ...") - os.makedirs(os.path.dirname(PDF_PATH), exist_ok=True) - with requests.get(PDF_URL, stream=True, timeout=30) as r: - r.raise_for_status() - with open(PDF_PATH, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - if chunk: - f.write(chunk) - print("Download complete.\n") +if __name__ == "__main__": -# ── Setup ────────────────────────────────────────────────────────────────────── -client = PageIndexClient(workspace=WORKSPACE) + # Download PDF if needed + if not os.path.exists(PDF_PATH): + print(f"Downloading {PDF_URL} ...") + os.makedirs(os.path.dirname(PDF_PATH), exist_ok=True) + with requests.get(PDF_URL, stream=True, timeout=30) as r: + r.raise_for_status() + with open(PDF_PATH, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + print("Download complete.\n") -# ── Step 1: Index + Tree ─────────────────────────────────────────────────────── -print("=" * 60) -print("Step 1: Indexing PDF and inspecting tree structure") -print("=" * 60) -doc_id = next((did for did, doc in client.documents.items() - if doc.get('doc_name') == os.path.basename(PDF_PATH)), None) -if doc_id: - print(f"\nLoaded cached doc_id: {doc_id}") -else: - doc_id = client.index(PDF_PATH) - print(f"\nIndexed. doc_id: {doc_id}") -print("\nTree Structure (top-level sections):") -structure = json.loads(client.get_document_structure(doc_id)) -utils.print_tree(structure) + # Setup + client = PageIndexClient(workspace=WORKSPACE) -# ── Step 2: Document Metadata ────────────────────────────────────────────────── -print("\n" + "=" * 60) -print("Step 2: Document Metadata (get_document)") -print("=" * 60) -print(client.get_document(doc_id)) + # Step 1: Index + Tree + print("=" * 60) + print("Step 1: Indexing PDF and inspecting tree structure") + print("=" * 60) + doc_id = next( + (did for did, doc in client.documents.items() if doc.get('doc_name') == os.path.basename(PDF_PATH)), + None, + ) + if doc_id: + print(f"\nLoaded cached doc_id: {doc_id}") + else: + doc_id = client.index(PDF_PATH) + print(f"\nIndexed. doc_id: {doc_id}") + print("\nTree Structure (top-level sections):") + structure = json.loads(client.get_document_structure(doc_id)) + utils.print_tree(structure) -# ── Step 3: Agent Query ──────────────────────────────────────────────────────── -print("\n" + "=" * 60) -print("Step 3: Agent Query (auto tool-use)") -print("=" * 60) -question = "Explain Attention Residuals in simple language." -print(f"\nQuestion: '{question}'\n") -query_agent(client, doc_id, question, verbose=True) + # Step 2: Document Metadata + print("\n" + "=" * 60) + print("Step 2: Document Metadata (get_document)") + print("=" * 60) + doc_metadata = client.get_document(doc_id) + print(f"\n{doc_metadata}") + + # Step 3: Agent Query + print("\n" + "=" * 60) + print("Step 3: Agent Query (auto tool-use)") + print("=" * 60) + question = "Explain Attention Residuals in simple language." + print(f"\nQuestion: '{question}'") + query_agent(client, doc_id, question, verbose=True)