Simplify agentic vectorless RAG demo (#191)

* Simplify and fix agentic RAG demo
* Show labeled reasoning output in RAG demo
* Comment out reasoning model settings by default
2026-04-24 23:56:21 +02:00 · 2026-03-28 09:42:46 +08:00 · 2026-03-28 09:42:46 +08:00 · d50c293309
commit d50c293309
parent 4002dc94de
1 changed files with 77 additions and 68 deletions
--- a/examples/agentic_vectorless_rag_demo.py
+++ b/examples/agentic_vectorless_rag_demo.py
@ -14,7 +14,6 @@ Steps:
  1 — Index PDF and inspect tree structure
  2 — Inspect document metadata
  3 — Ask a question (agent auto-calls tools)
  4 — Reload from workspace and verify persistence
 """
 import os
 import sys
@ -25,15 +24,17 @@ import requests
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from agents import Agent, ItemHelpers, Runner, function_tool
+from agents import Agent, Runner, function_tool
 from agents.model_settings import ModelSettings
 from agents.stream_events import RawResponsesStreamEvent, RunItemStreamEvent
-from openai.types.responses import ResponseTextDeltaEvent, ResponseReasoningSummaryTextDeltaEvent  # noqa: F401
+from openai.types.responses import ResponseTextDeltaEvent, ResponseReasoningSummaryTextDeltaEvent
 from pageindex import PageIndexClient
 import pageindex.utils as utils
 _EXAMPLES_DIR = os.path.dirname(os.path.abspath(__file__))
 PDF_URL = "https://arxiv.org/pdf/2603.15031"
 _EXAMPLES_DIR = os.path.dirname(os.path.abspath(__file__))
 PDF_PATH = os.path.join(_EXAMPLES_DIR, "documents", "attention-residuals.pdf")
 WORKSPACE = os.path.join(_EXAMPLES_DIR, "workspace")
@ -48,12 +49,7 @@ ANSWERING: Answer based only on tool output. Be concise.
 """
-def query_agent(
+def query_agent(client: PageIndexClient, doc_id: str, prompt: str, verbose: bool = False) -> str:
    client: PageIndexClient,
    doc_id: str,
    prompt: str,
    verbose: bool = False,
 ) -> str:
    """Run a document QA agent using the OpenAI Agents SDK.
    Streams text output token-by-token and returns the full answer string.
@ -84,42 +80,50 @@ def query_agent(
        instructions=AGENT_SYSTEM_PROMPT,
        tools=[get_document, get_document_structure, get_page_content],
        model=client.retrieve_model,
        # model_settings=ModelSettings(reasoning={"effort": "low", "summary": "auto"}),  # Uncomment to enable reasoning
    )
    async def _run():
        collected = []
        streamed_this_turn = False
        streamed_run = Runner.run_streamed(agent, prompt)
        current_stream_kind = None
        async for event in streamed_run.stream_events():
            if isinstance(event, RawResponsesStreamEvent):
                if isinstance(event.data, ResponseReasoningSummaryTextDeltaEvent):
-                    print(event.data.delta, end="", flush=True)
+                    if current_stream_kind != "reasoning":
-                elif isinstance(event.data, ResponseTextDeltaEvent):
+                        if current_stream_kind is not None:
                            print()
                        print("\n[reasoning]: ", end="", flush=True)
                    delta = event.data.delta
                    print(delta, end="", flush=True)
-                    collected.append(delta)
+                    current_stream_kind = "reasoning"
-                    streamed_this_turn = True
+                elif isinstance(event.data, ResponseTextDeltaEvent):
                    if current_stream_kind != "text":
                        if current_stream_kind is not None:
                            print()
                        print("\n[text]: ", end="", flush=True)
                    delta = event.data.delta
                    print(delta, end="", flush=True)
                    current_stream_kind = "text"
            elif isinstance(event, RunItemStreamEvent):
                item = event.item
-                if item.type == "message_output_item":
+                if item.type == "tool_call_item":
-                    if not streamed_this_turn:
+                    if current_stream_kind is not None:
-                        text = ItemHelpers.text_message_output(item)
+                        print()
                        if text:
                            print(f"{text}")
                    streamed_this_turn = False
                    collected.clear()
                elif item.type == "tool_call_item":
                    if streamed_this_turn:
                        print()  # end streaming line before tool call
                    raw = item.raw_item
                    args = getattr(raw, "arguments", "{}")
                    args_str = f"({args})" if verbose else ""
-                    print(f"[tool call]: {raw.name}{args_str}")
+                    print(f"\n[tool call]: {raw.name}{args_str}", flush=True)
                    current_stream_kind = None
                elif item.type == "tool_call_output_item" and verbose:
                    if current_stream_kind is not None:
                        print()
                    output = str(item.output)
                    preview = output[:200] + "..." if len(output) > 200 else output
-                    print(f"[tool output]: {preview}\n")
+                    print(f"\n[tool call output]: {preview}", flush=True)
-        return "".join(collected)
+                    current_stream_kind = None
        if current_stream_kind is not None:
            print()
        return "" if not streamed_run.final_output else str(streamed_run.final_output)
    try:
        asyncio.get_running_loop()
@ -129,7 +133,9 @@ def query_agent(
        return asyncio.run(_run())
-# ── Download PDF if needed ─────────────────────────────────────────────────────
+if __name__ == "__main__":
    # Download PDF if needed
    if not os.path.exists(PDF_PATH):
        print(f"Downloading {PDF_URL} ...")
        os.makedirs(os.path.dirname(PDF_PATH), exist_ok=True)
@ -141,15 +147,17 @@ if not os.path.exists(PDF_PATH):
                        f.write(chunk)
        print("Download complete.\n")
-# ── Setup ──────────────────────────────────────────────────────────────────────
+    # Setup
    client = PageIndexClient(workspace=WORKSPACE)
-# ── Step 1: Index + Tree ───────────────────────────────────────────────────────
+    # Step 1: Index + Tree
    print("=" * 60)
    print("Step 1: Indexing PDF and inspecting tree structure")
    print("=" * 60)
-doc_id = next((did for did, doc in client.documents.items()
+    doc_id = next(
-                if doc.get('doc_name') == os.path.basename(PDF_PATH)), None)
+        (did for did, doc in client.documents.items() if doc.get('doc_name') == os.path.basename(PDF_PATH)),
        None,
    )
    if doc_id:
        print(f"\nLoaded cached doc_id: {doc_id}")
    else:
@ -159,16 +167,17 @@ print("\nTree Structure (top-level sections):")
    structure = json.loads(client.get_document_structure(doc_id))
    utils.print_tree(structure)
-# ── Step 2: Document Metadata ──────────────────────────────────────────────────
+    # Step 2: Document Metadata
    print("\n" + "=" * 60)
    print("Step 2: Document Metadata (get_document)")
    print("=" * 60)
-print(client.get_document(doc_id))
+    doc_metadata = client.get_document(doc_id)
    print(f"\n{doc_metadata}")
-# ── Step 3: Agent Query ────────────────────────────────────────────────────────
+    # Step 3: Agent Query
    print("\n" + "=" * 60)
    print("Step 3: Agent Query (auto tool-use)")
    print("=" * 60)
    question = "Explain Attention Residuals in simple language."
-print(f"\nQuestion: '{question}'\n")
+    print(f"\nQuestion: '{question}'")
    query_agent(client, doc_id, question, verbose=True)