From 5d4491f3bfc3944a2051d431c5527169775a5a82 Mon Sep 17 00:00:00 2001 From: Kylin Date: Thu, 26 Mar 2026 23:19:50 +0800 Subject: [PATCH] Add PageIndexClient with agent-based retrieval via OpenAI Agents SDK (#125) * Add PageIndexClient with retrieve, streaming support and litellm integration * Add OpenAI agents demo example * Update README with example agent demo section * Support separate retrieve_model configuration for index and retrieve --- README.md | 26 ++++- examples/openai_agents_demo.py | 173 +++++++++++++++++++++++++++++++++ pageindex/__init__.py | 4 +- pageindex/client.py | 132 +++++++++++++++++++++++++ pageindex/config.yaml | 1 + pageindex/page_index.py | 2 +- pageindex/retrieve.py | 139 ++++++++++++++++++++++++++ pageindex/utils.py | 30 ++++++ requirements.txt | 1 + 9 files changed, 501 insertions(+), 7 deletions(-) create mode 100644 examples/openai_agents_demo.py create mode 100644 pageindex/client.py create mode 100644 pageindex/retrieve.py diff --git a/README.md b/README.md index 7180efd..23852a6 100644 --- a/README.md +++ b/README.md @@ -147,15 +147,17 @@ You can follow these steps to generate a PageIndex tree from a PDF document. pip3 install --upgrade -r requirements.txt ``` -### 2. Set your OpenAI API key +### 2. Set your LLM API key -Create a `.env` file in the root directory and add your API key: +Create a `.env` file in the root directory with your LLM API key: ```bash -CHATGPT_API_KEY=your_openai_key_here +OPENAI_API_KEY=your_openai_key_here +# or +CHATGPT_API_KEY=your_openai_key_here # legacy, still supported ``` -### 3. Run PageIndex on your PDF +### 3. Generate PageIndex structure for your PDF ```bash python3 run_pageindex.py --pdf_path /path/to/your/document.pdf @@ -189,7 +191,21 @@ python3 run_pageindex.py --md_path /path/to/your/document.md > Note: in this function, we use "#" to determine node heading and their levels. For example, "##" is level 2, "###" is level 3, etc. Make sure your markdown file is formatted correctly. 
If your Markdown file was converted from a PDF or HTML, we don't recommend using this function, since most existing conversion tools cannot preserve the original hierarchy. Instead, use our [PageIndex OCR](https://pageindex.ai/blog/ocr), which is designed to preserve the original hierarchy, to convert the PDF to a markdown file and then use this function. -