mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-05-19 18:35:16 +02:00
feat(collection): doc_ids accepts str|list, design cleanups
- Collection.query and Backend.query/query_stream accept doc_ids as str, list[str] or None. Single str is normalized to [str] inside each backend; bare [] is rejected with ValueError at both layers.
- wrap_with_doc_context wraps the scoped doc list in <docs>...</docs> and SCOPED_SYSTEM_PROMPT instructs the agent to treat that block as data, not instructions (defense against prompt injection via auto-generated doc_description).
- _require_cloud_api now distinguishes api_key="" from api_key=None; the former gives a targeted error pointing at the empty-string vs fall-back-to-local situation when legacy SDK methods are called.
- Legacy PageIndexClient.list_documents docstring spells out the return-shape difference vs collection.list_documents() to flag a silent migration footgun (paginated dict with id/name keys vs plain list[dict] with doc_id/doc_name keys).
- Remove dead CloudBackend.get_agent_tools stub (not on the Backend protocol; only ever returned an empty AgentTools()) and the SYSTEM_PROMPT alias (OPEN_/SCOPED_SYSTEM_PROMPT are the explicit names now).
- README quick start and streaming example now pass doc_ids; new multi-document section shows both str and list forms.
- examples/demo_query_modes.py exercises all five query-mode cases (single-doc, multi-doc with/without env var, scoped single, scoped multi) for manual verification.
This commit is contained in:
parent
d7b36aaf3f
commit
a47c36a3f5
13 changed files with 322 additions and 45 deletions
|
|
@ -55,7 +55,10 @@ class PageIndexClient:
|
|||
def __init__(self, api_key: str | None = None, model: str = None,
|
||||
retrieve_model: str = None, storage_path: str = None,
|
||||
storage=None, index_config: IndexConfig | dict = None):
|
||||
if api_key == "":
|
||||
# Track whether api_key was passed as empty string vs None — only
|
||||
# affects the error message when legacy cloud methods are then called.
|
||||
self._empty_api_key = api_key == ""
|
||||
if self._empty_api_key:
|
||||
import logging
|
||||
logging.getLogger(__name__).warning(
|
||||
"PageIndexClient received an empty api_key; falling back to local mode. "
|
||||
|
|
@ -150,6 +153,13 @@ class PageIndexClient:
|
|||
def _require_cloud_api(self):
|
||||
if self._legacy_cloud_api is None:
|
||||
from .errors import PageIndexAPIError
|
||||
if getattr(self, "_empty_api_key", False):
|
||||
raise PageIndexAPIError(
|
||||
"Cannot call legacy SDK methods: api_key was an empty string, "
|
||||
"so PageIndexClient fell back to local mode. Pass a real "
|
||||
"PageIndex cloud API key, or migrate to the Collection API "
|
||||
"(client.collection(...)) for local mode."
|
||||
)
|
||||
raise PageIndexAPIError(
|
||||
"This method is part of the pageindex 0.2.x cloud SDK API. "
|
||||
"Initialize with api_key to use it."
|
||||
|
|
@ -239,7 +249,20 @@ class PageIndexClient:
|
|||
offset: int = 0,
|
||||
folder_id: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Legacy SDK compatibility — prefer ``collection.list_documents()``."""
|
||||
"""Legacy SDK compatibility — prefer ``collection.list_documents()``.
|
||||
|
||||
Note the return shape differs between the two APIs:
|
||||
|
||||
- This legacy method returns the raw API envelope
|
||||
``{"documents": [...], "total": int, "limit": int, "offset": int}``
|
||||
where each document carries keys ``id`` / ``name`` / ``description``.
|
||||
- ``collection.list_documents()`` returns a plain ``list[dict]`` where
|
||||
each entry uses keys ``doc_id`` / ``doc_name`` / ``doc_description``
|
||||
/ ``doc_type`` and is not paginated.
|
||||
|
||||
Code that migrates by a simple name swap will silently break — update
|
||||
callers to the new key names and dropped pagination envelope.
|
||||
"""
|
||||
return self._require_cloud_api().list_documents(
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
|
|
@ -293,6 +316,7 @@ class LocalClient(PageIndexClient):
|
|||
def __init__(self, model: str = None, retrieve_model: str = None,
|
||||
storage_path: str = None, storage=None,
|
||||
index_config: IndexConfig | dict = None):
|
||||
self._empty_api_key = False
|
||||
self._init_local(model, retrieve_model, storage_path, storage, index_config)
|
||||
|
||||
|
||||
|
|
@ -300,4 +324,5 @@ class CloudClient(PageIndexClient):
|
|||
"""Cloud mode — fully managed by PageIndex cloud service. No LLM key needed."""
|
||||
|
||||
def __init__(self, api_key: str):
|
||||
self._empty_api_key = False
|
||||
self._init_cloud(api_key)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue