feat(collection): scoped query mode and experimental multi-doc warning

- get_agent_tools branches on doc_ids:
  - scoped (doc_ids=[...]): drops list_documents and hard-enforces a
    whitelist on the remaining tools; system prompt switches to
    SCOPED_SYSTEM_PROMPT (no list_documents instruction); doc list +
    summaries are prepended to the user message via wrap_with_doc_context.
  - open (doc_ids=None): unchanged 4-tool agent loop.
- list_documents now exposes doc_description (sqlite + cloud).
- Collection.query emits a UserWarning when doc_ids is None and the
  collection holds more than one document; setting
  PAGEINDEX_EXPERIMENTAL_MULTIDOC=1 silences it. Single-doc collections
  skip the warning; empty collections raise ValueError.
- Agents SDK tracing upload disabled by default (avoids SSL timeouts);
  PAGEINDEX_AGENTS_TRACING=1 re-enables it.
- README: new SDK Usage section covering local/cloud quick start,
  streaming, multi-doc as experimental, and runnable examples.
This commit is contained in:
mountain 2026-05-15 11:14:12 +08:00
parent cbea31d1a2
commit d7b36aaf3f
8 changed files with 348 additions and 25 deletions

View file

@ -39,3 +39,46 @@ def test_delete_document(col):
def test_name_property(col):
    """The fixture collection exposes ``"papers"`` through its ``name`` property."""
    expected_name = "papers"
    assert col.name == expected_name
def test_query_without_doc_ids_warns_when_multidoc(col, monkeypatch):
    """An unscoped query over two documents warns and still returns the answer."""
    # Ensure the opt-in variable is absent so nothing suppresses the warning.
    monkeypatch.delenv("PAGEINDEX_EXPERIMENTAL_MULTIDOC", raising=False)

    backend = col._backend
    backend.list_documents.return_value = [
        {"doc_id": doc_id, "doc_name": doc_name, "doc_type": "pdf"}
        for doc_id, doc_name in (("d1", "a.pdf"), ("d2", "b.pdf"))
    ]
    backend.query.return_value = "answer"

    with pytest.warns(UserWarning, match="experimental"):
        result = col.query("what?")

    # The warning is advisory only: the backend's answer is passed through.
    assert result == "answer"
def test_query_without_doc_ids_no_warning_when_single_doc(col, monkeypatch, recwarn):
    """An unscoped query records no ``UserWarning`` for the fixture's default documents.

    NOTE(review): this test does not configure ``list_documents`` itself, so it
    presumably relies on the ``col`` fixture listing exactly one document by
    default -- confirm against the fixture definition.
    """
    # Remove the variable so silence cannot be attributed to the opt-in.
    monkeypatch.delenv("PAGEINDEX_EXPERIMENTAL_MULTIDOC", raising=False)
    col._backend.query.return_value = "answer"

    col.query("what?")

    user_warnings = [w for w in recwarn if issubclass(w.category, UserWarning)]
    assert not user_warnings
def test_query_empty_collection_raises(col, monkeypatch):
    """Querying when the backend lists no documents raises ``ValueError``."""
    monkeypatch.delenv("PAGEINDEX_EXPERIMENTAL_MULTIDOC", raising=False)

    no_documents = []
    col._backend.list_documents.return_value = no_documents

    # The error message is expected to mention "empty".
    with pytest.raises(ValueError, match="empty"):
        col.query("what?")
def test_query_with_doc_ids_no_warning(col, recwarn):
    """A query scoped with explicit ``doc_ids`` records no ``UserWarning``."""
    col._backend.query.return_value = "answer"

    scoped_ids = ["d1"]
    col.query("what?", doc_ids=scoped_ids)

    user_warnings = [w for w in recwarn if issubclass(w.category, UserWarning)]
    assert not user_warnings
def test_query_env_var_silences_warning(col, monkeypatch, recwarn):
    """With ``PAGEINDEX_EXPERIMENTAL_MULTIDOC=1`` a two-document query stays silent."""
    monkeypatch.setenv("PAGEINDEX_EXPERIMENTAL_MULTIDOC", "1")

    backend = col._backend
    backend.list_documents.return_value = [
        {"doc_id": doc_id, "doc_name": doc_name, "doc_type": "pdf"}
        for doc_id, doc_name in (("d1", "a.pdf"), ("d2", "b.pdf"))
    ]
    backend.query.return_value = "answer"

    col.query("what?")

    user_warnings = [w for w in recwarn if issubclass(w.category, UserWarning)]
    assert not user_warnings