mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-24 20:28:12 +02:00
fix(demo): register all example documents
This commit is contained in:
parent
30830fc19e
commit
b6587350a3
1 changed file with 13 additions and 14 deletions
|
|
@@ -7,9 +7,10 @@ tools. The agent receives one read-only bash-like PIFS tool and must retrieve
|
||||||
evidence through commands such as ls, tree, find, grep, search-summary,
|
evidence through commands such as ls, tree, find, grep, search-summary,
|
||||||
cat <path> --structure, cat <path> --page, and cat <path> --node.
|
cat <path> --structure, cat <path> --page, and cat <path> --node.
|
||||||
|
|
||||||
The demo uses PDFs under examples/documents. When a matching
|
The demo registers supported files under examples/documents. When a matching
|
||||||
examples/documents/results/*_structure.json file exists, it is loaded into the
|
examples/documents/results/*_structure.json file exists, it is loaded into the
|
||||||
PIFS workspace's PageIndexClient cache so register() does not rebuild the tree.
|
PIFS workspace's PageIndexClient cache. Files without a cache exercise the
|
||||||
|
normal PageIndexClient.index() path during register().
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
pip install openai-agents
|
pip install openai-agents
|
||||||
|
|
@@ -168,22 +169,20 @@ def require_runtime_environment(*, metadata_provider: str, embedding_provider: s
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def discover_cached_documents(documents_dir: Path) -> list[Path]:
|
SUPPORTED_DOCUMENT_SUFFIXES = {".pdf", ".md", ".markdown", ".txt", ".text"}
|
||||||
results_dir = documents_dir / "results"
|
|
||||||
paths: list[Path] = []
|
|
||||||
for structure_path in sorted(results_dir.glob("*_structure.json")):
|
def discover_documents(documents_dir: Path) -> list[Path]:
|
||||||
stem = structure_path.name.removesuffix("_structure.json")
|
return sorted(
|
||||||
for suffix in (".pdf", ".md", ".markdown"):
|
path
|
||||||
candidate = documents_dir / f"{stem}{suffix}"
|
for path in documents_dir.iterdir()
|
||||||
if candidate.exists():
|
if path.is_file() and path.suffix.lower() in SUPPORTED_DOCUMENT_SUFFIXES
|
||||||
paths.append(candidate)
|
)
|
||||||
break
|
|
||||||
return paths
|
|
||||||
|
|
||||||
|
|
||||||
def resolve_requested_documents(documents_dir: Path, requested: list[str]) -> list[Path]:
|
def resolve_requested_documents(documents_dir: Path, requested: list[str]) -> list[Path]:
|
||||||
if not requested:
|
if not requested:
|
||||||
return discover_cached_documents(documents_dir)
|
return discover_documents(documents_dir)
|
||||||
paths: list[Path] = []
|
paths: list[Path] = []
|
||||||
for item in requested:
|
for item in requested:
|
||||||
path = Path(item).expanduser()
|
path = Path(item).expanduser()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue