From dc4de3116f98c368aec0c14b038ca1ce95b79703 Mon Sep 17 00:00:00 2001
From: BukeLy <bukely0119@foxmail.com>
Date: Mon, 1 Jun 2026 01:40:44 +0800
Subject: [PATCH] refactor: remove source paths from PIFS

---
 README.md                                   |   2 +-
 examples/pifs_demo.py                       |   3 +-
 pageindex/config.yaml                       |   4 +-
 pageindex/filesystem/cli.py                 |   5 +-
 pageindex/filesystem/commands.py            | 254 +-------------------
 pageindex/filesystem/core.py                | 184 +++++++-------
 pageindex/filesystem/metadata_generation.py |   1 -
 pageindex/filesystem/semantic_index.py      |  19 +-
 pageindex/filesystem/semantic_projection.py |   3 -
 pageindex/filesystem/store.py               | 142 ++++++-----
 pageindex/filesystem/types.py               |   3 -
 tests/test_filesystem_store.py              |   1 -
 tests/test_metadata_generation.py           |   1 -
 tests/test_pageindex_filesystem_scope.py    | 122 ++++++----
 tests/test_pageindex_structural_read.py     |  36 +--
 tests/test_pifs_add_command.py              |  17 +-
 tests/test_pifs_cli.py                      |  23 +-
 tests/test_pifs_find_maxdepth.py            |   6 -
 tests/test_pifs_like_escape.py              |   1 -
 tests/test_pifs_path_resolution.py          |  16 +-
 tests/test_pifs_register_side_effects.py    |   1 -
 tests/test_semantic_index.py                |   8 -
 22 files changed, 324 insertions(+), 528 deletions(-)

diff --git a/README.md b/README.md
index f8cca05..49c1785 100644
--- a/README.md
+++ b/README.md
@@ -173,7 +173,7 @@ python3 run_pageindex.py --pdf_path /path/to/your/document.pdf
 You can customize the processing with additional optional arguments:
 
 ```
---model                 LLM model to use (default: gpt-4o-2024-11-20)
+--model                 LLM model to use (default: gpt-5.4)
 --toc-check-pages       Pages to check for table of contents (default: 20)
 --max-pages-per-node    Max pages per node (default: 10)
 --max-tokens-per-node   Max tokens per node (default: 20000)
diff --git a/examples/pifs_demo.py b/examples/pifs_demo.py
index 2434371..f5dffe7 100644
--- a/examples/pifs_demo.py
+++ b/examples/pifs_demo.py
@@ -48,7 +48,7 @@ from pageindex.filesystem.core import DEFAULT_EMBEDDING_DIMENSIONS
 EXAMPLES_DIR = Path(__file__).parent
 DOCUMENTS_DIR = EXAMPLES_DIR / "documents"
 WORKSPACE = EXAMPLES_DIR / "pifs_workspace"
-DEFAULT_MODEL = os.environ.get("PIFS_DEMO_MODEL", "gpt-5.4-mini")
+DEFAULT_MODEL = os.environ.get("PIFS_DEMO_MODEL", "gpt-5.4")
 DEFAULT_METADATA_PROVIDER = os.environ.get("PIFS_DEMO_METADATA_PROVIDER") or os.environ.get(
     "PIFS_METADATA_PROVIDER", "openai"
 )
@@ -416,7 +416,6 @@ def register_documents(
         register_started = time.perf_counter()
         file_ref = filesystem.register(
             storage_uri=document_path.as_uri(),
-            source_path=str(document_path),
             folder_path="/documents",
             external_id=external_id,
             title=document_path.name,
diff --git a/pageindex/config.yaml b/pageindex/config.yaml
index 591fe93..5da9ee4 100644
--- a/pageindex/config.yaml
+++ b/pageindex/config.yaml
@@ -1,4 +1,4 @@
-model: "gpt-4o-2024-11-20"
+model: "gpt-5.4"
 # model: "anthropic/claude-sonnet-4-6"
 retrieve_model: "gpt-5.4"  # defaults to `model` if not set
 toc_check_page_num: 20
@@ -7,4 +7,4 @@ max_token_num_each_node: 20000
 if_add_node_id: "yes"
 if_add_node_summary: "yes"
 if_add_doc_description: "no"
-if_add_node_text: "no"
\ No newline at end of file
+if_add_node_text: "no"
diff --git a/pageindex/filesystem/cli.py b/pageindex/filesystem/cli.py
index bb01f80..8e13d1d 100644
--- a/pageindex/filesystem/cli.py
+++ b/pageindex/filesystem/cli.py
@@ -21,7 +21,7 @@ from .core import PageIndexFileSystem
 
 
 AGENT_STREAM_MODE_CHOICES = ("off", "tools", "model", "all")
-DEFAULT_AGENT_MODEL = "gpt-5.4-mini"
+DEFAULT_AGENT_MODEL = "gpt-5.4"
 EXIT_COMMANDS = {"exit", "quit", ":q"}
 ANSI_ESCAPE_RE = re.compile(r"\x1b(?:\[[0-?]*[ -/]*[@-~]|.)")
 PIFS_CONFIG_FILE_ENV = "PIFS_CONFIG_FILE"
@@ -290,9 +290,8 @@ def _run_add(argv: list[str], *, workspace: str) -> int:
 
     filesystem = _filesystem_from_workspace(workspace)
     info = filesystem.add_file(args.physical_path, args.virtual_target)
-    print(f"added: {info.get('path') or '/' + str(info.get('source_path') or '').strip('/')}")
+    print(f"added: {info.get('path')}")
     print(f"file_ref: {info['file_ref']}")
-    print(f"storage_uri: {info['storage_uri']}")
     return 0
 
 
diff --git a/pageindex/filesystem/commands.py b/pageindex/filesystem/commands.py
index 16a7b22..3b1819d 100644
--- a/pageindex/filesystem/commands.py
+++ b/pageindex/filesystem/commands.py
@@ -3,9 +3,7 @@ from __future__ import annotations
 import json
 import re
 import shlex
-import subprocess
 from dataclasses import asdict, is_dataclass
-from pathlib import Path
 from typing import Any
 
 from .core import SEMANTIC_RETRIEVAL_CHANNELS, PageIndexFileSystem
@@ -411,35 +409,18 @@ class PIFSCommandExecutor:
                             "mode": "files",
                             "query": query,
                             "scope": normalized,
-                            "data": self._grep_file_hits_from_results(direct_results, query),
+                            "data": self._grep_file_hits_from_results(
+                                direct_results,
+                                query,
+                                require_match=True,
+                            ),
                         }
-                    if where is None:
-                        direct_source_hits = self._grep_source_file_hits(
-                            normalized,
-                            query,
-                            limit=limit,
-                            direct_only=True,
-                        )
-                        if direct_source_hits:
-                            return {
-                                "mode": "files",
-                                "query": query,
-                                "scope": normalized,
-                                "data": direct_source_hits,
-                            }
                     ranked = self._rank_child_folders(
                         query=query,
                         children=children,
                         metadata_filter=where,
                         limit=limit,
                     )
-                    if not ranked and where is None:
-                        ranked = self._rank_child_folders_from_source(
-                            query=query,
-                            parent_path=normalized,
-                            children=children,
-                            limit=limit,
-                        )
                     return {
                         "mode": "folders",
                         "query": query,
@@ -453,19 +434,15 @@ class PIFSCommandExecutor:
                 metadata_filter=where,
                 limit=limit,
             )
-            if not results and where is None:
-                source_hits = self._grep_source_file_hits(normalized, query, limit=limit)
-                return {
-                    "mode": "files",
-                    "query": query,
-                    "scope": normalized,
-                    "data": source_hits,
-                }
             return {
                 "mode": "files",
                 "query": query,
                 "scope": normalized,
-                "data": self._grep_file_hits_from_results(results, query),
+                "data": self._grep_file_hits_from_results(
+                    results,
+                    query,
+                    require_match=True,
+                ),
             }
         return {
             "mode": "matches",
@@ -976,11 +953,9 @@ class PIFSCommandExecutor:
         if data.get("mode") == "files":
             return "\n\n".join(self._render_stat(item) for item in data.get("data", []))
         lines = [
-            f"target: {data.get('target') or data.get('file_ref')}",
+            f"target: {data.get('path') or data.get('target') or data.get('file_ref')}",
             f"file_ref: {data.get('file_ref')}",
             f"document_id: {data.get('external_id') or data.get('document_id') or '-'}",
-            f"source_path: {data.get('source_path') or '-'}",
-            f"storage_uri: {data.get('storage_uri') or '-'}",
         ]
         folders = data.get("folders") or []
         if folders:
@@ -1019,11 +994,10 @@ class PIFSCommandExecutor:
         file_ref = item.get("file_ref")
         doc_id = item.get("external_id") or item.get("document_id") or "-"
         title = self._compact_text(item.get("title") or item.get("name") or "", max_chars=80)
-        source_path = item.get("source_path") or "-"
         folder_paths = item.get("folder_paths") or self._folder_paths_for_file(file_ref)
         folders = f" folders={','.join(folder_paths)}" if folder_paths else ""
         target = self._file_target_path(item)
-        return f"{target} id={doc_id} file_ref={file_ref or '-'} title={title} source={source_path}{folders}".strip()
+        return f"{target} id={doc_id} file_ref={file_ref or '-'} title={title}{folders}".strip()
 
     def _grep_file_hit_text(self, item: dict[str, Any]) -> str:
         doc_id = item.get("external_id") or "-"
@@ -1046,7 +1020,7 @@ class PIFSCommandExecutor:
         if folder_paths and title:
             folder = str(folder_paths[0] or "/").rstrip("/")
             return f"{folder}/{title}" if folder else f"/{title}"
-        return str(item.get("source_path") or item.get("external_id") or file_ref or "-")
+        return str(item.get("external_id") or file_ref or "-")
 
     def _semantic_retrieval_query(self, query: str) -> str:
         query = str(query or "").strip()
@@ -1150,7 +1124,6 @@ class PIFSCommandExecutor:
                     "file_ref": result.file_ref,
                     "external_id": result.external_id,
                     "title": result.title,
-                    "source_path": result.source_path,
                     "folder_paths": result.folder_paths,
                     "line": line,
                     "text": text or result.snippet,
@@ -1160,76 +1133,6 @@ class PIFSCommandExecutor:
                 break
         return hits
 
-    def _rank_child_folders_from_source(
-        self,
-        *,
-        query: str,
-        parent_path: str,
-        children: list[dict[str, Any]],
-        limit: int,
-    ) -> list[dict[str, Any]]:
-        source_dir = self._source_dir_for_folder(parent_path)
-        source_root = self._source_root()
-        if source_dir is None or source_root is None:
-            return []
-        child_paths = {child["path"]: child for child in children}
-        counts: dict[str, int] = {}
-        for path in self._rg_candidate_files(query, source_dir, max_files=5000):
-            source_path = self._source_path_from_storage(path, source_root)
-            folder_path = "/" + str(Path(source_path).parent).strip("/")
-            child_path = self._matching_child_path(parent_path, folder_path, child_paths)
-            if child_path:
-                counts[child_path] = counts.get(child_path, 0) + 1
-        ranked = [
-            {
-                "path": path,
-                "name": child_paths[path]["name"],
-                "matched_files": matched,
-                "files": self.filesystem.store.count_files_in_folder(path, recursive=True),
-                "children_count": child_paths[path].get("children_count", 0),
-            }
-            for path, matched in counts.items()
-        ]
-        ranked.sort(key=lambda item: (-item["matched_files"], item["path"]))
-        return ranked[:limit]
-
-    def _grep_source_file_hits(
-        self,
-        folder_path: str,
-        query: str,
-        *,
-        limit: int,
-        direct_only: bool = False,
-    ) -> list[dict[str, Any]]:
-        source_dir = self._source_dir_for_folder(folder_path)
-        source_root = self._source_root()
-        if source_dir is None or source_root is None:
-            return []
-        hits = []
-        for path in self._rg_candidate_files(query, source_dir, max_files=max(limit * 10, 50)):
-            file_row = self._file_row_for_storage(path)
-            if not file_row:
-                continue
-            if direct_only and self._folder_path_for_source_path(file_row["source_path"]) != folder_path:
-                continue
-            line_number, text = self._first_matching_source_line(path, query)
-            if line_number is None:
-                continue
-            hits.append(
-                {
-                    "file_ref": file_row["file_ref"],
-                    "external_id": file_row["external_id"],
-                    "title": file_row["title"],
-                    "source_path": file_row["source_path"],
-                    "folder_paths": self._folder_paths_for_file(file_row["file_ref"]),
-                    "line": line_number,
-                    "text": text or file_row["title"],
-                }
-            )
-            if len(hits) >= limit:
-                break
-        return hits
-
     def _grep_file_matches(self, target: str, query: str, *, limit: int) -> list[dict[str, Any]]:
         file_ref = self.filesystem._resolve_target(target)
         entry = self.filesystem.store.get_file(file_ref)
@@ -1241,7 +1144,6 @@ class PIFSCommandExecutor:
                         "file_ref": file_ref,
                         "external_id": entry.external_id,
                         "title": entry.title,
-                        "source_path": entry.source_path,
                         "folder_paths": self._folder_paths_for_file(file_ref),
                         "line": line_number,
                         "text": self._compact_text(line, max_chars=220),
@@ -1269,136 +1171,6 @@ class PIFSCommandExecutor:
     def _is_combined_grep_flag(arg: str) -> bool:
         return bool(re.fullmatch(r"-[Rrni]+", arg)) and len(arg) > 2
 
-    def _rg_candidate_files(self, query: str, directory: Path, *, max_files: int) -> list[Path]:
-        if not directory.exists():
-            return []
-        terms = [term.lower() for term in re.findall(r"[A-Za-z0-9_]{3,}", query)]
-        if not terms:
-            return []
-        primary = max(terms, key=len)
-        try:
-            completed = subprocess.run(
-                [
-                    "rg",
-                    "-l",
-                    "-i",
-                    "-F",
-                    primary,
-                    str(directory),
-                    "--glob",
-                    "*.json",
-                    "--no-messages",
-                ],
-                check=False,
-                capture_output=True,
-                text=True,
-                timeout=20,
-            )
-        except (OSError, subprocess.TimeoutExpired):
-            return []
-        candidates = [Path(line) for line in completed.stdout.splitlines() if line.strip()]
-        filtered = []
-        for path in candidates[: max(max_files * 20, max_files)]:
-            try:
-                text = path.read_text(encoding="utf-8", errors="ignore").lower()
-            except OSError:
-                continue
-            if all(term in text for term in terms):
-                filtered.append(path)
-                if len(filtered) >= max_files:
-                    break
-        return filtered
-
-    def _first_matching_source_line(self, path: Path, query: str) -> tuple[int | None, str]:
-        try:
-            lines = path.read_text(encoding="utf-8", errors="ignore").splitlines()
-        except OSError:
-            return None, ""
-        for line_number, line in enumerate(lines, 1):
-            if self._line_matches(line, query):
-                return line_number, self._compact_text(line, max_chars=220)
-        return None, ""
-
-    def _source_root(self) -> Path | None:
-        with self.filesystem.store.connect() as conn:
-            row = conn.execute(
-                """
-                SELECT storage_uri, source_path
-                FROM files
-                WHERE deleted_at IS NULL
-                LIMIT 1
-                """
-            ).fetchone()
-        if row is None:
-            return None
-        storage_path = Path(row["storage_uri"])
-        source_path = Path(row["source_path"])
-        root = storage_path
-        for _ in range(len(source_path.parts)):
-            root = root.parent
-        return root
-
-    def _source_dir_for_folder(self, folder_path: str) -> Path | None:
-        source_root = self._source_root()
-        if source_root is None:
-            return None
-        stripped = folder_path.strip("/")
-        return source_root / stripped if stripped else source_root
-
-    @staticmethod
-    def _source_path_from_storage(path: Path, source_root: Path) -> str:
-        try:
-            return path.relative_to(source_root).as_posix()
-        except ValueError:
-            return path.name
-
-    @staticmethod
-    def _matching_child_path(
-        parent_path: str,
-        folder_path: str,
-        child_paths: dict[str, dict[str, Any]],
-    ) -> str | None:
-        normalized_parent = parent_path.rstrip("/")
-        if normalized_parent == "":
-            normalized_parent = "/"
-        if normalized_parent == "/":
-            parts = [part for part in folder_path.strip("/").split("/") if part]
-            candidate = "/" + parts[0] if parts else "/"
-            return candidate if candidate in child_paths else None
-        prefix = normalized_parent + "/"
-        if not folder_path.startswith(prefix):
-            return None
-        remainder = folder_path[len(prefix):]
-        first = remainder.split("/", 1)[0]
-        candidate = prefix + first
-        return candidate if candidate in child_paths else None
-
-    def _file_row_for_storage(self, path: Path) -> dict[str, Any] | None:
-        storage_uri = str(path)
-        with self.filesystem.store.connect() as conn:
-            row = conn.execute(
-                """
-                SELECT file_ref, external_id, title, source_path
-                FROM files
-                WHERE storage_uri = ? AND deleted_at IS NULL
-                LIMIT 1
-                """,
-                (storage_uri,),
-            ).fetchone()
-        if row is None:
-            return None
-        return {
-            "file_ref": row["file_ref"],
-            "external_id": row["external_id"],
-            "title": row["title"],
-            "source_path": row["source_path"],
-        }
-
-    @staticmethod
-    def _folder_path_for_source_path(source_path: str) -> str:
-        parent = str(Path(source_path).parent).strip(".")
-        return "/" + parent.strip("/") if parent and parent != "." else "/"
-
     def _folder_paths_for_file(self, file_ref: str | None) -> list[str]:
         if not file_ref:
             return []
diff --git a/pageindex/filesystem/core.py b/pageindex/filesystem/core.py
index 67adb8f..91a4971 100644
--- a/pageindex/filesystem/core.py
+++ b/pageindex/filesystem/core.py
@@ -144,13 +144,12 @@ class PageIndexFileSystem:
         self,
         *,
         storage_uri: str,
-        source_path: str,
         folder_path: Optional[str] = None,
         metadata: Optional[dict[str, Any]] = None,
         external_id: Optional[str] = None,
         title: Optional[str] = None,
         content: str = "",
-        content_type: str = "text/plain",
+        content_type: str | None = None,
         source_type: Optional[str] = None,
         metadata_policy: Optional[dict[str, Any]] = None,
         metadata_status: Optional[str] = None,
@@ -159,7 +158,6 @@ class PageIndexFileSystem:
             [
                 {
                     "storage_uri": storage_uri,
-                    "source_path": source_path,
                     "folder_path": folder_path,
                     "metadata": metadata,
                     "external_id": external_id,
@@ -231,7 +229,6 @@ class PageIndexFileSystem:
                 record = self._prepare_file_record(
                     {
                         "storage_uri": final_path.as_uri(),
-                        "source_path": virtual_path.strip("/"),
                         "folder_path": folder_path,
                         "metadata": {},
                         "external_id": None,
@@ -604,23 +601,27 @@ class PageIndexFileSystem:
                 folder["path"]
                 for folder in self.store.folder_memberships(file_ref)
             ]
+            folder_path = self._preferred_folder_path(
+                folder_paths,
+                path,
+                entry.folder_path,
+            )
             rank = len(rows) + 1
             rows.append(
                 {
                     "rank": rank,
                     "similarity": self._semantic_candidate_similarity(candidate),
                     "score": self._semantic_candidate_score(candidate),
-                    "path": self._stable_file_locator(file_ref, entry),
+                    "path": self._stable_file_locator(
+                        file_ref,
+                        entry,
+                        folder_path=folder_path,
+                    ),
                     "file_ref": file_ref,
                     "document_id": entry.external_id,
                     "external_id": entry.external_id,
                     "title": entry.title,
-                    "source_path": entry.source_path,
-                    "folder_path": self._preferred_folder_path(
-                        folder_paths,
-                        path,
-                        entry.folder_path,
-                    ),
+                    "folder_path": folder_path,
                     "folder_paths": folder_paths,
                     "summary": str((entry.metadata or {}).get("summary") or ""),
                     "snippet": str(getattr(candidate, "snippet", "") or entry.descriptor),
@@ -724,7 +725,6 @@ class PageIndexFileSystem:
                     folder_paths=folder_paths,
                     metadata=row["metadata"],
                     metadata_status=row["metadata_status"],
-                    source_path=row["source_path"],
                     id=row["id"],
                     document_id=row["document_id"],
                     name=row["name"],
@@ -845,7 +845,6 @@ class PageIndexFileSystem:
             "mode": "structure",
             "file_ref": file_ref,
             "external_id": entry.external_id,
-            "source_path": entry.source_path,
             "status": entry.pageindex_tree_status,
             "available": True,
             "pageindex_doc_id": doc_id,
@@ -887,7 +886,6 @@ class PageIndexFileSystem:
             "mode": "page",
             "file_ref": file_ref,
             "external_id": entry.external_id,
-            "source_path": entry.source_path,
             "status": entry.pageindex_tree_status,
             "available": True,
             "pageindex_doc_id": doc_id,
@@ -905,7 +903,7 @@ class PageIndexFileSystem:
             return
         raise ValueError(
             f"{command} is only supported for txt/text files; "
-            f"got source_path={entry.source_path!r}, content_type={entry.content_type!r}. "
+            f"got title={entry.title!r}, content_type={entry.content_type!r}. "
             "Use cat <path|file_ref|document_id> --structure, "
             "or cat <path|file_ref|document_id> --page for PDF/Markdown PageIndex files."
         )
@@ -915,29 +913,22 @@ class PageIndexFileSystem:
             return
         raise ValueError(
             f"{command} is only supported for PDF/Markdown PageIndex files; "
-            f"got source_path={entry.source_path!r}, content_type={entry.content_type!r}. "
+            f"got title={entry.title!r}, content_type={entry.content_type!r}. "
             "Use cat <path|file_ref|document_id> --all for txt/text files."
         )
 
     @classmethod
     def _file_format(cls, entry: Any) -> str:
-        suffix = Path(str(entry.source_path or "")).suffix.lower()
-        content_type = cls._normalized_content_type(entry.content_type)
-        if suffix == ".pdf" or content_type == "application/pdf":
-            return "pdf"
-        if suffix in PAGEINDEX_DOCUMENT_SUFFIXES or content_type in PAGEINDEX_DOCUMENT_CONTENT_TYPES:
-            return "markdown"
-        if suffix in TEXT_ARTIFACT_SUFFIXES:
-            return "text"
-        if entry.pageindex_doc_id or entry.pageindex_tree_status != "not_built":
+        if getattr(entry, "pageindex_doc_id", None) or entry.pageindex_tree_status != "not_built":
             return "pageindex"
-        if content_type in TEXT_ARTIFACT_CONTENT_TYPES:
-            return "text"
+        file_format = cls._content_format(getattr(entry, "title", ""), entry.content_type)
+        if file_format != "unsupported":
+            return file_format
         return "unsupported"
 
     @classmethod
-    def _source_format(cls, source_path: Any, content_type: str | None) -> str:
-        suffix = Path(str(source_path or "")).suffix.lower()
+    def _content_format(cls, filename: Any, content_type: str | None) -> str:
+        suffix = Path(str(filename or "")).suffix.lower()
         normalized_content_type = cls._normalized_content_type(content_type)
         if suffix == ".pdf" or normalized_content_type == "application/pdf":
             return "pdf"
@@ -977,27 +968,27 @@ class PageIndexFileSystem:
         self,
         *,
         storage_uri: str,
-        source_path: str,
+        title: str,
         content_type: str,
     ) -> tuple[str | None, str, dict[str, Any] | None]:
-        if self._source_format(source_path, content_type) not in {"pdf", "markdown"}:
+        if self._content_format(title, content_type) not in {"pdf", "markdown"}:
             return None, "not_built", None
         client = self._pageindex_client()
-        source = self._canonical_source_path(storage_uri=storage_uri, source_path=source_path)
-        cached_doc_id = self._find_cached_pageindex_doc_id(client, source)
+        local_path = self._canonical_storage_uri_path(storage_uri)
+        cached_doc_id = self._find_cached_pageindex_doc_id(client, local_path)
         if cached_doc_id:
             return cached_doc_id, "built", None
-        if source is None:
+        if local_path is None:
             return None, "failed", self._pageindex_tree_failure_record(
                 source="PageIndexFileSystem.registration",
-                error_type="UnresolvableSourcePath",
+                error_type="UnresolvableStorageUri",
                 message=(
-                    "PageIndex source path must resolve to a local file path for "
+                    "storage_uri must resolve to a local file path for "
                     "PDF/Markdown registration."
                 ),
             )
         try:
-            doc_id = client.index(source)
+            doc_id = client.index(local_path)
             return doc_id, "built", None
         except Exception as exc:
             return None, "failed", self._pageindex_tree_failure_record(
@@ -1024,25 +1015,41 @@ class PageIndexFileSystem:
     def _find_cached_pageindex_doc_id(
         self,
         client: PageIndexClient,
-        source_path: str | None,
+        local_path: str | None,
     ) -> str | None:
-        if source_path is None:
+        if local_path is None:
             return None
         for doc_id, doc in client.documents.items():
-            if self._canonical_path(doc.get("path")) == source_path:
+            if self._canonical_path(doc.get("path")) == local_path:
                 return doc_id
         return None
 
-    def _canonical_source_path(self, *, storage_uri: str, source_path: str) -> str | None:
+    def _canonical_storage_uri_path(self, storage_uri: str) -> str | None:
         parsed = urlparse(storage_uri)
         if parsed.scheme == "file":
             return self._canonical_path(unquote(parsed.path))
         if storage_uri and not parsed.scheme:
             return self._canonical_path(storage_uri)
-        if Path(source_path).expanduser().is_absolute():
-            return self._canonical_path(source_path)
         return None
 
+    @staticmethod
+    def _title_from_storage_uri(storage_uri: str) -> str:
+        parsed = urlparse(str(storage_uri or ""))
+        path = unquote(parsed.path) if parsed.scheme else str(storage_uri or "")
+        return Path(path).name
+
+    @classmethod
+    def _infer_content_type(cls, *, title: str, storage_uri: str) -> str:
+        for filename in (title, cls._title_from_storage_uri(storage_uri)):
+            suffix = Path(str(filename or "")).suffix.lower()
+            if suffix == ".pdf":
+                return "application/pdf"
+            if suffix in PAGEINDEX_DOCUMENT_SUFFIXES:
+                return "text/markdown"
+            if suffix in TEXT_ARTIFACT_SUFFIXES:
+                return "text/plain"
+        return "text/plain"
+
     @staticmethod
     def _canonical_path(path: Any) -> str | None:
         if not path:
@@ -1124,12 +1131,12 @@ class PageIndexFileSystem:
         }
 
     def _add_file_content(self, path: Path, content_type: str) -> str:
-        if self._source_format(str(path), content_type) in {"markdown", "text"}:
+        if self._content_format(path.name, content_type) in {"markdown", "text"}:
             return path.read_text(encoding="utf-8")
         return ""
 
     def _require_add_pageindex_ready(self, record: dict[str, Any]) -> None:
-        if self._source_format(record["source_path"], record["content_type"]) not in {
+        if self._content_format(record["title"], record["content_type"]) not in {
             "pdf",
             "markdown",
         }:
@@ -1178,33 +1185,47 @@ class PageIndexFileSystem:
 
     def _prepare_file_record(self, file: dict[str, Any]) -> dict[str, Any]:
         storage_uri = file["storage_uri"]
-        raw_source_path = str(file["source_path"])
-        source_path = raw_source_path.strip("/")
         metadata = file.get("metadata") or {}
         if not isinstance(metadata, dict):
             raise ValueError("metadata must be a JSON object")
         self._validate_register_metadata(metadata)
         external_id = file.get("external_id")
         content = file.get("content") or ""
-        content_type = file.get("content_type") or "text/plain"
+        folder_path = normalize_path(file.get("folder_path") or "/")
+        title = str(
+            file.get("title")
+            or metadata.get("title")
+            or self._title_from_storage_uri(storage_uri)
+            or external_id
+            or ""
+        ).strip()
+        if not title:
+            raise ValueError("file title is required")
+        content_type = file.get("content_type") or self._infer_content_type(
+            title=title,
+            storage_uri=storage_uri,
+        )
+        file_ref = make_file_ref(
+            str(external_id or self._join_virtual_file_path(folder_path, title).strip("/"))
+        )
         (
             pageindex_doc_id,
             pageindex_tree_status,
             pageindex_tree_failure,
         ) = self._registration_pageindex_pointer(
             storage_uri=storage_uri,
-            source_path=raw_source_path,
+            title=title,
             content_type=content_type,
         )
         artifact_content = self._registration_text_artifact_content(
-            source_path=raw_source_path,
+            title=title,
             content_type=content_type,
             pageindex_doc_id=pageindex_doc_id,
             pageindex_tree_status=pageindex_tree_status,
             fallback_content=content,
         )
         fts_content = file.get("fts_content", artifact_content)
-        source_type = file.get("source_type") or self._infer_source_type(source_path)
+        source_type = file.get("source_type")
         metadata_policy = self._normalize_metadata_policy(
             file.get("metadata_policy"),
             metadata=metadata,
@@ -1217,9 +1238,6 @@ class PageIndexFileSystem:
         self._attach_pageindex_tree_failure(metadata_status, pageindex_tree_failure)
         indexed_metadata = SQLiteFileSystemStore.indexed_metadata_values(metadata)
         searchable_metadata = dict(metadata)
-        folder_path = normalize_path(file.get("folder_path") or "/")
-        title = file.get("title") or metadata.get("title") or Path(source_path).stem
-        file_ref = make_file_ref(external_id or source_path)
         text_artifact_path = file.get("text_artifact_path")
         owns_text_artifact = text_artifact_path is None
         if text_artifact_path is None:
@@ -1234,7 +1252,6 @@ class PageIndexFileSystem:
             "file_ref": file_ref,
             "external_id": external_id,
             "storage_uri": storage_uri,
-            "source_path": source_path,
             "title": title,
             "descriptor": descriptor,
             "content_type": content_type,
@@ -1260,13 +1277,13 @@ class PageIndexFileSystem:
     def _registration_text_artifact_content(
         self,
         *,
-        source_path: str,
+        title: str,
         content_type: str,
         pageindex_doc_id: str | None,
         pageindex_tree_status: str,
         fallback_content: str,
     ) -> str:
-        if self._source_format(source_path, content_type) not in {"pdf", "markdown"}:
+        if self._content_format(title, content_type) not in {"pdf", "markdown"}:
             return fallback_content
         if pageindex_tree_status != "built" or not pageindex_doc_id:
             return fallback_content
@@ -1296,15 +1313,11 @@ class PageIndexFileSystem:
     @staticmethod
     def _raw_artifact_payload(
         *,
-        storage_uri: str,
-        source_path: str,
         folder_path: str,
         metadata: dict[str, Any],
         metadata_status: dict[str, Any],
     ) -> dict[str, Any]:
         return {
-            "storage_uri": storage_uri,
-            "source_path": source_path,
             "folder_path": folder_path,
             "metadata": metadata,
             "metadata_status": metadata_status,
@@ -1323,8 +1336,6 @@ class PageIndexFileSystem:
             self.store.write_raw_artifact(
                 record["file_ref"],
                 self._raw_artifact_payload(
-                    storage_uri=record["storage_uri"],
-                    source_path=record["source_path"],
                     folder_path=record["folder_path"],
                     metadata=record["metadata"],
                     metadata_status=record["metadata_status"],
@@ -1351,7 +1362,6 @@ class PageIndexFileSystem:
             "file_ref": entry.file_ref,
             "external_id": entry.external_id,
             "storage_uri": entry.storage_uri,
-            "source_path": entry.source_path,
             "title": entry.title,
             "descriptor": entry.descriptor,
             "content_type": entry.content_type,
@@ -1394,7 +1404,6 @@ class PageIndexFileSystem:
                     file_ref=record["file_ref"],
                     external_id=record.get("external_id"),
                     title=record["title"],
-                    source_path=record["source_path"],
                     content_type=record["content_type"],
                     source_type=record.get("source_type"),
                     text=Path(record["text_artifact_path"]).read_text(encoding="utf-8"),
@@ -1638,7 +1647,6 @@ class PageIndexFileSystem:
             text=text,
             external_id=entry.external_id,
             folder_path=entry.folder_path,
-            source_path=entry.source_path,
         )
 
     def _open_all(self, file_ref: str) -> OpenResult:
@@ -1652,7 +1660,6 @@ class PageIndexFileSystem:
             text=text,
             external_id=entry.external_id,
             folder_path=entry.folder_path,
-            source_path=entry.source_path,
         )
 
     @classmethod
@@ -1671,7 +1678,6 @@ class PageIndexFileSystem:
             "mode": mode,
             "file_ref": entry.file_ref,
             "external_id": entry.external_id,
-            "source_path": entry.source_path,
             "status": entry.pageindex_tree_status,
             "available": False,
             "message": message,
@@ -1744,19 +1750,41 @@ class PageIndexFileSystem:
             separators=(",", ":"),
         )
 
-    def _stable_file_locator(self, file_ref: str, entry: Any) -> str:
-        source_path = str(getattr(entry, "source_path", "") or "").strip()
-        if source_path:
-            target = "/" + source_path.strip("/")
-            try:
-                if self.store.resolve_file_ref(target) == file_ref:
-                    return target
-            except KeyError:
-                pass
-        external_id = str(getattr(entry, "external_id", "") or "").strip()
-        if external_id:
-            return external_id
-        return file_ref
+    def _stable_file_locator(
+        self,
+        file_ref: str,
+        entry: Any,
+        *,
+        folder_path: str | None = None,
+    ) -> str:
+        """Return the virtual path (folder path plus title) that addresses ``file_ref``.
+
+        The folder is ``folder_path`` when given, otherwise ``entry.folder_path``,
+        falling back to ``"/"``. The candidate path is resolved through the store
+        and is only returned when it maps back to ``file_ref``.
+
+        Raises:
+            RuntimeError: if the entry has no title, if the store raises
+                ``KeyError`` for the candidate path, or if it resolves to a different file.
+        """
+        folder_path = normalize_path(folder_path or getattr(entry, "folder_path", None) or "/")
+        title = str(getattr(entry, "title", "") or "").strip()
+        if not title:
+            raise RuntimeError(f"browse cannot build a virtual path for {file_ref}: missing title")
+        target = self._join_virtual_file_path(folder_path, title.strip("/"))
+        # Round-trip through the store so the returned path is known to resolve.
+        try:
+            resolved_file_ref = self.store.resolve_file_ref(target)
+        except KeyError as exc:
+            raise RuntimeError(
+                f"browse produced an unresolved virtual path for {file_ref}: {target}"
+            ) from exc
+        if resolved_file_ref != file_ref:
+            raise RuntimeError(
+                "browse produced a non-idempotent virtual path: "
+                f"{target} resolved to {resolved_file_ref}, expected {file_ref}"
+            )
+        return target
 
     @staticmethod
     def _build_descriptor(title: str, metadata: dict[str, Any]) -> str:
@@ -2011,11 +2028,6 @@ class PageIndexFileSystem:
             return "number"
         return "string"
 
-    @staticmethod
-    def _infer_source_type(source_path: str) -> Optional[str]:
-        parts = [part for part in Path(source_path).parts if part not in ("", ".")]
-        return parts[0] if parts else None
-
     @staticmethod
     def _scope_folder_path(scope: Optional[dict[str, Any]]) -> Optional[str]:
         if not scope:
diff --git a/pageindex/filesystem/metadata_generation.py b/pageindex/filesystem/metadata_generation.py
index 86b2ac6..1057c37 100644
--- a/pageindex/filesystem/metadata_generation.py
+++ b/pageindex/filesystem/metadata_generation.py
@@ -18,7 +18,6 @@ class MetadataGenerationInput:
     file_ref: str
     external_id: str | None
     title: str
-    source_path: str
     content_type: str
     source_type: str | None
     text: str
diff --git a/pageindex/filesystem/semantic_index.py b/pageindex/filesystem/semantic_index.py
index 5b3e393..cc01d82 100644
--- a/pageindex/filesystem/semantic_index.py
+++ b/pageindex/filesystem/semantic_index.py
@@ -21,7 +21,6 @@ class SemanticIndexRecord:
     text: str
     external_id: str | None = None
     source_type: str = ""
-    source_path: str = ""
     title: str = ""
     metadata: dict[str, Any] | None = None
 
@@ -32,7 +31,6 @@ class SemanticSearchResult:
     distance: float
     external_id: str | None
     source_type: str
-    source_path: str
     title: str
     text_hash: str
     metadata: dict[str, Any]
@@ -88,7 +86,6 @@ class SQLiteVecSemanticIndex:
                     file_ref TEXT NOT NULL UNIQUE,
                     external_id TEXT,
                     source_type TEXT NOT NULL DEFAULT '',
-                    source_path TEXT NOT NULL DEFAULT '',
                     title TEXT NOT NULL DEFAULT '',
                     text_hash TEXT NOT NULL,
                     text_chars INTEGER NOT NULL DEFAULT 0,
@@ -215,7 +212,6 @@ class SQLiteVecSemanticIndex:
                                 d.file_ref,
                                 d.external_id,
                                 d.source_type,
-                                d.source_path,
                                 d.title,
                                 d.text_hash,
                                 d.metadata_json,
@@ -245,7 +241,6 @@ class SQLiteVecSemanticIndex:
                         d.file_ref,
                         d.external_id,
                         d.source_type,
-                        d.source_path,
                         d.title,
                         d.text_hash,
                         d.metadata_json,
@@ -269,7 +264,6 @@ class SQLiteVecSemanticIndex:
                     distance=float(row["distance"]),
                     external_id=row["external_id"],
                     source_type=row["source_type"],
-                    source_path=row["source_path"],
                     title=row["title"],
                     text_hash=row["text_hash"],
                     metadata=metadata,
@@ -361,15 +355,14 @@ class SQLiteVecSemanticIndex:
             cursor = conn.execute(
                 """
                 INSERT INTO semantic_index_docs(
-                    file_ref, external_id, source_type, source_path, title,
+                    file_ref, external_id, source_type, title,
                     text_hash, text_chars, metadata_json
-                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                ) VALUES (?, ?, ?, ?, ?, ?, ?)
                 """,
                 (
                     record.file_ref,
                     record.external_id,
                     record.source_type,
-                    record.source_path,
                     record.title,
                     text_hash,
                     len(record.text),
@@ -381,10 +374,9 @@ class SQLiteVecSemanticIndex:
         conn.execute(
             """
             UPDATE semantic_index_docs
-            SET external_id = ?,
-                source_type = ?,
-                source_path = ?,
-                title = ?,
+                SET external_id = ?,
+                    source_type = ?,
+                    title = ?,
                 text_hash = ?,
                 text_chars = ?,
                 metadata_json = ?,
@@ -394,7 +386,6 @@ class SQLiteVecSemanticIndex:
             (
                 record.external_id,
                 record.source_type,
-                record.source_path,
                 record.title,
                 text_hash,
                 len(record.text),
diff --git a/pageindex/filesystem/semantic_projection.py b/pageindex/filesystem/semantic_projection.py
index 6059a9b..d3c8872 100644
--- a/pageindex/filesystem/semantic_projection.py
+++ b/pageindex/filesystem/semantic_projection.py
@@ -39,7 +39,6 @@ class SemanticProjectionCandidate:
     score: float
     sources: list[dict[str, Any]]
     source_type: str
-    source_path: str
     title: str
     metadata: dict[str, Any]
     snippet: str
@@ -261,7 +260,6 @@ class SummaryProjectionIndexer:
                     text=summary,
                     external_id=record.get("external_id"),
                     source_type=str(record.get("source_type") or ""),
-                    source_path=str(record.get("source_path") or ""),
                     title=str(record.get("title") or ""),
                     metadata=metadata,
                 )
@@ -493,7 +491,6 @@ def rank_single_semantic_channel(
                 score=1 / (60 + rank),
                 sources=[{"channel": channel, "rank": rank, "distance": result.distance}],
                 source_type=result.source_type,
-                source_path=result.source_path,
                 title=result.title,
                 metadata=result.metadata,
                 snippet=f"{channel}_vector rank={rank}",
diff --git a/pageindex/filesystem/store.py b/pageindex/filesystem/store.py
index 10e1e7a..b1754da 100644
--- a/pageindex/filesystem/store.py
+++ b/pageindex/filesystem/store.py
@@ -43,7 +43,6 @@ class SQLiteFileSystemStore:
                 file_ref TEXT PRIMARY KEY,
                 external_id TEXT,
                 storage_uri TEXT NOT NULL,
-                source_path TEXT NOT NULL,
                 title TEXT NOT NULL,
                 descriptor TEXT NOT NULL,
                 content_type TEXT NOT NULL,
@@ -124,7 +123,6 @@ class SQLiteFileSystemStore:
             USING fts5(file_ref UNINDEXED, title, body, metadata_text);
 
             CREATE INDEX IF NOT EXISTS idx_files_external_id ON files(external_id);
-            CREATE INDEX IF NOT EXISTS idx_files_source_path ON files(source_path);
             CREATE INDEX IF NOT EXISTS idx_files_source_type ON files(source_type);
             CREATE INDEX IF NOT EXISTS idx_folders_path ON folders(path);
             CREATE INDEX IF NOT EXISTS idx_folders_parent_id ON folders(parent_id);
@@ -168,6 +166,7 @@ class SQLiteFileSystemStore:
             fts_file_ref_rows = []
             fts_rows = []
             metadata_rows = []
+            pending_folder_titles: dict[tuple[str, str], str] = {}
             metadata_field_ids = {
                 row["name"]: row["field_id"]
                 for row in conn.execute(
@@ -184,6 +183,18 @@ class SQLiteFileSystemStore:
                         kind=record.get("folder_kind", "manual"),
                     )
                     folder_cache[folder_cache_key] = folder_id
+                self._ensure_title_available_in_folder(
+                    conn,
+                    folder_id=folder_id,
+                    file_ref=record["file_ref"],
+                    title=record["title"],
+                )
+                title_key = (folder_id, str(record["title"]))
+                existing_file_ref = pending_folder_titles.get(title_key)
+                if existing_file_ref and existing_file_ref != record["file_ref"]:
+                    target = self._virtual_file_target(conn, folder_id, str(record["title"]))
+                    raise FileExistsError(f"File already exists at {target}")
+                pending_folder_titles[title_key] = record["file_ref"]
                 file_rows.append(self._file_insert_values(record))
                 membership_rows.append(
                     (
@@ -244,7 +255,6 @@ class SQLiteFileSystemStore:
             "file_ref",
             "external_id",
             "storage_uri",
-            "source_path",
             "title",
             "descriptor",
             "content_type",
@@ -270,7 +280,6 @@ class SQLiteFileSystemStore:
             record["file_ref"],
             record["external_id"],
             record["storage_uri"],
-            record["source_path"],
             record["title"],
             record["descriptor"],
             record["content_type"],
@@ -338,6 +347,12 @@ class SQLiteFileSystemStore:
         with self.connect() as conn:
             resolved_file_ref = self._resolve_file_ref(conn, file_ref)
             folder_id = self._resolve_or_create_folder(conn, folder_path_or_id)
+            self._ensure_title_available_in_folder(
+                conn,
+                folder_id=folder_id,
+                file_ref=resolved_file_ref,
+                title=self._file_title(conn, resolved_file_ref),
+            )
             conn.execute(
                 """
                 INSERT INTO file_folders(file_ref, folder_id, metadata_json)
@@ -357,6 +372,12 @@ class SQLiteFileSystemStore:
             for item in items:
                 resolved_file_ref = self._resolve_file_ref(conn, item["file_ref"])
                 folder_id = self._resolve_or_create_folder(conn, item["folder"])
+                self._ensure_title_available_in_folder(
+                    conn,
+                    folder_id=folder_id,
+                    file_ref=resolved_file_ref,
+                    title=self._file_title(conn, resolved_file_ref),
+                )
                 conn.execute(
                     """
                     INSERT INTO file_folders(file_ref, folder_id, metadata_json)
@@ -371,6 +392,76 @@ class SQLiteFileSystemStore:
                     ),
                 )
 
+    def _ensure_title_available_in_folder(
+        self,
+        conn: sqlite3.Connection,
+        *,
+        folder_id: str,
+        file_ref: str,
+        title: str,
+    ) -> None:
+        """Raise if another non-deleted file in the folder already has ``title``.
+
+        Looks for a non-deleted file linked to ``folder_id`` whose title equals
+        ``title`` and whose ``file_ref`` differs from ``file_ref``.
+
+        Raises:
+            FileExistsError: if such a file exists; the message names the
+                virtual path built from the folder's path and ``title``.
+        """
+        row = conn.execute(
+            """
+            SELECT f.file_ref, fo.path
+            FROM files f
+            JOIN file_folders ff ON ff.file_ref = f.file_ref
+            JOIN folders fo ON fo.folder_id = ff.folder_id
+            WHERE f.deleted_at IS NULL
+              AND ff.folder_id = ?
+              AND f.title = ?
+              AND f.file_ref != ?
+            LIMIT 1
+            """,
+            (folder_id, title, file_ref),
+        ).fetchone()
+        # Only the existence of a match matters; the selected columns are not read.
+        if row:
+            raise FileExistsError(
+                f"File already exists at {self._virtual_file_target(conn, folder_id, title)}"
+            )
+
+    @staticmethod
+    def _virtual_file_target(
+        conn: sqlite3.Connection,
+        folder_id: str,
+        title: str,
+    ) -> str:
+        """Return ``<folder path>/<title>`` for the folder with id ``folder_id``.
+
+        The folder path is read from ``folders``; when no row matches
+        ``folder_id`` the root folder ``"/"`` is used instead.
+        """
+        row = conn.execute(
+            "SELECT path FROM folders WHERE folder_id = ?",
+            (folder_id,),
+        ).fetchone()
+        folder_path = normalize_path(row["path"] if row else "/")
+        return f"/{title}" if folder_path == "/" else f"{folder_path}/{title}"
+
+    @staticmethod
+    def _file_title(conn: sqlite3.Connection, file_ref: str) -> str:
+        """Return the title of the non-deleted file ``file_ref``.
+
+        Raises:
+            KeyError: if no non-deleted file has that ``file_ref``.
+        """
+        row = conn.execute(
+            "SELECT title FROM files WHERE file_ref = ? AND deleted_at IS NULL",
+            (file_ref,),
+        ).fetchone()
+        if row is None:
+            raise KeyError(f"Unknown file target: {file_ref}")
+        return str(row["title"])
+
     def replace_metadata_values(
         self,
         conn: sqlite3.Connection,
@@ -791,7 +862,6 @@ class SQLiteFileSystemStore:
         selects = [
             "f.file_ref",
             "f.external_id",
-            "f.source_path",
             "f.title",
             "f.descriptor",
             "f.pageindex_tree_status",
@@ -984,7 +1054,6 @@ class SQLiteFileSystemStore:
                 f.file_ref,
                 f.external_id,
                 f.storage_uri,
-                f.source_path,
                 f.title,
                 f.descriptor,
                 f.content_type,
@@ -1125,30 +1194,6 @@ class SQLiteFileSystemStore:
         ).fetchone()
         if row:
             return row["file_ref"]
-        stripped = target.strip("/")
-        rows = conn.execute(
-            """
-            SELECT
-                f.file_ref,
-                f.external_id,
-                f.title,
-                f.source_path,
-                COALESCE(MIN(fo.path), '/') AS folder_path
-            FROM files f
-            LEFT JOIN file_folders ff ON ff.file_ref = f.file_ref
-            LEFT JOIN folders fo ON fo.folder_id = ff.folder_id
-            WHERE f.source_path = ? AND f.deleted_at IS NULL
-            GROUP BY f.file_ref, f.external_id, f.title, f.source_path
-            ORDER BY f.file_ref
-            LIMIT 2
-            """,
-            (stripped,),
-        ).fetchall()
-        if len(rows) > 1:
-            matches = "; ".join(self._virtual_match_summary(row) for row in rows)
-            raise KeyError(f"Ambiguous file target: {target}. Matches: {matches}")
-        if rows:
-            return rows[0]["file_ref"]
         virtual_file_ref = self._resolve_virtual_file_ref(conn, target)
         if virtual_file_ref:
             return virtual_file_ref
@@ -1163,12 +1208,9 @@ class SQLiteFileSystemStore:
                     f.file_ref,
                     f.external_id,
                     f.title,
-                    f.source_path,
                     pf.path AS folder_path,
                     (CASE WHEN pf.path = '/' THEN '/' ELSE pf.path || '/' END)
-                        || ltrim(f.title, '/') AS title_virtual_path,
-                    (CASE WHEN pf.path = '/' THEN '/' ELSE pf.path || '/' END)
-                        || ltrim(f.source_path, '/') AS source_virtual_path
+                        || ltrim(f.title, '/') AS title_virtual_path
                 FROM files f
                 JOIN file_folders ff ON ff.file_ref = f.file_ref
                 JOIN folders pf ON pf.folder_id = ff.folder_id
@@ -1178,16 +1220,14 @@ class SQLiteFileSystemStore:
                 file_ref,
                 external_id,
                 title,
-                source_path,
                 MIN(folder_path) AS folder_path
             FROM virtual_matches
             WHERE title_virtual_path = ?
-               OR source_virtual_path = ?
-            GROUP BY file_ref, external_id, title, source_path
+            GROUP BY file_ref, external_id, title
             ORDER BY file_ref
             LIMIT 2
             """,
-            (virtual_target, virtual_target),
+            (virtual_target,),
         ).fetchall()
         if not rows:
             return None
@@ -1201,8 +1241,7 @@ class SQLiteFileSystemStore:
         external_id = row["external_id"] or "-"
         return (
             f"file_ref={row['file_ref']} external_id={external_id} "
-            f"folder={row['folder_path']} title={row['title']!r} "
-            f"source_path={row['source_path']!r}"
+            f"folder={row['folder_path']} title={row['title']!r}"
         )
 
     def ensure_folder(
@@ -1475,18 +1514,12 @@ class SQLiteFileSystemStore:
                 JOIN folders fo ON fo.folder_id = ff.folder_id
                 WHERE f.deleted_at IS NULL
                   AND fo.path = ?
-                  AND (
-                      f.title = ?
-                      OR f.source_path = ?
-                      OR f.source_path LIKE ? ESCAPE '\\'
-                  )
+                  AND f.title = ?
                 LIMIT 1
                 """,
                 (
                     path,
                     basename,
-                    basename,
-                    "%/" + self._like_escape(basename),
                 ),
             ).fetchone()
         return row is not None
@@ -1548,7 +1581,6 @@ class SQLiteFileSystemStore:
                 f.file_ref,
                 f.external_id,
                 f.storage_uri,
-                f.source_path,
                 f.title,
                 f.descriptor,
                 f.content_type,
@@ -1592,7 +1624,6 @@ class SQLiteFileSystemStore:
                 f.external_id,
                 f.title,
                 f.descriptor,
-                f.source_path,
                 f.pageindex_tree_status,
                 f.metadata_json,
                 f.metadata_status_json,
@@ -1804,7 +1835,6 @@ class SQLiteFileSystemStore:
             "pageNum": None,
             "createdAt": cls._row_value(row, "created_at"),
             "folderId": cls._row_value(row, "folder_id"),
-            "source_path": row["source_path"],
             "folder_path": row["folder_path"],
             "metadata": json.loads(row["metadata_json"] or "{}"),
             "metadata_status": json.loads(
@@ -1827,7 +1857,6 @@ class SQLiteFileSystemStore:
             "pageNum": None,
             "createdAt": cls._row_value(row, "created_at"),
             "folderId": cls._row_value(row, "folder_id"),
-            "source_path": row["source_path"],
             "snippet": row["snippet"] or row["title"],
             "folder_path": row["folder_path"],
             "metadata": json.loads(row["metadata_json"] or "{}"),
@@ -1846,7 +1875,6 @@ class SQLiteFileSystemStore:
             file_ref=row["file_ref"],
             external_id=row["external_id"],
             storage_uri=row["storage_uri"],
-            source_path=row["source_path"],
             title=row["title"],
             descriptor=row["descriptor"],
             content_type=row["content_type"],
@@ -1871,8 +1899,7 @@ class SQLiteFileSystemStore:
             "document_id": entry.external_id,
             "external_id": entry.external_id,
             "name": entry.title,
-            "storage_uri": entry.storage_uri,
-            "source_path": entry.source_path,
+            "path": cls._virtual_file_path(entry.folder_path, entry.title),
             "title": entry.title,
             "description": entry.descriptor,
             "status": entry.pageindex_tree_status,
@@ -1881,8 +1908,6 @@ class SQLiteFileSystemStore:
             "content_type": entry.content_type,
             "source_type": entry.source_type,
             "fingerprint": entry.fingerprint,
-            "text_artifact_path": entry.text_artifact_path,
-            "raw_artifact_path": entry.raw_artifact_path,
             "pageindex_doc_id": entry.pageindex_doc_id,
             "pageindex_tree_status": entry.pageindex_tree_status,
             "metadata": entry.metadata,
@@ -1890,6 +1915,18 @@ class SQLiteFileSystemStore:
             "folder_path": entry.folder_path,
         }
 
+    @staticmethod
+    def _virtual_file_path(folder_path: str, title: str) -> str:
+        """Return the virtual path of ``title`` inside ``folder_path``.
+
+        Leading slashes are dropped from the title so the result matches the
+        ``title_virtual_path`` expression the store resolves file targets with.
+        """
+        folder_path = normalize_path(folder_path)
+        # Mirror SQL ``ltrim(f.title, '/')`` so the emitted path resolves back.
+        title = title.lstrip("/")
+        return f"/{title}" if folder_path == "/" else f"{folder_path}/{title}"
+
     @staticmethod
     def _query_text(query: str | list[str] | None) -> str:
         if query is None:
diff --git a/pageindex/filesystem/types.py b/pageindex/filesystem/types.py
index 103d28d..b65c3b0 100644
--- a/pageindex/filesystem/types.py
+++ b/pageindex/filesystem/types.py
@@ -13,7 +13,6 @@ class SearchResult:
     folder_path: str
     folder_paths: list[str]
     metadata: dict[str, Any]
-    source_path: str = ""
     id: Optional[str] = None
     document_id: Optional[str] = None
     name: str = ""
@@ -33,7 +32,6 @@ class OpenResult:
     text: str
     external_id: Optional[str] = None
     folder_path: str = ""
-    source_path: str = ""
 
 
 @dataclass(frozen=True)
@@ -50,7 +48,6 @@ class FileEntry:
     file_ref: str
     external_id: Optional[str]
     storage_uri: str
-    source_path: str
     title: str
     descriptor: str
     content_type: str
diff --git a/tests/test_filesystem_store.py b/tests/test_filesystem_store.py
index 7f42503..ed9ef38 100644
--- a/tests/test_filesystem_store.py
+++ b/tests/test_filesystem_store.py
@@ -21,7 +21,6 @@ def test_insert_files_does_not_disable_sqlite_synchronous(tmp_path):
                 "file_ref": "ref_report",
                 "external_id": "doc_report",
                 "storage_uri": "file:///tmp/report.pdf",
-                "source_path": "documents/report.pdf",
                 "folder_path": "/documents",
                 "title": "Report",
                 "descriptor": "documents/report.pdf",
diff --git a/tests/test_metadata_generation.py b/tests/test_metadata_generation.py
index 3e64a4b..1f1aec6 100644
--- a/tests/test_metadata_generation.py
+++ b/tests/test_metadata_generation.py
@@ -20,7 +20,6 @@ def test_metadata_generator_uses_provider_parameter():
         file_ref="file_a",
         external_id="doc_a",
         title="A",
-        source_path="docs/a.txt",
         content_type="text/plain",
         source_type=None,
         text="hello",
diff --git a/tests/test_pageindex_filesystem_scope.py b/tests/test_pageindex_filesystem_scope.py
index b74cc79..63d5b1b 100644
--- a/tests/test_pageindex_filesystem_scope.py
+++ b/tests/test_pageindex_filesystem_scope.py
@@ -135,7 +135,6 @@ def _register_browse_file(
     filesystem.metadata_generator = SummaryGenerator()
     return filesystem.register_file(
         storage_uri=f"file:///tmp/{external_id}.txt",
-        source_path=f"documents/{external_id}.txt",
         folder_path=folder_path,
         external_id=external_id,
         title=f"{external_id}.txt",
@@ -427,7 +426,7 @@ def test_browse_shell_output_uses_fixed_blocks_with_pagination_command(tmp_path)
     assert "score:" not in rendered
 
 
-def test_browse_shell_path_falls_back_to_unique_locator_when_source_collides(tmp_path):
+def test_browse_shell_path_uses_virtual_locator_when_source_collides(tmp_path):
     from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
     from pageindex.filesystem.metadata_generation import MetadataGenerationResult
 
@@ -443,7 +442,6 @@ def test_browse_shell_path_falls_back_to_unique_locator_when_source_collides(tmp
     )
     first_ref = filesystem.register_file(
         storage_uri="file:///tmp/first.json",
-        source_path="shared/source.json",
         folder_path="/documents",
         external_id="dsid_first",
         title="First",
@@ -459,7 +457,6 @@ def test_browse_shell_path_falls_back_to_unique_locator_when_source_collides(tmp
     )
     filesystem.register_file(
         storage_uri="file:///tmp/second.json",
-        source_path="shared/source.json",
         folder_path="/documents",
         external_id="dsid_second",
         title="Second",
@@ -478,13 +475,53 @@ def test_browse_shell_path_falls_back_to_unique_locator_when_source_collides(tmp
 
     rendered = executor.execute('browse /documents "first"')
 
-    assert "path: dsid_first" in rendered
+    assert "path: /documents/First" in rendered
     assert "path: /shared/source.json" not in rendered
-    assert filesystem.store.resolve_file_ref("dsid_first") == first_ref
-    with pytest.raises(KeyError, match="Ambiguous file target"):
+    assert filesystem.store.resolve_file_ref("/documents/First") == first_ref
+    with pytest.raises(KeyError, match="Unknown file target"):
         filesystem.store.resolve_file_ref("/shared/source.json")
 
 
+def test_browse_shell_path_never_returns_storage_uri_path(tmp_path):
+    """Browse output shows the virtual path and never the physical storage_uri path."""
+    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
+    from pageindex.filesystem.metadata_generation import MetadataGenerationResult
+
+    class SummaryGenerator:
+        def generate(self, document, *, fields):
+            return MetadataGenerationResult(
+                values={"summary": "summary for physical source report"}
+            )
+
+    filesystem = PageIndexFileSystem(
+        workspace=tmp_path / "workspace",
+        metadata_generator=SummaryGenerator(),
+    )
+    file_ref = filesystem.register_file(
+        storage_uri="file:///srv/physical-source/report.pdf",
+        folder_path="/documents/reports",
+        external_id="dsid_report",
+        title="report.pdf",
+        content="physical source report content",
+        metadata_policy={
+            "fields": {
+                "summary": True,
+                "doc_type": False,
+                "domain": False,
+                "topic": False,
+            }
+        },
+    )
+    filesystem.semantic_retrieval_backend = BrowseBackend(["dsid_report"])
+    executor = PIFSCommandExecutor(filesystem)
+
+    rendered = executor.execute('browse /documents/reports "physical source"')
+
+    assert "path: /documents/reports/report.pdf" in rendered
+    assert "/srv/physical-source" not in rendered
+    assert filesystem.store.resolve_file_ref("/documents/reports/report.pdf") == file_ref
+
+
 def test_browse_scope_keeps_ordinary_folders_out_of_source_type_filters(tmp_path):
     from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
     from pageindex.filesystem.metadata_generation import MetadataGenerationResult
@@ -501,7 +537,6 @@ def test_browse_scope_keeps_ordinary_folders_out_of_source_type_filters(tmp_path
     )
     file_ref = filesystem.register_file(
         storage_uri="file:///tmp/report.pdf",
-        source_path="examples/documents/report.pdf",
         folder_path="/documents",
         external_id="dsid_report",
         title="report.pdf",
@@ -525,14 +560,13 @@ def test_browse_scope_keeps_ordinary_folders_out_of_source_type_filters(tmp_path
     )
 
     assert "source_type" not in backend.calls[0][2]
-    assert "source_path" not in backend.calls[0][2]
-    assert result["data"]["data"][0]["path"] == "/examples/documents/report.pdf"
+    assert result["data"]["data"][0]["path"] == "/documents/report.pdf"
     assert result["data"]["data"][0]["summary"] == "Federal Reserve annual report summary"
     assert filesystem.store.resolve_file_ref(result["data"]["data"][0]["path"]) == file_ref
 
 
-def test_browse_path_is_unique_source_target_when_titles_collide(tmp_path):
-    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
+def test_register_file_rejects_duplicate_title_in_folder(tmp_path):
+    from pageindex.filesystem import PageIndexFileSystem
     from pageindex.filesystem.metadata_generation import MetadataGenerationResult
 
     class SummaryGenerator:
@@ -545,9 +579,8 @@ def test_browse_path_is_unique_source_target_when_titles_collide(tmp_path):
         workspace=tmp_path / "workspace",
         metadata_generator=SummaryGenerator(),
     )
-    first_ref = filesystem.register_file(
+    filesystem.register_file(
         storage_uri="file:///tmp/first.json",
-        source_path="slack/dsid_first.json",
         folder_path="/documents",
         external_id="dsid_first",
         title="announcements",
@@ -561,34 +594,25 @@ def test_browse_path_is_unique_source_target_when_titles_collide(tmp_path):
             }
         },
     )
-    filesystem.register_file(
-        storage_uri="file:///tmp/second.json",
-        source_path="slack/dsid_second.json",
-        folder_path="/documents",
-        external_id="dsid_second",
-        title="announcements",
-        content="second announcement mentions unrelated maintenance.",
-        metadata_policy={
-            "fields": {
-                "summary": True,
-                "doc_type": False,
-                "domain": False,
-                "topic": False,
-            }
-        },
-    )
-    filesystem.semantic_retrieval_backend = SummaryBackend("dsid_first")
-    executor = PIFSCommandExecutor(filesystem, json_output=True)
-
-    result = json.loads(executor.execute('browse /documents "H200 reservations"'))
-
-    assert result["data"]["data"][0]["path"] == "/slack/dsid_first.json"
-    assert filesystem.store.resolve_file_ref(result["data"]["data"][0]["path"]) == first_ref
-    with pytest.raises(KeyError, match="Ambiguous file target"):
-        filesystem.store.resolve_file_ref("/documents/announcements")
+    with pytest.raises(FileExistsError, match="File already exists at /documents/announcements"):
+        filesystem.register_file(
+            storage_uri="file:///tmp/second.json",
+            folder_path="/documents",
+            external_id="dsid_second",
+            title="announcements",
+            content="second announcement mentions unrelated maintenance.",
+            metadata_policy={
+                "fields": {
+                    "summary": True,
+                    "doc_type": False,
+                    "domain": False,
+                    "topic": False,
+                }
+            },
+        )
 
 
-def test_browse_path_falls_back_when_source_target_is_ambiguous(tmp_path):
+def test_browse_path_uses_virtual_title_when_storage_paths_are_unrelated(tmp_path):
     from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
     from pageindex.filesystem.metadata_generation import MetadataGenerationResult
 
@@ -604,7 +628,6 @@ def test_browse_path_falls_back_when_source_target_is_ambiguous(tmp_path):
     )
     first_ref = filesystem.register_file(
         storage_uri="file:///tmp/first.json",
-        source_path="shared/source.json",
         folder_path="/documents",
         external_id="dsid_first",
         title="First",
@@ -620,7 +643,6 @@ def test_browse_path_falls_back_when_source_target_is_ambiguous(tmp_path):
     )
     filesystem.register_file(
         storage_uri="file:///tmp/second.json",
-        source_path="shared/source.json",
         folder_path="/documents",
         external_id="dsid_second",
         title="Second",
@@ -639,7 +661,7 @@ def test_browse_path_falls_back_when_source_target_is_ambiguous(tmp_path):
 
     result = json.loads(executor.execute('browse /documents "first"'))
 
-    assert result["data"]["data"][0]["path"] == "dsid_first"
+    assert result["data"]["data"][0]["path"] == "/documents/First"
     assert filesystem.store.resolve_file_ref(result["data"]["data"][0]["path"]) == first_ref
 
 
@@ -663,7 +685,6 @@ def test_old_semantic_commands_are_unsupported_even_when_indexes_exist(tmp_path)
     )
     filesystem.register_file(
         storage_uri="file:///tmp/market-note.pdf",
-        source_path="examples/documents/market-note.pdf",
         folder_path="/documents",
         external_id="dsid_market_note",
         title="market-note.pdf",
@@ -695,13 +716,13 @@ def test_old_semantic_commands_are_unsupported_even_when_indexes_exist(tmp_path)
         executor.execute('browse /documents "Federal Reserve" --space entity')
     )
     assert entity["data"]["data"][0]["summary"] == "Risk and compliance summary"
-    assert entity["data"]["data"][0]["path"] == "/examples/documents/market-note.pdf"
+    assert entity["data"]["data"][0]["path"] == "/documents/market-note.pdf"
 
     relation = json.loads(
         executor.execute('browse /documents "Disney valuation" --space relation')
     )
     assert relation["data"]["data"][0]["summary"] == "Risk and compliance summary"
-    assert relation["data"]["data"][0]["path"] == "/examples/documents/market-note.pdf"
+    assert relation["data"]["data"][0]["path"] == "/documents/market-note.pdf"
 
 
 def test_find_name_is_lexical_and_find_relation_is_not_semantic_alias(tmp_path):
@@ -711,7 +732,6 @@ def test_find_name_is_lexical_and_find_relation_is_not_semantic_alias(tmp_path):
     filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
     filesystem.register_file(
         storage_uri="file:///tmp/report.pdf",
-        source_path="examples/documents/report.pdf",
         folder_path="/documents",
         external_id="dsid_report",
         title="Annual report",
@@ -755,7 +775,7 @@ def test_broad_recursive_grep_suggests_browse_not_removed_semantic_commands(tmp_
     assert "semantic-grep" not in rendered
 
 
-def test_grep_source_file_requires_terms_on_same_line(tmp_path):
+def test_grep_file_requires_terms_on_same_line(tmp_path):
     from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
 
     source_dir = tmp_path / "source" / "documents"
@@ -769,11 +789,10 @@ def test_grep_source_file_requires_terms_on_same_line(tmp_path):
     filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
     filesystem.register_file(
         storage_uri=str(source),
-        source_path="documents/split.json",
         folder_path="/documents",
         external_id="doc_split_terms",
         title="Split source terms",
-        content="registered artifact without the searched tokens",
+        content=source.read_text(encoding="utf-8"),
     )
     executor = PIFSCommandExecutor(filesystem, json_output=True)
 
@@ -813,7 +832,6 @@ def test_existing_summary_projection_index_uses_current_config_when_dimensions_m
                 file_ref="file_a",
                 external_id="doc_a",
                 source_type="documents",
-                source_path="documents/a.pdf",
                 title="A",
                 text="summary",
                 vector=[1.0, 0.0, 0.0],
@@ -879,7 +897,6 @@ def test_existing_summary_projection_index_dimension_mismatch_rejects_retrieval(
                 file_ref="file_a",
                 external_id="doc_a",
                 source_type="documents",
-                source_path="documents/a.pdf",
                 title="A",
                 text="summary",
                 vector=[1.0, 0.0, 0.0],
@@ -948,7 +965,6 @@ def test_browse_semantic_files_uses_summary_projection_when_only_summary_availab
     )
     filesystem.register_file(
         storage_uri=source.as_uri(),
-        source_path="docs/source.txt",
         folder_path="/documents",
         external_id="doc_summary_only",
         title="Operations note",
diff --git a/tests/test_pageindex_structural_read.py b/tests/test_pageindex_structural_read.py
index 2f27077..29e7c9a 100644
--- a/tests/test_pageindex_structural_read.py
+++ b/tests/test_pageindex_structural_read.py
@@ -60,7 +60,6 @@ def test_pageindex_structure_options_report_failed_register_build(monkeypatch):
         monkeypatch.setattr(PageIndexClient, "index", fail_index)
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path="docs/report.md",
             external_id="dsid_structural_missing",
             title="Structural report",
             content=source.read_text(encoding="utf-8"),
@@ -152,14 +151,12 @@ def test_register_pdf_markdown_uses_pageindex_extracted_text_for_metadata_and_ft
 
         filesystem.register_file(
             storage_uri=source_pdf.as_uri(),
-            source_path="docs/report.pdf",
             external_id="dsid_pdf_extracted",
             title="PDF extracted",
             content="CALLER PDF CONTENT MUST NOT REACH GENERATOR",
         )
         filesystem.register_file(
             storage_uri=source_md.as_uri(),
-            source_path="docs/notes.md",
             external_id="dsid_md_extracted",
             title="Markdown extracted",
             content="CALLER MD CONTENT MUST NOT REACH GENERATOR",
@@ -167,8 +164,12 @@ def test_register_pdf_markdown_uses_pageindex_extracted_text_for_metadata_and_ft
 
         pdf_request = generator.calls[0][0]
         md_request = generator.calls[1][0]
-        pdf_stat = filesystem.store.file_info("dsid_pdf_extracted")
-        md_stat = filesystem.store.file_info("dsid_md_extracted")
+        pdf_entry = filesystem.store.get_file(
+            filesystem.store.resolve_file_ref("dsid_pdf_extracted")
+        )
+        md_entry = filesystem.store.get_file(
+            filesystem.store.resolve_file_ref("dsid_md_extracted")
+        )
 
         assert "PageIndex PDF extracted alpha text" in pdf_request.text
         assert "Second PageIndex PDF extracted beta text" in pdf_request.text
@@ -176,10 +177,10 @@ def test_register_pdf_markdown_uses_pageindex_extracted_text_for_metadata_and_ft
         assert "PageIndex Markdown extracted gamma text" in md_request.text
         assert "CALLER MD CONTENT" not in md_request.text
         assert "PageIndex PDF extracted alpha text" in Path(
-            pdf_stat["text_artifact_path"]
+            pdf_entry.text_artifact_path
         ).read_text(encoding="utf-8")
         assert "PageIndex Markdown extracted gamma text" in Path(
-            md_stat["text_artifact_path"]
+            md_entry.text_artifact_path
         ).read_text(encoding="utf-8")
         assert [r.external_id for r in filesystem.search("alpha beta", limit=5)] == [
             "dsid_pdf_extracted"
@@ -207,7 +208,6 @@ def test_register_text_metadata_generation_keeps_caller_content_without_pageinde
 
         filesystem.register_file(
             storage_uri="file:///tmp/readme.txt",
-            source_path="docs/readme.txt",
             external_id="dsid_text_generation",
             title="Text generation",
             content="Plain text caller content stays authoritative.",
@@ -215,11 +215,14 @@ def test_register_text_metadata_generation_keeps_caller_content_without_pageinde
         )
 
         stat = filesystem.store.file_info("dsid_text_generation")
+        entry = filesystem.store.get_file(
+            filesystem.store.resolve_file_ref("dsid_text_generation")
+        )
 
         assert generator.calls[0][0].text == "Plain text caller content stays authoritative."
         assert stat["pageindex_doc_id"] is None
         assert stat["pageindex_tree_status"] == "not_built"
-        assert Path(stat["text_artifact_path"]).read_text(
+        assert Path(entry.text_artifact_path).read_text(
             encoding="utf-8"
         ) == "Plain text caller content stays authoritative."
 
@@ -261,14 +264,12 @@ def test_register_pdf_markdown_cache_miss_invokes_pageindex_client_index(monkeyp
 
         filesystem.register_file(
             storage_uri=str(source_pdf),
-            source_path="docs/report.pdf",
             external_id="dsid_pdf_build",
             title="PDF build",
             content="pdf text",
         )
         filesystem.register_file(
             storage_uri=source_md.as_uri(),
-            source_path="docs/notes.md",
             external_id="dsid_md_build",
             title="Markdown build",
             content=source_md.read_text(encoding="utf-8"),
@@ -332,7 +333,6 @@ def test_cat_structure_page_reuses_pageindex_client_cache_without_indexing(monke
         monkeypatch.setattr(PageIndexClient, "index", fail_index)
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path="docs/report.pdf",
             external_id="dsid_structural_cached",
             title="Cached structural report",
             content="text artifact remains available for grep, not cat --all",
@@ -370,7 +370,6 @@ def test_cat_node_is_not_supported():
         filesystem = PageIndexFileSystem(workspace=Path(tmp) / "workspace")
         filesystem.register_file(
             storage_uri="file:///tmp/notes.md",
-            source_path="docs/notes.md",
             external_id="dsid_md_cached",
             title="Cached markdown notes",
             content="# Notes\n\nBody",
@@ -419,7 +418,6 @@ def test_cat_structure_page_and_text_outputs_are_hard_limited():
         )
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path="docs/report.pdf",
             external_id="dsid_limited_pdf",
             title="Limited structural report",
             content="text artifact remains available for grep",
@@ -427,7 +425,6 @@ def test_cat_structure_page_and_text_outputs_are_hard_limited():
         text_content = "\n".join(f"line {index}" for index in range(1, 106))
         filesystem.register_file(
             storage_uri="file:///tmp/long.txt",
-            source_path="docs/long.txt",
             external_id="dsid_long_text",
             title="Long text",
             content=text_content,
@@ -474,7 +471,6 @@ def test_tree_folder_behavior_is_preserved():
         filesystem = PageIndexFileSystem(workspace=Path(tmp) / "workspace")
         filesystem.register_file(
             storage_uri="file:///tmp/report.txt",
-            source_path="docs/report.txt",
             folder_path="/docs/reports",
             external_id="dsid_folder_tree",
             title="Folder report",
@@ -514,7 +510,6 @@ def test_tree_does_not_read_file_internal_pageindex_structure():
         )
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path="docs/report.pdf",
             external_id="dsid_tree_is_folder_only",
             title="Cached structural report",
             content="text artifact remains available",
@@ -536,28 +531,24 @@ def test_cat_all_is_limited_to_text_files():
         filesystem = PageIndexFileSystem(workspace=Path(tmp) / "workspace")
         filesystem.register_file(
             storage_uri="file:///tmp/readme.txt",
-            source_path="docs/readme.txt",
             external_id="dsid_text_file",
             title="Text readme",
             content="plain text body",
         )
         filesystem.register_file(
             storage_uri="file:///tmp/report.pdf",
-            source_path="docs/report.pdf",
             external_id="dsid_pdf_file",
             title="PDF report",
             content="extracted text should not be served through cat --all",
         )
         filesystem.register_file(
             storage_uri="file:///tmp/notes.md",
-            source_path="docs/notes.md",
             external_id="dsid_md_file",
             title="Markdown notes",
             content="markdown text should use PageIndex structure reads",
         )
         filesystem.register_file(
             storage_uri="file:///tmp/data.json",
-            source_path="docs/data.json",
             external_id="dsid_json_file",
             title="JSON record",
             content='{"body":"json"}',
@@ -589,7 +580,6 @@ def test_pageindex_structure_commands_are_limited_to_pdf_and_markdown():
         filesystem = PageIndexFileSystem(workspace=Path(tmp) / "workspace")
         filesystem.register_file(
             storage_uri="file:///tmp/readme.txt",
-            source_path="docs/readme.txt",
             external_id="dsid_text_only",
             title="Text readme",
             content="plain text body",
@@ -617,7 +607,6 @@ def test_existing_pageindex_status_allows_legacy_record_without_format_suffix():
         filesystem = PageIndexFileSystem(workspace=Path(tmp) / "workspace")
         file_ref = filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path="uploads/uploaded",
             external_id="dsid_legacy_pageindex",
             title="Legacy PageIndex record",
             content="text/plain is only a weak default here",
@@ -665,7 +654,6 @@ def test_read_commands_do_not_link_pageindex_cache_when_pointer_is_missing(monke
         monkeypatch.setattr(PageIndexClient, "index", fail_index)
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path="docs/late.md",
             external_id="dsid_late_cache",
             title="Late cache",
             content=source.read_text(encoding="utf-8"),
diff --git a/tests/test_pifs_add_command.py b/tests/test_pifs_add_command.py
index 4161b80..47d5913 100644
--- a/tests/test_pifs_add_command.py
+++ b/tests/test_pifs_add_command.py
@@ -80,12 +80,13 @@ def test_add_text_folder_target_copies_artifact_indexes_summary_and_is_readable(
 
     info = filesystem.add_file(str(source), "/documents/reports")
 
-    assert info["source_path"] == "documents/reports/filing.txt"
+    assert info["path"] == "/documents/reports/filing.txt"
     assert info["folder_path"] == "/documents/reports"
     assert filesystem.folder_info("/documents/reports")["path"] == "/documents/reports"
-    assert info["storage_uri"] != source.as_uri()
-    assert "/artifacts/uploads/" in info["storage_uri"]
-    copied_path = Path(info["storage_uri"].removeprefix("file://"))
+    entry = filesystem.store.get_file(info["file_ref"])
+    assert entry.storage_uri != source.as_uri()
+    assert "/artifacts/uploads/" in entry.storage_uri
+    copied_path = Path(entry.storage_uri.removeprefix("file://"))
     assert copied_path.read_text(encoding="utf-8") == "alpha filing text for pifs add"
     assert copied_path.resolve() != source.resolve()
 
@@ -164,7 +165,7 @@ def test_add_configures_semantic_retrieval_in_same_filesystem_instance(tmp_path)
         recursive=True,
         page_size=5,
     )
-    assert [item["source_path"] for item in results["data"]] == ["documents/semantic.txt"]
+    assert [item["path"] for item in results["data"]] == ["/documents/semantic.txt"]
 
 
 def test_add_markdown_builds_pageindex_tree_from_copied_artifact(tmp_path, monkeypatch):
@@ -205,10 +206,11 @@ def test_add_markdown_builds_pageindex_tree_from_copied_artifact(tmp_path, monke
     info = filesystem.add_file(source, "/documents")
     executor = PIFSCommandExecutor(filesystem, json_output=True)
     structure = json.loads(executor.execute("cat /documents/notes.md --structure"))
+    entry = filesystem.store.get_file(info["file_ref"])
 
     assert structure["data"]["available"] is True
     assert structure["data"]["structure"][0]["title"] == "Notes"
-    assert indexed_paths == [Path(info["storage_uri"].removeprefix("file://"))]
+    assert indexed_paths == [Path(entry.storage_uri.removeprefix("file://"))]
     assert indexed_paths[0].resolve() != source.resolve()
 
 
@@ -469,8 +471,6 @@ def test_cli_add_uses_workspace_and_prints_added_file(monkeypatch, capsys, tmp_p
             return {
                 "file_ref": "file_cli",
                 "path": "/documents/cli.txt",
-                "source_path": "documents/cli.txt",
-                "storage_uri": "file:///workspace/artifacts/uploads/file_cli/cli.txt",
             }
 
     monkeypatch.setattr(cli, "PageIndexFileSystem", FakeAddFileSystem)
@@ -482,5 +482,4 @@ def test_cli_add_uses_workspace_and_prints_added_file(monkeypatch, capsys, tmp_p
     assert capsys.readouterr().out == (
         "added: /documents/cli.txt\n"
         "file_ref: file_cli\n"
-        "storage_uri: file:///workspace/artifacts/uploads/file_cli/cli.txt\n"
     )
diff --git a/tests/test_pifs_cli.py b/tests/test_pifs_cli.py
index 405f5b9..67c3a3a 100644
--- a/tests/test_pifs_cli.py
+++ b/tests/test_pifs_cli.py
@@ -76,7 +76,6 @@ def test_cli_workspace_surfaces_projection_dimension_mismatch(tmp_path):
                 file_ref="file_a",
                 external_id="doc_a",
                 source_type="documents",
-                source_path="documents/a.pdf",
                 title="A",
                 text="summary",
                 vector=[1.0, 0.0, 0.0],
@@ -226,6 +225,28 @@ def test_cli_ask_invokes_agent_with_question(monkeypatch, capsys, tmp_path):
     }
 
 
+def test_cli_ask_defaults_to_global_agent_model(monkeypatch, capsys, tmp_path):
+    from pageindex.filesystem import cli
+
+    workspace = tmp_path / "workspace"
+    agent_calls = []
+    monkeypatch.delenv("PIFS_AGENT_MODEL", raising=False)
+    monkeypatch.delenv("PIFS_MODEL", raising=False)
+
+    def fake_run_pifs_agent(filesystem, question, **kwargs):
+        agent_calls.append(kwargs)
+        return "agent answer"
+
+    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
+    monkeypatch.setattr(cli, "run_pifs_agent", fake_run_pifs_agent)
+
+    status = cli.main(["ask", "--workspace", str(workspace), "What?"])
+
+    assert status == 0
+    assert capsys.readouterr().out == "agent answer\n"
+    assert agent_calls[0]["model"] == "gpt-5.4"
+
+
 def test_cli_ask_loads_env_file_before_running_agent(monkeypatch, capsys, tmp_path):
     from pageindex.filesystem import cli
 
diff --git a/tests/test_pifs_find_maxdepth.py b/tests/test_pifs_find_maxdepth.py
index 8b93f70..7fbc445 100644
--- a/tests/test_pifs_find_maxdepth.py
+++ b/tests/test_pifs_find_maxdepth.py
@@ -24,7 +24,6 @@ def _register_find_fixture(tmp_path: Path):
         source.write_text(f"{title} fixture text", encoding="utf-8")
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path=f"docs/{filename}",
             folder_path=folder_path,
             external_id=external_id,
             title=title,
@@ -145,7 +144,6 @@ def test_stat_shell_output_includes_unified_metadata_status(tmp_path):
     )
     filesystem.register_file(
         storage_uri=source.as_uri(),
-        source_path="docs/source.txt",
         folder_path="/documents",
         external_id="doc_generated",
         title="Generated metadata document",
@@ -196,7 +194,6 @@ def test_stat_field_reads_one_metadata_field_across_multiple_targets(tmp_path):
         source.write_text(f"fixture text {index}", encoding="utf-8")
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path=f"docs/source{index}.txt",
             folder_path="/documents",
             external_id=f"doc_summary_{index}",
             title=f"Summary document {index}",
@@ -249,7 +246,6 @@ def test_stat_field_rejects_more_than_twenty_targets(tmp_path):
         source.write_text(f"fixture text {index}", encoding="utf-8")
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path=f"docs/source{index}.txt",
             folder_path="/documents",
             external_id=f"doc_{index}",
             title=f"Document {index}",
@@ -273,7 +269,6 @@ def test_register_rejects_pifs_owned_metadata_fields(tmp_path):
     with pytest.raises(ValueError, match="PIFS-owned generated field"):
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path="docs/source.txt",
             folder_path="/documents",
             external_id="doc_conflict",
             title="Conflict document",
@@ -299,7 +294,6 @@ def test_batch_metadata_status_generates_into_unified_metadata(tmp_path):
     )
     file_ref = filesystem.register_file(
         storage_uri=source.as_uri(),
-        source_path="docs/source.txt",
         folder_path="/documents",
         external_id="doc_batch",
         title="Batch document",
diff --git a/tests/test_pifs_like_escape.py b/tests/test_pifs_like_escape.py
index 5c0751e..5b624be 100644
--- a/tests/test_pifs_like_escape.py
+++ b/tests/test_pifs_like_escape.py
@@ -14,7 +14,6 @@ def _register_file(
     source.write_text(f"{external_id} fixture text", encoding="utf-8")
     filesystem.register_file(
         storage_uri=source.as_uri(),
-        source_path=f"docs/{filename}",
         folder_path=folder_path,
         external_id=external_id,
         title=external_id,
diff --git a/tests/test_pifs_path_resolution.py b/tests/test_pifs_path_resolution.py
index 184fc53..77552b6 100644
--- a/tests/test_pifs_path_resolution.py
+++ b/tests/test_pifs_path_resolution.py
@@ -7,7 +7,6 @@ def test_root_virtual_file_path_resolves_without_double_slash(tmp_path):
     filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
     file_ref = filesystem.register_file(
         storage_uri="file:///tmp/root-source.txt",
-        source_path="sources/root-source.txt",
         folder_path="/",
         external_id="doc_root_title",
         title="Root Title",
@@ -17,13 +16,12 @@ def test_root_virtual_file_path_resolves_without_double_slash(tmp_path):
     assert filesystem.store.resolve_file_ref("/Root Title") == file_ref
 
 
-def test_ambiguous_virtual_file_path_raises_clear_error(tmp_path):
+def test_nested_virtual_file_path_resolves_by_folder_and_title(tmp_path):
     from pageindex.filesystem import PageIndexFileSystem
 
     filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
     first_ref = filesystem.register_file(
         storage_uri="file:///tmp/first.txt",
-        source_path="b/file.txt",
         folder_path="/a",
         external_id="doc_first",
         title="First",
@@ -31,26 +29,23 @@ def test_ambiguous_virtual_file_path_raises_clear_error(tmp_path):
     )
     second_ref = filesystem.register_file(
         storage_uri="file:///tmp/second.txt",
-        source_path="second-source.txt",
         folder_path="/a/b",
         external_id="doc_second",
         title="file.txt",
         content="second content",
     )
 
-    with pytest.raises(KeyError, match="Ambiguous file target"):
-        filesystem.store.resolve_file_ref("/a/b/file.txt")
+    assert filesystem.store.resolve_file_ref("/a/b/file.txt") == second_ref
 
     assert first_ref != second_ref
 
 
-def test_duplicate_source_path_target_raises_clear_error(tmp_path):
+def test_unknown_virtual_file_target_raises_clear_error(tmp_path):
     from pageindex.filesystem import PageIndexFileSystem
 
     filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
     first_ref = filesystem.register_file(
         storage_uri="file:///tmp/first.txt",
-        source_path="shared/source.txt",
         folder_path="/first",
         external_id="doc_first",
         title="First",
@@ -58,14 +53,13 @@ def test_duplicate_source_path_target_raises_clear_error(tmp_path):
     )
     second_ref = filesystem.register_file(
         storage_uri="file:///tmp/second.txt",
-        source_path="shared/source.txt",
         folder_path="/second",
         external_id="doc_second",
         title="Second",
         content="second content",
     )
 
-    with pytest.raises(KeyError, match="Ambiguous file target"):
-        filesystem.store.resolve_file_ref("/shared/source.txt")
+    with pytest.raises(KeyError, match="Unknown file target"):
+        filesystem.store.resolve_file_ref("/shared/missing.txt")
 
     assert first_ref != second_ref
diff --git a/tests/test_pifs_register_side_effects.py b/tests/test_pifs_register_side_effects.py
index 867dd6b..435ca7a 100644
--- a/tests/test_pifs_register_side_effects.py
+++ b/tests/test_pifs_register_side_effects.py
@@ -40,7 +40,6 @@ def test_register_insert_failure_cleans_owned_artifacts_and_skips_projection(
     with pytest.raises(RuntimeError, match="catalog insert failed"):
         filesystem.register_file(
             storage_uri=source.as_uri(),
-            source_path="docs/source.txt",
             folder_path="/documents",
             external_id="doc_insert_failure",
             title="Insert failure",
diff --git a/tests/test_semantic_index.py b/tests/test_semantic_index.py
index c1da0dc..4684d8f 100644
--- a/tests/test_semantic_index.py
+++ b/tests/test_semantic_index.py
@@ -31,7 +31,6 @@ def test_sqlite_vec_semantic_index_round_trip(tmp_path):
                 file_ref="file_a",
                 external_id="doc_a",
                 source_type="github",
-                source_path="github/a.json",
                 title="Multipart upload limits",
                 text="multipart upload limits",
                 vector=[1.0, 0.0, 0.0],
@@ -41,7 +40,6 @@ def test_sqlite_vec_semantic_index_round_trip(tmp_path):
                 file_ref="file_b",
                 external_id="doc_b",
                 source_type="slack",
-                source_path="slack/b.json",
                 title="GPU cache issue",
                 text="gpu cache issue",
                 vector=[0.0, 1.0, 0.0],
@@ -72,7 +70,6 @@ def test_sqlite_vec_semantic_index_file_ref_filter_not_limited_by_global_rank(tm
             file_ref=f"file_off_{item:02d}",
             external_id=f"doc_off_{item:02d}",
             source_type="documents",
-            source_path=f"other/{item:02d}.pdf",
             title=f"Off scope {item:02d}",
             text="off scope",
             vector=[1.0, 0.0],
@@ -84,7 +81,6 @@ def test_sqlite_vec_semantic_index_file_ref_filter_not_limited_by_global_rank(tm
             file_ref="file_in_scope",
             external_id="doc_in_scope",
             source_type="documents",
-            source_path="documents/in-scope.pdf",
             title="In scope",
             text="in scope",
             vector=[0.0, 1.0],
@@ -117,7 +113,6 @@ def test_summary_projection_indexes_unified_metadata_summary(tmp_path):
             "file_ref": "file_a",
             "external_id": "doc_a",
             "source_type": "documents",
-            "source_path": "docs/a.pdf",
             "title": "A",
             "metadata": {
                 "summary": "Unified metadata summary.",
@@ -153,7 +148,6 @@ def test_summary_projection_indexer_defaults_to_1024_dimensions(tmp_path):
             "file_ref": "file_a",
             "external_id": "doc_a",
             "source_type": "documents",
-            "source_path": "docs/a.pdf",
             "title": "A",
             "metadata": {"summary": "Default dimension summary."},
         }
@@ -180,7 +174,6 @@ def test_summary_projection_indexer_allows_explicit_256_dimensions(tmp_path):
             "file_ref": "file_a",
             "external_id": "doc_a",
             "source_type": "documents",
-            "source_path": "docs/a.pdf",
             "title": "A",
             "metadata": {"summary": "Explicit 256 dimension summary."},
         }
@@ -304,7 +297,6 @@ def test_summary_projection_dimension_mismatch_preserves_existing_index(tmp_path
                 file_ref="file_a",
                 external_id="doc_a",
                 source_type="documents",
-                source_path="docs/a.pdf",
                 title="A",
                 text="summary",
                 vector=[1.0, 0.0, 0.0],