diff --git a/pageindex/filesystem/store.py b/pageindex/filesystem/store.py
index 1cca9ea..85de66b 100644
--- a/pageindex/filesystem/store.py
+++ b/pageindex/filesystem/store.py
@@ -1059,22 +1059,62 @@ class SQLiteFileSystemStore:
             ).fetchone()
             if row:
                 return row["file_ref"]
-            row = conn.execute(
-                """
-                SELECT f.file_ref
-                FROM files f
-                JOIN file_folders ff ON ff.file_ref = f.file_ref
-                JOIN folders pf ON pf.folder_id = ff.folder_id
-                WHERE (pf.path || '/' || f.title) = ?
-                   OR (pf.path || '/' || f.source_path) = ?
-                LIMIT 1
-                """,
-                (target, target),
-            ).fetchone()
-            if row:
-                return row["file_ref"]
+            virtual_file_ref = self._resolve_virtual_file_ref(conn, target)
+            if virtual_file_ref:
+                return virtual_file_ref
         raise KeyError(f"Unknown file target: {target}")
 
+    def _resolve_virtual_file_ref(self, conn: sqlite3.Connection, target: str) -> str | None:
+        virtual_target = normalize_path(target)
+        rows = conn.execute(
+            """
+            WITH virtual_matches AS (
+                SELECT
+                    f.file_ref,
+                    f.external_id,
+                    f.title,
+                    f.source_path,
+                    pf.path AS folder_path,
+                    (CASE WHEN pf.path = '/' THEN '/' ELSE pf.path || '/' END)
+                        || ltrim(f.title, '/') AS title_virtual_path,
+                    (CASE WHEN pf.path = '/' THEN '/' ELSE pf.path || '/' END)
+                        || ltrim(f.source_path, '/') AS source_virtual_path
+                FROM files f
+                JOIN file_folders ff ON ff.file_ref = f.file_ref
+                JOIN folders pf ON pf.folder_id = ff.folder_id
+                WHERE f.deleted_at IS NULL
+            )
+            SELECT
+                file_ref,
+                external_id,
+                title,
+                source_path,
+                MIN(folder_path) AS folder_path
+            FROM virtual_matches
+            WHERE title_virtual_path = ?
+               OR source_virtual_path = ?
+            GROUP BY file_ref, external_id, title, source_path
+            ORDER BY file_ref
+            LIMIT 2
+            """,
+            (virtual_target, virtual_target),
+        ).fetchall()
+        if not rows:
+            return None
+        if len(rows) > 1:
+            matches = "; ".join(self._virtual_match_summary(row) for row in rows)
+            raise KeyError(f"Ambiguous file target: {target}. Matches: {matches}")
+        return rows[0]["file_ref"]
+
+    @staticmethod
+    def _virtual_match_summary(row: sqlite3.Row) -> str:
+        external_id = row["external_id"] or "-"
+        return (
+            f"file_ref={row['file_ref']} external_id={external_id} "
+            f"folder={row['folder_path']} title={row['title']!r} "
+            f"source_path={row['source_path']!r}"
+        )
+
     def ensure_folder(
         self,
         conn: sqlite3.Connection | None,
diff --git a/tests/test_pifs_path_resolution.py b/tests/test_pifs_path_resolution.py
new file mode 100644
index 0000000..08cf28f
--- /dev/null
+++ b/tests/test_pifs_path_resolution.py
@@ -0,0 +1,44 @@
+import pytest
+
+
+def test_root_virtual_file_path_resolves_without_double_slash(tmp_path):
+    from pageindex.filesystem import PageIndexFileSystem
+
+    filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
+    file_ref = filesystem.register_file(
+        storage_uri="file:///tmp/root-source.txt",
+        source_path="sources/root-source.txt",
+        folder_path="/",
+        external_id="doc_root_title",
+        title="Root Title",
+        content="root content",
+    )
+
+    assert filesystem.store.resolve_file_ref("/Root Title") == file_ref
+
+
+def test_ambiguous_virtual_file_path_raises_clear_error(tmp_path):
+    from pageindex.filesystem import PageIndexFileSystem
+
+    filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
+    first_ref = filesystem.register_file(
+        storage_uri="file:///tmp/first.txt",
+        source_path="b/file.txt",
+        folder_path="/a",
+        external_id="doc_first",
+        title="First",
+        content="first content",
+    )
+    second_ref = filesystem.register_file(
+        storage_uri="file:///tmp/second.txt",
+        source_path="second-source.txt",
+        folder_path="/a/b",
+        external_id="doc_second",
+        title="file.txt",
+        content="second content",
+    )
+
+    with pytest.raises(KeyError, match="Ambiguous file target"):
+        filesystem.store.resolve_file_ref("/a/b/file.txt")
+
+    assert first_ref != second_ref