Only report grep source-file hits when a line actually matches the query.

_first_matching_source_line() now returns (None, "") when the file cannot be
read or when no line satisfies _line_matches(), instead of falling back to
line 1. The caller skips such files rather than appending a hit. A regression
test covers a query whose two terms sit on different lines of the source file.

NOTE(review): this patch was recovered from a copy whose newlines and
indentation had been collapsed. The hunk line structure follows the @@ counts,
but leading whitespace is reconstructed and should be confirmed against the
target tree. If the context does not match, retry with
`git apply --ignore-whitespace`.

diff --git a/pageindex/filesystem/commands.py b/pageindex/filesystem/commands.py
index c6394b9..270de4d 100644
--- a/pageindex/filesystem/commands.py
+++ b/pageindex/filesystem/commands.py
@@ -1466,6 +1466,8 @@ class PIFSCommandExecutor:
             if direct_only and self._folder_path_for_source_path(file_row["source_path"]) != folder_path:
                 continue
             line_number, text = self._first_matching_source_line(path, query)
+            if line_number is None:
+                continue
             hits.append(
                 {
                     "file_ref": file_row["file_ref"],
@@ -1560,15 +1562,15 @@ class PIFSCommandExecutor:
                 break
         return filtered
 
-    def _first_matching_source_line(self, path: Path, query: str) -> tuple[int, str]:
+    def _first_matching_source_line(self, path: Path, query: str) -> tuple[int | None, str]:
         try:
             lines = path.read_text(encoding="utf-8", errors="ignore").splitlines()
         except OSError:
-            return 1, ""
+            return None, ""
         for line_number, line in enumerate(lines, 1):
             if self._line_matches(line, query):
                 return line_number, self._compact_text(line, max_chars=220)
-        return 1, self._compact_text(lines[0], max_chars=220) if lines else ""
+        return None, ""
 
     def _source_root(self) -> Path | None:
         with self.filesystem.store.connect() as conn:
diff --git a/tests/test_pageindex_filesystem_scope.py b/tests/test_pageindex_filesystem_scope.py
index a07c4bb..dd35027 100644
--- a/tests/test_pageindex_filesystem_scope.py
+++ b/tests/test_pageindex_filesystem_scope.py
@@ -87,6 +87,40 @@ def test_semantic_search_scope_filters_explicit_source_type_facets():
     ) == {}
 
 
+def test_grep_source_file_requires_terms_on_same_line(tmp_path):
+    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
+
+    source_dir = tmp_path / "source" / "documents"
+    source_dir.mkdir(parents=True)
+    source = source_dir / "split.json"
+    source.write_text(
+        '{\n "first": "alpha evidence lives here",\n'
+        ' "second": "omega evidence lives there"\n}\n',
+        encoding="utf-8",
+    )
+    filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
+    filesystem.register_file(
+        storage_uri=str(source),
+        source_path="documents/split.json",
+        folder_path="/documents",
+        external_id="doc_split_terms",
+        title="Split source terms",
+        content="registered artifact without the searched tokens",
+    )
+    executor = PIFSCommandExecutor(filesystem, json_output=True)
+
+    result = json.loads(executor.execute('grep -R "alpha omega" /documents'))
+
+    assert result["data"]["mode"] == "files"
+    assert result["data"]["data"] == []
+
+    matched = json.loads(executor.execute('grep -R "alpha evidence" /documents'))
+
+    assert matched["data"]["data"][0]["external_id"] == "doc_split_terms"
+    assert matched["data"]["data"][0]["line"] == 2
+    assert "alpha evidence" in matched["data"]["data"][0]["text"]
+
+
 def test_existing_summary_projection_index_configures_retrieval_backend(tmp_path, monkeypatch):
     from pageindex.filesystem import PageIndexFileSystem
     from pageindex.filesystem.semantic_index import SemanticIndexRecord, SQLiteVecSemanticIndex