feat: Implement Role-Based Access Control (RBAC) for search space resources.

-Introduce granular permissions for documents, chats, podcasts, and logs. - Update routes to enforce permission checks for creating, reading, updating, and deleting resources. - Refactor user and search space interactions to align with RBAC model, removing ownership checks in favor of permission validation.
2026-04-27 01:36:30 +02:00 · 2025-11-27 22:45:04 -08:00 · 2025-11-27 22:45:04 -08:00 · e9d32c3516
commit e9d32c3516
parent 1ed0cb3dfe
38 changed files with 5916 additions and 657 deletions
--- a/surfsense_backend/app/retriver/documents_hybrid_search.py
+++ b/surfsense_backend/app/retriver/documents_hybrid_search.py
@ -12,8 +12,7 @@ class DocumentHybridSearchRetriever:
        self,
        query_text: str,
        top_k: int,
-        user_id: str,
-        search_space_id: int | None = None,
+        search_space_id: int,
    ) -> list:
        """
        Perform vector similarity search on documents.
@ -21,8 +20,7 @@ class DocumentHybridSearchRetriever:
        Args:
            query_text: The search query text
            top_k: Number of results to return
-            user_id: The ID of the user performing the search
-            search_space_id: Optional search space ID to filter results
+            search_space_id: The search space ID to search within

        Returns:
            List of documents sorted by vector similarity
@ -31,24 +29,19 @@ class DocumentHybridSearchRetriever:
        from sqlalchemy.orm import joinedload

        from app.config import config
-        from app.db import Document, SearchSpace
+        from app.db import Document

        # Get embedding for the query
        embedding_model = config.embedding_model_instance
        query_embedding = embedding_model.embed(query_text)

-        # Build the base query with user ownership check
+        # Build the query filtered by search space
        query = (
            select(Document)
            .options(joinedload(Document.search_space))
-            .join(SearchSpace, Document.search_space_id == SearchSpace.id)
-            .where(SearchSpace.user_id == user_id)
+            .where(Document.search_space_id == search_space_id)
        )

-        # Add search space filter if provided
-        if search_space_id is not None:
-            query = query.where(Document.search_space_id == search_space_id)
-
        # Add vector similarity ordering
        query = query.order_by(Document.embedding.op("<=>")(query_embedding)).limit(
            top_k
@ -64,8 +57,7 @@ class DocumentHybridSearchRetriever:
        self,
        query_text: str,
        top_k: int,
-        user_id: str,
-        search_space_id: int | None = None,
+        search_space_id: int,
    ) -> list:
        """
        Perform full-text keyword search on documents.
@ -73,8 +65,7 @@ class DocumentHybridSearchRetriever:
        Args:
            query_text: The search query text
            top_k: Number of results to return
-            user_id: The ID of the user performing the search
-            search_space_id: Optional search space ID to filter results
+            search_space_id: The search space ID to search within

        Returns:
            List of documents sorted by text relevance
@ -82,27 +73,22 @@ class DocumentHybridSearchRetriever:
        from sqlalchemy import func, select
        from sqlalchemy.orm import joinedload

-        from app.db import Document, SearchSpace
+        from app.db import Document

        # Create tsvector and tsquery for PostgreSQL full-text search
        tsvector = func.to_tsvector("english", Document.content)
        tsquery = func.plainto_tsquery("english", query_text)

-        # Build the base query with user ownership check
+        # Build the query filtered by search space
        query = (
            select(Document)
            .options(joinedload(Document.search_space))
-            .join(SearchSpace, Document.search_space_id == SearchSpace.id)
-            .where(SearchSpace.user_id == user_id)
+            .where(Document.search_space_id == search_space_id)
            .where(
                tsvector.op("@@")(tsquery)
            )  # Only include results that match the query
        )

-        # Add search space filter if provided
-        if search_space_id is not None:
-            query = query.where(Document.search_space_id == search_space_id)
-
        # Add text search ranking
        query = query.order_by(func.ts_rank_cd(tsvector, tsquery).desc()).limit(top_k)

@ -116,8 +102,7 @@ class DocumentHybridSearchRetriever:
        self,
        query_text: str,
        top_k: int,
-        user_id: str,
-        search_space_id: int | None = None,
+        search_space_id: int,
        document_type: str | None = None,
    ) -> list:
        """
@ -126,8 +111,7 @@ class DocumentHybridSearchRetriever:
        Args:
            query_text: The search query text
            top_k: Number of results to return
-            user_id: The ID of the user performing the search
-            search_space_id: Optional search space ID to filter results
+            search_space_id: The search space ID to search within
            document_type: Optional document type to filter results (e.g., "FILE", "CRAWLED_URL")

        """
@ -135,7 +119,7 @@ class DocumentHybridSearchRetriever:
        from sqlalchemy.orm import joinedload

        from app.config import config
-        from app.db import Document, DocumentType, SearchSpace
+        from app.db import Document, DocumentType

        # Get embedding for the query
        embedding_model = config.embedding_model_instance
@ -149,12 +133,8 @@ class DocumentHybridSearchRetriever:
        tsvector = func.to_tsvector("english", Document.content)
        tsquery = func.plainto_tsquery("english", query_text)

-        # Base conditions for document filtering
-        base_conditions = [SearchSpace.user_id == user_id]
-
-        # Add search space filter if provided
-        if search_space_id is not None:
-            base_conditions.append(Document.search_space_id == search_space_id)
+        # Base conditions for document filtering - search space is required
+        base_conditions = [Document.search_space_id == search_space_id]

        # Add document type filter if provided
        if document_type is not None:
@ -169,17 +149,13 @@ class DocumentHybridSearchRetriever:
            else:
                base_conditions.append(Document.document_type == document_type)

-        # CTE for semantic search with user ownership check
-        semantic_search_cte = (
-            select(
-                Document.id,
-                func.rank()
-                .over(order_by=Document.embedding.op("<=>")(query_embedding))
-                .label("rank"),
-            )
-            .join(SearchSpace, Document.search_space_id == SearchSpace.id)
-            .where(*base_conditions)
-        )
+        # CTE for semantic search filtered by search space
+        semantic_search_cte = select(
+            Document.id,
+            func.rank()
+            .over(order_by=Document.embedding.op("<=>")(query_embedding))
+            .label("rank"),
+        ).where(*base_conditions)

        semantic_search_cte = (
            semantic_search_cte.order_by(Document.embedding.op("<=>")(query_embedding))
@ -187,7 +163,7 @@ class DocumentHybridSearchRetriever:
            .cte("semantic_search")
        )

-        # CTE for keyword search with user ownership check
+        # CTE for keyword search filtered by search space
        keyword_search_cte = (
            select(
                Document.id,
@ -195,7 +171,6 @@ class DocumentHybridSearchRetriever:
                .over(order_by=func.ts_rank_cd(tsvector, tsquery).desc())
                .label("rank"),
            )
-            .join(SearchSpace, Document.search_space_id == SearchSpace.id)
            .where(*base_conditions)
            .where(tsvector.op("@@")(tsquery))
        )