feat: Implement Role-Based Access Control (RBAC) for search space resources.

-Introduce granular permissions for documents, chats, podcasts, and logs.
- Update routes to enforce permission checks for creating, reading, updating, and deleting resources. - Refactor user and search space interactions to align with RBAC model, removing ownership checks in favor of permission validation.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-11-27 22:45:04 -08:00
parent 1ed0cb3dfe
commit e9d32c3516
38 changed files with 5916 additions and 657 deletions

View file

@ -12,8 +12,7 @@ class ChucksHybridSearchRetriever:
self,
query_text: str,
top_k: int,
user_id: str,
search_space_id: int | None = None,
search_space_id: int,
) -> list:
"""
Perform vector similarity search on chunks.
@ -21,8 +20,7 @@ class ChucksHybridSearchRetriever:
Args:
query_text: The search query text
top_k: Number of results to return
user_id: The ID of the user performing the search
search_space_id: Optional search space ID to filter results
search_space_id: The search space ID to search within
Returns:
List of chunks sorted by vector similarity
@ -31,25 +29,20 @@ class ChucksHybridSearchRetriever:
from sqlalchemy.orm import joinedload
from app.config import config
from app.db import Chunk, Document, SearchSpace
from app.db import Chunk, Document
# Get embedding for the query
embedding_model = config.embedding_model_instance
query_embedding = embedding_model.embed(query_text)
# Build the base query with user ownership check
# Build the query filtered by search space
query = (
select(Chunk)
.options(joinedload(Chunk.document).joinedload(Document.search_space))
.join(Document, Chunk.document_id == Document.id)
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
.where(SearchSpace.user_id == user_id)
.where(Document.search_space_id == search_space_id)
)
# Add search space filter if provided
if search_space_id is not None:
query = query.where(Document.search_space_id == search_space_id)
# Add vector similarity ordering
query = query.order_by(Chunk.embedding.op("<=>")(query_embedding)).limit(top_k)
@ -63,8 +56,7 @@ class ChucksHybridSearchRetriever:
self,
query_text: str,
top_k: int,
user_id: str,
search_space_id: int | None = None,
search_space_id: int,
) -> list:
"""
Perform full-text keyword search on chunks.
@ -72,8 +64,7 @@ class ChucksHybridSearchRetriever:
Args:
query_text: The search query text
top_k: Number of results to return
user_id: The ID of the user performing the search
search_space_id: Optional search space ID to filter results
search_space_id: The search space ID to search within
Returns:
List of chunks sorted by text relevance
@ -81,28 +72,23 @@ class ChucksHybridSearchRetriever:
from sqlalchemy import func, select
from sqlalchemy.orm import joinedload
from app.db import Chunk, Document, SearchSpace
from app.db import Chunk, Document
# Create tsvector and tsquery for PostgreSQL full-text search
tsvector = func.to_tsvector("english", Chunk.content)
tsquery = func.plainto_tsquery("english", query_text)
# Build the base query with user ownership check
# Build the query filtered by search space
query = (
select(Chunk)
.options(joinedload(Chunk.document).joinedload(Document.search_space))
.join(Document, Chunk.document_id == Document.id)
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
.where(SearchSpace.user_id == user_id)
.where(Document.search_space_id == search_space_id)
.where(
tsvector.op("@@")(tsquery)
) # Only include results that match the query
)
# Add search space filter if provided
if search_space_id is not None:
query = query.where(Document.search_space_id == search_space_id)
# Add text search ranking
query = query.order_by(func.ts_rank_cd(tsvector, tsquery).desc()).limit(top_k)
@ -116,8 +102,7 @@ class ChucksHybridSearchRetriever:
self,
query_text: str,
top_k: int,
user_id: str,
search_space_id: int | None = None,
search_space_id: int,
document_type: str | None = None,
) -> list:
"""
@ -126,8 +111,7 @@ class ChucksHybridSearchRetriever:
Args:
query_text: The search query text
top_k: Number of results to return
user_id: The ID of the user performing the search
search_space_id: Optional search space ID to filter results
search_space_id: The search space ID to search within
document_type: Optional document type to filter results (e.g., "FILE", "CRAWLED_URL")
Returns:
@ -137,7 +121,7 @@ class ChucksHybridSearchRetriever:
from sqlalchemy.orm import joinedload
from app.config import config
from app.db import Chunk, Document, DocumentType, SearchSpace
from app.db import Chunk, Document, DocumentType
# Get embedding for the query
embedding_model = config.embedding_model_instance
@ -151,12 +135,8 @@ class ChucksHybridSearchRetriever:
tsvector = func.to_tsvector("english", Chunk.content)
tsquery = func.plainto_tsquery("english", query_text)
# Base conditions for document filtering
base_conditions = [SearchSpace.user_id == user_id]
# Add search space filter if provided
if search_space_id is not None:
base_conditions.append(Document.search_space_id == search_space_id)
# Base conditions for chunk filtering - search space is required
base_conditions = [Document.search_space_id == search_space_id]
# Add document type filter if provided
if document_type is not None:
@ -171,7 +151,7 @@ class ChucksHybridSearchRetriever:
else:
base_conditions.append(Document.document_type == document_type)
# CTE for semantic search with user ownership check
# CTE for semantic search filtered by search space
semantic_search_cte = (
select(
Chunk.id,
@ -180,7 +160,6 @@ class ChucksHybridSearchRetriever:
.label("rank"),
)
.join(Document, Chunk.document_id == Document.id)
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
.where(*base_conditions)
)
@ -190,7 +169,7 @@ class ChucksHybridSearchRetriever:
.cte("semantic_search")
)
# CTE for keyword search with user ownership check
# CTE for keyword search filtered by search space
keyword_search_cte = (
select(
Chunk.id,
@ -199,7 +178,6 @@ class ChucksHybridSearchRetriever:
.label("rank"),
)
.join(Document, Chunk.document_id == Document.id)
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
.where(*base_conditions)
.where(tsvector.op("@@")(tsquery))
)