Merge remote-tracking branch 'upstream/main' into feat/bookstack-connector

Differ 2025-12-06 09:15:02 +08:00
commit e238fab638
110 changed files with 10076 additions and 1671 deletions

@@ -0,0 +1,179 @@
"""Add RBAC tables for search space access control
Revision ID: 39
Revises: 38
Create Date: 2025-11-27 00:00:00.000000
This migration adds:
- Permission enum for granular access control
- search_space_roles table for custom roles per search space
- search_space_memberships table for user-searchspace-role relationships
- search_space_invites table for invite links
"""
from collections.abc import Sequence
from sqlalchemy import inspect
from alembic import op
revision: str = "39"
down_revision: str | None = "38"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Upgrade schema - add RBAC tables for search space access control."""
# Create search_space_roles table
op.execute(
"""
CREATE TABLE IF NOT EXISTS search_space_roles (
id SERIAL PRIMARY KEY,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
name VARCHAR(100) NOT NULL,
description VARCHAR(500),
permissions TEXT[] NOT NULL DEFAULT '{}',
is_default BOOLEAN NOT NULL DEFAULT FALSE,
is_system_role BOOLEAN NOT NULL DEFAULT FALSE,
search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
CONSTRAINT uq_searchspace_role_name UNIQUE (search_space_id, name)
);
"""
)
# Create search_space_invites table (needs to be created before memberships due to FK)
op.execute(
"""
CREATE TABLE IF NOT EXISTS search_space_invites (
id SERIAL PRIMARY KEY,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
invite_code VARCHAR(64) NOT NULL UNIQUE,
search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
role_id INTEGER REFERENCES search_space_roles(id) ON DELETE SET NULL,
created_by_id UUID REFERENCES "user"(id) ON DELETE SET NULL,
expires_at TIMESTAMPTZ,
max_uses INTEGER,
uses_count INTEGER NOT NULL DEFAULT 0,
is_active BOOLEAN NOT NULL DEFAULT TRUE,
name VARCHAR(100)
);
"""
)
# Create search_space_memberships table
op.execute(
"""
CREATE TABLE IF NOT EXISTS search_space_memberships (
id SERIAL PRIMARY KEY,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
user_id UUID NOT NULL REFERENCES "user"(id) ON DELETE CASCADE,
search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
role_id INTEGER REFERENCES search_space_roles(id) ON DELETE SET NULL,
is_owner BOOLEAN NOT NULL DEFAULT FALSE,
joined_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
invited_by_invite_id INTEGER REFERENCES search_space_invites(id) ON DELETE SET NULL,
CONSTRAINT uq_user_searchspace_membership UNIQUE (user_id, search_space_id)
);
"""
)
# Get connection and inspector for checking existing indexes
conn = op.get_bind()
inspector = inspect(conn)
# Create indexes for search_space_roles
existing_indexes = [
idx["name"] for idx in inspector.get_indexes("search_space_roles")
]
if "ix_search_space_roles_id" not in existing_indexes:
op.create_index("ix_search_space_roles_id", "search_space_roles", ["id"])
if "ix_search_space_roles_created_at" not in existing_indexes:
op.create_index(
"ix_search_space_roles_created_at", "search_space_roles", ["created_at"]
)
if "ix_search_space_roles_name" not in existing_indexes:
op.create_index("ix_search_space_roles_name", "search_space_roles", ["name"])
# Create indexes for search_space_memberships
existing_indexes = [
idx["name"] for idx in inspector.get_indexes("search_space_memberships")
]
if "ix_search_space_memberships_id" not in existing_indexes:
op.create_index(
"ix_search_space_memberships_id", "search_space_memberships", ["id"]
)
if "ix_search_space_memberships_created_at" not in existing_indexes:
op.create_index(
"ix_search_space_memberships_created_at",
"search_space_memberships",
["created_at"],
)
if "ix_search_space_memberships_user_id" not in existing_indexes:
op.create_index(
"ix_search_space_memberships_user_id",
"search_space_memberships",
["user_id"],
)
if "ix_search_space_memberships_search_space_id" not in existing_indexes:
op.create_index(
"ix_search_space_memberships_search_space_id",
"search_space_memberships",
["search_space_id"],
)
# Create indexes for search_space_invites
existing_indexes = [
idx["name"] for idx in inspector.get_indexes("search_space_invites")
]
if "ix_search_space_invites_id" not in existing_indexes:
op.create_index("ix_search_space_invites_id", "search_space_invites", ["id"])
if "ix_search_space_invites_created_at" not in existing_indexes:
op.create_index(
"ix_search_space_invites_created_at", "search_space_invites", ["created_at"]
)
if "ix_search_space_invites_invite_code" not in existing_indexes:
op.create_index(
"ix_search_space_invites_invite_code",
"search_space_invites",
["invite_code"],
)
def downgrade() -> None:
"""Downgrade schema - remove RBAC tables."""
# Drop indexes for search_space_memberships
op.drop_index(
"ix_search_space_memberships_search_space_id",
table_name="search_space_memberships",
)
op.drop_index(
"ix_search_space_memberships_user_id", table_name="search_space_memberships"
)
op.drop_index(
"ix_search_space_memberships_created_at", table_name="search_space_memberships"
)
op.drop_index(
"ix_search_space_memberships_id", table_name="search_space_memberships"
)
# Drop indexes for search_space_invites
op.drop_index(
"ix_search_space_invites_invite_code", table_name="search_space_invites"
)
op.drop_index(
"ix_search_space_invites_created_at", table_name="search_space_invites"
)
op.drop_index("ix_search_space_invites_id", table_name="search_space_invites")
# Drop indexes for search_space_roles
op.drop_index("ix_search_space_roles_name", table_name="search_space_roles")
op.drop_index("ix_search_space_roles_created_at", table_name="search_space_roles")
op.drop_index("ix_search_space_roles_id", table_name="search_space_roles")
# Drop tables in correct order (respecting foreign key constraints)
op.drop_table("search_space_memberships")
op.drop_table("search_space_invites")
op.drop_table("search_space_roles")

@@ -0,0 +1,63 @@
"""Move LLM preferences from user-level to search space level
Revision ID: 40
Revises: 39
Create Date: 2025-11-27
This migration moves LLM preferences (long_context_llm_id, fast_llm_id, strategic_llm_id)
from the user_search_space_preferences table to the searchspaces table itself.
This change supports the RBAC model where LLM preferences are shared by all members
of a search space, rather than being per-user.
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "40"
down_revision = "39"
branch_labels = None
depends_on = None
def upgrade():
# Add LLM preference columns to searchspaces table
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)
# Migrate existing preferences from user_search_space_preferences to searchspaces
# We take the owner's preferences (the user who created the search space)
connection = op.get_bind()
    # Copy each owner's preferences onto their search space in a single UPDATE
connection.execute(
sa.text("""
UPDATE searchspaces ss
SET
long_context_llm_id = usp.long_context_llm_id,
fast_llm_id = usp.fast_llm_id,
strategic_llm_id = usp.strategic_llm_id
FROM user_search_space_preferences usp
WHERE ss.id = usp.search_space_id
AND ss.user_id = usp.user_id
""")
)
def downgrade():
# Remove LLM preference columns from searchspaces table
op.drop_column("searchspaces", "strategic_llm_id")
op.drop_column("searchspaces", "fast_llm_id")
op.drop_column("searchspaces", "long_context_llm_id")

@@ -0,0 +1,212 @@
"""Backfill RBAC data for existing search spaces
Revision ID: 41
Revises: 40
Create Date: 2025-11-28
This migration creates default roles and owner memberships for all existing
search spaces that were created before the RBAC system was implemented.
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "41"
down_revision = "40"
branch_labels = None
depends_on = None
# Default role permissions (must match DEFAULT_ROLE_PERMISSIONS in db.py)
DEFAULT_ROLES = [
{
"name": "Owner",
"description": "Full access to all resources",
"permissions": ["*"],
"is_system_role": True,
"is_default": False,
},
{
"name": "Admin",
"description": "Can manage members, roles, and all content",
"permissions": [
"documents:create",
"documents:read",
"documents:update",
"documents:delete",
"chats:create",
"chats:read",
"chats:update",
"chats:delete",
"llm_configs:create",
"llm_configs:read",
"llm_configs:update",
"llm_configs:delete",
"logs:read",
"logs:delete",
"podcasts:create",
"podcasts:read",
"podcasts:update",
"podcasts:delete",
"connectors:create",
"connectors:read",
"connectors:update",
"connectors:delete",
"members:read",
"members:update",
"members:delete",
"roles:create",
"roles:read",
"roles:update",
"roles:delete",
"invites:create",
"invites:read",
"invites:delete",
"settings:read",
"settings:update",
],
"is_system_role": True,
"is_default": False,
},
{
"name": "Editor",
"description": "Can create and edit content",
"permissions": [
"documents:create",
"documents:read",
"documents:update",
"chats:create",
"chats:read",
"chats:update",
"llm_configs:read",
"logs:read",
"podcasts:create",
"podcasts:read",
"podcasts:update",
"connectors:create",
"connectors:read",
"connectors:update",
"members:read",
"roles:read",
],
"is_system_role": True,
"is_default": True,
},
{
"name": "Viewer",
"description": "Read-only access to content",
"permissions": [
"documents:read",
"chats:read",
"llm_configs:read",
"logs:read",
"podcasts:read",
"connectors:read",
"members:read",
"roles:read",
],
"is_system_role": True,
"is_default": False,
},
]
def upgrade():
connection = op.get_bind()
# Get all existing search spaces that don't have roles yet
search_spaces = connection.execute(
sa.text("""
SELECT ss.id, ss.user_id
FROM searchspaces ss
WHERE NOT EXISTS (
SELECT 1 FROM search_space_roles ssr
WHERE ssr.search_space_id = ss.id
)
""")
).fetchall()
for ss_id, owner_user_id in search_spaces:
owner_role_id = None
# Create default roles for each search space
for role in DEFAULT_ROLES:
# Convert permissions list to PostgreSQL array literal format for raw SQL
perms_literal = (
"ARRAY[" + ",".join(f"'{p}'" for p in role["permissions"]) + "]::TEXT[]"
)
result = connection.execute(
sa.text(f"""
INSERT INTO search_space_roles
(name, description, permissions, is_default, is_system_role, search_space_id)
VALUES (:name, :description, {perms_literal}, :is_default, :is_system_role, :search_space_id)
RETURNING id
"""),
{
"name": role["name"],
"description": role["description"],
"is_default": role["is_default"],
"is_system_role": role["is_system_role"],
"search_space_id": ss_id,
},
)
role_id = result.fetchone()[0]
# Keep track of Owner role ID
if role["name"] == "Owner":
owner_role_id = role_id
# Create owner membership for the search space creator
if owner_user_id and owner_role_id:
# Check if membership already exists
existing = connection.execute(
sa.text("""
SELECT 1 FROM search_space_memberships
WHERE user_id = :user_id AND search_space_id = :search_space_id
"""),
{"user_id": owner_user_id, "search_space_id": ss_id},
).fetchone()
if not existing:
connection.execute(
sa.text("""
INSERT INTO search_space_memberships
(user_id, search_space_id, role_id, is_owner)
VALUES (:user_id, :search_space_id, :role_id, TRUE)
"""),
{
"user_id": owner_user_id,
"search_space_id": ss_id,
"role_id": owner_role_id,
},
)
def downgrade():
# This migration only adds data, not schema changes
# Downgrade would remove all roles and memberships created by this migration
# However, this is destructive and may affect manually created data
# So we only remove system roles and owner memberships that were auto-created
connection = op.get_bind()
# Remove memberships where user is owner and role is system Owner role
connection.execute(
sa.text("""
DELETE FROM search_space_memberships ssm
USING search_space_roles ssr
WHERE ssm.role_id = ssr.id
AND ssm.is_owner = TRUE
AND ssr.is_system_role = TRUE
AND ssr.name = 'Owner'
""")
)
# Remove system roles
connection.execute(
sa.text("""
DELETE FROM search_space_roles
WHERE is_system_role = TRUE
""")
)
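
Since this revision is data-only, an easy sanity check after it runs is to look for search spaces that still lack an owner membership. A sketch, illustrative and not part of the migration:

import sqlalchemy as sa

def spaces_missing_owner(connection) -> list[int]:
    # Illustrative check: IDs of search spaces with no is_owner membership after the backfill.
    rows = connection.execute(
        sa.text("""
            SELECT ss.id
            FROM searchspaces ss
            WHERE NOT EXISTS (
                SELECT 1 FROM search_space_memberships ssm
                WHERE ssm.search_space_id = ss.id AND ssm.is_owner = TRUE
            )
        """)
    ).fetchall()
    return [row[0] for row in rows]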

@@ -0,0 +1,52 @@
"""Drop user_search_space_preferences table
Revision ID: 42
Revises: 41
Create Date: 2025-11-28
This table is no longer needed after RBAC implementation:
- LLM preferences are now stored on SearchSpace directly
- User-SearchSpace relationships are handled by SearchSpaceMembership
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "42"
down_revision = "41"
branch_labels = None
depends_on = None
def upgrade():
# Drop the user_search_space_preferences table
op.drop_table("user_search_space_preferences")
def downgrade():
# Recreate the table if rolling back
op.create_table(
"user_search_space_preferences",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column(
"created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
),
sa.Column(
"user_id",
sa.UUID(),
sa.ForeignKey("user.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column(
"search_space_id",
sa.Integer(),
sa.ForeignKey("searchspaces.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
sa.UniqueConstraint("user_id", "search_space_id", name="uq_user_searchspace"),
)
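
The upgrade here is destructive: per-user preference rows are discarded, and the downgrade only recreates an empty table. A sketch of a pre-drop export, assuming a CSV file is an acceptable backup target (the path and helper name are illustrative):

import csv
import sqlalchemy as sa

def export_preferences(connection, path: str = "user_search_space_preferences_backup.csv") -> None:
    # Illustrative only: dump the table before upgrade() drops it, so rows can be restored by hand.
    result = connection.execute(sa.text("SELECT * FROM user_search_space_preferences"))
    with open(path, "w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(result.keys())
        writer.writerows(result)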

@@ -0,0 +1,75 @@
"""43_add_blocknote_fields_to_documents
Revision ID: 43
Revises: 42
Create Date: 2025-11-30
Adds fields for live document editing:
- blocknote_document: JSONB editor state
- content_needs_reindexing: Flag for regenerating chunks/summary
- last_edited_at: Last edit timestamp
"""
from collections.abc import Sequence
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "43"
down_revision: str | None = "42"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Upgrade schema - Add BlockNote fields and trigger population task."""
# Add the columns
op.add_column(
"documents",
sa.Column(
"blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
)
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)
# Trigger the Celery task to populate blocknote_document for existing documents
try:
from app.tasks.celery_tasks.blocknote_migration_tasks import (
populate_blocknote_for_documents_task,
)
# Queue the task to run asynchronously
populate_blocknote_for_documents_task.apply_async()
print(
"✓ Queued Celery task to populate blocknote_document for existing documents"
)
except Exception as e:
# If Celery is not available or task queueing fails, log but don't fail the migration
print(f"⚠ Warning: Could not queue blocknote population task: {e}")
print(" You can manually trigger it later with:")
print(
" celery -A app.celery_app call app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task"
)
def downgrade() -> None:
"""Downgrade schema - Remove BlockNote fields."""
op.drop_column("documents", "last_edited_at")
op.drop_column("documents", "content_needs_reindexing")
op.drop_column("documents", "blocknote_document")