Merge remote-tracking branch 'upstream/main' into feature/blocknote-editor

This commit is contained in:
Anish Sarkar 2025-11-30 04:10:49 +05:30
commit b98c312fb1
81 changed files with 8976 additions and 2387 deletions

View file

@ -8,6 +8,8 @@ Create Date: 2025-11-13 23:20:12.912741
from collections.abc import Sequence
from sqlalchemy import text
from alembic import op
# revision identifiers, used by Alembic.
@ -17,6 +19,20 @@ branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def constraint_exists(connection, table_name: str, constraint_name: str) -> bool:
    """Report whether a named constraint is present on a table.

    The lookup runs against ``information_schema.table_constraints`` and is
    limited to the schemas on the connection's search path, so a table and
    constraint with the same names in an unrelated schema are not mistaken
    for a match.

    Args:
        connection: Database connection used to run the lookup.
        table_name: Name of the table expected to own the constraint.
        constraint_name: Name of the constraint to look for.

    Returns:
        True if at least one matching row is found, otherwise False.
    """
    lookup = text(
        """
        SELECT 1 FROM information_schema.table_constraints
        WHERE table_name = :table_name
          AND constraint_name = :constraint_name
          AND table_schema = ANY (current_schemas(false))
        """
    )
    params = {"table_name": table_name, "constraint_name": constraint_name}
    return connection.execute(lookup, params).fetchone() is not None
def upgrade() -> None:
"""
Remove foreign key constraints on LLM preference columns to allow global configs (negative IDs).
@ -24,50 +40,55 @@ def upgrade() -> None:
Global LLM configs use negative IDs and don't exist in the llm_configs table,
so we need to remove the foreign key constraints that were preventing their use.
"""
# Drop the foreign key constraints
op.drop_constraint(
connection = op.get_bind()
# Drop the foreign key constraints if they exist
constraints_to_drop = [
"user_search_space_preferences_long_context_llm_id_fkey",
"user_search_space_preferences",
type_="foreignkey",
)
op.drop_constraint(
"user_search_space_preferences_fast_llm_id_fkey",
"user_search_space_preferences",
type_="foreignkey",
)
op.drop_constraint(
"user_search_space_preferences_strategic_llm_id_fkey",
"user_search_space_preferences",
type_="foreignkey",
)
]
for constraint_name in constraints_to_drop:
if constraint_exists(
connection, "user_search_space_preferences", constraint_name
):
op.drop_constraint(
constraint_name,
"user_search_space_preferences",
type_="foreignkey",
)
else:
print(f"Constraint '{constraint_name}' does not exist. Skipping.")
def downgrade() -> None:
"""
Re-add foreign key constraints (will fail if any negative IDs exist in the table).
"""
# Re-add the foreign key constraints
op.create_foreign_key(
"user_search_space_preferences_long_context_llm_id_fkey",
"user_search_space_preferences",
"llm_configs",
["long_context_llm_id"],
["id"],
ondelete="SET NULL",
)
op.create_foreign_key(
"user_search_space_preferences_fast_llm_id_fkey",
"user_search_space_preferences",
"llm_configs",
["fast_llm_id"],
["id"],
ondelete="SET NULL",
)
op.create_foreign_key(
"user_search_space_preferences_strategic_llm_id_fkey",
"user_search_space_preferences",
"llm_configs",
["strategic_llm_id"],
["id"],
ondelete="SET NULL",
)
connection = op.get_bind()
# Re-add the foreign key constraints if they don't exist
constraints_to_create = [
(
"user_search_space_preferences_long_context_llm_id_fkey",
"long_context_llm_id",
),
("user_search_space_preferences_fast_llm_id_fkey", "fast_llm_id"),
("user_search_space_preferences_strategic_llm_id_fkey", "strategic_llm_id"),
]
for constraint_name, column_name in constraints_to_create:
if not constraint_exists(
connection, "user_search_space_preferences", constraint_name
):
op.create_foreign_key(
constraint_name,
"user_search_space_preferences",
"llm_configs",
[column_name],
["id"],
ondelete="SET NULL",
)
else:
print(f"Constraint '{constraint_name}' already exists. Skipping.")

View file

@ -9,6 +9,7 @@ Create Date: 2025-11-19 00:00:00.000000
from collections.abc import Sequence
import sqlalchemy as sa
from sqlalchemy import text
from alembic import op
@ -19,24 +20,55 @@ branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def column_exists(connection, table_name: str, column_name: str) -> bool:
    """Report whether a column is present on a table.

    The lookup runs against ``information_schema.columns`` and is limited to
    the schemas on the connection's search path, so a same-named table and
    column in an unrelated schema are not mistaken for a match.

    Args:
        connection: Database connection used to run the lookup.
        table_name: Name of the table expected to contain the column.
        column_name: Name of the column to look for.

    Returns:
        True if at least one matching row is found, otherwise False.
    """
    lookup = text(
        """
        SELECT 1 FROM information_schema.columns
        WHERE table_name = :table_name
          AND column_name = :column_name
          AND table_schema = ANY (current_schemas(false))
        """
    )
    params = {"table_name": table_name, "column_name": column_name}
    return connection.execute(lookup, params).fetchone() is not None
def upgrade() -> None:
"""Add QnA configuration columns to searchspaces table."""
connection = op.get_bind()
# Add citations_enabled boolean (default True)
op.add_column(
"searchspaces",
sa.Column(
"citations_enabled", sa.Boolean(), nullable=False, server_default="true"
),
)
if not column_exists(connection, "searchspaces", "citations_enabled"):
op.add_column(
"searchspaces",
sa.Column(
"citations_enabled", sa.Boolean(), nullable=False, server_default="true"
),
)
else:
print("Column 'citations_enabled' already exists. Skipping.")
# Add custom instructions text field (nullable, defaults to empty)
op.add_column(
"searchspaces",
sa.Column("qna_custom_instructions", sa.Text(), nullable=True),
)
if not column_exists(connection, "searchspaces", "qna_custom_instructions"):
op.add_column(
"searchspaces",
sa.Column("qna_custom_instructions", sa.Text(), nullable=True),
)
else:
print("Column 'qna_custom_instructions' already exists. Skipping.")
def downgrade() -> None:
"""Remove QnA configuration columns from searchspaces table."""
op.drop_column("searchspaces", "qna_custom_instructions")
op.drop_column("searchspaces", "citations_enabled")
connection = op.get_bind()
if column_exists(connection, "searchspaces", "qna_custom_instructions"):
op.drop_column("searchspaces", "qna_custom_instructions")
else:
print("Column 'qna_custom_instructions' does not exist. Skipping.")
if column_exists(connection, "searchspaces", "citations_enabled"):
op.drop_column("searchspaces", "citations_enabled")
else:
print("Column 'citations_enabled' does not exist. Skipping.")

View file

@ -0,0 +1,59 @@
"""Add Webcrawler connector enums
Revision ID: 38
Revises: 37
Create Date: 2025-11-17 17:00:00.000000
"""
from collections.abc import Sequence
from alembic import op
revision: str = "38"
down_revision: str | None = "37"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add the web-crawler labels to the connector and document enum types.

    Each label is appended only when ``pg_enum`` does not already list it for
    the given type, so running the migration again leaves the enums untouched.
    """
    # (enum type name, label to add), processed in this order.
    enum_additions = (
        ("searchsourceconnectortype", "WEBCRAWLER_CONNECTOR"),
        ("documenttype", "CRAWLED_URL"),
    )
    for type_name, label in enum_additions:
        op.execute(
            f"""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM pg_type t
                    JOIN pg_enum e ON t.oid = e.enumtypid
                    WHERE t.typname = '{type_name}' AND e.enumlabel = '{label}'
                ) THEN
                    ALTER TYPE {type_name} ADD VALUE '{label}';
                END IF;
            END
            $$;
            """
        )
def downgrade() -> None:
    """Do nothing: the enum labels added by this revision are left in place."""
    return None

View file

@ -0,0 +1,179 @@
"""Add RBAC tables for search space access control
Revision ID: 39
Revises: 38
Create Date: 2025-11-27 00:00:00.000000
This migration adds:
- Permission enum for granular access control
- search_space_roles table for custom roles per search space
- search_space_memberships table for user-searchspace-role relationships
- search_space_invites table for invite links
"""
from collections.abc import Sequence
from sqlalchemy import inspect
from alembic import op
revision: str = "39"
down_revision: str | None = "38"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the RBAC tables and their indexes for search spaces.

    Tables are created with ``CREATE TABLE IF NOT EXISTS`` and each index is
    created only when the inspector does not already report it, so objects
    that are already present are skipped.
    """
    roles_ddl = """
        CREATE TABLE IF NOT EXISTS search_space_roles (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            name VARCHAR(100) NOT NULL,
            description VARCHAR(500),
            permissions TEXT[] NOT NULL DEFAULT '{}',
            is_default BOOLEAN NOT NULL DEFAULT FALSE,
            is_system_role BOOLEAN NOT NULL DEFAULT FALSE,
            search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
            CONSTRAINT uq_searchspace_role_name UNIQUE (search_space_id, name)
        );
        """
    invites_ddl = """
        CREATE TABLE IF NOT EXISTS search_space_invites (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            invite_code VARCHAR(64) NOT NULL UNIQUE,
            search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
            role_id INTEGER REFERENCES search_space_roles(id) ON DELETE SET NULL,
            created_by_id UUID REFERENCES "user"(id) ON DELETE SET NULL,
            expires_at TIMESTAMPTZ,
            max_uses INTEGER,
            uses_count INTEGER NOT NULL DEFAULT 0,
            is_active BOOLEAN NOT NULL DEFAULT TRUE,
            name VARCHAR(100)
        );
        """
    memberships_ddl = """
        CREATE TABLE IF NOT EXISTS search_space_memberships (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            user_id UUID NOT NULL REFERENCES "user"(id) ON DELETE CASCADE,
            search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
            role_id INTEGER REFERENCES search_space_roles(id) ON DELETE SET NULL,
            is_owner BOOLEAN NOT NULL DEFAULT FALSE,
            joined_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            invited_by_invite_id INTEGER REFERENCES search_space_invites(id) ON DELETE SET NULL,
            CONSTRAINT uq_user_searchspace_membership UNIQUE (user_id, search_space_id)
        );
        """
    # Invites come before memberships because memberships hold a foreign key
    # to search_space_invites.
    for ddl in (roles_ddl, invites_ddl, memberships_ddl):
        op.execute(ddl)

    inspector = inspect(op.get_bind())

    # table -> indexed columns; every index is named ix_<table>_<column>.
    wanted_indexes = {
        "search_space_roles": ("id", "created_at", "name"),
        "search_space_memberships": (
            "id",
            "created_at",
            "user_id",
            "search_space_id",
        ),
        "search_space_invites": ("id", "created_at", "invite_code"),
    }
    for table, columns in wanted_indexes.items():
        present = {idx["name"] for idx in inspector.get_indexes(table)}
        for column in columns:
            index_name = f"ix_{table}_{column}"
            if index_name not in present:
                op.create_index(index_name, table, [column])
def downgrade() -> None:
    """Drop the RBAC indexes and then the RBAC tables."""
    # (table, indexed columns) in drop order; index names are ix_<table>_<column>.
    indexes_by_table = (
        (
            "search_space_memberships",
            ("search_space_id", "user_id", "created_at", "id"),
        ),
        ("search_space_invites", ("invite_code", "created_at", "id")),
        ("search_space_roles", ("name", "created_at", "id")),
    )
    for table, columns in indexes_by_table:
        for column in columns:
            op.drop_index(f"ix_{table}_{column}", table_name=table)

    # Referencing tables go first: memberships point at invites and roles,
    # invites point at roles.
    for table in (
        "search_space_memberships",
        "search_space_invites",
        "search_space_roles",
    ):
        op.drop_table(table)

View file

@ -0,0 +1,63 @@
"""Move LLM preferences from user-level to search space level
Revision ID: 40
Revises: 39
Create Date: 2025-11-27
This migration moves LLM preferences (long_context_llm_id, fast_llm_id, strategic_llm_id)
from the user_search_space_preferences table to the searchspaces table itself.
This change supports the RBAC model where LLM preferences are shared by all members
of a search space, rather than being per-user.
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "40"
down_revision = "39"
branch_labels = None
depends_on = None
def upgrade():
    """Add nullable LLM preference columns to ``searchspaces`` and populate them.

    After the columns are added, each search space receives the values from
    the ``user_search_space_preferences`` row whose ``user_id`` equals the
    search space's own ``user_id`` (the owner's preferences).
    """
    for column_name in ("long_context_llm_id", "fast_llm_id", "strategic_llm_id"):
        op.add_column(
            "searchspaces",
            sa.Column(column_name, sa.Integer(), nullable=True),
        )

    # Copy the owner's existing per-user preferences onto the search space row.
    copy_owner_preferences = sa.text("""
        UPDATE searchspaces ss
        SET
            long_context_llm_id = usp.long_context_llm_id,
            fast_llm_id = usp.fast_llm_id,
            strategic_llm_id = usp.strategic_llm_id
        FROM user_search_space_preferences usp
        WHERE ss.id = usp.search_space_id
        AND ss.user_id = usp.user_id
    """)
    op.get_bind().execute(copy_owner_preferences)
def downgrade():
    """Drop the three LLM preference columns from ``searchspaces``.

    The column values are not copied anywhere before the columns are dropped.
    """
    for column_name in ("strategic_llm_id", "fast_llm_id", "long_context_llm_id"):
        op.drop_column("searchspaces", column_name)

View file

@ -0,0 +1,212 @@
"""Backfill RBAC data for existing search spaces
Revision ID: 41
Revises: 40
Create Date: 2025-11-28
This migration creates default roles and owner memberships for all existing
search spaces that were created before the RBAC system was implemented.
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "41"
down_revision = "40"
branch_labels = None
depends_on = None
# Default role permissions (must match DEFAULT_ROLE_PERMISSIONS in db.py).
# Each entry describes one role: its name, description, list of permission
# strings, and the is_system_role / is_default flags.
# Permission strings follow a "<resource>:<action>" pattern.
DEFAULT_ROLES = [
# Owner: single "*" permission (presumably a wildcard for everything; confirm in db.py).
{
"name": "Owner",
"description": "Full access to all resources",
"permissions": ["*"],
"is_system_role": True,
"is_default": False,
},
# Admin: explicit list covering content, members, roles, invites and settings.
{
"name": "Admin",
"description": "Can manage members, roles, and all content",
"permissions": [
"documents:create",
"documents:read",
"documents:update",
"documents:delete",
"chats:create",
"chats:read",
"chats:update",
"chats:delete",
"llm_configs:create",
"llm_configs:read",
"llm_configs:update",
"llm_configs:delete",
"logs:read",
"logs:delete",
"podcasts:create",
"podcasts:read",
"podcasts:update",
"podcasts:delete",
"connectors:create",
"connectors:read",
"connectors:update",
"connectors:delete",
"members:read",
"members:update",
"members:delete",
"roles:create",
"roles:read",
"roles:update",
"roles:delete",
"invites:create",
"invites:read",
"invites:delete",
"settings:read",
"settings:update",
],
"is_system_role": True,
"is_default": False,
},
# Editor: create/read/update on content, no delete permissions.
# This is the only role flagged is_default.
{
"name": "Editor",
"description": "Can create and edit content",
"permissions": [
"documents:create",
"documents:read",
"documents:update",
"chats:create",
"chats:read",
"chats:update",
"llm_configs:read",
"logs:read",
"podcasts:create",
"podcasts:read",
"podcasts:update",
"connectors:create",
"connectors:read",
"connectors:update",
"members:read",
"roles:read",
],
"is_system_role": True,
"is_default": True,
},
# Viewer: ":read" permissions only.
{
"name": "Viewer",
"description": "Read-only access to content",
"permissions": [
"documents:read",
"chats:read",
"llm_configs:read",
"logs:read",
"podcasts:read",
"connectors:read",
"members:read",
"roles:read",
],
"is_system_role": True,
"is_default": False,
},
]
def upgrade():
    """Backfill default roles and an owner membership for role-less search spaces.

    Every search space that has no row in ``search_space_roles`` gets one role
    per entry in ``DEFAULT_ROLES``. If the search space has a ``user_id`` and
    no membership exists yet for that user, an ``is_owner`` membership bound
    to the new "Owner" role is inserted.
    """
    bind = op.get_bind()

    spaces_without_roles = bind.execute(
        sa.text("""
            SELECT ss.id, ss.user_id
            FROM searchspaces ss
            WHERE NOT EXISTS (
                SELECT 1 FROM search_space_roles ssr
                WHERE ssr.search_space_id = ss.id
            )
        """)
    ).fetchall()

    for space_id, creator_id in spaces_without_roles:
        owner_role_id = None

        for role in DEFAULT_ROLES:
            # The permission list is rendered as a PostgreSQL array literal
            # and embedded in the statement text; all other values are bound.
            quoted_perms = ",".join(f"'{p}'" for p in role["permissions"])
            perms_literal = "ARRAY[" + quoted_perms + "]::TEXT[]"
            inserted = bind.execute(
                sa.text(f"""
                    INSERT INTO search_space_roles
                    (name, description, permissions, is_default, is_system_role, search_space_id)
                    VALUES (:name, :description, {perms_literal}, :is_default, :is_system_role, :search_space_id)
                    RETURNING id
                """),
                {
                    "name": role["name"],
                    "description": role["description"],
                    "is_default": role["is_default"],
                    "is_system_role": role["is_system_role"],
                    "search_space_id": space_id,
                },
            )
            new_role_id = inserted.fetchone()[0]
            # Remember the Owner role so the creator can be attached to it.
            if role["name"] == "Owner":
                owner_role_id = new_role_id

        # Without a creator or an Owner role there is no membership to create.
        if not (creator_id and owner_role_id):
            continue

        already_member = bind.execute(
            sa.text("""
                SELECT 1 FROM search_space_memberships
                WHERE user_id = :user_id AND search_space_id = :search_space_id
            """),
            {"user_id": creator_id, "search_space_id": space_id},
        ).fetchone()
        if already_member:
            continue

        bind.execute(
            sa.text("""
                INSERT INTO search_space_memberships
                (user_id, search_space_id, role_id, is_owner)
                VALUES (:user_id, :search_space_id, :role_id, TRUE)
            """),
            {
                "user_id": creator_id,
                "search_space_id": space_id,
                "role_id": owner_role_id,
            },
        )
def downgrade():
    """Delete owner memberships tied to system Owner roles, then all system roles.

    This revision only adds data, so the downgrade is limited to rows that
    look auto-created: memberships with ``is_owner`` set whose role is a
    system role named 'Owner', followed by every role flagged
    ``is_system_role``. Roles that are not system roles are left alone.
    """
    cleanup_statements = (
        # Memberships first, while their Owner role rows still exist to join on.
        """
            DELETE FROM search_space_memberships ssm
            USING search_space_roles ssr
            WHERE ssm.role_id = ssr.id
            AND ssm.is_owner = TRUE
            AND ssr.is_system_role = TRUE
            AND ssr.name = 'Owner'
        """,
        """
            DELETE FROM search_space_roles
            WHERE is_system_role = TRUE
        """,
    )
    bind = op.get_bind()
    for statement in cleanup_statements:
        bind.execute(sa.text(statement))

View file

@ -0,0 +1,52 @@
"""Drop user_search_space_preferences table
Revision ID: 42
Revises: 41
Create Date: 2025-11-28
This table is no longer needed after RBAC implementation:
- LLM preferences are now stored on SearchSpace directly
- User-SearchSpace relationships are handled by SearchSpaceMembership
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "42"
down_revision = "41"
branch_labels = None
depends_on = None
def upgrade():
    """Drop the ``user_search_space_preferences`` table."""
    obsolete_table = "user_search_space_preferences"
    op.drop_table(obsolete_table)
def downgrade():
    """Recreate an empty ``user_search_space_preferences`` table.

    Only the table structure is rebuilt; no rows are restored.
    """
    # The three LLM preference columns share the same definition.
    llm_columns = [
        sa.Column(column_name, sa.Integer(), nullable=True)
        for column_name in ("long_context_llm_id", "fast_llm_id", "strategic_llm_id")
    ]
    op.create_table(
        "user_search_space_preferences",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column(
            "created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
        ),
        sa.Column(
            "user_id",
            sa.UUID(),
            sa.ForeignKey("user.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "search_space_id",
            sa.Integer(),
            sa.ForeignKey("searchspaces.id", ondelete="CASCADE"),
            nullable=False,
        ),
        *llm_columns,
        sa.UniqueConstraint("user_id", "search_space_id", name="uq_user_searchspace"),
    )