SurfSense/surfsense_backend/app/db.py
Rohan Verma 2b2453e015
Some checks failed
Build and Push Docker Images / tag_release (push) Has been cancelled
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Has been cancelled
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Has been cancelled
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Has been cancelled
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Has been cancelled
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Has been cancelled
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Has been cancelled
Merge pull request #1240 from AnishSarkar22/feat/resume-builder
feat: resume builder
2026-04-17 13:41:32 -07:00

2465 lines
79 KiB
Python

import uuid
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from datetime import UTC, datetime
from enum import StrEnum
import anyio
from fastapi import Depends
from fastapi_users.db import SQLAlchemyBaseUserTableUUID, SQLAlchemyUserDatabase
from pgvector.sqlalchemy import Vector
from sqlalchemy import (
ARRAY,
JSON,
TIMESTAMP,
BigInteger,
Boolean,
Column,
Enum as SQLAlchemyEnum,
ForeignKey,
Index,
Integer,
String,
Text,
UniqueConstraint,
text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, backref, declared_attr, relationship
from app.config import config
if config.AUTH_TYPE == "GOOGLE":
from fastapi_users.db import SQLAlchemyBaseOAuthAccountTableUUID
DATABASE_URL = config.DATABASE_URL
class DocumentType(StrEnum):
    """Origin/type of an ingested document.

    Persisted in ``documents.document_type`` (see the ``Document`` model's
    ``SQLAlchemyEnum(DocumentType)`` column), so adding a member also
    requires a DB migration for the PostgreSQL enum type.
    """

    EXTENSION = "EXTENSION"
    CRAWLED_URL = "CRAWLED_URL"
    FILE = "FILE"
    SLACK_CONNECTOR = "SLACK_CONNECTOR"
    TEAMS_CONNECTOR = "TEAMS_CONNECTOR"
    ONEDRIVE_FILE = "ONEDRIVE_FILE"
    NOTION_CONNECTOR = "NOTION_CONNECTOR"
    YOUTUBE_VIDEO = "YOUTUBE_VIDEO"
    GITHUB_CONNECTOR = "GITHUB_CONNECTOR"
    LINEAR_CONNECTOR = "LINEAR_CONNECTOR"
    DISCORD_CONNECTOR = "DISCORD_CONNECTOR"
    JIRA_CONNECTOR = "JIRA_CONNECTOR"
    CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
    CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
    GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
    GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR"
    GOOGLE_DRIVE_FILE = "GOOGLE_DRIVE_FILE"
    AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
    LUMA_CONNECTOR = "LUMA_CONNECTOR"
    ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
    BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
    CIRCLEBACK = "CIRCLEBACK"
    OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR"
    NOTE = "NOTE"
    DROPBOX_FILE = "DROPBOX_FILE"
    COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
    COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
    COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
    LOCAL_FOLDER_FILE = "LOCAL_FOLDER_FILE"
# Native Google document types → their legacy Composio equivalents.
# Old documents may still carry the Composio type until they are re-indexed;
# search, browse, and indexing must transparently handle both.
# Keys/values are the raw enum value strings (not DocumentType members) so
# the mapping can be applied directly to serialized document_type values.
NATIVE_TO_LEGACY_DOCTYPE: dict[str, str] = {
    "GOOGLE_DRIVE_FILE": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
    "GOOGLE_GMAIL_CONNECTOR": "COMPOSIO_GMAIL_CONNECTOR",
    "GOOGLE_CALENDAR_CONNECTOR": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
}
class SearchSourceConnectorType(StrEnum):
    """Type discriminator for search-source connectors.

    Persisted on ``SearchSourceConnector`` rows; covers both external search
    APIs (Tavily, SearXNG, ...) and data-source connectors (Slack, Notion, ...).
    """

    SERPER_API = "SERPER_API"  # NOT IMPLEMENTED YET : DON'T REMEMBER WHY : MOST PROBABLY BECAUSE WE NEED TO CRAWL THE RESULTS RETURNED BY IT
    TAVILY_API = "TAVILY_API"
    SEARXNG_API = "SEARXNG_API"
    LINKUP_API = "LINKUP_API"
    BAIDU_SEARCH_API = "BAIDU_SEARCH_API"  # Baidu AI Search API for Chinese web search
    SLACK_CONNECTOR = "SLACK_CONNECTOR"
    TEAMS_CONNECTOR = "TEAMS_CONNECTOR"
    ONEDRIVE_CONNECTOR = "ONEDRIVE_CONNECTOR"
    NOTION_CONNECTOR = "NOTION_CONNECTOR"
    GITHUB_CONNECTOR = "GITHUB_CONNECTOR"
    LINEAR_CONNECTOR = "LINEAR_CONNECTOR"
    DISCORD_CONNECTOR = "DISCORD_CONNECTOR"
    JIRA_CONNECTOR = "JIRA_CONNECTOR"
    CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
    CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
    GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
    GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR"
    GOOGLE_DRIVE_CONNECTOR = "GOOGLE_DRIVE_CONNECTOR"
    AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
    LUMA_CONNECTOR = "LUMA_CONNECTOR"
    ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
    WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR"
    BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
    CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR"
    OBSIDIAN_CONNECTOR = (
        "OBSIDIAN_CONNECTOR"  # Self-hosted only - Local Obsidian vault indexing
    )
    MCP_CONNECTOR = "MCP_CONNECTOR"  # Model Context Protocol - User-defined API tools
    DROPBOX_CONNECTOR = "DROPBOX_CONNECTOR"
    COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
    COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
    COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
class PodcastStatus(StrEnum):
    """Lifecycle states for generated podcasts.

    Lowercase values match the PostgreSQL ``podcast_status`` enum used by the
    ``Podcast.status`` column.
    """

    PENDING = "pending"
    GENERATING = "generating"
    READY = "ready"
    FAILED = "failed"
class VideoPresentationStatus(StrEnum):
    """Lifecycle states for AI-generated video presentations.

    Lowercase values match the PostgreSQL ``video_presentation_status`` enum
    used by the ``VideoPresentation.status`` column.
    """

    PENDING = "pending"
    GENERATING = "generating"
    READY = "ready"
    FAILED = "failed"
class DocumentStatus:
"""
Helper class for document processing status (stored as JSONB).
Status values:
- {"state": "ready"} - Document is fully processed and searchable
- {"state": "pending"} - Document is queued, waiting to be processed
- {"state": "processing"} - Document is currently being processed (only 1 at a time)
- {"state": "failed", "reason": "..."} - Processing failed with reason
Usage:
document.status = DocumentStatus.pending()
document.status = DocumentStatus.processing()
document.status = DocumentStatus.ready()
document.status = DocumentStatus.failed("LLM rate limit exceeded")
"""
# State constants
READY = "ready"
PENDING = "pending"
PROCESSING = "processing"
FAILED = "failed"
@staticmethod
def ready() -> dict:
"""Return status dict for a ready/searchable document."""
return {"state": DocumentStatus.READY}
@staticmethod
def pending() -> dict:
"""Return status dict for a document waiting to be processed."""
return {"state": DocumentStatus.PENDING}
@staticmethod
def processing() -> dict:
"""Return status dict for a document being processed."""
return {"state": DocumentStatus.PROCESSING}
@staticmethod
def failed(reason: str, **extra_details) -> dict:
"""
Return status dict for a failed document.
Args:
reason: Human-readable failure reason
**extra_details: Optional additional details (duplicate_of, error_code, etc.)
"""
status = {
"state": DocumentStatus.FAILED,
"reason": reason[:500],
} # Truncate long reasons
if extra_details:
status.update(extra_details)
return status
@staticmethod
def get_state(status: dict | None) -> str | None:
"""Extract state from status dict, returns None if invalid."""
if status is None:
return None
return status.get("state") if isinstance(status, dict) else None
@staticmethod
def is_state(status: dict | None, state: str) -> bool:
"""Check if status matches a given state."""
return DocumentStatus.get_state(status) == state
@staticmethod
def get_failure_reason(status: dict | None) -> str | None:
"""Extract failure reason from status dict."""
if status is None or not isinstance(status, dict):
return None
if status.get("state") == DocumentStatus.FAILED:
return status.get("reason")
return None
class LiteLLMProvider(StrEnum):
    """
    Enum for LLM providers supported by LiteLLM.

    CUSTOM allows user-supplied provider configurations that LiteLLM routes
    by explicit base URL rather than a known provider prefix.
    """

    OPENAI = "OPENAI"
    ANTHROPIC = "ANTHROPIC"
    GOOGLE = "GOOGLE"
    AZURE_OPENAI = "AZURE_OPENAI"
    BEDROCK = "BEDROCK"
    VERTEX_AI = "VERTEX_AI"
    GROQ = "GROQ"
    COHERE = "COHERE"
    MISTRAL = "MISTRAL"
    DEEPSEEK = "DEEPSEEK"
    XAI = "XAI"
    OPENROUTER = "OPENROUTER"
    TOGETHER_AI = "TOGETHER_AI"
    FIREWORKS_AI = "FIREWORKS_AI"
    REPLICATE = "REPLICATE"
    PERPLEXITY = "PERPLEXITY"
    OLLAMA = "OLLAMA"
    ALIBABA_QWEN = "ALIBABA_QWEN"
    MOONSHOT = "MOONSHOT"
    ZHIPU = "ZHIPU"
    ANYSCALE = "ANYSCALE"
    DEEPINFRA = "DEEPINFRA"
    CEREBRAS = "CEREBRAS"
    SAMBANOVA = "SAMBANOVA"
    AI21 = "AI21"
    CLOUDFLARE = "CLOUDFLARE"
    DATABRICKS = "DATABRICKS"
    COMETAPI = "COMETAPI"
    HUGGINGFACE = "HUGGINGFACE"
    GITHUB_MODELS = "GITHUB_MODELS"
    MINIMAX = "MINIMAX"
    CUSTOM = "CUSTOM"
class ImageGenProvider(StrEnum):
    """
    Enum for image generation providers supported by LiteLLM.

    This is a subset of LLM providers — only those that support image generation.
    See: https://docs.litellm.ai/docs/image_generation#supported-providers
    """

    OPENAI = "OPENAI"
    AZURE_OPENAI = "AZURE_OPENAI"
    GOOGLE = "GOOGLE"  # Google AI Studio
    VERTEX_AI = "VERTEX_AI"
    BEDROCK = "BEDROCK"  # AWS Bedrock
    RECRAFT = "RECRAFT"
    OPENROUTER = "OPENROUTER"
    XINFERENCE = "XINFERENCE"
    NSCALE = "NSCALE"
class VisionProvider(StrEnum):
    """Enum for providers usable as vision (image-understanding) LLM backends."""

    OPENAI = "OPENAI"
    ANTHROPIC = "ANTHROPIC"
    GOOGLE = "GOOGLE"
    AZURE_OPENAI = "AZURE_OPENAI"
    VERTEX_AI = "VERTEX_AI"
    BEDROCK = "BEDROCK"
    XAI = "XAI"
    OPENROUTER = "OPENROUTER"
    OLLAMA = "OLLAMA"
    GROQ = "GROQ"
    TOGETHER_AI = "TOGETHER_AI"
    FIREWORKS_AI = "FIREWORKS_AI"
    DEEPSEEK = "DEEPSEEK"
    MISTRAL = "MISTRAL"
    CUSTOM = "CUSTOM"
class LogLevel(StrEnum):
    """Severity levels for application log entries (mirrors stdlib logging levels)."""

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
class LogStatus(StrEnum):
    """Outcome/progress marker attached to a logged task or operation."""

    IN_PROGRESS = "IN_PROGRESS"
    SUCCESS = "SUCCESS"
    FAILED = "FAILED"
class IncentiveTaskType(StrEnum):
    """
    Enum for incentive task types that users can complete to earn free pages.
    Each task can only be completed once per user.

    When adding new tasks:
    1. Add a new enum value here
    2. Add the task configuration to INCENTIVE_TASKS_CONFIG below
    3. Create an Alembic migration to add the enum value to PostgreSQL
    """

    GITHUB_STAR = "GITHUB_STAR"
    REDDIT_FOLLOW = "REDDIT_FOLLOW"
    DISCORD_JOIN = "DISCORD_JOIN"
    # Future tasks can be added here:
    # GITHUB_ISSUE = "GITHUB_ISSUE"
    # SOCIAL_SHARE = "SOCIAL_SHARE"
    # REFER_FRIEND = "REFER_FRIEND"
class PagePurchaseStatus(StrEnum):
    """Payment lifecycle states for page-credit purchases."""

    PENDING = "pending"
    COMPLETED = "completed"
    FAILED = "failed"
class PremiumTokenPurchaseStatus(StrEnum):
    """Payment lifecycle states for premium-token purchases."""

    PENDING = "pending"
    COMPLETED = "completed"
    FAILED = "failed"
# Centralized configuration for incentive tasks
# This makes it easy to add new tasks without changing code in multiple places
# Each entry: title/description shown in the UI, pages_reward credited on
# completion, and action_url the user is sent to.
INCENTIVE_TASKS_CONFIG = {
    IncentiveTaskType.GITHUB_STAR: {
        "title": "Star our GitHub repository",
        "description": "Show your support by starring SurfSense on GitHub",
        "pages_reward": 30,
        "action_url": "https://github.com/MODSetter/SurfSense",
    },
    IncentiveTaskType.REDDIT_FOLLOW: {
        "title": "Join our Subreddit",
        "description": "Join the SurfSense community on Reddit",
        "pages_reward": 30,
        "action_url": "https://www.reddit.com/r/SurfSense/",
    },
    IncentiveTaskType.DISCORD_JOIN: {
        "title": "Join our Discord",
        "description": "Join the SurfSense community on Discord",
        "pages_reward": 40,
        "action_url": "https://discord.gg/ejRNvftDp9",
    },
    # Future tasks can be configured here:
    # IncentiveTaskType.GITHUB_ISSUE: {
    #     "title": "Create an issue",
    #     "description": "Help improve SurfSense by reporting bugs or suggesting features",
    #     "pages_reward": 50,
    #     "action_url": "https://github.com/MODSetter/SurfSense/issues/new/choose",
    # },
}
class Permission(StrEnum):
    """
    Granular permissions for search space resources.

    Values follow a "resource:action" convention (e.g. "documents:read").
    Use '*' (FULL_ACCESS) to grant all permissions.
    """

    # Documents
    DOCUMENTS_CREATE = "documents:create"
    DOCUMENTS_READ = "documents:read"
    DOCUMENTS_UPDATE = "documents:update"
    DOCUMENTS_DELETE = "documents:delete"
    # Chats
    CHATS_CREATE = "chats:create"
    CHATS_READ = "chats:read"
    CHATS_UPDATE = "chats:update"
    CHATS_DELETE = "chats:delete"
    # Comments
    COMMENTS_CREATE = "comments:create"
    COMMENTS_READ = "comments:read"
    COMMENTS_DELETE = "comments:delete"
    # LLM Configs
    LLM_CONFIGS_CREATE = "llm_configs:create"
    LLM_CONFIGS_READ = "llm_configs:read"
    LLM_CONFIGS_UPDATE = "llm_configs:update"
    LLM_CONFIGS_DELETE = "llm_configs:delete"
    # Podcasts
    PODCASTS_CREATE = "podcasts:create"
    PODCASTS_READ = "podcasts:read"
    PODCASTS_UPDATE = "podcasts:update"
    PODCASTS_DELETE = "podcasts:delete"
    # Video Presentations
    VIDEO_PRESENTATIONS_CREATE = "video_presentations:create"
    VIDEO_PRESENTATIONS_READ = "video_presentations:read"
    VIDEO_PRESENTATIONS_UPDATE = "video_presentations:update"
    VIDEO_PRESENTATIONS_DELETE = "video_presentations:delete"
    # Image Generations
    IMAGE_GENERATIONS_CREATE = "image_generations:create"
    IMAGE_GENERATIONS_READ = "image_generations:read"
    IMAGE_GENERATIONS_DELETE = "image_generations:delete"
    # Vision LLM Configs
    VISION_CONFIGS_CREATE = "vision_configs:create"
    VISION_CONFIGS_READ = "vision_configs:read"
    VISION_CONFIGS_DELETE = "vision_configs:delete"
    # Connectors
    CONNECTORS_CREATE = "connectors:create"
    CONNECTORS_READ = "connectors:read"
    CONNECTORS_UPDATE = "connectors:update"
    CONNECTORS_DELETE = "connectors:delete"
    # Logs
    LOGS_READ = "logs:read"
    LOGS_DELETE = "logs:delete"
    # Members
    MEMBERS_INVITE = "members:invite"
    MEMBERS_VIEW = "members:view"
    MEMBERS_REMOVE = "members:remove"
    MEMBERS_MANAGE_ROLES = "members:manage_roles"
    # Roles
    ROLES_CREATE = "roles:create"
    ROLES_READ = "roles:read"
    ROLES_UPDATE = "roles:update"
    ROLES_DELETE = "roles:delete"
    # Search Space Settings
    SETTINGS_VIEW = "settings:view"
    SETTINGS_UPDATE = "settings:update"
    SETTINGS_DELETE = "settings:delete"  # Delete the entire search space
    # Public Sharing
    PUBLIC_SHARING_VIEW = "public_sharing:view"
    PUBLIC_SHARING_CREATE = "public_sharing:create"
    PUBLIC_SHARING_DELETE = "public_sharing:delete"
    # Full access wildcard
    FULL_ACCESS = "*"
# Predefined role permission sets for convenience
# Note: Only Owner, Editor, and Viewer roles are supported.
# Owner has full access (*), Editor can do everything except delete, Viewer has read-only access.
# Values are stored as the raw permission strings (Permission.X.value), not enum members.
DEFAULT_ROLE_PERMISSIONS = {
    "Owner": [Permission.FULL_ACCESS.value],
    "Editor": [
        # Documents (no delete)
        Permission.DOCUMENTS_CREATE.value,
        Permission.DOCUMENTS_READ.value,
        Permission.DOCUMENTS_UPDATE.value,
        # Chats (no delete)
        Permission.CHATS_CREATE.value,
        Permission.CHATS_READ.value,
        Permission.CHATS_UPDATE.value,
        # Comments (no delete)
        Permission.COMMENTS_CREATE.value,
        Permission.COMMENTS_READ.value,
        # LLM Configs (no delete)
        Permission.LLM_CONFIGS_CREATE.value,
        Permission.LLM_CONFIGS_READ.value,
        Permission.LLM_CONFIGS_UPDATE.value,
        # Podcasts (no delete)
        Permission.PODCASTS_CREATE.value,
        Permission.PODCASTS_READ.value,
        Permission.PODCASTS_UPDATE.value,
        # Video Presentations (no delete)
        Permission.VIDEO_PRESENTATIONS_CREATE.value,
        Permission.VIDEO_PRESENTATIONS_READ.value,
        Permission.VIDEO_PRESENTATIONS_UPDATE.value,
        # Image Generations (create and read, no delete)
        Permission.IMAGE_GENERATIONS_CREATE.value,
        Permission.IMAGE_GENERATIONS_READ.value,
        # Vision Configs (create and read, no delete)
        Permission.VISION_CONFIGS_CREATE.value,
        Permission.VISION_CONFIGS_READ.value,
        # Connectors (no delete)
        Permission.CONNECTORS_CREATE.value,
        Permission.CONNECTORS_READ.value,
        Permission.CONNECTORS_UPDATE.value,
        # Logs (read only)
        Permission.LOGS_READ.value,
        # Members (can invite and view only, cannot manage roles or remove)
        Permission.MEMBERS_INVITE.value,
        Permission.MEMBERS_VIEW.value,
        # Roles (read only - cannot create, update, or delete)
        Permission.ROLES_READ.value,
        # Settings (view only, no update or delete)
        Permission.SETTINGS_VIEW.value,
        # Public Sharing (can create and view, no delete)
        Permission.PUBLIC_SHARING_VIEW.value,
        Permission.PUBLIC_SHARING_CREATE.value,
    ],
    "Viewer": [
        # Documents (read only)
        Permission.DOCUMENTS_READ.value,
        # Chats (read only)
        Permission.CHATS_READ.value,
        # Comments (can create and read, but not delete)
        Permission.COMMENTS_CREATE.value,
        Permission.COMMENTS_READ.value,
        # LLM Configs (read only)
        Permission.LLM_CONFIGS_READ.value,
        # Podcasts (read only)
        Permission.PODCASTS_READ.value,
        # Video Presentations (read only)
        Permission.VIDEO_PRESENTATIONS_READ.value,
        # Image Generations (read only)
        Permission.IMAGE_GENERATIONS_READ.value,
        # Vision Configs (read only)
        Permission.VISION_CONFIGS_READ.value,
        # Connectors (read only)
        Permission.CONNECTORS_READ.value,
        # Logs (read only)
        Permission.LOGS_READ.value,
        # Members (view only)
        Permission.MEMBERS_VIEW.value,
        # Roles (read only)
        Permission.ROLES_READ.value,
        # Settings (view only)
        Permission.SETTINGS_VIEW.value,
        # Public Sharing (view only)
        Permission.PUBLIC_SHARING_VIEW.value,
    ],
}
class Base(DeclarativeBase):
    """SQLAlchemy declarative base shared by all ORM models in this module."""

    pass
class TimestampMixin:
    """Mixin adding an indexed, timezone-aware ``created_at`` column.

    The default is evaluated client-side at insert time via a lambda so each
    row gets the current UTC time (not the import-time value).
    """

    @declared_attr
    def created_at(cls):  # noqa: N805
        return Column(
            TIMESTAMP(timezone=True),
            nullable=False,
            default=lambda: datetime.now(UTC),
            index=True,
        )
class BaseModel(Base):
    """Abstract base giving every concrete model an integer surrogate primary key."""

    __abstract__ = True
    __allow_unmapped__ = True

    # Auto-incrementing integer PK, indexed for fast lookups.
    id = Column(Integer, primary_key=True, index=True)
class NewChatMessageRole(StrEnum):
    """Role enum for new chat messages (assistant-ui message roles)."""

    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
class ChatVisibility(StrEnum):
    """
    Visibility/sharing level for chat threads.

    PRIVATE: Only the creator can see/access the chat (default)
    SEARCH_SPACE: All members of the search space can see/access the chat
    PUBLIC: (Future) Anyone with the link can access the chat
    """

    PRIVATE = "PRIVATE"
    SEARCH_SPACE = "SEARCH_SPACE"
    # PUBLIC = "PUBLIC"  # Reserved for future implementation
class NewChatThread(BaseModel, TimestampMixin):
    """
    Thread model for the new chat feature using assistant-ui.

    Each thread represents a conversation with message history.
    LangGraph checkpointer uses thread_id for state persistence.
    """

    __tablename__ = "new_chat_threads"

    title = Column(String(500), nullable=False, default="New Chat", index=True)
    archived = Column(Boolean, nullable=False, default=False)
    # Bumped automatically on every update via onupdate.
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )
    # Visibility/sharing control (see ChatVisibility)
    visibility = Column(
        SQLAlchemyEnum(ChatVisibility),
        nullable=False,
        default=ChatVisibility.PRIVATE,
        server_default="PRIVATE",
        index=True,
    )
    # Foreign keys
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    # Track who created this chat thread (for visibility filtering)
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,  # Nullable for existing records before migration
        index=True,
    )
    # Clone tracking - for audit and history bootstrap
    cloned_from_thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    cloned_from_snapshot_id = Column(
        Integer,
        ForeignKey("public_chat_snapshots.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    cloned_at = Column(
        TIMESTAMP(timezone=True),
        nullable=True,
    )
    # Flag to bootstrap LangGraph checkpointer with DB messages on first message
    needs_history_bootstrap = Column(
        Boolean,
        nullable=False,
        default=False,
        server_default="false",
    )

    # Relationships
    search_space = relationship("SearchSpace", back_populates="new_chat_threads")
    created_by = relationship("User", back_populates="new_chat_threads")
    messages = relationship(
        "NewChatMessage",
        back_populates="thread",
        order_by="NewChatMessage.created_at",
        cascade="all, delete-orphan",
    )
    snapshots = relationship(
        "PublicChatSnapshot",
        back_populates="thread",
        cascade="all, delete-orphan",
        foreign_keys="[PublicChatSnapshot.thread_id]",
    )
    token_usages = relationship(
        "TokenUsage",
        back_populates="thread",
        cascade="all, delete-orphan",
    )
class NewChatMessage(BaseModel, TimestampMixin):
    """
    Message model for the new chat feature.

    Stores individual messages in assistant-ui format.
    """

    __tablename__ = "new_chat_messages"

    role = Column(SQLAlchemyEnum(NewChatMessageRole), nullable=False)
    # Content stored as JSONB to support rich content (text, tool calls, etc.)
    content = Column(JSONB, nullable=False)
    # Foreign key to thread
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Track who sent this message (for shared chats)
    author_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # Relationships
    thread = relationship("NewChatThread", back_populates="messages")
    author = relationship("User")
    comments = relationship(
        "ChatComment",
        back_populates="message",
        cascade="all, delete-orphan",
    )
    # One-to-one: at most one TokenUsage row per message (uselist=False).
    token_usage = relationship(
        "TokenUsage",
        back_populates="message",
        uselist=False,
        cascade="all, delete-orphan",
    )
class TokenUsage(BaseModel, TimestampMixin):
    """
    Tracks LLM token consumption per assistant turn.

    One row per usage event. For chat, linked to a specific message via message_id.
    The usage_type column enables future extension to track non-chat usage
    (indexing, image generation, podcasts, etc.) without schema changes.
    """

    __tablename__ = "token_usage"

    prompt_tokens = Column(Integer, nullable=False, default=0)
    completion_tokens = Column(Integer, nullable=False, default=0)
    total_tokens = Column(Integer, nullable=False, default=0)
    # Optional per-model breakdown and raw call metadata.
    model_breakdown = Column(JSONB, nullable=True)
    call_details = Column(JSONB, nullable=True)
    # "chat" by default; other values reserved for non-chat usage types.
    usage_type = Column(String(50), nullable=False, default="chat", index=True)
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    message_id = Column(
        Integer,
        ForeignKey("new_chat_messages.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Relationships
    thread = relationship("NewChatThread", back_populates="token_usages")
    message = relationship("NewChatMessage", back_populates="token_usage")
    search_space = relationship("SearchSpace")
    user = relationship("User")
class PublicChatSnapshot(BaseModel, TimestampMixin):
    """
    Immutable snapshot of a chat thread for public sharing.

    Each snapshot is a frozen copy of the chat at a specific point in time.
    The snapshot_data JSONB contains all messages and metadata needed to
    render the public chat without querying the original thread.
    """

    __tablename__ = "public_chat_snapshots"

    # Link to original thread - CASCADE DELETE when thread is deleted
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Public access token (unique URL identifier)
    share_token = Column(
        String(64),
        nullable=False,
        unique=True,
        index=True,
    )
    # Hash of the snapshotted content, used for de-duplication (see constraint below).
    content_hash = Column(
        String(64),
        nullable=False,
        index=True,
    )
    # Frozen messages + metadata; IDs of the messages included in the snapshot.
    snapshot_data = Column(JSONB, nullable=False)
    message_ids = Column(ARRAY(Integer), nullable=False)
    created_by_user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # Relationships
    thread = relationship(
        "NewChatThread",
        back_populates="snapshots",
        foreign_keys="[PublicChatSnapshot.thread_id]",
    )
    created_by = relationship("User")

    # Constraints
    __table_args__ = (
        # Prevent duplicate snapshots of the same content for the same thread
        UniqueConstraint(
            "thread_id", "content_hash", name="uq_snapshot_thread_content_hash"
        ),
    )
class ChatComment(BaseModel, TimestampMixin):
    """
    Comment model for comments on AI chat responses.

    Supports one level of nesting (replies to comments, but no replies to replies).
    """

    __tablename__ = "chat_comments"

    message_id = Column(
        Integer,
        ForeignKey("new_chat_messages.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Denormalized thread_id for efficient Zero subscriptions (one per thread)
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Self-referential parent for one level of reply nesting.
    parent_id = Column(
        Integer,
        ForeignKey("chat_comments.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    author_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    content = Column(Text, nullable=False)
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # Relationships
    message = relationship("NewChatMessage", back_populates="comments")
    thread = relationship("NewChatThread")
    author = relationship("User")
    # Self-referential relationship; replies accessible via the "replies" backref.
    parent = relationship(
        "ChatComment", remote_side="ChatComment.id", backref="replies"
    )
    mentions = relationship(
        "ChatCommentMention",
        back_populates="comment",
        cascade="all, delete-orphan",
    )
class ChatCommentMention(BaseModel, TimestampMixin):
    """
    Tracks @mentions in chat comments for notification purposes.
    """

    __tablename__ = "chat_comment_mentions"

    comment_id = Column(
        Integer,
        ForeignKey("chat_comments.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    mentioned_user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Relationships
    comment = relationship("ChatComment", back_populates="mentions")
    mentioned_user = relationship("User")
class ChatSessionState(BaseModel):
    """
    Tracks real-time session state for shared chat collaboration.

    One record per thread (enforced by the unique constraint on thread_id),
    synced via Zero.
    """

    __tablename__ = "chat_session_state"

    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
        index=True,
    )
    # User whose prompt the AI is currently answering; NULL when idle.
    ai_responding_to_user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )

    thread = relationship("NewChatThread")
    ai_responding_to_user = relationship("User")
class Folder(BaseModel, TimestampMixin):
    """Hierarchical folder for organizing documents within a search space."""

    __tablename__ = "folders"

    name = Column(String(255), nullable=False, index=True)
    # String-based ordering key for sibling sort (lexicographic position).
    position = Column(String(50), nullable=False, index=True)
    # Self-referential parent; NULL means top-level folder.
    parent_id = Column(
        Integer,
        ForeignKey("folders.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )
    # Mapped as folder_metadata to avoid clashing with SQLAlchemy's reserved
    # "metadata" attribute; the DB column is still named "metadata".
    folder_metadata = Column("metadata", JSONB, nullable=True)

    parent = relationship("Folder", remote_side="Folder.id", backref="children")
    search_space = relationship("SearchSpace", back_populates="folders")
    created_by = relationship("User", back_populates="folders")
    documents = relationship("Document", back_populates="folder", passive_deletes=True)
class Document(BaseModel, TimestampMixin):
    """Core document model: ingested content plus embedding and processing status."""

    __tablename__ = "documents"

    title = Column(String, nullable=False, index=True)
    document_type = Column(SQLAlchemyEnum(DocumentType), nullable=False)
    document_metadata = Column(JSON, nullable=True)
    content = Column(Text, nullable=False)
    # Hash of content (dedup) and of the source identity (idempotent re-indexing).
    content_hash = Column(String, nullable=False, index=True, unique=True)
    unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
    # Document-level embedding; dimension comes from the configured embedding model.
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    # BlockNote live editing state (NULL when never edited)
    # DEPRECATED: Will be removed in a future migration. Use source_markdown instead.
    blocknote_document = Column(JSONB, nullable=True)
    # Full raw markdown content for the Plate.js editor.
    # This is the source of truth for document content in the editor.
    # Populated from markdown at ingestion time, or from blocknote_document migration.
    source_markdown = Column(Text, nullable=True)
    # Background reindex flag (set when editor content is saved)
    content_needs_reindexing = Column(
        Boolean, nullable=False, default=False, server_default=text("false")
    )
    # Track when document was last updated by indexers, processors, or editor
    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    folder_id = Column(
        Integer,
        ForeignKey("folders.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Track who created/uploaded this document
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,  # Nullable for backward compatibility with existing records
        index=True,
    )
    # Track which connector created this document (for cleanup on connector deletion)
    connector_id = Column(
        Integer,
        ForeignKey("search_source_connectors.id", ondelete="SET NULL"),
        nullable=True,  # Nullable for manually uploaded docs without connector
        index=True,
    )
    # Processing status for real-time visibility (JSONB)
    # Format: {"state": "ready"} or {"state": "processing"} or {"state": "failed", "reason": "..."}
    # Default to {"state": "ready"} for backward compatibility with existing documents
    status = Column(
        JSONB,
        nullable=False,
        default=DocumentStatus.ready,
        server_default=text('\'{"state": "ready"}\'::jsonb'),
        index=True,
    )

    # Relationships
    search_space = relationship("SearchSpace", back_populates="documents")
    folder = relationship("Folder", back_populates="documents")
    created_by = relationship("User", back_populates="documents")
    connector = relationship("SearchSourceConnector", back_populates="documents")
    chunks = relationship(
        "Chunk", back_populates="document", cascade="all, delete-orphan"
    )
class DocumentVersion(BaseModel, TimestampMixin):
    """Historical version of a document's markdown, one row per saved version."""

    __tablename__ = "document_versions"
    __table_args__ = (
        # A given document can have each version number at most once.
        UniqueConstraint("document_id", "version_number", name="uq_document_version"),
    )

    document_id = Column(
        Integer,
        ForeignKey("documents.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    version_number = Column(Integer, nullable=False)
    source_markdown = Column(Text, nullable=True)
    content_hash = Column(String, nullable=False)
    title = Column(String, nullable=True)

    document = relationship(
        "Document", backref=backref("versions", passive_deletes=True)
    )
class Chunk(BaseModel, TimestampMixin):
    """Embedded chunk of a document, used for vector search."""

    __tablename__ = "chunks"

    content = Column(Text, nullable=False)
    # Chunk embedding; dimension comes from the configured embedding model.
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    document_id = Column(
        Integer,
        ForeignKey("documents.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    document = relationship("Document", back_populates="chunks")
class SurfsenseDocsDocument(BaseModel, TimestampMixin):
    """
    Surfsense documentation storage.

    Indexed at migration time from MDX files.
    """

    __tablename__ = "surfsense_docs_documents"

    source = Column(
        String, nullable=False, unique=True, index=True
    )  # File path: "connectors/slack.mdx"
    title = Column(String, nullable=False)
    content = Column(Text, nullable=False)
    content_hash = Column(String, nullable=False, index=True)  # For detecting changes
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)

    chunks = relationship(
        "SurfsenseDocsChunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )
class SurfsenseDocsChunk(BaseModel, TimestampMixin):
    """Chunk storage for Surfsense documentation."""

    __tablename__ = "surfsense_docs_chunks"

    content = Column(Text, nullable=False)
    # Chunk embedding; dimension comes from the configured embedding model.
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    document_id = Column(
        Integer,
        ForeignKey("surfsense_docs_documents.id", ondelete="CASCADE"),
        nullable=False,
    )

    document = relationship("SurfsenseDocsDocument", back_populates="chunks")
class Podcast(BaseModel, TimestampMixin):
    """Podcast model for storing generated podcasts."""

    __tablename__ = "podcasts"

    title = Column(String(500), nullable=False)
    podcast_transcript = Column(JSONB, nullable=True)
    file_location = Column(Text, nullable=True)
    # Maps PodcastStatus onto the existing PG enum "podcast_status";
    # create_type=False because the enum type is managed by migrations, and
    # values_callable stores the lowercase .value strings rather than member names.
    status = Column(
        SQLAlchemyEnum(
            PodcastStatus,
            name="podcast_status",
            create_type=False,
            values_callable=lambda x: [e.value for e in x],
        ),
        nullable=False,
        default=PodcastStatus.READY,
        server_default="ready",
        index=True,
    )
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="podcasts")
    # Optional link back to the chat thread the podcast was generated from.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    thread = relationship("NewChatThread")
class VideoPresentation(BaseModel, TimestampMixin):
    """Video presentation model for storing AI-generated video presentations.
    The slides JSONB stores per-slide data including Remotion component code,
    audio file paths, and durations. The frontend compiles the code and renders
    the video using Remotion Player.
    """
    __tablename__ = "video_presentations"
    title = Column(String(500), nullable=False)
    # Per-slide payload (see class docstring).
    slides = Column(JSONB, nullable=True)
    # Remotion scene code blobs, stored separately from slide metadata.
    scene_codes = Column(JSONB, nullable=True)
    status = Column(
        SQLAlchemyEnum(
            VideoPresentationStatus,
            name="video_presentation_status",
            create_type=False,  # enum type is managed by migrations, not create_all
            values_callable=lambda x: [e.value for e in x],  # store .value, not .name
        ),
        nullable=False,
        default=VideoPresentationStatus.READY,
        server_default="ready",
        index=True,
    )
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="video_presentations")
    # Originating chat thread; SET NULL keeps the presentation if the thread is deleted.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    thread = relationship("NewChatThread")
class Report(BaseModel, TimestampMixin):
    """Report model for storing generated reports (Markdown or Typst)."""
    __tablename__ = "reports"
    title = Column(String(500), nullable=False)
    content = Column(Text, nullable=True)
    # Format discriminator for ``content`` — defaults to Markdown.
    content_type = Column(String(20), nullable=False, server_default="markdown")
    report_metadata = Column(JSONB, nullable=True)  # section headings, word count, etc.
    report_style = Column(
        String(100), nullable=True
    )  # e.g. "executive_summary", "deep_research"
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="reports")
    # Versioning: reports sharing the same report_group_id are versions of the same report.
    # For v1, report_group_id = the report's own id (set after insert).
    report_group_id = Column(Integer, nullable=True, index=True)
    # Originating chat thread; SET NULL keeps the report if the thread is deleted.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    thread = relationship("NewChatThread")
class ImageGenerationConfig(BaseModel, TimestampMixin):
    """
    Dedicated configuration table for image generation models.
    Separate from NewLLMConfig because image generation models don't need
    system_instructions, citations_enabled, or use_default_system_instructions.
    They only need provider credentials and model parameters.
    """
    __tablename__ = "image_generation_configs"
    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)
    # Provider & model (uses ImageGenProvider, NOT LiteLLMProvider)
    provider = Column(SQLAlchemyEnum(ImageGenProvider), nullable=False)
    custom_provider = Column(String(100), nullable=True)
    model_name = Column(String(100), nullable=False)
    # Credentials
    api_key = Column(String, nullable=False)
    api_base = Column(String(500), nullable=True)
    api_version = Column(String(50), nullable=True)  # Azure-specific
    # Additional litellm parameters.
    # ``dict`` (a callable invoked per insert) instead of a ``{}`` literal, so
    # each row gets its own fresh dict rather than sharing one mutable default.
    litellm_params = Column(JSON, nullable=True, default=dict)
    # Relationships
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship(
        "SearchSpace", back_populates="image_generation_configs"
    )
    # User who created this config
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="image_generation_configs")
class VisionLLMConfig(BaseModel, TimestampMixin):
    """
    Configuration for vision-capable LLMs (e.g. screenshot/image analysis).
    Mirrors ImageGenerationConfig: provider credentials and model parameters
    only, without the prompt-related fields of NewLLMConfig.
    """
    __tablename__ = "vision_llm_configs"
    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)
    provider = Column(SQLAlchemyEnum(VisionProvider), nullable=False)
    custom_provider = Column(String(100), nullable=True)
    model_name = Column(String(100), nullable=False)
    api_key = Column(String, nullable=False)
    api_base = Column(String(500), nullable=True)
    api_version = Column(String(50), nullable=True)  # Azure-specific
    # Additional litellm parameters.
    # ``dict`` (a callable invoked per insert) instead of a ``{}`` literal, so
    # each row gets its own fresh dict rather than sharing one mutable default.
    litellm_params = Column(JSON, nullable=True, default=dict)
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="vision_llm_configs")
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="vision_llm_configs")
class ImageGeneration(BaseModel, TimestampMixin):
    """
    Stores image generation requests and results using litellm.aimage_generation().
    Since aimage_generation is a single async call (not a background job),
    there is no status enum. A row with response_data means success;
    a row with error_message means failure.
    Response data is stored as JSONB matching the litellm output format:
    {
        "created": int,
        "data": [{"b64_json": str|None, "revised_prompt": str|None, "url": str|None}],
        "usage": {"prompt_tokens": int, "completion_tokens": int, "total_tokens": int}
    }
    """
    __tablename__ = "image_generations"
    # Request parameters (matching litellm.aimage_generation() params)
    prompt = Column(Text, nullable=False)
    model = Column(String(200), nullable=True)  # e.g., "dall-e-3", "gpt-image-1"
    n = Column(Integer, nullable=True, default=1)  # number of images requested
    quality = Column(
        String(50), nullable=True
    )  # "auto", "high", "medium", "low", "hd", "standard"
    size = Column(
        String(50), nullable=True
    )  # "1024x1024", "1536x1024", "1024x1536", etc.
    style = Column(String(50), nullable=True)  # Model-specific style parameter
    response_format = Column(String(50), nullable=True)  # "url" or "b64_json"
    # Image generation config reference
    # 0 = Auto mode (router), negative IDs = global configs from YAML,
    # positive IDs = ImageGenerationConfig records in DB
    # (plain Integer, not a FK, because negative/zero IDs have no DB row).
    image_generation_config_id = Column(Integer, nullable=True)
    # Response data (full litellm response as JSONB) — present on success
    response_data = Column(JSONB, nullable=True)
    # Error message — present on failure
    error_message = Column(Text, nullable=True)
    # Signed access token for serving images via <img> tags.
    # Stored in DB so it survives SECRET_KEY rotation.
    access_token = Column(String(64), nullable=True, index=True)
    # Foreign keys
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    # SET NULL so generations outlive a deleted creator account.
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Relationships
    search_space = relationship("SearchSpace", back_populates="image_generations")
    created_by = relationship("User", back_populates="image_generations")
class SearchSpace(BaseModel, TimestampMixin):
    """
    A workspace that owns documents, chats, generated artifacts (podcasts,
    videos, reports, images), connectors, LLM configurations, and RBAC state
    (roles, memberships, invites). Most child rows cascade-delete with it.
    """
    __tablename__ = "searchspaces"
    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)
    citations_enabled = Column(
        Boolean, nullable=False, default=True
    )  # Enable/disable citations
    qna_custom_instructions = Column(
        Text, nullable=True, default=""
    )  # User's custom instructions
    # Markdown memory shared by all members of the space.
    shared_memory_md = Column(Text, nullable=True, server_default="")
    # Search space-level LLM preferences (shared by all members)
    # Note: ID values:
    # - 0: Auto mode (uses LiteLLM Router for load balancing) - default for new search spaces
    # - Negative IDs: Global configs from YAML
    # - Positive IDs: Custom configs from DB (NewLLMConfig table)
    agent_llm_id = Column(
        Integer, nullable=True, default=0
    )  # For agent/chat operations, defaults to Auto mode
    document_summary_llm_id = Column(
        Integer, nullable=True, default=0
    )  # For document summarization, defaults to Auto mode
    image_generation_config_id = Column(
        Integer, nullable=True, default=0
    )  # For image generation, defaults to Auto mode
    vision_llm_config_id = Column(
        Integer, nullable=True, default=0
    )  # For vision/screenshot analysis, defaults to Auto mode
    # When enabled, uploaded files are sorted into folders automatically.
    ai_file_sort_enabled = Column(
        Boolean, nullable=False, default=False, server_default="false"
    )
    # Original creator of the space (membership rows track other members).
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="search_spaces")
    folders = relationship(
        "Folder",
        back_populates="search_space",
        order_by="Folder.position",
        cascade="all, delete-orphan",
    )
    documents = relationship(
        "Document",
        back_populates="search_space",
        order_by="Document.id",
        cascade="all, delete-orphan",
    )
    new_chat_threads = relationship(
        "NewChatThread",
        back_populates="search_space",
        order_by="NewChatThread.updated_at.desc()",
        cascade="all, delete-orphan",
    )
    podcasts = relationship(
        "Podcast",
        back_populates="search_space",
        order_by="Podcast.id.desc()",
        cascade="all, delete-orphan",
    )
    video_presentations = relationship(
        "VideoPresentation",
        back_populates="search_space",
        order_by="VideoPresentation.id.desc()",
        cascade="all, delete-orphan",
    )
    reports = relationship(
        "Report",
        back_populates="search_space",
        order_by="Report.id.desc()",
        cascade="all, delete-orphan",
    )
    image_generations = relationship(
        "ImageGeneration",
        back_populates="search_space",
        order_by="ImageGeneration.id.desc()",
        cascade="all, delete-orphan",
    )
    logs = relationship(
        "Log",
        back_populates="search_space",
        order_by="Log.id",
        cascade="all, delete-orphan",
    )
    notifications = relationship(
        "Notification",
        back_populates="search_space",
        order_by="Notification.created_at.desc()",
        cascade="all, delete-orphan",
    )
    search_source_connectors = relationship(
        "SearchSourceConnector",
        back_populates="search_space",
        order_by="SearchSourceConnector.id",
        cascade="all, delete-orphan",
    )
    new_llm_configs = relationship(
        "NewLLMConfig",
        back_populates="search_space",
        order_by="NewLLMConfig.id",
        cascade="all, delete-orphan",
    )
    image_generation_configs = relationship(
        "ImageGenerationConfig",
        back_populates="search_space",
        order_by="ImageGenerationConfig.id",
        cascade="all, delete-orphan",
    )
    vision_llm_configs = relationship(
        "VisionLLMConfig",
        back_populates="search_space",
        order_by="VisionLLMConfig.id",
        cascade="all, delete-orphan",
    )
    # RBAC relationships
    roles = relationship(
        "SearchSpaceRole",
        back_populates="search_space",
        order_by="SearchSpaceRole.id",
        cascade="all, delete-orphan",
    )
    memberships = relationship(
        "SearchSpaceMembership",
        back_populates="search_space",
        order_by="SearchSpaceMembership.id",
        cascade="all, delete-orphan",
    )
    invites = relationship(
        "SearchSpaceInvite",
        back_populates="search_space",
        order_by="SearchSpaceInvite.id",
        cascade="all, delete-orphan",
    )
class SearchSourceConnector(BaseModel, TimestampMixin):
    """
    An external data source (e.g. Slack, Notion) connected to a search space.
    Holds provider-specific ``config`` JSON plus indexing/scheduling state.
    """
    __tablename__ = "search_source_connectors"
    __table_args__ = (
        # A user may reuse a connector name only across different types/spaces.
        UniqueConstraint(
            "search_space_id",
            "user_id",
            "connector_type",
            "name",
            name="uq_searchspace_user_connector_type_name",
        ),
    )
    name = Column(String(100), nullable=False, index=True)
    connector_type = Column(SQLAlchemyEnum(SearchSourceConnectorType), nullable=False)
    # Whether this connector type supports content indexing at all.
    is_indexable = Column(Boolean, nullable=False, default=False)
    last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # Provider-specific settings (credentials, channel lists, etc.).
    config = Column(JSON, nullable=False)
    # Summary generation (LLM-based) - disabled by default to save resources.
    # When enabled, improves hybrid search quality at the cost of LLM calls.
    enable_summary = Column(
        Boolean, nullable=False, default=False, server_default="false"
    )
    # Vision LLM for image files - disabled by default to save cost/time.
    # When enabled, images are described via a vision language model instead
    # of falling back to the document parser.
    enable_vision_llm = Column(
        Boolean, nullable=False, default=False, server_default="false"
    )
    # Periodic indexing fields
    periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
    indexing_frequency_minutes = Column(Integer, nullable=True)
    next_scheduled_at = Column(TIMESTAMP(timezone=True), nullable=True)
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship(
        "SearchSpace", back_populates="search_source_connectors"
    )
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="search_source_connectors")
    # Documents created by this connector (for cleanup on connector deletion)
    documents = relationship("Document", back_populates="connector")
class NewLLMConfig(BaseModel, TimestampMixin):
    """
    New LLM configuration table that combines model settings with prompt configuration.
    This table provides:
    - LLM model configuration (provider, model_name, api_key, etc.)
    - Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
    - Citation toggle (enable/disable citation instructions)
    Note: Tools instructions are built by get_tools_instructions(thread_visibility) (personal vs shared memory).
    """
    __tablename__ = "new_llm_configs"
    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)
    # === LLM Model Configuration (from original LLMConfig, excluding 'language') ===
    # Provider from the enum
    provider = Column(SQLAlchemyEnum(LiteLLMProvider), nullable=False)
    # Custom provider name when provider is CUSTOM
    custom_provider = Column(String(100), nullable=True)
    # Just the model name without provider prefix
    model_name = Column(String(100), nullable=False)
    # API Key should be encrypted before storing
    api_key = Column(String, nullable=False)
    api_base = Column(String(500), nullable=True)
    # For any other parameters that litellm supports.
    # ``dict`` (a callable invoked per insert) instead of a ``{}`` literal, so
    # each row gets its own fresh dict rather than sharing one mutable default.
    litellm_params = Column(JSON, nullable=True, default=dict)
    # === Prompt Configuration ===
    # Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
    # Users can customize this from the UI
    system_instructions = Column(
        Text,
        nullable=False,
        default="",  # Empty string means use default SURFSENSE_SYSTEM_INSTRUCTIONS
    )
    # Whether to use the default system instructions when system_instructions is empty
    use_default_system_instructions = Column(Boolean, nullable=False, default=True)
    # Citation toggle - when enabled, SURFSENSE_CITATION_INSTRUCTIONS is injected
    # When disabled, an anti-citation prompt is injected instead
    citations_enabled = Column(Boolean, nullable=False, default=True)
    # === Relationships ===
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="new_llm_configs")
    # User who created this config
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="new_llm_configs")
class Log(BaseModel, TimestampMixin):
    """Application log entries scoped to a search space."""
    __tablename__ = "logs"
    level = Column(SQLAlchemyEnum(LogLevel), nullable=False, index=True)
    status = Column(SQLAlchemyEnum(LogStatus), nullable=False, index=True)
    message = Column(Text, nullable=False)
    source = Column(
        String(200), nullable=True, index=True
    )  # Service/component that generated the log
    # Additional context data. ``dict`` (a callable invoked per insert) instead
    # of a ``{}`` literal, so each row gets its own fresh dict rather than
    # sharing one mutable default.
    log_metadata = Column(JSON, nullable=True, default=dict)
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="logs")
class Notification(BaseModel, TimestampMixin):
    """Per-user notifications, optionally scoped to a search space."""
    __tablename__ = "notifications"
    __table_args__ = (
        # Composite index for unread-count queries that filter by
        # (user_id, read, type) and order by created_at.
        Index(
            "ix_notifications_user_read_type_created",
            "user_id",
            "read",
            "type",
            "created_at",
        ),
        # Covers the common list query: user_id + search_space_id + created_at DESC
        Index(
            "ix_notifications_user_space_created",
            "user_id",
            "search_space_id",
            "created_at",
        ),
    )
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Nullable: some notifications are global to the user, not tied to a space.
    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    type = Column(
        String(50), nullable=False, index=True
    )  # 'connector_indexing', 'document_processing', etc.
    title = Column(String(200), nullable=False)
    message = Column(Text, nullable=False)
    read = Column(
        Boolean, nullable=False, default=False, server_default=text("false"), index=True
    )
    # Stored under the column name "metadata" (reserved as a Python attribute
    # on declarative classes, hence the attribute rename).
    # ``dict`` (a callable invoked per insert) instead of a ``{}`` literal, so
    # each row gets its own fresh dict rather than sharing one mutable default.
    notification_metadata = Column("metadata", JSONB, nullable=True, default=dict)
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=True,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )
    user = relationship("User", back_populates="notifications")
    search_space = relationship("SearchSpace", back_populates="notifications")
class UserIncentiveTask(BaseModel, TimestampMixin):
    """
    Tracks completed incentive tasks for users.
    Each user can only complete each task type once.
    When a task is completed, the user's pages_limit is increased.
    """
    __tablename__ = "user_incentive_tasks"
    __table_args__ = (
        # One row per (user, task type): a task cannot be completed twice.
        UniqueConstraint(
            "user_id",
            "task_type",
            name="uq_user_incentive_task",
        ),
    )
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    task_type = Column(SQLAlchemyEnum(IncentiveTaskType), nullable=False, index=True)
    # Number of extra pages granted for completing this task.
    pages_awarded = Column(Integer, nullable=False)
    completed_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),  # timezone-aware completion timestamp
    )
    user = relationship("User", back_populates="incentive_tasks")
class PagePurchase(Base, TimestampMixin):
    """Tracks Stripe checkout sessions used to grant additional page credits."""
    __tablename__ = "page_purchases"
    # Permit plain (unmapped) annotations under SQLAlchemy 2.x declarative.
    __allow_unmapped__ = True
    # UUID primary key; this model extends Base directly rather than BaseModel.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Stripe identifiers; checkout session id is the idempotency key.
    stripe_checkout_session_id = Column(
        String(255), nullable=False, unique=True, index=True
    )
    stripe_payment_intent_id = Column(String(255), nullable=True, index=True)
    # Units purchased and the resulting page credits granted.
    quantity = Column(Integer, nullable=False)
    pages_granted = Column(Integer, nullable=False)
    # Stripe amount/currency — presumably amount_total is in the currency's
    # smallest unit (Stripe convention); confirm against the checkout handler.
    amount_total = Column(Integer, nullable=True)
    currency = Column(String(10), nullable=True)
    status = Column(
        SQLAlchemyEnum(PagePurchaseStatus),
        nullable=False,
        default=PagePurchaseStatus.PENDING,
        # Server-side default cast to the native Postgres enum type.
        server_default=text("'PENDING'::pagepurchasestatus"),
        index=True,
    )
    completed_at = Column(TIMESTAMP(timezone=True), nullable=True)
    user = relationship("User", back_populates="page_purchases")
class PremiumTokenPurchase(Base, TimestampMixin):
    """Tracks Stripe checkout sessions used to grant additional premium token credits."""
    __tablename__ = "premium_token_purchases"
    # Permit plain (unmapped) annotations under SQLAlchemy 2.x declarative.
    __allow_unmapped__ = True
    # UUID primary key; this model extends Base directly rather than BaseModel.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Stripe identifiers; checkout session id is the idempotency key.
    stripe_checkout_session_id = Column(
        String(255), nullable=False, unique=True, index=True
    )
    stripe_payment_intent_id = Column(String(255), nullable=True, index=True)
    quantity = Column(Integer, nullable=False)
    # BigInteger: token grants can exceed the 32-bit range.
    tokens_granted = Column(BigInteger, nullable=False)
    amount_total = Column(Integer, nullable=True)
    currency = Column(String(10), nullable=True)
    # NOTE(review): unlike PagePurchase.status, no server_default is declared
    # here — confirm whether that asymmetry is intentional.
    status = Column(
        SQLAlchemyEnum(PremiumTokenPurchaseStatus),
        nullable=False,
        default=PremiumTokenPurchaseStatus.PENDING,
        index=True,
    )
    completed_at = Column(TIMESTAMP(timezone=True), nullable=True)
    user = relationship("User", back_populates="premium_token_purchases")
class SearchSpaceRole(BaseModel, TimestampMixin):
    """
    Custom roles that can be defined per search space.
    Each search space can have multiple roles with different permission sets.
    """
    __tablename__ = "search_space_roles"
    __table_args__ = (
        UniqueConstraint(
            "search_space_id",
            "name",
            name="uq_searchspace_role_name",
        ),
    )
    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)
    # List of Permission enum values (e.g., ["documents:read", "chats:create"]).
    # ``list`` (a callable invoked per insert) instead of a ``[]`` literal, so
    # each row gets its own fresh list rather than sharing one mutable default.
    permissions = Column(ARRAY(String), nullable=False, default=list)
    # Whether this role is assigned to new members by default when they join via invite
    is_default = Column(Boolean, nullable=False, default=False)
    # System roles (Owner, Editor, Viewer) cannot be deleted
    is_system_role = Column(Boolean, nullable=False, default=False)
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="roles")
    # passive_deletes: the DB's SET NULL handles membership/invite role_id cleanup.
    memberships = relationship(
        "SearchSpaceMembership", back_populates="role", passive_deletes=True
    )
    invites = relationship(
        "SearchSpaceInvite", back_populates="role", passive_deletes=True
    )
class SearchSpaceMembership(BaseModel, TimestampMixin):
    """
    Tracks user membership in search spaces with their assigned role.
    Each user can be a member of multiple search spaces with different roles.
    """
    __tablename__ = "search_space_memberships"
    __table_args__ = (
        # A user joins a given search space at most once.
        UniqueConstraint(
            "user_id",
            "search_space_id",
            name="uq_user_searchspace_membership",
        ),
    )
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    # SET NULL so deleting a role does not delete its members' memberships.
    role_id = Column(
        Integer,
        ForeignKey("search_space_roles.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Indicates if this user is the original creator/owner of the search space
    is_owner = Column(Boolean, nullable=False, default=False)
    # Timestamp when the user joined (via invite or as creator)
    joined_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
    )
    # Reference to the invite used to join (null if owner/creator)
    invited_by_invite_id = Column(
        Integer,
        ForeignKey("search_space_invites.id", ondelete="SET NULL"),
        nullable=True,
    )
    user = relationship("User", back_populates="search_space_memberships")
    search_space = relationship("SearchSpace", back_populates="memberships")
    role = relationship("SearchSpaceRole", back_populates="memberships")
    invited_by_invite = relationship(
        "SearchSpaceInvite", back_populates="used_by_memberships"
    )
class SearchSpaceInvite(BaseModel, TimestampMixin):
    """
    Invite links for search spaces.
    Users can create invite links with specific roles that others can use to join.
    """
    __tablename__ = "search_space_invites"
    # Unique invite code (used in invite URLs)
    invite_code = Column(String(64), nullable=False, unique=True, index=True)
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    # Role to assign when invite is used (null means use default role)
    role_id = Column(
        Integer,
        ForeignKey("search_space_roles.id", ondelete="SET NULL"),
        nullable=True,
    )
    # User who created this invite
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Expiration timestamp (null means never expires)
    expires_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # Maximum number of times this invite can be used (null means unlimited)
    max_uses = Column(Integer, nullable=True)
    # Number of times this invite has been used
    uses_count = Column(Integer, nullable=False, default=0)
    # Whether this invite is currently active
    is_active = Column(Boolean, nullable=False, default=True)
    # Optional custom name/label for the invite
    name = Column(String(100), nullable=True)
    search_space = relationship("SearchSpace", back_populates="invites")
    role = relationship("SearchSpaceRole", back_populates="invites")
    created_by = relationship("User", back_populates="created_invites")
    used_by_memberships = relationship(
        "SearchSpaceMembership",
        back_populates="invited_by_invite",
        passive_deletes=True,
    )
class PromptMode(StrEnum):
    """Mode a Prompt applies to; stored in the DB as its string value."""
    transform = "transform"
    explore = "explore"
class Prompt(BaseModel, TimestampMixin):
    """
    Saved prompt templates, per user and optionally per search space.
    Rows with a ``default_prompt_slug`` appear to be user copies of built-in
    defaults (one per slug per user, see the unique constraint).
    """
    __tablename__ = "prompts"
    __table_args__ = (
        # At most one prompt per (user, default slug) pair.
        UniqueConstraint(
            "user_id",
            "default_prompt_slug",
            name="uq_prompt_user_default_slug",
        ),
    )
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Nullable: a prompt may be user-global rather than space-scoped.
    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    default_prompt_slug = Column(String(100), nullable=True, index=True)
    name = Column(String(200), nullable=False)
    prompt = Column(Text, nullable=False)
    mode = Column(
        # Enum type "prompt_mode" is managed by migrations (create_type=False).
        SQLAlchemyEnum(PromptMode, name="prompt_mode", create_type=False),
        nullable=False,
    )
    version = Column(Integer, nullable=False, default=1)
    is_public = Column(Boolean, nullable=False, default=False)
    user = relationship("User")
    search_space = relationship("SearchSpace")
if config.AUTH_TYPE == "GOOGLE":
    class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):
        """Per-provider OAuth account rows (standard fastapi-users table)."""
        pass
    class User(SQLAlchemyBaseUserTableUUID, Base):
        """
        Application user for the Google OAuth build: fastapi-users base table
        plus ownership relationships, usage limits, and OAuth profile fields.
        Keep the column/relationship set in sync with the non-OAuth branch below.
        """
        # Linked OAuth accounts, eagerly loaded alongside the user row.
        oauth_accounts: Mapped[list[OAuthAccount]] = relationship(
            "OAuthAccount", lazy="joined"
        )
        search_spaces = relationship("SearchSpace", back_populates="user")
        notifications = relationship(
            "Notification",
            back_populates="user",
            order_by="Notification.created_at.desc()",
            cascade="all, delete-orphan",
        )
        # RBAC relationships
        search_space_memberships = relationship(
            "SearchSpaceMembership",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        created_invites = relationship(
            "SearchSpaceInvite",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Chat threads created by this user
        new_chat_threads = relationship(
            "NewChatThread",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Documents created/uploaded by this user
        documents = relationship(
            "Document",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Folders created by this user
        folders = relationship(
            "Folder",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Image generations created by this user
        image_generations = relationship(
            "ImageGeneration",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Connectors created by this user
        search_source_connectors = relationship(
            "SearchSourceConnector",
            back_populates="user",
            passive_deletes=True,
        )
        # LLM configs created by this user
        new_llm_configs = relationship(
            "NewLLMConfig",
            back_populates="user",
            passive_deletes=True,
        )
        # Image generation configs created by this user
        image_generation_configs = relationship(
            "ImageGenerationConfig",
            back_populates="user",
            passive_deletes=True,
        )
        vision_llm_configs = relationship(
            "VisionLLMConfig",
            back_populates="user",
            passive_deletes=True,
        )
        # Incentive tasks completed by this user
        incentive_tasks = relationship(
            "UserIncentiveTask",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        page_purchases = relationship(
            "PagePurchase",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        premium_token_purchases = relationship(
            "PremiumTokenPurchase",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        # Page usage tracking for ETL services
        pages_limit = Column(
            Integer,
            nullable=False,
            default=config.PAGES_LIMIT,
            server_default=str(config.PAGES_LIMIT),
        )
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")
        premium_tokens_limit = Column(
            BigInteger,
            nullable=False,
            default=config.PREMIUM_TOKEN_LIMIT,
            server_default=str(config.PREMIUM_TOKEN_LIMIT),
        )
        premium_tokens_used = Column(
            BigInteger, nullable=False, default=0, server_default="0"
        )
        # Tokens held for in-flight work — assumption based on the name; confirm
        # against the usage-accounting code.
        premium_tokens_reserved = Column(
            BigInteger, nullable=False, default=0, server_default="0"
        )
        # User profile from OAuth
        display_name = Column(String, nullable=True)
        avatar_url = Column(String, nullable=True)
        last_login = Column(TIMESTAMP(timezone=True), nullable=True)
        # Personal (non-shared) markdown memory.
        memory_md = Column(Text, nullable=True, server_default="")
        # Refresh tokens for this user
        refresh_tokens = relationship(
            "RefreshToken",
            back_populates="user",
            cascade="all, delete-orphan",
        )
else:
    class User(SQLAlchemyBaseUserTableUUID, Base):
        """
        Application user for non-OAuth builds: identical to the Google-auth
        variant except for the absent ``oauth_accounts`` relationship.
        Keep the column/relationship set in sync with the GOOGLE branch.
        """
        search_spaces = relationship("SearchSpace", back_populates="user")
        notifications = relationship(
            "Notification",
            back_populates="user",
            order_by="Notification.created_at.desc()",
            cascade="all, delete-orphan",
        )
        # RBAC relationships
        search_space_memberships = relationship(
            "SearchSpaceMembership",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        created_invites = relationship(
            "SearchSpaceInvite",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Chat threads created by this user
        new_chat_threads = relationship(
            "NewChatThread",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Documents created/uploaded by this user
        documents = relationship(
            "Document",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Folders created by this user
        folders = relationship(
            "Folder",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Image generations created by this user
        image_generations = relationship(
            "ImageGeneration",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Connectors created by this user
        search_source_connectors = relationship(
            "SearchSourceConnector",
            back_populates="user",
            passive_deletes=True,
        )
        # LLM configs created by this user
        new_llm_configs = relationship(
            "NewLLMConfig",
            back_populates="user",
            passive_deletes=True,
        )
        # Image generation configs created by this user
        image_generation_configs = relationship(
            "ImageGenerationConfig",
            back_populates="user",
            passive_deletes=True,
        )
        vision_llm_configs = relationship(
            "VisionLLMConfig",
            back_populates="user",
            passive_deletes=True,
        )
        # Incentive tasks completed by this user
        incentive_tasks = relationship(
            "UserIncentiveTask",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        page_purchases = relationship(
            "PagePurchase",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        premium_token_purchases = relationship(
            "PremiumTokenPurchase",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        # Page usage tracking for ETL services
        pages_limit = Column(
            Integer,
            nullable=False,
            default=config.PAGES_LIMIT,
            server_default=str(config.PAGES_LIMIT),
        )
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")
        premium_tokens_limit = Column(
            BigInteger,
            nullable=False,
            default=config.PREMIUM_TOKEN_LIMIT,
            server_default=str(config.PREMIUM_TOKEN_LIMIT),
        )
        premium_tokens_used = Column(
            BigInteger, nullable=False, default=0, server_default="0"
        )
        # Tokens held for in-flight work — assumption based on the name; confirm
        # against the usage-accounting code.
        premium_tokens_reserved = Column(
            BigInteger, nullable=False, default=0, server_default="0"
        )
        # User profile (can be set manually for non-OAuth users)
        display_name = Column(String, nullable=True)
        avatar_url = Column(String, nullable=True)
        last_login = Column(TIMESTAMP(timezone=True), nullable=True)
        # Personal (non-shared) markdown memory.
        memory_md = Column(Text, nullable=True, server_default="")
        # Refresh tokens for this user
        refresh_tokens = relationship(
            "RefreshToken",
            back_populates="user",
            cascade="all, delete-orphan",
        )
class RefreshToken(Base, TimestampMixin):
    """
    Stores refresh tokens for user session management.
    Each row represents one device/session.
    """
    __tablename__ = "refresh_tokens"
    id = Column(Integer, primary_key=True, autoincrement=True)
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    user = relationship("User", back_populates="refresh_tokens")
    # Hash of the token — presumably the raw token is never stored; confirm
    # against the auth service code.
    token_hash = Column(String(256), unique=True, nullable=False, index=True)
    expires_at = Column(TIMESTAMP(timezone=True), nullable=False, index=True)
    is_revoked = Column(Boolean, default=False, nullable=False)
    # Token-family id — looks like it supports rotation/reuse detection; verify.
    family_id = Column(UUID(as_uuid=True), nullable=False, index=True)
    @property
    def is_expired(self) -> bool:
        """True once the current UTC time reaches ``expires_at``."""
        return datetime.now(UTC) >= self.expires_at
    @property
    def is_valid(self) -> bool:
        """True when the token is neither expired nor revoked."""
        return not self.is_expired and not self.is_revoked
# Async engine for the application's Postgres database.
# DATABASE_URL is defined elsewhere in this module.
engine = create_async_engine(
    DATABASE_URL,
    pool_size=30,  # persistent connections kept in the pool
    max_overflow=150,  # extra connections allowed under burst load
    pool_recycle=1800,  # recycle connections after 30 min to avoid stale sockets
    pool_pre_ping=True,  # validate a connection before handing it out
    pool_timeout=30,  # seconds to wait for a free connection before erroring
)
# Session factory; expire_on_commit=False keeps ORM objects usable after commit.
async_session_maker = async_sessionmaker(engine, expire_on_commit=False)
@asynccontextmanager
async def shielded_async_session():
    """Cancellation-safe async session context manager.
    Starlette's BaseHTTPMiddleware cancels the task via an anyio cancel
    scope when a client disconnects. A plain ``async with async_session_maker()``
    has its ``__aexit__`` (which awaits ``session.close()``) cancelled by the
    scope, orphaning the underlying database connection.
    This wrapper ensures ``session.close()`` always completes by running it
    inside ``anyio.CancelScope(shield=True)``.

    Yields:
        An ``AsyncSession`` from ``async_session_maker``.
    """
    session = async_session_maker()
    try:
        yield session
    finally:
        # Shield the close from outer cancellation so the connection is
        # always returned to the pool even if the request task was cancelled.
        with anyio.CancelScope(shield=True):
            await session.close()
async def setup_indexes():
    """Create auxiliary Postgres indexes (vector, full-text, trigram) if missing."""
    # Every statement uses IF NOT EXISTS, so rerunning this at startup is safe.
    ddl_statements = (
        # Document summary indexes: HNSW vector search + GIN full-text search.
        "CREATE INDEX IF NOT EXISTS document_vector_index ON documents USING hnsw (embedding public.vector_cosine_ops)",
        "CREATE INDEX IF NOT EXISTS document_search_index ON documents USING gin (to_tsvector('english', content))",
        # Document chunk indexes (index names keep the historical "chucks" spelling).
        "CREATE INDEX IF NOT EXISTS chucks_vector_index ON chunks USING hnsw (embedding public.vector_cosine_ops)",
        "CREATE INDEX IF NOT EXISTS chucks_search_index ON chunks USING gin (to_tsvector('english', content))",
        # pg_trgm index for efficient ILIKE '%term%' searches on titles,
        # critical for the document mention picker (@mentions) to scale.
        "CREATE INDEX IF NOT EXISTS idx_documents_title_trgm ON documents USING gin (title gin_trgm_ops)",
        # B-tree index on search_space_id for fast filtering.
        "CREATE INDEX IF NOT EXISTS idx_documents_search_space_id ON documents (search_space_id)",
        # Covering index for the "recent documents" query — enables index-only scans.
        "CREATE INDEX IF NOT EXISTS idx_documents_search_space_updated ON documents (search_space_id, updated_at DESC NULLS LAST) INCLUDE (id, title, document_type)",
        # Trigram title search over the bundled Surfsense documentation.
        "CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm ON surfsense_docs_documents USING gin (title gin_trgm_ops)",
    )
    async with engine.begin() as conn:
        for statement in ddl_statements:
            await conn.execute(text(statement))
async def create_db_and_tables():
    """Bootstrap the database: extensions, ORM tables, then indexes."""
    async with engine.begin() as conn:
        # pgvector for embedding columns; pg_trgm for trigram ILIKE indexes.
        for extension in ("vector", "pg_trgm"):
            await conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {extension}"))
        await conn.run_sync(Base.metadata.create_all)
    await setup_indexes()
async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
    """FastAPI dependency yielding one ``AsyncSession`` per request.

    The session is closed automatically when the request scope exits
    (``AsyncSession.__aenter__`` returns the session itself).
    """
    db = async_session_maker()
    async with db:
        yield db
# fastapi-users database adapter dependency. When Google OAuth is enabled the
# adapter also needs the OAuthAccount table to link external accounts.
if config.AUTH_TYPE == "GOOGLE":

    async def get_user_db(session: AsyncSession = Depends(get_async_session)):
        """Yield a user DB adapter that also manages OAuth accounts."""
        yield SQLAlchemyUserDatabase(session, User, OAuthAccount)

else:

    async def get_user_db(session: AsyncSession = Depends(get_async_session)):
        """Yield a user DB adapter for non-OAuth authentication."""
        yield SQLAlchemyUserDatabase(session, User)
def has_permission(user_permissions: list[str], required_permission: str) -> bool:
    """
    Check if the user has the required permission.
    Supports wildcard (*) for full access.
    Args:
        user_permissions: List of permission strings the user has
        required_permission: The permission string to check for
    Returns:
        True if user has the permission, False otherwise
    """
    if not user_permissions:
        return False
    # The full-access wildcard grants everything; otherwise look for the
    # specific permission string.
    return (
        Permission.FULL_ACCESS.value in user_permissions
        or required_permission in user_permissions
    )
def has_any_permission(
    user_permissions: list[str], required_permissions: list[str]
) -> bool:
    """
    Check if the user has any of the required permissions.
    Args:
        user_permissions: List of permission strings the user has
        required_permissions: List of permission strings to check for (any match)
    Returns:
        True if user has at least one of the permissions, False otherwise
    """
    if not user_permissions:
        return False
    if Permission.FULL_ACCESS.value in user_permissions:
        return True
    # "Any match" is exactly a non-empty intersection; an empty
    # required_permissions list yields False, matching any(...) semantics.
    return not set(required_permissions).isdisjoint(user_permissions)
def has_all_permissions(
    user_permissions: list[str], required_permissions: list[str]
) -> bool:
    """
    Check if the user has all of the required permissions.
    Args:
        user_permissions: List of permission strings the user has
        required_permissions: List of permission strings to check for (all must match)
    Returns:
        True if user has all of the permissions, False otherwise
    """
    if not user_permissions:
        return False
    if Permission.FULL_ACCESS.value in user_permissions:
        return True
    # "All match" is the subset relation; an empty required_permissions list
    # yields True, matching all(...) semantics.
    return set(required_permissions).issubset(user_permissions)
def get_default_roles_config() -> list[dict]:
    """
    Get the configuration for default system roles.
    These roles are created automatically when a search space is created.
    Only 3 roles are supported:
    - Owner: Full access to everything (assigned to search space creator)
    - Editor: Can create/update content but cannot delete, manage roles, or change settings
    - Viewer: Read-only access to resources (can add comments)
    Returns:
        List of role configurations with name, description, permissions, and flags
    """
    # (name, description, is_default) — Editor is the default role assigned
    # to new members joining via invite; all three are system roles.
    role_specs = (
        (
            "Owner",
            "Full access to all search space resources and settings",
            False,
        ),
        (
            "Editor",
            "Can create and update content (no delete, role management, or settings access)",
            True,
        ),
        (
            "Viewer",
            "Read-only access to search space resources",
            False,
        ),
    )
    return [
        {
            "name": name,
            "description": description,
            "permissions": DEFAULT_ROLE_PERMISSIONS[name],
            "is_default": is_default,
            "is_system_role": True,
        }
        for name, description, is_default in role_specs
    ]