2025-03-14 18:53:14 -07:00
from collections . abc import AsyncGenerator
2025-07-24 14:43:48 -07:00
from datetime import UTC , datetime
2026-02-26 18:24:57 -08:00
from enum import StrEnum
2025-03-14 18:53:14 -07:00
from fastapi import Depends
2025-07-25 10:52:34 -07:00
from fastapi_users . db import SQLAlchemyBaseUserTableUUID , SQLAlchemyUserDatabase
2025-03-14 18:53:14 -07:00
from pgvector . sqlalchemy import Vector
from sqlalchemy import (
ARRAY ,
2025-07-24 14:43:48 -07:00
JSON ,
TIMESTAMP ,
2025-03-14 18:53:14 -07:00
Boolean ,
Column ,
Enum as SQLAlchemyEnum ,
ForeignKey ,
Integer ,
String ,
Text ,
2025-08-02 11:47:49 -07:00
UniqueConstraint ,
2025-03-14 18:53:14 -07:00
text ,
)
2025-11-23 15:23:31 +05:30
from sqlalchemy . dialects . postgresql import JSONB , UUID
2025-03-14 18:53:14 -07:00
from sqlalchemy . ext . asyncio import AsyncSession , async_sessionmaker , create_async_engine
from sqlalchemy . orm import DeclarativeBase , Mapped , declared_attr , relationship
from app . config import config
2025-05-21 20:56:23 -07:00
if config . AUTH_TYPE == " GOOGLE " :
2025-07-25 10:52:34 -07:00
from fastapi_users . db import SQLAlchemyBaseOAuthAccountTableUUID
2025-05-21 20:56:23 -07:00
2025-03-14 18:53:14 -07:00
# Async database connection URL, sourced from application configuration.
DATABASE_URL = config.DATABASE_URL
2026-02-26 18:24:57 -08:00
class DocumentType(StrEnum):
    """
    Origin/type of an ingested document.

    Values are persisted via a SQLAlchemy Enum column on ``Document``
    (see ``Document.document_type``), so members must not be renamed or
    reordered without a matching database migration.
    """

    EXTENSION = "EXTENSION"
    CRAWLED_URL = "CRAWLED_URL"
    FILE = "FILE"
    SLACK_CONNECTOR = "SLACK_CONNECTOR"
    TEAMS_CONNECTOR = "TEAMS_CONNECTOR"
    NOTION_CONNECTOR = "NOTION_CONNECTOR"
    YOUTUBE_VIDEO = "YOUTUBE_VIDEO"
    GITHUB_CONNECTOR = "GITHUB_CONNECTOR"
    LINEAR_CONNECTOR = "LINEAR_CONNECTOR"
    DISCORD_CONNECTOR = "DISCORD_CONNECTOR"
    JIRA_CONNECTOR = "JIRA_CONNECTOR"
    CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
    CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
    GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
    GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR"
    GOOGLE_DRIVE_FILE = "GOOGLE_DRIVE_FILE"
    AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
    LUMA_CONNECTOR = "LUMA_CONNECTOR"
    ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
    BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
    CIRCLEBACK = "CIRCLEBACK"
    OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR"
    NOTE = "NOTE"
    COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
    COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
    COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
2025-03-14 18:53:14 -07:00
2025-07-24 14:43:48 -07:00
2026-02-26 18:24:57 -08:00
class SearchSourceConnectorType(StrEnum):
    """
    Type of a configured search-source connector.

    Note: this is distinct from ``DocumentType`` — a connector (e.g.
    GOOGLE_DRIVE_CONNECTOR) may produce documents of a different type
    (e.g. GOOGLE_DRIVE_FILE). Values are persisted; do not rename
    members without a database migration.
    """

    SERPER_API = "SERPER_API"  # NOT IMPLEMENTED YET: DON'T REMEMBER WHY: MOST PROBABLY BECAUSE WE NEED TO CRAWL THE RESULTS RETURNED BY IT
    TAVILY_API = "TAVILY_API"
    SEARXNG_API = "SEARXNG_API"
    LINKUP_API = "LINKUP_API"
    BAIDU_SEARCH_API = "BAIDU_SEARCH_API"  # Baidu AI Search API for Chinese web search
    SLACK_CONNECTOR = "SLACK_CONNECTOR"
    TEAMS_CONNECTOR = "TEAMS_CONNECTOR"
    NOTION_CONNECTOR = "NOTION_CONNECTOR"
    GITHUB_CONNECTOR = "GITHUB_CONNECTOR"
    LINEAR_CONNECTOR = "LINEAR_CONNECTOR"
    DISCORD_CONNECTOR = "DISCORD_CONNECTOR"
    JIRA_CONNECTOR = "JIRA_CONNECTOR"
    CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
    CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
    GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
    GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR"
    GOOGLE_DRIVE_CONNECTOR = "GOOGLE_DRIVE_CONNECTOR"
    AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
    LUMA_CONNECTOR = "LUMA_CONNECTOR"
    ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
    WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR"
    BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
    CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR"
    OBSIDIAN_CONNECTOR = (
        "OBSIDIAN_CONNECTOR"  # Self-hosted only - Local Obsidian vault indexing
    )
    MCP_CONNECTOR = "MCP_CONNECTOR"  # Model Context Protocol - User-defined API tools
    COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
    COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
    COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
2025-07-24 14:43:48 -07:00
2026-02-26 18:24:57 -08:00
class PodcastStatus(StrEnum):
    """Lifecycle state of a generated podcast (note: lowercase values)."""

    PENDING = "pending"  # Queued, generation not started yet
    GENERATING = "generating"  # Audio generation in progress
    READY = "ready"  # Generation finished successfully
    FAILED = "failed"  # Generation failed
2026-02-05 21:59:31 +05:30
class DocumentStatus :
"""
Helper class for document processing status ( stored as JSONB ) .
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
Status values :
- { " state " : " ready " } - Document is fully processed and searchable
- { " state " : " pending " } - Document is queued , waiting to be processed
- { " state " : " processing " } - Document is currently being processed ( only 1 at a time )
- { " state " : " failed " , " reason " : " ... " } - Processing failed with reason
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
Usage :
document . status = DocumentStatus . pending ( )
document . status = DocumentStatus . processing ( )
document . status = DocumentStatus . ready ( )
document . status = DocumentStatus . failed ( " LLM rate limit exceeded " )
"""
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
# State constants
READY = " ready "
PENDING = " pending "
PROCESSING = " processing "
FAILED = " failed "
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
@staticmethod
def ready ( ) - > dict :
""" Return status dict for a ready/searchable document. """
return { " state " : DocumentStatus . READY }
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
@staticmethod
def pending ( ) - > dict :
""" Return status dict for a document waiting to be processed. """
return { " state " : DocumentStatus . PENDING }
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
@staticmethod
def processing ( ) - > dict :
""" Return status dict for a document being processed. """
return { " state " : DocumentStatus . PROCESSING }
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
@staticmethod
def failed ( reason : str , * * extra_details ) - > dict :
"""
Return status dict for a failed document .
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
Args :
reason : Human - readable failure reason
* * extra_details : Optional additional details ( duplicate_of , error_code , etc . )
"""
2026-02-06 05:35:15 +05:30
status = {
" state " : DocumentStatus . FAILED ,
" reason " : reason [ : 500 ] ,
} # Truncate long reasons
2026-02-05 21:59:31 +05:30
if extra_details :
status . update ( extra_details )
return status
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
@staticmethod
def get_state ( status : dict | None ) - > str | None :
""" Extract state from status dict, returns None if invalid. """
if status is None :
return None
return status . get ( " state " ) if isinstance ( status , dict ) else None
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
@staticmethod
def is_state ( status : dict | None , state : str ) - > bool :
""" Check if status matches a given state. """
return DocumentStatus . get_state ( status ) == state
2026-02-06 05:35:15 +05:30
2026-02-05 21:59:31 +05:30
@staticmethod
def get_failure_reason ( status : dict | None ) - > str | None :
""" Extract failure reason from status dict. """
if status is None or not isinstance ( status , dict ) :
return None
if status . get ( " state " ) == DocumentStatus . FAILED :
return status . get ( " reason " )
return None
2026-02-26 18:24:57 -08:00
class LiteLLMProvider(StrEnum):
    """
    Enum for LLM providers supported by LiteLLM.

    CUSTOM is the escape hatch for providers not listed explicitly.
    """

    OPENAI = "OPENAI"
    ANTHROPIC = "ANTHROPIC"
    GOOGLE = "GOOGLE"
    AZURE_OPENAI = "AZURE_OPENAI"
    BEDROCK = "BEDROCK"
    VERTEX_AI = "VERTEX_AI"
    GROQ = "GROQ"
    COHERE = "COHERE"
    MISTRAL = "MISTRAL"
    DEEPSEEK = "DEEPSEEK"
    XAI = "XAI"
    OPENROUTER = "OPENROUTER"
    TOGETHER_AI = "TOGETHER_AI"
    FIREWORKS_AI = "FIREWORKS_AI"
    REPLICATE = "REPLICATE"
    PERPLEXITY = "PERPLEXITY"
    OLLAMA = "OLLAMA"
    ALIBABA_QWEN = "ALIBABA_QWEN"
    MOONSHOT = "MOONSHOT"
    ZHIPU = "ZHIPU"
    ANYSCALE = "ANYSCALE"
    DEEPINFRA = "DEEPINFRA"
    CEREBRAS = "CEREBRAS"
    SAMBANOVA = "SAMBANOVA"
    AI21 = "AI21"
    CLOUDFLARE = "CLOUDFLARE"
    DATABRICKS = "DATABRICKS"
    COMETAPI = "COMETAPI"
    HUGGINGFACE = "HUGGINGFACE"
    GITHUB_MODELS = "GITHUB_MODELS"
    CUSTOM = "CUSTOM"
2025-07-16 01:10:33 -07:00
2025-07-24 14:43:48 -07:00
2026-02-26 18:24:57 -08:00
class ImageGenProvider(StrEnum):
    """
    Enum for image generation providers supported by LiteLLM.

    This is a subset of LLM providers — only those that support image
    generation.
    See: https://docs.litellm.ai/docs/image_generation#supported-providers
    """

    OPENAI = "OPENAI"
    AZURE_OPENAI = "AZURE_OPENAI"
    GOOGLE = "GOOGLE"  # Google AI Studio
    VERTEX_AI = "VERTEX_AI"
    BEDROCK = "BEDROCK"  # AWS Bedrock
    RECRAFT = "RECRAFT"
    OPENROUTER = "OPENROUTER"
    XINFERENCE = "XINFERENCE"
    NSCALE = "NSCALE"
2026-02-26 18:24:57 -08:00
class LogLevel(StrEnum):
    """Severity level for application log records (mirrors stdlib logging levels)."""

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
2025-07-24 14:43:48 -07:00
2026-02-26 18:24:57 -08:00
class LogStatus(StrEnum):
    """Outcome status of a logged operation/task."""

    IN_PROGRESS = "IN_PROGRESS"
    SUCCESS = "SUCCESS"
    FAILED = "FAILED"
2025-07-24 14:43:48 -07:00
2026-02-26 18:24:57 -08:00
class IncentiveTaskType(StrEnum):
    """
    Enum for incentive task types that users can complete to earn free pages.

    Each task can only be completed once per user.

    When adding new tasks:
        1. Add a new enum value here
        2. Add the task configuration to INCENTIVE_TASKS_CONFIG below
        3. Create an Alembic migration to add the enum value to PostgreSQL
    """

    GITHUB_STAR = "GITHUB_STAR"
    REDDIT_FOLLOW = "REDDIT_FOLLOW"
    DISCORD_JOIN = "DISCORD_JOIN"
    # Future tasks can be added here:
    # GITHUB_ISSUE = "GITHUB_ISSUE"
    # SOCIAL_SHARE = "SOCIAL_SHARE"
    # REFER_FRIEND = "REFER_FRIEND"
# Centralized configuration for incentive tasks
# This makes it easy to add new tasks without changing code in multiple places.
# Each entry maps a task type to its display metadata:
#   title/description - shown to the user in the UI
#   pages_reward      - number of free pages granted on completion
#   action_url        - external link the user must visit to complete the task
INCENTIVE_TASKS_CONFIG = {
    IncentiveTaskType.GITHUB_STAR: {
        "title": "Star our GitHub repository",
        "description": "Show your support by starring SurfSense on GitHub",
        "pages_reward": 30,
        "action_url": "https://github.com/MODSetter/SurfSense",
    },
    IncentiveTaskType.REDDIT_FOLLOW: {
        "title": "Join our Subreddit",
        "description": "Join the SurfSense community on Reddit",
        "pages_reward": 30,
        "action_url": "https://www.reddit.com/r/SurfSense/",
    },
    IncentiveTaskType.DISCORD_JOIN: {
        "title": "Join our Discord",
        "description": "Join the SurfSense community on Discord",
        "pages_reward": 40,
        "action_url": "https://discord.gg/ejRNvftDp9",
    },
    # Future tasks can be configured here:
    # IncentiveTaskType.GITHUB_ISSUE: {
    #     "title": "Create an issue",
    #     "description": "Help improve SurfSense by reporting bugs or suggesting features",
    #     "pages_reward": 50,
    #     "action_url": "https://github.com/MODSetter/SurfSense/issues/new/choose",
    # },
}
2026-02-26 18:24:57 -08:00
class Permission(StrEnum):
    """
    Granular permissions for search space resources.

    Values follow a "resource:action" naming scheme.
    Use '*' (FULL_ACCESS) to grant all permissions.
    """

    # Documents
    DOCUMENTS_CREATE = "documents:create"
    DOCUMENTS_READ = "documents:read"
    DOCUMENTS_UPDATE = "documents:update"
    DOCUMENTS_DELETE = "documents:delete"
    # Chats
    CHATS_CREATE = "chats:create"
    CHATS_READ = "chats:read"
    CHATS_UPDATE = "chats:update"
    CHATS_DELETE = "chats:delete"
    # Comments (no update permission — comments are create/read/delete only)
    COMMENTS_CREATE = "comments:create"
    COMMENTS_READ = "comments:read"
    COMMENTS_DELETE = "comments:delete"
    # LLM Configs
    LLM_CONFIGS_CREATE = "llm_configs:create"
    LLM_CONFIGS_READ = "llm_configs:read"
    LLM_CONFIGS_UPDATE = "llm_configs:update"
    LLM_CONFIGS_DELETE = "llm_configs:delete"
    # Podcasts
    PODCASTS_CREATE = "podcasts:create"
    PODCASTS_READ = "podcasts:read"
    PODCASTS_UPDATE = "podcasts:update"
    PODCASTS_DELETE = "podcasts:delete"
    # Image Generations
    IMAGE_GENERATIONS_CREATE = "image_generations:create"
    IMAGE_GENERATIONS_READ = "image_generations:read"
    IMAGE_GENERATIONS_DELETE = "image_generations:delete"
    # Connectors
    CONNECTORS_CREATE = "connectors:create"
    CONNECTORS_READ = "connectors:read"
    CONNECTORS_UPDATE = "connectors:update"
    CONNECTORS_DELETE = "connectors:delete"
    # Logs
    LOGS_READ = "logs:read"
    LOGS_DELETE = "logs:delete"
    # Members
    MEMBERS_INVITE = "members:invite"
    MEMBERS_VIEW = "members:view"
    MEMBERS_REMOVE = "members:remove"
    MEMBERS_MANAGE_ROLES = "members:manage_roles"
    # Roles
    ROLES_CREATE = "roles:create"
    ROLES_READ = "roles:read"
    ROLES_UPDATE = "roles:update"
    ROLES_DELETE = "roles:delete"
    # Search Space Settings
    SETTINGS_VIEW = "settings:view"
    SETTINGS_UPDATE = "settings:update"
    SETTINGS_DELETE = "settings:delete"  # Delete the entire search space
    # Public Sharing
    PUBLIC_SHARING_VIEW = "public_sharing:view"
    PUBLIC_SHARING_CREATE = "public_sharing:create"
    PUBLIC_SHARING_DELETE = "public_sharing:delete"
    # Full access wildcard
    FULL_ACCESS = "*"
# Predefined role permission sets for convenience.
# Note: Only Owner, Editor, and Viewer roles are supported.
# Owner has full access (*), Editor can do everything except delete,
# Viewer has read-only access (plus comment creation).
DEFAULT_ROLE_PERMISSIONS = {
    "Owner": [Permission.FULL_ACCESS.value],
    "Editor": [
        # Documents (no delete)
        Permission.DOCUMENTS_CREATE.value,
        Permission.DOCUMENTS_READ.value,
        Permission.DOCUMENTS_UPDATE.value,
        # Chats (no delete)
        Permission.CHATS_CREATE.value,
        Permission.CHATS_READ.value,
        Permission.CHATS_UPDATE.value,
        # Comments (no delete)
        Permission.COMMENTS_CREATE.value,
        Permission.COMMENTS_READ.value,
        # LLM Configs (no delete)
        Permission.LLM_CONFIGS_CREATE.value,
        Permission.LLM_CONFIGS_READ.value,
        Permission.LLM_CONFIGS_UPDATE.value,
        # Podcasts (no delete)
        Permission.PODCASTS_CREATE.value,
        Permission.PODCASTS_READ.value,
        Permission.PODCASTS_UPDATE.value,
        # Image Generations (create and read, no delete)
        Permission.IMAGE_GENERATIONS_CREATE.value,
        Permission.IMAGE_GENERATIONS_READ.value,
        # Connectors (no delete)
        Permission.CONNECTORS_CREATE.value,
        Permission.CONNECTORS_READ.value,
        Permission.CONNECTORS_UPDATE.value,
        # Logs (read only)
        Permission.LOGS_READ.value,
        # Members (can invite and view only, cannot manage roles or remove)
        Permission.MEMBERS_INVITE.value,
        Permission.MEMBERS_VIEW.value,
        # Roles (read only - cannot create, update, or delete)
        Permission.ROLES_READ.value,
        # Settings (view only, no update or delete)
        Permission.SETTINGS_VIEW.value,
        # Public Sharing (can create and view, no delete)
        Permission.PUBLIC_SHARING_VIEW.value,
        Permission.PUBLIC_SHARING_CREATE.value,
    ],
    "Viewer": [
        # Documents (read only)
        Permission.DOCUMENTS_READ.value,
        # Chats (read only)
        Permission.CHATS_READ.value,
        # Comments (can create and read, but not delete)
        Permission.COMMENTS_CREATE.value,
        Permission.COMMENTS_READ.value,
        # LLM Configs (read only)
        Permission.LLM_CONFIGS_READ.value,
        # Podcasts (read only)
        Permission.PODCASTS_READ.value,
        # Image Generations (read only)
        Permission.IMAGE_GENERATIONS_READ.value,
        # Connectors (read only)
        Permission.CONNECTORS_READ.value,
        # Logs (read only)
        Permission.LOGS_READ.value,
        # Members (view only)
        Permission.MEMBERS_VIEW.value,
        # Roles (read only)
        Permission.ROLES_READ.value,
        # Settings (view only)
        Permission.SETTINGS_VIEW.value,
        # Public Sharing (view only)
        Permission.PUBLIC_SHARING_VIEW.value,
    ],
}
2025-03-14 18:53:14 -07:00
class Base(DeclarativeBase):
    """Shared declarative base for every ORM model defined in this module."""
2025-07-24 14:43:48 -07:00
2025-03-14 18:53:14 -07:00
class TimestampMixin:
    """Mixin adding an indexed, timezone-aware ``created_at`` column."""

    @declared_attr
    def created_at(cls):  # noqa: N805
        # The default is a callable, so each row records its own creation
        # instant in UTC at insert time.
        column_options = {
            "nullable": False,
            "default": lambda: datetime.now(UTC),
            "index": True,
        }
        return Column(TIMESTAMP(timezone=True), **column_options)
2025-03-14 18:53:14 -07:00
class BaseModel(Base):
    """Abstract ORM base adding an auto-incrementing integer primary key."""

    __abstract__ = True
    __allow_unmapped__ = True

    # Surrogate primary key shared by all concrete models.
    id = Column(Integer, primary_key=True, index=True)
2025-07-24 14:43:48 -07:00
2026-02-26 18:24:57 -08:00
class NewChatMessageRole(StrEnum):
    """Role enum for new chat messages."""

    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
2026-02-26 18:24:57 -08:00
class ChatVisibility(StrEnum):
    """
    Visibility / sharing level for chat threads.

    PRIVATE: Only the creator can see/access the chat (default)
    SEARCH_SPACE: All members of the search space can see/access the chat
    PUBLIC: (Future) Anyone with the link can access the chat
    """

    PRIVATE = "PRIVATE"
    SEARCH_SPACE = "SEARCH_SPACE"
    # PUBLIC = "PUBLIC"  # Reserved for future implementation
2025-12-21 16:16:50 -08:00
class NewChatThread(BaseModel, TimestampMixin):
    """
    Thread model for the new chat feature using assistant-ui.

    Each thread represents a conversation with message history.
    LangGraph checkpointer uses thread_id for state persistence.
    """

    __tablename__ = "new_chat_threads"

    # Display title; defaults to "New Chat" until renamed.
    title = Column(String(500), nullable=False, default="New Chat", index=True)
    # Soft-archive flag; archived threads are kept, not deleted.
    archived = Column(Boolean, nullable=False, default=False)
    # Refreshed on every update via onupdate callable.
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # Visibility/sharing control
    visibility = Column(
        SQLAlchemyEnum(ChatVisibility),
        nullable=False,
        default=ChatVisibility.PRIVATE,
        server_default="PRIVATE",
        index=True,
    )

    # Foreign keys
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )

    # Track who created this chat thread (for visibility filtering)
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,  # Nullable for existing records before migration
        index=True,
    )

    # Clone tracking - for audit and history bootstrap
    cloned_from_thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    cloned_from_snapshot_id = Column(
        Integer,
        ForeignKey("public_chat_snapshots.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # When this thread was cloned; NULL for threads that are not clones.
    cloned_at = Column(
        TIMESTAMP(timezone=True),
        nullable=True,
    )

    # Flag to bootstrap LangGraph checkpointer with DB messages on first message
    needs_history_bootstrap = Column(
        Boolean,
        nullable=False,
        default=False,
        server_default="false",
    )

    # Relationships
    search_space = relationship("SearchSpace", back_populates="new_chat_threads")
    created_by = relationship("User", back_populates="new_chat_threads")
    messages = relationship(
        "NewChatMessage",
        back_populates="thread",
        order_by="NewChatMessage.created_at",
        cascade="all, delete-orphan",
    )
    # foreign_keys is required because PublicChatSnapshot is also referenced
    # via cloned_from_snapshot_id above.
    snapshots = relationship(
        "PublicChatSnapshot",
        back_populates="thread",
        cascade="all, delete-orphan",
        foreign_keys="[PublicChatSnapshot.thread_id]",
    )
2025-12-21 16:16:50 -08:00
class NewChatMessage(BaseModel, TimestampMixin):
    """
    Message model for the new chat feature.

    Stores individual messages in assistant-ui format.
    """

    __tablename__ = "new_chat_messages"

    # user / assistant / system (see NewChatMessageRole).
    role = Column(SQLAlchemyEnum(NewChatMessageRole), nullable=False)
    # Content stored as JSONB to support rich content (text, tool calls, etc.)
    content = Column(JSONB, nullable=False)

    # Foreign key to thread
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Track who sent this message (for shared chats)
    author_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # Relationships
    thread = relationship("NewChatThread", back_populates="messages")
    author = relationship("User")
    comments = relationship(
        "ChatComment",
        back_populates="message",
        cascade="all, delete-orphan",
    )
2026-01-29 16:05:36 +02:00
class PublicChatSnapshot(BaseModel, TimestampMixin):
    """
    Immutable snapshot of a chat thread for public sharing.

    Each snapshot is a frozen copy of the chat at a specific point in time.
    The snapshot_data JSONB contains all messages and metadata needed to
    render the public chat without querying the original thread.
    """

    __tablename__ = "public_chat_snapshots"

    # Link to original thread - CASCADE DELETE when thread is deleted
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Public access token (unique URL identifier)
    share_token = Column(
        String(64),
        nullable=False,
        unique=True,
        index=True,
    )
    # Hash of snapshotted content, used for de-duplication (see __table_args__).
    content_hash = Column(
        String(64),
        nullable=False,
        index=True,
    )
    # Frozen copy of messages and metadata for rendering the public page.
    snapshot_data = Column(JSONB, nullable=False)
    # IDs of the messages included in this snapshot.
    message_ids = Column(ARRAY(Integer), nullable=False)
    created_by_user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # Relationships
    # foreign_keys disambiguates from NewChatThread.cloned_from_snapshot_id.
    thread = relationship(
        "NewChatThread",
        back_populates="snapshots",
        foreign_keys="[PublicChatSnapshot.thread_id]",
    )
    created_by = relationship("User")

    # Constraints
    __table_args__ = (
        # Prevent duplicate snapshots of the same content for the same thread
        UniqueConstraint(
            "thread_id", "content_hash", name="uq_snapshot_thread_content_hash"
        ),
    )
2026-01-15 16:34:03 +02:00
class ChatComment(BaseModel, TimestampMixin):
    """
    Comment model for comments on AI chat responses.

    Supports one level of nesting (replies to comments, but no replies to
    replies).
    """

    __tablename__ = "chat_comments"

    # The chat message this comment is attached to.
    message_id = Column(
        Integer,
        ForeignKey("new_chat_messages.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Denormalized thread_id for efficient Electric SQL subscriptions (one per thread)
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Parent comment for replies; NULL for top-level comments.
    parent_id = Column(
        Integer,
        ForeignKey("chat_comments.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    # Author kept nullable so comments survive user deletion (SET NULL).
    author_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    content = Column(Text, nullable=False)
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # Relationships
    message = relationship("NewChatMessage", back_populates="comments")
    thread = relationship("NewChatThread")
    author = relationship("User")
    # Self-referential relationship; remote_side makes this the child side,
    # and the backref exposes the one level of replies.
    parent = relationship(
        "ChatComment", remote_side="ChatComment.id", backref="replies"
    )
    mentions = relationship(
        "ChatCommentMention",
        back_populates="comment",
        cascade="all, delete-orphan",
    )
2025-12-21 16:16:50 -08:00
2026-01-15 16:37:46 +02:00
class ChatCommentMention(BaseModel, TimestampMixin):
    """
    Tracks @mentions in chat comments for notification purposes.
    """

    __tablename__ = "chat_comment_mentions"

    # Comment containing the mention; removed with the comment (CASCADE).
    comment_id = Column(
        Integer,
        ForeignKey("chat_comments.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # User who was mentioned; mentions are removed with the user (CASCADE).
    mentioned_user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Relationships
    comment = relationship("ChatComment", back_populates="mentions")
    mentioned_user = relationship("User")
2026-01-20 16:17:54 +02:00
class ChatSessionState(BaseModel):
    """
    Tracks real-time session state for shared chat collaboration.

    One record per thread (enforced by the unique constraint on thread_id),
    synced via Electric SQL.
    """

    __tablename__ = "chat_session_state"

    # unique=True guarantees at most one state row per thread.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
        index=True,
    )
    # User whose prompt the AI is currently answering; NULL when idle.
    ai_responding_to_user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )

    # Relationships
    thread = relationship("NewChatThread")
    ai_responding_to_user = relationship("User")
2026-02-26 18:24:57 -08:00
class MemoryCategory(StrEnum):
    """Categories for user memories."""

    # Using lowercase keys to match PostgreSQL enum values
    preference = "preference"  # User preferences (e.g., "prefers dark mode")
    fact = "fact"  # Facts about the user (e.g., "is a Python developer")
    instruction = (
        "instruction"  # Standing instructions (e.g., "always respond in bullet points")
    )
    context = "context"  # Contextual information (e.g., "working on project X")
2026-01-20 15:04:07 -08:00
class UserMemory(BaseModel, TimestampMixin):
    """
    Private memory: facts, preferences, context per user per search space.

    Used only for private chats (not shared/team chats).
    """

    __tablename__ = "user_memories"

    # Owning user; memories are removed with the user (CASCADE).
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Optional association with a search space (if memory is space-specific)
    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )

    # The actual memory content
    memory_text = Column(Text, nullable=False)

    # Category for organization and filtering
    category = Column(
        SQLAlchemyEnum(MemoryCategory),
        nullable=False,
        default=MemoryCategory.fact,
    )

    # Vector embedding for semantic search
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # Track when memory was last updated
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # Relationships
    user = relationship("User", back_populates="memories")
    search_space = relationship("SearchSpace", back_populates="user_memories")
2026-02-06 16:33:12 +02:00
class SharedMemory(BaseModel, TimestampMixin):
    """
    Memory shared across a search space (team-level counterpart of UserMemory).

    Scoped to a search space rather than a single user; tracks the creator.
    """

    __tablename__ = "shared_memories"

    # Search space this memory belongs to; removed with the space (CASCADE).
    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # User who created the shared memory; removed with the user (CASCADE).
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # The actual memory content.
    memory_text = Column(Text, nullable=False)
    # Category for organization and filtering (defaults to "fact").
    category = Column(
        SQLAlchemyEnum(MemoryCategory),
        nullable=False,
        default=MemoryCategory.fact,
    )
    # Vector embedding for semantic search.
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # Relationships
    search_space = relationship("SearchSpace", back_populates="shared_memories")
    created_by = relationship("User")
2025-03-14 18:53:14 -07:00
class Document(BaseModel, TimestampMixin):
    """A document ingested into a search space (file, crawled URL, connector item, etc.)."""

    __tablename__ = "documents"

    title = Column(String, nullable=False, index=True)

    document_type = Column(SQLAlchemyEnum(DocumentType), nullable=False)
    document_metadata = Column(JSON, nullable=True)

    content = Column(Text, nullable=False)

    # Content hash for deduplication across the table.
    content_hash = Column(String, nullable=False, index=True, unique=True)

    # Stable identity hash for upsert-style reindexing (nullable for legacy rows).
    unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)

    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # BlockNote live editing state (NULL when never edited)
    # DEPRECATED: Will be removed in a future migration. Use source_markdown instead.
    blocknote_document = Column(JSONB, nullable=True)

    # Full raw markdown content for the Plate.js editor.
    # This is the source of truth for document content in the editor.
    # Populated from markdown at ingestion time, or from blocknote_document migration.
    source_markdown = Column(Text, nullable=True)

    # Background reindex flag (set when editor content is saved)
    content_needs_reindexing = Column(
        Boolean, nullable=False, default=False, server_default=text("false")
    )

    # Track when document was last updated by indexers, processors, or editor
    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )

    # Track who created/uploaded this document
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,  # Nullable for backward compatibility with existing records
        index=True,
    )

    # Track which connector created this document (for cleanup on connector deletion)
    connector_id = Column(
        Integer,
        ForeignKey("search_source_connectors.id", ondelete="SET NULL"),
        nullable=True,  # Nullable for manually uploaded docs without connector
        index=True,
    )

    # Processing status for real-time visibility (JSONB)
    # Format: {"state": "ready"} or {"state": "processing"} or {"state": "failed", "reason": "..."}
    # Default to {"state": "ready"} for backward compatibility with existing documents.
    # BUGFIX: the Python-side default was the bare enum member, which serializes
    # to the JSON string "ready" — inconsistent with the documented {"state": ...}
    # object shape and with server_default. Use a callable so every row gets a
    # fresh dict in the documented shape.
    status = Column(
        JSONB,
        nullable=False,
        default=lambda: {"state": DocumentStatus.ready.value},
        server_default=text('\'{"state": "ready"}\'::jsonb'),
        index=True,
    )

    # Relationships
    search_space = relationship("SearchSpace", back_populates="documents")
    created_by = relationship("User", back_populates="documents")
    connector = relationship("SearchSourceConnector", back_populates="documents")
    chunks = relationship(
        "Chunk", back_populates="document", cascade="all, delete-orphan"
    )


class Chunk(BaseModel, TimestampMixin):
    """A single embedded chunk of a parent Document, used for retrieval."""

    __tablename__ = "chunks"

    # Chunk text plus its embedding vector for semantic search.
    content = Column(Text, nullable=False)
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # Parent document; chunks are deleted along with it (CASCADE).
    document_id = Column(
        Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False
    )
    document = relationship("Document", back_populates="chunks")


class SurfsenseDocsDocument(BaseModel, TimestampMixin):
    """
    Surfsense documentation storage.
    Indexed at migration time from MDX files.
    """

    __tablename__ = "surfsense_docs_documents"

    # File path of the MDX source, e.g. "connectors/slack.mdx".
    source = Column(String, unique=True, index=True, nullable=False)

    title = Column(String, nullable=False)
    content = Column(Text, nullable=False)
    # Content hash used to detect changes between indexing runs.
    content_hash = Column(String, index=True, nullable=False)
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    updated_at = Column(TIMESTAMP(timezone=True), index=True, nullable=True)

    chunks = relationship(
        "SurfsenseDocsChunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )


class SurfsenseDocsChunk(BaseModel, TimestampMixin):
    """Chunk storage for Surfsense documentation."""

    __tablename__ = "surfsense_docs_chunks"

    # Chunk text and its embedding for semantic search.
    content = Column(Text, nullable=False)
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # Parent docs document; removed together with it (CASCADE).
    document_id = Column(
        Integer,
        ForeignKey("surfsense_docs_documents.id", ondelete="CASCADE"),
        nullable=False,
    )
    document = relationship("SurfsenseDocsDocument", back_populates="chunks")


class Podcast(BaseModel, TimestampMixin):
    """Podcast model for storing generated podcasts."""

    __tablename__ = "podcasts"

    title = Column(String(500), nullable=False)

    # Structured transcript data; NULL until generation produces one.
    podcast_transcript = Column(JSONB, nullable=True)
    # Storage path/URL of the rendered audio; NULL until rendered.
    file_location = Column(Text, nullable=True)
    # Generation status. The DB enum type already exists (create_type=False);
    # values_callable stores enum *values* (not member names) so the
    # server_default "ready" matches what SQLAlchemy writes.
    status = Column(
        SQLAlchemyEnum(
            PodcastStatus,
            name="podcast_status",
            create_type=False,
            values_callable=lambda x: [e.value for e in x],
        ),
        nullable=False,
        default=PodcastStatus.READY,
        server_default="ready",
        index=True,
    )

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="podcasts")

    # Chat thread the podcast was generated from; detached (SET NULL)
    # rather than deleted if the thread goes away.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    thread = relationship("NewChatThread")


class Report(BaseModel, TimestampMixin):
    """Report model for storing generated Markdown reports."""

    __tablename__ = "reports"

    title = Column(String(500), nullable=False)
    # Markdown body of the report.
    content = Column(Text, nullable=True)
    # Arbitrary metadata: section headings, word count, etc.
    report_metadata = Column(JSONB, nullable=True)
    # Style preset used for generation, e.g. "executive_summary", "deep_research".
    report_style = Column(String(100), nullable=True)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="reports")

    # Versioning: reports sharing the same report_group_id are versions of the
    # same report. For v1, report_group_id = the report's own id (set after insert).
    report_group_id = Column(Integer, nullable=True, index=True)

    # Originating chat thread; detached (SET NULL) if the thread is deleted.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        index=True,
        nullable=True,
    )
    thread = relationship("NewChatThread")


class ImageGenerationConfig(BaseModel, TimestampMixin):
    """
    Dedicated configuration table for image generation models.

    Separate from NewLLMConfig because image generation models don't need
    system_instructions, citations_enabled, or use_default_system_instructions.
    They only need provider credentials and model parameters.
    """

    __tablename__ = "image_generation_configs"

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)

    # Provider & model (uses ImageGenProvider, NOT LiteLLMProvider)
    provider = Column(SQLAlchemyEnum(ImageGenProvider), nullable=False)
    custom_provider = Column(String(100), nullable=True)
    model_name = Column(String(100), nullable=False)

    # Credentials
    api_key = Column(String, nullable=False)
    api_base = Column(String(500), nullable=True)
    api_version = Column(String(50), nullable=True)  # Azure-specific

    # Additional litellm parameters.
    # BUGFIX: default=dict yields a fresh dict per row; the previous `{}`
    # literal was a single mutable object shared by every row that relied
    # on the default.
    litellm_params = Column(JSON, nullable=True, default=dict)

    # Relationships
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship(
        "SearchSpace", back_populates="image_generation_configs"
    )

    # User who created this config
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="image_generation_configs")


class ImageGeneration(BaseModel, TimestampMixin):
    """
    Stores image generation requests and results using litellm.aimage_generation().

    Since aimage_generation is a single async call (not a background job),
    there is no status enum. A row with response_data means success;
    a row with error_message means failure.

    Response data is stored as JSONB matching the litellm output format:
    {
        "created": int,
        "data": [{"b64_json": str | None, "revised_prompt": str | None, "url": str | None}],
        "usage": {"prompt_tokens": int, "completion_tokens": int, "total_tokens": int}
    }
    """

    __tablename__ = "image_generations"

    # Request parameters (matching litellm.aimage_generation() params)
    prompt = Column(Text, nullable=False)
    model = Column(String(200), nullable=True)  # e.g., "dall-e-3", "gpt-image-1"
    # Number of images requested.
    n = Column(Integer, nullable=True, default=1)
    quality = Column(
        String(50), nullable=True
    )  # "auto", "high", "medium", "low", "hd", "standard"
    size = Column(
        String(50), nullable=True
    )  # "1024x1024", "1536x1024", "1024x1536", etc.
    style = Column(String(50), nullable=True)  # Model-specific style parameter
    response_format = Column(String(50), nullable=True)  # "url" or "b64_json"

    # Image generation config reference
    # 0 = Auto mode (router), negative IDs = global configs from YAML,
    # positive IDs = ImageGenerationConfig records in DB
    image_generation_config_id = Column(Integer, nullable=True)

    # Response data (full litellm response as JSONB) — present on success
    response_data = Column(JSONB, nullable=True)
    # Error message — present on failure
    error_message = Column(Text, nullable=True)

    # Signed access token for serving images via <img> tags.
    # Stored in DB so it survives SECRET_KEY rotation.
    access_token = Column(String(64), nullable=True, index=True)

    # Foreign keys
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # Relationships
    search_space = relationship("SearchSpace", back_populates="image_generations")
    created_by = relationship("User", back_populates="image_generations")


class SearchSpace(BaseModel, TimestampMixin):
    """A workspace grouping documents, chats, connectors, configs, and members.

    The search space is the main tenancy boundary of the app: almost every
    other model references it, and deleting a space cascades to its contents.
    """

    __tablename__ = "searchspaces"

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)

    citations_enabled = Column(
        Boolean, nullable=False, default=True
    )  # Enable/disable citations
    qna_custom_instructions = Column(
        Text, nullable=True, default=""
    )  # User's custom instructions

    # Search space-level LLM preferences (shared by all members)
    # Note: ID values:
    # - 0: Auto mode (uses LiteLLM Router for load balancing) - default for new search spaces
    # - Negative IDs: Global configs from YAML
    # - Positive IDs: Custom configs from DB (NewLLMConfig table)
    agent_llm_id = Column(
        Integer, nullable=True, default=0
    )  # For agent/chat operations, defaults to Auto mode
    document_summary_llm_id = Column(
        Integer, nullable=True, default=0
    )  # For document summarization, defaults to Auto mode
    image_generation_config_id = Column(
        Integer, nullable=True, default=0
    )  # For image generation, defaults to Auto mode

    # Original creator/owner of the space.
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="search_spaces")

    # Contained entities — all are cascade-deleted with the space.
    documents = relationship(
        "Document",
        back_populates="search_space",
        order_by="Document.id",
        cascade="all, delete-orphan",
    )
    new_chat_threads = relationship(
        "NewChatThread",
        back_populates="search_space",
        order_by="NewChatThread.updated_at.desc()",
        cascade="all, delete-orphan",
    )
    podcasts = relationship(
        "Podcast",
        back_populates="search_space",
        order_by="Podcast.id.desc()",
        cascade="all, delete-orphan",
    )
    reports = relationship(
        "Report",
        back_populates="search_space",
        order_by="Report.id.desc()",
        cascade="all, delete-orphan",
    )
    image_generations = relationship(
        "ImageGeneration",
        back_populates="search_space",
        order_by="ImageGeneration.id.desc()",
        cascade="all, delete-orphan",
    )
    logs = relationship(
        "Log",
        back_populates="search_space",
        order_by="Log.id",
        cascade="all, delete-orphan",
    )
    notifications = relationship(
        "Notification",
        back_populates="search_space",
        order_by="Notification.created_at.desc()",
        cascade="all, delete-orphan",
    )
    search_source_connectors = relationship(
        "SearchSourceConnector",
        back_populates="search_space",
        order_by="SearchSourceConnector.id",
        cascade="all, delete-orphan",
    )
    new_llm_configs = relationship(
        "NewLLMConfig",
        back_populates="search_space",
        order_by="NewLLMConfig.id",
        cascade="all, delete-orphan",
    )
    image_generation_configs = relationship(
        "ImageGenerationConfig",
        back_populates="search_space",
        order_by="ImageGenerationConfig.id",
        cascade="all, delete-orphan",
    )

    # RBAC relationships
    roles = relationship(
        "SearchSpaceRole",
        back_populates="search_space",
        order_by="SearchSpaceRole.id",
        cascade="all, delete-orphan",
    )
    memberships = relationship(
        "SearchSpaceMembership",
        back_populates="search_space",
        order_by="SearchSpaceMembership.id",
        cascade="all, delete-orphan",
    )
    invites = relationship(
        "SearchSpaceInvite",
        back_populates="search_space",
        order_by="SearchSpaceInvite.id",
        cascade="all, delete-orphan",
    )

    # User memories associated with this search space
    user_memories = relationship(
        "UserMemory",
        back_populates="search_space",
        order_by="UserMemory.updated_at.desc()",
        cascade="all, delete-orphan",
    )
    shared_memories = relationship(
        "SharedMemory",
        back_populates="search_space",
        order_by="SharedMemory.updated_at.desc()",
        cascade="all, delete-orphan",
    )


class SearchSourceConnector(BaseModel, TimestampMixin):
    """An external data-source connection (Slack, Notion, GitHub, ...) in a search space."""

    __tablename__ = "search_source_connectors"

    # A user may configure several connectors of the same type in one space,
    # as long as each has a distinct name.
    __table_args__ = (
        UniqueConstraint(
            "search_space_id",
            "user_id",
            "connector_type",
            "name",
            name="uq_searchspace_user_connector_type_name",
        ),
    )

    name = Column(String(100), nullable=False, index=True)
    connector_type = Column(SQLAlchemyEnum(SearchSourceConnectorType), nullable=False)
    # Whether this connector's content can be indexed into documents.
    is_indexable = Column(Boolean, nullable=False, default=False)
    last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # Connector-specific configuration blob (credentials, options, ...).
    config = Column(JSON, nullable=False)

    # Summary generation (LLM-based) - disabled by default to save resources.
    # When enabled, improves hybrid search quality at the cost of LLM calls.
    enable_summary = Column(
        Boolean, nullable=False, default=False, server_default="false"
    )

    # Periodic indexing fields
    periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
    indexing_frequency_minutes = Column(Integer, nullable=True)
    next_scheduled_at = Column(TIMESTAMP(timezone=True), nullable=True)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship(
        "SearchSpace", back_populates="search_source_connectors"
    )

    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="search_source_connectors")

    # Documents created by this connector (for cleanup on connector deletion)
    documents = relationship("Document", back_populates="connector")


class NewLLMConfig(BaseModel, TimestampMixin):
    """
    New LLM configuration table that combines model settings with prompt configuration.

    This table provides:
    - LLM model configuration (provider, model_name, api_key, etc.)
    - Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
    - Citation toggle (enable/disable citation instructions)

    Note: Tools instructions are built by get_tools_instructions(thread_visibility) (personal vs shared memory).
    """

    __tablename__ = "new_llm_configs"

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)

    # === LLM Model Configuration (from original LLMConfig, excluding 'language') ===
    # Provider from the enum
    provider = Column(SQLAlchemyEnum(LiteLLMProvider), nullable=False)
    # Custom provider name when provider is CUSTOM
    custom_provider = Column(String(100), nullable=True)
    # Just the model name without provider prefix
    model_name = Column(String(100), nullable=False)
    # API Key should be encrypted before storing
    api_key = Column(String, nullable=False)
    api_base = Column(String(500), nullable=True)
    # For any other parameters that litellm supports.
    # BUGFIX: default=dict creates a fresh dict per row; the previous `{}`
    # literal was a single shared mutable object reused by every row.
    litellm_params = Column(JSON, nullable=True, default=dict)

    # === Prompt Configuration ===
    # Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
    # Users can customize this from the UI
    system_instructions = Column(
        Text,
        nullable=False,
        default="",  # Empty string means use default SURFSENSE_SYSTEM_INSTRUCTIONS
    )
    # Whether to use the default system instructions when system_instructions is empty
    use_default_system_instructions = Column(Boolean, nullable=False, default=True)
    # Citation toggle - when enabled, SURFSENSE_CITATION_INSTRUCTIONS is injected
    # When disabled, an anti-citation prompt is injected instead
    citations_enabled = Column(Boolean, nullable=False, default=True)

    # === Relationships ===
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="new_llm_configs")

    # User who created this config
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="new_llm_configs")


class Log(BaseModel, TimestampMixin):
    """Application log entry scoped to a search space."""

    __tablename__ = "logs"

    level = Column(SQLAlchemyEnum(LogLevel), nullable=False, index=True)
    status = Column(SQLAlchemyEnum(LogStatus), nullable=False, index=True)
    message = Column(Text, nullable=False)

    source = Column(
        String(200), nullable=True, index=True
    )  # Service/component that generated the log

    # Additional context data.
    # BUGFIX: default=dict gives each row a fresh dict; the previous `{}`
    # literal was one shared mutable object across all rows using the default.
    log_metadata = Column(JSON, nullable=True, default=dict)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="logs")


class Notification(BaseModel, TimestampMixin):
    """In-app notification for a user, optionally scoped to a search space."""

    __tablename__ = "notifications"

    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=True
    )
    type = Column(
        String(50), nullable=False
    )  # 'connector_indexing', 'document_processing', etc.
    title = Column(String(200), nullable=False)
    message = Column(Text, nullable=False)
    read = Column(
        Boolean, nullable=False, default=False, server_default=text("false"), index=True
    )
    # Arbitrary extra payload, stored under the DB column name "metadata"
    # (the attribute is renamed because "metadata" is reserved on declarative models).
    # BUGFIX: default=dict gives each row a fresh dict; the previous `{}`
    # literal was one shared mutable object across all rows using the default.
    notification_metadata = Column("metadata", JSONB, nullable=True, default=dict)
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=True,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    user = relationship("User", back_populates="notifications")
    search_space = relationship("SearchSpace", back_populates="notifications")


class UserIncentiveTask(BaseModel, TimestampMixin):
    """
    Records incentive tasks a user has completed.

    The (user_id, task_type) pair is unique, so each task type can be
    completed only once per user; completion bumps the user's pages_limit.
    """

    __tablename__ = "user_incentive_tasks"

    __table_args__ = (
        UniqueConstraint("user_id", "task_type", name="uq_user_incentive_task"),
    )

    # User who completed the task.
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    # Which incentive task was completed.
    task_type = Column(SQLAlchemyEnum(IncentiveTaskType), index=True, nullable=False)
    # Number of extra pages granted on completion.
    pages_awarded = Column(Integer, nullable=False)
    # When the task was completed.
    completed_at = Column(
        TIMESTAMP(timezone=True),
        default=lambda: datetime.now(UTC),
        nullable=False,
    )

    user = relationship("User", back_populates="incentive_tasks")


class SearchSpaceRole(BaseModel, TimestampMixin):
    """
    Custom roles that can be defined per search space.
    Each search space can have multiple roles with different permission sets.
    """

    __tablename__ = "search_space_roles"

    # Role names are unique within a single search space.
    __table_args__ = (
        UniqueConstraint(
            "search_space_id",
            "name",
            name="uq_searchspace_role_name",
        ),
    )

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)
    # List of Permission enum values (e.g., ["documents:read", "chats:create"]).
    # BUGFIX: default=list creates a fresh list per row; the previous `[]`
    # literal was one shared mutable object reused by every row taking the default.
    permissions = Column(ARRAY(String), nullable=False, default=list)
    # Whether this role is assigned to new members by default when they join via invite
    is_default = Column(Boolean, nullable=False, default=False)
    # System roles (Owner, Editor, Viewer) cannot be deleted
    is_system_role = Column(Boolean, nullable=False, default=False)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="roles")
    memberships = relationship(
        "SearchSpaceMembership", back_populates="role", passive_deletes=True
    )
    invites = relationship(
        "SearchSpaceInvite", back_populates="role", passive_deletes=True
    )


class SearchSpaceMembership(BaseModel, TimestampMixin):
    """
    Tracks user membership in search spaces with their assigned role.
    Each user can be a member of multiple search spaces with different roles.
    """

    __tablename__ = "search_space_memberships"

    # A user may join any given search space at most once.
    __table_args__ = (
        UniqueConstraint(
            "user_id", "search_space_id", name="uq_user_searchspace_membership"
        ),
    )

    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    # Assigned role; detached (SET NULL) if the role is deleted.
    role_id = Column(
        Integer,
        ForeignKey("search_space_roles.id", ondelete="SET NULL"),
        nullable=True,
    )
    # True for the original creator/owner of the search space.
    is_owner = Column(Boolean, default=False, nullable=False)
    # When the user joined (via invite or as creator).
    joined_at = Column(
        TIMESTAMP(timezone=True),
        default=lambda: datetime.now(UTC),
        nullable=False,
    )
    # Invite used to join; NULL for the owner/creator.
    invited_by_invite_id = Column(
        Integer,
        ForeignKey("search_space_invites.id", ondelete="SET NULL"),
        nullable=True,
    )

    user = relationship("User", back_populates="search_space_memberships")
    search_space = relationship("SearchSpace", back_populates="memberships")
    role = relationship("SearchSpaceRole", back_populates="memberships")
    invited_by_invite = relationship(
        "SearchSpaceInvite", back_populates="used_by_memberships"
    )


class SearchSpaceInvite(BaseModel, TimestampMixin):
    """
    Invite links for search spaces.
    Users can create invite links with specific roles that others can use to join.
    """

    __tablename__ = "search_space_invites"

    # Unique invite code (used in invite URLs)
    invite_code = Column(String(64), nullable=False, unique=True, index=True)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )

    # Role to assign when invite is used (null means use default role)
    role_id = Column(
        Integer,
        ForeignKey("search_space_roles.id", ondelete="SET NULL"),
        nullable=True,
    )
    # User who created this invite
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Expiration timestamp (null means never expires)
    expires_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # Maximum number of times this invite can be used (null means unlimited)
    max_uses = Column(Integer, nullable=True)
    # Number of times this invite has been used
    uses_count = Column(Integer, nullable=False, default=0)
    # Whether this invite is currently active
    is_active = Column(Boolean, nullable=False, default=True)
    # Optional custom name/label for the invite
    name = Column(String(100), nullable=True)

    search_space = relationship("SearchSpace", back_populates="invites")
    role = relationship("SearchSpaceRole", back_populates="invites")
    created_by = relationship("User", back_populates="created_invites")
    # Memberships created through this invite.
    used_by_memberships = relationship(
        "SearchSpaceMembership",
        back_populates="invited_by_invite",
        passive_deletes=True,
    )


if config . AUTH_TYPE == " GOOGLE " :
2025-07-24 14:43:48 -07:00
2025-05-21 20:56:23 -07:00
    class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):
        # Linked OAuth account rows (GOOGLE auth build); the full schema is
        # inherited from the fastapi-users base table — nothing to add here.
        pass

    class User(SQLAlchemyBaseUserTableUUID, Base):
        """Application user (GOOGLE auth build, with linked OAuth accounts)."""

        # Linked Google OAuth accounts; eagerly loaded with the user.
        oauth_accounts: Mapped[list[OAuthAccount]] = relationship(
            "OAuthAccount", lazy="joined"
        )
        # Search spaces owned (created) by this user.
        search_spaces = relationship("SearchSpace", back_populates="user")
        notifications = relationship(
            "Notification",
            back_populates="user",
            order_by="Notification.created_at.desc()",
            cascade="all, delete-orphan",
        )

        # RBAC relationships
        search_space_memberships = relationship(
            "SearchSpaceMembership",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        created_invites = relationship(
            "SearchSpaceInvite",
            back_populates="created_by",
            passive_deletes=True,
        )

        # Chat threads created by this user
        new_chat_threads = relationship(
            "NewChatThread",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Documents created/uploaded by this user
        documents = relationship(
            "Document",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Image generations created by this user
        image_generations = relationship(
            "ImageGeneration",
            back_populates="created_by",
            passive_deletes=True,
        )
        # Connectors created by this user
        search_source_connectors = relationship(
            "SearchSourceConnector",
            back_populates="user",
            passive_deletes=True,
        )
        # LLM configs created by this user
        new_llm_configs = relationship(
            "NewLLMConfig",
            back_populates="user",
            passive_deletes=True,
        )
        # Image generation configs created by this user
        image_generation_configs = relationship(
            "ImageGenerationConfig",
            back_populates="user",
            passive_deletes=True,
        )

        # User memories for personalized AI responses
        memories = relationship(
            "UserMemory",
            back_populates="user",
            order_by="UserMemory.updated_at.desc()",
            cascade="all, delete-orphan",
        )
        # Incentive tasks completed by this user
        incentive_tasks = relationship(
            "UserIncentiveTask",
            back_populates="user",
            cascade="all, delete-orphan",
        )

        # Page usage tracking for ETL services
        pages_limit = Column(
            Integer,
            nullable=False,
            default=config.PAGES_LIMIT,
            server_default=str(config.PAGES_LIMIT),
        )
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")

        # User profile from OAuth
        display_name = Column(String, nullable=True)
        avatar_url = Column(String, nullable=True)

        # Refresh tokens for this user
        refresh_tokens = relationship(
            "RefreshToken",
            back_populates="user",
            cascade="all, delete-orphan",
        )
2025-05-21 20:56:23 -07:00
else :
2025-03-14 18:53:14 -07:00
2025-07-24 14:43:48 -07:00
class User(SQLAlchemyBaseUserTableUUID, Base):
    """Application user for non-Google auth deployments.

    Identity columns (UUID ``id``, ``email``, ``hashed_password``,
    ``is_active``, ``is_superuser``, ``is_verified``) are inherited from
    ``SQLAlchemyBaseUserTableUUID`` (fastapi-users).
    """

    # Search spaces owned by this user.
    search_spaces = relationship("SearchSpace", back_populates="user")

    # In-app notifications, newest first.
    notifications = relationship(
        "Notification",
        back_populates="user",
        order_by="Notification.created_at.desc()",
        cascade="all, delete-orphan",
    )

    # RBAC relationships
    search_space_memberships = relationship(
        "SearchSpaceMembership",
        back_populates="user",
        cascade="all, delete-orphan",
    )
    # Invites this user created; passive_deletes defers FK cleanup to the DB.
    created_invites = relationship(
        "SearchSpaceInvite",
        back_populates="created_by",
        passive_deletes=True,
    )

    # Chat threads created by this user
    new_chat_threads = relationship(
        "NewChatThread",
        back_populates="created_by",
        passive_deletes=True,
    )

    # Documents created/uploaded by this user
    documents = relationship(
        "Document",
        back_populates="created_by",
        passive_deletes=True,
    )

    # Image generations created by this user
    image_generations = relationship(
        "ImageGeneration",
        back_populates="created_by",
        passive_deletes=True,
    )

    # Connectors created by this user
    search_source_connectors = relationship(
        "SearchSourceConnector",
        back_populates="user",
        passive_deletes=True,
    )
    # LLM configs created by this user
    new_llm_configs = relationship(
        "NewLLMConfig",
        back_populates="user",
        passive_deletes=True,
    )
    # Image generation configs created by this user
    image_generation_configs = relationship(
        "ImageGenerationConfig",
        back_populates="user",
        passive_deletes=True,
    )

    # User memories for personalized AI responses, most recently updated first.
    memories = relationship(
        "UserMemory",
        back_populates="user",
        order_by="UserMemory.updated_at.desc()",
        cascade="all, delete-orphan",
    )

    # Incentive tasks completed by this user
    incentive_tasks = relationship(
        "UserIncentiveTask",
        back_populates="user",
        cascade="all, delete-orphan",
    )

    # Page usage tracking for ETL services.
    # `default` covers new ORM objects; `server_default` covers rows created
    # outside the ORM (e.g. by migrations backfilling existing users).
    pages_limit = Column(
        Integer,
        nullable=False,
        default=config.PAGES_LIMIT,
        server_default=str(config.PAGES_LIMIT),
    )

    pages_used = Column(Integer, nullable=False, default=0, server_default="0")

    # User profile (can be set manually for non-OAuth users)
    display_name = Column(String, nullable=True)
    avatar_url = Column(String, nullable=True)

    # Refresh tokens for this user (deleted with the user via ORM cascade).
    refresh_tokens = relationship(
        "RefreshToken",
        back_populates="user",
        cascade="all, delete-orphan",
    )
class RefreshToken(Base, TimestampMixin):
    """
    Stores refresh tokens for user session management.

    Each row represents one device/session.
    """

    __tablename__ = "refresh_tokens"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning user; rows are removed at the DB level when the user is deleted.
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    user = relationship("User", back_populates="refresh_tokens")
    # Hash of the token (name suggests the raw token is never persisted).
    token_hash = Column(String(256), unique=True, nullable=False, index=True)
    # Timezone-aware expiry; compared against datetime.now(UTC) below.
    expires_at = Column(TIMESTAMP(timezone=True), nullable=False, index=True)
    is_revoked = Column(Boolean, default=False, nullable=False)
    # NOTE(review): presumably groups tokens from one rotation chain so a
    # reused token can revoke the whole family — confirm with the auth flow.
    family_id = Column(UUID(as_uuid=True), nullable=False, index=True)

    @property
    def is_expired(self) -> bool:
        """True once the current UTC time has reached ``expires_at``."""
        return datetime.now(UTC) >= self.expires_at

    @property
    def is_valid(self) -> bool:
        """True when the token is neither expired nor revoked."""
        return not self.is_expired and not self.is_revoked
2025-03-14 18:53:14 -07:00
# Single async engine + session factory shared by the whole application.
# expire_on_commit=False keeps ORM objects readable after commit, which is
# needed when FastAPI handlers return objects after the session commits.
engine = create_async_engine(DATABASE_URL)
async_session_maker = async_sessionmaker(engine, expire_on_commit=False)
2025-07-24 14:43:48 -07:00
2025-03-14 18:53:14 -07:00
async def setup_indexes():
    """Create the auxiliary Postgres indexes used for search.

    Idempotent: every statement uses CREATE INDEX IF NOT EXISTS, so this is
    safe to run on every startup. All statements run in one transaction.
    """
    index_statements = (
        # Documents: HNSW vector index + full-text GIN index on content.
        "CREATE INDEX IF NOT EXISTS document_vector_index ON documents USING hnsw (embedding public.vector_cosine_ops)",
        "CREATE INDEX IF NOT EXISTS document_search_index ON documents USING gin (to_tsvector('english', content))",
        # Document chunks (index names keep the historical 'chucks' spelling;
        # renaming would orphan the existing indexes in deployed databases).
        "CREATE INDEX IF NOT EXISTS chucks_vector_index ON chunks USING hnsw (embedding public.vector_cosine_ops)",
        "CREATE INDEX IF NOT EXISTS chucks_search_index ON chunks USING gin (to_tsvector('english', content))",
        # pg_trgm index for efficient ILIKE '%term%' title searches —
        # critical for the document mention picker (@mentions) to scale.
        "CREATE INDEX IF NOT EXISTS idx_documents_title_trgm ON documents USING gin (title gin_trgm_ops)",
        # B-tree index on search_space_id for fast filtering.
        "CREATE INDEX IF NOT EXISTS idx_documents_search_space_id ON documents (search_space_id)",
        # Covering index for the "recent documents" query - enables index-only scan.
        "CREATE INDEX IF NOT EXISTS idx_documents_search_space_updated ON documents (search_space_id, updated_at DESC NULLS LAST) INCLUDE (id, title, document_type)",
        "CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm ON surfsense_docs_documents USING gin (title gin_trgm_ops)",
    )
    async with engine.begin() as conn:
        for statement in index_statements:
            await conn.execute(text(statement))
2025-07-24 14:43:48 -07:00
2025-03-14 18:53:14 -07:00
async def create_db_and_tables():
    """Ensure required Postgres extensions, all tables, and indexes exist.

    Extensions are created first because the table/index definitions
    (pgvector columns, trigram indexes) depend on them. Index creation runs
    after this transaction commits so the tables are visible to it.
    """
    async with engine.begin() as conn:
        for extension in ("vector", "pg_trgm"):
            await conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {extension}"))
        await conn.run_sync(Base.metadata.create_all)
    await setup_indexes()
async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
    """FastAPI dependency: yield one AsyncSession per request, closed on exit."""
    async with async_session_maker() as session:
        yield session
2025-05-21 20:56:23 -07:00
# get_user_db is defined once at import time, matching the auth mode:
# the GOOGLE variant wires the OAuthAccount table into the adapter.
if config.AUTH_TYPE == "GOOGLE":

    async def get_user_db(session: AsyncSession = Depends(get_async_session)):
        """FastAPI dependency: fastapi-users DB adapter with OAuth accounts."""
        yield SQLAlchemyUserDatabase(session, User, OAuthAccount)

else:

    async def get_user_db(session: AsyncSession = Depends(get_async_session)):
        """FastAPI dependency: fastapi-users DB adapter (no OAuth accounts)."""
        yield SQLAlchemyUserDatabase(session, User)
2025-07-24 14:43:48 -07:00
2025-11-27 22:45:04 -08:00
def has_permission(user_permissions: list[str], required_permission: str) -> bool:
    """
    Check if the user has the required permission.

    Supports the full-access wildcard (Permission.FULL_ACCESS), which
    grants every permission.

    Args:
        user_permissions: List of permission strings the user has
        required_permission: The permission string to check for

    Returns:
        True if user has the permission, False otherwise
    """
    if not user_permissions:
        return False

    granted = set(user_permissions)
    # The wildcard short-circuits every other check.
    if Permission.FULL_ACCESS.value in granted:
        return True
    return required_permission in granted
def has_any_permission(
    user_permissions: list[str], required_permissions: list[str]
) -> bool:
    """
    Check if the user has any of the required permissions.

    The full-access wildcard (Permission.FULL_ACCESS) always matches.

    Args:
        user_permissions: List of permission strings the user has
        required_permissions: List of permission strings to check for (any match)

    Returns:
        True if user has at least one of the permissions, False otherwise
    """
    if not user_permissions:
        return False

    granted = set(user_permissions)
    if Permission.FULL_ACCESS.value in granted:
        return True
    # Shares at least one element with the required set.
    return not granted.isdisjoint(required_permissions)
def has_all_permissions(
    user_permissions: list[str], required_permissions: list[str]
) -> bool:
    """
    Check if the user has all of the required permissions.

    The full-access wildcard (Permission.FULL_ACCESS) always matches.

    Args:
        user_permissions: List of permission strings the user has
        required_permissions: List of permission strings to check for (all must match)

    Returns:
        True if user has all of the permissions, False otherwise
    """
    if not user_permissions:
        return False

    granted = set(user_permissions)
    if Permission.FULL_ACCESS.value in granted:
        return True
    # Superset check; vacuously True when nothing is required.
    return granted.issuperset(required_permissions)
def get_default_roles_config() -> list[dict]:
    """
    Get the configuration for default system roles.

    These roles are created automatically when a search space is created.
    Only 3 roles are supported:
    - Owner: Full access to everything (assigned to search space creator)
    - Editor: Can create/update content but cannot delete, manage roles, or change settings
    - Viewer: Read-only access to resources (can add comments)

    Returns:
        List of role configurations with name, description, permissions, and flags
    """
    # (name, description, is_default) — Editor is the default role assigned
    # to new members joining via invite.
    role_specs = (
        (
            "Owner",
            "Full access to all search space resources and settings",
            False,
        ),
        (
            "Editor",
            "Can create and update content (no delete, role management, or settings access)",
            True,
        ),
        (
            "Viewer",
            "Read-only access to search space resources",
            False,
        ),
    )
    return [
        {
            "name": name,
            "description": description,
            "permissions": DEFAULT_ROLE_PERMISSIONS[name],
            "is_default": is_default,
            "is_system_role": True,
        }
        for name, description, is_default in role_specs
    ]