2025-03-14 18:53:14 -07:00
from collections . abc import AsyncGenerator
2025-07-24 14:43:48 -07:00
from datetime import UTC , datetime
2025-03-14 18:53:14 -07:00
from enum import Enum
from fastapi import Depends
2025-07-25 10:52:34 -07:00
from fastapi_users . db import SQLAlchemyBaseUserTableUUID , SQLAlchemyUserDatabase
2025-03-14 18:53:14 -07:00
from pgvector . sqlalchemy import Vector
from sqlalchemy import (
ARRAY ,
2025-07-24 14:43:48 -07:00
JSON ,
TIMESTAMP ,
2025-03-14 18:53:14 -07:00
Boolean ,
Column ,
Enum as SQLAlchemyEnum ,
ForeignKey ,
Integer ,
String ,
Text ,
2025-08-02 11:47:49 -07:00
UniqueConstraint ,
2025-03-14 18:53:14 -07:00
text ,
)
2025-11-23 15:23:31 +05:30
from sqlalchemy . dialects . postgresql import JSONB , UUID
2025-03-14 18:53:14 -07:00
from sqlalchemy . ext . asyncio import AsyncSession , async_sessionmaker , create_async_engine
from sqlalchemy . orm import DeclarativeBase , Mapped , declared_attr , relationship
from app . config import config
2025-05-21 20:56:23 -07:00
if config . AUTH_TYPE == " GOOGLE " :
2025-07-25 10:52:34 -07:00
from fastapi_users . db import SQLAlchemyBaseOAuthAccountTableUUID
2025-05-21 20:56:23 -07:00
2025-03-14 18:53:14 -07:00
# Database connection URL, copied from the application config when this module is imported.
DATABASE_URL = config . DATABASE_URL
class DocumentType(str, Enum):
    """Origin/kind of a stored document.

    Mixing in ``str`` makes every member usable wherever a plain string is
    expected. Member order is kept stable because it defines iteration order.
    """

    # Generic sources
    EXTENSION = " EXTENSION "
    CRAWLED_URL = " CRAWLED_URL "
    FILE = " FILE "

    # Connector-backed and other named sources
    SLACK_CONNECTOR = " SLACK_CONNECTOR "
    TEAMS_CONNECTOR = " TEAMS_CONNECTOR "
    NOTION_CONNECTOR = " NOTION_CONNECTOR "
    YOUTUBE_VIDEO = " YOUTUBE_VIDEO "
    GITHUB_CONNECTOR = " GITHUB_CONNECTOR "
    LINEAR_CONNECTOR = " LINEAR_CONNECTOR "
    DISCORD_CONNECTOR = " DISCORD_CONNECTOR "
    JIRA_CONNECTOR = " JIRA_CONNECTOR "
    CONFLUENCE_CONNECTOR = " CONFLUENCE_CONNECTOR "
    CLICKUP_CONNECTOR = " CLICKUP_CONNECTOR "
    GOOGLE_CALENDAR_CONNECTOR = " GOOGLE_CALENDAR_CONNECTOR "
    GOOGLE_GMAIL_CONNECTOR = " GOOGLE_GMAIL_CONNECTOR "
    GOOGLE_DRIVE_FILE = " GOOGLE_DRIVE_FILE "
    AIRTABLE_CONNECTOR = " AIRTABLE_CONNECTOR "
    LUMA_CONNECTOR = " LUMA_CONNECTOR "
    ELASTICSEARCH_CONNECTOR = " ELASTICSEARCH_CONNECTOR "
    BOOKSTACK_CONNECTOR = " BOOKSTACK_CONNECTOR "
    CIRCLEBACK = " CIRCLEBACK "
    NOTE = " NOTE "
2025-03-14 18:53:14 -07:00
2025-07-24 14:43:48 -07:00
2025-03-14 18:53:14 -07:00
class SearchSourceConnectorType(str, Enum):
    """Identifiers for the search APIs and data connectors a search space can use.

    Members are ``str`` subclasses; each value mirrors the member name.
    """

    # --- Web search APIs ---
    # NOT IMPLEMENTED YET: reason not recorded; most probably because the
    # results it returns would still need to be crawled.
    SERPER_API = " SERPER_API "
    TAVILY_API = " TAVILY_API "
    SEARXNG_API = " SEARXNG_API "
    LINKUP_API = " LINKUP_API "
    # Baidu AI Search API for Chinese web search
    BAIDU_SEARCH_API = " BAIDU_SEARCH_API "

    # --- Connectors ---
    SLACK_CONNECTOR = " SLACK_CONNECTOR "
    TEAMS_CONNECTOR = " TEAMS_CONNECTOR "
    NOTION_CONNECTOR = " NOTION_CONNECTOR "
    GITHUB_CONNECTOR = " GITHUB_CONNECTOR "
    LINEAR_CONNECTOR = " LINEAR_CONNECTOR "
    DISCORD_CONNECTOR = " DISCORD_CONNECTOR "
    JIRA_CONNECTOR = " JIRA_CONNECTOR "
    CONFLUENCE_CONNECTOR = " CONFLUENCE_CONNECTOR "
    CLICKUP_CONNECTOR = " CLICKUP_CONNECTOR "
    GOOGLE_CALENDAR_CONNECTOR = " GOOGLE_CALENDAR_CONNECTOR "
    GOOGLE_GMAIL_CONNECTOR = " GOOGLE_GMAIL_CONNECTOR "
    GOOGLE_DRIVE_CONNECTOR = " GOOGLE_DRIVE_CONNECTOR "
    AIRTABLE_CONNECTOR = " AIRTABLE_CONNECTOR "
    LUMA_CONNECTOR = " LUMA_CONNECTOR "
    ELASTICSEARCH_CONNECTOR = " ELASTICSEARCH_CONNECTOR "
    WEBCRAWLER_CONNECTOR = " WEBCRAWLER_CONNECTOR "
    BOOKSTACK_CONNECTOR = " BOOKSTACK_CONNECTOR "
    CIRCLEBACK_CONNECTOR = " CIRCLEBACK_CONNECTOR "
    # Model Context Protocol - User-defined API tools
    MCP_CONNECTOR = " MCP_CONNECTOR "
2025-07-24 14:43:48 -07:00
2025-06-09 15:50:15 -07:00
class LiteLLMProvider(str, Enum):
    """LLM providers supported by LiteLLM.

    ``CUSTOM`` is the last member; every value mirrors its member name.
    """

    OPENAI = " OPENAI "
    ANTHROPIC = " ANTHROPIC "
    GOOGLE = " GOOGLE "
    AZURE_OPENAI = " AZURE_OPENAI "
    BEDROCK = " BEDROCK "
    VERTEX_AI = " VERTEX_AI "
    GROQ = " GROQ "
    COHERE = " COHERE "
    MISTRAL = " MISTRAL "
    DEEPSEEK = " DEEPSEEK "
    XAI = " XAI "
    OPENROUTER = " OPENROUTER "
    TOGETHER_AI = " TOGETHER_AI "
    FIREWORKS_AI = " FIREWORKS_AI "
    REPLICATE = " REPLICATE "
    PERPLEXITY = " PERPLEXITY "
    OLLAMA = " OLLAMA "
    ALIBABA_QWEN = " ALIBABA_QWEN "
    MOONSHOT = " MOONSHOT "
    ZHIPU = " ZHIPU "
    ANYSCALE = " ANYSCALE "
    DEEPINFRA = " DEEPINFRA "
    CEREBRAS = " CEREBRAS "
    SAMBANOVA = " SAMBANOVA "
    AI21 = " AI21 "
    CLOUDFLARE = " CLOUDFLARE "
    DATABRICKS = " DATABRICKS "
    COMETAPI = " COMETAPI "
    HUGGINGFACE = " HUGGINGFACE "
    CUSTOM = " CUSTOM "
2025-07-16 01:10:33 -07:00
2025-07-24 14:43:48 -07:00
2025-07-16 01:10:33 -07:00
class LogLevel(str, Enum):
    """Severity attached to a log entry, listed from least to most severe."""

    DEBUG = " DEBUG "
    INFO = " INFO "
    WARNING = " WARNING "
    ERROR = " ERROR "
    CRITICAL = " CRITICAL "
2025-07-24 14:43:48 -07:00
2025-07-16 01:10:33 -07:00
class LogStatus(str, Enum):
    """Outcome state recorded on a log entry."""

    IN_PROGRESS = " IN_PROGRESS "
    SUCCESS = " SUCCESS "
    FAILED = " FAILED "
2025-07-24 14:43:48 -07:00
2025-11-27 22:45:04 -08:00
class Permission(str, Enum):
    """Fine-grained permissions over the resources of a search space.

    Values follow a ``resource:action`` pattern, except ``FULL_ACCESS``,
    whose wildcard value grants every permission.
    """

    # --- Documents ---
    DOCUMENTS_CREATE = " documents:create "
    DOCUMENTS_READ = " documents:read "
    DOCUMENTS_UPDATE = " documents:update "
    DOCUMENTS_DELETE = " documents:delete "

    # --- Chats ---
    CHATS_CREATE = " chats:create "
    CHATS_READ = " chats:read "
    CHATS_UPDATE = " chats:update "
    CHATS_DELETE = " chats:delete "

    # --- Comments ---
    COMMENTS_CREATE = " comments:create "
    COMMENTS_READ = " comments:read "
    COMMENTS_DELETE = " comments:delete "

    # --- LLM configs ---
    LLM_CONFIGS_CREATE = " llm_configs:create "
    LLM_CONFIGS_READ = " llm_configs:read "
    LLM_CONFIGS_UPDATE = " llm_configs:update "
    LLM_CONFIGS_DELETE = " llm_configs:delete "

    # --- Podcasts ---
    PODCASTS_CREATE = " podcasts:create "
    PODCASTS_READ = " podcasts:read "
    PODCASTS_UPDATE = " podcasts:update "
    PODCASTS_DELETE = " podcasts:delete "

    # --- Connectors ---
    CONNECTORS_CREATE = " connectors:create "
    CONNECTORS_READ = " connectors:read "
    CONNECTORS_UPDATE = " connectors:update "
    CONNECTORS_DELETE = " connectors:delete "

    # --- Logs ---
    LOGS_READ = " logs:read "
    LOGS_DELETE = " logs:delete "

    # --- Members ---
    MEMBERS_INVITE = " members:invite "
    MEMBERS_VIEW = " members:view "
    MEMBERS_REMOVE = " members:remove "
    MEMBERS_MANAGE_ROLES = " members:manage_roles "

    # --- Roles ---
    ROLES_CREATE = " roles:create "
    ROLES_READ = " roles:read "
    ROLES_UPDATE = " roles:update "
    ROLES_DELETE = " roles:delete "

    # --- Search space settings ---
    SETTINGS_VIEW = " settings:view "
    SETTINGS_UPDATE = " settings:update "
    SETTINGS_DELETE = " settings:delete "  # Delete the entire search space

    # --- Wildcard: every permission ---
    FULL_ACCESS = " * "
# Predefined role permission sets for convenience
2026-01-20 02:59:32 -08:00
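# Note: Only Owner, Editor, and Viewer roles are supported.
# Owner has full access (*). Editor can create, read, and update content and invite members, but cannot delete anything, remove members, manage roles, or change settings. Viewer is read-only, except that it may also create comments.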
2025-11-27 22:45:04 -08:00
# Maps each built-in role name to the list of permission values it grants.
# The lists hold the raw string values of Permission members, in a fixed order.
DEFAULT_ROLE_PERMISSIONS = {
    # Owner: wildcard only.
    " Owner ": [Permission.FULL_ACCESS.value],
    # Editor: create/read/update on content resources, no delete permissions.
    " Editor ": [
        granted.value
        for granted in (
            # Documents (no delete)
            Permission.DOCUMENTS_CREATE,
            Permission.DOCUMENTS_READ,
            Permission.DOCUMENTS_UPDATE,
            # Chats (no delete)
            Permission.CHATS_CREATE,
            Permission.CHATS_READ,
            Permission.CHATS_UPDATE,
            # Comments (no delete)
            Permission.COMMENTS_CREATE,
            Permission.COMMENTS_READ,
            # LLM configs (no delete)
            Permission.LLM_CONFIGS_CREATE,
            Permission.LLM_CONFIGS_READ,
            Permission.LLM_CONFIGS_UPDATE,
            # Podcasts (no delete)
            Permission.PODCASTS_CREATE,
            Permission.PODCASTS_READ,
            Permission.PODCASTS_UPDATE,
            # Connectors (no delete)
            Permission.CONNECTORS_CREATE,
            Permission.CONNECTORS_READ,
            Permission.CONNECTORS_UPDATE,
            # Logs (read only)
            Permission.LOGS_READ,
            # Members (invite and view only; no role management or removal)
            Permission.MEMBERS_INVITE,
            Permission.MEMBERS_VIEW,
            # Roles (read only)
            Permission.ROLES_READ,
            # Settings (view only)
            Permission.SETTINGS_VIEW,
        )
    ],
    # Viewer: read access everywhere, plus the ability to create comments.
    " Viewer ": [
        granted.value
        for granted in (
            # Documents (read only)
            Permission.DOCUMENTS_READ,
            # Chats (read only)
            Permission.CHATS_READ,
            # Comments (create and read, no delete)
            Permission.COMMENTS_CREATE,
            Permission.COMMENTS_READ,
            # LLM configs (read only)
            Permission.LLM_CONFIGS_READ,
            # Podcasts (read only)
            Permission.PODCASTS_READ,
            # Connectors (read only)
            Permission.CONNECTORS_READ,
            # Logs (read only)
            Permission.LOGS_READ,
            # Members (view only)
            Permission.MEMBERS_VIEW,
            # Roles (read only)
            Permission.ROLES_READ,
            # Settings (view only)
            Permission.SETTINGS_VIEW,
        )
    ],
}
2025-03-14 18:53:14 -07:00
class Base(DeclarativeBase):
    """Declarative base class that every ORM model in this module derives from."""
2025-07-24 14:43:48 -07:00
2025-03-14 18:53:14 -07:00
class TimestampMixin:
    """Mixin that adds a required, indexed ``created_at`` column to a model."""

    @declared_attr
    def created_at(cls):  # noqa: N805
        """Return a new timezone-aware timestamp column for the mapped class.

        The Python-side default is evaluated per row and yields the current
        time in UTC.
        """
        tz_aware_timestamp = TIMESTAMP(timezone=True)
        return Column(
            tz_aware_timestamp,
            nullable=False,
            default=lambda: datetime.now(UTC),
            index=True,
        )
2025-03-14 18:53:14 -07:00
class BaseModel(Base):
    """Abstract parent for concrete models; supplies the integer primary key."""

    # Abstract: no table is created for this class itself.
    __abstract__ = True
    __allow_unmapped__ = True

    id = Column(Integer, primary_key=True, index=True)
2025-07-24 14:43:48 -07:00
2025-12-21 16:16:50 -08:00
class NewChatMessageRole(str, Enum):
    """Speaker role recorded on a message of the new chat feature."""

    USER = " user "
    ASSISTANT = " assistant "
    SYSTEM = " system "
2026-01-13 00:17:12 -08:00
class ChatVisibility(str, Enum):
    """Sharing level of a chat thread.

    * ``PRIVATE`` - only the creator can see/access the chat (the default).
    * ``SEARCH_SPACE`` - every member of the search space can see/access it.
    * ``PUBLIC`` - (future) anyone with the link; not defined yet.
    """

    PRIVATE = " PRIVATE "
    SEARCH_SPACE = " SEARCH_SPACE "
    # PUBLIC = "PUBLIC"  # Reserved for future implementation
2025-12-21 16:16:50 -08:00
class NewChatThread(BaseModel, TimestampMixin):
    """Conversation thread of the new (assistant-ui based) chat feature.

    A thread holds an ordered message history and belongs to one search
    space. The LangGraph checkpointer uses the thread id for state
    persistence.
    """

    __tablename__ = " new_chat_threads "

    # --- Basic attributes ---
    title = Column(
        String(500),
        nullable=False,
        default=" New Chat ",
        index=True,
    )
    archived = Column(Boolean, nullable=False, default=False)
    # Set on insert and refreshed on every ORM update.
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # --- Visibility / sharing control ---
    visibility = Column(
        SQLAlchemyEnum(ChatVisibility),
        nullable=False,
        default=ChatVisibility.PRIVATE,
        server_default=" PRIVATE ",
        index=True,
    )

    # --- Foreign keys ---
    search_space_id = Column(
        Integer,
        ForeignKey(" searchspaces.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    # Creator of the thread, used for visibility filtering. Nullable because
    # records created before the migration have no creator.
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey(" user.id ", ondelete=" SET NULL "),
        nullable=True,
        index=True,
    )

    # --- Relationships ---
    search_space = relationship(" SearchSpace ", back_populates=" new_chat_threads ")
    created_by = relationship(" User ", back_populates=" new_chat_threads ")
    # Messages are ordered by creation time and removed together with the thread.
    messages = relationship(
        " NewChatMessage ",
        back_populates=" thread ",
        order_by=" NewChatMessage.created_at ",
        cascade=" all, delete-orphan ",
    )
class NewChatMessage(BaseModel, TimestampMixin):
    """Single message inside a ``NewChatThread``, stored in assistant-ui format."""

    __tablename__ = " new_chat_messages "

    role = Column(SQLAlchemyEnum(NewChatMessageRole), nullable=False)
    # JSONB so rich content (text, tool calls, etc.) can be stored.
    content = Column(JSONB, nullable=False)

    # --- Foreign keys ---
    # Owning thread; the row is removed when the thread row is deleted.
    thread_id = Column(
        Integer,
        ForeignKey(" new_chat_threads.id ", ondelete=" CASCADE "),
        nullable=False,
        index=True,
    )
    # Sender of the message (relevant for shared chats); nulled if the user is deleted.
    author_id = Column(
        UUID(as_uuid=True),
        ForeignKey(" user.id ", ondelete=" SET NULL "),
        nullable=True,
        index=True,
    )

    # --- Relationships ---
    thread = relationship(" NewChatThread ", back_populates=" messages ")
    author = relationship(" User ")
    comments = relationship(
        " ChatComment ",
        back_populates=" message ",
        cascade=" all, delete-orphan ",
    )
class ChatComment(BaseModel, TimestampMixin):
    """Comment attached to an AI chat response.

    Intended to support a single level of nesting: replies to comments, but
    no replies to replies. That limit is not enforced by this model.
    """

    __tablename__ = " chat_comments "

    # --- Foreign keys ---
    message_id = Column(
        Integer,
        ForeignKey(" new_chat_messages.id ", ondelete=" CASCADE "),
        nullable=False,
        index=True,
    )
    # Self-reference: NULL for a top-level comment, otherwise the comment replied to.
    parent_id = Column(
        Integer,
        ForeignKey(" chat_comments.id ", ondelete=" CASCADE "),
        nullable=True,
        index=True,
    )
    author_id = Column(
        UUID(as_uuid=True),
        ForeignKey(" user.id ", ondelete=" SET NULL "),
        nullable=True,
        index=True,
    )

    # --- Payload ---
    content = Column(Text, nullable=False)
    # Set on insert and refreshed on every ORM update.
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # --- Relationships ---
    message = relationship(" NewChatMessage ", back_populates=" comments ")
    author = relationship(" User ")
    # The backref exposes the reverse collection on the parent comment.
    parent = relationship(
        " ChatComment ",
        remote_side=" ChatComment.id ",
        backref=" replies ",
    )
    mentions = relationship(
        " ChatCommentMention ",
        back_populates=" comment ",
        cascade=" all, delete-orphan ",
    )
2025-12-21 16:16:50 -08:00
2026-01-15 16:37:46 +02:00
class ChatCommentMention(BaseModel, TimestampMixin):
    """Records an @mention of a user inside a chat comment, for notifications."""

    __tablename__ = " chat_comment_mentions "

    # Both foreign keys cascade: the row goes away with its comment or its user.
    comment_id = Column(
        Integer,
        ForeignKey(" chat_comments.id ", ondelete=" CASCADE "),
        nullable=False,
        index=True,
    )
    mentioned_user_id = Column(
        UUID(as_uuid=True),
        ForeignKey(" user.id ", ondelete=" CASCADE "),
        nullable=False,
        index=True,
    )

    # --- Relationships ---
    comment = relationship(" ChatComment ", back_populates=" mentions ")
    mentioned_user = relationship(" User ")
2025-03-14 18:53:14 -07:00
class Document(BaseModel, TimestampMixin):
    """Document stored in a search space, with its embedding and child chunks."""

    __tablename__ = " documents "

    # --- Descriptive fields ---
    title = Column(String, nullable=False, index=True)
    document_type = Column(SQLAlchemyEnum(DocumentType), nullable=False)
    document_metadata = Column(JSON, nullable=True)

    # --- Content, hashes (both unique) and embedding ---
    content = Column(Text, nullable=False)
    content_hash = Column(String, nullable=False, index=True, unique=True)
    unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
    # Vector width comes from the configured embedding model.
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # --- Editor state ---
    # BlockNote live editing state (NULL when never edited)
    blocknote_document = Column(JSONB, nullable=True)
    # Flag for the BlockNote background reindex
    content_needs_reindexing = Column(
        Boolean,
        nullable=False,
        default=False,
        server_default=text(" false "),
    )

    # When the document was last updated by indexers, processors, or the editor.
    # No default is declared here, so writers must set it themselves.
    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)

    # --- Ownership ---
    search_space_id = Column(
        Integer,
        ForeignKey(" searchspaces.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    search_space = relationship(" SearchSpace ", back_populates=" documents ")

    # Chunks are deleted together with their document.
    chunks = relationship(
        " Chunk ",
        back_populates=" document ",
        cascade=" all, delete-orphan ",
    )
2025-03-14 18:53:14 -07:00
class Chunk(BaseModel, TimestampMixin):
    """Piece of a ``Document`` holding its own text and embedding."""

    __tablename__ = " chunks "

    content = Column(Text, nullable=False)
    # Vector width comes from the configured embedding model.
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # Parent document; the row is removed when the document row is deleted.
    document_id = Column(
        Integer,
        ForeignKey(" documents.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    document = relationship(" Document ", back_populates=" chunks ")
2025-07-24 14:43:48 -07:00
2026-01-09 15:26:55 +02:00
class SurfsenseDocsDocument(BaseModel, TimestampMixin):
    """Stored page of the Surfsense documentation.

    Rows are indexed at migration time from MDX files.
    """

    __tablename__ = " surfsense_docs_documents "

    # File path, e.g. "connectors/slack.mdx"; unique per row.
    source = Column(String, nullable=False, unique=True, index=True)
    title = Column(String, nullable=False)
    content = Column(Text, nullable=False)
    # Used for detecting changes.
    content_hash = Column(String, nullable=False, index=True)
    # Vector width comes from the configured embedding model.
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)

    # Chunks are deleted together with their documentation page.
    chunks = relationship(
        " SurfsenseDocsChunk ",
        back_populates=" document ",
        cascade=" all, delete-orphan ",
    )
2026-01-09 15:28:36 +02:00
class SurfsenseDocsChunk(BaseModel, TimestampMixin):
    """Piece of a ``SurfsenseDocsDocument`` holding its own text and embedding."""

    __tablename__ = " surfsense_docs_chunks "

    content = Column(Text, nullable=False)
    # Vector width comes from the configured embedding model.
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # Parent documentation page; the row is removed when the page row is deleted.
    document_id = Column(
        Integer,
        ForeignKey(" surfsense_docs_documents.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    document = relationship(" SurfsenseDocsDocument ", back_populates=" chunks ")
2025-12-21 22:26:33 -08:00
class Podcast(BaseModel, TimestampMixin):
    """Generated podcast belonging to a search space."""

    __tablename__ = " podcasts "

    title = Column(String(500), nullable=False)
    # List of transcript entries
    podcast_transcript = Column(JSONB, nullable=True)
    # Path to the audio file
    file_location = Column(Text, nullable=True)

    # Owning search space; the row is removed when the search space row is deleted.
    search_space_id = Column(
        Integer,
        ForeignKey(" searchspaces.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    search_space = relationship(" SearchSpace ", back_populates=" podcasts ")
2025-03-14 18:53:14 -07:00
class SearchSpace(BaseModel, TimestampMixin):
    """Workspace owned by a user that groups all search-related resources.

    Every child collection declared below uses ``all, delete-orphan``
    cascading, so children are removed together with the search space.
    """

    __tablename__ = " searchspaces "

    # --- Basic attributes ---
    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)

    # --- Answering behaviour ---
    # Enable/disable citations
    citations_enabled = Column(Boolean, nullable=False, default=True)
    # User's custom instructions
    qna_custom_instructions = Column(Text, nullable=True, default=" ")

    # --- Search space-level LLM preferences (shared by all members) ---
    # Note: These can be negative IDs for global configs (from YAML) or
    # positive IDs for custom configs (from DB). They are plain integers,
    # not foreign keys.
    # For agent/chat operations
    agent_llm_id = Column(Integer, nullable=True)
    # For document summarization
    document_summary_llm_id = Column(Integer, nullable=True)

    # --- Owner ---
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey(" user.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    user = relationship(" User ", back_populates=" search_spaces ")

    # --- Owned content collections ---
    documents = relationship(
        " Document ",
        back_populates=" search_space ",
        order_by=" Document.id ",
        cascade=" all, delete-orphan ",
    )
    # Most recently updated threads first.
    new_chat_threads = relationship(
        " NewChatThread ",
        back_populates=" search_space ",
        order_by=" NewChatThread.updated_at.desc() ",
        cascade=" all, delete-orphan ",
    )
    # Highest id first.
    podcasts = relationship(
        " Podcast ",
        back_populates=" search_space ",
        order_by=" Podcast.id.desc() ",
        cascade=" all, delete-orphan ",
    )
    logs = relationship(
        " Log ",
        back_populates=" search_space ",
        order_by=" Log.id ",
        cascade=" all, delete-orphan ",
    )
    # Newest notifications first.
    notifications = relationship(
        " Notification ",
        back_populates=" search_space ",
        order_by=" Notification.created_at.desc() ",
        cascade=" all, delete-orphan ",
    )
    search_source_connectors = relationship(
        " SearchSourceConnector ",
        back_populates=" search_space ",
        order_by=" SearchSourceConnector.id ",
        cascade=" all, delete-orphan ",
    )
    new_llm_configs = relationship(
        " NewLLMConfig ",
        back_populates=" search_space ",
        order_by=" NewLLMConfig.id ",
        cascade=" all, delete-orphan ",
    )

    # --- RBAC relationships ---
    roles = relationship(
        " SearchSpaceRole ",
        back_populates=" search_space ",
        order_by=" SearchSpaceRole.id ",
        cascade=" all, delete-orphan ",
    )
    memberships = relationship(
        " SearchSpaceMembership ",
        back_populates=" search_space ",
        order_by=" SearchSpaceMembership.id ",
        cascade=" all, delete-orphan ",
    )
    invites = relationship(
        " SearchSpaceInvite ",
        back_populates=" search_space ",
        order_by=" SearchSpaceInvite.id ",
        cascade=" all, delete-orphan ",
    )
2025-07-24 14:43:48 -07:00
2025-03-14 18:53:14 -07:00
class SearchSourceConnector(BaseModel, TimestampMixin):
    """Configured search API or data connector within a search space.

    The combination of search space, user, connector type and name is unique.
    """

    __tablename__ = " search_source_connectors "
    __table_args__ = (
        UniqueConstraint(
            " search_space_id ",
            " user_id ",
            " connector_type ",
            " name ",
            name=" uq_searchspace_user_connector_type_name ",
        ),
    )

    # --- Identity and configuration ---
    name = Column(String(100), nullable=False, index=True)
    connector_type = Column(SQLAlchemyEnum(SearchSourceConnectorType), nullable=False)
    is_indexable = Column(Boolean, nullable=False, default=False)
    last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # Connector settings as JSON. From this point on, ``config`` inside the
    # class body refers to this column rather than the application config.
    config = Column(JSON, nullable=False)

    # --- Periodic indexing fields ---
    periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
    indexing_frequency_minutes = Column(Integer, nullable=True)
    next_scheduled_at = Column(TIMESTAMP(timezone=True), nullable=True)

    # --- Ownership ---
    search_space_id = Column(
        Integer,
        ForeignKey(" searchspaces.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    search_space = relationship(
        " SearchSpace ",
        back_populates=" search_source_connectors ",
    )
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey(" user.id ", ondelete=" CASCADE "),
        nullable=False,
    )
2025-03-14 18:53:14 -07:00
2025-07-24 14:43:48 -07:00
2025-12-23 01:16:25 -08:00
class NewLLMConfig(BaseModel, TimestampMixin):
    """LLM configuration that combines model settings with prompt configuration.

    This table provides:

    - LLM model configuration (provider, model_name, api_key, etc.)
    - Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
    - Citation toggle (enable/disable citation instructions)

    Note: SURFSENSE_TOOLS_INSTRUCTIONS is always used and not configurable.
    """

    __tablename__ = " new_llm_configs "

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)

    # === LLM Model Configuration (from original LLMConfig, excluding 'language') ===
    # Provider from the enum
    provider = Column(SQLAlchemyEnum(LiteLLMProvider), nullable=False)
    # Custom provider name when provider is CUSTOM
    custom_provider = Column(String(100), nullable=True)
    # Just the model name without provider prefix
    model_name = Column(String(100), nullable=False)
    # API Key should be encrypted before storing
    api_key = Column(String, nullable=False)
    api_base = Column(String(500), nullable=True)
    # For any other parameters that litellm supports.
    # The default is the ``dict`` factory, not a ``{}`` literal: a literal
    # would be one shared object used as the default for every row, so an
    # in-place mutation could leak into later inserts.
    litellm_params = Column(JSON, nullable=True, default=dict)

    # === Prompt Configuration ===
    # Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS).
    # Users can customize this from the UI.
    system_instructions = Column(
        Text,
        nullable=False,
        default=" ",  # Empty string means use default SURFSENSE_SYSTEM_INSTRUCTIONS
    )
    # Whether to use the default system instructions when system_instructions is empty
    use_default_system_instructions = Column(Boolean, nullable=False, default=True)
    # Citation toggle - when enabled, SURFSENSE_CITATION_INSTRUCTIONS is injected.
    # When disabled, an anti-citation prompt is injected instead.
    citations_enabled = Column(Boolean, nullable=False, default=True)

    # === Relationships ===
    search_space_id = Column(
        Integer,
        ForeignKey(" searchspaces.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    search_space = relationship(" SearchSpace ", back_populates=" new_llm_configs ")
2025-10-10 00:50:29 -07:00
2025-07-16 01:10:33 -07:00
class Log(BaseModel, TimestampMixin):
    """Log entry recorded for a search space."""

    __tablename__ = " logs "

    level = Column(SQLAlchemyEnum(LogLevel), nullable=False, index=True)
    status = Column(SQLAlchemyEnum(LogStatus), nullable=False, index=True)
    message = Column(Text, nullable=False)
    # Service/component that generated the log
    source = Column(String(200), nullable=True, index=True)
    # Additional context data.
    # The default is the ``dict`` factory, not a ``{}`` literal: a literal
    # would be one shared object used as the default for every row, so an
    # in-place mutation could leak into later inserts.
    log_metadata = Column(JSON, nullable=True, default=dict)

    # Owning search space; the row is removed when the search space row is deleted.
    search_space_id = Column(
        Integer,
        ForeignKey(" searchspaces.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    search_space = relationship(" SearchSpace ", back_populates=" logs ")
2025-07-24 14:43:48 -07:00
2026-01-16 11:32:06 -08:00
class Notification(BaseModel, TimestampMixin):
    """Notification addressed to a user, optionally tied to a search space."""

    __tablename__ = " notifications "

    # --- Foreign keys ---
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey(" user.id ", ondelete=" CASCADE "),
        nullable=False,
        index=True,
    )
    # Optional: a notification need not belong to a search space.
    search_space_id = Column(
        Integer,
        ForeignKey(" searchspaces.id ", ondelete=" CASCADE "),
        nullable=True,
    )

    # --- Payload ---
    # 'connector_indexing', 'document_processing', etc.
    type = Column(String(50), nullable=False)
    title = Column(String(200), nullable=False)
    message = Column(Text, nullable=False)
    read = Column(
        Boolean,
        nullable=False,
        default=False,
        server_default=text(" false "),
        index=True,
    )
    # The attribute name differs from the column name given as the first
    # argument (``metadata`` is reserved on declarative classes).
    # The default is the ``dict`` factory, not a ``{}`` literal: a literal
    # would be one shared object used as the default for every row, so an
    # in-place mutation could leak into later inserts.
    notification_metadata = Column(" metadata ", JSONB, nullable=True, default=dict)
    # Set on insert and refreshed on every ORM update.
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=True,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # --- Relationships ---
    user = relationship(" User ", back_populates=" notifications ")
    search_space = relationship(" SearchSpace ", back_populates=" notifications ")
2025-11-27 22:45:04 -08:00
class SearchSpaceRole(BaseModel, TimestampMixin):
    """Custom role defined for one search space.

    A search space can have several roles with different permission sets.
    Role names are unique within a search space.
    """

    __tablename__ = " search_space_roles "
    __table_args__ = (
        UniqueConstraint(
            " search_space_id ",
            " name ",
            name=" uq_searchspace_role_name ",
        ),
    )

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)
    # List of Permission enum values (e.g., ["documents:read", "chats:create"]).
    # The default is the ``list`` factory, not a ``[]`` literal: a literal
    # would be one shared object used as the default for every row, so an
    # in-place mutation could leak into later inserts.
    permissions = Column(ARRAY(String), nullable=False, default=list)
    # Whether this role is assigned to new members by default when they join via invite
    is_default = Column(Boolean, nullable=False, default=False)
    # System roles (Owner, Editor, Viewer) cannot be deleted
    is_system_role = Column(Boolean, nullable=False, default=False)

    # Owning search space; the row is removed when the search space row is deleted.
    search_space_id = Column(
        Integer,
        ForeignKey(" searchspaces.id ", ondelete=" CASCADE "),
        nullable=False,
    )
    search_space = relationship(" SearchSpace ", back_populates=" roles ")

    # passive_deletes=True leaves handling of dependent rows to the database
    # foreign-key rules when a role is deleted.
    memberships = relationship(
        " SearchSpaceMembership ",
        back_populates=" role ",
        passive_deletes=True,
    )
    invites = relationship(
        " SearchSpaceInvite ",
        back_populates=" role ",
        passive_deletes=True,
    )
class SearchSpaceMembership(BaseModel, TimestampMixin):
    """
    Link between a user and a search space, including the role they hold.

    One user may belong to many search spaces (with a different role in each),
    but the ``uq_user_searchspace_membership`` constraint allows at most one
    membership row per (user, search space) pair.
    """

    __tablename__ = "search_space_memberships"
    __table_args__ = (
        UniqueConstraint(
            "user_id",
            "search_space_id",
            name="uq_user_searchspace_membership",
        ),
    )

    # Both sides of the membership cascade: removing the user or the search
    # space removes the membership row at the database level.
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
    )
    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=False,
    )
    # The role is optional: deleting the role nulls this column instead of
    # deleting the membership.
    role_id = Column(
        Integer,
        ForeignKey("search_space_roles.id", ondelete="SET NULL"),
        nullable=True,
    )

    # True when this user is the original creator/owner of the search space
    is_owner = Column(Boolean, nullable=False, default=False)

    # When the user joined (via invite or as creator); filled in with the
    # current UTC time on insert.
    joined_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
    )

    # Invite that was used to join (null if owner/creator)
    invited_by_invite_id = Column(
        Integer,
        ForeignKey("search_space_invites.id", ondelete="SET NULL"),
        nullable=True,
    )

    user = relationship("User", back_populates="search_space_memberships")
    search_space = relationship("SearchSpace", back_populates="memberships")
    role = relationship("SearchSpaceRole", back_populates="memberships")
    invited_by_invite = relationship(
        "SearchSpaceInvite",
        back_populates="used_by_memberships",
    )
2025-10-10 00:50:29 -07:00
2025-06-09 15:50:15 -07:00
2025-11-27 22:45:04 -08:00
class SearchSpaceInvite(BaseModel, TimestampMixin):
    """
    Shareable invite link for a search space.

    An invite carries the role that will be granted to whoever redeems it,
    plus optional limits (expiry time, maximum number of uses) and an
    on/off switch.
    """

    __tablename__ = "search_space_invites"

    # Unique code embedded in invite URLs
    invite_code = Column(String(64), nullable=False, unique=True, index=True)

    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Role granted on redemption (null means use the default role)
    role_id = Column(
        Integer,
        ForeignKey("search_space_roles.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Creator of the invite; nulled if that user is deleted
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Expiration timestamp (null means the invite never expires)
    expires_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # Upper bound on redemptions (null means unlimited)
    max_uses = Column(Integer, nullable=True)
    # How many times the invite has been redeemed so far
    uses_count = Column(Integer, nullable=False, default=0)
    # Whether the invite is currently active
    is_active = Column(Boolean, nullable=False, default=True)
    # Optional human-readable name/label for the invite
    name = Column(String(100), nullable=True)

    search_space = relationship("SearchSpace", back_populates="invites")
    role = relationship("SearchSpaceRole", back_populates="invites")
    created_by = relationship("User", back_populates="created_invites")
    # Memberships keep a SET NULL reference to the invite, so deletion is
    # delegated to the database via passive_deletes.
    used_by_memberships = relationship(
        "SearchSpaceMembership",
        back_populates="invited_by_invite",
        passive_deletes=True,
    )
2025-07-16 01:10:33 -07:00
2025-07-24 14:43:48 -07:00
2025-05-21 20:56:23 -07:00
# The User model is declared in one of two shapes, selected once at import
# time by config.AUTH_TYPE: the Google variant additionally maps OAuth accounts.
if config.AUTH_TYPE == "GOOGLE":

    class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):
        pass

    class User(SQLAlchemyBaseUserTableUUID, Base):
        # Linked OAuth accounts, eagerly loaded with a join
        oauth_accounts: Mapped[list[OAuthAccount]] = relationship(
            "OAuthAccount",
            lazy="joined",
        )

        search_spaces = relationship("SearchSpace", back_populates="user")

        # Newest notifications first; removed together with the user
        notifications = relationship(
            "Notification",
            back_populates="user",
            order_by="Notification.created_at.desc()",
            cascade="all, delete-orphan",
        )

        # RBAC relationships
        search_space_memberships = relationship(
            "SearchSpaceMembership",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        created_invites = relationship(
            "SearchSpaceInvite",
            back_populates="created_by",
            passive_deletes=True,
        )

        # Chat threads created by this user
        new_chat_threads = relationship(
            "NewChatThread",
            back_populates="created_by",
            passive_deletes=True,
        )

        # Page usage tracking for ETL services; the limit defaults to
        # config.PAGES_LIMIT both in Python and as the server-side default.
        pages_limit = Column(
            Integer,
            nullable=False,
            default=config.PAGES_LIMIT,
            server_default=str(config.PAGES_LIMIT),
        )
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")

        # User profile from OAuth
        display_name = Column(String, nullable=True)
        avatar_url = Column(String, nullable=True)

else:

    class User(SQLAlchemyBaseUserTableUUID, Base):
        search_spaces = relationship("SearchSpace", back_populates="user")

        # Newest notifications first; removed together with the user
        notifications = relationship(
            "Notification",
            back_populates="user",
            order_by="Notification.created_at.desc()",
            cascade="all, delete-orphan",
        )

        # RBAC relationships
        search_space_memberships = relationship(
            "SearchSpaceMembership",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        created_invites = relationship(
            "SearchSpaceInvite",
            back_populates="created_by",
            passive_deletes=True,
        )

        # Chat threads created by this user
        new_chat_threads = relationship(
            "NewChatThread",
            back_populates="created_by",
            passive_deletes=True,
        )

        # Page usage tracking for ETL services; the limit defaults to
        # config.PAGES_LIMIT both in Python and as the server-side default.
        pages_limit = Column(
            Integer,
            nullable=False,
            default=config.PAGES_LIMIT,
            server_default=str(config.PAGES_LIMIT),
        )
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")

        # User profile (can be set manually for non-OAuth users)
        display_name = Column(String, nullable=True)
        avatar_url = Column(String, nullable=True)
2025-03-14 18:53:14 -07:00
# Module-wide async engine built from DATABASE_URL, and the session factory
# bound to it. expire_on_commit=False keeps loaded attributes usable after
# a commit without triggering a refresh.
engine = create_async_engine(DATABASE_URL)
async_session_maker = async_sessionmaker(bind=engine, expire_on_commit=False)
2025-07-24 14:43:48 -07:00
2025-03-14 18:53:14 -07:00
async def setup_indexes():
    """
    Create the search-related indexes if they are not already present.

    Every statement is a ``CREATE INDEX IF NOT EXISTS``; they are executed in
    the listed order inside a single ``engine.begin()`` block.
    """
    index_statements = (
        # Document summary indexes: HNSW on embeddings, GIN full-text on content
        "CREATE INDEX IF NOT EXISTS document_vector_index ON documents USING hnsw (embedding public.vector_cosine_ops)",
        "CREATE INDEX IF NOT EXISTS document_search_index ON documents USING gin (to_tsvector('english', content))",
        # Document chunk indexes: same pair, on the chunks table
        "CREATE INDEX IF NOT EXISTS chucks_vector_index ON chunks USING hnsw (embedding public.vector_cosine_ops)",
        "CREATE INDEX IF NOT EXISTS chucks_search_index ON chunks USING gin (to_tsvector('english', content))",
        # pg_trgm index for efficient ILIKE '%term%' searches on titles;
        # critical for the document mention picker (@mentions) to scale
        "CREATE INDEX IF NOT EXISTS idx_documents_title_trgm ON documents USING gin (title gin_trgm_ops)",
        # B-tree index on search_space_id for fast filtering
        "CREATE INDEX IF NOT EXISTS idx_documents_search_space_id ON documents (search_space_id)",
        # Covering index for the "recent documents" query - enables index-only scan
        "CREATE INDEX IF NOT EXISTS idx_documents_search_space_updated ON documents (search_space_id, updated_at DESC NULLS LAST) INCLUDE (id, title, document_type)",
        # Trigram title index for the surfsense_docs_documents table
        "CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm ON surfsense_docs_documents USING gin (title gin_trgm_ops)",
    )

    async with engine.begin() as conn:
        for ddl in index_statements:
            await conn.execute(text(ddl))
2025-07-24 14:43:48 -07:00
2025-03-14 18:53:14 -07:00
async def create_db_and_tables():
    """
    Bootstrap the database: extensions first, then tables, then indexes.

    The ``vector`` and ``pg_trgm`` extensions and all tables registered on
    ``Base.metadata`` are created inside one ``engine.begin()`` block; the
    indexes are created afterwards by ``setup_indexes()``.
    """
    extension_statements = (
        "CREATE EXTENSION IF NOT EXISTS vector",
        "CREATE EXTENSION IF NOT EXISTS pg_trgm",
    )

    async with engine.begin() as conn:
        for ddl in extension_statements:
            await conn.execute(text(ddl))
        await conn.run_sync(Base.metadata.create_all)

    # Runs after the block above has exited, so the tables exist before indexing.
    await setup_indexes()
async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield one session from ``async_session_maker``; it is closed when the generator exits."""
    session_context = async_session_maker()
    async with session_context as db_session:
        yield db_session
2025-05-21 20:56:23 -07:00
# get_user_db is defined once at import time; which variant depends on
# config.AUTH_TYPE, mirroring the conditional User model definition.
if config.AUTH_TYPE == "GOOGLE":

    async def get_user_db(session: AsyncSession = Depends(get_async_session)):
        """Yield a user database bound to the session, including the OAuth account table."""
        user_db = SQLAlchemyUserDatabase(session, User, OAuthAccount)
        yield user_db

else:

    async def get_user_db(session: AsyncSession = Depends(get_async_session)):
        """Yield a user database bound to the session (no OAuth account table)."""
        user_db = SQLAlchemyUserDatabase(session, User)
        yield user_db
2025-07-24 14:43:48 -07:00
2025-11-27 22:45:04 -08:00
def has_permission(user_permissions: list[str], required_permission: str) -> bool:
    """
    Tell whether a permission list grants one specific permission.

    The full-access wildcard (``Permission.FULL_ACCESS``) grants everything.

    Args:
        user_permissions: Permission strings held by the user.
        required_permission: Permission string being checked.

    Returns:
        True when the permission is granted, False otherwise (including when
        ``user_permissions`` is empty or None).
    """
    # No permissions at all: nothing can be granted.
    if not user_permissions:
        return False

    # Either the wildcard or an exact match is sufficient.
    return (
        Permission.FULL_ACCESS.value in user_permissions
        or required_permission in user_permissions
    )
def has_any_permission(
    user_permissions: list[str], required_permissions: list[str]
) -> bool:
    """
    Tell whether a permission list grants at least one of several permissions.

    Args:
        user_permissions: Permission strings held by the user.
        required_permissions: Candidate permission strings; one match is enough.

    Returns:
        True when the user holds the full-access wildcard or any one of the
        candidates, False otherwise (including when ``user_permissions`` is
        empty or None).
    """
    if not user_permissions:
        return False

    # The full-access wildcard short-circuits the per-permission scan.
    if Permission.FULL_ACCESS.value in user_permissions:
        return True

    for candidate in required_permissions:
        if candidate in user_permissions:
            return True
    return False
def has_all_permissions(
    user_permissions: list[str], required_permissions: list[str]
) -> bool:
    """
    Tell whether a permission list grants every one of several permissions.

    Args:
        user_permissions: Permission strings held by the user.
        required_permissions: Permission strings that must all be present.

    Returns:
        True when the user holds the full-access wildcard or every required
        permission, False otherwise (including when ``user_permissions`` is
        empty or None).
    """
    if not user_permissions:
        return False

    # The full-access wildcard short-circuits the per-permission scan.
    if Permission.FULL_ACCESS.value in user_permissions:
        return True

    for needed in required_permissions:
        if needed not in user_permissions:
            return False
    return True
def get_default_roles_config() -> list[dict]:
    """
    Build the configuration of the default system roles.

    These roles are created automatically when a search space is created.
    Only 3 roles are supported:
    - Owner: Full access to everything (assigned to search space creator)
    - Editor: Can create/update content but cannot delete, manage roles, or change settings
    - Viewer: Read-only access to resources (can add comments)

    Returns:
        One dict per role, in the order Owner, Editor, Viewer, with the keys
        name, description, permissions, is_default and is_system_role.
    """
    # (role name, description, is this the default role for members joining via invite)
    role_specs = (
        (
            "Owner",
            "Full access to all search space resources and settings",
            False,
        ),
        (
            "Editor",
            "Can create and update content (no delete, role management, or settings access)",
            True,
        ),
        (
            "Viewer",
            "Read-only access to search space resources",
            False,
        ),
    )

    return [
        {
            "name": role_name,
            "description": summary,
            "permissions": DEFAULT_ROLE_PERMISSIONS[role_name],
            "is_default": default_for_invites,
            "is_system_role": True,
        }
        for role_name, summary, default_for_invites in role_specs
    ]