mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
- Introduced a new notifications table in the database schema to manage user notifications. - Implemented Electric SQL replication setup for the notifications table, ensuring real-time synchronization. - Updated existing database functions to support real-time updates for connectors and documents using Electric SQL. - Refactored UI components to utilize new hooks for fetching connectors and documents, enhancing performance and user experience.
1111 lines
37 KiB
Python
1111 lines
37 KiB
Python
from collections.abc import AsyncGenerator
|
|
from datetime import UTC, datetime
|
|
from enum import Enum
|
|
|
|
from fastapi import Depends
|
|
from fastapi_users.db import SQLAlchemyBaseUserTableUUID, SQLAlchemyUserDatabase
|
|
from pgvector.sqlalchemy import Vector
|
|
from sqlalchemy import (
|
|
ARRAY,
|
|
JSON,
|
|
TIMESTAMP,
|
|
Boolean,
|
|
Column,
|
|
Enum as SQLAlchemyEnum,
|
|
ForeignKey,
|
|
Integer,
|
|
String,
|
|
Text,
|
|
UniqueConstraint,
|
|
text,
|
|
)
|
|
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
|
from sqlalchemy.orm import DeclarativeBase, Mapped, declared_attr, relationship
|
|
|
|
from app.config import config
|
|
|
|
if config.AUTH_TYPE == "GOOGLE":
|
|
from fastapi_users.db import SQLAlchemyBaseOAuthAccountTableUUID
|
|
|
|
# Database connection URL taken from the application config; it is passed to
# create_async_engine() when the module-level engine is built.
DATABASE_URL = config.DATABASE_URL
|
|
|
|
|
|
class DocumentType(str, Enum):
    """Kinds of documents; stored in ``Document.document_type``.

    Members also subclass ``str``, so each member compares equal to its
    string value.
    """

    EXTENSION = "EXTENSION"
    CRAWLED_URL = "CRAWLED_URL"
    FILE = "FILE"
    SLACK_CONNECTOR = "SLACK_CONNECTOR"
    TEAMS_CONNECTOR = "TEAMS_CONNECTOR"
    NOTION_CONNECTOR = "NOTION_CONNECTOR"
    YOUTUBE_VIDEO = "YOUTUBE_VIDEO"
    GITHUB_CONNECTOR = "GITHUB_CONNECTOR"
    LINEAR_CONNECTOR = "LINEAR_CONNECTOR"
    DISCORD_CONNECTOR = "DISCORD_CONNECTOR"
    JIRA_CONNECTOR = "JIRA_CONNECTOR"
    CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
    CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
    GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
    GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR"
    GOOGLE_DRIVE_FILE = "GOOGLE_DRIVE_FILE"
    AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
    LUMA_CONNECTOR = "LUMA_CONNECTOR"
    ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
    BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
    CIRCLEBACK = "CIRCLEBACK"
    NOTE = "NOTE"
|
|
|
|
|
|
class SearchSourceConnectorType(str, Enum):
    """Connector kinds; stored in ``SearchSourceConnector.connector_type``.

    Members also subclass ``str``, so each member compares equal to its
    string value.
    """

    SERPER_API = "SERPER_API"  # NOT IMPLEMENTED YET : DON'T REMEMBER WHY : MOST PROBABLY BECAUSE WE NEED TO CRAWL THE RESULTS RETURNED BY IT
    TAVILY_API = "TAVILY_API"
    SEARXNG_API = "SEARXNG_API"
    LINKUP_API = "LINKUP_API"
    BAIDU_SEARCH_API = "BAIDU_SEARCH_API"  # Baidu AI Search API for Chinese web search
    SLACK_CONNECTOR = "SLACK_CONNECTOR"
    TEAMS_CONNECTOR = "TEAMS_CONNECTOR"
    NOTION_CONNECTOR = "NOTION_CONNECTOR"
    GITHUB_CONNECTOR = "GITHUB_CONNECTOR"
    LINEAR_CONNECTOR = "LINEAR_CONNECTOR"
    DISCORD_CONNECTOR = "DISCORD_CONNECTOR"
    JIRA_CONNECTOR = "JIRA_CONNECTOR"
    CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
    CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
    GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
    GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR"
    GOOGLE_DRIVE_CONNECTOR = "GOOGLE_DRIVE_CONNECTOR"
    AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
    LUMA_CONNECTOR = "LUMA_CONNECTOR"
    ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
    WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR"
    BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
    CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR"
|
|
|
|
|
|
class LiteLLMProvider(str, Enum):
    """
    Enum for LLM providers supported by LiteLLM.

    Stored in ``NewLLMConfig.provider``. ``CUSTOM`` is paired with the
    free-text ``NewLLMConfig.custom_provider`` column.
    """

    OPENAI = "OPENAI"
    ANTHROPIC = "ANTHROPIC"
    GOOGLE = "GOOGLE"
    AZURE_OPENAI = "AZURE_OPENAI"
    BEDROCK = "BEDROCK"
    VERTEX_AI = "VERTEX_AI"
    GROQ = "GROQ"
    COHERE = "COHERE"
    MISTRAL = "MISTRAL"
    DEEPSEEK = "DEEPSEEK"
    XAI = "XAI"
    OPENROUTER = "OPENROUTER"
    TOGETHER_AI = "TOGETHER_AI"
    FIREWORKS_AI = "FIREWORKS_AI"
    REPLICATE = "REPLICATE"
    PERPLEXITY = "PERPLEXITY"
    OLLAMA = "OLLAMA"
    ALIBABA_QWEN = "ALIBABA_QWEN"
    MOONSHOT = "MOONSHOT"
    ZHIPU = "ZHIPU"
    ANYSCALE = "ANYSCALE"
    DEEPINFRA = "DEEPINFRA"
    CEREBRAS = "CEREBRAS"
    SAMBANOVA = "SAMBANOVA"
    AI21 = "AI21"
    CLOUDFLARE = "CLOUDFLARE"
    DATABRICKS = "DATABRICKS"
    COMETAPI = "COMETAPI"
    HUGGINGFACE = "HUGGINGFACE"
    CUSTOM = "CUSTOM"
|
|
|
|
|
|
class LogLevel(str, Enum):
    """Severity of a ``Log`` row; stored in ``Log.level``."""

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
|
|
|
|
|
|
class LogStatus(str, Enum):
    """Outcome state of a ``Log`` row; stored in ``Log.status``."""

    IN_PROGRESS = "IN_PROGRESS"
    SUCCESS = "SUCCESS"
    FAILED = "FAILED"
|
|
|
|
|
|
class Permission(str, Enum):
    """
    Granular permissions for search space resources.
    Use '*' (FULL_ACCESS) to grant all permissions.

    Apart from the wildcard, every value has the form ``resource:action``.
    The string values are what ``DEFAULT_ROLE_PERMISSIONS`` lists per role.
    """

    # Documents
    DOCUMENTS_CREATE = "documents:create"
    DOCUMENTS_READ = "documents:read"
    DOCUMENTS_UPDATE = "documents:update"
    DOCUMENTS_DELETE = "documents:delete"

    # Chats
    CHATS_CREATE = "chats:create"
    CHATS_READ = "chats:read"
    CHATS_UPDATE = "chats:update"
    CHATS_DELETE = "chats:delete"

    # LLM Configs
    LLM_CONFIGS_CREATE = "llm_configs:create"
    LLM_CONFIGS_READ = "llm_configs:read"
    LLM_CONFIGS_UPDATE = "llm_configs:update"
    LLM_CONFIGS_DELETE = "llm_configs:delete"

    # Podcasts
    PODCASTS_CREATE = "podcasts:create"
    PODCASTS_READ = "podcasts:read"
    PODCASTS_UPDATE = "podcasts:update"
    PODCASTS_DELETE = "podcasts:delete"

    # Connectors
    CONNECTORS_CREATE = "connectors:create"
    CONNECTORS_READ = "connectors:read"
    CONNECTORS_UPDATE = "connectors:update"
    CONNECTORS_DELETE = "connectors:delete"

    # Logs (no create/update permission is defined for logs)
    LOGS_READ = "logs:read"
    LOGS_DELETE = "logs:delete"

    # Members
    MEMBERS_INVITE = "members:invite"
    MEMBERS_VIEW = "members:view"
    MEMBERS_REMOVE = "members:remove"
    MEMBERS_MANAGE_ROLES = "members:manage_roles"

    # Roles
    ROLES_CREATE = "roles:create"
    ROLES_READ = "roles:read"
    ROLES_UPDATE = "roles:update"
    ROLES_DELETE = "roles:delete"

    # Search Space Settings
    SETTINGS_VIEW = "settings:view"
    SETTINGS_UPDATE = "settings:update"
    SETTINGS_DELETE = "settings:delete"  # Delete the entire search space

    # Full access wildcard
    FULL_ACCESS = "*"
|
|
|
|
|
|
# Predefined permission sets for the built-in roles, keyed by role name.
# Every value is a list of Permission string values (not enum members).
DEFAULT_ROLE_PERMISSIONS = {
    role_name: [permission.value for permission in granted]
    for role_name, granted in (
        # Owner: the wildcard only.
        ("Owner", (Permission.FULL_ACCESS,)),
        (
            "Admin",
            (
                # Documents
                Permission.DOCUMENTS_CREATE,
                Permission.DOCUMENTS_READ,
                Permission.DOCUMENTS_UPDATE,
                Permission.DOCUMENTS_DELETE,
                # Chats
                Permission.CHATS_CREATE,
                Permission.CHATS_READ,
                Permission.CHATS_UPDATE,
                Permission.CHATS_DELETE,
                # LLM Configs
                Permission.LLM_CONFIGS_CREATE,
                Permission.LLM_CONFIGS_READ,
                Permission.LLM_CONFIGS_UPDATE,
                Permission.LLM_CONFIGS_DELETE,
                # Podcasts
                Permission.PODCASTS_CREATE,
                Permission.PODCASTS_READ,
                Permission.PODCASTS_UPDATE,
                Permission.PODCASTS_DELETE,
                # Connectors
                Permission.CONNECTORS_CREATE,
                Permission.CONNECTORS_READ,
                Permission.CONNECTORS_UPDATE,
                Permission.CONNECTORS_DELETE,
                # Logs
                Permission.LOGS_READ,
                Permission.LOGS_DELETE,
                # Members
                Permission.MEMBERS_INVITE,
                Permission.MEMBERS_VIEW,
                Permission.MEMBERS_REMOVE,
                Permission.MEMBERS_MANAGE_ROLES,
                # Roles
                Permission.ROLES_CREATE,
                Permission.ROLES_READ,
                Permission.ROLES_UPDATE,
                Permission.ROLES_DELETE,
                # Settings (view and update; no delete)
                Permission.SETTINGS_VIEW,
                Permission.SETTINGS_UPDATE,
            ),
        ),
        (
            "Editor",
            (
                # Documents
                Permission.DOCUMENTS_CREATE,
                Permission.DOCUMENTS_READ,
                Permission.DOCUMENTS_UPDATE,
                Permission.DOCUMENTS_DELETE,
                # Chats
                Permission.CHATS_CREATE,
                Permission.CHATS_READ,
                Permission.CHATS_UPDATE,
                Permission.CHATS_DELETE,
                # LLM Configs (read, create and update; no delete)
                # NOTE(review): this group was previously labelled "read only"
                # although create/update are granted - confirm which is intended.
                Permission.LLM_CONFIGS_READ,
                Permission.LLM_CONFIGS_CREATE,
                Permission.LLM_CONFIGS_UPDATE,
                # Podcasts
                Permission.PODCASTS_CREATE,
                Permission.PODCASTS_READ,
                Permission.PODCASTS_UPDATE,
                Permission.PODCASTS_DELETE,
                # Connectors (create, read and update; no delete)
                Permission.CONNECTORS_CREATE,
                Permission.CONNECTORS_READ,
                Permission.CONNECTORS_UPDATE,
                # Logs (read only)
                Permission.LOGS_READ,
                # Members (view only)
                Permission.MEMBERS_VIEW,
                # Roles (read only)
                Permission.ROLES_READ,
                # Settings (view only)
                Permission.SETTINGS_VIEW,
            ),
        ),
        (
            "Viewer",
            (
                # Read/view access to every resource group, nothing else.
                Permission.DOCUMENTS_READ,
                Permission.CHATS_READ,
                Permission.LLM_CONFIGS_READ,
                Permission.PODCASTS_READ,
                Permission.CONNECTORS_READ,
                Permission.LOGS_READ,
                Permission.MEMBERS_VIEW,
                Permission.ROLES_READ,
                Permission.SETTINGS_VIEW,
            ),
        ),
    )
}
|
|
|
|
|
|
class Base(DeclarativeBase):
    """Declarative base for every ORM model in this module.

    ``Base.metadata`` is what ``create_db_and_tables`` passes to ``create_all``.
    """

    pass
|
|
|
|
|
|
class TimestampMixin:
    """Mixin that adds an indexed, non-null ``created_at`` column to a model."""

    # declared_attr builds a separate Column object for each class that uses
    # the mixin instead of sharing one Column between tables.
    @declared_attr
    def created_at(cls):  # noqa: N805
        return Column(
            TIMESTAMP(timezone=True),
            nullable=False,
            # Python-side default: timezone-aware UTC "now", evaluated per insert.
            default=lambda: datetime.now(UTC),
            index=True,
        )
|
|
|
|
|
|
class BaseModel(Base):
    """Abstract parent of the table models below; contributes the ``id`` key."""

    # Abstract: no table is created for BaseModel itself.
    __abstract__ = True
    # SQLAlchemy 2.0 flag that lets subclasses carry plain (non-``Mapped[]``)
    # annotations without a declarative error.
    __allow_unmapped__ = True

    # Integer primary key shared by every subclass.
    id = Column(Integer, primary_key=True, index=True)
|
|
|
|
|
|
class NewChatMessageRole(str, Enum):
    """Role enum for new chat messages; stored in ``NewChatMessage.role``."""

    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
|
|
|
|
|
|
class NewChatThread(BaseModel, TimestampMixin):
    """
    Thread model for the new chat feature using assistant-ui.
    Each thread represents a conversation with message history.
    LangGraph checkpointer uses thread_id for state persistence.
    """

    __tablename__ = "new_chat_threads"

    title = Column(String(500), nullable=False, default="New Chat", index=True)
    archived = Column(Boolean, nullable=False, default=False)
    # Set to UTC "now" on insert and refreshed on every ORM UPDATE of the row.
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )

    # Foreign keys
    # Owning search space; the database cascades deletes to its threads.
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )

    # Relationships
    search_space = relationship("SearchSpace", back_populates="new_chat_threads")
    # Messages load ordered by creation time; removing a message from the
    # collection (or deleting the thread) deletes it.
    messages = relationship(
        "NewChatMessage",
        back_populates="thread",
        order_by="NewChatMessage.created_at",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class NewChatMessage(BaseModel, TimestampMixin):
    """
    Message model for the new chat feature.
    Stores individual messages in assistant-ui format.
    """

    __tablename__ = "new_chat_messages"

    # Who authored the message (user / assistant / system).
    role = Column(SQLAlchemyEnum(NewChatMessageRole), nullable=False)
    # Content stored as JSONB to support rich content (text, tool calls, etc.)
    content = Column(JSONB, nullable=False)

    # Foreign key to thread
    # Indexed; the database cascades thread deletion to its messages.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Relationship
    thread = relationship("NewChatThread", back_populates="messages")
|
|
|
|
|
|
class Document(BaseModel, TimestampMixin):
    """A document belonging to a search space, with its text and embedding."""

    __tablename__ = "documents"

    title = Column(String, nullable=False, index=True)
    document_type = Column(SQLAlchemyEnum(DocumentType), nullable=False)
    document_metadata = Column(JSON, nullable=True)

    content = Column(Text, nullable=False)
    # Unique across the whole table (not per search space).
    content_hash = Column(String, nullable=False, index=True, unique=True)
    # Optional; also unique across the whole table when set.
    unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
    # Vector width follows the configured embedding model's dimension.
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # BlockNote live editing state (NULL when never edited)
    blocknote_document = Column(JSONB, nullable=True)

    # blocknote background reindex flag
    content_needs_reindexing = Column(
        Boolean, nullable=False, default=False, server_default=text("false")
    )

    # Track when document was last updated by indexers, processors, or editor
    # No default/onupdate is configured, so writers must set this explicitly.
    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="documents")
    # Chunks are deleted together with the document.
    chunks = relationship(
        "Chunk", back_populates="document", cascade="all, delete-orphan"
    )
|
|
|
|
|
|
class Chunk(BaseModel, TimestampMixin):
    """A piece of a ``Document``'s text with its own embedding."""

    __tablename__ = "chunks"

    content = Column(Text, nullable=False)
    # Vector width follows the configured embedding model's dimension.
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # Parent document; the database cascades document deletion to its chunks.
    document_id = Column(
        Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False
    )
    document = relationship("Document", back_populates="chunks")
|
|
|
|
|
|
class SurfsenseDocsDocument(BaseModel, TimestampMixin):
    """
    Surfsense documentation storage.
    Indexed at migration time from MDX files.

    Unlike ``Document``, rows here have no search-space foreign key.
    """

    __tablename__ = "surfsense_docs_documents"

    source = Column(
        String, nullable=False, unique=True, index=True
    )  # File path: "connectors/slack.mdx"
    title = Column(String, nullable=False)
    content = Column(Text, nullable=False)
    content_hash = Column(String, nullable=False, index=True)  # For detecting changes
    # Vector width follows the configured embedding model's dimension.
    embedding = Column(Vector(config.embedding_model_instance.dimension))
    # No default/onupdate is configured, so writers must set this explicitly.
    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)

    # Chunks are deleted together with the documentation page.
    chunks = relationship(
        "SurfsenseDocsChunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class SurfsenseDocsChunk(BaseModel, TimestampMixin):
    """Chunk storage for Surfsense documentation."""

    __tablename__ = "surfsense_docs_chunks"

    content = Column(Text, nullable=False)
    # Vector width follows the configured embedding model's dimension.
    embedding = Column(Vector(config.embedding_model_instance.dimension))

    # Parent documentation page; the database cascades its deletion to chunks.
    document_id = Column(
        Integer,
        ForeignKey("surfsense_docs_documents.id", ondelete="CASCADE"),
        nullable=False,
    )
    document = relationship("SurfsenseDocsDocument", back_populates="chunks")
|
|
|
|
|
|
class Podcast(BaseModel, TimestampMixin):
    """Podcast model for storing generated podcasts."""

    __tablename__ = "podcasts"

    title = Column(String(500), nullable=False)
    podcast_transcript = Column(JSONB, nullable=True)  # List of transcript entries
    file_location = Column(Text, nullable=True)  # Path to the audio file

    # Owning search space; the database cascades deletes to its podcasts.
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="podcasts")
|
|
|
|
|
|
class SearchSpace(BaseModel, TimestampMixin):
    """A search space: the container that documents, chats, podcasts, logs,
    notifications, connectors, LLM configs and RBAC records hang off.

    Every child collection below uses ``cascade="all, delete-orphan"``, so
    deleting a search space through the ORM deletes its children as well.
    """

    __tablename__ = "searchspaces"

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)

    citations_enabled = Column(
        Boolean, nullable=False, default=True
    )  # Enable/disable citations
    qna_custom_instructions = Column(
        Text, nullable=True, default=""
    )  # User's custom instructions

    # Search space-level LLM preferences (shared by all members)
    # Note: These can be negative IDs for global configs (from YAML) or positive IDs for custom configs (from DB)
    # They are plain integers, not foreign keys, so the database does not
    # validate them.
    agent_llm_id = Column(Integer, nullable=True)  # For agent/chat operations
    document_summary_llm_id = Column(
        Integer, nullable=True
    )  # For document summarization

    # User referenced as the space's owner; deleting the user cascades here.
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    user = relationship("User", back_populates="search_spaces")

    documents = relationship(
        "Document",
        back_populates="search_space",
        order_by="Document.id",
        cascade="all, delete-orphan",
    )
    # Most recently updated threads first.
    new_chat_threads = relationship(
        "NewChatThread",
        back_populates="search_space",
        order_by="NewChatThread.updated_at.desc()",
        cascade="all, delete-orphan",
    )
    # Newest podcasts first (by id).
    podcasts = relationship(
        "Podcast",
        back_populates="search_space",
        order_by="Podcast.id.desc()",
        cascade="all, delete-orphan",
    )
    logs = relationship(
        "Log",
        back_populates="search_space",
        order_by="Log.id",
        cascade="all, delete-orphan",
    )
    # Newest notifications first.
    notifications = relationship(
        "Notification",
        back_populates="search_space",
        order_by="Notification.created_at.desc()",
        cascade="all, delete-orphan",
    )
    search_source_connectors = relationship(
        "SearchSourceConnector",
        back_populates="search_space",
        order_by="SearchSourceConnector.id",
        cascade="all, delete-orphan",
    )
    new_llm_configs = relationship(
        "NewLLMConfig",
        back_populates="search_space",
        order_by="NewLLMConfig.id",
        cascade="all, delete-orphan",
    )

    # RBAC relationships
    roles = relationship(
        "SearchSpaceRole",
        back_populates="search_space",
        order_by="SearchSpaceRole.id",
        cascade="all, delete-orphan",
    )
    memberships = relationship(
        "SearchSpaceMembership",
        back_populates="search_space",
        order_by="SearchSpaceMembership.id",
        cascade="all, delete-orphan",
    )
    invites = relationship(
        "SearchSpaceInvite",
        back_populates="search_space",
        order_by="SearchSpaceInvite.id",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class SearchSourceConnector(BaseModel, TimestampMixin):
    """A connector configured by a user inside a search space.

    The unique constraint allows at most one connector of a given type per
    (search space, user) pair.
    """

    __tablename__ = "search_source_connectors"
    __table_args__ = (
        UniqueConstraint(
            "search_space_id",
            "user_id",
            "connector_type",
            name="uq_searchspace_user_connector_type",
        ),
    )

    name = Column(String(100), nullable=False, index=True)
    connector_type = Column(SQLAlchemyEnum(SearchSourceConnectorType), nullable=False)
    is_indexable = Column(Boolean, nullable=False, default=False)
    last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # NOTE: from this line to the end of the class body the name ``config``
    # refers to this Column, not to the ``config`` object imported from
    # app.config. Do not read application settings via ``config`` below here.
    config = Column(JSON, nullable=False)

    # Periodic indexing fields
    periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
    indexing_frequency_minutes = Column(Integer, nullable=True)
    next_scheduled_at = Column(TIMESTAMP(timezone=True), nullable=True)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship(
        "SearchSpace", back_populates="search_source_connectors"
    )

    # Only the foreign key is declared; this class defines no ``user``
    # relationship.
    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
|
|
|
|
|
|
class NewLLMConfig(BaseModel, TimestampMixin):
    """
    New LLM configuration table that combines model settings with prompt configuration.

    This table provides:
    - LLM model configuration (provider, model_name, api_key, etc.)
    - Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
    - Citation toggle (enable/disable citation instructions)

    Note: SURFSENSE_TOOLS_INSTRUCTIONS is always used and not configurable.
    """

    __tablename__ = "new_llm_configs"

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)

    # === LLM Model Configuration (from original LLMConfig, excluding 'language') ===
    # Provider from the enum
    provider = Column(SQLAlchemyEnum(LiteLLMProvider), nullable=False)
    # Custom provider name when provider is CUSTOM
    custom_provider = Column(String(100), nullable=True)
    # Just the model name without provider prefix
    model_name = Column(String(100), nullable=False)
    # API Key should be encrypted before storing
    api_key = Column(String, nullable=False)
    api_base = Column(String(500), nullable=True)
    # For any other parameters that litellm supports
    # ``default=dict`` (a callable) gives every inserted row its own fresh
    # dict. A literal ``{}`` is a single object shared by every row that
    # falls back to the default, so an in-place mutation would leak into
    # later inserts.
    litellm_params = Column(JSON, nullable=True, default=dict)

    # === Prompt Configuration ===
    # Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
    # Users can customize this from the UI
    system_instructions = Column(
        Text,
        nullable=False,
        default="",  # Empty string means use default SURFSENSE_SYSTEM_INSTRUCTIONS
    )
    # Whether to use the default system instructions when system_instructions is empty
    use_default_system_instructions = Column(Boolean, nullable=False, default=True)

    # Citation toggle - when enabled, SURFSENSE_CITATION_INSTRUCTIONS is injected
    # When disabled, an anti-citation prompt is injected instead
    citations_enabled = Column(Boolean, nullable=False, default=True)

    # === Relationships ===
    # Owning search space; the database cascades deletes to its configs.
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="new_llm_configs")
|
|
|
|
|
|
class Log(BaseModel, TimestampMixin):
    """A log entry recorded for a search space, with level and status."""

    __tablename__ = "logs"

    level = Column(SQLAlchemyEnum(LogLevel), nullable=False, index=True)
    status = Column(SQLAlchemyEnum(LogStatus), nullable=False, index=True)
    message = Column(Text, nullable=False)
    source = Column(
        String(200), nullable=True, index=True
    )  # Service/component that generated the log
    # Additional context data.
    # ``default=dict`` (a callable) gives every inserted row its own fresh
    # dict; a literal ``{}`` would be one object shared by all rows that use
    # the default, so mutating it in place would leak into later inserts.
    log_metadata = Column(JSON, nullable=True, default=dict)

    # Owning search space; the database cascades deletes to its logs.
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="logs")
|
|
|
|
|
|
class Notification(BaseModel, TimestampMixin):
    """A notification addressed to a user, optionally tied to a search space."""

    __tablename__ = "notifications"

    # Recipient; the database cascades user deletion to notifications.
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Optional: NULL when the notification is not tied to a search space.
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=True
    )
    # 'document_processed', 'connector_indexed', 'user_mentioned', etc.
    type = Column(String(50), nullable=False)
    title = Column(String(200), nullable=False)
    message = Column(Text, nullable=False)
    read = Column(
        Boolean, nullable=False, default=False, server_default=text("false"), index=True
    )
    # The database column is named "metadata"; the Python attribute uses a
    # different name because ``metadata`` is reserved on declarative classes.
    # ``default=dict`` (a callable) gives every inserted row its own fresh
    # dict; a literal ``{}`` would be one object shared by all rows that use
    # the default.
    notification_metadata = Column("metadata", JSONB, nullable=True, default=dict)

    user = relationship("User", back_populates="notifications")
    search_space = relationship("SearchSpace", back_populates="notifications")
|
|
|
|
|
|
class SearchSpaceRole(BaseModel, TimestampMixin):
    """
    Custom roles that can be defined per search space.
    Each search space can have multiple roles with different permission sets.

    Role names are unique within a search space (``uq_searchspace_role_name``).
    """

    __tablename__ = "search_space_roles"
    __table_args__ = (
        UniqueConstraint(
            "search_space_id",
            "name",
            name="uq_searchspace_role_name",
        ),
    )

    name = Column(String(100), nullable=False, index=True)
    description = Column(String(500), nullable=True)
    # List of Permission enum values (e.g., ["documents:read", "chats:create"])
    # ``default=list`` (a callable) gives every inserted row its own fresh
    # list; a literal ``[]`` would be one object shared by all rows that use
    # the default, so appending to it would leak permissions into later roles.
    permissions = Column(ARRAY(String), nullable=False, default=list)
    # Whether this role is assigned to new members by default when they join via invite
    is_default = Column(Boolean, nullable=False, default=False)
    # System roles (Owner, Admin, Editor, Viewer) cannot be deleted
    is_system_role = Column(Boolean, nullable=False, default=False)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="roles")

    # passive_deletes=True: on role deletion the ORM leaves the dependent rows
    # to the database, whose foreign keys are declared ON DELETE SET NULL.
    memberships = relationship(
        "SearchSpaceMembership", back_populates="role", passive_deletes=True
    )
    invites = relationship(
        "SearchSpaceInvite", back_populates="role", passive_deletes=True
    )
|
|
|
|
|
|
class SearchSpaceMembership(BaseModel, TimestampMixin):
    """
    Tracks user membership in search spaces with their assigned role.
    Each user can be a member of multiple search spaces with different roles.

    A user has at most one membership per search space
    (``uq_user_searchspace_membership``).
    """

    __tablename__ = "search_space_memberships"
    __table_args__ = (
        UniqueConstraint(
            "user_id",
            "search_space_id",
            name="uq_user_searchspace_membership",
        ),
    )

    user_id = Column(
        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    # Nullable: deleting the role sets this to NULL instead of removing the
    # membership.
    role_id = Column(
        Integer,
        ForeignKey("search_space_roles.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Indicates if this user is the original creator/owner of the search space
    is_owner = Column(Boolean, nullable=False, default=False)
    # Timestamp when the user joined (via invite or as creator)
    joined_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
    )
    # Reference to the invite used to join (null if owner/creator)
    # Also set to NULL by the database if that invite is deleted.
    invited_by_invite_id = Column(
        Integer,
        ForeignKey("search_space_invites.id", ondelete="SET NULL"),
        nullable=True,
    )

    user = relationship("User", back_populates="search_space_memberships")
    search_space = relationship("SearchSpace", back_populates="memberships")
    role = relationship("SearchSpaceRole", back_populates="memberships")
    invited_by_invite = relationship(
        "SearchSpaceInvite", back_populates="used_by_memberships"
    )
|
|
|
|
|
|
class SearchSpaceInvite(BaseModel, TimestampMixin):
    """
    Invite links for search spaces.
    Users can create invite links with specific roles that others can use to join.
    """

    __tablename__ = "search_space_invites"

    # Unique invite code (used in invite URLs)
    invite_code = Column(String(64), nullable=False, unique=True, index=True)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    # Role to assign when invite is used (null means use default role)
    role_id = Column(
        Integer,
        ForeignKey("search_space_roles.id", ondelete="SET NULL"),
        nullable=True,
    )
    # User who created this invite
    # Nullable: the invite survives deletion of its creator (SET NULL).
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Expiration timestamp (null means never expires)
    expires_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # Maximum number of times this invite can be used (null means unlimited)
    max_uses = Column(Integer, nullable=True)
    # Number of times this invite has been used
    uses_count = Column(Integer, nullable=False, default=0)
    # Whether this invite is currently active
    is_active = Column(Boolean, nullable=False, default=True)
    # Optional custom name/label for the invite
    name = Column(String(100), nullable=True)

    search_space = relationship("SearchSpace", back_populates="invites")
    role = relationship("SearchSpaceRole", back_populates="invites")
    created_by = relationship("User", back_populates="created_invites")
    # passive_deletes=True: on invite deletion the ORM leaves memberships to
    # the database, whose foreign key is declared ON DELETE SET NULL.
    used_by_memberships = relationship(
        "SearchSpaceMembership",
        back_populates="invited_by_invite",
        passive_deletes=True,
    )
|
|
|
|
|
|
# Exactly one ``User`` model is defined, chosen by config.AUTH_TYPE. The two
# definitions differ only in the ``oauth_accounts`` relationship (and the
# ``OAuthAccount`` table) that exists in the GOOGLE branch; every other
# attribute is duplicated and must be kept in sync by hand.
if config.AUTH_TYPE == "GOOGLE":

    class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):
        """OAuth account table; all columns come from the fastapi-users base."""

        pass

    class User(SQLAlchemyBaseUserTableUUID, Base):
        """Application user (GOOGLE auth variant, with linked OAuth accounts)."""

        # Linked OAuth accounts, loaded with a JOIN whenever a User is loaded.
        oauth_accounts: Mapped[list[OAuthAccount]] = relationship(
            "OAuthAccount", lazy="joined"
        )
        search_spaces = relationship("SearchSpace", back_populates="user")
        # Newest notifications first; deleted together with the user.
        notifications = relationship(
            "Notification",
            back_populates="user",
            order_by="Notification.created_at.desc()",
            cascade="all, delete-orphan",
        )

        # RBAC relationships
        search_space_memberships = relationship(
            "SearchSpaceMembership",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        # passive_deletes=True: invites are left to the database, whose
        # foreign key is declared ON DELETE SET NULL.
        created_invites = relationship(
            "SearchSpaceInvite",
            back_populates="created_by",
            passive_deletes=True,
        )

        # Page usage tracking for ETL services
        # Both the Python-side and the server-side default come from
        # config.PAGES_LIMIT.
        pages_limit = Column(
            Integer,
            nullable=False,
            default=config.PAGES_LIMIT,
            server_default=str(config.PAGES_LIMIT),
        )
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")

else:

    class User(SQLAlchemyBaseUserTableUUID, Base):
        """Application user (non-GOOGLE auth variant, no OAuth accounts)."""

        search_spaces = relationship("SearchSpace", back_populates="user")
        # Newest notifications first; deleted together with the user.
        notifications = relationship(
            "Notification",
            back_populates="user",
            order_by="Notification.created_at.desc()",
            cascade="all, delete-orphan",
        )

        # RBAC relationships
        search_space_memberships = relationship(
            "SearchSpaceMembership",
            back_populates="user",
            cascade="all, delete-orphan",
        )
        # passive_deletes=True: invites are left to the database, whose
        # foreign key is declared ON DELETE SET NULL.
        created_invites = relationship(
            "SearchSpaceInvite",
            back_populates="created_by",
            passive_deletes=True,
        )

        # Page usage tracking for ETL services
        # Both the Python-side and the server-side default come from
        # config.PAGES_LIMIT.
        pages_limit = Column(
            Integer,
            nullable=False,
            default=config.PAGES_LIMIT,
            server_default=str(config.PAGES_LIMIT),
        )
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")
|
|
|
|
|
|
# Module-wide async engine bound to DATABASE_URL; used by the setup helpers
# below and by the session factory.
engine = create_async_engine(DATABASE_URL)
# Session factory. expire_on_commit=False keeps already-loaded attributes
# readable after commit instead of expiring them.
async_session_maker = async_sessionmaker(engine, expire_on_commit=False)
|
|
|
|
|
|
async def setup_indexes():
    """Create the vector and full-text indexes on documents and chunks.

    Every statement uses ``CREATE INDEX IF NOT EXISTS``, so calling this
    again is harmless. All statements run in a single transaction.
    """
    index_statements = (
        # Document summary indexes
        "CREATE INDEX IF NOT EXISTS document_vector_index ON documents USING hnsw (embedding public.vector_cosine_ops)",
        "CREATE INDEX IF NOT EXISTS document_search_index ON documents USING gin (to_tsvector('english', content))",
        # Document chunk indexes. The "chucks_" spelling is the name these
        # indexes already have; renaming would create duplicate indexes.
        "CREATE INDEX IF NOT EXISTS chucks_vector_index ON chunks USING hnsw (embedding public.vector_cosine_ops)",
        "CREATE INDEX IF NOT EXISTS chucks_search_index ON chunks USING gin (to_tsvector('english', content))",
    )
    async with engine.begin() as conn:
        for statement in index_statements:
            await conn.execute(text(statement))
|
|
|
|
|
|
async def create_db_and_tables():
    """Create the ``vector`` extension and all tables, then run index and
    Electric replication setup.

    Order matters: the extension must exist before tables with ``Vector``
    columns are created, and the tables must exist before the two setup
    helpers reference them.
    """
    async with engine.begin() as conn:
        await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
        # create_all is a synchronous API, hence run_sync on the async connection.
        await conn.run_sync(Base.metadata.create_all)
    # Each helper opens its own transaction via engine.begin().
    await setup_indexes()
    await setup_electric_replication()
|
|
|
|
|
|
async def setup_electric_replication():
    """Set up Electric SQL replication for real-time sync tables."""
    # Set REPLICA IDENTITY FULL (required by Electric SQL for replication)
    # This logs full row data for UPDATE/DELETE operations in the WAL
    replica_identity_statements = (
        "ALTER TABLE notifications REPLICA IDENTITY FULL;",
        "ALTER TABLE search_source_connectors REPLICA IDENTITY FULL;",
        "ALTER TABLE documents REPLICA IDENTITY FULL;",
    )

    # Add tables to Electric SQL publication for replication
    # Only add if publication exists and table not already in it
    publication_sql = """
                DO $$
                BEGIN
                    IF EXISTS (SELECT 1 FROM pg_publication WHERE pubname = 'electric_publication_default') THEN
                        -- Add notifications if not already added
                        IF NOT EXISTS (
                            SELECT 1 FROM pg_publication_tables
                            WHERE pubname = 'electric_publication_default'
                            AND tablename = 'notifications'
                        ) THEN
                            ALTER PUBLICATION electric_publication_default ADD TABLE notifications;
                        END IF;

                        -- Add search_source_connectors if not already added
                        IF NOT EXISTS (
                            SELECT 1 FROM pg_publication_tables
                            WHERE pubname = 'electric_publication_default'
                            AND tablename = 'search_source_connectors'
                        ) THEN
                            ALTER PUBLICATION electric_publication_default ADD TABLE search_source_connectors;
                        END IF;

                        -- Add documents if not already added
                        IF NOT EXISTS (
                            SELECT 1 FROM pg_publication_tables
                            WHERE pubname = 'electric_publication_default'
                            AND tablename = 'documents'
                        ) THEN
                            ALTER PUBLICATION electric_publication_default ADD TABLE documents;
                        END IF;
                    END IF;
                END
                $$;
                """

    # Everything runs in one transaction: the three ALTER TABLE statements
    # first, then the publication block.
    async with engine.begin() as conn:
        for statement in replica_identity_statements:
            await conn.execute(text(statement))
        await conn.execute(text(publication_sql))
|
|
|
|
|
|
async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield one ``AsyncSession`` from the module's session factory.

    The session is opened with ``async with``, so it is closed once the
    consumer of this generator is finished with it. ``get_user_db`` consumes
    it through ``Depends(get_async_session)``.
    """
    async with async_session_maker() as db_session:
        yield db_session
|
|
|
|
|
|
# User-database adapter dependency. Which variant of get_user_db exists is
# decided once, at import time, from config.AUTH_TYPE.
if config.AUTH_TYPE == "GOOGLE":

    async def get_user_db(session: AsyncSession = Depends(get_async_session)):
        """Yield a SQLAlchemyUserDatabase built from User and OAuthAccount."""
        yield SQLAlchemyUserDatabase(session, User, OAuthAccount)

else:

    async def get_user_db(session: AsyncSession = Depends(get_async_session)):
        """Yield a SQLAlchemyUserDatabase built from the User model only."""
        yield SQLAlchemyUserDatabase(session, User)
|
|
|
|
|
|
def has_permission(user_permissions: list[str], required_permission: str) -> bool:
    """
    Tell whether a set of permissions grants one specific permission.

    Holding the full-access wildcard (Permission.FULL_ACCESS) grants
    every permission.

    Args:
        user_permissions: Permission strings held by the user; an empty or
            missing list grants nothing
        required_permission: The permission string being asked for

    Returns:
        True if the wildcard or the requested permission is present,
        False otherwise
    """
    if not user_permissions:
        return False

    # The wildcard is checked first and short-circuits the specific lookup.
    return (
        Permission.FULL_ACCESS.value in user_permissions
        or required_permission in user_permissions
    )
|
|
|
|
|
|
def has_any_permission(
    user_permissions: list[str], required_permissions: list[str]
) -> bool:
    """
    Tell whether the user holds at least one of several permissions.

    Holding the full-access wildcard (Permission.FULL_ACCESS) counts as a
    match regardless of what is asked for.

    Args:
        user_permissions: Permission strings held by the user; an empty or
            missing list grants nothing
        required_permissions: Candidate permission strings, any one of which
            is sufficient

    Returns:
        True if the wildcard or any candidate is present, False otherwise
    """
    if not user_permissions:
        return False

    # Wildcard first; otherwise stop at the first candidate the user holds.
    return Permission.FULL_ACCESS.value in user_permissions or any(
        candidate in user_permissions for candidate in required_permissions
    )
|
|
|
|
|
|
def has_all_permissions(
    user_permissions: list[str], required_permissions: list[str]
) -> bool:
    """
    Tell whether the user holds every one of several permissions.

    Holding the full-access wildcard (Permission.FULL_ACCESS) satisfies
    any requirement list.

    Args:
        user_permissions: Permission strings held by the user; an empty or
            missing list grants nothing, even when nothing is required
        required_permissions: Permission strings that must all be present

    Returns:
        True if the wildcard or every required permission is present,
        False otherwise
    """
    if not user_permissions:
        return False

    # Wildcard first; otherwise stop at the first requirement that is missing.
    return Permission.FULL_ACCESS.value in user_permissions or all(
        needed in user_permissions for needed in required_permissions
    )
|
|
|
|
|
|
def get_default_roles_config() -> list[dict]:
    """
    Build the configuration for the default system roles.

    These roles are created automatically when a search space is created.
    Each role's permissions are looked up in DEFAULT_ROLE_PERMISSIONS by
    role name, and every role is flagged as a system role.

    Returns:
        List of role configurations, in the order Owner, Admin, Editor,
        Viewer, each with name, description, permissions, and flags
    """
    # (role name, description, is_default), in the order the roles are returned.
    role_specs = (
        ("Owner", "Full access to all search space resources and settings", False),
        ("Admin", "Can manage most resources except deleting the search space", False),
        # Editor is the default role for new members via invite.
        ("Editor", "Can create and edit documents, chats, and podcasts", True),
        ("Viewer", "Read-only access to search space resources", False),
    )

    return [
        {
            "name": role_name,
            "description": summary,
            "permissions": DEFAULT_ROLE_PERMISSIONS[role_name],
            "is_default": assigned_by_default,
            "is_system_role": True,
        }
        for role_name, summary, assigned_by_default in role_specs
    ]
|