SurfSense/surfsense_backend/app/config/__init__.py

import os
import shutil
from pathlib import Path

import yaml
from chonkie import AutoEmbeddings, CodeChunker, RecursiveChunker
from dotenv import load_dotenv
from rerankers import Reranker

# Get the base directory of the project
# (three directory levels above this file's resolved path).
BASE_DIR = Path(__file__).resolve().parent.parent.parent

# Load variables from the ".env" file located directly under BASE_DIR.
# This runs at import time, before the os.getenv() lookups in Config.
env_file = BASE_DIR / ".env"
load_dotenv(env_file)


def is_ffmpeg_installed():
    """
    Report whether an ``ffmpeg`` executable can be located.

    The lookup is delegated to ``shutil.which("ffmpeg")``.

    Returns:
        bool: True when the lookup finds an executable, False when it
        returns None.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path is None:
        return False
    return True


def load_global_llm_configs():
    """
    Load global LLM configurations from the YAML config file.

    Reads ``app/config/global_llm_config.yaml`` under ``BASE_DIR`` and returns
    the list stored under its top-level ``global_llm_configs`` key. No other
    file is consulted: when the file is absent the result is simply empty.

    Returns:
        list: List of global LLM config dictionaries. An empty list is
        returned when the file doesn't exist, is empty, cannot be read or
        parsed, or does not hold a list under ``global_llm_configs``.
    """
    global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"

    if not global_config_file.exists():
        # No global configs available
        return []

    try:
        with open(global_config_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception as e:
        # Best effort: a broken optional config file must not stop startup.
        print(f"Warning: Failed to load global LLM configs: {e}")
        return []

    # yaml.safe_load() yields None for an empty document; treat that the
    # same as "no configs" instead of failing on ``None.get``.
    if data is None:
        return []

    if not isinstance(data, dict):
        print(
            "Warning: Failed to load global LLM configs: "
            f"expected a mapping at the top level, got {type(data).__name__}"
        )
        return []

    configs = data.get("global_llm_configs")

    # A key that is present but left blank (``global_llm_configs:``) parses
    # to None; callers are promised a list.
    if configs is None:
        return []

    if not isinstance(configs, list):
        print(
            "Warning: Failed to load global LLM configs: "
            f"'global_llm_configs' must be a list, got {type(configs).__name__}"
        )
        return []

    return configs


class Config:
    """
    Application settings, evaluated once when the class body executes.

    Most attributes are read from environment variables via ``os.getenv``.
    The class body also performs start-up work: it makes sure ffmpeg is
    available, builds the embedding model, the chunkers and (optionally) the
    reranker, and validates the embedding dimension.

    Raises:
        ValueError: If ffmpeg cannot be found even after
            ``static_ffmpeg.add_paths()``, or if the embedding model reports
            a ``dimension`` greater than 2000.
    """

    # Check if ffmpeg is installed
    if not is_ffmpeg_installed():
        import static_ffmpeg

        # ffmpeg installed on first call to add_paths(), threadsafe.
        static_ffmpeg.add_paths()
        # check if ffmpeg is installed again
        if not is_ffmpeg_installed():
            raise ValueError(
                "FFmpeg is not installed on the system. Please install it to use the Surfsense Podcaster."
            )

    # Database
    DATABASE_URL = os.getenv("DATABASE_URL")

    NEXT_FRONTEND_URL = os.getenv("NEXT_FRONTEND_URL")
    # Backend URL to override the http to https in the OAuth redirect URI
    BACKEND_URL = os.getenv("BACKEND_URL")

    # Auth
    AUTH_TYPE = os.getenv("AUTH_TYPE")
    # Enabled unless the variable is set to something other than "TRUE"
    # (case-insensitive).
    REGISTRATION_ENABLED = os.getenv("REGISTRATION_ENABLED", "TRUE").upper() == "TRUE"

    # Google OAuth
    GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
    GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")

    # Google Calendar redirect URI
    GOOGLE_CALENDAR_REDIRECT_URI = os.getenv("GOOGLE_CALENDAR_REDIRECT_URI")

    # Google Gmail redirect URI
    GOOGLE_GMAIL_REDIRECT_URI = os.getenv("GOOGLE_GMAIL_REDIRECT_URI")

    # Google Drive redirect URI
    GOOGLE_DRIVE_REDIRECT_URI = os.getenv("GOOGLE_DRIVE_REDIRECT_URI")

    # Airtable OAuth
    AIRTABLE_CLIENT_ID = os.getenv("AIRTABLE_CLIENT_ID")
    AIRTABLE_CLIENT_SECRET = os.getenv("AIRTABLE_CLIENT_SECRET")
    AIRTABLE_REDIRECT_URI = os.getenv("AIRTABLE_REDIRECT_URI")

    # Notion OAuth
    NOTION_CLIENT_ID = os.getenv("NOTION_CLIENT_ID")
    NOTION_CLIENT_SECRET = os.getenv("NOTION_CLIENT_SECRET")
    NOTION_REDIRECT_URI = os.getenv("NOTION_REDIRECT_URI")

    # Atlassian OAuth (shared for Jira and Confluence)
    ATLASSIAN_CLIENT_ID = os.getenv("ATLASSIAN_CLIENT_ID")
    ATLASSIAN_CLIENT_SECRET = os.getenv("ATLASSIAN_CLIENT_SECRET")
    JIRA_REDIRECT_URI = os.getenv("JIRA_REDIRECT_URI")
    CONFLUENCE_REDIRECT_URI = os.getenv("CONFLUENCE_REDIRECT_URI")

    # Linear OAuth
    LINEAR_CLIENT_ID = os.getenv("LINEAR_CLIENT_ID")
    LINEAR_CLIENT_SECRET = os.getenv("LINEAR_CLIENT_SECRET")
    LINEAR_REDIRECT_URI = os.getenv("LINEAR_REDIRECT_URI")

    # Slack OAuth
    SLACK_CLIENT_ID = os.getenv("SLACK_CLIENT_ID")
    SLACK_CLIENT_SECRET = os.getenv("SLACK_CLIENT_SECRET")
    SLACK_REDIRECT_URI = os.getenv("SLACK_REDIRECT_URI")

    # Discord OAuth
    DISCORD_CLIENT_ID = os.getenv("DISCORD_CLIENT_ID")
    DISCORD_CLIENT_SECRET = os.getenv("DISCORD_CLIENT_SECRET")
    DISCORD_REDIRECT_URI = os.getenv("DISCORD_REDIRECT_URI")
    DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN")

    # Microsoft Teams OAuth
    TEAMS_CLIENT_ID = os.getenv("TEAMS_CLIENT_ID")
    TEAMS_CLIENT_SECRET = os.getenv("TEAMS_CLIENT_SECRET")
    TEAMS_REDIRECT_URI = os.getenv("TEAMS_REDIRECT_URI")

    # ClickUp OAuth
    CLICKUP_CLIENT_ID = os.getenv("CLICKUP_CLIENT_ID")
    CLICKUP_CLIENT_SECRET = os.getenv("CLICKUP_CLIENT_SECRET")
    CLICKUP_REDIRECT_URI = os.getenv("CLICKUP_REDIRECT_URI")

    # LLM instances are now managed per-user through the LLMConfig system
    # Legacy environment variables removed in favor of user-specific configurations

    # Global LLM Configurations (optional)
    # Load from global_llm_config.yaml if available
    # These can be used as default options for users
    GLOBAL_LLM_CONFIGS = load_global_llm_configs()

    # Chonkie Configuration | Edit this to your needs
    EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
    # Azure OpenAI credentials from environment variables
    AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
    AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")

    # Pass Azure credentials to embeddings when using Azure OpenAI
    # (each keyword is only forwarded when its variable is set and non-empty)
    embedding_kwargs = {}
    if AZURE_OPENAI_ENDPOINT:
        embedding_kwargs["azure_endpoint"] = AZURE_OPENAI_ENDPOINT
    if AZURE_OPENAI_API_KEY:
        embedding_kwargs["azure_api_key"] = AZURE_OPENAI_API_KEY

    embedding_model_instance = AutoEmbeddings.get_embeddings(
        EMBEDDING_MODEL,
        **embedding_kwargs,
    )
    # Both chunkers take their chunk size from the embedding model's
    # ``max_seq_length`` attribute, falling back to 512 when it is absent.
    chunker_instance = RecursiveChunker(
        chunk_size=getattr(embedding_model_instance, "max_seq_length", 512)
    )
    code_chunker_instance = CodeChunker(
        chunk_size=getattr(embedding_model_instance, "max_seq_length", 512)
    )

    # Reranker's Configuration | Pinecone, Cohere etc. Read more at https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file#usage
    # RERANKERS_MODEL_NAME / RERANKERS_MODEL_TYPE are only defined on the
    # class when reranking is enabled; reranker_instance is always defined.
    RERANKERS_ENABLED = os.getenv("RERANKERS_ENABLED", "FALSE").upper() == "TRUE"
    if RERANKERS_ENABLED:
        RERANKERS_MODEL_NAME = os.getenv("RERANKERS_MODEL_NAME")
        RERANKERS_MODEL_TYPE = os.getenv("RERANKERS_MODEL_TYPE")
        reranker_instance = Reranker(
            model_name=RERANKERS_MODEL_NAME,
            model_type=RERANKERS_MODEL_TYPE,
        )
    else:
        reranker_instance = None

    # OAuth JWT
    SECRET_KEY = os.getenv("SECRET_KEY")

    # ETL Service
    ETL_SERVICE = os.getenv("ETL_SERVICE")

    # Pages limit for ETL services (default to very high number for OSS unlimited usage)
    PAGES_LIMIT = int(os.getenv("PAGES_LIMIT", "999999999"))

    # Only the API key matching the selected ETL service is defined on the
    # class; the other attribute does not exist.
    if ETL_SERVICE == "UNSTRUCTURED":
        # Unstructured API Key
        UNSTRUCTURED_API_KEY = os.getenv("UNSTRUCTURED_API_KEY")

    elif ETL_SERVICE == "LLAMACLOUD":
        # LlamaCloud API Key
        LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")

    # Litellm TTS Configuration
    TTS_SERVICE = os.getenv("TTS_SERVICE")
    TTS_SERVICE_API_BASE = os.getenv("TTS_SERVICE_API_BASE")
    TTS_SERVICE_API_KEY = os.getenv("TTS_SERVICE_API_KEY")

    # STT Configuration
    STT_SERVICE = os.getenv("STT_SERVICE")
    STT_SERVICE_API_BASE = os.getenv("STT_SERVICE_API_BASE")
    STT_SERVICE_API_KEY = os.getenv("STT_SERVICE_API_KEY")

    # Validation Checks
    # Check embedding dimension
    if (
        hasattr(embedding_model_instance, "dimension")
        and embedding_model_instance.dimension > 2000
    ):
        raise ValueError(
            f"Embedding dimension for Model: {EMBEDDING_MODEL} "
            f"has {embedding_model_instance.dimension} dimensions, which "
            f"exceeds the maximum of 2000 allowed by PGVector."
        )

    @classmethod
    def get_settings(cls):
        """Get all settings as a dictionary.

        Returns:
            dict: Attributes defined directly on this class whose names do
            not start with an underscore and whose values are neither
            callables, method descriptors nor modules.
        """
        from types import ModuleType

        # ``callable()`` alone is not enough here: a ``classmethod`` object
        # stored in ``cls.__dict__`` is not callable, so this method itself
        # would be reported as a setting. The class-body
        # ``import static_ffmpeg`` likewise binds a module object in the
        # class namespace whenever that branch runs.
        not_a_setting = (classmethod, staticmethod, ModuleType)
        return {
            key: value
            for key, value in cls.__dict__.items()
            if not key.startswith("_")
            and not callable(value)
            and not isinstance(value, not_a_setting)
        }


# Create a config instance (module-level, built when this module is imported)
config = Config()
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`import os`
$DESKTOP-RTLN3BA\$punk$ feat: Stable & Hella Fast Podcast Agent with auto FFMPEG handling. 2025-05-05 01:39:31 -07:00			`import shutil`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00			`from pathlib import Path`
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: added global llm configurations 2025-11-14 21:53:46 -08:00			`import yaml`
$DESKTOP-RTLN3BA\$punk$ feat: Shifted to RecursiveChunker and CodeChunker - Codebase Q/A should be lot better now. 2025-04-29 23:02:07 -07:00			`from chonkie import AutoEmbeddings, CodeChunker, RecursiveChunker`
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`from dotenv import load_dotenv`
$DESKTOP-RTLN3BA\$punk$ feat: Shifted to RecursiveChunker and CodeChunker - Codebase Q/A should be lot better now. 2025-04-29 23:02:07 -07:00			`from rerankers import Reranker`
$DESKTOP-RTLN3BA\$punk$ feat: Added Speech to Text support. - Supports audio & video files. - Will be useful for Youtube vids which dont have transcripts. 2025-05-13 21:13:53 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`# Get the base directory of the project`
			`BASE_DIR = Path(__file__).resolve().parent.parent.parent`

			`env_file = BASE_DIR / ".env"`
			`load_dotenv(env_file)`


$DESKTOP-RTLN3BA\$punk$ feat: Stable & Hella Fast Podcast Agent with auto FFMPEG handling. 2025-05-05 01:39:31 -07:00			`def is_ffmpeg_installed():`
			`"""`
			`Check if ffmpeg is installed on the current system.`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: Stable & Hella Fast Podcast Agent with auto FFMPEG handling. 2025-05-05 01:39:31 -07:00			`Returns:`
			`bool: True if ffmpeg is installed, False otherwise.`
			`"""`
			`return shutil.which("ffmpeg") is not None`


$DESKTOP-RTLN3BA\$punk$ feat: added global llm configurations 2025-11-14 21:53:46 -08:00			`def load_global_llm_configs():`
			`"""`
			`Load global LLM configurations from YAML file.`
			`Falls back to example file if main file doesn't exist.`

			`Returns:`
			`list: List of global LLM config dictionaries, or empty list if file doesn't exist`
			`"""`
			`# Try main config file first`
			`global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"`

			`if not global_config_file.exists():`
			`# No global configs available`
			`return []`

			`try:`
			`with open(global_config_file, encoding="utf-8") as f:`
			`data = yaml.safe_load(f)`
			`return data.get("global_llm_configs", [])`
			`except Exception as e:`
			`print(f"Warning: Failed to load global LLM configs: {e}")`
			`return []`


$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`class Config:`
$DESKTOP-RTLN3BA\$punk$ feat: Stable & Hella Fast Podcast Agent with auto FFMPEG handling. 2025-05-05 01:39:31 -07:00			`# Check if ffmpeg is installed`
			`if not is_ffmpeg_installed():`
			`import static_ffmpeg`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: Stable & Hella Fast Podcast Agent with auto FFMPEG handling. 2025-05-05 01:39:31 -07:00			`# ffmpeg installed on first call to add_paths(), threadsafe.`
			`static_ffmpeg.add_paths()`
			`# check if ffmpeg is installed again`
			`if not is_ffmpeg_installed():`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00			`raise ValueError(`
			`"FFmpeg is not installed on the system. Please install it to use the Surfsense Podcaster."`
			`)`

$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`# Database`
			`DATABASE_URL = os.getenv("DATABASE_URL")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`NEXT_FRONTEND_URL = os.getenv("NEXT_FRONTEND_URL")`
$DESKTOP-RTLN3BA\$punk$ feat: add BACKEND_URL configuration for OAuth redirect - Introduced BACKEND_URL in the configuration to allow overriding the HTTP to HTTPS in the OAuth redirect URI. - Updated the Google OAuth router to conditionally use the BACKEND_URL for the redirect URI when specified. 2025-10-30 23:52:14 -07:00			`# Backend URL to override the http to https in the OAuth redirect URI`
			`BACKEND_URL = os.getenv("BACKEND_URL")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
check if a google calendar exixst before adding it , in the add page 2025-08-02 05:36:43 +02:00			`# Auth`
$DESKTOP-RTLN3BA\$punk$ feat: Removed Hard Dependecy on Google Auth - Introduced LOCAL auth mode 2025-05-21 20:56:23 -07:00			`AUTH_TYPE = os.getenv("AUTH_TYPE")`
feat: implement registration toggle in backend and handle disabled state in frontend 2025-10-20 15:54:52 +05:30			`REGISTRATION_ENABLED = os.getenv("REGISTRATION_ENABLED", "TRUE").upper() == "TRUE"`
update seach source connector schema 2025-08-02 04:39:48 +02:00
check if a google calendar exixst before adding it , in the add page 2025-08-02 05:36:43 +02:00			`# Google OAuth`
update seach source connector schema 2025-08-02 04:39:48 +02:00			`GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")`
			`GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")`
check if a google calendar exixst before adding it , in the add page 2025-08-02 05:36:43 +02:00
			`# Google Calendar redirect URI`
update seach source connector schema 2025-08-02 04:39:48 +02:00			`GOOGLE_CALENDAR_REDIRECT_URI = os.getenv("GOOGLE_CALENDAR_REDIRECT_URI")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
update .env.example 2025-08-04 01:02:35 +02:00			`# Google Gmail redirect URI`
			`GOOGLE_GMAIL_REDIRECT_URI = os.getenv("GOOGLE_GMAIL_REDIRECT_URI")`

feat(config): add GOOGLE_DRIVE_REDIRECT_URI environment variable 2025-12-28 15:53:51 +02:00			`# Google Drive redirect URI`
			`GOOGLE_DRIVE_REDIRECT_URI = os.getenv("GOOGLE_DRIVE_REDIRECT_URI")`

Add airtable connector auth flow routes 2025-08-26 13:56:31 +02:00			`# Airtable OAuth`
			`AIRTABLE_CLIENT_ID = os.getenv("AIRTABLE_CLIENT_ID")`
			`AIRTABLE_CLIENT_SECRET = os.getenv("AIRTABLE_CLIENT_SECRET")`
			`AIRTABLE_REDIRECT_URI = os.getenv("AIRTABLE_REDIRECT_URI")`

feat: add Notion OAuth integration and connector routes - Introduced Notion OAuth support with new environment variables for client ID, client secret, and redirect URI. - Implemented Notion connector routes for OAuth flow, including authorization and callback handling. - Updated existing components to accommodate Notion integration, including validation changes and connector configuration. - Enhanced the Notion indexer to utilize OAuth access tokens instead of integration tokens. - Adjusted UI components to reflect the new Notion connector without requiring special configuration. 2026-01-02 20:07:14 +05:30			`# Notion OAuth`
			`NOTION_CLIENT_ID = os.getenv("NOTION_CLIENT_ID")`
			`NOTION_CLIENT_SECRET = os.getenv("NOTION_CLIENT_SECRET")`
			`NOTION_REDIRECT_URI = os.getenv("NOTION_REDIRECT_URI")`

feat: implement Confluence OAuth integration and connector routes - Added support for Confluence OAuth with new environment variables for client ID, client secret, and redirect URI. - Implemented Confluence connector routes for OAuth flow, including authorization and callback handling. - Enhanced Confluence connector to support both OAuth 2.0 and legacy API token authentication methods. - Updated Confluence indexing logic to utilize OAuth credentials with auto-refresh capabilities. - Removed outdated Confluence UI components and adjusted frontend logic to reflect the new integration. 2026-01-06 13:20:22 +05:30			`# Atlassian OAuth (shared for Jira and Confluence)`
			`ATLASSIAN_CLIENT_ID = os.getenv("ATLASSIAN_CLIENT_ID")`
			`ATLASSIAN_CLIENT_SECRET = os.getenv("ATLASSIAN_CLIENT_SECRET")`
feat: add Atlassian OAuth support for Jira and Confluence - Introduced a shared schema for Atlassian OAuth 2.0 credentials, accommodating both Jira and Confluence. - Updated Jira connector routes to utilize the new AtlassianAuthCredentialsBase for handling OAuth tokens. - Enhanced configuration to include new environment variables for Jira OAuth integration. - Refactored token handling in Jira indexing logic to support the new shared credential structure. 2026-01-06 01:27:29 +05:30			`JIRA_REDIRECT_URI = os.getenv("JIRA_REDIRECT_URI")`
feat: implement Confluence OAuth integration and connector routes - Added support for Confluence OAuth with new environment variables for client ID, client secret, and redirect URI. - Implemented Confluence connector routes for OAuth flow, including authorization and callback handling. - Enhanced Confluence connector to support both OAuth 2.0 and legacy API token authentication methods. - Updated Confluence indexing logic to utilize OAuth credentials with auto-refresh capabilities. - Removed outdated Confluence UI components and adjusted frontend logic to reflect the new integration. 2026-01-06 13:20:22 +05:30			`CONFLUENCE_REDIRECT_URI = os.getenv("CONFLUENCE_REDIRECT_URI")`
feat: add Atlassian OAuth support for Jira and Confluence - Introduced a shared schema for Atlassian OAuth 2.0 credentials, accommodating both Jira and Confluence. - Updated Jira connector routes to utilize the new AtlassianAuthCredentialsBase for handling OAuth tokens. - Enhanced configuration to include new environment variables for Jira OAuth integration. - Refactored token handling in Jira indexing logic to support the new shared credential structure. 2026-01-06 01:27:29 +05:30
feat: add Linear OAuth integration and connector routes - Introduced Linear OAuth support with new environment variables for client ID, client secret, and redirect URI. - Implemented Linear connector routes for OAuth flow, including authorization and callback handling. - Updated existing components to accommodate Linear integration, including validation changes and connector configuration. - Enhanced the Linear indexer to utilize OAuth access tokens instead of API keys. - Adjusted UI components to reflect the new Linear connector without requiring special configuration. 2026-01-02 21:24:28 +05:30			`# Linear OAuth`
			`LINEAR_CLIENT_ID = os.getenv("LINEAR_CLIENT_ID")`
			`LINEAR_CLIENT_SECRET = os.getenv("LINEAR_CLIENT_SECRET")`
			`LINEAR_REDIRECT_URI = os.getenv("LINEAR_REDIRECT_URI")`

feat: add Slack OAuth integration and connector routes - Introduced Slack OAuth support with new environment variables for client ID, client secret, and redirect URI. - Implemented Slack connector routes for OAuth flow, including authorization and callback handling. - Updated configuration to support both new OAuth format and legacy token handling. - Enhanced the Slack indexer to decrypt tokens when necessary, ensuring compatibility with existing encrypted credentials. - Removed outdated Slack connector UI components and adjusted frontend logic to reflect the new integration. 2026-01-04 02:30:00 +05:30			`# Slack OAuth`
			`SLACK_CLIENT_ID = os.getenv("SLACK_CLIENT_ID")`
			`SLACK_CLIENT_SECRET = os.getenv("SLACK_CLIENT_SECRET")`
			`SLACK_REDIRECT_URI = os.getenv("SLACK_REDIRECT_URI")`

feat: add Discord OAuth integration and connector routes - Introduced Discord OAuth support with new environment variables for client ID, client secret, and redirect URI. - Implemented Discord connector routes for OAuth flow, including authorization and callback handling. - Enhanced Discord connector to support both OAuth-based authentication and legacy bot token usage. - Updated Discord indexing logic to utilize OAuth credentials with auto-refresh capabilities. - Removed outdated Discord UI components and adjusted frontend logic to reflect the new integration. 2026-01-05 14:21:39 +05:30			`# Discord OAuth`
			`DISCORD_CLIENT_ID = os.getenv("DISCORD_CLIENT_ID")`
			`DISCORD_CLIENT_SECRET = os.getenv("DISCORD_CLIENT_SECRET")`
			`DISCORD_REDIRECT_URI = os.getenv("DISCORD_REDIRECT_URI")`
			`DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN")`

Add teams connector similar to slack 2026-01-07 15:15:49 -08:00			`# Microsoft Teams OAuth`
			`TEAMS_CLIENT_ID = os.getenv("TEAMS_CLIENT_ID")`
			`TEAMS_CLIENT_SECRET = os.getenv("TEAMS_CLIENT_SECRET")`
			`TEAMS_REDIRECT_URI = os.getenv("TEAMS_REDIRECT_URI")`

feat: implement ClickUp OAuth integration and connector support - Added ClickUp OAuth authentication flow with new environment variables for client ID, client secret, and redirect URI. - Introduced ClickUpHistoryConnector to manage OAuth-based authentication and token refresh for ClickUp API access. - Created ClickUp connector routes for OAuth flow, including authorization and callback handling. - Updated indexing logic to utilize the new ClickUpHistoryConnector, supporting both OAuth and legacy API token methods. - Enhanced frontend components to reflect the new ClickUp integration and removed legacy API token forms. 2026-01-07 15:15:25 +05:30			`# ClickUp OAuth`
			`CLICKUP_CLIENT_ID = os.getenv("CLICKUP_CLIENT_ID")`
			`CLICKUP_CLIENT_SECRET = os.getenv("CLICKUP_CLIENT_SECRET")`
			`CLICKUP_REDIRECT_URI = os.getenv("CLICKUP_REDIRECT_URI")`

$DESKTOP-RTLN3BA\$punk$ feat: added configurable LLM's 2025-06-09 15:50:15 -07:00			`# LLM instances are now managed per-user through the LLMConfig system`
			`# Legacy environment variables removed in favor of user-specific configurations`
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: added global llm configurations 2025-11-14 21:53:46 -08:00			`# Global LLM Configurations (optional)`
			`# Load from global_llm_config.yaml if available`
			`# These can be used as default options for users`
			`GLOBAL_LLM_CONFIGS = load_global_llm_configs()`

$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`# Chonkie Configuration \| Edit this to your needs`
			`EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")`
$DESKTOP-RTLN3BA\$punk$ feat: enhance Azure OpenAI embeddings and add voice support for Azure provider - Introduced a fixed parameter order for AzureOpenAIEmbeddings to resolve compatibility issues. - Updated the voice selection logic to include Azure voices in the podcaster utility. - Modified the page limit service to use a more efficient method for retrieving users. 2025-10-30 22:33:47 -07:00			`# Azure OpenAI credentials from environment variables`
			`AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")`
			`AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")`

			`# Pass Azure credentials to embeddings when using Azure OpenAI`
			`embedding_kwargs = {}`
			`if AZURE_OPENAI_ENDPOINT:`
			`embedding_kwargs["azure_endpoint"] = AZURE_OPENAI_ENDPOINT`
			`if AZURE_OPENAI_API_KEY:`
			`embedding_kwargs["azure_api_key"] = AZURE_OPENAI_API_KEY`

			`embedding_model_instance = AutoEmbeddings.get_embeddings(`
			`EMBEDDING_MODEL,`
			`**embedding_kwargs,`
			`)`
$DESKTOP-RTLN3BA\$punk$ feat: Shifted to RecursiveChunker and CodeChunker - Codebase Q/A should be lot better now. 2025-04-29 23:02:07 -07:00			`chunker_instance = RecursiveChunker(`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00			`chunk_size=getattr(embedding_model_instance, "max_seq_length", 512)`
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`)`
$DESKTOP-RTLN3BA\$punk$ feat: Shifted to RecursiveChunker and CodeChunker - Codebase Q/A should be lot better now. 2025-04-29 23:02:07 -07:00			`code_chunker_instance = CodeChunker(`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00			`chunk_size=getattr(embedding_model_instance, "max_seq_length", 512)`
$DESKTOP-RTLN3BA\$punk$ feat: Shifted to RecursiveChunker and CodeChunker - Codebase Q/A should be lot better now. 2025-04-29 23:02:07 -07:00			`)`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`# Reranker's Configuration \| Pinecode, Cohere etc. Read more at https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file#usage`
$DESKTOP-RTLN3BA\$punk$ chore: update configuration for rerankers - Added RERANKERS_ENABLED option to control reranking functionality. - Updated rerank_documents function to handle cases when reranking is disabled. - Enhanced documentation for environment variables related to rerankers in installation guides. 2025-10-29 23:23:08 -07:00			`RERANKERS_ENABLED = os.getenv("RERANKERS_ENABLED", "FALSE").upper() == "TRUE"`
			`if RERANKERS_ENABLED:`
			`RERANKERS_MODEL_NAME = os.getenv("RERANKERS_MODEL_NAME")`
			`RERANKERS_MODEL_TYPE = os.getenv("RERANKERS_MODEL_TYPE")`
			`reranker_instance = Reranker(`
			`model_name=RERANKERS_MODEL_NAME,`
			`model_type=RERANKERS_MODEL_TYPE,`
			`)`
			`else:`
			`reranker_instance = None`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`# OAuth JWT`
			`SECRET_KEY = os.getenv("SECRET_KEY")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: Removed Hard Dependency on Unstructured.io - Added Llamaparse Support :) 2025-05-30 19:17:19 -07:00			`# ETL Service`
			`ETL_SERVICE = os.getenv("ETL_SERVICE")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat(fix): add PAGES_LIMIT configuration for ETL services 2025-12-11 00:29:56 -08:00			`# Pages limit for ETL services (default to very high number for OSS unlimited usage)`
			`PAGES_LIMIT = int(os.getenv("PAGES_LIMIT", "999999999"))`

$DESKTOP-RTLN3BA\$punk$ feat: Removed Hard Dependency on Unstructured.io - Added Llamaparse Support :) 2025-05-30 19:17:19 -07:00			`if ETL_SERVICE == "UNSTRUCTURED":`
			`# Unstructured API Key`
			`UNSTRUCTURED_API_KEY = os.getenv("UNSTRUCTURED_API_KEY")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: Removed Hard Dependency on Unstructured.io - Added Llamaparse Support :) 2025-05-30 19:17:19 -07:00			`elif ETL_SERVICE == "LLAMACLOUD":`
			`# LlamaCloud API Key`
			`LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: Stable & Hella Fast Podcast Agent with auto FFMPEG handling. 2025-05-05 01:39:31 -07:00			`# Litellm TTS Configuration`
			`TTS_SERVICE = os.getenv("TTS_SERVICE")`
$DESKTOP-RTLN3BA\$punk$ feat: Added Speech to Text support. - Supports audio & video files. - Will be useful for Youtube vids which dont have transcripts. 2025-05-13 21:13:53 -07:00			`TTS_SERVICE_API_BASE = os.getenv("TTS_SERVICE_API_BASE")`
$DESKTOP-RTLN3BA\$punk$ feat: added configurable LLM's 2025-06-09 15:50:15 -07:00			`TTS_SERVICE_API_KEY = os.getenv("TTS_SERVICE_API_KEY")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
Added local Speech-to-Text (STT) support using Faster-Whisper 2025-10-11 23:56:12 +05:00			`# STT Configuration`
$DESKTOP-RTLN3BA\$punk$ feat: Added Speech to Text support. - Supports audio & video files. - Will be useful for Youtube vids which dont have transcripts. 2025-05-13 21:13:53 -07:00			`STT_SERVICE = os.getenv("STT_SERVICE")`
			`STT_SERVICE_API_BASE = os.getenv("STT_SERVICE_API_BASE")`
$DESKTOP-RTLN3BA\$punk$ feat: added configurable LLM's 2025-06-09 15:50:15 -07:00			`STT_SERVICE_API_KEY = os.getenv("STT_SERVICE_API_KEY")`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`# Validation Checks`
			`# Check embedding dimension`
Fixed all ruff lint and formatting errors 2025-07-24 14:43:48 -07:00			`if (`
			`hasattr(embedding_model_instance, "dimension")`
			`and embedding_model_instance.dimension > 2000`
			`):`
$DESKTOP-RTLN3BA\$punk$ feat: SurfSense v0.0.6 init 2025-03-14 18:53:14 -07:00			`raise ValueError(`
			`f"Embedding dimension for Model: {EMBEDDING_MODEL} "`
			`f"has {embedding_model_instance.dimension} dimensions, which "`
			`f"exceeds the maximum of 2000 allowed by PGVector."`
			`)`

			`@classmethod`
			`def get_settings(cls):`
			`"""Get all settings as a dictionary."""`
			`return {`
			`key: value`
			`for key, value in cls.__dict__.items()`
			`if not key.startswith("_") and not callable(value)`
			`}`


			`# Create a config instance`
			`config = Config()`