diff --git a/Dockerfile.allinone b/Dockerfile.allinone index 0765deb15..12eee5c90 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -28,12 +28,15 @@ COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./ COPY surfsense_web/source.config.ts ./ COPY surfsense_web/content ./content -# Install dependencies -RUN pnpm install --frozen-lockfile +# Install dependencies (skip postinstall which requires all source files) +RUN pnpm install --frozen-lockfile --ignore-scripts # Copy source COPY surfsense_web/ ./ +# Run fumadocs-mdx postinstall now that source files are available +RUN pnpm fumadocs-mdx + # Build args for frontend ARG NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000 ARG NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL diff --git a/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py b/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py new file mode 100644 index 000000000..bacb33f05 --- /dev/null +++ b/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py @@ -0,0 +1,240 @@ +"""Migrate old chats to new_chat_threads and remove old tables + +Revision ID: 49 +Revises: 48 +Create Date: 2025-12-21 + +This migration: +1. Migrates data from old 'chats' table to 'new_chat_threads' and 'new_chat_messages' +2. Drops the 'podcasts' table (podcast data is not migrated as per user request) +3. Drops the 'chats' table +4. Removes the 'chattype' enum +""" + +import json +from collections.abc import Sequence +from datetime import datetime + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "49" +down_revision: str | None = "48" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def extract_text_content(content: str | dict | list) -> str: + """Extract plain text content from various message formats.""" + if isinstance(content, str): + return content + if isinstance(content, dict): + # Handle dict with 'text' key + if "text" in content: + return content["text"] + return str(content) + if isinstance(content, list): + # Handle list of parts (e.g., [{"type": "text", "text": "..."}]) + texts = [] + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + texts.append(part.get("text", "")) + elif isinstance(part, str): + texts.append(part) + return "\n".join(texts) if texts else "" + return "" + + +def parse_timestamp(ts, fallback): + """Parse ISO timestamp string to datetime object.""" + if ts is None: + return fallback + if isinstance(ts, datetime): + return ts + if isinstance(ts, str): + try: + # Handle ISO format like '2025-11-26T22:43:34.399Z' + ts = ts.replace("Z", "+00:00") + return datetime.fromisoformat(ts) + except (ValueError, TypeError): + return fallback + return fallback + + +def upgrade() -> None: + """Migrate old chats to new_chat_threads and remove old tables.""" + connection = op.get_bind() + + # Get all old chats + old_chats = connection.execute( + sa.text(""" + SELECT id, title, messages, search_space_id, created_at + FROM chats + ORDER BY created_at ASC + """) + ).fetchall() + + print(f"[Migration 49] Found {len(old_chats)} old chats to migrate") + + migrated_count = 0 + for chat_id, title, messages_json, search_space_id, created_at in old_chats: + try: + # Parse messages JSON + if isinstance(messages_json, str): + messages = json.loads(messages_json) + else: + messages = messages_json or [] + + # Skip empty chats + if not messages: + print(f"[Migration 49] Skipping empty chat {chat_id}") + continue + + # Create new thread + result = 
connection.execute( + sa.text(""" + INSERT INTO new_chat_threads + (title, archived, search_space_id, created_at, updated_at) + VALUES (:title, FALSE, :search_space_id, :created_at, :created_at) + RETURNING id + """), + { + "title": title or "Migrated Chat", + "search_space_id": search_space_id, + "created_at": created_at, + }, + ) + new_thread_id = result.fetchone()[0] + + # Migrate messages - only user and assistant roles, skip SOURCES/TERMINAL_INFO + message_count = 0 + for msg in messages: + role_lower = msg.get("role", "").lower() + + # Only migrate user and assistant messages + if role_lower not in ("user", "assistant"): + continue + + # Convert to uppercase for database enum + role = role_lower.upper() + + # Extract content - handle various formats + content_raw = msg.get("content", "") + content_text = extract_text_content(content_raw) + + # Skip empty messages + if not content_text.strip(): + continue + + # Parse message timestamp + msg_created_at = parse_timestamp(msg.get("createdAt"), created_at) + + # Store content as JSONB array format for assistant-ui compatibility + content_list = [{"type": "text", "text": content_text}] + + # Use direct SQL with string interpolation for the enum since CAST doesn't work + # The enum value comes from trusted source (our own code), not user input + connection.execute( + sa.text(f""" + INSERT INTO new_chat_messages + (thread_id, role, content, created_at) + VALUES (:thread_id, '{role}', CAST(:content AS jsonb), :created_at) + """), + { + "thread_id": new_thread_id, + "content": json.dumps(content_list), + "created_at": msg_created_at, + }, + ) + message_count += 1 + + print( + f"[Migration 49] Migrated chat {chat_id} -> thread {new_thread_id} ({message_count} messages)" + ) + migrated_count += 1 + + except Exception as e: + print(f"[Migration 49] Error migrating chat {chat_id}: {e}") + # Re-raise to abort migration - we don't want partial data + raise + + print(f"[Migration 49] Successfully migrated {migrated_count} 
chats") + + # Drop podcasts table (FK references chats, so drop first) + print("[Migration 49] Dropping podcasts table...") + op.drop_table("podcasts") + + # Drop chats table + print("[Migration 49] Dropping chats table...") + op.drop_table("chats") + + # Drop chattype enum + print("[Migration 49] Dropping chattype enum...") + op.execute(sa.text("DROP TYPE IF EXISTS chattype")) + + print("[Migration 49] Migration complete!") + + +def downgrade() -> None: + """Recreate old tables (data cannot be restored).""" + # Recreate chattype enum + op.execute( + sa.text(""" + CREATE TYPE chattype AS ENUM ('QNA') + """) + ) + + # Recreate chats table + op.create_table( + "chats", + sa.Column("id", sa.Integer(), primary_key=True, index=True), + sa.Column("type", sa.Enum("QNA", name="chattype"), nullable=False), + sa.Column("title", sa.String(), nullable=False, index=True), + sa.Column("initial_connectors", sa.ARRAY(sa.String()), nullable=True), + sa.Column("messages", sa.JSON(), nullable=False), + sa.Column("state_version", sa.BigInteger(), nullable=False, default=1), + sa.Column( + "search_space_id", + sa.Integer(), + sa.ForeignKey("searchspaces.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + ) + + # Recreate podcasts table + op.create_table( + "podcasts", + sa.Column("id", sa.Integer(), primary_key=True, index=True), + sa.Column("title", sa.String(), nullable=False, index=True), + sa.Column("podcast_transcript", sa.JSON(), nullable=False, server_default="{}"), + sa.Column("file_location", sa.String(500), nullable=False, server_default=""), + sa.Column( + "chat_id", + sa.Integer(), + sa.ForeignKey("chats.id", ondelete="CASCADE"), + nullable=True, + ), + sa.Column("chat_state_version", sa.BigInteger(), nullable=True), + sa.Column( + "search_space_id", + sa.Integer(), + sa.ForeignKey("searchspaces.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + 
"created_at", + sa.TIMESTAMP(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + ) + + print("[Migration 49 Downgrade] Tables recreated (data not restored)") diff --git a/surfsense_backend/alembic/versions/50_remove_podcast_chat_columns.py b/surfsense_backend/alembic/versions/50_remove_podcast_chat_columns.py new file mode 100644 index 000000000..c0c9e741b --- /dev/null +++ b/surfsense_backend/alembic/versions/50_remove_podcast_chat_columns.py @@ -0,0 +1,48 @@ +"""50_remove_podcast_chat_columns + +Revision ID: 50 +Revises: 49 +Create Date: 2025-12-21 + +Removes chat_id and chat_state_version columns from podcasts table. +These columns were used for the old chat system podcast linking which +has been replaced by the new-chat content-based podcast generation. +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from sqlalchemy import inspect + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "50" +down_revision: str | None = "49" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Upgrade schema - Remove chat_id and chat_state_version from podcasts.""" + conn = op.get_bind() + inspector = inspect(conn) + columns = [col["name"] for col in inspector.get_columns("podcasts")] + + if "chat_id" in columns: + op.drop_column("podcasts", "chat_id") + + if "chat_state_version" in columns: + op.drop_column("podcasts", "chat_state_version") + + +def downgrade() -> None: + """Downgrade schema - Re-add chat_id and chat_state_version to podcasts.""" + op.add_column( + "podcasts", + sa.Column("chat_id", sa.Integer(), nullable=True), + ) + op.add_column( + "podcasts", + sa.Column("chat_state_version", sa.String(100), nullable=True), + ) diff --git a/surfsense_backend/alembic/versions/51_add_new_llm_config_table.py b/surfsense_backend/alembic/versions/51_add_new_llm_config_table.py new file mode 100644 index 000000000..89a5c1246 --- 
/dev/null +++ b/surfsense_backend/alembic/versions/51_add_new_llm_config_table.py @@ -0,0 +1,114 @@ +"""Add NewLLMConfig table for configurable LLM + prompt settings + +Revision ID: 51 +Revises: 50 +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "51" +down_revision: str | None = "50" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """ + Add the new_llm_configs table that combines LLM model settings with prompt configuration. + + This table includes: + - LLM model configuration (provider, model_name, api_key, etc.) + - Configurable system instructions + - Citation toggle + """ + # Create new_llm_configs table only if it doesn't already exist + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'new_llm_configs' + ) THEN + CREATE TABLE new_llm_configs ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + + -- Basic info + name VARCHAR(100) NOT NULL, + description VARCHAR(500), + + -- LLM Model Configuration (same as llm_configs, excluding language) + provider litellmprovider NOT NULL, + custom_provider VARCHAR(100), + model_name VARCHAR(100) NOT NULL, + api_key TEXT NOT NULL, + api_base VARCHAR(500), + litellm_params JSONB DEFAULT '{}', + + -- Prompt Configuration + system_instructions TEXT NOT NULL DEFAULT '', + use_default_system_instructions BOOLEAN NOT NULL DEFAULT TRUE, + citations_enabled BOOLEAN NOT NULL DEFAULT TRUE, + + -- Default flag + is_default BOOLEAN NOT NULL DEFAULT FALSE, + + -- Foreign key to search space + search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE + ); + END IF; + END$$; + """ + ) + + # Create indexes if they don't exist + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'new_llm_configs' AND indexname = 
'ix_new_llm_configs_id' + ) THEN + CREATE INDEX ix_new_llm_configs_id ON new_llm_configs(id); + END IF; + + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'new_llm_configs' AND indexname = 'ix_new_llm_configs_created_at' + ) THEN + CREATE INDEX ix_new_llm_configs_created_at ON new_llm_configs(created_at); + END IF; + + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'new_llm_configs' AND indexname = 'ix_new_llm_configs_name' + ) THEN + CREATE INDEX ix_new_llm_configs_name ON new_llm_configs(name); + END IF; + + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'new_llm_configs' AND indexname = 'ix_new_llm_configs_search_space_id' + ) THEN + CREATE INDEX ix_new_llm_configs_search_space_id ON new_llm_configs(search_space_id); + END IF; + END$$; + """ + ) + + +def downgrade() -> None: + """Remove the new_llm_configs table.""" + # Drop indexes + op.execute("DROP INDEX IF EXISTS ix_new_llm_configs_search_space_id") + op.execute("DROP INDEX IF EXISTS ix_new_llm_configs_name") + op.execute("DROP INDEX IF EXISTS ix_new_llm_configs_created_at") + op.execute("DROP INDEX IF EXISTS ix_new_llm_configs_id") + + # Drop table + op.execute("DROP TABLE IF EXISTS new_llm_configs") diff --git a/surfsense_backend/alembic/versions/52_rename_llm_preference_columns.py b/surfsense_backend/alembic/versions/52_rename_llm_preference_columns.py new file mode 100644 index 000000000..cd1a1dbbc --- /dev/null +++ b/surfsense_backend/alembic/versions/52_rename_llm_preference_columns.py @@ -0,0 +1,130 @@ +"""Rename LLM preference columns in searchspaces table + +Revision ID: 52 +Revises: 51 +Create Date: 2024-12-22 + +This migration renames the LLM preference columns: +- fast_llm_id -> agent_llm_id +- long_context_llm_id -> document_summary_llm_id +- strategic_llm_id is removed (data migrated to document_summary_llm_id) +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "52" +down_revision = "51" +branch_labels = None +depends_on = None + + +def upgrade(): + # First, migrate any strategic_llm_id values to document_summary_llm_id + # (only if document_summary_llm_id/long_context_llm_id is NULL) + # Use IF EXISTS check to handle case where column might not exist + op.execute( + """ + DO $$ + BEGIN + IF EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'strategic_llm_id' + ) THEN + UPDATE searchspaces + SET long_context_llm_id = strategic_llm_id + WHERE long_context_llm_id IS NULL AND strategic_llm_id IS NOT NULL; + END IF; + END$$; + """ + ) + + # Rename columns (only if they exist with old names) + op.execute( + """ + DO $$ + BEGIN + IF EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'fast_llm_id' + ) THEN + ALTER TABLE searchspaces RENAME COLUMN fast_llm_id TO agent_llm_id; + END IF; + END$$; + """ + ) + + op.execute( + """ + DO $$ + BEGIN + IF EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'long_context_llm_id' + ) THEN + ALTER TABLE searchspaces RENAME COLUMN long_context_llm_id TO document_summary_llm_id; + END IF; + END$$; + """ + ) + + # Drop the strategic_llm_id column if it exists + op.execute( + """ + DO $$ + BEGIN + IF EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'strategic_llm_id' + ) THEN + ALTER TABLE searchspaces DROP COLUMN strategic_llm_id; + END IF; + END$$; + """ + ) + + +def downgrade(): + # Add back the strategic_llm_id column + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'strategic_llm_id' + ) THEN + ALTER TABLE searchspaces ADD COLUMN strategic_llm_id INTEGER; + END IF; + END$$; + """ + ) + + # Rename columns back + op.execute( + """ + DO $$ + BEGIN + IF EXISTS ( + 
SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'agent_llm_id' + ) THEN + ALTER TABLE searchspaces RENAME COLUMN agent_llm_id TO fast_llm_id; + END IF; + END$$; + """ + ) + + op.execute( + """ + DO $$ + BEGIN + IF EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'document_summary_llm_id' + ) THEN + ALTER TABLE searchspaces RENAME COLUMN document_summary_llm_id TO long_context_llm_id; + END IF; + END$$; + """ + ) diff --git a/surfsense_backend/alembic/versions/53_cleanup_old_llm_configs.py b/surfsense_backend/alembic/versions/53_cleanup_old_llm_configs.py new file mode 100644 index 000000000..16f5779be --- /dev/null +++ b/surfsense_backend/alembic/versions/53_cleanup_old_llm_configs.py @@ -0,0 +1,244 @@ +"""Migrate data from old llm_configs to new_llm_configs and cleanup + +Revision ID: 53 +Revises: 52 +Create Date: 2024-12-22 + +This migration: +1. Migrates data from old llm_configs table to new_llm_configs (preserving user configs) +2. Drops the old llm_configs table (no longer used) +3. Removes the is_default column from new_llm_configs (roles now determine which config to use) +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "53" +down_revision = "52" +branch_labels = None +depends_on = None + + +def upgrade(): + # STEP 1: Migrate data from old llm_configs to new_llm_configs + # This preserves any user-created configurations + op.execute( + """ + DO $$ + BEGIN + -- Only migrate if both tables exist + IF EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'llm_configs' + ) AND EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'new_llm_configs' + ) THEN + -- Insert old configs into new table (skipping duplicates by name+search_space_id) + INSERT INTO new_llm_configs ( + name, + description, + provider, + custom_provider, + model_name, + api_key, + api_base, + litellm_params, + system_instructions, + use_default_system_instructions, + citations_enabled, + is_default, + search_space_id, + created_at + ) + SELECT + lc.name, + NULL as description, -- Old table didn't have description + lc.provider, + lc.custom_provider, + lc.model_name, + lc.api_key, + lc.api_base, + COALESCE(lc.litellm_params, '{}'::jsonb), + '' as system_instructions, -- Use defaults + TRUE as use_default_system_instructions, + TRUE as citations_enabled, + FALSE as is_default, + lc.search_space_id, + COALESCE(lc.created_at, NOW()) + FROM llm_configs lc + WHERE lc.search_space_id IS NOT NULL + AND NOT EXISTS ( + -- Skip if a config with same name already exists in new_llm_configs for this search space + SELECT 1 FROM new_llm_configs nlc + WHERE nlc.name = lc.name + AND nlc.search_space_id = lc.search_space_id + ); + + -- Log how many configs were migrated + RAISE NOTICE 'Migrated % configs from llm_configs to new_llm_configs', + (SELECT COUNT(*) FROM llm_configs WHERE search_space_id IS NOT NULL); + END IF; + END$$; + """ + ) + + # STEP 2: Update searchspaces to point to new_llm_configs for their agent LLM + # If a search space had an agent_llm_id pointing to old llm_configs, + # try to find the corresponding config in new_llm_configs + op.execute( + """ + DO $$ + BEGIN + IF 
EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'llm_configs' + ) THEN + -- Update agent_llm_id to point to migrated config in new_llm_configs + UPDATE searchspaces ss + SET agent_llm_id = ( + SELECT nlc.id + FROM new_llm_configs nlc + JOIN llm_configs lc ON lc.name = nlc.name AND lc.search_space_id = nlc.search_space_id + WHERE lc.id = ss.agent_llm_id + AND nlc.search_space_id = ss.id + LIMIT 1 + ) + WHERE ss.agent_llm_id IS NOT NULL + AND ss.agent_llm_id > 0 -- Only positive IDs (not global configs) + AND EXISTS ( + SELECT 1 FROM llm_configs lc WHERE lc.id = ss.agent_llm_id + ); + + -- Update document_summary_llm_id similarly + UPDATE searchspaces ss + SET document_summary_llm_id = ( + SELECT nlc.id + FROM new_llm_configs nlc + JOIN llm_configs lc ON lc.name = nlc.name AND lc.search_space_id = nlc.search_space_id + WHERE lc.id = ss.document_summary_llm_id + AND nlc.search_space_id = ss.id + LIMIT 1 + ) + WHERE ss.document_summary_llm_id IS NOT NULL + AND ss.document_summary_llm_id > 0 -- Only positive IDs (not global configs) + AND EXISTS ( + SELECT 1 FROM llm_configs lc WHERE lc.id = ss.document_summary_llm_id + ); + END IF; + END$$; + """ + ) + + # STEP 3: Drop the is_default column from new_llm_configs + # (role assignments now determine which config to use) + op.execute( + """ + DO $$ + BEGIN + IF EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'new_llm_configs' AND column_name = 'is_default' + ) THEN + ALTER TABLE new_llm_configs DROP COLUMN is_default; + END IF; + END$$; + """ + ) + + # STEP 4: Drop the old llm_configs table (data has been migrated) + op.execute("DROP TABLE IF EXISTS llm_configs CASCADE") + + +def downgrade(): + # Recreate the old llm_configs table + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'llm_configs' + ) THEN + CREATE TABLE llm_configs ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) NOT NULL, + provider litellmprovider 
NOT NULL, + custom_provider VARCHAR(100), + model_name VARCHAR(100) NOT NULL, + api_key TEXT NOT NULL, + api_base VARCHAR(500), + language VARCHAR(50), + litellm_params JSONB DEFAULT '{}', + search_space_id INTEGER REFERENCES searchspaces(id) ON DELETE CASCADE, + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE + ); + + -- Create indexes + CREATE INDEX IF NOT EXISTS ix_llm_configs_id ON llm_configs(id); + CREATE INDEX IF NOT EXISTS ix_llm_configs_name ON llm_configs(name); + CREATE INDEX IF NOT EXISTS ix_llm_configs_created_at ON llm_configs(created_at); + END IF; + END$$; + """ + ) + + # Migrate data back from new_llm_configs to llm_configs + op.execute( + """ + DO $$ + BEGIN + IF EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'new_llm_configs' + ) THEN + INSERT INTO llm_configs ( + name, + provider, + custom_provider, + model_name, + api_key, + api_base, + language, + litellm_params, + search_space_id, + created_at + ) + SELECT + nlc.name, + nlc.provider, + nlc.custom_provider, + nlc.model_name, + nlc.api_key, + nlc.api_base, + 'English' as language, -- Default language + COALESCE(nlc.litellm_params, '{}'::jsonb), + nlc.search_space_id, + nlc.created_at + FROM new_llm_configs nlc + WHERE nlc.search_space_id IS NOT NULL + AND NOT EXISTS ( + SELECT 1 FROM llm_configs lc + WHERE lc.name = nlc.name + AND lc.search_space_id = nlc.search_space_id + ); + END IF; + END$$; + """ + ) + + # Add back the is_default column to new_llm_configs + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'new_llm_configs' AND column_name = 'is_default' + ) THEN + ALTER TABLE new_llm_configs ADD COLUMN is_default BOOLEAN NOT NULL DEFAULT FALSE; + END IF; + END$$; + """ + ) diff --git a/surfsense_backend/app/agents/new_chat/__init__.py b/surfsense_backend/app/agents/new_chat/__init__.py index 45edb2362..eccb7a5c3 100644 --- 
a/surfsense_backend/app/agents/new_chat/__init__.py +++ b/surfsense_backend/app/agents/new_chat/__init__.py @@ -1,27 +1,80 @@ -"""Chat agents module.""" +""" +SurfSense New Chat Agent Module. -from app.agents.new_chat.chat_deepagent import ( +This module provides the SurfSense deep agent with configurable tools +for knowledge base search, podcast generation, and more. + +Directory Structure: +- tools/: All agent tools (knowledge_base, podcast, link_preview, etc.) +- chat_deepagent.py: Main agent factory +- system_prompt.py: System prompts and instructions +- context.py: Context schema for the agent +- checkpointer.py: LangGraph checkpointer setup +- llm_config.py: LLM configuration utilities +- utils.py: Shared utilities +""" + +# Agent factory +from .chat_deepagent import create_surfsense_deep_agent + +# Context +from .context import SurfSenseContextSchema + +# LLM config +from .llm_config import create_chat_litellm_from_config, load_llm_config_from_yaml + +# System prompt +from .system_prompt import ( SURFSENSE_CITATION_INSTRUCTIONS, SURFSENSE_SYSTEM_PROMPT, - SurfSenseContextSchema, build_surfsense_system_prompt, - create_chat_litellm_from_config, +) + +# Tools - registry exports +# Tools - factory exports (for direct use) +# Tools - knowledge base utilities +from .tools import ( + BUILTIN_TOOLS, + ToolDefinition, + build_tools, + create_display_image_tool, + create_generate_podcast_tool, + create_link_preview_tool, + create_scrape_webpage_tool, create_search_knowledge_base_tool, - create_surfsense_deep_agent, format_documents_for_context, - load_llm_config_from_yaml, + get_all_tool_names, + get_default_enabled_tools, + get_tool_by_name, search_knowledge_base_async, ) __all__ = [ + # Tools registry + "BUILTIN_TOOLS", + # System prompt "SURFSENSE_CITATION_INSTRUCTIONS", "SURFSENSE_SYSTEM_PROMPT", + # Context "SurfSenseContextSchema", + "ToolDefinition", "build_surfsense_system_prompt", + "build_tools", + # LLM config "create_chat_litellm_from_config", + # Tool 
factories + "create_display_image_tool", + "create_generate_podcast_tool", + "create_link_preview_tool", + "create_scrape_webpage_tool", "create_search_knowledge_base_tool", + # Agent factory "create_surfsense_deep_agent", + # Knowledge base utilities "format_documents_for_context", + "get_all_tool_names", + "get_default_enabled_tools", + "get_tool_by_name", "load_llm_config_from_yaml", "search_knowledge_base_async", ] diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 561fbd3cf..8fd5f3b71 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -1,892 +1,28 @@ """ -Test script for create_deep_agent with ChatLiteLLM from global_llm_config.yaml +SurfSense deep agent implementation. -This demonstrates: -1. Loading LLM config from global_llm_config.yaml -2. Creating a ChatLiteLLM instance -3. Using context_schema to add custom state fields -4. Creating a search_knowledge_base tool similar to fetch_relevant_documents +This module provides the factory function for creating SurfSense deep agents +with configurable tools via the tools registry and configurable prompts +via NewLLMConfig. 
""" -import sys -from pathlib import Path +from collections.abc import Sequence -# Add parent directory to path so 'app' module can be found when running directly -_THIS_FILE = Path(__file__).resolve() -_BACKEND_ROOT = _THIS_FILE.parent.parent.parent.parent # surfsense_backend/ -if str(_BACKEND_ROOT) not in sys.path: - sys.path.insert(0, str(_BACKEND_ROOT)) - -import asyncio -import json -from datetime import UTC, datetime, timedelta -from typing import Any, TypedDict - -import yaml from deepagents import create_deep_agent -from langchain_core.messages import HumanMessage -from langchain_core.tools import tool +from langchain_core.tools import BaseTool from langchain_litellm import ChatLiteLLM +from langgraph.types import Checkpointer from sqlalchemy.ext.asyncio import AsyncSession -from app.db import async_session_maker +from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.new_chat.llm_config import AgentConfig +from app.agents.new_chat.system_prompt import ( + build_configurable_system_prompt, + build_surfsense_system_prompt, +) +from app.agents.new_chat.tools import build_tools from app.services.connector_service import ConnectorService -# ============================================================================= -# LLM Configuration Loading -# ============================================================================= - - -def load_llm_config_from_yaml(llm_config_id: int = -1) -> dict | None: - """ - Load a specific LLM config from global_llm_config.yaml. 
- - Args: - llm_config_id: The id of the config to load (default: -1) - - Returns: - LLM config dict or None if not found - """ - # Get the config file path - base_dir = Path(__file__).resolve().parent.parent.parent.parent - config_file = base_dir / "app" / "config" / "global_llm_config.yaml" - - # Fallback to example file if main config doesn't exist - if not config_file.exists(): - config_file = base_dir / "app" / "config" / "global_llm_config.example.yaml" - if not config_file.exists(): - print("Error: No global_llm_config.yaml or example file found") - return None - - try: - with open(config_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - configs = data.get("global_llm_configs", []) - for cfg in configs: - if isinstance(cfg, dict) and cfg.get("id") == llm_config_id: - return cfg - - print(f"Error: Global LLM config id {llm_config_id} not found") - return None - except Exception as e: - print(f"Error loading config: {e}") - return None - - -def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None: - """ - Create a ChatLiteLLM instance from a global LLM config. 
- - Args: - llm_config: LLM configuration dictionary from YAML - - Returns: - ChatLiteLLM instance or None on error - """ - # Provider mapping (same as in llm_service.py) - provider_map = { - "OPENAI": "openai", - "ANTHROPIC": "anthropic", - "GROQ": "groq", - "COHERE": "cohere", - "GOOGLE": "gemini", - "OLLAMA": "ollama", - "MISTRAL": "mistral", - "AZURE_OPENAI": "azure", - "OPENROUTER": "openrouter", - "XAI": "xai", - "BEDROCK": "bedrock", - "VERTEX_AI": "vertex_ai", - "TOGETHER_AI": "together_ai", - "FIREWORKS_AI": "fireworks_ai", - "DEEPSEEK": "openai", - "ALIBABA_QWEN": "openai", - "MOONSHOT": "openai", - "ZHIPU": "openai", - } - - # Build the model string - if llm_config.get("custom_provider"): - model_string = f"{llm_config['custom_provider']}/{llm_config['model_name']}" - else: - provider = llm_config.get("provider", "").upper() - provider_prefix = provider_map.get(provider, provider.lower()) - model_string = f"{provider_prefix}/{llm_config['model_name']}" - - # Create ChatLiteLLM instance - litellm_kwargs = { - "model": model_string, - "api_key": llm_config.get("api_key"), - } - - # Add optional parameters - if llm_config.get("api_base"): - litellm_kwargs["api_base"] = llm_config["api_base"] - - # Add any additional litellm parameters - if llm_config.get("litellm_params"): - litellm_kwargs.update(llm_config["litellm_params"]) - - return ChatLiteLLM(**litellm_kwargs) - - -# ============================================================================= -# Custom Context Schema -# ============================================================================= - - -class SurfSenseContextSchema(TypedDict): - """ - Custom state schema for the SurfSense deep agent. - - This extends the default agent state with custom fields. 
- The default state already includes: - - messages: Conversation history - - todos: Task list from TodoListMiddleware - - files: Virtual filesystem from FilesystemMiddleware - - We're adding fields needed for knowledge base search: - - search_space_id: The user's search space ID - - db_session: Database session (injected at runtime) - - connector_service: Connector service instance (injected at runtime) - """ - - search_space_id: int - # These are runtime-injected and won't be serialized - # db_session and connector_service are passed when invoking the agent - - -# ============================================================================= -# Knowledge Base Search Tool -# ============================================================================= - -# Canonical connector values used internally by ConnectorService -_ALL_CONNECTORS: list[str] = [ - "EXTENSION", - "FILE", - "SLACK_CONNECTOR", - "NOTION_CONNECTOR", - "YOUTUBE_VIDEO", - "GITHUB_CONNECTOR", - "ELASTICSEARCH_CONNECTOR", - "LINEAR_CONNECTOR", - "JIRA_CONNECTOR", - "CONFLUENCE_CONNECTOR", - "CLICKUP_CONNECTOR", - "GOOGLE_CALENDAR_CONNECTOR", - "GOOGLE_GMAIL_CONNECTOR", - "DISCORD_CONNECTOR", - "AIRTABLE_CONNECTOR", - "TAVILY_API", - "SEARXNG_API", - "LINKUP_API", - "BAIDU_SEARCH_API", - "LUMA_CONNECTOR", - "NOTE", - "BOOKSTACK_CONNECTOR", - "CRAWLED_URL", -] - - -def _normalize_connectors(connectors_to_search: list[str] | None) -> list[str]: - """ - Normalize connectors provided by the model. - - - Accepts user-facing enums like WEBCRAWLER_CONNECTOR and maps them to canonical - ConnectorService types. - - Drops unknown values. - - If None/empty, defaults to searching across all known connectors. 
- """ - if not connectors_to_search: - return list(_ALL_CONNECTORS) - - normalized: list[str] = [] - for raw in connectors_to_search: - c = (raw or "").strip().upper() - if not c: - continue - if c == "WEBCRAWLER_CONNECTOR": - c = "CRAWLED_URL" - normalized.append(c) - - # de-dupe while preserving order + filter unknown - seen: set[str] = set() - out: list[str] = [] - for c in normalized: - if c in seen: - continue - if c not in _ALL_CONNECTORS: - continue - seen.add(c) - out.append(c) - return out if out else list(_ALL_CONNECTORS) - - -SURFSENSE_CITATION_INSTRUCTIONS = """ - -CRITICAL CITATION REQUIREMENTS: - -1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `` tag inside ``. -2. Make sure ALL factual statements from the documents have proper citations. -3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2]. -4. You MUST use the exact chunk_id values from the `` attributes. Do not create your own citation numbers. -5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value. -6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags. -7. Do not return citations as clickable links. -8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only. -9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting. -10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `` tags. -11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up. 
- - -The documents you receive are structured like this: - - - - 42 - GITHUB_CONNECTOR - <![CDATA[Some repo / file / issue title]]> - - - - - - - - - - -IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id. - - - -- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `` tag -- Citations should appear at the end of the sentence containing the information they support -- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] -- No need to return references section. Just citations in answer. -- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format -- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only -- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess - - - -CORRECT citation formats: -- [citation:5] -- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] - -INCORRECT citation formats (DO NOT use): -- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense)) -- Using parentheses around brackets: ([citation:5]) -- Using hyperlinked text: [link to source 5](https://example.com) -- Using footnote style: ... library¹ -- Making up source IDs when source_id is unknown -- Using old IEEE format: [1], [2], [3] -- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5] - - - -Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5]. 
- -The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources. - -However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead. - - -""" - - -def _parse_date_or_datetime(value: str) -> datetime: - """ - Parse either an ISO date (YYYY-MM-DD) or ISO datetime into an aware UTC datetime. - - - If `value` is a date, interpret it as start-of-day in UTC. - - If `value` is a datetime without timezone, assume UTC. - """ - raw = (value or "").strip() - if not raw: - raise ValueError("Empty date string") - - # Date-only - if "T" not in raw: - d = datetime.fromisoformat(raw).date() - return datetime(d.year, d.month, d.day, tzinfo=UTC) - - # Datetime (may be naive) - dt = datetime.fromisoformat(raw) - if dt.tzinfo is None: - return dt.replace(tzinfo=UTC) - return dt.astimezone(UTC) - - -def _resolve_date_range( - start_date: datetime | None, - end_date: datetime | None, -) -> tuple[datetime, datetime]: - """ - Resolve a date range, defaulting to the last 2 years if not provided. - Ensures start_date <= end_date. - """ - resolved_end = end_date or datetime.now(UTC) - resolved_start = start_date or (resolved_end - timedelta(days=730)) - - if resolved_start > resolved_end: - resolved_start, resolved_end = resolved_end, resolved_start - - return resolved_start, resolved_end - - -def format_documents_for_context(documents: list[dict[str, Any]]) -> str: - """ - Format retrieved documents into a readable context string for the LLM. 
- - Args: - documents: List of document dictionaries from connector search - - Returns: - Formatted string with document contents and metadata - """ - if not documents: - return "" - - # Group chunks by document id (preferred) to produce the XML structure. - # - # IMPORTANT: ConnectorService returns **document-grouped** results of the form: - # { - # "document": {...}, - # "chunks": [{"chunk_id": 123, "content": "..."}, ...], - # "source": "NOTION_CONNECTOR" | "FILE" | ... - # } - # - # We must preserve chunk_id so citations like [citation:123] are possible. - grouped: dict[str, dict[str, Any]] = {} - - for doc in documents: - document_info = (doc.get("document") or {}) if isinstance(doc, dict) else {} - metadata = ( - (document_info.get("metadata") or {}) - if isinstance(document_info, dict) - else {} - ) - if not metadata and isinstance(doc, dict): - # Some result shapes may place metadata at the top level. - metadata = doc.get("metadata") or {} - - source = ( - (doc.get("source") if isinstance(doc, dict) else None) - or metadata.get("document_type") - or "UNKNOWN" - ) - - # Document identity (prefer document_id; otherwise fall back to type+title+url) - document_id_val = document_info.get("id") - title = ( - document_info.get("title") or metadata.get("title") or "Untitled Document" - ) - url = ( - metadata.get("url") - or metadata.get("source") - or metadata.get("page_url") - or "" - ) - - doc_key = ( - str(document_id_val) - if document_id_val is not None - else f"{source}::{title}::{url}" - ) - - if doc_key not in grouped: - grouped[doc_key] = { - "document_id": document_id_val - if document_id_val is not None - else doc_key, - "document_type": metadata.get("document_type") or source, - "title": title, - "url": url, - "metadata": metadata, - "chunks": [], - } - - # Prefer document-grouped chunks if available - chunks_list = doc.get("chunks") if isinstance(doc, dict) else None - if isinstance(chunks_list, list) and chunks_list: - for ch in chunks_list: - if not 
isinstance(ch, dict): - continue - chunk_id = ch.get("chunk_id") or ch.get("id") - content = (ch.get("content") or "").strip() - if not content: - continue - grouped[doc_key]["chunks"].append( - {"chunk_id": chunk_id, "content": content} - ) - continue - - # Fallback: treat this as a flat chunk-like object - if not isinstance(doc, dict): - continue - chunk_id = doc.get("chunk_id") or doc.get("id") - content = (doc.get("content") or "").strip() - if not content: - continue - grouped[doc_key]["chunks"].append({"chunk_id": chunk_id, "content": content}) - - # Render XML expected by citation instructions - parts: list[str] = [] - for g in grouped.values(): - metadata_json = json.dumps(g["metadata"], ensure_ascii=False) - - parts.append("") - parts.append("") - parts.append(f" {g['document_id']}") - parts.append(f" {g['document_type']}") - parts.append(f" <![CDATA[{g['title']}]]>") - parts.append(f" ") - parts.append(f" ") - parts.append("") - parts.append("") - parts.append("") - - for ch in g["chunks"]: - ch_content = ch["content"] - ch_id = ch["chunk_id"] - if ch_id is None: - parts.append(f" ") - else: - parts.append(f" ") - - parts.append("") - parts.append("") - parts.append("") - - return "\n".join(parts).strip() - - -async def search_knowledge_base_async( - query: str, - search_space_id: int, - db_session: AsyncSession, - connector_service: ConnectorService, - connectors_to_search: list[str] | None = None, - top_k: int = 10, - start_date: datetime | None = None, - end_date: datetime | None = None, -) -> str: - """ - Search the user's knowledge base for relevant documents. - - This is the async implementation that searches across multiple connectors. - - Args: - query: The search query - search_space_id: The user's search space ID - db_session: Database session - connector_service: Initialized connector service - connectors_to_search: Optional list of connector types to search. If omitted, searches all. 
- top_k: Number of results per connector - start_date: Optional start datetime (UTC) for filtering documents - end_date: Optional end datetime (UTC) for filtering documents - - Returns: - Formatted string with search results - """ - all_documents = [] - - # Resolve date range (default last 2 years) - resolved_start_date, resolved_end_date = _resolve_date_range( - start_date=start_date, - end_date=end_date, - ) - - connectors = _normalize_connectors(connectors_to_search) - - for connector in connectors: - try: - if connector == "YOUTUBE_VIDEO": - _, chunks = await connector_service.search_youtube( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "EXTENSION": - _, chunks = await connector_service.search_extension( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "CRAWLED_URL": - _, chunks = await connector_service.search_crawled_urls( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "FILE": - _, chunks = await connector_service.search_files( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "SLACK_CONNECTOR": - _, chunks = await connector_service.search_slack( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "NOTION_CONNECTOR": - _, chunks = await connector_service.search_notion( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - 
start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "GITHUB_CONNECTOR": - _, chunks = await connector_service.search_github( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "LINEAR_CONNECTOR": - _, chunks = await connector_service.search_linear( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "TAVILY_API": - _, chunks = await connector_service.search_tavily( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - ) - all_documents.extend(chunks) - - elif connector == "SEARXNG_API": - _, chunks = await connector_service.search_searxng( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - ) - all_documents.extend(chunks) - - elif connector == "LINKUP_API": - # Keep behavior aligned with researcher: default "standard" - _, chunks = await connector_service.search_linkup( - user_query=query, - search_space_id=search_space_id, - mode="standard", - ) - all_documents.extend(chunks) - - elif connector == "BAIDU_SEARCH_API": - _, chunks = await connector_service.search_baidu( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - ) - all_documents.extend(chunks) - - elif connector == "DISCORD_CONNECTOR": - _, chunks = await connector_service.search_discord( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "JIRA_CONNECTOR": - _, chunks = await connector_service.search_jira( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) 
- - elif connector == "GOOGLE_CALENDAR_CONNECTOR": - _, chunks = await connector_service.search_google_calendar( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "AIRTABLE_CONNECTOR": - _, chunks = await connector_service.search_airtable( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "GOOGLE_GMAIL_CONNECTOR": - _, chunks = await connector_service.search_google_gmail( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "CONFLUENCE_CONNECTOR": - _, chunks = await connector_service.search_confluence( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "CLICKUP_CONNECTOR": - _, chunks = await connector_service.search_clickup( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "LUMA_CONNECTOR": - _, chunks = await connector_service.search_luma( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "ELASTICSEARCH_CONNECTOR": - _, chunks = await connector_service.search_elasticsearch( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "NOTE": - _, chunks = await connector_service.search_notes( - 
user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - elif connector == "BOOKSTACK_CONNECTOR": - _, chunks = await connector_service.search_bookstack( - user_query=query, - search_space_id=search_space_id, - top_k=top_k, - start_date=resolved_start_date, - end_date=resolved_end_date, - ) - all_documents.extend(chunks) - - except Exception as e: - print(f"Error searching connector {connector}: {e}") - continue - - # Deduplicate by content hash - seen_doc_ids: set[Any] = set() - seen_hashes: set[int] = set() - deduplicated: list[dict[str, Any]] = [] - for doc in all_documents: - doc_id = (doc.get("document", {}) or {}).get("id") - content = (doc.get("content", "") or "").strip() - content_hash = hash(content) - - if (doc_id and doc_id in seen_doc_ids) or content_hash in seen_hashes: - continue - - if doc_id: - seen_doc_ids.add(doc_id) - seen_hashes.add(content_hash) - deduplicated.append(doc) - - return format_documents_for_context(deduplicated) - - -def create_search_knowledge_base_tool( - search_space_id: int, - db_session: AsyncSession, - connector_service: ConnectorService, -): - """ - Factory function to create the search_knowledge_base tool with injected dependencies. - - Args: - search_space_id: The user's search space ID - db_session: Database session - connector_service: Initialized connector service - connectors_to_search: List of connector types to search - - Returns: - A configured tool function - """ - - @tool - async def search_knowledge_base( - query: str, - top_k: int = 10, - start_date: str | None = None, - end_date: str | None = None, - connectors_to_search: list[str] | None = None, - ) -> str: - """ - Search the user's personal knowledge base for relevant information. - - Use this tool to find documents, notes, files, web pages, and other content - that may help answer the user's question. 
- - IMPORTANT: - - If the user requests a specific source type (e.g. "my notes", "Slack messages"), - pass `connectors_to_search=[...]` using the enums below. - - If `connectors_to_search` is omitted/empty, the system will search broadly. - - ## Available connector enums for `connectors_to_search` - - - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) - - FILE: "User-uploaded documents (PDFs, Word, etc.)" (personal files) - - NOTE: "SurfSense Notes" (notes created inside SurfSense) - - SLACK_CONNECTOR: "Slack conversations and shared content" (personal workspace communications) - - NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management) - - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos) - - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions) - - ELASTICSEARCH_CONNECTOR: "Elasticsearch indexed documents and data" (personal Elasticsearch instances and custom data sources) - - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management) - - JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking) - - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) - - CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management) - - GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management) - - GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications) - - DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications) - - AIRTABLE_CONNECTOR: "Airtable records, tables, and database content" (personal data management and organization) - - TAVILY_API: "Tavily search API results" (personalized search results) - - SEARXNG_API: "SearxNG search API results" (personalized search results) 
- - LINKUP_API: "Linkup search API results" (personalized search results) - - BAIDU_SEARCH_API: "Baidu search API results" (personalized search results) - - LUMA_CONNECTOR: "Luma events" - - WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites) - - BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation) - - NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`. - - Args: - query: The search query - be specific and include key terms - top_k: Number of results to retrieve (default: 10) - start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00") - end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00") - connectors_to_search: Optional list of connector enums to search. If omitted, searches all. - - Returns: - Formatted string with relevant documents and their content - """ - parsed_start: datetime | None = None - parsed_end: datetime | None = None - - if start_date: - parsed_start = _parse_date_or_datetime(start_date) - if end_date: - parsed_end = _parse_date_or_datetime(end_date) - - return await search_knowledge_base_async( - query=query, - search_space_id=search_space_id, - db_session=db_session, - connector_service=connector_service, - connectors_to_search=connectors_to_search, - top_k=top_k, - start_date=parsed_start, - end_date=parsed_end, - ) - - return search_knowledge_base - - -# ============================================================================= -# System Prompt -# ============================================================================= - - -def build_surfsense_system_prompt(today: datetime | None = None) -> str: - resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat() - - return f""" - -You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base. 
- -Today's date (UTC): {resolved_today} - - - -You have access to the following tools: -- search_knowledge_base: Search the user's personal knowledge base for relevant information. - - Args: - - query: The search query - be specific and include key terms - - top_k: Number of results to retrieve (default: 10) - - start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00") - - end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00") - - connectors_to_search: Optional list of connector enums to search. If omitted, searches all. - - Returns: Formatted string with relevant documents and their content - - -- User: "Fetch all my notes and what's in them?" - - Call: `search_knowledge_base(query="*", top_k=50, connectors_to_search=["NOTE"])` - -- User: "What did I discuss on Slack last week about the React migration?" - - Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")` - - -{SURFSENSE_CITATION_INSTRUCTIONS} -""" - - -SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt() - - # ============================================================================= # Deep Agent Factory # ============================================================================= @@ -897,102 +33,117 @@ def create_surfsense_deep_agent( search_space_id: int, db_session: AsyncSession, connector_service: ConnectorService, + checkpointer: Checkpointer, + agent_config: AgentConfig | None = None, + enabled_tools: list[str] | None = None, + disabled_tools: list[str] | None = None, + additional_tools: Sequence[BaseTool] | None = None, + firecrawl_api_key: str | None = None, ): """ - Create a SurfSense deep agent with knowledge base search capability. + Create a SurfSense deep agent with configurable tools and prompts. 
+ + The agent comes with built-in tools that can be configured: + - search_knowledge_base: Search the user's personal knowledge base + - generate_podcast: Generate audio podcasts from content + - link_preview: Fetch rich previews for URLs + - display_image: Display images in chat + - scrape_webpage: Extract content from webpages + + The system prompt can be configured via agent_config: + - Custom system instructions (or use defaults) + - Citation toggle (enable/disable citation requirements) Args: - llm: ChatLiteLLM instance + llm: ChatLiteLLM instance for the agent's language model search_space_id: The user's search space ID - db_session: Database session - connector_service: Initialized connector service - connectors_to_search: List of connector types to search (default: common connectors) + db_session: Database session for tools that need DB access + connector_service: Initialized connector service for knowledge base search + checkpointer: LangGraph checkpointer for conversation state persistence. + Use AsyncPostgresSaver for production or MemorySaver for testing. + agent_config: Optional AgentConfig from NewLLMConfig for prompt configuration. + If None, uses default system prompt with citations enabled. + enabled_tools: Explicit list of tool names to enable. If None, all default tools + are enabled. Use this to limit which tools are available. + disabled_tools: List of tool names to disable. Applied after enabled_tools. + Use this to exclude specific tools from the defaults. + additional_tools: Extra custom tools to add beyond the built-in ones. + These are always added regardless of enabled/disabled settings. + firecrawl_api_key: Optional Firecrawl API key for premium web scraping. + Falls back to Chromium/Trafilatura if not provided. Returns: CompiledStateGraph: The configured deep agent + + Examples: + # Create agent with all default tools and default prompt + agent = create_surfsense_deep_agent(llm, search_space_id, db_session, ...) 
+ + # Create agent with custom prompt configuration + agent = create_surfsense_deep_agent( + llm, search_space_id, db_session, ..., + agent_config=AgentConfig( + provider="OPENAI", + model_name="gpt-4", + api_key="...", + system_instructions="Custom instructions...", + citations_enabled=False, + ) + ) + + # Create agent with only specific tools + agent = create_surfsense_deep_agent( + llm, search_space_id, db_session, ..., + enabled_tools=["search_knowledge_base", "link_preview"] + ) + + # Create agent without podcast generation + agent = create_surfsense_deep_agent( + llm, search_space_id, db_session, ..., + disabled_tools=["generate_podcast"] + ) + + # Add custom tools + agent = create_surfsense_deep_agent( + llm, search_space_id, db_session, ..., + additional_tools=[my_custom_tool] + ) """ - # Create the search tool with injected dependencies - search_tool = create_search_knowledge_base_tool( - search_space_id=search_space_id, - db_session=db_session, - connector_service=connector_service, + # Build dependencies dict for the tools registry + dependencies = { + "search_space_id": search_space_id, + "db_session": db_session, + "connector_service": connector_service, + "firecrawl_api_key": firecrawl_api_key, + } + + # Build tools using the registry + tools = build_tools( + dependencies=dependencies, + enabled_tools=enabled_tools, + disabled_tools=disabled_tools, + additional_tools=list(additional_tools) if additional_tools else None, ) - # Create the deep agent + # Build system prompt based on agent_config + if agent_config is not None: + # Use configurable prompt with settings from NewLLMConfig + system_prompt = build_configurable_system_prompt( + custom_system_instructions=agent_config.system_instructions, + use_default_system_instructions=agent_config.use_default_system_instructions, + citations_enabled=agent_config.citations_enabled, + ) + else: + # Use default prompt (with citations enabled) + system_prompt = build_surfsense_system_prompt() + + # Create the 
deep agent with system prompt and checkpointer agent = create_deep_agent( model=llm, - tools=[search_tool], - system_prompt=build_surfsense_system_prompt(), + tools=tools, + system_prompt=system_prompt, context_schema=SurfSenseContextSchema, + checkpointer=checkpointer, ) return agent - - -# ============================================================================= -# Test Runner -# ============================================================================= - - -async def run_test(): - """Run a basic test of the deep agent.""" - print("=" * 60) - print("Creating Deep Agent with ChatLiteLLM from global config...") - print("=" * 60) - - # Create ChatLiteLLM from global config - # Use global LLM config by id (negative ids are reserved for global configs) - llm_config = load_llm_config_from_yaml(llm_config_id=-2) - if not llm_config: - raise ValueError("Failed to load LLM config from YAML") - llm = create_chat_litellm_from_config(llm_config) - if not llm: - raise ValueError("Failed to create ChatLiteLLM instance") - - # Create a real DB session + ConnectorService, then build the full SurfSense agent. 
- async with async_session_maker() as session: - # Use the known dev search space id - search_space_id = 5 - - connector_service = ConnectorService(session, search_space_id=search_space_id) - - agent = create_surfsense_deep_agent( - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - ) - - print("\nAgent created successfully!") - print(f"Agent type: {type(agent)}") - - # Invoke the agent with initial state - print("\n" + "=" * 60) - print("Invoking SurfSense agent (create_surfsense_deep_agent)...") - print("=" * 60) - - initial_state = { - "messages": [HumanMessage(content=("What are my notes from last 3 days?"))], - "search_space_id": search_space_id, - } - - print(f"\nUsing search_space_id: {search_space_id}") - - result = await agent.ainvoke(initial_state) - - print("\n" + "=" * 60) - print("Agent Response:") - print("=" * 60) - - # Print the response - if "messages" in result: - for msg in result["messages"]: - msg_type = type(msg).__name__ - content = msg.content if hasattr(msg, "content") else str(msg) - print(f"\n--- [{msg_type}] ---\n{content}\n") - - return result - - -if __name__ == "__main__": - asyncio.run(run_test()) diff --git a/surfsense_backend/app/agents/new_chat/checkpointer.py b/surfsense_backend/app/agents/new_chat/checkpointer.py new file mode 100644 index 000000000..637b2926f --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/checkpointer.py @@ -0,0 +1,94 @@ +""" +PostgreSQL-based checkpointer for LangGraph agents. + +This module provides a persistent checkpointer using AsyncPostgresSaver +that stores conversation state in the PostgreSQL database. 
+""" + +from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver + +from app.config import config + +# Global checkpointer instance (initialized lazily) +_checkpointer: AsyncPostgresSaver | None = None +_checkpointer_context = None # Store the context manager for cleanup +_checkpointer_initialized: bool = False + + +def get_postgres_connection_string() -> str: + """ + Convert the async DATABASE_URL to a sync postgres connection string for psycopg3. + + The DATABASE_URL is typically in format: + postgresql+asyncpg://user:pass@host:port/dbname + + We need to convert it to: + postgresql://user:pass@host:port/dbname + """ + db_url = config.DATABASE_URL + + # Handle asyncpg driver prefix + if db_url.startswith("postgresql+asyncpg://"): + return db_url.replace("postgresql+asyncpg://", "postgresql://") + + # Handle other async prefixes + if "+asyncpg" in db_url: + return db_url.replace("+asyncpg", "") + + return db_url + + +async def get_checkpointer() -> AsyncPostgresSaver: + """ + Get or create the global AsyncPostgresSaver instance. + + This function: + 1. Creates the checkpointer if it doesn't exist + 2. Sets up the required database tables on first call + 3. Returns the cached instance on subsequent calls + + Returns: + AsyncPostgresSaver: The configured checkpointer instance + """ + global _checkpointer, _checkpointer_context, _checkpointer_initialized + + if _checkpointer is None: + conn_string = get_postgres_connection_string() + # from_conn_string returns an async context manager + # We need to enter the context to get the actual checkpointer + _checkpointer_context = AsyncPostgresSaver.from_conn_string(conn_string) + _checkpointer = await _checkpointer_context.__aenter__() + + # Setup tables on first call (idempotent) + if not _checkpointer_initialized: + await _checkpointer.setup() + _checkpointer_initialized = True + + return _checkpointer + + +async def setup_checkpointer_tables() -> None: + """ + Explicitly setup the checkpointer tables. 
+ + This can be called during application startup to ensure + tables exist before any agent calls. + """ + await get_checkpointer() + print("[Checkpointer] PostgreSQL checkpoint tables ready") + + +async def close_checkpointer() -> None: + """ + Close the checkpointer connection. + + This should be called during application shutdown. + """ + global _checkpointer, _checkpointer_context, _checkpointer_initialized + + if _checkpointer_context is not None: + await _checkpointer_context.__aexit__(None, None, None) + _checkpointer = None + _checkpointer_context = None + _checkpointer_initialized = False + print("[Checkpointer] PostgreSQL connection closed") diff --git a/surfsense_backend/app/agents/new_chat/context.py b/surfsense_backend/app/agents/new_chat/context.py new file mode 100644 index 000000000..da113adf4 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/context.py @@ -0,0 +1,28 @@ +""" +Context schema definitions for SurfSense agents. + +This module defines the custom state schema used by the SurfSense deep agent. +""" + +from typing import TypedDict + + +class SurfSenseContextSchema(TypedDict): + """ + Custom state schema for the SurfSense deep agent. + + This extends the default agent state with custom fields. 
+ The default state already includes: + - messages: Conversation history + - todos: Task list from TodoListMiddleware + - files: Virtual filesystem from FilesystemMiddleware + + We're adding fields needed for knowledge base search: + - search_space_id: The user's search space ID + - db_session: Database session (injected at runtime) + - connector_service: Connector service instance (injected at runtime) + """ + + search_space_id: int + # These are runtime-injected and won't be serialized + # db_session and connector_service are passed when invoking the agent diff --git a/surfsense_backend/app/agents/new_chat/llm_config.py b/surfsense_backend/app/agents/new_chat/llm_config.py new file mode 100644 index 000000000..a55ed79d3 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/llm_config.py @@ -0,0 +1,361 @@ +""" +LLM configuration utilities for SurfSense agents. + +This module provides functions for loading LLM configurations from: +1. YAML files (global configs with negative IDs) +2. Database NewLLMConfig table (user-created configs with positive IDs) + +It also provides utilities for creating ChatLiteLLM instances and +managing prompt configurations. 
+""" + +from dataclasses import dataclass +from pathlib import Path + +import yaml +from langchain_litellm import ChatLiteLLM +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +# Provider mapping for LiteLLM model string construction +PROVIDER_MAP = { + "OPENAI": "openai", + "ANTHROPIC": "anthropic", + "GROQ": "groq", + "COHERE": "cohere", + "GOOGLE": "gemini", + "OLLAMA": "ollama", + "MISTRAL": "mistral", + "AZURE_OPENAI": "azure", + "OPENROUTER": "openrouter", + "XAI": "xai", + "BEDROCK": "bedrock", + "VERTEX_AI": "vertex_ai", + "TOGETHER_AI": "together_ai", + "FIREWORKS_AI": "fireworks_ai", + "DEEPSEEK": "openai", + "ALIBABA_QWEN": "openai", + "MOONSHOT": "openai", + "ZHIPU": "openai", + "REPLICATE": "replicate", + "PERPLEXITY": "perplexity", + "ANYSCALE": "anyscale", + "DEEPINFRA": "deepinfra", + "CEREBRAS": "cerebras", + "SAMBANOVA": "sambanova", + "AI21": "ai21", + "CLOUDFLARE": "cloudflare", + "DATABRICKS": "databricks", + "COMETAPI": "cometapi", + "HUGGINGFACE": "huggingface", + "CUSTOM": "custom", +} + + +@dataclass +class AgentConfig: + """ + Complete configuration for the SurfSense agent. + + This combines LLM settings with prompt configuration from NewLLMConfig. + """ + + # LLM Model Settings + provider: str + model_name: str + api_key: str + api_base: str | None = None + custom_provider: str | None = None + litellm_params: dict | None = None + + # Prompt Configuration + system_instructions: str | None = None + use_default_system_instructions: bool = True + citations_enabled: bool = True + + # Metadata + config_id: int | None = None + config_name: str | None = None + + @classmethod + def from_new_llm_config(cls, config) -> "AgentConfig": + """ + Create an AgentConfig from a NewLLMConfig database model. 
+ + Args: + config: NewLLMConfig database model instance + + Returns: + AgentConfig instance + """ + return cls( + provider=config.provider.value + if hasattr(config.provider, "value") + else str(config.provider), + model_name=config.model_name, + api_key=config.api_key, + api_base=config.api_base, + custom_provider=config.custom_provider, + litellm_params=config.litellm_params, + system_instructions=config.system_instructions, + use_default_system_instructions=config.use_default_system_instructions, + citations_enabled=config.citations_enabled, + config_id=config.id, + config_name=config.name, + ) + + @classmethod + def from_yaml_config(cls, yaml_config: dict) -> "AgentConfig": + """ + Create an AgentConfig from a YAML configuration dictionary. + + YAML configs now support the same prompt configuration fields as NewLLMConfig: + - system_instructions: Custom system instructions (empty string uses defaults) + - use_default_system_instructions: Whether to use default instructions + - citations_enabled: Whether citations are enabled + + Args: + yaml_config: Configuration dictionary from YAML file + + Returns: + AgentConfig instance + """ + # Get system instructions from YAML, default to empty string + system_instructions = yaml_config.get("system_instructions", "") + + return cls( + provider=yaml_config.get("provider", "").upper(), + model_name=yaml_config.get("model_name", ""), + api_key=yaml_config.get("api_key", ""), + api_base=yaml_config.get("api_base"), + custom_provider=yaml_config.get("custom_provider"), + litellm_params=yaml_config.get("litellm_params"), + # Prompt configuration from YAML (with defaults for backwards compatibility) + system_instructions=system_instructions if system_instructions else None, + use_default_system_instructions=yaml_config.get( + "use_default_system_instructions", True + ), + citations_enabled=yaml_config.get("citations_enabled", True), + config_id=yaml_config.get("id"), + config_name=yaml_config.get("name"), + ) + + +def 
def load_llm_config_from_yaml(llm_config_id: int = -1) -> dict | None:
    """
    Load a specific LLM config from global_llm_config.yaml.

    Falls back to the bundled example file when the main config is absent.

    Args:
        llm_config_id: The id of the config to load (default: -1).

    Returns:
        LLM config dict, or None if no file exists, the id is not present,
        or the file cannot be parsed.
    """
    # surfsense_backend/ is four directory levels above this module.
    base_dir = Path(__file__).resolve().parent.parent.parent.parent
    config_file = base_dir / "app" / "config" / "global_llm_config.yaml"

    # Fall back to the example file if the main config doesn't exist.
    if not config_file.exists():
        config_file = base_dir / "app" / "config" / "global_llm_config.example.yaml"
        if not config_file.exists():
            print("Error: No global_llm_config.yaml or example file found")
            return None

    try:
        with open(config_file, encoding="utf-8") as f:
            # "or {}" guards against an empty YAML file, where safe_load
            # returns None and .get() would raise AttributeError.
            data = yaml.safe_load(f) or {}

        for cfg in data.get("global_llm_configs", []):
            if isinstance(cfg, dict) and cfg.get("id") == llm_config_id:
                return cfg

        print(f"Error: Global LLM config id {llm_config_id} not found")
        return None
    except Exception as e:
        print(f"Error loading config: {e}")
        return None


async def load_new_llm_config_from_db(
    session: AsyncSession,
    config_id: int,
) -> "AgentConfig | None":
    """
    Load a NewLLMConfig from the database by ID.

    Args:
        session: AsyncSession for database access.
        config_id: The ID of the NewLLMConfig to load.

    Returns:
        AgentConfig instance, or None if not found or on a query error.
    """
    # Imported here to avoid a circular import with app.db.
    from app.db import NewLLMConfig

    try:
        result = await session.execute(
            select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
        )
        config = result.scalars().first()

        if not config:
            print(f"Error: NewLLMConfig with id {config_id} not found")
            return None

        return AgentConfig.from_new_llm_config(config)
    except Exception as e:
        print(f"Error loading NewLLMConfig from database: {e}")
        return None


async def load_agent_llm_config_for_search_space(
    session: AsyncSession,
    search_space_id: int,
) -> "AgentConfig | None":
    """
    Load the agent LLM configuration for a search space.

    Resolution of the search space's agent_llm_id setting:
    - Positive ID: load from the NewLLMConfig database table
    - Negative ID: load from the YAML global configs
    - None: fall back to the first global config (id=-1)

    Args:
        session: AsyncSession for database access.
        search_space_id: The search space ID.

    Returns:
        AgentConfig instance, or None if the search space or config is missing.
    """
    # Imported here to avoid a circular import with app.db.
    from app.db import SearchSpace

    try:
        result = await session.execute(
            select(SearchSpace).filter(SearchSpace.id == search_space_id)
        )
        search_space = result.scalars().first()

        if not search_space:
            print(f"Error: SearchSpace with id {search_space_id} not found")
            return None

        # Use agent_llm_id from the search space, defaulting to the first
        # global YAML config when unset.
        config_id = (
            search_space.agent_llm_id if search_space.agent_llm_id is not None else -1
        )
        return await load_agent_config(session, config_id, search_space_id)
    except Exception as e:
        print(f"Error loading agent LLM config for search space {search_space_id}: {e}")
        return None


async def load_agent_config(
    session: AsyncSession,
    config_id: int,
    search_space_id: int | None = None,
) -> "AgentConfig | None":
    """
    Load an agent configuration from YAML or the database.

    This is the main entry point for loading configurations:
    - Negative IDs: load from the YAML file (global configs)
    - Non-negative IDs: load from the NewLLMConfig database table

    Args:
        session: AsyncSession for database access.
        config_id: The config ID (negative for YAML, positive for database).
        search_space_id: Optional search space ID for context (currently
            unused; kept for interface stability).

    Returns:
        AgentConfig instance or None if not found.
    """
    if config_id < 0:
        yaml_config = load_llm_config_from_yaml(config_id)
        return AgentConfig.from_yaml_config(yaml_config) if yaml_config else None
    return await load_new_llm_config_from_db(session, config_id)


def _build_litellm_model_string(
    provider: str,
    model_name: str,
    custom_provider: str | None,
) -> str:
    """
    Build the LiteLLM "prefix/model" routing string.

    A custom provider takes precedence; otherwise the provider enum value is
    mapped through PROVIDER_MAP, falling back to the lowercased provider name.
    """
    if custom_provider:
        return f"{custom_provider}/{model_name}"
    prefix = PROVIDER_MAP.get(provider.upper(), provider.lower())
    return f"{prefix}/{model_name}"


def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
    """
    Create a ChatLiteLLM instance from a global LLM config dictionary.

    Args:
        llm_config: LLM configuration dictionary from YAML.

    Returns:
        ChatLiteLLM instance. (The None in the annotation is kept for API
        compatibility; construction errors propagate as exceptions.)
    """
    model_string = _build_litellm_model_string(
        llm_config.get("provider", ""),
        llm_config["model_name"],
        llm_config.get("custom_provider"),
    )

    # Streaming is enabled so tokens can be relayed to clients in real time.
    litellm_kwargs = {
        "model": model_string,
        "api_key": llm_config.get("api_key"),
        "streaming": True,
    }

    if llm_config.get("api_base"):
        litellm_kwargs["api_base"] = llm_config["api_base"]

    # Any additional LiteLLM parameters override/extend the defaults above.
    if llm_config.get("litellm_params"):
        litellm_kwargs.update(llm_config["litellm_params"])

    return ChatLiteLLM(**litellm_kwargs)


def create_chat_litellm_from_agent_config(
    agent_config: AgentConfig,
) -> ChatLiteLLM | None:
    """
    Create a ChatLiteLLM instance from an AgentConfig.

    Delegates to create_chat_litellm_from_config so the kwargs-building
    logic lives in exactly one place.

    Args:
        agent_config: AgentConfig instance.

    Returns:
        ChatLiteLLM instance (see create_chat_litellm_from_config).
    """
    return create_chat_litellm_from_config(
        {
            "provider": agent_config.provider,
            "model_name": agent_config.model_name,
            "api_key": agent_config.api_key,
            "api_base": agent_config.api_base,
            "custom_provider": agent_config.custom_provider,
            "litellm_params": agent_config.litellm_params,
        }
    )
+ +Today's date (UTC): {resolved_today} + + +""" + +SURFSENSE_TOOLS_INSTRUCTIONS = """ + +You have access to the following tools: + +1. search_knowledge_base: Search the user's personal knowledge base for relevant information. + - Args: + - query: The search query - be specific and include key terms + - top_k: Number of results to retrieve (default: 10) + - start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00") + - end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00") + - connectors_to_search: Optional list of connector enums to search. If omitted, searches all. + - Returns: Formatted string with relevant documents and their content + +2. generate_podcast: Generate an audio podcast from provided content. + - Use this when the user asks to create, generate, or make a podcast. + - Trigger phrases: "give me a podcast about", "create a podcast", "generate a podcast", "make a podcast", "turn this into a podcast" + - Args: + - source_content: The text content to convert into a podcast. This MUST be comprehensive and include: + * If discussing the current conversation: Include a detailed summary of the FULL chat history (all user questions and your responses) + * If based on knowledge base search: Include the key findings and insights from the search results + * You can combine both: conversation context + search results for richer podcasts + * The more detailed the source_content, the better the podcast quality + - podcast_title: Optional title for the podcast (default: "SurfSense Podcast") + - user_prompt: Optional instructions for podcast style/format (e.g., "Make it casual and fun") + - Returns: A task_id for tracking. The podcast will be generated in the background. + - IMPORTANT: Only one podcast can be generated at a time. If a podcast is already being generated, the tool will return status "already_generating". 
+ - After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes). + +3. link_preview: Fetch metadata for a URL to display a rich preview card. + - IMPORTANT: Use this tool WHENEVER the user shares or mentions a URL/link in their message. + - This fetches the page's Open Graph metadata (title, description, thumbnail) to show a preview card. + - NOTE: This tool only fetches metadata, NOT the full page content. It cannot read the article text. + - Trigger scenarios: + * User shares a URL (e.g., "Check out https://example.com") + * User pastes a link in their message + * User asks about a URL or link + - Args: + - url: The URL to fetch metadata for (must be a valid HTTP/HTTPS URL) + - Returns: A rich preview card with title, description, thumbnail, and domain + - The preview card will automatically be displayed in the chat. + +4. display_image: Display an image in the chat with metadata. + - Use this tool when you want to show an image to the user. + - This displays the image with an optional title, description, and source attribution. + - Common use cases: + * Showing an image from a URL mentioned in the conversation + * Displaying a diagram, chart, or illustration you're referencing + * Showing visual examples when explaining concepts + - Args: + - src: The URL of the image to display (must be a valid HTTP/HTTPS image URL) + - alt: Alternative text describing the image (for accessibility) + - title: Optional title to display below the image + - description: Optional description providing context about the image + - Returns: An image card with the image, title, and description + - The image will automatically be displayed in the chat. + +5. scrape_webpage: Scrape and extract the main content from a webpage. + - Use this when the user wants you to READ and UNDERSTAND the actual content of a webpage. 
+ - IMPORTANT: This is different from link_preview: + * link_preview: Only fetches metadata (title, description, thumbnail) for display + * scrape_webpage: Actually reads the FULL page content so you can analyze/summarize it + - Trigger scenarios: + * "Read this article and summarize it" + * "What does this page say about X?" + * "Summarize this blog post for me" + * "Tell me the key points from this article" + * "What's in this webpage?" + * "Can you analyze this article?" + - Args: + - url: The URL of the webpage to scrape (must be HTTP/HTTPS) + - max_length: Maximum content length to return (default: 50000 chars) + - Returns: The page title, description, full content (in markdown), word count, and metadata + - After scraping, you will have the full article text and can analyze, summarize, or answer questions about it. + - IMAGES: The scraped content may contain image URLs in markdown format like `![alt text](image_url)`. + * When you find relevant/important images in the scraped content, use the `display_image` tool to show them to the user. + * This makes your response more visual and engaging. + * Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content. + * Don't show every image - just the most relevant 1-3 images that enhance understanding. + + +- User: "Fetch all my notes and what's in them?" + - Call: `search_knowledge_base(query="*", top_k=50, connectors_to_search=["NOTE"])` + +- User: "What did I discuss on Slack last week about the React migration?" 
+ - Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")` + +- User: "Give me a podcast about AI trends based on what we discussed" + - First search for relevant content, then call: `generate_podcast(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", podcast_title="AI Trends Podcast")` + +- User: "Create a podcast summary of this conversation" + - Call: `generate_podcast(source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")` + +- User: "Make a podcast about quantum computing" + - First search: `search_knowledge_base(query="quantum computing")` + - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")` + +- User: "Check out https://dev.to/some-article" + - Call: `link_preview(url="https://dev.to/some-article")` + +- User: "What's this blog post about? https://example.com/blog/post" + - Call: `link_preview(url="https://example.com/blog/post")` + +- User: "https://github.com/some/repo" + - Call: `link_preview(url="https://github.com/some/repo")` + +- User: "Show me this image: https://example.com/image.png" + - Call: `display_image(src="https://example.com/image.png", alt="User shared image")` + +- User: "Can you display a diagram of a neural network?" 
+ - Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")` + +- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends" + - Call: `scrape_webpage(url="https://example.com/blog/ai-trends")` + - After getting the content, provide a summary based on the scraped text + +- User: "What does this page say about machine learning? https://docs.example.com/ml-guide" + - Call: `scrape_webpage(url="https://docs.example.com/ml-guide")` + - Then answer the question using the extracted content + +- User: "Summarize this blog post: https://medium.com/some-article" + - Call: `scrape_webpage(url="https://medium.com/some-article")` + - Provide a comprehensive summary of the article content + +- User: "Read this tutorial and explain it: https://example.com/ml-tutorial" + - First: `scrape_webpage(url="https://example.com/ml-tutorial")` + - Then, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: + - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` + - Then provide your explanation, referencing the displayed image + +""" + +SURFSENSE_CITATION_INSTRUCTIONS = """ + +CRITICAL CITATION REQUIREMENTS: + +1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `` tag inside ``. +2. Make sure ALL factual statements from the documents have proper citations. +3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2]. +4. You MUST use the exact chunk_id values from the `` attributes. Do not create your own citation numbers. +5. 
Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value. +6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags. +7. Do not return citations as clickable links. +8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only. +9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting. +10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `` tags. +11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up. + + +The documents you receive are structured like this: + + + + 42 + GITHUB_CONNECTOR + <![CDATA[Some repo / file / issue title]]> + + + + + + + + + + +IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id. + + + +- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `` tag +- Citations should appear at the end of the sentence containing the information they support +- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] +- No need to return references section. Just citations in answer. +- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format +- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only +- NEVER make up chunk IDs if you are unsure about the chunk_id. 
It is better to omit the citation than to guess + + + +CORRECT citation formats: +- [citation:5] +- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] + +INCORRECT citation formats (DO NOT use): +- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense)) +- Using parentheses around brackets: ([citation:5]) +- Using hyperlinked text: [link to source 5](https://example.com) +- Using footnote style: ... library¹ +- Making up source IDs when source_id is unknown +- Using old IEEE format: [1], [2], [3] +- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5] + + + +Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5]. + +The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources. + +However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead. + + +""" + +# Anti-citation prompt - used when citations are disabled +# This explicitly tells the model NOT to include citations +SURFSENSE_NO_CITATION_INSTRUCTIONS = """ + +IMPORTANT: Citations are DISABLED for this configuration. + +DO NOT include any citations in your responses. Specifically: +1. Do NOT use the [citation:chunk_id] format anywhere in your response. +2. Do NOT reference document IDs, chunk IDs, or source IDs. +3. Simply provide the information naturally without any citation markers. +4. 
def build_surfsense_system_prompt(
    today: datetime | None = None,
) -> str:
    """
    Build the SurfSense system prompt with default settings.

    Convenience wrapper producing:
    - default system instructions
    - tools instructions (always included)
    - citation instructions (citations enabled)

    Args:
        today: Optional datetime for today's date (defaults to current UTC).

    Returns:
        Complete system prompt string.
    """
    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()

    return (
        SURFSENSE_SYSTEM_INSTRUCTIONS.format(resolved_today=resolved_today)
        + SURFSENSE_TOOLS_INSTRUCTIONS
        + SURFSENSE_CITATION_INSTRUCTIONS
    )


def build_configurable_system_prompt(
    custom_system_instructions: str | None = None,
    use_default_system_instructions: bool = True,
    citations_enabled: bool = True,
    today: datetime | None = None,
) -> str:
    """
    Build a configurable SurfSense system prompt based on NewLLMConfig settings.

    The prompt is composed of three parts:
    1. System instructions - custom text or the default template.
    2. Tools instructions - always included.
    3. Citation instructions - citation or anti-citation variant.

    Args:
        custom_system_instructions: Custom system instructions. When empty or
            None and use_default_system_instructions is True, the default
            SURFSENSE_SYSTEM_INSTRUCTIONS template is used.
        use_default_system_instructions: Whether to fall back to the default
            instructions when no custom ones are given.
        citations_enabled: Include citation instructions (True) or the
            anti-citation instructions (False).
        today: Optional datetime for today's date (defaults to current UTC).

    Returns:
        Complete system prompt string.
    """
    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()

    if custom_system_instructions and custom_system_instructions.strip():
        # Substitute the date placeholder with str.replace rather than
        # str.format: user-authored instructions may legitimately contain
        # literal braces (JSON samples, code snippets), which would make
        # .format() raise KeyError/ValueError.
        system_instructions = custom_system_instructions.replace(
            "{resolved_today}", resolved_today
        )
    elif use_default_system_instructions:
        system_instructions = SURFSENSE_SYSTEM_INSTRUCTIONS.format(
            resolved_today=resolved_today
        )
    else:
        # Explicitly configured to run without system instructions.
        system_instructions = ""

    # Citation instructions are chosen by the toggle; tools instructions are
    # always included.
    citation_instructions = (
        SURFSENSE_CITATION_INSTRUCTIONS
        if citations_enabled
        else SURFSENSE_NO_CITATION_INSTRUCTIONS
    )

    return system_instructions + SURFSENSE_TOOLS_INSTRUCTIONS + citation_instructions
+ + Returns: + Default system instructions string (with {resolved_today} placeholder) + """ + return SURFSENSE_SYSTEM_INSTRUCTIONS.strip() + + +SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt() diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py new file mode 100644 index 000000000..b89988327 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/__init__.py @@ -0,0 +1,52 @@ +""" +Tools module for SurfSense deep agent. + +This module contains all the tools available to the SurfSense agent. +To add a new tool, see the documentation in registry.py. + +Available tools: +- search_knowledge_base: Search the user's personal knowledge base +- generate_podcast: Generate audio podcasts from content +- link_preview: Fetch rich previews for URLs +- display_image: Display images in chat +- scrape_webpage: Extract content from webpages +""" + +# Registry exports +# Tool factory exports (for direct use) +from .display_image import create_display_image_tool +from .knowledge_base import ( + create_search_knowledge_base_tool, + format_documents_for_context, + search_knowledge_base_async, +) +from .link_preview import create_link_preview_tool +from .podcast import create_generate_podcast_tool +from .registry import ( + BUILTIN_TOOLS, + ToolDefinition, + build_tools, + get_all_tool_names, + get_default_enabled_tools, + get_tool_by_name, +) +from .scrape_webpage import create_scrape_webpage_tool + +__all__ = [ + # Registry + "BUILTIN_TOOLS", + "ToolDefinition", + "build_tools", + # Tool factories + "create_display_image_tool", + "create_generate_podcast_tool", + "create_link_preview_tool", + "create_scrape_webpage_tool", + "create_search_knowledge_base_tool", + # Knowledge base utilities + "format_documents_for_context", + "get_all_tool_names", + "get_default_enabled_tools", + "get_tool_by_name", + "search_knowledge_base_async", +] diff --git 
"""
Display image tool for the SurfSense agent.

This module provides a tool for displaying images in the chat UI
with metadata like title, description, and source attribution.
"""

import hashlib
from typing import Any
from urllib.parse import urlparse


def extract_domain(url: str) -> str:
    """Return the host of *url* without a leading "www.", or "" on failure."""
    try:
        domain = urlparse(url).netloc
    except Exception:
        return ""
    return domain[4:] if domain.startswith("www.") else domain


def generate_image_id(src: str) -> str:
    """Generate a stable unique ID for an image URL (md5-based, 12 hex chars)."""
    digest = hashlib.md5(src.encode()).hexdigest()[:12]
    return f"image-{digest}"


def create_display_image_tool():
    """
    Factory function to create the display_image tool.

    The langchain import is deferred to the factory so the pure helpers in
    this module stay importable without langchain installed.

    Returns:
        A configured tool function for displaying images.
    """
    from langchain_core.tools import tool

    @tool
    async def display_image(
        src: str,
        alt: str = "Image",
        title: str | None = None,
        description: str | None = None,
    ) -> dict[str, Any]:
        """
        Display an image in the chat with metadata.

        Use this tool when you want to show an image to the user.
        This displays the image with an optional title, description,
        and source attribution.

        Common use cases:
        - Showing an image from a URL the user mentioned
        - Displaying a diagram or chart you're referencing
        - Showing example images when explaining concepts

        Args:
            src: The URL of the image to display (must be a valid HTTP/HTTPS URL)
            alt: Alternative text describing the image (for accessibility)
            title: Optional title to display below the image
            description: Optional description providing context about the image

        Returns:
            A dictionary containing image metadata for the UI to render:
            - id: Unique identifier for this image
            - assetId: The image URL (for deduplication)
            - src: The image URL
            - alt: Alt text for accessibility
            - title: Image title (if provided)
            - description: Image description (if provided)
            - domain: Source domain
        """
        # Normalize the URL FIRST so the generated id, assetId and src all
        # refer to the same canonical URL. (Previously the id was computed
        # from the un-normalized value, so "example.com/x.png" and
        # "https://example.com/x.png" produced different ids for one asset.)
        if not src.startswith(("http://", "https://")):
            src = f"https://{src}"

        image_id = generate_image_id(src)
        domain = extract_domain(src)

        # Aspect-ratio hint based on well-known image hosts; the default
        # already covers photo sites like unsplash/pexels.
        ratio = "16:9"
        if (
            "imgur.com" in src or "github.com" in src or "githubusercontent.com" in src
        ):
            ratio = "auto"

        return {
            "id": image_id,
            "assetId": src,
            "src": src,
            "alt": alt,
            "title": title,
            "description": description,
            "domain": domain,
            "ratio": ratio,
        }

    return display_image
+ +This module provides: +- Connector constants and normalization +- Async knowledge base search across multiple connectors +- Document formatting for LLM context +- Tool factory for creating search_knowledge_base tools +""" + +import json +from datetime import datetime +from typing import Any + +from langchain_core.tools import tool +from sqlalchemy.ext.asyncio import AsyncSession + +from app.services.connector_service import ConnectorService + +# ============================================================================= +# Connector Constants and Normalization +# ============================================================================= + +# Canonical connector values used internally by ConnectorService +_ALL_CONNECTORS: list[str] = [ + "EXTENSION", + "FILE", + "SLACK_CONNECTOR", + "NOTION_CONNECTOR", + "YOUTUBE_VIDEO", + "GITHUB_CONNECTOR", + "ELASTICSEARCH_CONNECTOR", + "LINEAR_CONNECTOR", + "JIRA_CONNECTOR", + "CONFLUENCE_CONNECTOR", + "CLICKUP_CONNECTOR", + "GOOGLE_CALENDAR_CONNECTOR", + "GOOGLE_GMAIL_CONNECTOR", + "DISCORD_CONNECTOR", + "AIRTABLE_CONNECTOR", + "TAVILY_API", + "SEARXNG_API", + "LINKUP_API", + "BAIDU_SEARCH_API", + "LUMA_CONNECTOR", + "NOTE", + "BOOKSTACK_CONNECTOR", + "CRAWLED_URL", +] + + +def _normalize_connectors(connectors_to_search: list[str] | None) -> list[str]: + """ + Normalize connectors provided by the model. + + - Accepts user-facing enums like WEBCRAWLER_CONNECTOR and maps them to canonical + ConnectorService types. + - Drops unknown values. + - If None/empty, defaults to searching across all known connectors. 
+ """ + if not connectors_to_search: + return list(_ALL_CONNECTORS) + + normalized: list[str] = [] + for raw in connectors_to_search: + c = (raw or "").strip().upper() + if not c: + continue + if c == "WEBCRAWLER_CONNECTOR": + c = "CRAWLED_URL" + normalized.append(c) + + # de-dupe while preserving order + filter unknown + seen: set[str] = set() + out: list[str] = [] + for c in normalized: + if c in seen: + continue + if c not in _ALL_CONNECTORS: + continue + seen.add(c) + out.append(c) + return out if out else list(_ALL_CONNECTORS) + + +# ============================================================================= +# Document Formatting +# ============================================================================= + + +def format_documents_for_context(documents: list[dict[str, Any]]) -> str: + """ + Format retrieved documents into a readable context string for the LLM. + + Args: + documents: List of document dictionaries from connector search + + Returns: + Formatted string with document contents and metadata + """ + if not documents: + return "" + + # Group chunks by document id (preferred) to produce the XML structure. + # + # IMPORTANT: ConnectorService returns **document-grouped** results of the form: + # { + # "document": {...}, + # "chunks": [{"chunk_id": 123, "content": "..."}, ...], + # "source": "NOTION_CONNECTOR" | "FILE" | ... + # } + # + # We must preserve chunk_id so citations like [citation:123] are possible. + grouped: dict[str, dict[str, Any]] = {} + + for doc in documents: + document_info = (doc.get("document") or {}) if isinstance(doc, dict) else {} + metadata = ( + (document_info.get("metadata") or {}) + if isinstance(document_info, dict) + else {} + ) + if not metadata and isinstance(doc, dict): + # Some result shapes may place metadata at the top level. 
+ metadata = doc.get("metadata") or {} + + source = ( + (doc.get("source") if isinstance(doc, dict) else None) + or metadata.get("document_type") + or "UNKNOWN" + ) + + # Document identity (prefer document_id; otherwise fall back to type+title+url) + document_id_val = document_info.get("id") + title = ( + document_info.get("title") or metadata.get("title") or "Untitled Document" + ) + url = ( + metadata.get("url") + or metadata.get("source") + or metadata.get("page_url") + or "" + ) + + doc_key = ( + str(document_id_val) + if document_id_val is not None + else f"{source}::{title}::{url}" + ) + + if doc_key not in grouped: + grouped[doc_key] = { + "document_id": document_id_val + if document_id_val is not None + else doc_key, + "document_type": metadata.get("document_type") or source, + "title": title, + "url": url, + "metadata": metadata, + "chunks": [], + } + + # Prefer document-grouped chunks if available + chunks_list = doc.get("chunks") if isinstance(doc, dict) else None + if isinstance(chunks_list, list) and chunks_list: + for ch in chunks_list: + if not isinstance(ch, dict): + continue + chunk_id = ch.get("chunk_id") or ch.get("id") + content = (ch.get("content") or "").strip() + if not content: + continue + grouped[doc_key]["chunks"].append( + {"chunk_id": chunk_id, "content": content} + ) + continue + + # Fallback: treat this as a flat chunk-like object + if not isinstance(doc, dict): + continue + chunk_id = doc.get("chunk_id") or doc.get("id") + content = (doc.get("content") or "").strip() + if not content: + continue + grouped[doc_key]["chunks"].append({"chunk_id": chunk_id, "content": content}) + + # Render XML expected by citation instructions + parts: list[str] = [] + for g in grouped.values(): + metadata_json = json.dumps(g["metadata"], ensure_ascii=False) + + parts.append("") + parts.append("") + parts.append(f" {g['document_id']}") + parts.append(f" {g['document_type']}") + parts.append(f" <![CDATA[{g['title']}]]>") + parts.append(f" ") + 
parts.append(f" ") + parts.append("") + parts.append("") + parts.append("") + + for ch in g["chunks"]: + ch_content = ch["content"] + ch_id = ch["chunk_id"] + if ch_id is None: + parts.append(f" ") + else: + parts.append(f" ") + + parts.append("") + parts.append("") + parts.append("") + + return "\n".join(parts).strip() + + +# ============================================================================= +# Knowledge Base Search +# ============================================================================= + + +async def search_knowledge_base_async( + query: str, + search_space_id: int, + db_session: AsyncSession, + connector_service: ConnectorService, + connectors_to_search: list[str] | None = None, + top_k: int = 10, + start_date: datetime | None = None, + end_date: datetime | None = None, +) -> str: + """ + Search the user's knowledge base for relevant documents. + + This is the async implementation that searches across multiple connectors. + + Args: + query: The search query + search_space_id: The user's search space ID + db_session: Database session + connector_service: Initialized connector service + connectors_to_search: Optional list of connector types to search. If omitted, searches all. 
+ top_k: Number of results per connector + start_date: Optional start datetime (UTC) for filtering documents + end_date: Optional end datetime (UTC) for filtering documents + + Returns: + Formatted string with search results + """ + all_documents = [] + + # Resolve date range (default last 2 years) + from app.agents.new_chat.utils import resolve_date_range + + resolved_start_date, resolved_end_date = resolve_date_range( + start_date=start_date, + end_date=end_date, + ) + + connectors = _normalize_connectors(connectors_to_search) + + for connector in connectors: + try: + if connector == "YOUTUBE_VIDEO": + _, chunks = await connector_service.search_youtube( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "EXTENSION": + _, chunks = await connector_service.search_extension( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "CRAWLED_URL": + _, chunks = await connector_service.search_crawled_urls( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "FILE": + _, chunks = await connector_service.search_files( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "SLACK_CONNECTOR": + _, chunks = await connector_service.search_slack( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "NOTION_CONNECTOR": + _, chunks = await connector_service.search_notion( + user_query=query, + 
search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "GITHUB_CONNECTOR": + _, chunks = await connector_service.search_github( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "LINEAR_CONNECTOR": + _, chunks = await connector_service.search_linear( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "TAVILY_API": + _, chunks = await connector_service.search_tavily( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + ) + all_documents.extend(chunks) + + elif connector == "SEARXNG_API": + _, chunks = await connector_service.search_searxng( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + ) + all_documents.extend(chunks) + + elif connector == "LINKUP_API": + # Keep behavior aligned with researcher: default "standard" + _, chunks = await connector_service.search_linkup( + user_query=query, + search_space_id=search_space_id, + mode="standard", + ) + all_documents.extend(chunks) + + elif connector == "BAIDU_SEARCH_API": + _, chunks = await connector_service.search_baidu( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + ) + all_documents.extend(chunks) + + elif connector == "DISCORD_CONNECTOR": + _, chunks = await connector_service.search_discord( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "JIRA_CONNECTOR": + _, chunks = await connector_service.search_jira( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + 
end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "GOOGLE_CALENDAR_CONNECTOR": + _, chunks = await connector_service.search_google_calendar( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "AIRTABLE_CONNECTOR": + _, chunks = await connector_service.search_airtable( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "GOOGLE_GMAIL_CONNECTOR": + _, chunks = await connector_service.search_google_gmail( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "CONFLUENCE_CONNECTOR": + _, chunks = await connector_service.search_confluence( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "CLICKUP_CONNECTOR": + _, chunks = await connector_service.search_clickup( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "LUMA_CONNECTOR": + _, chunks = await connector_service.search_luma( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "ELASTICSEARCH_CONNECTOR": + _, chunks = await connector_service.search_elasticsearch( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "NOTE": + _, 
chunks = await connector_service.search_notes( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "BOOKSTACK_CONNECTOR": + _, chunks = await connector_service.search_bookstack( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + except Exception as e: + print(f"Error searching connector {connector}: {e}") + continue + + # Deduplicate by content hash + seen_doc_ids: set[Any] = set() + seen_hashes: set[int] = set() + deduplicated: list[dict[str, Any]] = [] + for doc in all_documents: + doc_id = (doc.get("document", {}) or {}).get("id") + content = (doc.get("content", "") or "").strip() + content_hash = hash(content) + + if (doc_id and doc_id in seen_doc_ids) or content_hash in seen_hashes: + continue + + if doc_id: + seen_doc_ids.add(doc_id) + seen_hashes.add(content_hash) + deduplicated.append(doc) + + return format_documents_for_context(deduplicated) + + +def create_search_knowledge_base_tool( + search_space_id: int, + db_session: AsyncSession, + connector_service: ConnectorService, +): + """ + Factory function to create the search_knowledge_base tool with injected dependencies. + + Args: + search_space_id: The user's search space ID + db_session: Database session + connector_service: Initialized connector service + + Returns: + A configured tool function + """ + + @tool + async def search_knowledge_base( + query: str, + top_k: int = 10, + start_date: str | None = None, + end_date: str | None = None, + connectors_to_search: list[str] | None = None, + ) -> str: + """ + Search the user's personal knowledge base for relevant information. + + Use this tool to find documents, notes, files, web pages, and other content + that may help answer the user's question. 
+ + IMPORTANT: + - If the user requests a specific source type (e.g. "my notes", "Slack messages"), + pass `connectors_to_search=[...]` using the enums below. + - If `connectors_to_search` is omitted/empty, the system will search broadly. + + ## Available connector enums for `connectors_to_search` + + - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) + - FILE: "User-uploaded documents (PDFs, Word, etc.)" (personal files) + - NOTE: "SurfSense Notes" (notes created inside SurfSense) + - SLACK_CONNECTOR: "Slack conversations and shared content" (personal workspace communications) + - NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management) + - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos) + - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions) + - ELASTICSEARCH_CONNECTOR: "Elasticsearch indexed documents and data" (personal Elasticsearch instances and custom data sources) + - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management) + - JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking) + - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) + - CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management) + - GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management) + - GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications) + - DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications) + - AIRTABLE_CONNECTOR: "Airtable records, tables, and database content" (personal data management and organization) + - TAVILY_API: "Tavily search API results" (personalized search results) + - SEARXNG_API: "SearxNG search API results" (personalized search results) 
+ - LINKUP_API: "Linkup search API results" (personalized search results) + - BAIDU_SEARCH_API: "Baidu search API results" (personalized search results) + - LUMA_CONNECTOR: "Luma events" + - WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites) + - BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation) + + NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`. + + Args: + query: The search query - be specific and include key terms + top_k: Number of results to retrieve (default: 10) + start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00") + end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00") + connectors_to_search: Optional list of connector enums to search. If omitted, searches all. + + Returns: + Formatted string with relevant documents and their content + """ + from app.agents.new_chat.utils import parse_date_or_datetime + + parsed_start: datetime | None = None + parsed_end: datetime | None = None + + if start_date: + parsed_start = parse_date_or_datetime(start_date) + if end_date: + parsed_end = parse_date_or_datetime(end_date) + + return await search_knowledge_base_async( + query=query, + search_space_id=search_space_id, + db_session=db_session, + connector_service=connector_service, + connectors_to_search=connectors_to_search, + top_k=top_k, + start_date=parsed_start, + end_date=parsed_end, + ) + + return search_knowledge_base diff --git a/surfsense_backend/app/agents/new_chat/tools/link_preview.py b/surfsense_backend/app/agents/new_chat/tools/link_preview.py new file mode 100644 index 000000000..188863015 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/link_preview.py @@ -0,0 +1,295 @@ +""" +Link preview tool for the SurfSense agent. + +This module provides a tool for fetching URL metadata (title, description, +Open Graph image, etc.) to display rich link previews in the chat UI. 
+""" + +import hashlib +import re +from typing import Any +from urllib.parse import urlparse + +import httpx +from langchain_core.tools import tool + + +def extract_domain(url: str) -> str: + """Extract the domain from a URL.""" + try: + parsed = urlparse(url) + domain = parsed.netloc + # Remove 'www.' prefix if present + if domain.startswith("www."): + domain = domain[4:] + return domain + except Exception: + return "" + + +def extract_og_content(html: str, property_name: str) -> str | None: + """Extract Open Graph meta content from HTML.""" + # Try og:property first + pattern = rf']+property=["\']og:{property_name}["\'][^>]+content=["\']([^"\']+)["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + # Try content before property + pattern = rf']+content=["\']([^"\']+)["\'][^>]+property=["\']og:{property_name}["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + return None + + +def extract_twitter_content(html: str, name: str) -> str | None: + """Extract Twitter Card meta content from HTML.""" + pattern = ( + rf']+name=["\']twitter:{name}["\'][^>]+content=["\']([^"\']+)["\']' + ) + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + # Try content before name + pattern = ( + rf']+content=["\']([^"\']+)["\'][^>]+name=["\']twitter:{name}["\']' + ) + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + return None + + +def extract_meta_description(html: str) -> str | None: + """Extract meta description from HTML.""" + pattern = r']+name=["\']description["\'][^>]+content=["\']([^"\']+)["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + # Try content before name + pattern = r']+content=["\']([^"\']+)["\'][^>]+name=["\']description["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + return None + + +def extract_title(html: str) 
-> str | None: + """Extract title from HTML.""" + # Try og:title first + og_title = extract_og_content(html, "title") + if og_title: + return og_title + + # Try twitter:title + twitter_title = extract_twitter_content(html, "title") + if twitter_title: + return twitter_title + + # Fall back to tag + pattern = r"<title[^>]*>([^<]+)" + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1).strip() + + return None + + +def extract_description(html: str) -> str | None: + """Extract description from HTML.""" + # Try og:description first + og_desc = extract_og_content(html, "description") + if og_desc: + return og_desc + + # Try twitter:description + twitter_desc = extract_twitter_content(html, "description") + if twitter_desc: + return twitter_desc + + # Fall back to meta description + return extract_meta_description(html) + + +def extract_image(html: str) -> str | None: + """Extract image URL from HTML.""" + # Try og:image first + og_image = extract_og_content(html, "image") + if og_image: + return og_image + + # Try twitter:image + twitter_image = extract_twitter_content(html, "image") + if twitter_image: + return twitter_image + + return None + + +def generate_preview_id(url: str) -> str: + """Generate a unique ID for a link preview.""" + hash_val = hashlib.md5(url.encode()).hexdigest()[:12] + return f"link-preview-{hash_val}" + + +def create_link_preview_tool(): + """ + Factory function to create the link_preview tool. + + Returns: + A configured tool function for fetching link previews. + """ + + @tool + async def link_preview(url: str) -> dict[str, Any]: + """ + Fetch metadata for a URL to display a rich link preview. + + Use this tool when the user shares a URL or asks about a specific webpage. + This tool fetches the page's Open Graph metadata (title, description, image) + to display a nice preview card in the chat. + + Common triggers include: + - User shares a URL in the chat + - User asks "What's this link about?" 
or similar + - User says "Show me a preview of this page" + - User wants to preview an article or webpage + + Args: + url: The URL to fetch metadata for. Must be a valid HTTP/HTTPS URL. + + Returns: + A dictionary containing: + - id: Unique identifier for this preview + - assetId: The URL itself (for deduplication) + - kind: "link" (type of media card) + - href: The URL to open when clicked + - title: Page title + - description: Page description (if available) + - thumb: Thumbnail/preview image URL (if available) + - domain: The domain name + - error: Error message (if fetch failed) + """ + preview_id = generate_preview_id(url) + domain = extract_domain(url) + + # Validate URL + if not url.startswith(("http://", "https://")): + url = f"https://{url}" + + try: + async with httpx.AsyncClient( + timeout=10.0, + follow_redirects=True, + headers={ + "User-Agent": "Mozilla/5.0 (compatible; SurfSenseBot/1.0; +https://surfsense.net)", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + }, + ) as client: + response = await client.get(url) + response.raise_for_status() + + # Get content type to ensure it's HTML + content_type = response.headers.get("content-type", "") + if "text/html" not in content_type.lower(): + # Not an HTML page, return basic info + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": url.split("/")[-1] or domain, + "description": f"File from {domain}", + "domain": domain, + } + + html = response.text + + # Extract metadata + title = extract_title(html) or domain + description = extract_description(html) + image = extract_image(html) + + # Make sure image URL is absolute + if image and not image.startswith(("http://", "https://")): + if image.startswith("//"): + image = f"https:{image}" + elif image.startswith("/"): + parsed = urlparse(url) + image = f"{parsed.scheme}://{parsed.netloc}{image}" + + # Clean up title and description (unescape HTML 
entities) + if title: + title = ( + title.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(""", '"') + .replace("'", "'") + .replace("'", "'") + ) + if description: + description = ( + description.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(""", '"') + .replace("'", "'") + .replace("'", "'") + ) + # Truncate long descriptions + if len(description) > 200: + description = description[:197] + "..." + + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": title, + "description": description, + "thumb": image, + "domain": domain, + } + + except httpx.TimeoutException: + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": domain or "Link", + "domain": domain, + "error": "Request timed out", + } + except httpx.HTTPStatusError as e: + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": domain or "Link", + "domain": domain, + "error": f"HTTP {e.response.status_code}", + } + except Exception as e: + error_message = str(e) + print(f"[link_preview] Error fetching {url}: {error_message}") + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": domain or "Link", + "domain": domain, + "error": f"Failed to fetch: {error_message[:50]}", + } + + return link_preview diff --git a/surfsense_backend/app/agents/new_chat/tools/podcast.py b/surfsense_backend/app/agents/new_chat/tools/podcast.py new file mode 100644 index 000000000..ff567bf73 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/podcast.py @@ -0,0 +1,173 @@ +""" +Podcast generation tool for the SurfSense agent. + +This module provides a factory function for creating the generate_podcast tool +that submits a Celery task for background podcast generation. The frontend +polls for completion and auto-updates when the podcast is ready. 
+ +Duplicate request prevention: +- Only one podcast can be generated at a time per search space +- Uses Redis to track active podcast tasks +- Returns a friendly message if a podcast is already being generated +""" + +import os +from typing import Any + +import redis +from langchain_core.tools import tool +from sqlalchemy.ext.asyncio import AsyncSession + +# Redis connection for tracking active podcast tasks +# Uses the same Redis instance as Celery +REDIS_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") +_redis_client: redis.Redis | None = None + + +def get_redis_client() -> redis.Redis: + """Get or create Redis client for podcast task tracking.""" + global _redis_client + if _redis_client is None: + _redis_client = redis.from_url(REDIS_URL, decode_responses=True) + return _redis_client + + +def get_active_podcast_key(search_space_id: int) -> str: + """Generate Redis key for tracking active podcast task.""" + return f"podcast:active:{search_space_id}" + + +def get_active_podcast_task(search_space_id: int) -> str | None: + """Check if there's an active podcast task for this search space.""" + try: + client = get_redis_client() + return client.get(get_active_podcast_key(search_space_id)) + except Exception: + # If Redis is unavailable, allow the request (fail open) + return None + + +def set_active_podcast_task(search_space_id: int, task_id: str) -> None: + """Mark a podcast task as active for this search space.""" + try: + client = get_redis_client() + # Set with 30-minute expiry as safety net (podcast should complete before this) + client.setex(get_active_podcast_key(search_space_id), 1800, task_id) + except Exception as e: + print(f"[generate_podcast] Warning: Could not set active task in Redis: {e}") + + +def clear_active_podcast_task(search_space_id: int) -> None: + """Clear the active podcast task for this search space.""" + try: + client = get_redis_client() + client.delete(get_active_podcast_key(search_space_id)) + except Exception as e: + 
def create_generate_podcast_tool(
    search_space_id: int,
    db_session: AsyncSession,
):
    """
    Build the generate_podcast tool bound to a specific search space.

    Args:
        search_space_id: The user's search space ID, captured in the closure.
        db_session: Database session. Accepted for a uniform factory signature
            but unused here — the Celery worker opens its own session.

    Returns:
        The configured ``generate_podcast`` tool function.
    """

    @tool
    async def generate_podcast(
        source_content: str,
        podcast_title: str = "SurfSense Podcast",
        user_prompt: str | None = None,
    ) -> dict[str, Any]:
        """
        Generate a podcast from the provided content.

        Use this tool when the user asks to create, generate, or make a podcast.
        Common triggers include phrases like:
        - "Give me a podcast about this"
        - "Create a podcast from this conversation"
        - "Generate a podcast summary"
        - "Make a podcast about..."
        - "Turn this into a podcast"

        The tool will start generating a podcast in the background.
        The podcast will be available once generation completes.

        IMPORTANT: Only one podcast can be generated at a time. If a podcast
        is already being generated, this tool will return a message asking
        the user to wait.

        Args:
            source_content: The text content to convert into a podcast.
                This can be a summary, research findings, or any text
                the user wants transformed into an audio podcast.
            podcast_title: Title for the podcast (default: "SurfSense Podcast")
            user_prompt: Optional instructions for podcast style, tone, or format.
                For example: "Make it casual and fun" or "Focus on the key insights"

        Returns:
            A dictionary containing:
            - status: "processing" (task submitted), "already_generating", or "error"
            - task_id: The Celery task ID for polling status (if processing)
            - title: The podcast title
            - message: Status message for the user
        """
        try:
            # Enforce the one-podcast-at-a-time rule via the Redis marker.
            in_flight = get_active_podcast_task(search_space_id)
            if in_flight:
                print(
                    f"[generate_podcast] Blocked duplicate request. Active task: {in_flight}"
                )
                return {
                    "status": "already_generating",
                    "task_id": in_flight,
                    "title": podcast_title,
                    "message": "A podcast is already being generated. Please wait for it to complete before requesting another one.",
                }

            # Imported lazily to avoid a circular import at module load time.
            from app.tasks.celery_tasks.podcast_tasks import (
                generate_content_podcast_task,
            )

            # Hand the work to Celery; generation happens out-of-process.
            job = generate_content_podcast_task.delay(
                source_content=source_content,
                search_space_id=search_space_id,
                podcast_title=podcast_title,
                user_prompt=user_prompt,
            )

            # Record the in-flight task so concurrent requests are rejected.
            set_active_podcast_task(search_space_id, job.id)
            print(f"[generate_podcast] Submitted Celery task: {job.id}")

            # Respond immediately; the caller polls with task_id.
            return {
                "status": "processing",
                "task_id": job.id,
                "title": podcast_title,
                "message": "Podcast generation started. This may take a few minutes.",
            }

        except Exception as exc:
            reason = str(exc)
            print(f"[generate_podcast] Error submitting task: {reason}")
            return {
                "status": "error",
                "error": reason,
                "title": podcast_title,
                "task_id": None,
            }

    return generate_podcast
@dataclass
class ToolDefinition:
    """
    Definition of a tool that can be added to the agent.

    Attributes:
        name: Unique identifier for the tool
        description: Human-readable description of what the tool does
        factory: Callable that creates the tool. Receives a dict of dependencies.
        requires: List of dependency names this tool needs (e.g., "search_space_id", "db_session")
        enabled_by_default: Whether the tool is enabled when no explicit config is provided
    """

    # Must be unique across BUILTIN_TOOLS; used for enable/disable lookups.
    name: str
    description: str
    # Called with the full dependency dict; only the keys in `requires`
    # are guaranteed to be present when the factory runs.
    factory: Callable[[dict[str, Any]], BaseTool]
    # default_factory avoids the shared-mutable-default pitfall.
    requires: list[str] = field(default_factory=list)
    enabled_by_default: bool = True
# Registry of all built-in tools.
# NOTE: list order is the order in which build_tools() instantiates them.
BUILTIN_TOOLS: list[ToolDefinition] = [
    # Core tool - searches the user's knowledge base
    ToolDefinition(
        name="search_knowledge_base",
        description="Search the user's personal knowledge base for relevant information",
        factory=lambda deps: create_search_knowledge_base_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            connector_service=deps["connector_service"],
        ),
        requires=["search_space_id", "db_session", "connector_service"],
    ),
    # Podcast generation tool
    ToolDefinition(
        name="generate_podcast",
        description="Generate an audio podcast from provided content",
        factory=lambda deps: create_generate_podcast_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
        ),
        requires=["search_space_id", "db_session"],
    ),
    # Link preview tool - fetches Open Graph metadata for URLs
    ToolDefinition(
        name="link_preview",
        description="Fetch metadata for a URL to display a rich preview card",
        factory=lambda deps: create_link_preview_tool(),
        requires=[],
    ),
    # Display image tool - shows images in the chat
    ToolDefinition(
        name="display_image",
        description="Display an image in the chat with metadata",
        factory=lambda deps: create_display_image_tool(),
        requires=[],
    ),
    # Web scraping tool - extracts content from webpages
    ToolDefinition(
        name="scrape_webpage",
        description="Scrape and extract the main content from a webpage",
        factory=lambda deps: create_scrape_webpage_tool(
            # .get(): the key may legitimately be absent, hence requires=[]
            firecrawl_api_key=deps.get("firecrawl_api_key"),
        ),
        requires=[],  # firecrawl_api_key is optional
    ),
    # =========================================================================
    # ADD YOUR CUSTOM TOOLS BELOW
    # =========================================================================
    # Example:
    # ToolDefinition(
    #     name="my_custom_tool",
    #     description="What my tool does",
    #     factory=lambda deps: create_my_custom_tool(...),
    #     requires=["search_space_id"],
    # ),
]
def get_tool_by_name(name: str) -> ToolDefinition | None:
    """Return the registered tool definition called *name*, or None if absent."""
    return next((td for td in BUILTIN_TOOLS if td.name == name), None)


def get_all_tool_names() -> list[str]:
    """Return the names of every registered tool, in registry order."""
    return [td.name for td in BUILTIN_TOOLS]


def get_default_enabled_tools() -> list[str]:
    """Return the names of tools that are enabled by default."""
    return [td.name for td in BUILTIN_TOOLS if td.enabled_by_default]


def build_tools(
    dependencies: dict[str, Any],
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: list[BaseTool] | None = None,
) -> list[BaseTool]:
    """
    Instantiate the agent's tool list from the registry.

    Args:
        dependencies: Dict of everything a factory might need, e.g.
            search_space_id, db_session, connector_service, firecrawl_api_key.
        enabled_tools: Explicit allow-list of tool names; defaults to the
            registry's default-enabled set when None.
        disabled_tools: Names to drop after the allow-list is resolved.
        additional_tools: Pre-built custom tools appended verbatim.

    Returns:
        Configured tool instances, in registry order, followed by any
        additional custom tools.

    Raises:
        ValueError: If an enabled tool is missing a required dependency.
    """
    # Resolve the active set: explicit allow-list wins, defaults otherwise.
    selected = (
        set(enabled_tools) if enabled_tools is not None else set(get_default_enabled_tools())
    )
    selected -= set(disabled_tools or [])

    built: list[BaseTool] = []
    for td in BUILTIN_TOOLS:
        if td.name not in selected:
            continue

        # Fail fast with a precise message rather than a KeyError in a factory.
        missing = [dep for dep in td.requires if dep not in dependencies]
        if missing:
            raise ValueError(
                f"Tool '{td.name}' requires dependencies: {missing}"
            )

        built.append(td.factory(dependencies))

    built.extend(additional_tools or [])
    return built
+""" + +import hashlib +from typing import Any +from urllib.parse import urlparse + +from langchain_core.tools import tool + +from app.connectors.webcrawler_connector import WebCrawlerConnector + + +def extract_domain(url: str) -> str: + """Extract the domain from a URL.""" + try: + parsed = urlparse(url) + domain = parsed.netloc + # Remove 'www.' prefix if present + if domain.startswith("www."): + domain = domain[4:] + return domain + except Exception: + return "" + + +def generate_scrape_id(url: str) -> str: + """Generate a unique ID for a scraped webpage.""" + hash_val = hashlib.md5(url.encode()).hexdigest()[:12] + return f"scrape-{hash_val}" + + +def truncate_content(content: str, max_length: int = 50000) -> tuple[str, bool]: + """ + Truncate content to a maximum length. + + Returns: + Tuple of (truncated_content, was_truncated) + """ + if len(content) <= max_length: + return content, False + + # Try to truncate at a sentence boundary + truncated = content[:max_length] + last_period = truncated.rfind(".") + last_newline = truncated.rfind("\n\n") + + # Use the later of the two boundaries, or just truncate + boundary = max(last_period, last_newline) + if boundary > max_length * 0.8: # Only use boundary if it's not too far back + truncated = content[: boundary + 1] + + return truncated + "\n\n[Content truncated...]", True + + +def create_scrape_webpage_tool(firecrawl_api_key: str | None = None): + """ + Factory function to create the scrape_webpage tool. + + Args: + firecrawl_api_key: Optional Firecrawl API key for premium web scraping. + Falls back to Chromium/Trafilatura if not provided. + + Returns: + A configured tool function for scraping webpages. + """ + + @tool + async def scrape_webpage( + url: str, + max_length: int = 50000, + ) -> dict[str, Any]: + """ + Scrape and extract the main content from a webpage. + + Use this tool when the user wants you to read, summarize, or answer + questions about a specific webpage's content. 
This tool actually + fetches and reads the full page content. + + Common triggers: + - "Read this article and summarize it" + - "What does this page say about X?" + - "Summarize this blog post for me" + - "Tell me the key points from this article" + - "What's in this webpage?" + + Args: + url: The URL of the webpage to scrape (must be HTTP/HTTPS) + max_length: Maximum content length to return (default: 50000 chars) + + Returns: + A dictionary containing: + - id: Unique identifier for this scrape + - assetId: The URL (for deduplication) + - kind: "article" (type of content) + - href: The URL to open when clicked + - title: Page title + - description: Brief description or excerpt + - content: The extracted main content (markdown format) + - domain: The domain name + - word_count: Approximate word count + - was_truncated: Whether content was truncated + - error: Error message (if scraping failed) + """ + scrape_id = generate_scrape_id(url) + domain = extract_domain(url) + + # Validate and normalize URL + if not url.startswith(("http://", "https://")): + url = f"https://{url}" + + try: + # Create webcrawler connector + connector = WebCrawlerConnector(firecrawl_api_key=firecrawl_api_key) + + # Crawl the URL + result, error = await connector.crawl_url(url, formats=["markdown"]) + + if error: + return { + "id": scrape_id, + "assetId": url, + "kind": "article", + "href": url, + "title": domain or "Webpage", + "domain": domain, + "error": error, + } + + if not result: + return { + "id": scrape_id, + "assetId": url, + "kind": "article", + "href": url, + "title": domain or "Webpage", + "domain": domain, + "error": "No content returned from crawler", + } + + # Extract content and metadata + content = result.get("content", "") + metadata = result.get("metadata", {}) + + # Get title from metadata + title = metadata.get("title", "") + if not title: + title = domain or url.split("/")[-1] or "Webpage" + + # Get description from metadata + description = metadata.get("description", 
"") + if not description and content: + # Use first paragraph as description + first_para = content.split("\n\n")[0] if content else "" + description = ( + first_para[:300] + "..." if len(first_para) > 300 else first_para + ) + + # Truncate content if needed + content, was_truncated = truncate_content(content, max_length) + + # Calculate word count + word_count = len(content.split()) + + return { + "id": scrape_id, + "assetId": url, + "kind": "article", + "href": url, + "title": title, + "description": description, + "content": content, + "domain": domain, + "word_count": word_count, + "was_truncated": was_truncated, + "crawler_type": result.get("crawler_type", "unknown"), + "author": metadata.get("author"), + "date": metadata.get("date"), + } + + except Exception as e: + error_message = str(e) + print(f"[scrape_webpage] Error scraping {url}: {error_message}") + return { + "id": scrape_id, + "assetId": url, + "kind": "article", + "href": url, + "title": domain or "Webpage", + "domain": domain, + "error": f"Failed to scrape: {error_message[:100]}", + } + + return scrape_webpage diff --git a/surfsense_backend/app/agents/new_chat/utils.py b/surfsense_backend/app/agents/new_chat/utils.py new file mode 100644 index 000000000..919fb4995 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/utils.py @@ -0,0 +1,63 @@ +""" +Utility functions for SurfSense agents. + +This module provides shared utility functions used across the new_chat agent modules. +""" + +from datetime import UTC, datetime, timedelta + + +def parse_date_or_datetime(value: str) -> datetime: + """ + Parse either an ISO date (YYYY-MM-DD) or ISO datetime into an aware UTC datetime. + + - If `value` is a date, interpret it as start-of-day in UTC. + - If `value` is a datetime without timezone, assume UTC. 
+ + Args: + value: ISO date or datetime string + + Returns: + Aware datetime object in UTC + + Raises: + ValueError: If the date string is empty or invalid + """ + raw = (value or "").strip() + if not raw: + raise ValueError("Empty date string") + + # Date-only + if "T" not in raw: + d = datetime.fromisoformat(raw).date() + return datetime(d.year, d.month, d.day, tzinfo=UTC) + + # Datetime (may be naive) + dt = datetime.fromisoformat(raw) + if dt.tzinfo is None: + return dt.replace(tzinfo=UTC) + return dt.astimezone(UTC) + + +def resolve_date_range( + start_date: datetime | None, + end_date: datetime | None, +) -> tuple[datetime, datetime]: + """ + Resolve a date range, defaulting to the last 2 years if not provided. + Ensures start_date <= end_date. + + Args: + start_date: Optional start datetime (UTC) + end_date: Optional end datetime (UTC) + + Returns: + Tuple of (resolved_start_date, resolved_end_date) in UTC + """ + resolved_end = end_date or datetime.now(UTC) + resolved_start = start_date or (resolved_end - timedelta(days=730)) + + if resolved_start > resolved_end: + resolved_start, resolved_end = resolved_end, resolved_start + + return resolved_start, resolved_end diff --git a/surfsense_backend/app/agents/podcaster/configuration.py b/surfsense_backend/app/agents/podcaster/configuration.py index c7433dadc..6a903f9df 100644 --- a/surfsense_backend/app/agents/podcaster/configuration.py +++ b/surfsense_backend/app/agents/podcaster/configuration.py @@ -16,7 +16,6 @@ class Configuration: # create assistants (https://langchain-ai.github.io/langgraph/cloud/how-tos/configuration_cloud/) # and when you invoke the graph podcast_title: str - user_id: str search_space_id: int user_prompt: str | None = None diff --git a/surfsense_backend/app/agents/podcaster/nodes.py b/surfsense_backend/app/agents/podcaster/nodes.py index 31a687763..3f908737a 100644 --- a/surfsense_backend/app/agents/podcaster/nodes.py +++ b/surfsense_backend/app/agents/podcaster/nodes.py @@ -12,7 +12,7 
@@ from litellm import aspeech from app.config import config as app_config from app.services.kokoro_tts_service import get_kokoro_tts_service -from app.services.llm_service import get_user_long_context_llm +from app.services.llm_service import get_document_summary_llm from .configuration import Configuration from .prompts import get_podcast_generation_prompt @@ -27,14 +27,15 @@ async def create_podcast_transcript( # Get configuration from runnable config configuration = Configuration.from_runnable_config(config) - user_id = configuration.user_id search_space_id = configuration.search_space_id user_prompt = configuration.user_prompt - # Get user's long context LLM - llm = await get_user_long_context_llm(state.db_session, user_id, search_space_id) + # Get search space's document summary LLM + llm = await get_document_summary_llm(state.db_session, search_space_id) if not llm: - error_message = f"No long context LLM configured for user {user_id} in search space {search_space_id}" + error_message = ( + f"No document summary LLM configured for search space {search_space_id}" + ) print(error_message) raise RuntimeError(error_message) diff --git a/surfsense_backend/app/agents/researcher/__init__.py b/surfsense_backend/app/agents/researcher/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/surfsense_backend/app/agents/researcher/configuration.py b/surfsense_backend/app/agents/researcher/configuration.py deleted file mode 100644 index c89592c65..000000000 --- a/surfsense_backend/app/agents/researcher/configuration.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Define the configurable parameters for the agent.""" - -from __future__ import annotations - -from dataclasses import dataclass, fields - -from langchain_core.runnables import RunnableConfig - - -@dataclass(kw_only=True) -class Configuration: - """The configuration for the agent.""" - - # Input parameters provided at invocation - user_query: str - connectors_to_search: list[str] - user_id: str - 
search_space_id: int - document_ids_to_add_in_context: list[int] - language: str | None = None - top_k: int = 10 - - @classmethod - def from_runnable_config( - cls, config: RunnableConfig | None = None - ) -> Configuration: - """Create a Configuration instance from a RunnableConfig object.""" - configurable = (config.get("configurable") or {}) if config else {} - _fields = {f.name for f in fields(cls) if f.init} - return cls(**{k: v for k, v in configurable.items() if k in _fields}) diff --git a/surfsense_backend/app/agents/researcher/graph.py b/surfsense_backend/app/agents/researcher/graph.py deleted file mode 100644 index be2a1cff5..000000000 --- a/surfsense_backend/app/agents/researcher/graph.py +++ /dev/null @@ -1,47 +0,0 @@ -from langgraph.graph import StateGraph - -from .configuration import Configuration -from .nodes import ( - generate_further_questions, - handle_qna_workflow, - reformulate_user_query, -) -from .state import State - - -def build_graph(): - """ - Build and return the LangGraph workflow. - - This function constructs the researcher agent graph for Q&A workflow. - The workflow follows a simple path: - 1. Reformulate user query based on chat history - 2. Handle QNA workflow (fetch documents and generate answer) - 3. 
Generate follow-up questions - - Returns: - A compiled LangGraph workflow - """ - # Define a new graph with state class - workflow = StateGraph(State, config_schema=Configuration) - - # Add nodes to the graph - workflow.add_node("reformulate_user_query", reformulate_user_query) - workflow.add_node("handle_qna_workflow", handle_qna_workflow) - workflow.add_node("generate_further_questions", generate_further_questions) - - # Define the edges - simple linear flow for QNA - workflow.add_edge("__start__", "reformulate_user_query") - workflow.add_edge("reformulate_user_query", "handle_qna_workflow") - workflow.add_edge("handle_qna_workflow", "generate_further_questions") - workflow.add_edge("generate_further_questions", "__end__") - - # Compile the workflow into an executable graph - graph = workflow.compile() - graph.name = "Surfsense Researcher" # This defines the custom name in LangSmith - - return graph - - -# Compile the graph once when the module is loaded -graph = build_graph() diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py deleted file mode 100644 index b16d4f0c1..000000000 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ /dev/null @@ -1,1785 +0,0 @@ -import json -import logging -import traceback -from datetime import UTC, datetime, timedelta -from typing import Any - -from langchain_core.messages import HumanMessage, SystemMessage -from langchain_core.runnables import RunnableConfig -from langgraph.types import StreamWriter -from sqlalchemy.ext.asyncio import AsyncSession - -# Additional imports for document fetching -from sqlalchemy.future import select - -from app.db import Document -from app.services.connector_service import ConnectorService -from app.services.query_service import QueryService - -from .configuration import Configuration -from .prompts import get_further_questions_system_prompt -from .qna_agent.graph import graph as qna_agent_graph -from .state import State -from 
.utils import get_connector_emoji, get_connector_friendly_name - -# Time filter constants - hardcoded 2 year time range for now -DEFAULT_TIME_FILTER_YEARS = 2 - - -def extract_sources_from_documents( - all_documents: list[dict[str, Any]], -) -> list[dict[str, Any]]: - """ - Extract sources from **document-grouped** results and group them by document type. - - Args: - all_documents: List of document-grouped results from user-selected documents and connector-fetched documents - - Returns: - List of source objects grouped by type for streaming - """ - # Group sources by their source type - documents_by_type = {} - - for doc in all_documents: - document_info = doc.get("document", {}) or {} - source_type = doc.get("source", "UNKNOWN") - document_type = document_info.get("document_type", source_type) or source_type - group_type = document_type if document_type != "UNKNOWN" else source_type - if group_type not in documents_by_type: - documents_by_type[group_type] = [] - documents_by_type[group_type].append(doc) - - # Create source objects for each document type - source_objects = [] - for doc_type, docs in documents_by_type.items(): - sources_list = [] - - for doc in docs: - document_info = doc.get("document", {}) - metadata = document_info.get("metadata", {}) - url = ( - metadata.get("url") - or metadata.get("source") - or metadata.get("page_url") - or metadata.get("VisitedWebPageURL") - or "" - ) - - # Each chunk becomes a source entry so citations like [citation:] resolve in UI. - for chunk in doc.get("chunks", []) or []: - chunk_id = chunk.get("chunk_id") - chunk_content = (chunk.get("content") or "").strip() - description = ( - chunk_content - if len(chunk_content) <= 240 - else chunk_content[:240] + "..." 
- ) - sources_list.append( - { - "id": chunk_id, - "title": document_info.get("title", "Untitled Document"), - "description": description, - "url": url, - } - ) - - # Create group object - group_name = ( - get_connector_friendly_name(doc_type) - if doc_type != "UNKNOWN" - else "Unknown Sources" - ) - - source_object = { - "id": len(source_objects) + 1, - "name": group_name, - "type": doc_type, - "sources": sources_list, - } - - source_objects.append(source_object) - - return source_objects - - -async def fetch_documents_by_ids( - document_ids: list[int], search_space_id: int, db_session: AsyncSession -) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: - """ - Fetch documents by their IDs within a search space. - - This function ensures that only documents belonging to the search space are fetched. - It fetches full documents and returns their chunks individually. - Also creates source objects for UI display, grouped by document type. - - Args: - document_ids: List of document IDs to fetch - search_space_id: The search space ID to filter by - db_session: The database session - - Returns: - Tuple of (source_objects, document_chunks) - similar to ConnectorService pattern - """ - if not document_ids: - return [], [] - - try: - # Query documents filtered by search space - result = await db_session.execute( - select(Document).filter( - Document.id.in_(document_ids), - Document.search_space_id == search_space_id, - ) - ) - documents = result.scalars().all() - - # Group documents by type for source object creation - documents_by_type: dict[str, list[Document]] = {} - formatted_documents: list[dict[str, Any]] = [] - - from app.db import Chunk - - for doc in documents: - # Fetch associated chunks for this document - chunks_query = ( - select(Chunk).where(Chunk.document_id == doc.id).order_by(Chunk.id) - ) - chunks_result = await db_session.execute(chunks_query) - chunks = chunks_result.scalars().all() - - doc_type = doc.document_type.value if doc.document_type else 
"UNKNOWN" - documents_by_type.setdefault(doc_type, []).append(doc) - - doc_group = { - "document_id": doc.id, - "content": "\n\n".join(c.content for c in chunks) - if chunks - else (doc.content or ""), - "score": 0.5, # High score since user explicitly selected these - "chunks": [{"chunk_id": c.id, "content": c.content} for c in chunks] - if chunks - else [], - "document": { - "id": doc.id, - "title": doc.title, - "document_type": doc_type, - "metadata": doc.document_metadata or {}, - }, - "source": doc_type, - } - formatted_documents.append(doc_group) - - # Create source objects for each document type (similar to ConnectorService) - source_objects = [] - connector_id_counter = 100 - - for doc_type, docs in documents_by_type.items(): - sources_list = [] - - for doc in docs: - metadata = doc.document_metadata or {} - - # Create type-specific source formatting (similar to ConnectorService) - if doc_type == "LINEAR_CONNECTOR": - # Extract Linear-specific metadata - issue_identifier = metadata.get("issue_identifier", "") - issue_title = metadata.get("issue_title", doc.title) - issue_state = metadata.get("state", "") - comment_count = metadata.get("comment_count", 0) - - # Create a more descriptive title for Linear issues - title = ( - f"Linear: {issue_identifier} - {issue_title}" - if issue_identifier - else f"Linear: {issue_title}" - ) - if issue_state: - title += f" ({issue_state})" - - # Create description - description = doc.content - if comment_count: - description += f" | Comments: {comment_count}" - - # Create URL - url = ( - f"https://linear.app/issue/{issue_identifier}" - if issue_identifier - else "" - ) - - elif doc_type == "SLACK_CONNECTOR": - # Extract Slack-specific metadata - channel_name = metadata.get("channel_name", "Unknown Channel") - channel_id = metadata.get("channel_id", "") - message_date = metadata.get("start_date", "") - - title = f"Slack: {channel_name}" - if message_date: - title += f" ({message_date})" - - description = doc.content - url = 
( - f"https://slack.com/app_redirect?channel={channel_id}" - if channel_id - else "" - ) - - elif doc_type == "NOTION_CONNECTOR": - # Extract Notion-specific metadata - page_title = metadata.get("page_title", doc.title) - page_id = metadata.get("page_id", "") - - title = f"Notion: {page_title}" - description = doc.content - url = ( - f"https://notion.so/{page_id.replace('-', '')}" - if page_id - else "" - ) - - elif doc_type == "GITHUB_CONNECTOR": - title = f"GitHub: {doc.title}" - description = metadata.get( - "description", - (doc.content), - ) - url = metadata.get("url", "") - - elif doc_type == "YOUTUBE_VIDEO": - # Extract YouTube-specific metadata - video_title = metadata.get("video_title", doc.title) - video_id = metadata.get("video_id", "") - channel_name = metadata.get("channel_name", "") - - title = video_title - if channel_name: - title += f" - {channel_name}" - - description = metadata.get( - "description", - (doc.content), - ) - url = ( - f"https://www.youtube.com/watch?v={video_id}" - if video_id - else "" - ) - - elif doc_type == "DISCORD_CONNECTOR": - # Extract Discord-specific metadata - channel_name = metadata.get("channel_name", "Unknown Channel") - channel_id = metadata.get("channel_id", "") - guild_id = metadata.get("guild_id", "") - message_date = metadata.get("start_date", "") - - title = f"Discord: {channel_name}" - if message_date: - title += f" ({message_date})" - - description = doc.content - - if guild_id and channel_id: - url = f"https://discord.com/channels/{guild_id}/{channel_id}" - elif channel_id: - url = f"https://discord.com/channels/@me/{channel_id}" - else: - url = "" - - elif doc_type == "JIRA_CONNECTOR": - # Extract Jira-specific metadata - issue_key = metadata.get("issue_key", "Unknown Issue") - issue_title = metadata.get("issue_title", "Untitled Issue") - status = metadata.get("status", "") - priority = metadata.get("priority", "") - issue_type = metadata.get("issue_type", "") - - title = f"Jira: {issue_key} - {issue_title}" 
- if status: - title += f" ({status})" - - description = doc.content - if priority: - description += f" | Priority: {priority}" - if issue_type: - description += f" | Type: {issue_type}" - - # Construct Jira URL if we have the base URL - base_url = metadata.get("base_url", "") - if base_url and issue_key: - url = f"{base_url}/browse/{issue_key}" - else: - url = "" - - elif doc_type == "GOOGLE_CALENDAR_CONNECTOR": - # Extract Google Calendar-specific metadata - event_id = metadata.get("event_id", "Unknown Event") - event_summary = metadata.get("event_summary", "Untitled Event") - calendar_id = metadata.get("calendar_id", "") - start_time = metadata.get("start_time", "") - location = metadata.get("location", "") - - title = f"Calendar: {event_summary}" - if start_time: - # Format the start time for display - try: - if "T" in start_time: - from datetime import datetime - - start_dt = datetime.fromisoformat( - start_time.replace("Z", "+00:00") - ) - formatted_time = start_dt.strftime("%Y-%m-%d %H:%M") - title += f" ({formatted_time})" - else: - title += f" ({start_time})" - except Exception: - title += f" ({start_time})" - - elif doc_type == "AIRTABLE_CONNECTOR": - # Extract Airtable-specific metadata - base_name = metadata.get("base_name", "Unknown Base") - table_name = metadata.get("table_name", "Unknown Table") - record_id = metadata.get("record_id", "Unknown Record") - created_time = metadata.get("created_time", "") - - title = f"Airtable: {base_name} - {table_name}" - if record_id: - title += f" (Record: {record_id[:8]}...)" - if created_time: - # Format the created time for display - try: - if "T" in created_time: - from datetime import datetime - - created_dt = datetime.fromisoformat( - created_time.replace("Z", "+00:00") - ) - formatted_time = created_dt.strftime("%Y-%m-%d %H:%M") - title += f" - {formatted_time}" - except Exception: - pass - - description = doc.content - if location: - description += f" | Location: {location}" - if calendar_id and calendar_id 
!= "primary": - description += f" | Calendar: {calendar_id}" - - # Construct Google Calendar URL - if event_id: - url = ( - f"https://calendar.google.com/calendar/event?eid={event_id}" - ) - else: - url = "" - - elif doc_type == "LUMA_CONNECTOR": - # Extract Luma-specific metadata - event_id = metadata.get("event_id", "") - event_name = metadata.get("event_name", "Untitled Event") - event_url = metadata.get("event_url", "") - start_time = metadata.get("start_time", "") - location_name = metadata.get("location_name", "") - meeting_url = metadata.get("meeting_url", "") - - title = f"Luma: {event_name}" - if start_time: - # Format the start time for display - try: - if "T" in start_time: - from datetime import datetime - - start_dt = datetime.fromisoformat( - start_time.replace("Z", "+00:00") - ) - formatted_time = start_dt.strftime("%Y-%m-%d %H:%M") - title += f" ({formatted_time})" - except Exception: - pass - - description = doc.content - - if location_name: - description += f" | Venue: {location_name}" - elif meeting_url: - description += " | Online Event" - - url = event_url if event_url else "" - - elif doc_type == "EXTENSION": - # Extract Extension-specific metadata - webpage_title = metadata.get("VisitedWebPageTitle", doc.title) - webpage_url = metadata.get("VisitedWebPageURL", "") - visit_date = metadata.get( - "VisitedWebPageDateWithTimeInISOString", "" - ) - - title = webpage_title - if visit_date: - formatted_date = ( - visit_date.split("T")[0] - if "T" in visit_date - else visit_date - ) - title += f" (visited: {formatted_date})" - - description = doc.content - url = webpage_url - - elif doc_type == "CRAWLED_URL": - title = doc.title - description = metadata.get( - "og:description", - metadata.get( - "ogDescription", - (doc.content), - ), - ) - url = metadata.get("url", "") - - elif doc_type == "ELASTICSEARCH_CONNECTOR": - # Prefer explicit title in metadata/source, otherwise fallback to doc.title - es_title = ( - metadata.get("title") - or 
metadata.get("es_title") - or doc.title - or f"Elasticsearch: {metadata.get('elasticsearch_index', '')}" - ) - title = es_title - description = metadata.get("description") or ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ) - # If a link or index info is stored, surface it - url = metadata.get("url", "") or metadata.get( - "elasticsearch_index", "" - ) - - else: # FILE and other types - title = doc.title - description = doc.content - - url = metadata.get("url", "") - - # Create source entry - source = { - "id": doc.id, - "title": title, - "description": description, - "url": url, - } - sources_list.append(source) - - # Create source object for this document type - friendly_type_names = { - "LINEAR_CONNECTOR": "Linear Issues (Selected)", - "SLACK_CONNECTOR": "Slack (Selected)", - "NOTION_CONNECTOR": "Notion (Selected)", - "GITHUB_CONNECTOR": "GitHub (Selected)", - "ELASTICSEARCH_CONNECTOR": "Elasticsearch (Selected)", - "YOUTUBE_VIDEO": "YouTube Videos (Selected)", - "DISCORD_CONNECTOR": "Discord (Selected)", - "JIRA_CONNECTOR": "Jira Issues (Selected)", - "EXTENSION": "Browser Extension (Selected)", - "CRAWLED_URL": "Web Pages (Selected)", - "FILE": "Files (Selected)", - "GOOGLE_CALENDAR_CONNECTOR": "Google Calendar (Selected)", - "GOOGLE_GMAIL_CONNECTOR": "Google Gmail (Selected)", - "CONFLUENCE_CONNECTOR": "Confluence (Selected)", - "CLICKUP_CONNECTOR": "ClickUp (Selected)", - "AIRTABLE_CONNECTOR": "Airtable (Selected)", - "LUMA_CONNECTOR": "Luma Events (Selected)", - "NOTE": "Notes (Selected)", - } - - source_object = { - "id": connector_id_counter, - "name": friendly_type_names.get(doc_type, f"{doc_type} (Selected)"), - "type": f"USER_SELECTED_{doc_type}", - "sources": sources_list, - } - source_objects.append(source_object) - connector_id_counter += 1 - - print( - f"Fetched {len(formatted_documents)} user-selected chunks from {len(document_ids)} requested document IDs" - ) - print(f"Created {len(source_objects)} source objects 
for UI display") - - return source_objects, formatted_documents - - except Exception as e: - print(f"Error fetching documents by IDs: {e!s}") - return [], [] - - -async def fetch_relevant_documents( - research_questions: list[str], - search_space_id: int, - db_session: AsyncSession, - connectors_to_search: list[str], - writer: StreamWriter = None, - state: State = None, - top_k: int = 10, - connector_service: ConnectorService = None, - user_selected_sources: list[dict[str, Any]] | None = None, - start_date: datetime | None = None, - end_date: datetime | None = None, -) -> list[dict[str, Any]]: - """ - Fetch relevant documents for research questions using the provided connectors. - - This function searches across multiple data sources for information related to the - research questions. It provides user-friendly feedback during the search process by - displaying connector names (like "Web Search" instead of "TAVILY_API") and adding - relevant emojis to indicate the type of source being searched. - - Uses combined chunk-level and document-level hybrid search with RRF fusion. 
- - Args: - research_questions: List of research questions to find documents for - search_space_id: The search space ID - db_session: The database session - connectors_to_search: List of connectors to search - writer: StreamWriter for sending progress updates - state: The current state containing the streaming service - top_k: Number of top results to retrieve per connector per question - connector_service: An initialized connector service to use for searching - user_selected_sources: Optional list of user-selected source objects - start_date: Optional start date for filtering documents by updated_at - end_date: Optional end date for filtering documents by updated_at - - Returns: - List of relevant documents - """ - # Initialize services - # connector_service = ConnectorService(db_session) - - # Only use streaming if both writer and state are provided - streaming_service = state.streaming_service if state is not None else None - - # Handle case when no connectors are selected - if not connectors_to_search or len(connectors_to_search) == 0: - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - "📹 No data sources selected. Research will be generated using general knowledge and any user-selected documents." - ) - } - ) - print("No connectors selected for research. 
Returning empty document list.") - return [] # Return empty list gracefully - - # Stream initial status update - if streaming_service and writer: - connector_names = [ - get_connector_friendly_name(connector) for connector in connectors_to_search - ] - connector_names_str = ", ".join(connector_names) - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🔎 Starting research on {len(research_questions)} questions using {connector_names_str} data sources" - ) - } - ) - - all_raw_documents = [] # Store all raw documents - all_sources = [] # Store all sources - - for i, user_query in enumerate(research_questions): - # Stream question being researched - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f'🧠 Researching question {i + 1}/{len(research_questions)}: "{user_query[:100]}..."' - ) - } - ) - - # Use original research question as the query - reformulated_query = user_query - - # Process each selected connector - for connector in connectors_to_search: - # Stream connector being searched - if streaming_service and writer: - connector_emoji = get_connector_emoji(connector) - friendly_name = get_connector_friendly_name(connector) - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"{connector_emoji} Searching {friendly_name} for relevant information..." 
- ) - } - ) - - try: - if connector == "YOUTUBE_VIDEO": - ( - source_object, - youtube_chunks, - ) = await connector_service.search_youtube( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(youtube_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📹 Found {len(youtube_chunks)} YouTube chunks related to your query" - ) - } - ) - - elif connector == "EXTENSION": - ( - source_object, - extension_chunks, - ) = await connector_service.search_extension( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(extension_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🧩 Found {len(extension_chunks)} Browser Extension chunks related to your query" - ) - } - ) - - elif connector == "CRAWLED_URL": - ( - source_object, - crawled_urls_chunks, - ) = await connector_service.search_crawled_urls( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(crawled_urls_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🌐 Found {len(crawled_urls_chunks)} Web Page chunks related to your query" - ) - } - ) - - elif connector == "FILE": - source_object, files_chunks = await 
connector_service.search_files( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(files_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📄 Found {len(files_chunks)} Files chunks related to your query" - ) - } - ) - - elif connector == "SLACK_CONNECTOR": - source_object, slack_chunks = await connector_service.search_slack( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(slack_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"💬 Found {len(slack_chunks)} Slack messages related to your query" - ) - } - ) - - elif connector == "NOTION_CONNECTOR": - ( - source_object, - notion_chunks, - ) = await connector_service.search_notion( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(notion_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📘 Found {len(notion_chunks)} Notion pages/blocks related to your query" - ) - } - ) - - elif connector == "GITHUB_CONNECTOR": - ( - source_object, - github_chunks, - ) = await connector_service.search_github( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - 
end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(github_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🐙 Found {len(github_chunks)} GitHub files/issues related to your query" - ) - } - ) - - elif connector == "LINEAR_CONNECTOR": - ( - source_object, - linear_chunks, - ) = await connector_service.search_linear( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(linear_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📊 Found {len(linear_chunks)} Linear issues related to your query" - ) - } - ) - - elif connector == "TAVILY_API": - ( - source_object, - tavily_chunks, - ) = await connector_service.search_tavily( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(tavily_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🔍 Found {len(tavily_chunks)} Web Search results related to your query" - ) - } - ) - - elif connector == "SEARXNG_API": - ( - source_object, - searx_chunks, - ) = await connector_service.search_searxng( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - ) - - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(searx_chunks) - - if streaming_service and writer: - writer( - { - "yield_value": 
streaming_service.format_terminal_info_delta( - f"🌐 Found {len(searx_chunks)} SearxNG results related to your query" - ) - } - ) - - elif connector == "LINKUP_API": - linkup_mode = "standard" - - ( - source_object, - linkup_chunks, - ) = await connector_service.search_linkup( - user_query=reformulated_query, - search_space_id=search_space_id, - mode=linkup_mode, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(linkup_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🔗 Found {len(linkup_chunks)} Linkup results related to your query" - ) - } - ) - - elif connector == "BAIDU_SEARCH_API": - ( - source_object, - baidu_chunks, - ) = await connector_service.search_baidu( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(baidu_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🇨🇳 Found {len(baidu_chunks)} Baidu Search results related to your query" - ) - } - ) - - elif connector == "DISCORD_CONNECTOR": - ( - source_object, - discord_chunks, - ) = await connector_service.search_discord( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(discord_chunks) - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🗨️ Found {len(discord_chunks)} Discord messages related to your query" - ) - } - ) - - elif connector == "JIRA_CONNECTOR": - 
source_object, jira_chunks = await connector_service.search_jira( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(jira_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🎫 Found {len(jira_chunks)} Jira issues related to your query" - ) - } - ) - elif connector == "GOOGLE_CALENDAR_CONNECTOR": - ( - source_object, - calendar_chunks, - ) = await connector_service.search_google_calendar( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(calendar_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📅 Found {len(calendar_chunks)} calendar events related to your query" - ) - } - ) - elif connector == "AIRTABLE_CONNECTOR": - ( - source_object, - airtable_chunks, - ) = await connector_service.search_airtable( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(airtable_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🗃️ Found {len(airtable_chunks)} Airtable records related to your query" - ) - } - ) - elif connector == "GOOGLE_GMAIL_CONNECTOR": - ( - source_object, - gmail_chunks, - ) = await connector_service.search_google_gmail( - 
user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(gmail_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📧 Found {len(gmail_chunks)} Gmail messages related to your query" - ) - } - ) - elif connector == "CONFLUENCE_CONNECTOR": - ( - source_object, - confluence_chunks, - ) = await connector_service.search_confluence( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(confluence_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📚 Found {len(confluence_chunks)} Confluence pages related to your query" - ) - } - ) - elif connector == "CLICKUP_CONNECTOR": - ( - source_object, - clickup_chunks, - ) = await connector_service.search_clickup( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(clickup_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📋 Found {len(clickup_chunks)} ClickUp tasks related to your query" - ) - } - ) - - elif connector == "LUMA_CONNECTOR": - ( - source_object, - luma_chunks, - ) = await connector_service.search_luma( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - 
end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(luma_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🎯 Found {len(luma_chunks)} Luma events related to your query" - ) - } - ) - - elif connector == "ELASTICSEARCH_CONNECTOR": - ( - source_object, - elasticsearch_chunks, - ) = await connector_service.search_elasticsearch( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(elasticsearch_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🔎 Found {len(elasticsearch_chunks)} Elasticsearch chunks related to your query" - ) - } - ) - - elif connector == "BOOKSTACK_CONNECTOR": - ( - source_object, - bookstack_chunks, - ) = await connector_service.search_bookstack( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - all_sources.append(source_object) - all_raw_documents.extend(bookstack_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📚 Found {len(bookstack_chunks)} BookStack pages related to your query" - ) - } - ) - - elif connector == "NOTE": - ( - source_object, - notes_chunks, - ) = await connector_service.search_notes( - user_query=reformulated_query, - search_space_id=search_space_id, - top_k=top_k, - start_date=start_date, - end_date=end_date, - ) - - # Add to sources and raw documents - if source_object: - 
all_sources.append(source_object) - all_raw_documents.extend(notes_chunks) - - # Stream found document count - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📝 Found {len(notes_chunks)} Notes related to your query" - ) - } - ) - - except Exception as e: - logging.error("Error in search_airtable: %s", traceback.format_exc()) - error_message = f"Error searching connector {connector}: {e!s}" - print(error_message) - - # Stream error message - if streaming_service and writer: - friendly_name = get_connector_friendly_name(connector) - writer( - { - "yield_value": streaming_service.format_error( - f"Error searching {friendly_name}: {e!s}" - ) - } - ) - - # Continue with other connectors on error - continue - - # Deduplicate source objects by ID before streaming - deduplicated_sources = [] - seen_source_keys = set() - - # First add user-selected sources (if any) - if user_selected_sources: - for source_obj in user_selected_sources: - source_id = source_obj.get("id") - source_type = source_obj.get("type") - - if source_id and source_type: - source_key = f"{source_type}_{source_id}" - if source_key not in seen_source_keys: - seen_source_keys.add(source_key) - deduplicated_sources.append(source_obj) - else: - deduplicated_sources.append(source_obj) - - # Then add connector sources - for source_obj in all_sources: - # Use combination of source ID and type as a unique identifier - # This ensures we don't accidentally deduplicate sources from different connectors - source_id = source_obj.get("id") - source_type = source_obj.get("type") - - if source_id and source_type: - source_key = f"{source_type}_{source_id}" - current_sources_count = len(source_obj.get("sources", [])) - - if source_key not in seen_source_keys: - seen_source_keys.add(source_key) - deduplicated_sources.append(source_obj) - print( - f"Debug: Added source - ID: {source_id}, Type: {source_type}, Key: {source_key}, Sources count: 
{current_sources_count}" - ) - else: - # Check if this source object has more sources than the existing one - existing_index = None - for i, existing_source in enumerate(deduplicated_sources): - existing_id = existing_source.get("id") - existing_type = existing_source.get("type") - if existing_id == source_id and existing_type == source_type: - existing_index = i - break - - if existing_index is not None: - existing_sources_count = len( - deduplicated_sources[existing_index].get("sources", []) - ) - if current_sources_count > existing_sources_count: - # Replace the existing source object with the new one that has more sources - deduplicated_sources[existing_index] = source_obj - print( - f"Debug: Replaced source - ID: {source_id}, Type: {source_type}, Key: {source_key}, Sources count: {existing_sources_count} -> {current_sources_count}" - ) - else: - print( - f"Debug: Skipped duplicate source - ID: {source_id}, Type: {source_type}, Key: {source_key}, Sources count: {current_sources_count} <= {existing_sources_count}" - ) - else: - print( - f"Debug: Skipped duplicate source - ID: {source_id}, Type: {source_type}, Key: {source_key} (couldn't find existing)" - ) - else: - # If there's no ID or type, just add it to be safe - deduplicated_sources.append(source_obj) - print( - f"Debug: Added source without ID/type - {source_obj.get('name', 'UNKNOWN')}" - ) - - # Stream info about deduplicated sources - if streaming_service and writer: - user_source_count = len(user_selected_sources) if user_selected_sources else 0 - connector_source_count = len(deduplicated_sources) - user_source_count - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📚 Collected {len(deduplicated_sources)} total sources ({user_source_count} user-selected + {connector_source_count} from connectors)" - ) - } - ) - - # Deduplicate raw documents based on document_id (preferred) or content hash - seen_doc_ids = set() - seen_content_hashes = set() - deduplicated_docs: 
list[dict[str, Any]] = [] - - for doc in all_raw_documents: - doc_id = (doc.get("document", {}) or {}).get("id") - content = doc.get("content", "") or "" - content_hash = hash(content) - - # Skip if we've seen this document_id or content before - if (doc_id and doc_id in seen_doc_ids) or content_hash in seen_content_hashes: - continue - - if doc_id: - seen_doc_ids.add(doc_id) - seen_content_hashes.add(content_hash) - deduplicated_docs.append(doc) - - # Stream info about deduplicated documents - if streaming_service and writer: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🧹 Found {len(deduplicated_docs)} unique documents after removing duplicates" - ) - } - ) - - # Return deduplicated documents - return deduplicated_docs - - -async def reformulate_user_query( - state: State, config: RunnableConfig, writer: StreamWriter -) -> dict[str, Any]: - """ - Reforms the user query based on the chat history. - """ - - configuration = Configuration.from_runnable_config(config) - user_query = configuration.user_query - chat_history_str = await QueryService.langchain_chat_history_to_str( - state.chat_history - ) - if len(state.chat_history) == 0: - reformulated_query = user_query - else: - reformulated_query = await QueryService.reformulate_query_with_chat_history( - user_query=user_query, - session=state.db_session, - search_space_id=configuration.search_space_id, - chat_history_str=chat_history_str, - ) - - return {"reformulated_query": reformulated_query} - - -async def handle_qna_workflow( - state: State, config: RunnableConfig, writer: StreamWriter -) -> dict[str, Any]: - """ - Handle the QNA research workflow. - - This node fetches relevant documents for the user query and then uses the QNA agent - to generate a comprehensive answer with proper citations. - - Returns: - Dict containing the final answer in the "final_written_report" key for consistency. 
- """ - streaming_service = state.streaming_service - configuration = Configuration.from_runnable_config(config) - - reformulated_query = state.reformulated_query - user_query = configuration.user_query - - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - "🤔 Starting Q&A research workflow..." - ) - } - ) - - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f'🔍 Researching: "{user_query[:100]}..."' - ) - } - ) - - # Fetch relevant documents for the QNA query - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - "🔍 Searching for relevant information across all connectors..." - ) - } - ) - - # Use the top_k value from configuration - top_k = configuration.top_k - - relevant_documents = [] - user_selected_documents = [] - user_selected_sources = [] - - try: - # First, fetch user-selected documents if any - if configuration.document_ids_to_add_in_context: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"📋 Including {len(configuration.document_ids_to_add_in_context)} user-selected documents..." 
- ) - } - ) - - ( - user_selected_sources, - user_selected_documents, - ) = await fetch_documents_by_ids( - document_ids=configuration.document_ids_to_add_in_context, - search_space_id=configuration.search_space_id, - db_session=state.db_session, - ) - - if user_selected_documents: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"✅ Successfully added {len(user_selected_documents)} user-selected documents to context" - ) - } - ) - - # Create connector service using state db_session - connector_service = ConnectorService( - state.db_session, search_space_id=configuration.search_space_id - ) - await connector_service.initialize_counter() - - # Use the reformulated query as a single research question - research_questions = [reformulated_query, user_query] - - # Calculate time filter: last 2 years from now (hardcoded for now) - end_date = datetime.now(UTC) - start_date = end_date - timedelta(days=DEFAULT_TIME_FILTER_YEARS * 365) - - relevant_documents = await fetch_relevant_documents( - research_questions=research_questions, - search_space_id=configuration.search_space_id, - db_session=state.db_session, - connectors_to_search=configuration.connectors_to_search, - writer=writer, - state=state, - top_k=top_k, - connector_service=connector_service, - user_selected_sources=user_selected_sources, - start_date=start_date, - end_date=end_date, - ) - except Exception as e: - error_message = f"Error fetching relevant documents for QNA: {e!s}" - print(error_message) - writer({"yield_value": streaming_service.format_error(error_message)}) - # Continue with empty documents - the QNA agent will handle this gracefully - relevant_documents = [] - - # Combine user-selected documents with connector-fetched documents - all_documents = user_selected_documents + relevant_documents - - print(f"Fetched {len(relevant_documents)} relevant documents for QNA") - print(f"Added {len(user_selected_documents)} user-selected documents for QNA") - print(f"Total 
documents for QNA: {len(all_documents)}") - - # Extract and stream sources from all_documents - if all_documents: - sources_to_stream = extract_sources_from_documents(all_documents) - writer( - {"yield_value": streaming_service.format_sources_delta(sources_to_stream)} - ) - - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"🧠 Generating comprehensive answer using {len(all_documents)} total sources ({len(user_selected_documents)} user-selected + {len(relevant_documents)} connector-found)..." - ) - } - ) - - # Prepare configuration for the QNA agent - qna_config = { - "configurable": { - "user_query": user_query, # Use the reformulated query - "reformulated_query": reformulated_query, - "relevant_documents": all_documents, # Use combined documents - "search_space_id": configuration.search_space_id, - "language": configuration.language, - } - } - - # Create the state for the QNA agent (it has a different state structure) - # Pass streaming_service so the QNA agent can stream tokens directly - qna_state = { - "db_session": state.db_session, - "chat_history": state.chat_history, - "streaming_service": streaming_service, - } - - try: - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - "✍️ Writing comprehensive answer ..." 
- ) - } - ) - - # Track streaming content for real-time updates - complete_content = "" - captured_reranked_documents = [] - - # Call the QNA agent with both custom and values streaming modes - # - "custom" captures token-by-token streams from answer_question via writer() - # - "values" captures state updates including final_answer and reranked_documents - async for stream_mode, chunk in qna_agent_graph.astream( - qna_state, qna_config, stream_mode=["custom", "values"] - ): - if stream_mode == "custom": - # Handle custom stream events (token chunks from answer_question) - if isinstance(chunk, dict) and "yield_value" in chunk: - # Forward the streamed token to the parent writer - writer(chunk) - elif stream_mode == "values" and isinstance(chunk, dict): - # Handle state value updates - # Capture the final answer from state - if chunk.get("final_answer"): - complete_content = chunk["final_answer"] - - # Capture reranked documents from QNA agent for further question generation - if chunk.get("reranked_documents"): - captured_reranked_documents = chunk["reranked_documents"] - - # Set default if no content was received - if not complete_content: - complete_content = "I couldn't find relevant information in your knowledge base to answer this question." - - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - "🎉 Q&A answer generated successfully!" 
- ) - } - ) - - # Return the final answer and captured reranked documents for further question generation - return { - "final_written_report": complete_content, - "reranked_documents": captured_reranked_documents, - } - - except Exception as e: - error_message = f"Error generating QNA answer: {e!s}" - print(error_message) - writer({"yield_value": streaming_service.format_error(error_message)}) - - return {"final_written_report": f"Error generating answer: {e!s}"} - - -async def generate_further_questions( - state: State, config: RunnableConfig, writer: StreamWriter -) -> dict[str, Any]: - """ - Generate contextually relevant follow-up questions based on chat history and available documents. - - This node takes the chat history and reranked documents from the QNA agent - and uses an LLM to generate follow-up questions that would naturally extend the conversation - and provide additional value to the user. - - Returns: - Dict containing the further questions in the "further_questions" key for state update. - """ - from app.services.llm_service import get_fast_llm - - # Get configuration and state data - configuration = Configuration.from_runnable_config(config) - chat_history = state.chat_history - search_space_id = configuration.search_space_id - streaming_service = state.streaming_service - - # Get reranked documents from the state (will be populated by sub-agents) - reranked_documents = getattr(state, "reranked_documents", None) or [] - - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - "🤔 Generating follow-up questions..." 
- ) - } - ) - - # Get search space's fast LLM - llm = await get_fast_llm(state.db_session, search_space_id) - if not llm: - error_message = f"No fast LLM configured for search space {search_space_id}" - print(error_message) - writer({"yield_value": streaming_service.format_error(error_message)}) - - # Stream empty further questions to UI - writer({"yield_value": streaming_service.format_further_questions_delta([])}) - return {"further_questions": []} - - # Format chat history for the prompt - chat_history_xml = "\n" - for message in chat_history: - if hasattr(message, "type"): - if message.type == "human": - chat_history_xml += f"{message.content}\n" - elif message.type == "ai": - chat_history_xml += f"{message.content}\n" - else: - # Handle other message types if needed - chat_history_xml += f"{message!s}\n" - chat_history_xml += "" - - # Format available documents for the prompt - documents_xml = "\n" - for i, doc in enumerate(reranked_documents): - document_info = doc.get("document", {}) - source_id = document_info.get("id", f"doc_{i}") - source_type = document_info.get("document_type", "UNKNOWN") - content = doc.get("content", "") - - documents_xml += "\n" - documents_xml += "\n" - documents_xml += f"{source_id}\n" - documents_xml += f"{source_type}\n" - documents_xml += "\n" - documents_xml += f"\n{content}\n" - documents_xml += "\n" - documents_xml += "" - - # Create the human message content - human_message_content = f""" - {chat_history_xml} - - {documents_xml} - - Based on the chat history and available documents above, generate 3-5 contextually relevant follow-up questions that would naturally extend the conversation and provide additional value to the user. Make sure the questions can be reasonably answered using the available documents or knowledge base. 
- - Your response MUST be valid JSON in exactly this format: - {{ - "further_questions": [ - {{ - "id": 0, - "question": "further qn 1" - }}, - {{ - "id": 1, - "question": "further qn 2" - }} - ] - }} - - Do not include any other text or explanation. Only return the JSON. - """ - - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - "🧠 Analyzing conversation context to suggest relevant questions..." - ) - } - ) - - # Create messages for the LLM - messages = [ - SystemMessage(content=get_further_questions_system_prompt()), - HumanMessage(content=human_message_content), - ] - - try: - # Call the LLM - response = await llm.ainvoke(messages) - - # Parse the JSON response - content = response.content - - # Find the JSON in the content - json_start = content.find("{") - json_end = content.rfind("}") + 1 - if json_start >= 0 and json_end > json_start: - json_str = content[json_start:json_end] - - # Parse the JSON string - parsed_data = json.loads(json_str) - - # Extract the further_questions array - further_questions = parsed_data.get("further_questions", []) - - writer( - { - "yield_value": streaming_service.format_terminal_info_delta( - f"✅ Generated {len(further_questions)} contextual follow-up questions!" 
- ) - } - ) - - # Stream the further questions to the UI - writer( - { - "yield_value": streaming_service.format_further_questions_delta( - further_questions - ) - } - ) - - print(f"Successfully generated {len(further_questions)} further questions") - - return {"further_questions": further_questions} - else: - # If JSON structure not found, return empty list - error_message = ( - "Could not find valid JSON in LLM response for further questions" - ) - print(error_message) - writer( - { - "yield_value": streaming_service.format_error( - f"Warning: {error_message}" - ) - } - ) - - # Stream empty further questions to UI - writer( - {"yield_value": streaming_service.format_further_questions_delta([])} - ) - return {"further_questions": []} - - except (json.JSONDecodeError, ValueError) as e: - # Log the error and return empty list - error_message = f"Error parsing further questions response: {e!s}" - print(error_message) - writer( - {"yield_value": streaming_service.format_error(f"Warning: {error_message}")} - ) - - # Stream empty further questions to UI - writer({"yield_value": streaming_service.format_further_questions_delta([])}) - return {"further_questions": []} - - except Exception as e: - # Handle any other errors - error_message = f"Error generating further questions: {e!s}" - print(error_message) - writer( - {"yield_value": streaming_service.format_error(f"Warning: {error_message}")} - ) - - # Stream empty further questions to UI - writer({"yield_value": streaming_service.format_further_questions_delta([])}) - return {"further_questions": []} diff --git a/surfsense_backend/app/agents/researcher/prompts.py b/surfsense_backend/app/agents/researcher/prompts.py deleted file mode 100644 index 794a594f2..000000000 --- a/surfsense_backend/app/agents/researcher/prompts.py +++ /dev/null @@ -1,140 +0,0 @@ -import datetime - - -def _build_language_instruction(language: str | None = None): - """Build language instruction for prompts.""" - if language: - return 
f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." - return "" - - -def get_further_questions_system_prompt(): - return f""" -Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} - -You are an expert research assistant specializing in generating contextually relevant follow-up questions. Your task is to analyze the chat history and available documents to suggest further questions that would naturally extend the conversation and provide additional value to the user. - - -- chat_history: Provided in XML format within tags, containing and message pairs that show the chronological conversation flow. This provides context about what has already been discussed. -- available_documents: Provided in XML format within tags, containing individual elements with and sections. Each document contains multiple `...` blocks inside . This helps understand what information is accessible for answering potential follow-up questions. - - - -A JSON object with the following structure: -{{ - "further_questions": [ - {{ - "id": 0, - "question": "further qn 1" - }}, - {{ - "id": 1, - "question": "further qn 2" - }} - ] -}} - - - -1. **Analyze Chat History:** Review the entire conversation flow to understand: - * The main topics and themes discussed - * The user's interests and areas of focus - * Questions that have been asked and answered - * Any gaps or areas that could be explored further - * The depth level of the current discussion - -2. **Evaluate Available Documents:** Consider the documents in context to identify: - * Additional information that hasn't been explored yet - * Related topics that could be of interest - * Specific details or data points that could warrant deeper investigation - * Cross-references or connections between different documents - -3. 
**Generate Relevant Follow-up Questions:** Create 3-5 further questions that: - * Are directly related to the ongoing conversation but explore new angles - * Can be reasonably answered using the available documents or knowledge base - * Progress the conversation forward rather than repeating previous topics - * Match the user's apparent level of interest and expertise - * Are specific and actionable rather than overly broad - * Consider practical applications, comparisons, deeper analysis, or related concepts - -4. **Ensure Question Quality:** Each question should: - * Be clear and well-formulated - * Provide genuine value to the user - * Be distinct from other suggested questions - * Be answerable within the current context - * Encourage meaningful exploration of the topic - -5. **Prioritize and Order:** Arrange questions by relevance and natural progression: - * Most directly related to the current discussion first - * Questions that build upon previous answers - * Questions that explore practical applications or implications - * Questions that introduce related but new concepts - -6. **Adhere Strictly to Output Format:** Ensure the final output is a valid JSON object with: - * Correct field names (`further_questions`, `id`, `question`) - * Sequential numbering starting from 0 - * Proper data types and JSON formatting - - - -Input: - -What are the best machine learning algorithms for text classification? 
-For text classification, several algorithms work well depending on your specific needs: - -**Traditional Methods:** -- **Support Vector Machines (SVM)** - Excellent for high-dimensional text data -- **Naive Bayes** - Simple, fast, and works well with small datasets -- **Logistic Regression** - Good baseline with interpretable results - -**Modern Deep Learning:** -- **Neural Networks** - More complex but can capture intricate patterns -- **Transformer models** - State-of-the-art for most text classification tasks - -The choice depends on your dataset size, computational resources, and accuracy requirements. - - - - - -101 -FILE - - -# Machine Learning for Text Classification: A Comprehensive Guide - -## Performance Comparison -Recent studies show that transformer-based models achieve 95%+ accuracy on most text classification benchmarks, while traditional methods like SVM typically achieve 85-90% accuracy. - -## Dataset Considerations -- Small datasets (< 1000 samples): Naive Bayes, SVM -- Large datasets (> 10,000 samples): Neural networks, transformers -- Imbalanced datasets: Require special handling with techniques like SMOTE - - - - -Output: -{{ - "further_questions": [ - {{ - "id": 0, - "question": "What are the key differences in performance between traditional algorithms like SVM and modern deep learning approaches for text classification?" - }}, - {{ - "id": 1, - "question": "How do you handle imbalanced datasets when training text classification models?" - }}, - {{ - "id": 2, - "question": "What preprocessing techniques are most effective for improving text classification accuracy?" - }}, - {{ - "id": 3, - "question": "Are there specific domains or use cases where certain classification algorithms perform better than others?" 
- }} - ] -}} - - -""" diff --git a/surfsense_backend/app/agents/researcher/qna_agent/__init__.py b/surfsense_backend/app/agents/researcher/qna_agent/__init__.py deleted file mode 100644 index 163b8bf63..000000000 --- a/surfsense_backend/app/agents/researcher/qna_agent/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""QnA Agent.""" - -from .graph import graph - -__all__ = ["graph"] diff --git a/surfsense_backend/app/agents/researcher/qna_agent/configuration.py b/surfsense_backend/app/agents/researcher/qna_agent/configuration.py deleted file mode 100644 index e7dd9175e..000000000 --- a/surfsense_backend/app/agents/researcher/qna_agent/configuration.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Define the configurable parameters for the agent.""" - -from __future__ import annotations - -from dataclasses import dataclass, fields -from typing import Any - -from langchain_core.runnables import RunnableConfig - - -@dataclass(kw_only=True) -class Configuration: - """The configuration for the Q&A agent.""" - - # Configuration parameters for the Q&A agent - user_query: str # The user's question to answer - reformulated_query: str # The reformulated query - relevant_documents: list[ - Any - ] # Documents provided directly to the agent for answering - search_space_id: int # Search space identifier - language: str | None = None # Language for responses - - @classmethod - def from_runnable_config( - cls, config: RunnableConfig | None = None - ) -> Configuration: - """Create a Configuration instance from a RunnableConfig object.""" - configurable = (config.get("configurable") or {}) if config else {} - _fields = {f.name for f in fields(cls) if f.init} - return cls(**{k: v for k, v in configurable.items() if k in _fields}) diff --git a/surfsense_backend/app/agents/researcher/qna_agent/default_prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/default_prompts.py deleted file mode 100644 index 72ae636cb..000000000 --- 
a/surfsense_backend/app/agents/researcher/qna_agent/default_prompts.py +++ /dev/null @@ -1,201 +0,0 @@ -"""Default system prompts for Q&A agent. - -The prompt system is modular with 3 parts: -- Part 1 (Base): Core instructions for answering questions (no citations) -- Part 2 (Citations): Citation-specific instructions and formatting rules -- Part 3 (Custom): User's custom instructions (empty by default) - -Combinations: -- Part 1 only: Answers without citations -- Part 1 + Part 2: Answers with citations -- Part 1 + Part 2 + Part 3: Answers with citations and custom instructions -""" - -# Part 1: Base system prompt for answering without citations -DEFAULT_QNA_BASE_PROMPT = """Today's date: {date} -You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.{language_instruction} -{chat_history_section} - -- EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) -- FILE: "User-uploaded documents (PDFs, Word, etc.)" (personal files) -- SLACK_CONNECTOR: "Slack conversations and shared content" (personal workspace communications) -- NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management) -- YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos) -- GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions) -- ELASTICSEARCH_CONNECTOR: "Elasticsearch indexed documents and data" (personal Elasticsearch instances and custom data sources) -- LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management) -- JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking) -- CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) -- CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management) -- GOOGLE_CALENDAR_CONNECTOR: 
"Google Calendar events, meetings, and schedules" (personal calendar and time management) -- GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications) -- DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications) -- AIRTABLE_CONNECTOR: "Airtable records, tables, and database content" (personal data management and organization) -- TAVILY_API: "Tavily search API results" (personalized search results) -- LINKUP_API: "Linkup search API results" (personalized search results) -- LUMA_CONNECTOR: "Luma events" -- WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites) - - - -1. Review the chat history to understand the conversation context and any previous topics discussed. -2. Carefully analyze all provided documents in the sections. -3. Extract relevant information that directly addresses the user's question. -4. Provide a comprehensive, detailed answer using information from the user's personal knowledge sources. -5. Structure your answer logically and conversationally, as if having a detailed discussion with the user. -6. Use your own words to synthesize and connect ideas from the documents. -7. If documents contain conflicting information, acknowledge this and present both perspectives. -8. If the user's question cannot be fully answered with the provided documents, clearly state what information is missing. -9. Provide actionable insights and practical information when relevant to the user's question. -10. Use the chat history to maintain conversation continuity and refer to previous discussions when relevant. -11. Remember that all knowledge sources contain personal information - provide answers that reflect this personal context. -12. Be conversational and engaging while maintaining accuracy. 
- - - -- Write in a clear, conversational tone suitable for detailed Q&A discussions -- Provide comprehensive answers that thoroughly address the user's question -- Use appropriate paragraphs and structure for readability -- ALWAYS provide personalized answers that reflect the user's own knowledge and context -- Be thorough and detailed in your explanations while remaining focused on the user's specific question -- If asking follow-up questions would be helpful, suggest them at the end of your response - - - -When you see a user query, focus exclusively on providing a detailed, comprehensive answer using information from the provided documents, which contain the user's personal knowledge and data. - -Make sure your response: -1. Considers the chat history for context and conversation continuity -2. Directly and thoroughly answers the user's question with personalized information from their own knowledge sources -3. Is conversational, engaging, and detailed -4. Acknowledges the personal nature of the information being provided -5. Offers follow-up suggestions when appropriate - -""" - -# Part 2: Citation-specific instructions to add citation capabilities -DEFAULT_QNA_CITATION_INSTRUCTIONS = """ - -CRITICAL CITATION REQUIREMENTS: - -1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `` tag inside ``. -2. Make sure ALL factual statements from the documents have proper citations. -3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2]. -4. You MUST use the exact chunk_id values from the `` attributes. Do not create your own citation numbers. -5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value. -6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags. -7. 
Do not return citations as clickable links. -8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only. -9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting. -10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `` tags. -11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up. - - -The documents you receive are structured like this: - - - - 42 - GITHUB_CONNECTOR - <![CDATA[Some repo / file / issue title]]> - - - - - - - - - - -IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id. - - - -- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `` tag -- Citations should appear at the end of the sentence containing the information they support -- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] -- No need to return references section. Just citations in answer. -- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format -- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only -- NEVER make up chunk IDs if you are unsure about the chunk_id. 
It is better to omit the citation than to guess - - - -CORRECT citation formats: -- [citation:5] -- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] - -INCORRECT citation formats (DO NOT use): -- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense)) -- Using parentheses around brackets: ([citation:5]) -- Using hyperlinked text: [link to source 5](https://example.com) -- Using footnote style: ... library¹ -- Making up source IDs when source_id is unknown -- Using old IEEE format: [1], [2], [3] -- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5] - - - -Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5]. - -The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources. - -However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead. 
- - -""" - -# Part 3: User's custom instructions (empty by default, can be set by user from UI) -DEFAULT_QNA_CUSTOM_INSTRUCTIONS = "" - -# Full prompt with all parts combined (for backward compatibility and migration) -DEFAULT_QNA_CITATION_PROMPT = ( - DEFAULT_QNA_BASE_PROMPT - + DEFAULT_QNA_CITATION_INSTRUCTIONS - + DEFAULT_QNA_CUSTOM_INSTRUCTIONS -) - -DEFAULT_QNA_NO_DOCUMENTS_PROMPT = """Today's date: {date} -You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.{language_instruction} -{chat_history_section} - -The user has asked a question but there are no specific documents from their personal knowledge base available to answer it. You should provide a helpful response based on: -1. The conversation history and context -2. Your general knowledge and expertise -3. Understanding of the user's needs and interests based on our conversation - - - -1. Provide a comprehensive, helpful answer to the user's question -2. Draw upon the conversation history to understand context and the user's specific needs -3. Use your general knowledge to provide accurate, detailed information -4. Be conversational and engaging, as if having a detailed discussion with the user -5. Acknowledge when you're drawing from general knowledge rather than their personal sources -6. Provide actionable insights and practical information when relevant -7. Structure your answer logically and clearly -8. If the question would benefit from personalized information from their knowledge base, gently suggest they might want to add relevant content to SurfSense -9. Be honest about limitations while still being maximally helpful -10. 
Maintain the helpful, knowledgeable tone that users expect from SurfSense - - - -- Write in a clear, conversational tone suitable for detailed Q&A discussions -- Provide comprehensive answers that thoroughly address the user's question -- Use appropriate paragraphs and structure for readability -- No citations are needed since you're using general knowledge -- Be thorough and detailed in your explanations while remaining focused on the user's specific question -- If asking follow-up questions would be helpful, suggest them at the end of your response -- When appropriate, mention that adding relevant content to their SurfSense knowledge base could provide more personalized answers - - - -When answering the user's question without access to their personal documents: -1. Review the chat history to understand conversation context and maintain continuity -2. Provide the most helpful and comprehensive answer possible using general knowledge -3. Be conversational and engaging -4. Draw upon conversation history for context -5. Be clear that you're providing general information -6. 
Suggest ways the user could get more personalized answers by expanding their knowledge base when relevant - -""" diff --git a/surfsense_backend/app/agents/researcher/qna_agent/graph.py b/surfsense_backend/app/agents/researcher/qna_agent/graph.py deleted file mode 100644 index 0d9c8bac8..000000000 --- a/surfsense_backend/app/agents/researcher/qna_agent/graph.py +++ /dev/null @@ -1,21 +0,0 @@ -from langgraph.graph import StateGraph - -from .configuration import Configuration -from .nodes import answer_question, rerank_documents -from .state import State - -# Define a new graph -workflow = StateGraph(State, config_schema=Configuration) - -# Add the nodes to the graph -workflow.add_node("rerank_documents", rerank_documents) -workflow.add_node("answer_question", answer_question) - -# Connect the nodes -workflow.add_edge("__start__", "rerank_documents") -workflow.add_edge("rerank_documents", "answer_question") -workflow.add_edge("answer_question", "__end__") - -# Compile the workflow into an executable graph -graph = workflow.compile() -graph.name = "SurfSense QnA Agent" # This defines the custom name in LangSmith diff --git a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py deleted file mode 100644 index 28c35a20b..000000000 --- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py +++ /dev/null @@ -1,297 +0,0 @@ -import datetime -from typing import Any - -from langchain_core.messages import HumanMessage, SystemMessage -from langchain_core.runnables import RunnableConfig -from langgraph.types import StreamWriter -from sqlalchemy import select - -from app.db import SearchSpace -from app.services.reranker_service import RerankerService - -from ..utils import ( - calculate_token_count, - format_documents_section, - langchain_chat_history_to_str, - optimize_documents_for_token_limit, -) -from .configuration import Configuration -from .default_prompts import ( - DEFAULT_QNA_BASE_PROMPT, - 
DEFAULT_QNA_CITATION_INSTRUCTIONS, - DEFAULT_QNA_NO_DOCUMENTS_PROMPT, -) -from .state import State - - -def _build_language_instruction(language: str | None = None): - """Build language instruction for prompts.""" - if language: - return f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." - return "" - - -def _build_chat_history_section(chat_history: str | None = None): - """Build chat history section for prompts.""" - if chat_history: - return f""" - -{chat_history if chat_history else "NO CHAT HISTORY PROVIDED"} - -""" - return """ - -NO CHAT HISTORY PROVIDED - -""" - - -def _format_system_prompt( - prompt_template: str, - chat_history: str | None = None, - language: str | None = None, -): - """Format a system prompt template with dynamic values.""" - date = datetime.datetime.now().strftime("%Y-%m-%d") - language_instruction = _build_language_instruction(language) - chat_history_section = _build_chat_history_section(chat_history) - - return prompt_template.format( - date=date, - language_instruction=language_instruction, - chat_history_section=chat_history_section, - ) - - -async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, Any]: - """ - Rerank the documents based on relevance to the user's question. - - This node takes the relevant documents provided in the configuration, - reranks them using the reranker service based on the user's query, - and updates the state with the reranked documents. - - Documents are now document-grouped with a `chunks` list. Reranking is done - using the concatenated `content` field, and the full structure (including - `chunks`) is preserved for proper citation formatting. - - If reranking is disabled, returns the original documents without processing. - - Returns: - Dict containing the reranked documents. 
- """ - # Get configuration and relevant documents - configuration = Configuration.from_runnable_config(config) - documents = configuration.relevant_documents - user_query = configuration.user_query - reformulated_query = configuration.reformulated_query - - # If no documents were provided, return empty list - if not documents or len(documents) == 0: - return {"reranked_documents": []} - - # Get reranker service from app config - reranker_service = RerankerService.get_reranker_instance() - - # If reranking is not enabled, sort by existing score and return - if not reranker_service: - print("Reranking is disabled. Sorting documents by existing score.") - sorted_documents = sorted( - documents, key=lambda x: x.get("score", 0), reverse=True - ) - return {"reranked_documents": sorted_documents} - - # Perform reranking - try: - # Pass documents directly to reranker - it will use: - # - "content" (concatenated chunk text) for scoring - # - "chunk_id" (primary chunk id) for matching - # The full document structure including "chunks" is preserved - reranked_docs = reranker_service.rerank_documents( - user_query + "\n" + reformulated_query, documents - ) - - # Sort by score in descending order - reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True) - - print(f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}") - - return {"reranked_documents": reranked_docs} - - except Exception as e: - print(f"Error during reranking: {e!s}") - # Fall back to original documents if reranking fails - return {"reranked_documents": documents} - - -async def answer_question( - state: State, config: RunnableConfig, writer: StreamWriter -) -> dict[str, Any]: - """ - Answer the user's question using the provided documents with real-time streaming. - - This node takes the relevant documents provided in the configuration and uses - an LLM to generate a comprehensive answer to the user's question with - proper citations. 
The citations follow [citation:chunk_id] format using chunk IDs from the - `` tags in the provided documents. If no documents are provided, it will use chat history to generate - an answer. - - The response is streamed token-by-token for real-time updates to the frontend. - - Returns: - Dict containing the final answer in the "final_answer" key. - """ - from app.services.llm_service import get_fast_llm - - # Get configuration and relevant documents from configuration - configuration = Configuration.from_runnable_config(config) - documents = state.reranked_documents - user_query = configuration.user_query - search_space_id = configuration.search_space_id - language = configuration.language - - # Get streaming service from state - streaming_service = state.streaming_service - - # Fetch search space to get QnA configuration - result = await state.db_session.execute( - select(SearchSpace).where(SearchSpace.id == search_space_id) - ) - search_space = result.scalar_one_or_none() - - if not search_space: - error_message = f"Search space {search_space_id} not found" - print(error_message) - raise RuntimeError(error_message) - - # Get QnA configuration from search space - citations_enabled = search_space.citations_enabled - custom_instructions_text = search_space.qna_custom_instructions or "" - - # Use constants for base prompt and citation instructions - qna_base_prompt = DEFAULT_QNA_BASE_PROMPT - qna_citation_instructions = ( - DEFAULT_QNA_CITATION_INSTRUCTIONS if citations_enabled else "" - ) - qna_custom_instructions = ( - f"\n\n{custom_instructions_text}\n" - if custom_instructions_text - else "" - ) - - # Get search space's fast LLM - llm = await get_fast_llm(state.db_session, search_space_id) - if not llm: - error_message = f"No fast LLM configured for search space {search_space_id}" - print(error_message) - raise RuntimeError(error_message) - - # Determine if we have documents and optimize for token limits - has_documents_initially = documents and len(documents) > 0 
- chat_history_str = langchain_chat_history_to_str(state.chat_history) - - if has_documents_initially: - # Compose the full citation prompt: base + citation instructions + custom instructions - full_citation_prompt_template = ( - qna_base_prompt + qna_citation_instructions + qna_custom_instructions - ) - - # Create base message template for token calculation (without documents) - base_human_message_template = f""" - - User's question: - - {user_query} - - - Please provide a detailed, comprehensive answer to the user's question using the information from their personal knowledge sources. Make sure to cite all information appropriately and engage in a conversational manner. - """ - - # Use initial system prompt for token calculation - initial_system_prompt = _format_system_prompt( - full_citation_prompt_template, chat_history_str, language - ) - base_messages = [ - SystemMessage(content=initial_system_prompt), - HumanMessage(content=base_human_message_template), - ] - - # Optimize documents to fit within token limits - optimized_documents, has_optimized_documents = ( - optimize_documents_for_token_limit(documents, base_messages, llm.model) - ) - - # Update state based on optimization result - documents = optimized_documents - has_documents = has_optimized_documents - else: - has_documents = False - - # Choose system prompt based on final document availability - # With documents: use base + citation instructions + custom instructions - # Without documents: use the default no-documents prompt from constants - if has_documents: - full_citation_prompt_template = ( - qna_base_prompt + qna_citation_instructions + qna_custom_instructions - ) - system_prompt = _format_system_prompt( - full_citation_prompt_template, chat_history_str, language - ) - else: - system_prompt = _format_system_prompt( - DEFAULT_QNA_NO_DOCUMENTS_PROMPT + qna_custom_instructions, - chat_history_str, - language, - ) - - # Generate documents section - documents_text = ( - format_documents_section( - 
documents, "Source material from your personal knowledge base" - ) - if has_documents - else "" - ) - - # Create final human message content - instruction_text = ( - "Please provide a detailed, comprehensive answer to the user's question using the information from their personal knowledge sources. Make sure to cite all information appropriately and engage in a conversational manner." - if has_documents - else "Please provide a helpful answer to the user's question based on our conversation history and your general knowledge. Engage in a conversational manner." - ) - - human_message_content = f""" - {documents_text} - - User's question: - - {user_query} - - - {instruction_text} - """ - - # Create final messages for the LLM - messages_with_chat_history = [ - SystemMessage(content=system_prompt), - HumanMessage(content=human_message_content), - ] - - # Log final token count - total_tokens = calculate_token_count(messages_with_chat_history, llm.model) - print(f"Final token count: {total_tokens}") - - # Stream the LLM response token by token - final_answer = "" - - async for chunk in llm.astream(messages_with_chat_history): - # Extract the content from the chunk - if hasattr(chunk, "content") and chunk.content: - token = chunk.content - final_answer += token - - # Stream the token to the frontend via custom stream - if streaming_service: - writer({"yield_value": streaming_service.format_text_chunk(token)}) - - return {"final_answer": final_answer} diff --git a/surfsense_backend/app/agents/researcher/qna_agent/state.py b/surfsense_backend/app/agents/researcher/qna_agent/state.py deleted file mode 100644 index 4113b9286..000000000 --- a/surfsense_backend/app/agents/researcher/qna_agent/state.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Define the state structures for the agent.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any - -from sqlalchemy.ext.asyncio import AsyncSession - -from app.services.streaming_service 
import StreamingService - - -@dataclass -class State: - """Defines the dynamic state for the Q&A agent during execution. - - This state tracks the database session, chat history, and the outputs - generated by the agent's nodes during question answering. - See: https://langchain-ai.github.io/langgraph/concepts/low_level/#state - for more information. - """ - - # Runtime context - db_session: AsyncSession - - # Streaming service for real-time token streaming - streaming_service: StreamingService | None = None - - chat_history: list[Any] | None = field(default_factory=list) - # OUTPUT: Populated by agent nodes - reranked_documents: list[Any] | None = None - final_answer: str | None = None diff --git a/surfsense_backend/app/agents/researcher/state.py b/surfsense_backend/app/agents/researcher/state.py deleted file mode 100644 index 90f7039be..000000000 --- a/surfsense_backend/app/agents/researcher/state.py +++ /dev/null @@ -1,38 +0,0 @@ -"""Define the state structures for the agent.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any - -from sqlalchemy.ext.asyncio import AsyncSession - -from app.services.streaming_service import StreamingService - - -@dataclass -class State: - """Defines the dynamic state for the agent during execution. - - This state tracks the database session and the outputs generated by the agent's nodes. - See: https://langchain-ai.github.io/langgraph/concepts/low_level/#state - for more information. 
- """ - - # Runtime context (not part of actual graph state) - db_session: AsyncSession - - # Streaming service - streaming_service: StreamingService - - chat_history: list[Any] | None = field(default_factory=list) - - reformulated_query: str | None = field(default=None) - further_questions: Any | None = field(default=None) - - # Temporary field to hold reranked documents from sub-agents for further question generation - reranked_documents: list[Any] | None = field(default=None) - - # OUTPUT: Populated by agent nodes - # Using field to explicitly mark as part of state - final_written_report: str | None = field(default=None) diff --git a/surfsense_backend/app/agents/researcher/utils.py b/surfsense_backend/app/agents/researcher/utils.py deleted file mode 100644 index 9cb0dcbde..000000000 --- a/surfsense_backend/app/agents/researcher/utils.py +++ /dev/null @@ -1,292 +0,0 @@ -import json -from typing import Any, NamedTuple - -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage -from litellm import get_model_info, token_counter - - -class DocumentTokenInfo(NamedTuple): - """Information about a document and its token cost.""" - - index: int - document: dict[str, Any] - formatted_content: str - token_count: int - - -def get_connector_emoji(connector_name: str) -> str: - """Get an appropriate emoji for a connector type.""" - connector_emojis = { - "YOUTUBE_VIDEO": "📹", - "EXTENSION": "🧩", - "FILE": "📄", - "SLACK_CONNECTOR": "💬", - "NOTION_CONNECTOR": "📘", - "GITHUB_CONNECTOR": "🐙", - "LINEAR_CONNECTOR": "📊", - "JIRA_CONNECTOR": "🎫", - "DISCORD_CONNECTOR": "🗨️", - "TAVILY_API": "🔍", - "LINKUP_API": "🔗", - "BAIDU_SEARCH_API": "🇨🇳", - "GOOGLE_CALENDAR_CONNECTOR": "📅", - "AIRTABLE_CONNECTOR": "🗃️", - "LUMA_CONNECTOR": "✨", - "ELASTICSEARCH_CONNECTOR": "⚡", - "WEBCRAWLER_CONNECTOR": "🌐", - "BOOKSTACK_CONNECTOR": "📚", - "NOTE": "📝", - } - return connector_emojis.get(connector_name, "🔎") - - -def get_connector_friendly_name(connector_name: str) 
-> str: - """Convert technical connector IDs to user-friendly names.""" - connector_friendly_names = { - "YOUTUBE_VIDEO": "YouTube", - "EXTENSION": "Browser Extension", - "FILE": "Files", - "SLACK_CONNECTOR": "Slack", - "NOTION_CONNECTOR": "Notion", - "GITHUB_CONNECTOR": "GitHub", - "LINEAR_CONNECTOR": "Linear", - "JIRA_CONNECTOR": "Jira", - "CONFLUENCE_CONNECTOR": "Confluence", - "GOOGLE_CALENDAR_CONNECTOR": "Google Calendar", - "DISCORD_CONNECTOR": "Discord", - "TAVILY_API": "Tavily Search", - "LINKUP_API": "Linkup Search", - "BAIDU_SEARCH_API": "Baidu Search", - "AIRTABLE_CONNECTOR": "Airtable", - "LUMA_CONNECTOR": "Luma", - "ELASTICSEARCH_CONNECTOR": "Elasticsearch", - "WEBCRAWLER_CONNECTOR": "Web Pages", - "BOOKSTACK_CONNECTOR": "BookStack", - "NOTE": "Notes", - } - return connector_friendly_names.get(connector_name, connector_name) - - -def convert_langchain_messages_to_dict( - messages: list[BaseMessage], -) -> list[dict[str, str]]: - """Convert LangChain messages to format expected by token_counter.""" - role_mapping = {"system": "system", "human": "user", "ai": "assistant"} - - converted_messages = [] - for msg in messages: - role = role_mapping.get(getattr(msg, "type", None), "user") - converted_messages.append({"role": role, "content": str(msg.content)}) - - return converted_messages - - -def format_document_for_citation(document: dict[str, Any]) -> str: - """Format a single document for citation in the new document+chunks XML format. - - IMPORTANT: - - Citations must reference real DB chunk IDs: `[citation:]` - - Document metadata is included under , but citations are NOT document_id-based. 
- """ - - def _to_cdata(value: Any) -> str: - text = "" if value is None else str(value) - # Safely nest CDATA even if the content includes "]]>" - return "", "]]]]>") + "]]>" - - doc_info = document.get("document", {}) or {} - metadata = doc_info.get("metadata", {}) or {} - - doc_id = doc_info.get("id", "") - title = doc_info.get("title", "") - document_type = doc_info.get("document_type", "CRAWLED_URL") - url = ( - metadata.get("url") - or metadata.get("source") - or metadata.get("page_url") - or metadata.get("VisitedWebPageURL") - or "" - ) - - metadata_json = json.dumps(metadata, ensure_ascii=False) - - chunks = document.get("chunks") or [] - if not chunks: - # Fallback: treat `content` as a single chunk (no chunk_id available for citation) - chunks = [{"chunk_id": "", "content": document.get("content", "")}] - - chunks_xml = "\n".join( - [ - f"{_to_cdata(chunk.get('content', ''))}" - for chunk in chunks - ] - ) - - return f""" - -{doc_id} -{document_type} -{_to_cdata(title)} -{_to_cdata(url)} -{_to_cdata(metadata_json)} - - - -{chunks_xml} - -""" - - -def format_documents_section( - documents: list[dict[str, Any]], section_title: str = "Source material" -) -> str: - """Format multiple documents into a complete documents section.""" - if not documents: - return "" - - formatted_docs = [format_document_for_citation(doc) for doc in documents] - - return f"""{section_title}: - - {chr(10).join(formatted_docs)} - """ - - -def calculate_document_token_costs( - documents: list[dict[str, Any]], model: str -) -> list[DocumentTokenInfo]: - """Pre-calculate token costs for each document.""" - document_token_info = [] - - for i, doc in enumerate(documents): - formatted_doc = format_document_for_citation(doc) - - # Calculate token count for this document - token_count = token_counter( - messages=[{"role": "user", "content": formatted_doc}], model=model - ) - - document_token_info.append( - DocumentTokenInfo( - index=i, - document=doc, - formatted_content=formatted_doc, - 
token_count=token_count, - ) - ) - - return document_token_info - - -def find_optimal_documents_with_binary_search( - document_tokens: list[DocumentTokenInfo], available_tokens: int -) -> list[DocumentTokenInfo]: - """Use binary search to find the maximum number of documents that fit within token limit.""" - if not document_tokens or available_tokens <= 0: - return [] - - left, right = 0, len(document_tokens) - optimal_docs = [] - - while left <= right: - mid = (left + right) // 2 - current_docs = document_tokens[:mid] - current_token_sum = sum(doc_info.token_count for doc_info in current_docs) - - if current_token_sum <= available_tokens: - optimal_docs = current_docs - left = mid + 1 - else: - right = mid - 1 - - return optimal_docs - - -def get_model_context_window(model_name: str) -> int: - """Get the total context window size for a model (input + output tokens).""" - try: - model_info = get_model_info(model_name) - context_window = model_info.get("max_input_tokens", 4096) # Default fallback - return context_window - except Exception as e: - print( - f"Warning: Could not get model info for {model_name}, using default 4096 tokens. Error: {e}" - ) - return 4096 # Conservative fallback - - -def optimize_documents_for_token_limit( - documents: list[dict[str, Any]], base_messages: list[BaseMessage], model_name: str -) -> tuple[list[dict[str, Any]], bool]: - """ - Optimize documents to fit within token limits using binary search. 
- - Args: - documents: List of documents with content and metadata - base_messages: Base messages without documents (chat history + system + human message template) - model_name: Model name for token counting (required) - output_token_buffer: Number of tokens to reserve for model output - - Returns: - Tuple of (optimized_documents, has_documents_remaining) - """ - if not documents: - return [], False - - model = model_name - context_window = get_model_context_window(model) - - # Calculate base token cost - base_messages_dict = convert_langchain_messages_to_dict(base_messages) - base_tokens = token_counter(messages=base_messages_dict, model=model) - available_tokens_for_docs = context_window - base_tokens - - print( - f"Token optimization: Context window={context_window}, Base={base_tokens}, Available for docs={available_tokens_for_docs}" - ) - - if available_tokens_for_docs <= 0: - print("No tokens available for documents after base content and output buffer") - return [], False - - # Calculate token costs for all documents - document_token_info = calculate_document_token_costs(documents, model) - - # Find optimal number of documents using binary search - optimal_doc_info = find_optimal_documents_with_binary_search( - document_token_info, available_tokens_for_docs - ) - - # Extract the original document objects - optimized_documents = [doc_info.document for doc_info in optimal_doc_info] - has_documents_remaining = len(optimized_documents) > 0 - - print( - f"Token optimization result: Using {len(optimized_documents)}/{len(documents)} documents" - ) - - return optimized_documents, has_documents_remaining - - -def calculate_token_count(messages: list[BaseMessage], model_name: str) -> int: - """Calculate token count for a list of LangChain messages.""" - model = model_name - messages_dict = convert_langchain_messages_to_dict(messages) - return token_counter(messages=messages_dict, model=model) - - -def langchain_chat_history_to_str(chat_history: list[BaseMessage]) -> 
str: - """ - Convert a list of chat history messages to a string. - """ - chat_history_str = "" - - for chat_message in chat_history: - if isinstance(chat_message, HumanMessage): - chat_history_str += f"{chat_message.content}\n" - elif isinstance(chat_message, AIMessage): - chat_history_str += f"{chat_message.content}\n" - elif isinstance(chat_message, SystemMessage): - chat_history_str += f"{chat_message.content}\n" - - return chat_history_str diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index e144d165a..7d7e88a28 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -5,6 +5,10 @@ from fastapi.middleware.cors import CORSMiddleware from sqlalchemy.ext.asyncio import AsyncSession from uvicorn.middleware.proxy_headers import ProxyHeadersMiddleware +from app.agents.new_chat.checkpointer import ( + close_checkpointer, + setup_checkpointer_tables, +) from app.config import config from app.db import User, create_db_and_tables, get_async_session from app.routes import router as crud_router @@ -16,7 +20,11 @@ from app.users import SECRET, auth_backend, current_active_user, fastapi_users async def lifespan(app: FastAPI): # Not needed if you setup a migration system like Alembic await create_db_and_tables() + # Setup LangGraph checkpointer tables for conversation persistence + await setup_checkpointer_tables() yield + # Cleanup: close checkpointer connection on shutdown + await close_checkpointer() def registration_allowed(): diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index bffe4f606..08be26de1 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -35,12 +35,6 @@ def load_global_llm_configs(): # Try main config file first global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" - # Fall back to example file for testing - # if not global_config_file.exists(): - # global_config_file = BASE_DIR / "app" / 
"config" / "global_llm_config.example.yaml" - # if global_config_file.exists(): - # print("Info: Using global_llm_config.example.yaml (copy to global_llm_config.yaml for production)") - if not global_config_file.exists(): # No global configs available return [] diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml index bd574515a..14a18c99a 100644 --- a/surfsense_backend/app/config/global_llm_config.example.yaml +++ b/surfsense_backend/app/config/global_llm_config.example.yaml @@ -9,72 +9,101 @@ # # These configurations will be available to all users as a convenient option # Users can choose to use these global configs or add their own +# +# Structure matches NewLLMConfig: +# - LLM model configuration (provider, model_name, api_key, etc.) +# - Prompt configuration (system_instructions, citations_enabled) global_llm_configs: - # Example: OpenAI GPT-4 Turbo + # Example: OpenAI GPT-4 Turbo with citations enabled - id: -1 name: "Global GPT-4 Turbo" + description: "OpenAI's GPT-4 Turbo with default prompts and citations" provider: "OPENAI" model_name: "gpt-4-turbo-preview" api_key: "sk-your-openai-api-key-here" api_base: "" - language: "English" litellm_params: temperature: 0.7 max_tokens: 4000 + # Prompt Configuration + system_instructions: "" # Empty = use default SURFSENSE_SYSTEM_INSTRUCTIONS + use_default_system_instructions: true + citations_enabled: true # Example: Anthropic Claude 3 Opus - id: -2 name: "Global Claude 3 Opus" + description: "Anthropic's most capable model with citations" provider: "ANTHROPIC" model_name: "claude-3-opus-20240229" api_key: "sk-ant-your-anthropic-api-key-here" api_base: "" - language: "English" litellm_params: temperature: 0.7 max_tokens: 4000 + system_instructions: "" + use_default_system_instructions: true + citations_enabled: true - # Example: Fast model - GPT-3.5 Turbo + # Example: Fast model - GPT-3.5 Turbo (citations disabled for speed) - id: -3 - 
name: "Global GPT-3.5 Turbo" + name: "Global GPT-3.5 Turbo (Fast)" + description: "Fast responses without citations for quick queries" provider: "OPENAI" model_name: "gpt-3.5-turbo" api_key: "sk-your-openai-api-key-here" api_base: "" - language: "English" litellm_params: temperature: 0.5 max_tokens: 2000 + system_instructions: "" + use_default_system_instructions: true + citations_enabled: false # Disabled for faster responses - # Example: Chinese LLM - DeepSeek + # Example: Chinese LLM - DeepSeek with custom instructions - id: -4 - name: "Global DeepSeek Chat" + name: "Global DeepSeek Chat (Chinese)" + description: "DeepSeek optimized for Chinese language responses" provider: "DEEPSEEK" model_name: "deepseek-chat" api_key: "your-deepseek-api-key-here" api_base: "https://api.deepseek.com/v1" - language: "Chinese" litellm_params: temperature: 0.7 max_tokens: 4000 + # Custom system instructions for Chinese responses + system_instructions: | + + You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base. + + Today's date (UTC): {resolved_today} + + IMPORTANT: Please respond in Chinese (简体中文) unless the user specifically requests another language. + + use_default_system_instructions: false + citations_enabled: true # Example: Groq - Fast inference - id: -5 name: "Global Groq Llama 3" + description: "Ultra-fast Llama 3 70B via Groq" provider: "GROQ" model_name: "llama3-70b-8192" api_key: "your-groq-api-key-here" api_base: "" - language: "English" litellm_params: temperature: 0.7 max_tokens: 8000 + system_instructions: "" + use_default_system_instructions: true + citations_enabled: true # Notes: -# - Use negative IDs to distinguish global configs from user configs +# - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB) # - IDs should be unique and sequential (e.g., -1, -2, -3, etc.) 
# - The 'api_key' field will not be exposed to users via API -# - Users can select these configs for their long_context, fast, or strategic LLM roles +# - system_instructions: Custom prompt or empty string to use defaults +# - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty +# - citations_enabled: true = include citation instructions, false = include anti-citation instructions # - All standard LiteLLM providers are supported - diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index c338240b3..a2a424c26 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -9,7 +9,6 @@ from sqlalchemy import ( ARRAY, JSON, TIMESTAMP, - BigInteger, Boolean, Column, Enum as SQLAlchemyEnum, @@ -77,10 +76,6 @@ class SearchSourceConnectorType(str, Enum): BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR" -class ChatType(str, Enum): - QNA = "QNA" - - class LiteLLMProvider(str, Enum): """ Enum for LLM providers supported by LiteLLM. @@ -317,19 +312,70 @@ class BaseModel(Base): id = Column(Integer, primary_key=True, index=True) -class Chat(BaseModel, TimestampMixin): - __tablename__ = "chats" +class NewChatMessageRole(str, Enum): + """Role enum for new chat messages.""" - type = Column(SQLAlchemyEnum(ChatType), nullable=False) - title = Column(String, nullable=False, index=True) - initial_connectors = Column(ARRAY(String), nullable=True) - messages = Column(JSON, nullable=False) - state_version = Column(BigInteger, nullable=False, default=1) + USER = "user" + ASSISTANT = "assistant" + SYSTEM = "system" + +class NewChatThread(BaseModel, TimestampMixin): + """ + Thread model for the new chat feature using assistant-ui. + Each thread represents a conversation with message history. + LangGraph checkpointer uses thread_id for state persistence. 
+ """ + + __tablename__ = "new_chat_threads" + + title = Column(String(500), nullable=False, default="New Chat", index=True) + archived = Column(Boolean, nullable=False, default=False) + updated_at = Column( + TIMESTAMP(timezone=True), + nullable=False, + default=lambda: datetime.now(UTC), + onupdate=lambda: datetime.now(UTC), + index=True, + ) + + # Foreign keys search_space_id = Column( Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False ) - search_space = relationship("SearchSpace", back_populates="chats") + + # Relationships + search_space = relationship("SearchSpace", back_populates="new_chat_threads") + messages = relationship( + "NewChatMessage", + back_populates="thread", + order_by="NewChatMessage.created_at", + cascade="all, delete-orphan", + ) + + +class NewChatMessage(BaseModel, TimestampMixin): + """ + Message model for the new chat feature. + Stores individual messages in assistant-ui format. + """ + + __tablename__ = "new_chat_messages" + + role = Column(SQLAlchemyEnum(NewChatMessageRole), nullable=False) + # Content stored as JSONB to support rich content (text, tool calls, etc.) 
+ content = Column(JSONB, nullable=False) + + # Foreign key to thread + thread_id = Column( + Integer, + ForeignKey("new_chat_threads.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + + # Relationship + thread = relationship("NewChatThread", back_populates="messages") class Document(BaseModel, TimestampMixin): @@ -377,15 +423,13 @@ class Chunk(BaseModel, TimestampMixin): class Podcast(BaseModel, TimestampMixin): + """Podcast model for storing generated podcasts.""" + __tablename__ = "podcasts" - title = Column(String, nullable=False, index=True) - podcast_transcript = Column(JSON, nullable=False, default={}) - file_location = Column(String(500), nullable=False, default="") - chat_id = Column( - Integer, ForeignKey("chats.id", ondelete="CASCADE"), nullable=True - ) # If generated from a chat, this will be the chat id, else null ( can be from a document or a chat ) - chat_state_version = Column(BigInteger, nullable=True) + title = Column(String(500), nullable=False) + podcast_transcript = Column(JSONB, nullable=True) # List of transcript entries + file_location = Column(Text, nullable=True) # Path to the audio file search_space_id = Column( Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False @@ -408,9 +452,10 @@ class SearchSpace(BaseModel, TimestampMixin): # Search space-level LLM preferences (shared by all members) # Note: These can be negative IDs for global configs (from YAML) or positive IDs for custom configs (from DB) - long_context_llm_id = Column(Integer, nullable=True) - fast_llm_id = Column(Integer, nullable=True) - strategic_llm_id = Column(Integer, nullable=True) + agent_llm_id = Column(Integer, nullable=True) # For agent/chat operations + document_summary_llm_id = Column( + Integer, nullable=True + ) # For document summarization user_id = Column( UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False @@ -423,16 +468,16 @@ class SearchSpace(BaseModel, TimestampMixin): order_by="Document.id", 
cascade="all, delete-orphan", ) + new_chat_threads = relationship( + "NewChatThread", + back_populates="search_space", + order_by="NewChatThread.updated_at.desc()", + cascade="all, delete-orphan", + ) podcasts = relationship( "Podcast", back_populates="search_space", - order_by="Podcast.id", - cascade="all, delete-orphan", - ) - chats = relationship( - "Chat", - back_populates="search_space", - order_by="Chat.id", + order_by="Podcast.id.desc()", cascade="all, delete-orphan", ) logs = relationship( @@ -447,10 +492,10 @@ class SearchSpace(BaseModel, TimestampMixin): order_by="SearchSourceConnector.id", cascade="all, delete-orphan", ) - llm_configs = relationship( - "LLMConfig", + new_llm_configs = relationship( + "NewLLMConfig", back_populates="search_space", - order_by="LLMConfig.id", + order_by="NewLLMConfig.id", cascade="all, delete-orphan", ) @@ -509,10 +554,24 @@ class SearchSourceConnector(BaseModel, TimestampMixin): ) -class LLMConfig(BaseModel, TimestampMixin): - __tablename__ = "llm_configs" +class NewLLMConfig(BaseModel, TimestampMixin): + """ + New LLM configuration table that combines model settings with prompt configuration. + + This table provides: + - LLM model configuration (provider, model_name, api_key, etc.) + - Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS) + - Citation toggle (enable/disable citation instructions) + + Note: SURFSENSE_TOOLS_INSTRUCTIONS is always used and not configurable. 
+ """ + + __tablename__ = "new_llm_configs" name = Column(String(100), nullable=False, index=True) + description = Column(String(500), nullable=True) + + # === LLM Model Configuration (from original LLMConfig, excluding 'language') === # Provider from the enum provider = Column(SQLAlchemyEnum(LiteLLMProvider), nullable=False) # Custom provider name when provider is CUSTOM @@ -522,16 +581,29 @@ class LLMConfig(BaseModel, TimestampMixin): # API Key should be encrypted before storing api_key = Column(String, nullable=False) api_base = Column(String(500), nullable=True) - - language = Column(String(50), nullable=True, default="English") - # For any other parameters that litellm supports litellm_params = Column(JSON, nullable=True, default={}) + # === Prompt Configuration === + # Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS) + # Users can customize this from the UI + system_instructions = Column( + Text, + nullable=False, + default="", # Empty string means use default SURFSENSE_SYSTEM_INSTRUCTIONS + ) + # Whether to use the default system instructions when system_instructions is empty + use_default_system_instructions = Column(Boolean, nullable=False, default=True) + + # Citation toggle - when enabled, SURFSENSE_CITATION_INSTRUCTIONS is injected + # When disabled, an anti-citation prompt is injected instead + citations_enabled = Column(Boolean, nullable=False, default=True) + + # === Relationships === search_space_id = Column( Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False ) - search_space = relationship("SearchSpace", back_populates="llm_configs") + search_space = relationship("SearchSpace", back_populates="new_llm_configs") class Log(BaseModel, TimestampMixin): diff --git a/surfsense_backend/app/prompts/public_search_space_prompts.yaml b/surfsense_backend/app/prompts/public_search_space_prompts.yaml deleted file mode 100644 index 69b2de036..000000000 --- 
a/surfsense_backend/app/prompts/public_search_space_prompts.yaml +++ /dev/null @@ -1,190 +0,0 @@ -prompts: - # Developer-focused prompts - - key: ethereum_developer - value: "Imagine you are an experienced Ethereum developer tasked with creating a smart contract for a blockchain messenger. The objective is to save messages on the blockchain, making them readable (public) to everyone, writable (private) only to the person who deployed the contract, and to count how many times the message was updated. Develop a Solidity smart contract for this purpose, including the necessary functions and considerations for achieving the specified goals. Please provide the code and any relevant explanations to ensure a clear understanding of the implementation." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: linux_terminal - value: "I want you to act as a linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply with the terminal output inside one unique code block, and nothing else. do not write explanations. do not type commands unless I instruct you to do so. when i need to tell you something in english, i will do so by putting text inside curly brackets {like this}." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: javascript_console - value: "I want you to act as a javascript console. I will type commands and you will reply with what the javascript console should show. I want you to only reply with the terminal output inside one unique code block, and nothing else. do not write explanations. do not type commands unless I instruct you to do so. when i need to tell you something in english, i will do so by putting text inside curly brackets {like this}." 
- author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: fullstack_developer - value: "I want you to act as a software developer. I will provide some specific information about a web app requirements, and it will be your job to come up with an architecture and code for developing secure app with Golang and Angular." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: regex_generator - value: "I want you to act as a regex generator. Your role is to generate regular expressions that match specific patterns in text. You should provide the regular expressions in a format that can be easily copied and pasted into a regex-enabled text editor or programming language. Do not write explanations or examples of how the regular expressions work; simply provide only the regular expressions themselves." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: senior_frontend_developer - value: "I want you to act as a Senior Frontend developer. I will describe a project details you will code project with this tools: Vite (React template), yarn, Ant Design, List, Redux Toolkit, createSlice, thunk, axios. You should merge files in single index.js file and nothing else. Do not write explanations." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: code_reviewer - value: "I want you to act as a Code reviewer who is experienced developer in the given code language. I will provide you with the code block or methods or code file along with the code language name, and I would like you to review the code and share the feedback, suggestions and alternative recommended approaches. Please write explanations behind the feedback or suggestions or alternative approaches." 
- author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: machine_learning_engineer - value: "I want you to act as a machine learning engineer. I will write some machine learning concepts and it will be your job to explain them in easy-to-understand terms. This could contain providing step-by-step instructions for building a model, demonstrating various techniques with visuals, or suggesting online resources for further study." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: sql_terminal - value: "I want you to act as a SQL terminal in front of an example database. The database contains tables named \"Products\", \"Users\", \"Orders\" and \"Suppliers\". I will type queries and you will reply with what the terminal would show. I want you to reply with a table of query results in a single code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so in curly braces {like this)." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: python_interpreter - value: "Act as a Python interpreter. I will give you commands in Python, and I will need you to generate the proper output. Only say the output. But if there is none, say nothing, and don't give me an explanation. If I need to say something, I will do so through comments." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: devops_engineer - value: "You are a Senior DevOps engineer working at a Big Company. Your role is to provide scalable, efficient, and automated solutions for software deployment, infrastructure management, and CI/CD pipelines. 
Suggest the best DevOps practices, including infrastructure setup, deployment strategies, automation tools, and cost-effective scaling solutions." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - - key: cyber_security_specialist - value: "I want you to act as a cyber security specialist. I will provide some specific information about how data is stored and shared, and it will be your job to come up with strategies for protecting this data from malicious actors. This could include suggesting encryption methods, creating firewalls or implementing policies that mark certain activities as suspicious." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "developer" - - # General productivity prompts - - key: english_translator - value: "I want you to act as an English translator, spelling corrector and improver. I will speak to you in any language and you will detect the language, translate it and answer in the corrected and improved version of my text, in English. I want you to replace my simplified A0-level words and sentences with more beautiful and elegant, upper level English words and sentences. Keep the meaning same, but make them more literary. I want you to only reply the correction, the improvements and nothing else, do not write explanations." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "general" - - - key: proofreader - value: "I want you act as a proofreader. I will provide you texts and I would like you to review them for any spelling, grammar, or punctuation errors. Once you have finished reviewing the text, provide me with any necessary corrections or suggestions for improve the text." 
- author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "general" - - - key: note_taking_assistant - value: "I want you to act as a note-taking assistant for a lecture. Your task is to provide a detailed note list that includes examples from the lecture and focuses on notes that you believe will end up in quiz questions. Additionally, please make a separate list for notes that have numbers and data in them and another separated list for the examples that included in this lecture. The notes should be concise and easy to read." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "general" - - - key: essay_writer - value: "I want you to act as an essay writer. You will need to research a given topic, formulate a thesis statement, and create a persuasive piece of work that is both informative and engaging." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "general" - - - key: career_counselor - value: "I want you to act as a career counselor. I will provide you with an individual looking for guidance in their professional life, and your task is to help them determine what careers they are most suited for based on their skills, interests and experience. You should also conduct research into the various options available, explain the job market trends in different industries and advice on which qualifications would be beneficial for pursuing particular fields." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "general" - - - key: life_coach - value: "I want you to act as a life coach. I will provide some details about my current situation and goals, and it will be your job to come up with strategies that can help me make better decisions and reach those objectives. 
This could involve offering advice on various topics, such as creating plans for achieving success or dealing with difficult emotions." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "general" - - - key: motivational_coach - value: "I want you to act as a motivational coach. I will provide you with some information about someone's goals and challenges, and it will be your job to come up with strategies that can help this person achieve their goals. This could involve providing positive affirmations, giving helpful advice or suggesting activities they can do to reach their end goal." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "general" - - - key: travel_guide - value: "I want you to act as a travel guide. I will write you my location and you will suggest a place to visit near my location. In some cases, I will also give you the type of places I will visit. You will also suggest me places of similar type that are close to my first location." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "general" - - # Creative prompts - - key: storyteller - value: "I want you to act as a storyteller. You will come up with entertaining stories that are engaging, imaginative and captivating for the audience. It can be fairy tales, educational stories or any other type of stories which has the potential to capture people's attention and imagination. Depending on the target audience, you may choose specific themes or topics for your storytelling session e.g., if it's children then you can talk about animals; If it's adults then history-based tales might engage them better etc." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "creative" - - - key: screenwriter - value: "I want you to act as a screenwriter. 
You will develop an engaging and creative script for either a feature length film, or a Web Series that can captivate its viewers. Start with coming up with interesting characters, the setting of the story, dialogues between the characters etc. Once your character development is complete - create an exciting storyline filled with twists and turns that keeps the viewers in suspense until the end." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "creative" - - - key: novelist - value: "I want you to act as a novelist. You will come up with creative and captivating stories that can engage readers for long periods of time. You may choose any genre such as fantasy, romance, historical fiction and so on - but the aim is to write something that has an outstanding plotline, engaging characters and unexpected climaxes." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "creative" - - - key: poet - value: "I want you to act as a poet. You will create poems that evoke emotions and have the power to stir people's soul. Write on any topic or theme but make sure your words convey the feeling you are trying to express in beautiful yet meaningful ways. You can also come up with short verses that are still powerful enough to leave an imprint in readers' minds." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "creative" - - - key: rapper - value: "I want you to act as a rapper. You will come up with powerful and meaningful lyrics, beats and rhythm that can 'wow' the audience. Your lyrics should have an intriguing meaning and message which people can relate too. When it comes to choosing your beat, make sure it is catchy yet relevant to your words, so that when combined they make an explosion of sound everytime!" 
- author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "creative" - - - key: composer - value: "I want you to act as a composer. I will provide the lyrics to a song and you will create music for it. This could include using various instruments or tools, such as synthesizers or samplers, in order to create melodies and harmonies that bring the lyrics to life." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "creative" - - # Educational prompts - - key: math_teacher - value: "I want you to act as a math teacher. I will provide some mathematical equations or concepts, and it will be your job to explain them in easy-to-understand terms. This could include providing step-by-step instructions for solving a problem, demonstrating various techniques with visuals or suggesting online resources for further study." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "educational" - - - key: philosophy_teacher - value: "I want you to act as a philosophy teacher. I will provide some topics related to the study of philosophy, and it will be your job to explain these concepts in an easy-to-understand manner. This could include providing examples, posing questions or breaking down complex ideas into smaller pieces that are easier to comprehend." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "educational" - - - key: historian - value: "I want you to act as a historian. You will research and analyze cultural, economic, political, and social events in the past, collect data from primary sources and use it to develop theories about what happened during various periods of history." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "educational" - - - key: debater - value: "I want you to act as a debater. 
I will provide you with some topics related to current events and your task is to research both sides of the debates, present valid arguments for each side, refute opposing points of view, and draw persuasive conclusions based on evidence. Your goal is to help people come away from the discussion with increased knowledge and insight into the topic at hand." - author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "educational" - - - key: explainer_with_analogies - value: "I want you to act as an explainer who uses analogies to clarify complex topics. When I give you a subject (technical, philosophical or scientific), you'll follow this structure: 1. Ask me 1-2 quick questions to assess my current level of understanding. 2. Based on my answer, create three analogies to explain the topic: one that a 10-year-old would understand, one for a high-school student, and one for a college-level person. 3. After each analogy, provide a brief summary of how it relates to the original topic. 4. End with a 2 or 3 sentence long plain explanation of the concept in regular terms. Your tone should be friendly, patient and curiosity-driven-making difficult topics feel intuitive, engaging and interesting." 
- author: "awesome-chatgpt-prompts" - link: "https://github.com/f/awesome-chatgpt-prompts" - category: "educational" diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index c9d70588d..a055bf549 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -3,7 +3,6 @@ from fastapi import APIRouter from .airtable_add_connector_route import ( router as airtable_add_connector_router, ) -from .chats_routes import router as chats_router from .documents_routes import router as documents_router from .editor_routes import router as editor_router from .google_calendar_add_connector_route import ( @@ -12,9 +11,10 @@ from .google_calendar_add_connector_route import ( from .google_gmail_add_connector_route import ( router as google_gmail_add_connector_router, ) -from .llm_config_routes import router as llm_config_router from .logs_routes import router as logs_router from .luma_add_connector_route import router as luma_add_connector_router +from .new_chat_routes import router as new_chat_router +from .new_llm_config_routes import router as new_llm_config_router from .notes_routes import router as notes_router from .podcasts_routes import router as podcasts_router from .rbac_routes import router as rbac_router @@ -28,12 +28,12 @@ router.include_router(rbac_router) # RBAC routes for roles, members, invites router.include_router(editor_router) router.include_router(documents_router) router.include_router(notes_router) -router.include_router(podcasts_router) -router.include_router(chats_router) +router.include_router(new_chat_router) # Chat with assistant-ui persistence +router.include_router(podcasts_router) # Podcast task status and audio router.include_router(search_source_connectors_router) router.include_router(google_calendar_add_connector_router) router.include_router(google_gmail_add_connector_router) router.include_router(airtable_add_connector_router) 
router.include_router(luma_add_connector_router) -router.include_router(llm_config_router) +router.include_router(new_llm_config_router) # LLM configs with prompt configuration router.include_router(logs_router) diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py deleted file mode 100644 index 6a4894643..000000000 --- a/surfsense_backend/app/routes/chats_routes.py +++ /dev/null @@ -1,616 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException -from fastapi.responses import StreamingResponse -from langchain_core.messages import AIMessage, HumanMessage -from sqlalchemy.exc import IntegrityError, OperationalError -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.future import select -from sqlalchemy.orm import selectinload - -from app.db import ( - Chat, - Permission, - SearchSpace, - SearchSpaceMembership, - User, - get_async_session, -) -from app.schemas import ( - AISDKChatRequest, - ChatCreate, - ChatRead, - ChatReadWithoutMessages, - ChatUpdate, - NewChatRequest, -) -from app.services.new_streaming_service import VercelStreamingService -from app.tasks.chat.stream_connector_search_results import ( - stream_connector_search_results, -) -from app.tasks.chat.stream_new_chat import stream_new_chat -from app.users import current_active_user -from app.utils.rbac import check_permission -from app.utils.validators import ( - validate_connectors, - validate_document_ids, - validate_messages, - validate_research_mode, - validate_search_space_id, - validate_top_k, -) - -router = APIRouter() - - -@router.post("/chat") -async def handle_chat_data( - request: AISDKChatRequest, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - # Validate and sanitize all input data - messages = validate_messages(request.messages) - - if messages[-1]["role"] != "user": - raise HTTPException( - status_code=400, detail="Last message must be a user message" - ) - - 
user_query = messages[-1]["content"] - - # Extract and validate data from request - request_data = request.data or {} - search_space_id = validate_search_space_id(request_data.get("search_space_id")) - research_mode = validate_research_mode(request_data.get("research_mode")) - selected_connectors = validate_connectors(request_data.get("selected_connectors")) - document_ids_to_add_in_context = validate_document_ids( - request_data.get("document_ids_to_add_in_context") - ) - top_k = validate_top_k(request_data.get("top_k")) - # print("RESQUEST DATA:", request_data) - # print("SELECTED CONNECTORS:", selected_connectors) - - # Check if the user has chat access to the search space - try: - await check_permission( - session, - user, - search_space_id, - Permission.CHATS_CREATE.value, - "You don't have permission to use chat in this search space", - ) - - # Get search space with LLM configs (preferences are now stored at search space level) - search_space_result = await session.execute( - select(SearchSpace) - .options(selectinload(SearchSpace.llm_configs)) - .filter(SearchSpace.id == search_space_id) - ) - search_space = search_space_result.scalars().first() - - language = None - llm_configs = [] # Initialize to empty list - - if search_space and search_space.llm_configs: - llm_configs = search_space.llm_configs - - # Get language from configured LLM preferences - # LLM preferences are now stored on the SearchSpace model - from app.config import config as app_config - - for llm_id in [ - search_space.fast_llm_id, - search_space.long_context_llm_id, - search_space.strategic_llm_id, - ]: - if llm_id is not None: - # Check if it's a global config (negative ID) - if llm_id < 0: - # Look in global configs - for global_cfg in app_config.GLOBAL_LLM_CONFIGS: - if global_cfg.get("id") == llm_id: - language = global_cfg.get("language") - if language: - break - else: - # Look in custom configs - for llm_config in llm_configs: - if llm_config.id == llm_id and getattr( - llm_config, 
"language", None - ): - language = llm_config.language - break - if language: - break - - if not language and llm_configs: - first_llm_config = llm_configs[0] - language = getattr(first_llm_config, "language", None) - - except HTTPException: - raise HTTPException( - status_code=403, detail="You don't have access to this search space" - ) from None - - langchain_chat_history = [] - for message in messages[:-1]: - if message["role"] == "user": - langchain_chat_history.append(HumanMessage(content=message["content"])) - elif message["role"] == "assistant": - langchain_chat_history.append(AIMessage(content=message["content"])) - - response = StreamingResponse( - stream_connector_search_results( - user_query, - user.id, - search_space_id, - session, - research_mode, - selected_connectors, - langchain_chat_history, - document_ids_to_add_in_context, - language, - top_k, - ) - ) - - response.headers["x-vercel-ai-data-stream"] = "v1" - return response - - -@router.post("/new_chat") -async def handle_new_chat( - request: NewChatRequest, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Handle new chat requests using the SurfSense deep agent. - - This endpoint uses the new deep agent with the Vercel AI SDK - Data Stream Protocol (SSE format). 
- - Args: - request: NewChatRequest containing chat_id, user_query, and search_space_id - session: Database session - user: Current authenticated user - - Returns: - StreamingResponse with SSE formatted data - """ - # Validate the user query - if not request.user_query or not request.user_query.strip(): - raise HTTPException(status_code=400, detail="User query cannot be empty") - - # Check if the user has chat access to the search space - try: - await check_permission( - session, - user, - request.search_space_id, - Permission.CHATS_CREATE.value, - "You don't have permission to use chat in this search space", - ) - except HTTPException: - raise HTTPException( - status_code=403, detail="You don't have access to this search space" - ) from None - - # Get LLM config ID from search space preferences (optional enhancement) - # For now, we use the default global config (-1) - llm_config_id = -1 - - # Optionally load LLM preferences from search space - try: - search_space_result = await session.execute( - select(SearchSpace).filter(SearchSpace.id == request.search_space_id) - ) - search_space = search_space_result.scalars().first() - - if search_space: - # Use strategic_llm_id if available, otherwise fall back to fast_llm_id - if search_space.strategic_llm_id is not None: - llm_config_id = search_space.strategic_llm_id - elif search_space.fast_llm_id is not None: - llm_config_id = search_space.fast_llm_id - except Exception: - # Fall back to default config on any error - pass - - # Create the streaming response - # chat_id is used as LangGraph's thread_id for automatic chat history management - response = StreamingResponse( - stream_new_chat( - user_query=request.user_query.strip(), - user_id=user.id, - search_space_id=request.search_space_id, - chat_id=request.chat_id, - session=session, - llm_config_id=llm_config_id, - ), - media_type="text/event-stream", - ) - - # Set the required headers for Vercel AI SDK - headers = VercelStreamingService.get_response_headers() - for 
key, value in headers.items(): - response.headers[key] = value - - return response - - -@router.post("/chats", response_model=ChatRead) -async def create_chat( - chat: ChatCreate, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Create a new chat. - Requires CHATS_CREATE permission. - """ - try: - await check_permission( - session, - user, - chat.search_space_id, - Permission.CHATS_CREATE.value, - "You don't have permission to create chats in this search space", - ) - db_chat = Chat(**chat.model_dump()) - session.add(db_chat) - await session.commit() - await session.refresh(db_chat) - return db_chat - except HTTPException: - raise - except IntegrityError: - await session.rollback() - raise HTTPException( - status_code=400, - detail="Database constraint violation. Please check your input data.", - ) from None - except OperationalError: - await session.rollback() - raise HTTPException( - status_code=503, detail="Database operation failed. Please try again later." - ) from None - except Exception: - await session.rollback() - raise HTTPException( - status_code=500, - detail="An unexpected error occurred while creating the chat.", - ) from None - - -@router.get("/chats", response_model=list[ChatReadWithoutMessages]) -async def read_chats( - skip: int = 0, - limit: int = 100, - search_space_id: int | None = None, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - List chats the user has access to. - Requires CHATS_READ permission for the search space(s). 
- """ - # Validate pagination parameters - if skip < 0: - raise HTTPException( - status_code=400, detail="skip must be a non-negative integer" - ) - - if limit <= 0 or limit > 1000: # Reasonable upper limit - raise HTTPException(status_code=400, detail="limit must be between 1 and 1000") - - # Validate search_space_id if provided - if search_space_id is not None and search_space_id <= 0: - raise HTTPException( - status_code=400, detail="search_space_id must be a positive integer" - ) - try: - if search_space_id is not None: - # Check permission for specific search space - await check_permission( - session, - user, - search_space_id, - Permission.CHATS_READ.value, - "You don't have permission to read chats in this search space", - ) - # Select specific fields excluding messages - query = ( - select( - Chat.id, - Chat.type, - Chat.title, - Chat.initial_connectors, - Chat.search_space_id, - Chat.created_at, - Chat.state_version, - ) - .filter(Chat.search_space_id == search_space_id) - .order_by(Chat.created_at.desc()) - ) - else: - # Get chats from all search spaces user has membership in - query = ( - select( - Chat.id, - Chat.type, - Chat.title, - Chat.initial_connectors, - Chat.search_space_id, - Chat.created_at, - Chat.state_version, - ) - .join(SearchSpace) - .join(SearchSpaceMembership) - .filter(SearchSpaceMembership.user_id == user.id) - .order_by(Chat.created_at.desc()) - ) - - result = await session.execute(query.offset(skip).limit(limit)) - return result.all() - except HTTPException: - raise - except OperationalError: - raise HTTPException( - status_code=503, detail="Database operation failed. Please try again later." - ) from None - except Exception: - raise HTTPException( - status_code=500, detail="An unexpected error occurred while fetching chats." 
- ) from None - - -@router.get("/chats/search", response_model=list[ChatReadWithoutMessages]) -async def search_chats( - title: str, - skip: int = 0, - limit: int = 100, - search_space_id: int | None = None, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Search chats by title substring. - Requires CHATS_READ permission for the search space(s). - - Args: - title: Case-insensitive substring to match against chat titles. Required. - skip: Number of items to skip from the beginning. Default: 0. - limit: Maximum number of items to return. Default: 100. - search_space_id: Filter results to a specific search space. Default: None. - session: Database session (injected). - user: Current authenticated user (injected). - - Returns: - List of chats matching the search query. - - Notes: - - Title matching uses ILIKE (case-insensitive). - - Results are ordered by creation date (most recent first). - """ - # Validate pagination parameters - if skip < 0: - raise HTTPException( - status_code=400, detail="skip must be a non-negative integer" - ) - - if limit <= 0 or limit > 1000: - raise HTTPException(status_code=400, detail="limit must be between 1 and 1000") - - # Validate search_space_id if provided - if search_space_id is not None and search_space_id <= 0: - raise HTTPException( - status_code=400, detail="search_space_id must be a positive integer" - ) - - try: - if search_space_id is not None: - # Check permission for specific search space - await check_permission( - session, - user, - search_space_id, - Permission.CHATS_READ.value, - "You don't have permission to read chats in this search space", - ) - # Select specific fields excluding messages - query = ( - select( - Chat.id, - Chat.type, - Chat.title, - Chat.initial_connectors, - Chat.search_space_id, - Chat.created_at, - Chat.state_version, - ) - .filter(Chat.search_space_id == search_space_id) - .order_by(Chat.created_at.desc()) - ) - else: - # Get chats from 
all search spaces user has membership in - query = ( - select( - Chat.id, - Chat.type, - Chat.title, - Chat.initial_connectors, - Chat.search_space_id, - Chat.created_at, - Chat.state_version, - ) - .join(SearchSpace) - .join(SearchSpaceMembership) - .filter(SearchSpaceMembership.user_id == user.id) - .order_by(Chat.created_at.desc()) - ) - - # Apply title search filter (case-insensitive) - query = query.filter(Chat.title.ilike(f"%{title}%")) - - result = await session.execute(query.offset(skip).limit(limit)) - return result.all() - except HTTPException: - raise - except OperationalError: - raise HTTPException( - status_code=503, detail="Database operation failed. Please try again later." - ) from None - except Exception: - raise HTTPException( - status_code=500, - detail="An unexpected error occurred while searching chats.", - ) from None - - -@router.get("/chats/{chat_id}", response_model=ChatRead) -async def read_chat( - chat_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Get a specific chat by ID. - Requires CHATS_READ permission for the search space. - """ - try: - result = await session.execute(select(Chat).filter(Chat.id == chat_id)) - chat = result.scalars().first() - - if not chat: - raise HTTPException( - status_code=404, - detail="Chat not found", - ) - - # Check permission for the search space - await check_permission( - session, - user, - chat.search_space_id, - Permission.CHATS_READ.value, - "You don't have permission to read chats in this search space", - ) - - return chat - except HTTPException: - raise - except OperationalError: - raise HTTPException( - status_code=503, detail="Database operation failed. Please try again later." 
- ) from None - except Exception: - raise HTTPException( - status_code=500, - detail="An unexpected error occurred while fetching the chat.", - ) from None - - -@router.put("/chats/{chat_id}", response_model=ChatRead) -async def update_chat( - chat_id: int, - chat_update: ChatUpdate, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Update a chat. - Requires CHATS_UPDATE permission for the search space. - """ - try: - result = await session.execute(select(Chat).filter(Chat.id == chat_id)) - db_chat = result.scalars().first() - - if not db_chat: - raise HTTPException(status_code=404, detail="Chat not found") - - # Check permission for the search space - await check_permission( - session, - user, - db_chat.search_space_id, - Permission.CHATS_UPDATE.value, - "You don't have permission to update chats in this search space", - ) - - update_data = chat_update.model_dump(exclude_unset=True) - for key, value in update_data.items(): - if key == "messages": - db_chat.state_version = len(update_data["messages"]) - setattr(db_chat, key, value) - - await session.commit() - await session.refresh(db_chat) - return db_chat - except HTTPException: - raise - except IntegrityError: - await session.rollback() - raise HTTPException( - status_code=400, - detail="Database constraint violation. Please check your input data.", - ) from None - except OperationalError: - await session.rollback() - raise HTTPException( - status_code=503, detail="Database operation failed. Please try again later." - ) from None - except Exception: - await session.rollback() - raise HTTPException( - status_code=500, - detail="An unexpected error occurred while updating the chat.", - ) from None - - -@router.delete("/chats/{chat_id}", response_model=dict) -async def delete_chat( - chat_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Delete a chat. 
- Requires CHATS_DELETE permission for the search space. - """ - try: - result = await session.execute(select(Chat).filter(Chat.id == chat_id)) - db_chat = result.scalars().first() - - if not db_chat: - raise HTTPException(status_code=404, detail="Chat not found") - - # Check permission for the search space - await check_permission( - session, - user, - db_chat.search_space_id, - Permission.CHATS_DELETE.value, - "You don't have permission to delete chats in this search space", - ) - - await session.delete(db_chat) - await session.commit() - return {"message": "Chat deleted successfully"} - except HTTPException: - raise - except IntegrityError: - await session.rollback() - raise HTTPException( - status_code=400, detail="Cannot delete chat due to existing dependencies." - ) from None - except OperationalError: - await session.rollback() - raise HTTPException( - status_code=503, detail="Database operation failed. Please try again later." - ) from None - except Exception: - await session.rollback() - raise HTTPException( - status_code=500, - detail="An unexpected error occurred while deleting the chat.", - ) from None diff --git a/surfsense_backend/app/routes/llm_config_routes.py b/surfsense_backend/app/routes/llm_config_routes.py deleted file mode 100644 index 31c7200f5..000000000 --- a/surfsense_backend/app/routes/llm_config_routes.py +++ /dev/null @@ -1,576 +0,0 @@ -import logging - -from fastapi import APIRouter, Depends, HTTPException -from pydantic import BaseModel -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.future import select - -from app.config import config -from app.db import ( - LLMConfig, - Permission, - SearchSpace, - User, - get_async_session, -) -from app.schemas import LLMConfigCreate, LLMConfigRead, LLMConfigUpdate -from app.services.llm_service import validate_llm_config -from app.users import current_active_user -from app.utils.rbac import check_permission - -router = APIRouter() -logger = logging.getLogger(__name__) - - -class 
LLMPreferencesUpdate(BaseModel): - """Schema for updating search space LLM preferences""" - - long_context_llm_id: int | None = None - fast_llm_id: int | None = None - strategic_llm_id: int | None = None - - -class LLMPreferencesRead(BaseModel): - """Schema for reading search space LLM preferences""" - - long_context_llm_id: int | None = None - fast_llm_id: int | None = None - strategic_llm_id: int | None = None - long_context_llm: LLMConfigRead | None = None - fast_llm: LLMConfigRead | None = None - strategic_llm: LLMConfigRead | None = None - - -class GlobalLLMConfigRead(BaseModel): - """Schema for reading global LLM configs (without API key)""" - - id: int - name: str - provider: str - custom_provider: str | None = None - model_name: str - api_base: str | None = None - language: str | None = None - litellm_params: dict | None = None - is_global: bool = True - - -# Global LLM Config endpoints - - -@router.get("/global-llm-configs", response_model=list[GlobalLLMConfigRead]) -async def get_global_llm_configs( - user: User = Depends(current_active_user), -): - """ - Get all available global LLM configurations. - These are pre-configured by the system administrator and available to all users. - API keys are not exposed through this endpoint. 
- """ - try: - global_configs = config.GLOBAL_LLM_CONFIGS - - # Remove API keys from response - safe_configs = [] - for cfg in global_configs: - safe_config = { - "id": cfg.get("id"), - "name": cfg.get("name"), - "provider": cfg.get("provider"), - "custom_provider": cfg.get("custom_provider"), - "model_name": cfg.get("model_name"), - "api_base": cfg.get("api_base"), - "language": cfg.get("language"), - "litellm_params": cfg.get("litellm_params", {}), - "is_global": True, - } - safe_configs.append(safe_config) - - return safe_configs - except Exception as e: - raise HTTPException( - status_code=500, detail=f"Failed to fetch global LLM configs: {e!s}" - ) from e - - -@router.post("/llm-configs", response_model=LLMConfigRead) -async def create_llm_config( - llm_config: LLMConfigCreate, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Create a new LLM configuration for a search space. - Requires LLM_CONFIGS_CREATE permission. - """ - try: - # Verify user has permission to create LLM configs - await check_permission( - session, - user, - llm_config.search_space_id, - Permission.LLM_CONFIGS_CREATE.value, - "You don't have permission to create LLM configurations in this search space", - ) - - # Validate the LLM configuration by making a test API call - is_valid, error_message = await validate_llm_config( - provider=llm_config.provider.value, - model_name=llm_config.model_name, - api_key=llm_config.api_key, - api_base=llm_config.api_base, - custom_provider=llm_config.custom_provider, - litellm_params=llm_config.litellm_params, - ) - - if not is_valid: - raise HTTPException( - status_code=400, - detail=f"Invalid LLM configuration: {error_message}", - ) - - db_llm_config = LLMConfig(**llm_config.model_dump()) - session.add(db_llm_config) - await session.commit() - await session.refresh(db_llm_config) - return db_llm_config - except HTTPException: - raise - except Exception as e: - await session.rollback() - raise 
HTTPException( - status_code=500, detail=f"Failed to create LLM configuration: {e!s}" - ) from e - - -@router.get("/llm-configs", response_model=list[LLMConfigRead]) -async def read_llm_configs( - search_space_id: int, - skip: int = 0, - limit: int = 200, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Get all LLM configurations for a search space. - Requires LLM_CONFIGS_READ permission. - """ - try: - # Verify user has permission to read LLM configs - await check_permission( - session, - user, - search_space_id, - Permission.LLM_CONFIGS_READ.value, - "You don't have permission to view LLM configurations in this search space", - ) - - result = await session.execute( - select(LLMConfig) - .filter(LLMConfig.search_space_id == search_space_id) - .offset(skip) - .limit(limit) - ) - return result.scalars().all() - except HTTPException: - raise - except Exception as e: - raise HTTPException( - status_code=500, detail=f"Failed to fetch LLM configurations: {e!s}" - ) from e - - -@router.get("/llm-configs/{llm_config_id}", response_model=LLMConfigRead) -async def read_llm_config( - llm_config_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Get a specific LLM configuration by ID. - Requires LLM_CONFIGS_READ permission. 
- """ - try: - # Get the LLM config - result = await session.execute( - select(LLMConfig).filter(LLMConfig.id == llm_config_id) - ) - llm_config = result.scalars().first() - - if not llm_config: - raise HTTPException(status_code=404, detail="LLM configuration not found") - - # Verify user has permission to read LLM configs - await check_permission( - session, - user, - llm_config.search_space_id, - Permission.LLM_CONFIGS_READ.value, - "You don't have permission to view LLM configurations in this search space", - ) - - return llm_config - except HTTPException: - raise - except Exception as e: - raise HTTPException( - status_code=500, detail=f"Failed to fetch LLM configuration: {e!s}" - ) from e - - -@router.put("/llm-configs/{llm_config_id}", response_model=LLMConfigRead) -async def update_llm_config( - llm_config_id: int, - llm_config_update: LLMConfigUpdate, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Update an existing LLM configuration. - Requires LLM_CONFIGS_UPDATE permission. 
- """ - try: - # Get the LLM config - result = await session.execute( - select(LLMConfig).filter(LLMConfig.id == llm_config_id) - ) - db_llm_config = result.scalars().first() - - if not db_llm_config: - raise HTTPException(status_code=404, detail="LLM configuration not found") - - # Verify user has permission to update LLM configs - await check_permission( - session, - user, - db_llm_config.search_space_id, - Permission.LLM_CONFIGS_UPDATE.value, - "You don't have permission to update LLM configurations in this search space", - ) - - update_data = llm_config_update.model_dump(exclude_unset=True) - - # Apply updates to a temporary copy for validation - temp_config = { - "provider": update_data.get("provider", db_llm_config.provider.value), - "model_name": update_data.get("model_name", db_llm_config.model_name), - "api_key": update_data.get("api_key", db_llm_config.api_key), - "api_base": update_data.get("api_base", db_llm_config.api_base), - "custom_provider": update_data.get( - "custom_provider", db_llm_config.custom_provider - ), - "litellm_params": update_data.get( - "litellm_params", db_llm_config.litellm_params - ), - } - - # Validate the updated configuration - is_valid, error_message = await validate_llm_config( - provider=temp_config["provider"], - model_name=temp_config["model_name"], - api_key=temp_config["api_key"], - api_base=temp_config["api_base"], - custom_provider=temp_config["custom_provider"], - litellm_params=temp_config["litellm_params"], - ) - - if not is_valid: - raise HTTPException( - status_code=400, - detail=f"Invalid LLM configuration: {error_message}", - ) - - # Apply updates to the database object - for key, value in update_data.items(): - setattr(db_llm_config, key, value) - - await session.commit() - await session.refresh(db_llm_config) - return db_llm_config - except HTTPException: - raise - except Exception as e: - await session.rollback() - raise HTTPException( - status_code=500, detail=f"Failed to update LLM configuration: {e!s}" - ) 
from e - - -@router.delete("/llm-configs/{llm_config_id}", response_model=dict) -async def delete_llm_config( - llm_config_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Delete an LLM configuration. - Requires LLM_CONFIGS_DELETE permission. - """ - try: - # Get the LLM config - result = await session.execute( - select(LLMConfig).filter(LLMConfig.id == llm_config_id) - ) - db_llm_config = result.scalars().first() - - if not db_llm_config: - raise HTTPException(status_code=404, detail="LLM configuration not found") - - # Verify user has permission to delete LLM configs - await check_permission( - session, - user, - db_llm_config.search_space_id, - Permission.LLM_CONFIGS_DELETE.value, - "You don't have permission to delete LLM configurations in this search space", - ) - - await session.delete(db_llm_config) - await session.commit() - return {"message": "LLM configuration deleted successfully"} - except HTTPException: - raise - except Exception as e: - await session.rollback() - raise HTTPException( - status_code=500, detail=f"Failed to delete LLM configuration: {e!s}" - ) from e - - -# Search Space LLM Preferences endpoints - - -@router.get( - "/search-spaces/{search_space_id}/llm-preferences", - response_model=LLMPreferencesRead, -) -async def get_llm_preferences( - search_space_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Get the LLM preferences for a specific search space. - LLM preferences are shared by all members of the search space. - Requires LLM_CONFIGS_READ permission. 
- """ - try: - # Verify user has permission to read LLM configs - await check_permission( - session, - user, - search_space_id, - Permission.LLM_CONFIGS_READ.value, - "You don't have permission to view LLM preferences in this search space", - ) - - # Get the search space - result = await session.execute( - select(SearchSpace).filter(SearchSpace.id == search_space_id) - ) - search_space = result.scalars().first() - - if not search_space: - raise HTTPException(status_code=404, detail="Search space not found") - - # Helper function to get config (global or custom) - async def get_config_for_id(config_id): - if config_id is None: - return None - - # Check if it's a global config (negative ID) - if config_id < 0: - for cfg in config.GLOBAL_LLM_CONFIGS: - if cfg.get("id") == config_id: - # Return as LLMConfigRead-compatible dict - return { - "id": cfg.get("id"), - "name": cfg.get("name"), - "provider": cfg.get("provider"), - "custom_provider": cfg.get("custom_provider"), - "model_name": cfg.get("model_name"), - "api_key": "***GLOBAL***", # Don't expose the actual key - "api_base": cfg.get("api_base"), - "language": cfg.get("language"), - "litellm_params": cfg.get("litellm_params"), - "created_at": None, - "search_space_id": search_space_id, - } - return None - - # It's a custom config, fetch from database - result = await session.execute( - select(LLMConfig).filter(LLMConfig.id == config_id) - ) - return result.scalars().first() - - # Get the configs (from DB for custom, or constructed for global) - long_context_llm = await get_config_for_id(search_space.long_context_llm_id) - fast_llm = await get_config_for_id(search_space.fast_llm_id) - strategic_llm = await get_config_for_id(search_space.strategic_llm_id) - - return { - "long_context_llm_id": search_space.long_context_llm_id, - "fast_llm_id": search_space.fast_llm_id, - "strategic_llm_id": search_space.strategic_llm_id, - "long_context_llm": long_context_llm, - "fast_llm": fast_llm, - "strategic_llm": strategic_llm, - 
} - except HTTPException: - raise - except Exception as e: - raise HTTPException( - status_code=500, detail=f"Failed to fetch LLM preferences: {e!s}" - ) from e - - -@router.put( - "/search-spaces/{search_space_id}/llm-preferences", - response_model=LLMPreferencesRead, -) -async def update_llm_preferences( - search_space_id: int, - preferences: LLMPreferencesUpdate, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Update the LLM preferences for a specific search space. - LLM preferences are shared by all members of the search space. - Requires SETTINGS_UPDATE permission (only users with settings access can change). - """ - try: - # Verify user has permission to update settings (not just LLM configs) - # This ensures only users with settings access can change shared LLM preferences - await check_permission( - session, - user, - search_space_id, - Permission.SETTINGS_UPDATE.value, - "You don't have permission to update LLM preferences in this search space", - ) - - # Get the search space - result = await session.execute( - select(SearchSpace).filter(SearchSpace.id == search_space_id) - ) - search_space = result.scalars().first() - - if not search_space: - raise HTTPException(status_code=404, detail="Search space not found") - - # Validate that all provided LLM config IDs belong to the search space - update_data = preferences.model_dump(exclude_unset=True) - - # Store language from configs to validate consistency - languages = set() - - for _key, llm_config_id in update_data.items(): - if llm_config_id is not None: - # Check if this is a global config (negative ID) - if llm_config_id < 0: - # Validate global config exists - global_config = None - for cfg in config.GLOBAL_LLM_CONFIGS: - if cfg.get("id") == llm_config_id: - global_config = cfg - break - - if not global_config: - raise HTTPException( - status_code=404, - detail=f"Global LLM configuration {llm_config_id} not found", - ) - - # Collect language 
for consistency check (if explicitly set) - lang = global_config.get("language") - if lang and lang.strip(): # Only add non-empty languages - languages.add(lang.strip()) - else: - # Verify the LLM config belongs to the search space (custom config) - result = await session.execute( - select(LLMConfig).filter( - LLMConfig.id == llm_config_id, - LLMConfig.search_space_id == search_space_id, - ) - ) - llm_config = result.scalars().first() - if not llm_config: - raise HTTPException( - status_code=404, - detail=f"LLM configuration {llm_config_id} not found in this search space", - ) - - # Collect language for consistency check (if explicitly set) - if llm_config.language and llm_config.language.strip(): - languages.add(llm_config.language.strip()) - - # Language consistency check - only warn if there are multiple explicit languages - # Allow mixing configs with and without language settings - if len(languages) > 1: - # Log warning but allow the operation - logger.warning( - f"Multiple languages detected in LLM selection for search_space {search_space_id}: {languages}. " - "This may affect response quality." 
- ) - - # Update search space LLM preferences - for key, value in update_data.items(): - setattr(search_space, key, value) - - await session.commit() - await session.refresh(search_space) - - # Helper function to get config (global or custom) - async def get_config_for_id(config_id): - if config_id is None: - return None - - # Check if it's a global config (negative ID) - if config_id < 0: - for cfg in config.GLOBAL_LLM_CONFIGS: - if cfg.get("id") == config_id: - # Return as LLMConfigRead-compatible dict - return { - "id": cfg.get("id"), - "name": cfg.get("name"), - "provider": cfg.get("provider"), - "custom_provider": cfg.get("custom_provider"), - "model_name": cfg.get("model_name"), - "api_key": "***GLOBAL***", # Don't expose the actual key - "api_base": cfg.get("api_base"), - "language": cfg.get("language"), - "litellm_params": cfg.get("litellm_params"), - "created_at": None, - "search_space_id": search_space_id, - } - return None - - # It's a custom config, fetch from database - result = await session.execute( - select(LLMConfig).filter(LLMConfig.id == config_id) - ) - return result.scalars().first() - - # Get the configs (from DB for custom, or constructed for global) - long_context_llm = await get_config_for_id(search_space.long_context_llm_id) - fast_llm = await get_config_for_id(search_space.fast_llm_id) - strategic_llm = await get_config_for_id(search_space.strategic_llm_id) - - # Return updated preferences - return { - "long_context_llm_id": search_space.long_context_llm_id, - "fast_llm_id": search_space.fast_llm_id, - "strategic_llm_id": search_space.strategic_llm_id, - "long_context_llm": long_context_llm, - "fast_llm": fast_llm, - "strategic_llm": strategic_llm, - } - except HTTPException: - raise - except Exception as e: - await session.rollback() - raise HTTPException( - status_code=500, detail=f"Failed to update LLM preferences: {e!s}" - ) from e diff --git a/surfsense_backend/app/routes/new_chat_routes.py 
b/surfsense_backend/app/routes/new_chat_routes.py new file mode 100644 index 000000000..6026bd95e --- /dev/null +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -0,0 +1,905 @@ +""" +Routes for the new chat feature with assistant-ui integration. + +These endpoints support the ThreadHistoryAdapter pattern from assistant-ui: +- GET /threads - List threads for sidebar (ThreadListPrimitive) +- POST /threads - Create a new thread +- GET /threads/{thread_id} - Get thread with messages (load) +- PUT /threads/{thread_id} - Update thread (rename, archive) +- DELETE /threads/{thread_id} - Delete thread +- POST /threads/{thread_id}/messages - Append message +- POST /attachments/process - Process attachments for chat context +""" + +import contextlib +import os +import tempfile +import uuid +from datetime import UTC, datetime + +from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile +from fastapi.responses import StreamingResponse +from sqlalchemy.exc import IntegrityError, OperationalError +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select +from sqlalchemy.orm import selectinload + +from app.db import ( + NewChatMessage, + NewChatMessageRole, + NewChatThread, + Permission, + SearchSpace, + User, + get_async_session, +) +from app.schemas.new_chat import ( + NewChatMessageAppend, + NewChatMessageRead, + NewChatRequest, + NewChatThreadCreate, + NewChatThreadRead, + NewChatThreadUpdate, + NewChatThreadWithMessages, + ThreadHistoryLoadResponse, + ThreadListItem, + ThreadListResponse, +) +from app.tasks.chat.stream_new_chat import stream_new_chat +from app.users import current_active_user +from app.utils.rbac import check_permission + +router = APIRouter() + + +# ============================================================================= +# Thread Endpoints +# ============================================================================= + + +@router.get("/threads", response_model=ThreadListResponse) +async def 
list_threads( + search_space_id: int, + limit: int | None = None, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + List all threads for the current user in a search space. + Returns threads and archived_threads for ThreadListPrimitive. + + Args: + search_space_id: The search space to list threads for + limit: Optional limit on number of threads to return (applies to active threads only) + + Requires CHATS_READ permission. + """ + try: + await check_permission( + session, + user, + search_space_id, + Permission.CHATS_READ.value, + "You don't have permission to read chats in this search space", + ) + + # Get all threads in this search space + query = ( + select(NewChatThread) + .filter(NewChatThread.search_space_id == search_space_id) + .order_by(NewChatThread.updated_at.desc()) + ) + + result = await session.execute(query) + all_threads = result.scalars().all() + + # Separate active and archived threads + threads = [] + archived_threads = [] + + for thread in all_threads: + item = ThreadListItem( + id=thread.id, + title=thread.title, + archived=thread.archived, + created_at=thread.created_at, + updated_at=thread.updated_at, + ) + if thread.archived: + archived_threads.append(item) + else: + threads.append(item) + + # Apply limit to active threads if specified + if limit is not None and limit > 0: + threads = threads[:limit] + + return ThreadListResponse(threads=threads, archived_threads=archived_threads) + + except HTTPException: + raise + except OperationalError: + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." 
+ ) from None + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while fetching threads: {e!s}", + ) from None + + +@router.get("/threads/search", response_model=list[ThreadListItem]) +async def search_threads( + search_space_id: int, + title: str, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Search threads by title in a search space. + + Args: + search_space_id: The search space to search in + title: The search query (case-insensitive partial match) + + Requires CHATS_READ permission. + """ + try: + await check_permission( + session, + user, + search_space_id, + Permission.CHATS_READ.value, + "You don't have permission to read chats in this search space", + ) + + # Search threads by title (case-insensitive) + query = ( + select(NewChatThread) + .filter( + NewChatThread.search_space_id == search_space_id, + NewChatThread.title.ilike(f"%{title}%"), + ) + .order_by(NewChatThread.updated_at.desc()) + ) + + result = await session.execute(query) + threads = result.scalars().all() + + return [ + ThreadListItem( + id=thread.id, + title=thread.title, + archived=thread.archived, + created_at=thread.created_at, + updated_at=thread.updated_at, + ) + for thread in threads + ] + + except HTTPException: + raise + except OperationalError: + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." + ) from None + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while searching threads: {e!s}", + ) from None + + +@router.post("/threads", response_model=NewChatThreadRead) +async def create_thread( + thread: NewChatThreadCreate, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Create a new chat thread. + + Requires CHATS_CREATE permission. 
+ """ + try: + await check_permission( + session, + user, + thread.search_space_id, + Permission.CHATS_CREATE.value, + "You don't have permission to create chats in this search space", + ) + + now = datetime.now(UTC) + db_thread = NewChatThread( + title=thread.title, + archived=thread.archived, + search_space_id=thread.search_space_id, + updated_at=now, + ) + session.add(db_thread) + await session.commit() + await session.refresh(db_thread) + return db_thread + + except HTTPException: + raise + except IntegrityError: + await session.rollback() + raise HTTPException( + status_code=400, + detail="Database constraint violation. Please check your input data.", + ) from None + except OperationalError: + await session.rollback() + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." + ) from None + except Exception as e: + await session.rollback() + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while creating the thread: {e!s}", + ) from None + + +@router.get("/threads/{thread_id}", response_model=ThreadHistoryLoadResponse) +async def get_thread_messages( + thread_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Get a thread with all its messages. + This is used by ThreadHistoryAdapter.load() to restore conversation. + + Requires CHATS_READ permission. 
+ """ + try: + # Get thread with messages + result = await session.execute( + select(NewChatThread) + .options(selectinload(NewChatThread.messages)) + .filter(NewChatThread.id == thread_id) + ) + thread = result.scalars().first() + + if not thread: + raise HTTPException(status_code=404, detail="Thread not found") + + # Check permission and ownership + await check_permission( + session, + user, + thread.search_space_id, + Permission.CHATS_READ.value, + "You don't have permission to read chats in this search space", + ) + + # Return messages in the format expected by assistant-ui + messages = [ + NewChatMessageRead( + id=msg.id, + thread_id=msg.thread_id, + role=msg.role, + content=msg.content, + created_at=msg.created_at, + ) + for msg in thread.messages + ] + + return ThreadHistoryLoadResponse(messages=messages) + + except HTTPException: + raise + except OperationalError: + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." + ) from None + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while fetching the thread: {e!s}", + ) from None + + +@router.get("/threads/{thread_id}/full", response_model=NewChatThreadWithMessages) +async def get_thread_full( + thread_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Get full thread details with all messages. + + Requires CHATS_READ permission. 
+ """ + try: + result = await session.execute( + select(NewChatThread) + .options(selectinload(NewChatThread.messages)) + .filter(NewChatThread.id == thread_id) + ) + thread = result.scalars().first() + + if not thread: + raise HTTPException(status_code=404, detail="Thread not found") + + await check_permission( + session, + user, + thread.search_space_id, + Permission.CHATS_READ.value, + "You don't have permission to read chats in this search space", + ) + + return thread + + except HTTPException: + raise + except OperationalError: + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." + ) from None + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while fetching the thread: {e!s}", + ) from None + + +@router.put("/threads/{thread_id}", response_model=NewChatThreadRead) +async def update_thread( + thread_id: int, + thread_update: NewChatThreadUpdate, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Update a thread (title, archived status). + Used for renaming and archiving threads. + + Requires CHATS_UPDATE permission. 
+ """ + try: + result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == thread_id) + ) + db_thread = result.scalars().first() + + if not db_thread: + raise HTTPException(status_code=404, detail="Thread not found") + + await check_permission( + session, + user, + db_thread.search_space_id, + Permission.CHATS_UPDATE.value, + "You don't have permission to update chats in this search space", + ) + + # Update fields + update_data = thread_update.model_dump(exclude_unset=True) + for key, value in update_data.items(): + setattr(db_thread, key, value) + + db_thread.updated_at = datetime.now(UTC) + + await session.commit() + await session.refresh(db_thread) + return db_thread + + except HTTPException: + raise + except IntegrityError: + await session.rollback() + raise HTTPException( + status_code=400, + detail="Database constraint violation. Please check your input data.", + ) from None + except OperationalError: + await session.rollback() + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." + ) from None + except Exception as e: + await session.rollback() + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while updating the thread: {e!s}", + ) from None + + +@router.delete("/threads/{thread_id}", response_model=dict) +async def delete_thread( + thread_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Delete a thread and all its messages. + + Requires CHATS_DELETE permission. 
+ """ + try: + result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == thread_id) + ) + db_thread = result.scalars().first() + + if not db_thread: + raise HTTPException(status_code=404, detail="Thread not found") + + await check_permission( + session, + user, + db_thread.search_space_id, + Permission.CHATS_DELETE.value, + "You don't have permission to delete chats in this search space", + ) + + await session.delete(db_thread) + await session.commit() + return {"message": "Thread deleted successfully"} + + except HTTPException: + raise + except IntegrityError: + await session.rollback() + raise HTTPException( + status_code=400, detail="Cannot delete thread due to existing dependencies." + ) from None + except OperationalError: + await session.rollback() + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." + ) from None + except Exception as e: + await session.rollback() + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while deleting the thread: {e!s}", + ) from None + + +# ============================================================================= +# Message Endpoints +# ============================================================================= + + +@router.post("/threads/{thread_id}/messages", response_model=NewChatMessageRead) +async def append_message( + thread_id: int, + request: Request, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Append a message to a thread. + This is used by ThreadHistoryAdapter.append() to persist messages. + + Requires CHATS_UPDATE permission. 
+ """ + try: + # Parse raw body - extract only role and content, ignoring extra fields + raw_body = await request.json() + role = raw_body.get("role") + content = raw_body.get("content") + + if not role: + raise HTTPException(status_code=400, detail="Missing required field: role") + if content is None: + raise HTTPException( + status_code=400, detail="Missing required field: content" + ) + + # Create message object manually + message = NewChatMessageAppend(role=role, content=content) + # Get thread + result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == thread_id) + ) + thread = result.scalars().first() + + if not thread: + raise HTTPException(status_code=404, detail="Thread not found") + + await check_permission( + session, + user, + thread.search_space_id, + Permission.CHATS_UPDATE.value, + "You don't have permission to update chats in this search space", + ) + + # Convert string role to enum + role_str = ( + message.role.lower() if isinstance(message.role, str) else message.role + ) + try: + message_role = NewChatMessageRole(role_str) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Invalid role: {message.role}. 
Must be 'user', 'assistant', or 'system'.", + ) from None + + # Create message + db_message = NewChatMessage( + thread_id=thread_id, + role=message_role, + content=message.content, + ) + session.add(db_message) + + # Update thread's updated_at timestamp + thread.updated_at = datetime.now(UTC) + + # Auto-generate title from first user message if title is still default + if thread.title == "New Chat" and role_str == "user": + # Extract text content for title + content = message.content + if isinstance(content, str): + title_text = content + elif isinstance(content, list): + # Find first text content + title_text = "" + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + title_text = part.get("text", "") + break + elif isinstance(part, str): + title_text = part + break + else: + title_text = str(content) + + # Truncate title + if title_text: + thread.title = title_text[:100] + ( + "..." if len(title_text) > 100 else "" + ) + + await session.commit() + await session.refresh(db_message) + return db_message + + except HTTPException: + raise + except IntegrityError: + await session.rollback() + raise HTTPException( + status_code=400, + detail="Database constraint violation. Please check your input data.", + ) from None + except OperationalError: + await session.rollback() + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." + ) from None + except Exception as e: + await session.rollback() + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while appending the message: {e!s}", + ) from None + + +@router.get("/threads/{thread_id}/messages", response_model=list[NewChatMessageRead]) +async def list_messages( + thread_id: int, + skip: int = 0, + limit: int = 100, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + List messages in a thread with pagination. + + Requires CHATS_READ permission. 
+ """ + try: + # Verify thread exists and user has access + result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == thread_id) + ) + thread = result.scalars().first() + + if not thread: + raise HTTPException(status_code=404, detail="Thread not found") + + await check_permission( + session, + user, + thread.search_space_id, + Permission.CHATS_READ.value, + "You don't have permission to read chats in this search space", + ) + + # Get messages + query = ( + select(NewChatMessage) + .filter(NewChatMessage.thread_id == thread_id) + .order_by(NewChatMessage.created_at) + .offset(skip) + .limit(limit) + ) + + result = await session.execute(query) + return result.scalars().all() + + except HTTPException: + raise + except OperationalError: + raise HTTPException( + status_code=503, detail="Database operation failed. Please try again later." + ) from None + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while fetching messages: {e!s}", + ) from None + + +# ============================================================================= +# Chat Streaming Endpoint +# ============================================================================= + + +@router.post("/new_chat") +async def handle_new_chat( + request: NewChatRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Stream chat responses from the deep agent. + + This endpoint handles the new chat functionality with streaming responses + using Server-Sent Events (SSE) format compatible with Vercel AI SDK. + + Requires CHATS_CREATE permission. 
+ """ + try: + # Verify thread exists and user has permission + result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == request.chat_id) + ) + thread = result.scalars().first() + + if not thread: + raise HTTPException(status_code=404, detail="Thread not found") + + await check_permission( + session, + user, + thread.search_space_id, + Permission.CHATS_CREATE.value, + "You don't have permission to chat in this search space", + ) + + # Get search space to check LLM config preferences + search_space_result = await session.execute( + select(SearchSpace).filter(SearchSpace.id == request.search_space_id) + ) + search_space = search_space_result.scalars().first() + + if not search_space: + raise HTTPException(status_code=404, detail="Search space not found") + + # Use agent_llm_id from search space for chat operations + # Positive IDs load from NewLLMConfig database table + # Negative IDs load from YAML global configs + # Falls back to -1 (first global config) if not configured + llm_config_id = ( + search_space.agent_llm_id if search_space.agent_llm_id is not None else -1 + ) + + # Return streaming response + return StreamingResponse( + stream_new_chat( + user_query=request.user_query, + search_space_id=request.search_space_id, + chat_id=request.chat_id, + session=session, + llm_config_id=llm_config_id, + attachments=request.attachments, + ), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred: {e!s}", + ) from None + + +# ============================================================================= +# Attachment Processing Endpoint +# ============================================================================= + + +@router.post("/attachments/process") +async def process_attachment( + file: UploadFile = File(...), 
+ session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Process an attachment file and extract its content as markdown. + + This endpoint uses the configured ETL service to parse files and return + the extracted content that can be used as context in chat messages. + + Supported file types depend on the configured ETL_SERVICE: + - Markdown/Text files: .md, .markdown, .txt (always supported) + - Audio files: .mp3, .mp4, .mpeg, .mpga, .m4a, .wav, .webm (if STT configured) + - Documents: .pdf, .docx, .doc, .pptx, .xlsx (depends on ETL service) + + Returns: + JSON with attachment id, name, type, and extracted content + """ + from app.config import config as app_config + + if not file.filename: + raise HTTPException(status_code=400, detail="No filename provided") + + filename = file.filename + attachment_id = str(uuid.uuid4()) + + try: + # Save file to a temporary location + file_ext = os.path.splitext(filename)[1].lower() + with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp_file: + temp_path = temp_file.name + content = await file.read() + temp_file.write(content) + + extracted_content = "" + + # Process based on file type + if file_ext in (".md", ".markdown", ".txt"): + # For text/markdown files, read content directly + with open(temp_path, encoding="utf-8") as f: + extracted_content = f.read() + + elif file_ext in (".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"): + # Audio files - transcribe if STT service is configured + if not app_config.STT_SERVICE: + raise HTTPException( + status_code=422, + detail="Audio transcription is not configured. 
Please set STT_SERVICE.",
+                )
+
+            stt_service_type = (
+                "local" if app_config.STT_SERVICE.startswith("local/") else "external"
+            )
+
+            if stt_service_type == "local":
+                from app.services.stt_service import stt_service
+
+                result = stt_service.transcribe_file(temp_path)
+                extracted_content = result.get("text", "")
+            else:
+                from litellm import atranscription
+
+                with open(temp_path, "rb") as audio_file:
+                    transcription_kwargs = {
+                        "model": app_config.STT_SERVICE,
+                        "file": audio_file,
+                        "api_key": app_config.STT_SERVICE_API_KEY,
+                    }
+                    if app_config.STT_SERVICE_API_BASE:
+                        transcription_kwargs["api_base"] = (
+                            app_config.STT_SERVICE_API_BASE
+                        )
+
+                    transcription_response = await atranscription(
+                        **transcription_kwargs
+                    )
+                    extracted_content = transcription_response.get("text", "")
+
+            if extracted_content:
+                extracted_content = (
+                    f"# Transcription of {filename}\n\n{extracted_content}"
+                )
+
+        else:
+            # Document files - use configured ETL service
+            if app_config.ETL_SERVICE == "UNSTRUCTURED":
+                from langchain_unstructured import UnstructuredLoader
+
+                from app.utils.document_converters import convert_document_to_markdown
+
+                loader = UnstructuredLoader(
+                    temp_path,
+                    mode="elements",
+                    post_processors=[],
+                    languages=["eng"],
+                    include_orig_elements=False,
+                    include_metadata=False,
+                    strategy="auto",
+                )
+                docs = await loader.aload()
+                extracted_content = await convert_document_to_markdown(docs)
+
+            elif app_config.ETL_SERVICE == "LLAMACLOUD":
+                from llama_cloud_services import LlamaParse
+                from llama_cloud_services.parse.utils import ResultType
+
+                parser = LlamaParse(
+                    api_key=app_config.LLAMA_CLOUD_API_KEY,
+                    num_workers=1,
+                    verbose=False,
+                    language="en",
+                    result_type=ResultType.MD,
+                )
+                result = await parser.aparse(temp_path)
+                markdown_documents = await result.aget_markdown_documents(
+                    split_by_page=False
+                )
+
+                if markdown_documents:
+                    extracted_content = "\n\n".join(
+                        doc.text for doc in markdown_documents
+                    )
+
+            elif app_config.ETL_SERVICE == "DOCLING":
+                from app.services.docling_service import create_docling_service
+
+                docling_service = create_docling_service()
+                result = await docling_service.process_document(temp_path, filename)
+                extracted_content = result.get("content", "")
+
+            else:
+                raise HTTPException(
+                    status_code=422,
+                    detail=f"ETL service not configured or unsupported file type: {file_ext}",
+                )
+
+        # Clean up temp file
+        with contextlib.suppress(Exception):
+            os.unlink(temp_path)
+
+        if not extracted_content:
+            raise HTTPException(
+                status_code=422,
+                detail=f"Could not extract content from file: {filename}",
+            )
+
+        # Determine attachment type (must be one of: "image", "document", "file")
+        # assistant-ui only supports these three types
+        if file_ext in (".png", ".jpg", ".jpeg", ".gif", ".webp"):
+            attachment_type = "image"
+        else:
+            # All other files (including audio, documents, text) are treated as "document"
+            attachment_type = "document"
+
+        return {
+            "id": attachment_id,
+            "name": filename,
+            "type": attachment_type,
+            "content": extracted_content,
+            "contentLength": len(extracted_content),
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        # Clean up temp file on error
+        with contextlib.suppress(Exception):
+            os.unlink(temp_path)
+
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to process attachment: {e!s}",
+        ) from e
diff --git a/surfsense_backend/app/routes/new_llm_config_routes.py b/surfsense_backend/app/routes/new_llm_config_routes.py
new file mode 100644
index 000000000..d54b95bad
--- /dev/null
+++ b/surfsense_backend/app/routes/new_llm_config_routes.py
@@ -0,0 +1,376 @@
+"""
+API routes for NewLLMConfig CRUD operations.
+
+NewLLMConfig combines LLM model settings with prompt configuration:
+- LLM provider, model, API key, etc.
+- Configurable system instructions +- Citation toggle +""" + +import logging + +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select + +from app.agents.new_chat.system_prompt import get_default_system_instructions +from app.config import config +from app.db import ( + NewLLMConfig, + Permission, + User, + get_async_session, +) +from app.schemas import ( + DefaultSystemInstructionsResponse, + GlobalNewLLMConfigRead, + NewLLMConfigCreate, + NewLLMConfigRead, + NewLLMConfigUpdate, +) +from app.services.llm_service import validate_llm_config +from app.users import current_active_user +from app.utils.rbac import check_permission + +router = APIRouter() +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Global Configs Routes +# ============================================================================= + + +@router.get("/global-new-llm-configs", response_model=list[GlobalNewLLMConfigRead]) +async def get_global_new_llm_configs( + user: User = Depends(current_active_user), +): + """ + Get all available global NewLLMConfig configurations. + These are pre-configured by the system administrator and available to all users. + API keys are not exposed through this endpoint. + + Global configs have negative IDs to distinguish from user-created configs. 
+ """ + try: + global_configs = config.GLOBAL_LLM_CONFIGS + + # Transform to new structure, hiding API keys + safe_configs = [] + for cfg in global_configs: + safe_config = { + "id": cfg.get("id"), + "name": cfg.get("name"), + "description": cfg.get("description"), + "provider": cfg.get("provider"), + "custom_provider": cfg.get("custom_provider"), + "model_name": cfg.get("model_name"), + "api_base": cfg.get("api_base") or None, + "litellm_params": cfg.get("litellm_params", {}), + # New prompt configuration fields + "system_instructions": cfg.get("system_instructions", ""), + "use_default_system_instructions": cfg.get( + "use_default_system_instructions", True + ), + "citations_enabled": cfg.get("citations_enabled", True), + "is_global": True, + } + safe_configs.append(safe_config) + + return safe_configs + except Exception as e: + logger.exception("Failed to fetch global NewLLMConfigs") + raise HTTPException( + status_code=500, detail=f"Failed to fetch global configurations: {e!s}" + ) from e + + +# ============================================================================= +# CRUD Routes +# ============================================================================= + + +@router.post("/new-llm-configs", response_model=NewLLMConfigRead) +async def create_new_llm_config( + config_data: NewLLMConfigCreate, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Create a new NewLLMConfig for a search space. + Requires LLM_CONFIGS_CREATE permission. 
+ """ + try: + # Verify user has permission + await check_permission( + session, + user, + config_data.search_space_id, + Permission.LLM_CONFIGS_CREATE.value, + "You don't have permission to create LLM configurations in this search space", + ) + + # Validate the LLM configuration by making a test API call + is_valid, error_message = await validate_llm_config( + provider=config_data.provider.value, + model_name=config_data.model_name, + api_key=config_data.api_key, + api_base=config_data.api_base, + custom_provider=config_data.custom_provider, + litellm_params=config_data.litellm_params, + ) + + if not is_valid: + raise HTTPException( + status_code=400, + detail=f"Invalid LLM configuration: {error_message}", + ) + + # Create the config + db_config = NewLLMConfig(**config_data.model_dump()) + session.add(db_config) + await session.commit() + await session.refresh(db_config) + + return db_config + + except HTTPException: + raise + except Exception as e: + await session.rollback() + logger.exception("Failed to create NewLLMConfig") + raise HTTPException( + status_code=500, detail=f"Failed to create configuration: {e!s}" + ) from e + + +@router.get("/new-llm-configs", response_model=list[NewLLMConfigRead]) +async def list_new_llm_configs( + search_space_id: int, + skip: int = 0, + limit: int = 100, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Get all NewLLMConfigs for a search space. + Requires LLM_CONFIGS_READ permission. 
+ """ + try: + # Verify user has permission + await check_permission( + session, + user, + search_space_id, + Permission.LLM_CONFIGS_READ.value, + "You don't have permission to view LLM configurations in this search space", + ) + + result = await session.execute( + select(NewLLMConfig) + .filter(NewLLMConfig.search_space_id == search_space_id) + .order_by(NewLLMConfig.created_at.desc()) + .offset(skip) + .limit(limit) + ) + + return result.scalars().all() + + except HTTPException: + raise + except Exception as e: + logger.exception("Failed to list NewLLMConfigs") + raise HTTPException( + status_code=500, detail=f"Failed to fetch configurations: {e!s}" + ) from e + + +@router.get( + "/new-llm-configs/default-system-instructions", + response_model=DefaultSystemInstructionsResponse, +) +async def get_default_system_instructions_endpoint( + user: User = Depends(current_active_user), +): + """ + Get the default SURFSENSE_SYSTEM_INSTRUCTIONS template. + Useful for pre-populating the UI when creating a new configuration. + """ + return DefaultSystemInstructionsResponse( + default_system_instructions=get_default_system_instructions() + ) + + +@router.get("/new-llm-configs/{config_id}", response_model=NewLLMConfigRead) +async def get_new_llm_config( + config_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Get a specific NewLLMConfig by ID. + Requires LLM_CONFIGS_READ permission. 
+ """ + try: + result = await session.execute( + select(NewLLMConfig).filter(NewLLMConfig.id == config_id) + ) + config = result.scalars().first() + + if not config: + raise HTTPException(status_code=404, detail="Configuration not found") + + # Verify user has permission + await check_permission( + session, + user, + config.search_space_id, + Permission.LLM_CONFIGS_READ.value, + "You don't have permission to view LLM configurations in this search space", + ) + + return config + + except HTTPException: + raise + except Exception as e: + logger.exception("Failed to get NewLLMConfig") + raise HTTPException( + status_code=500, detail=f"Failed to fetch configuration: {e!s}" + ) from e + + +@router.put("/new-llm-configs/{config_id}", response_model=NewLLMConfigRead) +async def update_new_llm_config( + config_id: int, + update_data: NewLLMConfigUpdate, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Update an existing NewLLMConfig. + Requires LLM_CONFIGS_UPDATE permission. 
+ """ + try: + result = await session.execute( + select(NewLLMConfig).filter(NewLLMConfig.id == config_id) + ) + config = result.scalars().first() + + if not config: + raise HTTPException(status_code=404, detail="Configuration not found") + + # Verify user has permission + await check_permission( + session, + user, + config.search_space_id, + Permission.LLM_CONFIGS_UPDATE.value, + "You don't have permission to update LLM configurations in this search space", + ) + + update_dict = update_data.model_dump(exclude_unset=True) + + # If updating LLM settings, validate them + if any( + key in update_dict + for key in [ + "provider", + "model_name", + "api_key", + "api_base", + "custom_provider", + "litellm_params", + ] + ): + # Build the validation config from existing + updates + validation_config = { + "provider": update_dict.get("provider", config.provider).value + if hasattr(update_dict.get("provider", config.provider), "value") + else update_dict.get("provider", config.provider.value), + "model_name": update_dict.get("model_name", config.model_name), + "api_key": update_dict.get("api_key", config.api_key), + "api_base": update_dict.get("api_base", config.api_base), + "custom_provider": update_dict.get( + "custom_provider", config.custom_provider + ), + "litellm_params": update_dict.get( + "litellm_params", config.litellm_params + ), + } + + is_valid, error_message = await validate_llm_config( + provider=validation_config["provider"], + model_name=validation_config["model_name"], + api_key=validation_config["api_key"], + api_base=validation_config["api_base"], + custom_provider=validation_config["custom_provider"], + litellm_params=validation_config["litellm_params"], + ) + + if not is_valid: + raise HTTPException( + status_code=400, + detail=f"Invalid LLM configuration: {error_message}", + ) + + # Apply updates + for key, value in update_dict.items(): + setattr(config, key, value) + + await session.commit() + await session.refresh(config) + + return config + + except 
HTTPException: + raise + except Exception as e: + await session.rollback() + logger.exception("Failed to update NewLLMConfig") + raise HTTPException( + status_code=500, detail=f"Failed to update configuration: {e!s}" + ) from e + + +@router.delete("/new-llm-configs/{config_id}", response_model=dict) +async def delete_new_llm_config( + config_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Delete a NewLLMConfig. + Requires LLM_CONFIGS_DELETE permission. + """ + try: + result = await session.execute( + select(NewLLMConfig).filter(NewLLMConfig.id == config_id) + ) + config = result.scalars().first() + + if not config: + raise HTTPException(status_code=404, detail="Configuration not found") + + # Verify user has permission + await check_permission( + session, + user, + config.search_space_id, + Permission.LLM_CONFIGS_DELETE.value, + "You don't have permission to delete LLM configurations in this search space", + ) + + await session.delete(config) + await session.commit() + + return {"message": "Configuration deleted successfully", "id": config_id} + + except HTTPException: + raise + except Exception as e: + await session.rollback() + logger.exception("Failed to delete NewLLMConfig") + raise HTTPException( + status_code=500, detail=f"Failed to delete configuration: {e!s}" + ) from e diff --git a/surfsense_backend/app/routes/podcasts_routes.py b/surfsense_backend/app/routes/podcasts_routes.py index deb9d9744..ef362edb5 100644 --- a/surfsense_backend/app/routes/podcasts_routes.py +++ b/surfsense_backend/app/routes/podcasts_routes.py @@ -1,14 +1,22 @@ +""" +Podcast routes for task status polling and audio retrieval. + +These routes support the podcast generation feature in new-chat. +Note: The old Chat-based podcast generation has been removed. 
+""" + import os from pathlib import Path +from celery.result import AsyncResult from fastapi import APIRouter, Depends, HTTPException from fastapi.responses import StreamingResponse -from sqlalchemy.exc import IntegrityError, SQLAlchemyError +from sqlalchemy import select +from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.future import select +from app.celery_app import celery_app from app.db import ( - Chat, Permission, Podcast, SearchSpace, @@ -16,62 +24,13 @@ from app.db import ( User, get_async_session, ) -from app.schemas import ( - PodcastCreate, - PodcastGenerateRequest, - PodcastRead, - PodcastUpdate, -) -from app.tasks.podcast_tasks import generate_chat_podcast +from app.schemas import PodcastRead from app.users import current_active_user from app.utils.rbac import check_permission router = APIRouter() -@router.post("/podcasts", response_model=PodcastRead) -async def create_podcast( - podcast: PodcastCreate, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Create a new podcast. - Requires PODCASTS_CREATE permission. 
- """ - try: - await check_permission( - session, - user, - podcast.search_space_id, - Permission.PODCASTS_CREATE.value, - "You don't have permission to create podcasts in this search space", - ) - db_podcast = Podcast(**podcast.model_dump()) - session.add(db_podcast) - await session.commit() - await session.refresh(db_podcast) - return db_podcast - except HTTPException as he: - raise he - except IntegrityError: - await session.rollback() - raise HTTPException( - status_code=400, - detail="Podcast creation failed due to constraint violation", - ) from None - except SQLAlchemyError: - await session.rollback() - raise HTTPException( - status_code=500, detail="Database error occurred while creating podcast" - ) from None - except Exception: - await session.rollback() - raise HTTPException( - status_code=500, detail="An unexpected error occurred" - ) from None - - @router.get("/podcasts", response_model=list[PodcastRead]) async def read_podcasts( skip: int = 0, @@ -159,53 +118,6 @@ async def read_podcast( ) from None -@router.put("/podcasts/{podcast_id}", response_model=PodcastRead) -async def update_podcast( - podcast_id: int, - podcast_update: PodcastUpdate, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Update a podcast. - Requires PODCASTS_UPDATE permission for the search space. 
- """ - try: - result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) - db_podcast = result.scalars().first() - - if not db_podcast: - raise HTTPException(status_code=404, detail="Podcast not found") - - # Check permission for the search space - await check_permission( - session, - user, - db_podcast.search_space_id, - Permission.PODCASTS_UPDATE.value, - "You don't have permission to update podcasts in this search space", - ) - - update_data = podcast_update.model_dump(exclude_unset=True) - for key, value in update_data.items(): - setattr(db_podcast, key, value) - await session.commit() - await session.refresh(db_podcast) - return db_podcast - except HTTPException as he: - raise he - except IntegrityError: - await session.rollback() - raise HTTPException( - status_code=400, detail="Update failed due to constraint violation" - ) from None - except SQLAlchemyError: - await session.rollback() - raise HTTPException( - status_code=500, detail="Database error occurred while updating podcast" - ) from None - - @router.delete("/podcasts/{podcast_id}", response_model=dict) async def delete_podcast( podcast_id: int, @@ -244,108 +156,8 @@ async def delete_podcast( ) from None -async def generate_chat_podcast_with_new_session( - chat_id: int, - search_space_id: int, - user_id: int, - podcast_title: str | None = None, - user_prompt: str | None = None, -): - """Create a new session and process chat podcast generation.""" - from app.db import async_session_maker - - async with async_session_maker() as session: - try: - await generate_chat_podcast( - session, chat_id, search_space_id, user_id, podcast_title, user_prompt - ) - except Exception as e: - import logging - - logging.error(f"Error generating podcast from chat: {e!s}") - - -@router.post("/podcasts/generate") -async def generate_podcast( - request: PodcastGenerateRequest, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Generate a 
podcast from a chat or document. - Requires PODCASTS_CREATE permission. - """ - try: - # Check if the user has permission to create podcasts - await check_permission( - session, - user, - request.search_space_id, - Permission.PODCASTS_CREATE.value, - "You don't have permission to create podcasts in this search space", - ) - - if request.type == "CHAT": - # Verify that all chat IDs belong to this user and search space - query = ( - select(Chat) - .filter( - Chat.id.in_(request.ids), - Chat.search_space_id == request.search_space_id, - ) - .join(SearchSpace) - .filter(SearchSpace.user_id == user.id) - ) - - result = await session.execute(query) - valid_chats = result.scalars().all() - valid_chat_ids = [chat.id for chat in valid_chats] - - # If any requested ID is not in valid IDs, raise error immediately - if len(valid_chat_ids) != len(request.ids): - raise HTTPException( - status_code=403, - detail="One or more chat IDs do not belong to this user or search space", - ) - - from app.tasks.celery_tasks.podcast_tasks import ( - generate_chat_podcast_task, - ) - - # Add Celery tasks for each chat ID - for chat_id in valid_chat_ids: - generate_chat_podcast_task.delay( - chat_id, - request.search_space_id, - user.id, - request.podcast_title, - request.user_prompt, - ) - - return { - "message": "Podcast generation started", - } - except HTTPException as he: - raise he - except IntegrityError: - await session.rollback() - raise HTTPException( - status_code=400, - detail="Podcast generation failed due to constraint violation", - ) from None - except SQLAlchemyError: - await session.rollback() - raise HTTPException( - status_code=500, detail="Database error occurred while generating podcast" - ) from None - except Exception as e: - await session.rollback() - raise HTTPException( - status_code=500, detail=f"An unexpected error occurred: {e!s}" - ) from e - - @router.get("/podcasts/{podcast_id}/stream") +@router.get("/podcasts/{podcast_id}/audio") async def stream_podcast( 
podcast_id: int, session: AsyncSession = Depends(get_async_session), @@ -354,6 +166,8 @@ async def stream_podcast( """ Stream a podcast audio file. Requires PODCASTS_READ permission for the search space. + + Note: Both /stream and /audio endpoints are supported for compatibility. """ try: result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) @@ -378,7 +192,7 @@ async def stream_podcast( file_path = podcast.file_location # Check if the file exists - if not os.path.isfile(file_path): + if not file_path or not os.path.isfile(file_path): raise HTTPException(status_code=404, detail="Podcast audio file not found") # Define a generator function to stream the file @@ -404,43 +218,60 @@ async def stream_podcast( ) from e -@router.get("/podcasts/by-chat/{chat_id}", response_model=PodcastRead | None) -async def get_podcast_by_chat_id( - chat_id: int, - session: AsyncSession = Depends(get_async_session), +@router.get("/podcasts/task/{task_id}/status") +async def get_podcast_task_status( + task_id: str, user: User = Depends(current_active_user), ): """ - Get a podcast by its associated chat ID. - Requires PODCASTS_READ permission for the search space. + Get the status of a podcast generation task. + Used by new-chat frontend to poll for completion. 
+ + Returns: + - status: "processing" | "success" | "error" + - podcast_id: (only if status == "success") + - title: (only if status == "success") + - error: (only if status == "error") """ try: - # First get the chat to find its search space - chat_result = await session.execute(select(Chat).filter(Chat.id == chat_id)) - chat = chat_result.scalars().first() + result = AsyncResult(task_id, app=celery_app) - if not chat: - return None + if result.ready(): + # Task completed + if result.successful(): + task_result = result.result + if isinstance(task_result, dict): + if task_result.get("status") == "success": + return { + "status": "success", + "podcast_id": task_result.get("podcast_id"), + "title": task_result.get("title"), + "transcript_entries": task_result.get("transcript_entries"), + } + else: + return { + "status": "error", + "error": task_result.get("error", "Unknown error"), + } + else: + return { + "status": "error", + "error": "Unexpected task result format", + } + else: + # Task failed + return { + "status": "error", + "error": str(result.result) if result.result else "Task failed", + } + else: + # Task still processing + return { + "status": "processing", + "state": result.state, + } - # Check permission for the search space - await check_permission( - session, - user, - chat.search_space_id, - Permission.PODCASTS_READ.value, - "You don't have permission to read podcasts in this search space", - ) - - # Get the podcast - result = await session.execute( - select(Podcast).filter(Podcast.chat_id == chat_id) - ) - podcast = result.scalars().first() - - return podcast - except HTTPException as he: - raise he except Exception as e: raise HTTPException( - status_code=500, detail=f"Error fetching podcast: {e!s}" + status_code=500, detail=f"Error checking task status: {e!s}" ) from e diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index d04cf11ce..bc52a52b1 100644 --- 
a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -1,13 +1,13 @@ import logging -from pathlib import Path -import yaml from fastapi import APIRouter, Depends, HTTPException from sqlalchemy import func from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select +from app.config import config from app.db import ( + NewLLMConfig, Permission, SearchSpace, SearchSpaceMembership, @@ -17,6 +17,8 @@ from app.db import ( get_default_roles_config, ) from app.schemas import ( + LLMPreferencesRead, + LLMPreferencesUpdate, SearchSpaceCreate, SearchSpaceRead, SearchSpaceUpdate, @@ -184,37 +186,6 @@ async def read_search_spaces( ) from e -@router.get("/searchspaces/prompts/community") -async def get_community_prompts(): - """ - Get community-curated prompts for SearchSpace System Instructions. - This endpoint does not require authentication as it serves public prompts. - """ - try: - # Get the path to the prompts YAML file - prompts_file = ( - Path(__file__).parent.parent - / "prompts" - / "public_search_space_prompts.yaml" - ) - - if not prompts_file.exists(): - raise HTTPException( - status_code=404, detail="Community prompts file not found" - ) - - with open(prompts_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - - return data.get("prompts", []) - except HTTPException: - raise - except Exception as e: - raise HTTPException( - status_code=500, detail=f"Failed to load community prompts: {e!s}" - ) from e - - @router.get("/searchspaces/{search_space_id}", response_model=SearchSpaceRead) async def read_search_space( search_space_id: int, @@ -329,3 +300,184 @@ async def delete_search_space( raise HTTPException( status_code=500, detail=f"Failed to delete search space: {e!s}" ) from e + + +# ============================================================================= +# LLM Preferences Routes +# ============================================================================= + + 
+async def _get_llm_config_by_id( + session: AsyncSession, config_id: int | None +) -> dict | None: + """ + Get an LLM config by ID as a dictionary. Returns database config for positive IDs, + global config for negative IDs, or None if ID is None. + """ + if config_id is None: + return None + + if config_id < 0: + # Global config - find from YAML + global_configs = config.GLOBAL_LLM_CONFIGS + for cfg in global_configs: + if cfg.get("id") == config_id: + return { + "id": cfg.get("id"), + "name": cfg.get("name"), + "description": cfg.get("description"), + "provider": cfg.get("provider"), + "custom_provider": cfg.get("custom_provider"), + "model_name": cfg.get("model_name"), + "api_base": cfg.get("api_base"), + "litellm_params": cfg.get("litellm_params", {}), + "system_instructions": cfg.get("system_instructions", ""), + "use_default_system_instructions": cfg.get( + "use_default_system_instructions", True + ), + "citations_enabled": cfg.get("citations_enabled", True), + "is_global": True, + } + return None + else: + # Database config - convert to dict + result = await session.execute( + select(NewLLMConfig).filter(NewLLMConfig.id == config_id) + ) + db_config = result.scalars().first() + if db_config: + return { + "id": db_config.id, + "name": db_config.name, + "description": db_config.description, + "provider": db_config.provider.value if db_config.provider else None, + "custom_provider": db_config.custom_provider, + "model_name": db_config.model_name, + "api_key": db_config.api_key, + "api_base": db_config.api_base, + "litellm_params": db_config.litellm_params or {}, + "system_instructions": db_config.system_instructions or "", + "use_default_system_instructions": db_config.use_default_system_instructions, + "citations_enabled": db_config.citations_enabled, + "created_at": db_config.created_at.isoformat() + if db_config.created_at + else None, + "search_space_id": db_config.search_space_id, + } + return None + + +@router.get( + 
"/search-spaces/{search_space_id}/llm-preferences", + response_model=LLMPreferencesRead, +) +async def get_llm_preferences( + search_space_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Get LLM preferences (role assignments) for a search space. + Requires LLM_CONFIGS_READ permission. + """ + try: + # Check permission + await check_permission( + session, + user, + search_space_id, + Permission.LLM_CONFIGS_READ.value, + "You don't have permission to view LLM preferences", + ) + + result = await session.execute( + select(SearchSpace).filter(SearchSpace.id == search_space_id) + ) + search_space = result.scalars().first() + + if not search_space: + raise HTTPException(status_code=404, detail="Search space not found") + + # Get full config objects for each role + agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id) + document_summary_llm = await _get_llm_config_by_id( + session, search_space.document_summary_llm_id + ) + + return LLMPreferencesRead( + agent_llm_id=search_space.agent_llm_id, + document_summary_llm_id=search_space.document_summary_llm_id, + agent_llm=agent_llm, + document_summary_llm=document_summary_llm, + ) + + except HTTPException: + raise + except Exception as e: + logger.exception("Failed to get LLM preferences") + raise HTTPException( + status_code=500, detail=f"Failed to get LLM preferences: {e!s}" + ) from e + + +@router.put( + "/search-spaces/{search_space_id}/llm-preferences", + response_model=LLMPreferencesRead, +) +async def update_llm_preferences( + search_space_id: int, + preferences: LLMPreferencesUpdate, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Update LLM preferences (role assignments) for a search space. + Requires LLM_CONFIGS_UPDATE permission. 
+ """ + try: + # Check permission + await check_permission( + session, + user, + search_space_id, + Permission.LLM_CONFIGS_UPDATE.value, + "You don't have permission to update LLM preferences", + ) + + result = await session.execute( + select(SearchSpace).filter(SearchSpace.id == search_space_id) + ) + search_space = result.scalars().first() + + if not search_space: + raise HTTPException(status_code=404, detail="Search space not found") + + # Update preferences + update_data = preferences.model_dump(exclude_unset=True) + for key, value in update_data.items(): + setattr(search_space, key, value) + + await session.commit() + await session.refresh(search_space) + + # Get full config objects for response + agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id) + document_summary_llm = await _get_llm_config_by_id( + session, search_space.document_summary_llm_id + ) + + return LLMPreferencesRead( + agent_llm_id=search_space.agent_llm_id, + document_summary_llm_id=search_space.document_summary_llm_id, + agent_llm=agent_llm, + document_summary_llm=document_summary_llm, + ) + + except HTTPException: + raise + except Exception as e: + await session.rollback() + logger.exception("Failed to update LLM preferences") + raise HTTPException( + status_code=500, detail=f"Failed to update LLM preferences: {e!s}" + ) from e diff --git a/surfsense_backend/app/schemas/__init__.py b/surfsense_backend/app/schemas/__init__.py index a4308f6a2..f5ae65e9d 100644 --- a/surfsense_backend/app/schemas/__init__.py +++ b/surfsense_backend/app/schemas/__init__.py @@ -1,13 +1,4 @@ from .base import IDModel, TimestampModel -from .chats import ( - AISDKChatRequest, - ChatBase, - ChatCreate, - ChatRead, - ChatReadWithoutMessages, - ChatUpdate, - NewChatRequest, -) from .chunks import ChunkBase, ChunkCreate, ChunkRead, ChunkUpdate from .documents import ( DocumentBase, @@ -19,15 +10,32 @@ from .documents import ( ExtensionDocumentMetadata, PaginatedResponse, ) -from .llm_config 
import LLMConfigBase, LLMConfigCreate, LLMConfigRead, LLMConfigUpdate from .logs import LogBase, LogCreate, LogFilter, LogRead, LogUpdate -from .podcasts import ( - PodcastBase, - PodcastCreate, - PodcastGenerateRequest, - PodcastRead, - PodcastUpdate, +from .new_chat import ( + ChatMessage, + NewChatMessageAppend, + NewChatMessageCreate, + NewChatMessageRead, + NewChatRequest, + NewChatThreadCreate, + NewChatThreadRead, + NewChatThreadUpdate, + NewChatThreadWithMessages, + ThreadHistoryLoadResponse, + ThreadListItem, + ThreadListResponse, ) +from .new_llm_config import ( + DefaultSystemInstructionsResponse, + GlobalNewLLMConfigRead, + LLMPreferencesRead, + LLMPreferencesUpdate, + NewLLMConfigCreate, + NewLLMConfigPublic, + NewLLMConfigRead, + NewLLMConfigUpdate, +) +from .podcasts import PodcastBase, PodcastCreate, PodcastRead, PodcastUpdate from .rbac_schemas import ( InviteAcceptRequest, InviteAcceptResponse, @@ -61,16 +69,15 @@ from .search_space import ( from .users import UserCreate, UserRead, UserUpdate __all__ = [ - "AISDKChatRequest", - "ChatBase", - "ChatCreate", - "ChatRead", - "ChatReadWithoutMessages", - "ChatUpdate", + # Chat schemas (assistant-ui integration) + "ChatMessage", + # Chunk schemas "ChunkBase", "ChunkCreate", "ChunkRead", "ChunkUpdate", + "DefaultSystemInstructionsResponse", + # Document schemas "DocumentBase", "DocumentRead", "DocumentUpdate", @@ -78,6 +85,8 @@ __all__ = [ "DocumentsCreate", "ExtensionDocumentContent", "ExtensionDocumentMetadata", + "GlobalNewLLMConfigRead", + # Base schemas "IDModel", # RBAC schemas "InviteAcceptRequest", @@ -86,10 +95,10 @@ __all__ = [ "InviteInfoResponse", "InviteRead", "InviteUpdate", - "LLMConfigBase", - "LLMConfigCreate", - "LLMConfigRead", - "LLMConfigUpdate", + # LLM Preferences schemas + "LLMPreferencesRead", + "LLMPreferencesUpdate", + # Log schemas "LogBase", "LogCreate", "LogFilter", @@ -98,28 +107,46 @@ __all__ = [ "MembershipRead", "MembershipReadWithUser", "MembershipUpdate", + 
"NewChatMessageAppend", + "NewChatMessageCreate", + "NewChatMessageRead", "NewChatRequest", + "NewChatThreadCreate", + "NewChatThreadRead", + "NewChatThreadUpdate", + "NewChatThreadWithMessages", + # NewLLMConfig schemas + "NewLLMConfigCreate", + "NewLLMConfigPublic", + "NewLLMConfigRead", + "NewLLMConfigUpdate", "PaginatedResponse", "PermissionInfo", "PermissionsListResponse", + # Podcast schemas "PodcastBase", "PodcastCreate", - "PodcastGenerateRequest", "PodcastRead", "PodcastUpdate", "RoleCreate", "RoleRead", "RoleUpdate", + # Search source connector schemas "SearchSourceConnectorBase", "SearchSourceConnectorCreate", "SearchSourceConnectorRead", "SearchSourceConnectorUpdate", + # Search space schemas "SearchSpaceBase", "SearchSpaceCreate", "SearchSpaceRead", "SearchSpaceUpdate", "SearchSpaceWithStats", + "ThreadHistoryLoadResponse", + "ThreadListItem", + "ThreadListResponse", "TimestampModel", + # User schemas "UserCreate", "UserRead", "UserSearchSpaceAccess", diff --git a/surfsense_backend/app/schemas/chats.py b/surfsense_backend/app/schemas/chats.py deleted file mode 100644 index 6c281afbc..000000000 --- a/surfsense_backend/app/schemas/chats.py +++ /dev/null @@ -1,72 +0,0 @@ -from typing import Any - -from pydantic import BaseModel, ConfigDict - -from app.db import ChatType - -from .base import IDModel, TimestampModel - - -class ChatBase(BaseModel): - type: ChatType - title: str - initial_connectors: list[str] | None = None - messages: list[Any] - search_space_id: int - state_version: int = 1 - - -class ChatBaseWithoutMessages(BaseModel): - type: ChatType - title: str - search_space_id: int - state_version: int = 1 - - -class ClientAttachment(BaseModel): - name: str - content_type: str - url: str - - -class ToolInvocation(BaseModel): - tool_call_id: str - tool_name: str - args: dict - result: dict - - -# class ClientMessage(BaseModel): -# role: str -# content: str -# experimental_attachments: Optional[List[ClientAttachment]] = None -# toolInvocations: 
Optional[List[ToolInvocation]] = None - - -class AISDKChatRequest(BaseModel): - messages: list[Any] - data: dict[str, Any] | None = None - - -class NewChatRequest(BaseModel): - """Request schema for the new deep agent chat endpoint.""" - - chat_id: int - user_query: str - search_space_id: int - - -class ChatCreate(ChatBase): - pass - - -class ChatUpdate(ChatBase): - pass - - -class ChatRead(ChatBase, IDModel, TimestampModel): - model_config = ConfigDict(from_attributes=True) - - -class ChatReadWithoutMessages(ChatBaseWithoutMessages, IDModel, TimestampModel): - model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_backend/app/schemas/llm_config.py b/surfsense_backend/app/schemas/llm_config.py deleted file mode 100644 index 27f3736b5..000000000 --- a/surfsense_backend/app/schemas/llm_config.py +++ /dev/null @@ -1,72 +0,0 @@ -from datetime import datetime -from typing import Any - -from pydantic import BaseModel, ConfigDict, Field - -from app.db import LiteLLMProvider - -from .base import IDModel, TimestampModel - - -class LLMConfigBase(BaseModel): - name: str = Field( - ..., max_length=100, description="User-friendly name for the LLM configuration" - ) - provider: LiteLLMProvider = Field(..., description="LiteLLM provider type") - custom_provider: str | None = Field( - None, max_length=100, description="Custom provider name when provider is CUSTOM" - ) - model_name: str = Field( - ..., max_length=100, description="Model name without provider prefix" - ) - api_key: str = Field(..., description="API key for the provider") - api_base: str | None = Field( - None, max_length=500, description="Optional API base URL" - ) - litellm_params: dict[str, Any] | None = Field( - default=None, description="Additional LiteLLM parameters" - ) - language: str | None = Field( - default="English", max_length=50, description="Language for the LLM" - ) - - -class LLMConfigCreate(LLMConfigBase): - search_space_id: int = Field( - ..., description="Search space ID to 
associate the LLM config with" - ) - - -class LLMConfigUpdate(BaseModel): - name: str | None = Field( - None, max_length=100, description="User-friendly name for the LLM configuration" - ) - provider: LiteLLMProvider | None = Field(None, description="LiteLLM provider type") - custom_provider: str | None = Field( - None, max_length=100, description="Custom provider name when provider is CUSTOM" - ) - model_name: str | None = Field( - None, max_length=100, description="Model name without provider prefix" - ) - api_key: str | None = Field(None, description="API key for the provider") - api_base: str | None = Field( - None, max_length=500, description="Optional API base URL" - ) - language: str | None = Field( - None, max_length=50, description="Language for the LLM" - ) - litellm_params: dict[str, Any] | None = Field( - None, description="Additional LiteLLM parameters" - ) - - -class LLMConfigRead(LLMConfigBase, IDModel, TimestampModel): - id: int - created_at: datetime | None = Field( - None, description="Creation timestamp (None for global configs)" - ) - search_space_id: int | None = Field( - None, description="Search space ID (None for global configs)" - ) - - model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py new file mode 100644 index 000000000..ffaf85554 --- /dev/null +++ b/surfsense_backend/app/schemas/new_chat.py @@ -0,0 +1,162 @@ +""" +Pydantic schemas for the new chat feature with assistant-ui integration. 

These schemas follow the assistant-ui ThreadHistoryAdapter pattern:
- ThreadRecord: id, title, archived, createdAt, updatedAt
- MessageRecord: id, threadId, role, content, createdAt
"""

from datetime import datetime
from typing import Any

from pydantic import BaseModel, ConfigDict, Field

from app.db import NewChatMessageRole

from .base import IDModel, TimestampModel

# =============================================================================
# Message Schemas
# =============================================================================


class NewChatMessageBase(BaseModel):
    """Base schema for new chat messages."""

    # Role is constrained by the NewChatMessageRole enum from the DB layer.
    role: NewChatMessageRole
    # JSONB content - can be text, tool calls, etc.
    content: Any


class NewChatMessageCreate(NewChatMessageBase):
    """Schema for creating a new message in an existing thread."""

    thread_id: int


class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel):
    """Schema for reading a message (adds id, timestamps, thread_id)."""

    thread_id: int
    model_config = ConfigDict(from_attributes=True)


class NewChatMessageAppend(BaseModel):
    """
    Schema for appending a message via the history adapter.
    This is the format assistant-ui sends when calling append().
    """

    # Accepted as a plain string here; validated against the role enum in
    # the route handler rather than at schema level.
    role: str
    content: Any


# =============================================================================
# Thread Schemas
# =============================================================================


class NewChatThreadBase(BaseModel):
    """Base schema for new chat threads."""

    title: str = Field(default="New Chat", max_length=500)
    archived: bool = False


class NewChatThreadCreate(NewChatThreadBase):
    """Schema for creating a new thread inside a search space."""

    search_space_id: int


class NewChatThreadUpdate(BaseModel):
    """Schema for updating a thread; all fields optional (partial update)."""

    title: str | None = None
    archived: bool | None = None


class NewChatThreadRead(NewChatThreadBase, IDModel):
    """
    Schema for reading a thread (matches assistant-ui ThreadRecord).
    """

    search_space_id: int
    created_at: datetime
    updated_at: datetime

    model_config = ConfigDict(from_attributes=True)


class NewChatThreadWithMessages(NewChatThreadRead):
    """Schema for reading a thread together with its messages."""

    messages: list[NewChatMessageRead] = []


# =============================================================================
# History Adapter Response Schemas
# =============================================================================


class ThreadHistoryLoadResponse(BaseModel):
    """
    Response format for the ThreadHistoryAdapter.load() method.
    Returns messages array for the current thread.
    """

    messages: list[NewChatMessageRead]


class ThreadListItem(BaseModel):
    """
    Thread list item for sidebar display.
    Matches assistant-ui ThreadListPrimitive expected format.
    """

    id: int
    title: str
    archived: bool
    # Exposed under camelCase aliases for the frontend; populate_by_name
    # allows construction from snake_case ORM attributes as well.
    created_at: datetime = Field(alias="createdAt")
    updated_at: datetime = Field(alias="updatedAt")

    model_config = ConfigDict(from_attributes=True, populate_by_name=True)


class ThreadListResponse(BaseModel):
    """Response containing list of threads for the sidebar."""

    threads: list[ThreadListItem]
    archived_threads: list[ThreadListItem]


# =============================================================================
# Chat Request Schemas (for deep agent)
# =============================================================================


class ChatMessage(BaseModel):
    """A single message in the chat history."""

    role: str  # "user" or "assistant"
    content: str


class ChatAttachment(BaseModel):
    """An attachment with its extracted content for chat context."""

    id: str  # Unique attachment ID
    name: str  # Original filename
    type: str  # Attachment type: document, image, audio
    content: str  # Extracted markdown content from the file


class NewChatRequest(BaseModel):
    """Request schema for the deep agent chat endpoint."""

    chat_id: int
    user_query: str
    search_space_id: int
    messages: list[ChatMessage] | None = None  # Optional chat history from frontend
    attachments: list[ChatAttachment] | None = (
        None  # Optional attachments with extracted content
    )


"""
Pydantic schemas for the NewLLMConfig API.

NewLLMConfig combines LLM model settings with prompt configuration:
- LLM provider, model, API key, etc.
- Configurable system instructions
- Citation toggle
"""

from datetime import datetime
from typing import Any

from pydantic import BaseModel, ConfigDict, Field

from app.db import LiteLLMProvider


class NewLLMConfigBase(BaseModel):
    """Base schema with common fields for NewLLMConfig."""

    name: str = Field(
        ..., max_length=100, description="User-friendly name for the configuration"
    )
    description: str | None = Field(
        None, max_length=500, description="Optional description"
    )

    # LLM Model Configuration
    provider: LiteLLMProvider = Field(..., description="LiteLLM provider type")
    custom_provider: str | None = Field(
        None, max_length=100, description="Custom provider name when provider is CUSTOM"
    )
    model_name: str = Field(
        ..., max_length=100, description="Model name without provider prefix"
    )
    api_key: str = Field(..., description="API key for the provider")
    api_base: str | None = Field(
        None, max_length=500, description="Optional API base URL"
    )
    litellm_params: dict[str, Any] | None = Field(
        default=None, description="Additional LiteLLM parameters"
    )

    # Prompt Configuration
    system_instructions: str = Field(
        default="",
        description="Custom system instructions. Empty string uses default SURFSENSE_SYSTEM_INSTRUCTIONS.",
    )
    use_default_system_instructions: bool = Field(
        default=True,
        description="Whether to use default instructions when system_instructions is empty",
    )
    citations_enabled: bool = Field(
        default=True,
        description="Whether to include citation instructions in the system prompt",
    )


class NewLLMConfigCreate(NewLLMConfigBase):
    """Schema for creating a new NewLLMConfig."""

    search_space_id: int = Field(
        ..., description="Search space ID to associate the config with"
    )


class NewLLMConfigUpdate(BaseModel):
    """Schema for updating an existing NewLLMConfig. All fields are optional."""

    name: str | None = Field(None, max_length=100)
    description: str | None = Field(None, max_length=500)

    # LLM Model Configuration
    provider: LiteLLMProvider | None = None
    custom_provider: str | None = Field(None, max_length=100)
    model_name: str | None = Field(None, max_length=100)
    api_key: str | None = None
    api_base: str | None = Field(None, max_length=500)
    litellm_params: dict[str, Any] | None = None

    # Prompt Configuration
    system_instructions: str | None = None
    use_default_system_instructions: bool | None = None
    citations_enabled: bool | None = None


class NewLLMConfigRead(NewLLMConfigBase):
    """Schema for reading a NewLLMConfig (includes id and timestamps)."""

    id: int
    created_at: datetime
    search_space_id: int

    model_config = ConfigDict(from_attributes=True)


class NewLLMConfigPublic(BaseModel):
    """
    Public schema for NewLLMConfig that hides the API key.
    Used when returning configs in list views or to users who shouldn't see keys.
    """

    id: int
    name: str
    description: str | None = None

    # LLM Model Configuration (no api_key)
    provider: LiteLLMProvider
    custom_provider: str | None = None
    model_name: str
    api_base: str | None = None
    litellm_params: dict[str, Any] | None = None

    # Prompt Configuration
    system_instructions: str
    use_default_system_instructions: bool
    citations_enabled: bool

    created_at: datetime
    search_space_id: int

    model_config = ConfigDict(from_attributes=True)


class DefaultSystemInstructionsResponse(BaseModel):
    """Response schema for getting default system instructions."""

    default_system_instructions: str = Field(
        ..., description="The default SURFSENSE_SYSTEM_INSTRUCTIONS template"
    )


class GlobalNewLLMConfigRead(BaseModel):
    """
    Schema for reading global LLM configs from YAML.
    Global configs have negative IDs and no search_space_id.
    API key is hidden for security.
    """

    id: int = Field(..., description="Negative ID for global configs")
    name: str
    description: str | None = None

    # LLM Model Configuration (no api_key)
    provider: str  # String because YAML doesn't enforce enum
    custom_provider: str | None = None
    model_name: str
    api_base: str | None = None
    litellm_params: dict[str, Any] | None = None

    # Prompt Configuration
    system_instructions: str = ""
    use_default_system_instructions: bool = True
    citations_enabled: bool = True

    is_global: bool = True  # Always true for global configs


# =============================================================================
# LLM Preferences Schemas (for role assignments)
# =============================================================================


class LLMPreferencesRead(BaseModel):
    """Schema for reading LLM preferences (role assignments) for a search space."""

    agent_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for agent/chat tasks"
    )
    document_summary_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for document summarization"
    )
    # Full config payloads are plain dicts because the referenced config may
    # be a DB row or a YAML-defined global config — TODO confirm both shapes.
    agent_llm: dict[str, Any] | None = Field(
        None, description="Full config for agent LLM"
    )
    document_summary_llm: dict[str, Any] | None = Field(
        None, description="Full config for document summary LLM"
    )

    model_config = ConfigDict(from_attributes=True)


class LLMPreferencesUpdate(BaseModel):
    """Schema for updating LLM preferences."""

    agent_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for agent/chat tasks"
    )
    document_summary_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for document summarization"
    )
"""Podcast schemas for API responses."""

from datetime import datetime
from typing import Any

from pydantic import BaseModel, ConfigDict


class PodcastBase(BaseModel):
    """Base podcast schema shared by the create/read variants."""

    title: str
    # Transcript as a list of structured segments; None until generated.
    podcast_transcript: list[dict[str, Any]] | None = None
    # Storage location of the rendered audio; None until rendered.
    file_location: str | None = None
    search_space_id: int


class PodcastCreate(PodcastBase):
    """Schema for creating a podcast."""


class PodcastUpdate(BaseModel):
    """Schema for updating a podcast; all fields optional (partial update)."""

    title: str | None = None
    podcast_transcript: list[dict[str, Any]] | None = None
    file_location: str | None = None


class PodcastRead(PodcastBase):
    """Schema for reading a podcast (adds id and creation timestamp)."""

    id: int
    created_at: datetime

    # Pydantic v2 configuration style; replaces the deprecated inner
    # `class Config` and matches the other schema modules in this package.
    model_config = ConfigDict(from_attributes=True)
agent/chat operations + DOCUMENT_SUMMARY = "document_summary" # For document summarization def get_global_llm_config(llm_config_id: int) -> dict | None: @@ -155,7 +154,7 @@ async def get_search_space_llm_instance( Args: session: Database session search_space_id: Search Space ID - role: LLM role ('long_context', 'fast', or 'strategic') + role: LLM role ('agent' or 'document_summary') Returns: ChatLiteLLM instance or None if not found @@ -173,12 +172,10 @@ async def get_search_space_llm_instance( # Get the appropriate LLM config ID based on role llm_config_id = None - if role == LLMRole.LONG_CONTEXT: - llm_config_id = search_space.long_context_llm_id - elif role == LLMRole.FAST: - llm_config_id = search_space.fast_llm_id - elif role == LLMRole.STRATEGIC: - llm_config_id = search_space.strategic_llm_id + if role == LLMRole.AGENT: + llm_config_id = search_space.agent_llm_id + elif role == LLMRole.DOCUMENT_SUMMARY: + llm_config_id = search_space.document_summary_llm_id else: logger.error(f"Invalid LLM role: {role}") return None @@ -250,11 +247,11 @@ async def get_search_space_llm_instance( return ChatLiteLLM(**litellm_kwargs) - # Get the LLM configuration from database (user-specific config) + # Get the LLM configuration from database (NewLLMConfig) result = await session.execute( - select(LLMConfig).where( - LLMConfig.id == llm_config_id, - LLMConfig.search_space_id == search_space_id, + select(NewLLMConfig).where( + NewLLMConfig.id == llm_config_id, + NewLLMConfig.search_space_id == search_space_id, ) ) llm_config = result.scalars().first() @@ -265,11 +262,11 @@ async def get_search_space_llm_instance( ) return None - # Build the model string for litellm / 构建 LiteLLM 的模型字符串 + # Build the model string for litellm if llm_config.custom_provider: model_string = f"{llm_config.custom_provider}/{llm_config.model_name}" else: - # Map provider enum to litellm format / 将提供商枚举映射为 LiteLLM 格式 + # Map provider enum to litellm format provider_map = { "OPENAI": "openai", "ANTHROPIC": 
"anthropic", @@ -283,7 +280,7 @@ async def get_search_space_llm_instance( "COMETAPI": "cometapi", "XAI": "xai", "BEDROCK": "bedrock", - "AWS_BEDROCK": "bedrock", # Legacy support (backward compatibility) + "AWS_BEDROCK": "bedrock", "VERTEX_AI": "vertex_ai", "TOGETHER_AI": "together_ai", "FIREWORKS_AI": "fireworks_ai", @@ -296,7 +293,6 @@ async def get_search_space_llm_instance( "AI21": "ai21", "CLOUDFLARE": "cloudflare", "DATABRICKS": "databricks", - # Chinese LLM providers "DEEPSEEK": "openai", "ALIBABA_QWEN": "openai", "MOONSHOT": "openai", @@ -330,28 +326,19 @@ async def get_search_space_llm_instance( return None -async def get_long_context_llm( +async def get_agent_llm( session: AsyncSession, search_space_id: int ) -> ChatLiteLLM | None: - """Get the search space's long context LLM instance.""" + """Get the search space's agent LLM instance for chat operations.""" + return await get_search_space_llm_instance(session, search_space_id, LLMRole.AGENT) + + +async def get_document_summary_llm( + session: AsyncSession, search_space_id: int +) -> ChatLiteLLM | None: + """Get the search space's document summary LLM instance.""" return await get_search_space_llm_instance( - session, search_space_id, LLMRole.LONG_CONTEXT - ) - - -async def get_fast_llm( - session: AsyncSession, search_space_id: int -) -> ChatLiteLLM | None: - """Get the search space's fast LLM instance.""" - return await get_search_space_llm_instance(session, search_space_id, LLMRole.FAST) - - -async def get_strategic_llm( - session: AsyncSession, search_space_id: int -) -> ChatLiteLLM | None: - """Get the search space's strategic LLM instance.""" - return await get_search_space_llm_instance( - session, search_space_id, LLMRole.STRATEGIC + session, search_space_id, LLMRole.DOCUMENT_SUMMARY ) @@ -366,22 +353,54 @@ async def get_user_llm_instance( return await get_search_space_llm_instance(session, search_space_id, role) +# Legacy aliases for backward compatibility +async def get_long_context_llm( + 
    session: AsyncSession, search_space_id: int
) -> ChatLiteLLM | None:
    """Deprecated: Use get_document_summary_llm instead."""
    # Old "long context" role maps onto the new document-summary role.
    return await get_document_summary_llm(session, search_space_id)


async def get_fast_llm(
    session: AsyncSession, search_space_id: int
) -> ChatLiteLLM | None:
    """Deprecated: Use get_agent_llm instead."""
    # Old "fast" role maps onto the new agent role.
    return await get_agent_llm(session, search_space_id)


async def get_strategic_llm(
    session: AsyncSession, search_space_id: int
) -> ChatLiteLLM | None:
    """Deprecated: Use get_document_summary_llm instead."""
    # Old "strategic" role maps onto the new document-summary role.
    return await get_document_summary_llm(session, search_space_id)


# User-based legacy aliases (LLM preferences are now per-search-space, not per-user)
async def get_user_long_context_llm(
    session: AsyncSession, user_id: str, search_space_id: int
) -> ChatLiteLLM | None:
    """
    Deprecated: Use get_document_summary_llm instead.
    The user_id parameter is ignored as LLM preferences are now per-search-space.
    """
    return await get_document_summary_llm(session, search_space_id)


async def get_user_fast_llm(
    session: AsyncSession, user_id: str, search_space_id: int
) -> ChatLiteLLM | None:
    """
    Deprecated: Use get_agent_llm instead.
    The user_id parameter is ignored as LLM preferences are now per-search-space.
    """
    return await get_agent_llm(session, search_space_id)


async def get_user_strategic_llm(
    session: AsyncSession, user_id: str, search_space_id: int
) -> ChatLiteLLM | None:
    """
    Deprecated: Use get_document_summary_llm instead.
    The user_id parameter is ignored as LLM preferences are now per-search-space.
+ """ + return await get_document_summary_llm(session, search_space_id) diff --git a/surfsense_backend/app/services/new_streaming_service.py b/surfsense_backend/app/services/new_streaming_service.py index f0f05cdb6..05dd2d4dd 100644 --- a/surfsense_backend/app/services/new_streaming_service.py +++ b/surfsense_backend/app/services/new_streaming_service.py @@ -450,6 +450,35 @@ class VercelStreamingService: """ return self.format_data("further-questions", {"questions": questions}) + def format_thinking_step( + self, + step_id: str, + title: str, + status: str = "in_progress", + items: list[str] | None = None, + ) -> str: + """ + Format a thinking step for chain-of-thought display (SurfSense specific). + + Args: + step_id: Unique identifier for the step + title: The step title (e.g., "Analyzing your request") + status: Step status - "pending", "in_progress", or "completed" + items: Optional list of sub-items/details for this step + + Returns: + str: SSE formatted thinking step data part + """ + return self.format_data( + "thinking-step", + { + "id": step_id, + "title": title, + "status": status, + "items": items or [], + }, + ) + # ========================================================================= # Error Part # ========================================================================= diff --git a/surfsense_backend/app/services/query_service.py b/surfsense_backend/app/services/query_service.py index 84485c37d..863ff58a4 100644 --- a/surfsense_backend/app/services/query_service.py +++ b/surfsense_backend/app/services/query_service.py @@ -4,7 +4,7 @@ from typing import Any from langchain_core.messages import AIMessage, HumanMessage, SystemMessage from sqlalchemy.ext.asyncio import AsyncSession -from app.services.llm_service import get_strategic_llm +from app.services.llm_service import get_document_summary_llm class QueryService: @@ -20,7 +20,7 @@ class QueryService: chat_history_str: str | None = None, ) -> str: """ - Reformulate the user query using the search 
space's strategic LLM to make it more + Reformulate the user query using the search space's document summary LLM to make it more effective for information retrieval and research purposes. Args: @@ -36,11 +36,11 @@ class QueryService: return user_query try: - # Get the search space's strategic LLM instance - llm = await get_strategic_llm(session, search_space_id) + # Get the search space's document summary LLM instance + llm = await get_document_summary_llm(session, search_space_id) if not llm: print( - f"Warning: No strategic LLM configured for search space {search_space_id}. Using original query." + f"Warning: No document summary LLM configured for search space {search_space_id}. Using original query." ) return user_query diff --git a/surfsense_backend/app/services/streaming_service.py b/surfsense_backend/app/services/streaming_service.py deleted file mode 100644 index 98c0d3ac5..000000000 --- a/surfsense_backend/app/services/streaming_service.py +++ /dev/null @@ -1,191 +0,0 @@ -import json -from typing import Any - - -class StreamingService: - def __init__(self): - self.terminal_idx = 1 - self.message_annotations = [ - {"type": "TERMINAL_INFO", "content": []}, - {"type": "SOURCES", "content": []}, - {"type": "ANSWER", "content": []}, - {"type": "FURTHER_QUESTIONS", "content": []}, - ] - - # DEPRECATED: This sends the full annotation array every time (inefficient) - def _format_annotations(self) -> str: - """ - Format the annotations as a string - - DEPRECATED: This method sends the full annotation state every time. - Use the delta formatters instead for optimal streaming. - - Returns: - str: The formatted annotations string - """ - return f"8:{json.dumps(self.message_annotations)}\n" - - def format_terminal_info_delta(self, text: str, message_type: str = "info") -> str: - """ - Format a single terminal info message as a delta annotation - - Args: - text: The terminal message text - message_type: The message type (info, error, success, etc.) 
- - Returns: - str: The formatted annotation delta string - """ - message = {"id": self.terminal_idx, "text": text, "type": message_type} - self.terminal_idx += 1 - - # Update internal state for reference - self.message_annotations[0]["content"].append(message) - - # Return only the delta annotation - annotation = {"type": "TERMINAL_INFO", "data": message} - return f"8:[{json.dumps(annotation)}]\n" - - def format_sources_delta(self, sources: list[dict[str, Any]]) -> str: - """ - Format sources as a delta annotation - - Args: - sources: List of source objects - - Returns: - str: The formatted annotation delta string - """ - # Update internal state - self.message_annotations[1]["content"] = sources - - # Return only the delta annotation - nodes = [] - - for group in sources: - for source in group.get("sources", []): - node = { - "id": str(source.get("id", "")), - "text": source.get("description", "").strip(), - "url": source.get("url", ""), - "metadata": { - "title": source.get("title", ""), - "source_type": group.get("type", ""), - "group_name": group.get("name", ""), - }, - } - nodes.append(node) - - annotation = {"type": "sources", "data": {"nodes": nodes}} - return f"8:[{json.dumps(annotation)}]\n" - - def format_answer_delta(self, answer_chunk: str) -> str: - """ - Format a single answer chunk as a delta annotation - - Args: - answer_chunk: The new answer chunk to add - - Returns: - str: The formatted annotation delta string - """ - # Update internal state by appending the chunk - if isinstance(self.message_annotations[2]["content"], list): - self.message_annotations[2]["content"].append(answer_chunk) - else: - self.message_annotations[2]["content"] = [answer_chunk] - - # Return only the delta annotation with the new chunk - annotation = {"type": "ANSWER", "content": [answer_chunk]} - return f"8:[{json.dumps(annotation)}]\n" - - def format_answer_annotation(self, answer_lines: list[str]) -> str: - """ - Format the complete answer as a replacement annotation - - 
Args: - answer_lines: Complete list of answer lines - - Returns: - str: The formatted annotation string - """ - # Update internal state - self.message_annotations[2]["content"] = answer_lines - - # Return the full answer annotation - annotation = {"type": "ANSWER", "content": answer_lines} - return f"8:[{json.dumps(annotation)}]\n" - - def format_further_questions_delta( - self, further_questions: list[dict[str, Any]] - ) -> str: - """ - Format further questions as a delta annotation - - Args: - further_questions: List of further question objects - - Returns: - str: The formatted annotation delta string - """ - # Update internal state - self.message_annotations[3]["content"] = further_questions - - # Return only the delta annotation - annotation = { - "type": "FURTHER_QUESTIONS", - "data": [ - question.get("question", "") - for question in further_questions - if question.get("question", "") != "" - ], - } - return f"8:[{json.dumps(annotation)}]\n" - - def format_text_chunk(self, text: str) -> str: - """ - Format a text chunk using the text stream part - - Args: - text: The text chunk to stream - - Returns: - str: The formatted text part string - """ - return f"0:{json.dumps(text)}\n" - - def format_error(self, error_message: str) -> str: - """ - Format an error using the error stream part - - Args: - error_message: The error message - - Returns: - str: The formatted error part string - """ - return f"3:{json.dumps(error_message)}\n" - - def format_completion( - self, prompt_tokens: int = 156, completion_tokens: int = 204 - ) -> str: - """ - Format a completion message - - Args: - prompt_tokens: Number of prompt tokens - completion_tokens: Number of completion tokens - - Returns: - str: The formatted completion string - """ - total_tokens = prompt_tokens + completion_tokens - completion_data = { - "finishReason": "stop", - "usage": { - "promptTokens": prompt_tokens, - "completionTokens": completion_tokens, - "totalTokens": total_tokens, - }, - } - return 
f"d:{json.dumps(completion_data)}\n" diff --git a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py index 65cdb886b..34b9b827c 100644 --- a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py @@ -7,9 +7,12 @@ import sys from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.pool import NullPool +# Import for content-based podcast (new-chat) +from app.agents.podcaster.graph import graph as podcaster_graph +from app.agents.podcaster.state import State as PodcasterState from app.celery_app import celery_app from app.config import config -from app.tasks.podcast_tasks import generate_chat_podcast +from app.db import Podcast logger = logging.getLogger(__name__) @@ -36,53 +39,140 @@ def get_celery_session_maker(): return async_sessionmaker(engine, expire_on_commit=False) -@celery_app.task(name="generate_chat_podcast", bind=True) -def generate_chat_podcast_task( +# ============================================================================= +# Content-based podcast generation (for new-chat) +# ============================================================================= + + +def _clear_active_podcast_redis_key(search_space_id: int) -> None: + """Clear the active podcast task key from Redis when task completes.""" + import os + + import redis + + try: + redis_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") + client = redis.from_url(redis_url, decode_responses=True) + key = f"podcast:active:{search_space_id}" + client.delete(key) + logger.info(f"Cleared active podcast key for search_space_id={search_space_id}") + except Exception as e: + logger.warning(f"Could not clear active podcast key: {e}") + + +@celery_app.task(name="generate_content_podcast", bind=True) +def generate_content_podcast_task( self, - chat_id: int, + source_content: str, search_space_id: int, - user_id: int, - 
podcast_title: str | None = None, + podcast_title: str = "SurfSense Podcast", user_prompt: str | None = None, -): +) -> dict: """ - Celery task to generate podcast from chat. + Celery task to generate podcast from source content (for new-chat). + + This task generates a podcast directly from provided content. Args: - chat_id: ID of the chat to generate podcast from + source_content: The text content to convert into a podcast search_space_id: ID of the search space - user_id: ID of the user, podcast_title: Title for the podcast - user_prompt: Optional prompt from the user to guide the podcast generation + user_prompt: Optional instructions for podcast style/tone + + Returns: + dict with podcast_id on success, or error info on failure """ loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - loop.run_until_complete( - _generate_chat_podcast( - chat_id, search_space_id, user_id, podcast_title, user_prompt + result = loop.run_until_complete( + _generate_content_podcast( + source_content, + search_space_id, + podcast_title, + user_prompt, ) ) loop.run_until_complete(loop.shutdown_asyncgens()) + return result + except Exception as e: + logger.error(f"Error generating content podcast: {e!s}") + return {"status": "error", "error": str(e)} finally: + # Always clear the active podcast key when task completes (success or failure) + _clear_active_podcast_redis_key(search_space_id) asyncio.set_event_loop(None) loop.close() -async def _generate_chat_podcast( - chat_id: int, +async def _generate_content_podcast( + source_content: str, search_space_id: int, - user_id: int, - podcast_title: str | None = None, + podcast_title: str = "SurfSense Podcast", user_prompt: str | None = None, -): - """Generate chat podcast with new session.""" +) -> dict: + """Generate content-based podcast with new session.""" async with get_celery_session_maker()() as session: try: - await generate_chat_podcast( - session, chat_id, search_space_id, user_id, podcast_title, user_prompt + # 
Configure the podcaster graph + graph_config = { + "configurable": { + "podcast_title": podcast_title, + "search_space_id": search_space_id, + "user_prompt": user_prompt, + } + } + + # Initialize the podcaster state with the source content + initial_state = PodcasterState( + source_content=source_content, + db_session=session, ) + + # Run the podcaster graph + result = await podcaster_graph.ainvoke(initial_state, config=graph_config) + + # Extract results + podcast_transcript = result.get("podcast_transcript", []) + file_path = result.get("final_podcast_file_path", "") + + # Convert transcript to serializable format + serializable_transcript = [] + for entry in podcast_transcript: + if hasattr(entry, "speaker_id"): + serializable_transcript.append( + {"speaker_id": entry.speaker_id, "dialog": entry.dialog} + ) + else: + serializable_transcript.append( + { + "speaker_id": entry.get("speaker_id", 0), + "dialog": entry.get("dialog", ""), + } + ) + + # Save podcast to database + podcast = Podcast( + title=podcast_title, + podcast_transcript=serializable_transcript, + file_location=file_path, + search_space_id=search_space_id, + ) + session.add(podcast) + await session.commit() + await session.refresh(podcast) + + logger.info(f"Successfully generated content podcast: {podcast.id}") + + return { + "status": "success", + "podcast_id": podcast.id, + "title": podcast_title, + "transcript_entries": len(serializable_transcript), + } + except Exception as e: - logger.error(f"Error generating podcast from chat: {e!s}") + logger.error(f"Error in _generate_content_podcast: {e!s}") + await session.rollback() raise diff --git a/surfsense_backend/app/tasks/chat/stream_connector_search_results.py b/surfsense_backend/app/tasks/chat/stream_connector_search_results.py deleted file mode 100644 index a4b9b6665..000000000 --- a/surfsense_backend/app/tasks/chat/stream_connector_search_results.py +++ /dev/null @@ -1,75 +0,0 @@ -from collections.abc import AsyncGenerator -from typing import 
Any -from uuid import UUID - -from sqlalchemy.ext.asyncio import AsyncSession - -from app.agents.researcher.graph import graph as researcher_graph -from app.agents.researcher.state import State -from app.services.streaming_service import StreamingService - - -async def stream_connector_search_results( - user_query: str, - user_id: str | UUID, - search_space_id: int, - session: AsyncSession, - research_mode: str, - selected_connectors: list[str], - langchain_chat_history: list[Any], - document_ids_to_add_in_context: list[int], - language: str | None = None, - top_k: int = 10, -) -> AsyncGenerator[str, None]: - """ - Stream connector search results to the client - - Args: - user_query: The user's query - user_id: The user's ID (can be UUID object or string) - search_space_id: The search space ID - session: The database session - research_mode: The research mode - selected_connectors: List of selected connectors - - Yields: - str: Formatted response strings - """ - streaming_service = StreamingService() - - # Convert UUID to string if needed - user_id_str = str(user_id) if isinstance(user_id, UUID) else user_id - - # Sample configuration - config = { - "configurable": { - "user_query": user_query, - "connectors_to_search": selected_connectors, - "user_id": user_id_str, - "search_space_id": search_space_id, - "document_ids_to_add_in_context": document_ids_to_add_in_context, - "language": language, # Add language to the configuration - "top_k": top_k, # Add top_k to the configuration - } - } - # print(f"Researcher configuration: {config['configurable']}") # Debug print - # Initialize state with database session and streaming service - initial_state = State( - db_session=session, - streaming_service=streaming_service, - chat_history=langchain_chat_history, - ) - - # Run the graph directly - print("\nRunning the complete researcher workflow...") - - # Use streaming with config parameter - async for chunk in researcher_graph.astream( - initial_state, - config=config, - 
stream_mode="custom", - ): - if isinstance(chunk, dict) and "yield_value" in chunk: - yield chunk["yield_value"] - - yield streaming_service.format_completion() diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 04f3f97c3..7a3d4b20d 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -3,69 +3,115 @@ Streaming task for the new SurfSense deep agent chat. This module streams responses from the deep agent using the Vercel AI SDK Data Stream Protocol (SSE format). + +Supports loading LLM configurations from: +- YAML files (negative IDs for global configs) +- NewLLMConfig database table (positive IDs for user-created configs with prompt settings) """ +import json from collections.abc import AsyncGenerator -from uuid import UUID from langchain_core.messages import HumanMessage from sqlalchemy.ext.asyncio import AsyncSession -from app.agents.new_chat.chat_deepagent import ( +from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent +from app.agents.new_chat.checkpointer import get_checkpointer +from app.agents.new_chat.llm_config import ( + AgentConfig, + create_chat_litellm_from_agent_config, create_chat_litellm_from_config, - create_surfsense_deep_agent, + load_agent_config, load_llm_config_from_yaml, ) +from app.schemas.new_chat import ChatAttachment from app.services.connector_service import ConnectorService from app.services.new_streaming_service import VercelStreamingService +def format_attachments_as_context(attachments: list[ChatAttachment]) -> str: + """Format attachments as context for the agent.""" + if not attachments: + return "" + + context_parts = [""] + for i, attachment in enumerate(attachments, 1): + context_parts.append( + f"" + ) + context_parts.append(f"") + context_parts.append("") + context_parts.append("") + + return "\n".join(context_parts) + + async def stream_new_chat( user_query: 
str, - user_id: str | UUID, search_space_id: int, chat_id: int, session: AsyncSession, llm_config_id: int = -1, + attachments: list[ChatAttachment] | None = None, ) -> AsyncGenerator[str, None]: """ Stream chat responses from the new SurfSense deep agent. This uses the Vercel AI SDK Data Stream Protocol (SSE format) for streaming. - The chat_id is used as LangGraph's thread_id for memory/checkpointing, - so chat history is automatically managed by LangGraph. + The chat_id is used as LangGraph's thread_id for memory/checkpointing. + Message history can be passed from the frontend for context. Args: user_query: The user's query - user_id: The user's ID (can be UUID object or string) search_space_id: The search space ID chat_id: The chat ID (used as LangGraph thread_id for memory) session: The database session llm_config_id: The LLM configuration ID (default: -1 for first global config) + messages: Optional chat history from frontend (list of ChatMessage) Yields: str: SSE formatted response strings """ streaming_service = VercelStreamingService() - # Convert UUID to string if needed - str(user_id) if isinstance(user_id, UUID) else user_id - # Track the current text block for streaming (defined early for exception handling) current_text_id: str | None = None try: - # Load LLM config - llm_config = load_llm_config_from_yaml(llm_config_id=llm_config_id) - if not llm_config: - yield streaming_service.format_error( - f"Failed to load LLM config with id {llm_config_id}" - ) - yield streaming_service.format_done() - return + # Load LLM config - supports both YAML (negative IDs) and database (positive IDs) + agent_config: AgentConfig | None = None + + if llm_config_id >= 0: + # Positive ID: Load from NewLLMConfig database table + agent_config = await load_agent_config( + session=session, + config_id=llm_config_id, + search_space_id=search_space_id, + ) + if not agent_config: + yield streaming_service.format_error( + f"Failed to load NewLLMConfig with id {llm_config_id}" + ) + 
yield streaming_service.format_done() + return + + # Create ChatLiteLLM from AgentConfig + llm = create_chat_litellm_from_agent_config(agent_config) + else: + # Negative ID: Load from YAML (global configs) + llm_config = load_llm_config_from_yaml(llm_config_id=llm_config_id) + if not llm_config: + yield streaming_service.format_error( + f"Failed to load LLM config with id {llm_config_id}" + ) + yield streaming_service.format_done() + return + + # Create ChatLiteLLM from YAML config dict + llm = create_chat_litellm_from_config(llm_config) + # Create AgentConfig from YAML for consistency (uses defaults for prompt settings) + agent_config = AgentConfig.from_yaml_config(llm_config) - # Create ChatLiteLLM instance - llm = create_chat_litellm_from_config(llm_config) if not llm: yield streaming_service.format_error("Failed to create LLM instance") yield streaming_service.format_done() @@ -74,18 +120,45 @@ async def stream_new_chat( # Create connector service connector_service = ConnectorService(session, search_space_id=search_space_id) - # Create the deep agent + # Get the PostgreSQL checkpointer for persistent conversation memory + checkpointer = await get_checkpointer() + + # Create the deep agent with checkpointer and configurable prompts agent = create_surfsense_deep_agent( llm=llm, search_space_id=search_space_id, db_session=session, connector_service=connector_service, + checkpointer=checkpointer, + agent_config=agent_config, # Pass prompt configuration ) - # Build input with just the current user query - # Chat history is managed by LangGraph via thread_id + # Build input with message history from frontend + langchain_messages = [] + + # Format the user query with attachment context if any + final_query = user_query + if attachments: + attachment_context = format_attachments_as_context(attachments) + final_query = ( + f"{attachment_context}\n\n{user_query}" + ) + + # if messages: + # # Convert frontend messages to LangChain format + # for msg in messages: + # if 
msg.role == "user": + # langchain_messages.append(HumanMessage(content=msg.content)) + # elif msg.role == "assistant": + # langchain_messages.append(AIMessage(content=msg.content)) + # else: + # Fallback: just use the current user query with attachment context + langchain_messages.append(HumanMessage(content=final_query)) + input_state = { - "messages": [HumanMessage(content=user_query)], + # Lets not pass this message atm because we are using the checkpointer to manage the conversation history + # We will use this to simulate group chat functionality in the future + "messages": langchain_messages, "search_space_id": search_space_id, } @@ -103,6 +176,51 @@ async def stream_new_chat( # Reset text tracking for this stream accumulated_text = "" + # Track thinking steps for chain-of-thought display + thinking_step_counter = 0 + # Map run_id -> step_id for tool calls so we can update them on completion + tool_step_ids: dict[str, str] = {} + # Track the last active step so we can mark it complete at the end + last_active_step_id: str | None = None + last_active_step_title: str = "" + last_active_step_items: list[str] = [] + # Track which steps have been completed to avoid duplicate completions + completed_step_ids: set[str] = set() + # Track if we just finished a tool (text flows silently after tools) + just_finished_tool: bool = False + + def next_thinking_step_id() -> str: + nonlocal thinking_step_counter + thinking_step_counter += 1 + return f"thinking-{thinking_step_counter}" + + def complete_current_step() -> str | None: + """Complete the current active step and return the completion event, if any.""" + nonlocal last_active_step_id, last_active_step_title, last_active_step_items + if last_active_step_id and last_active_step_id not in completed_step_ids: + completed_step_ids.add(last_active_step_id) + return streaming_service.format_thinking_step( + step_id=last_active_step_id, + title=last_active_step_title, + status="completed", + items=last_active_step_items if 
last_active_step_items else None, + ) + return None + + # Initial thinking step - analyzing the request + analyze_step_id = next_thinking_step_id() + last_active_step_id = analyze_step_id + last_active_step_title = "Understanding your request" + last_active_step_items = [ + f"Processing: {user_query[:80]}{'...' if len(user_query) > 80 else ''}" + ] + yield streaming_service.format_thinking_step( + step_id=analyze_step_id, + title="Understanding your request", + status="in_progress", + items=last_active_step_items, + ) + # Stream the agent response with thread config for memory async for event in agent.astream_events( input_state, config=config, version="v2" @@ -117,6 +235,18 @@ async def stream_new_chat( if content and isinstance(content, str): # Start a new text block if needed if current_text_id is None: + # Complete any previous step + completion_event = complete_current_step() + if completion_event: + yield completion_event + + if just_finished_tool: + # Clear the active step tracking - text flows without a dedicated step + last_active_step_id = None + last_active_step_title = "" + last_active_step_items = [] + just_finished_tool = False + current_text_id = streaming_service.generate_text_id() yield streaming_service.format_text_start(current_text_id) @@ -137,6 +267,122 @@ async def stream_new_chat( yield streaming_service.format_text_end(current_text_id) current_text_id = None + # Complete any previous step EXCEPT "Synthesizing response" + # (we want to reuse the Synthesizing step after tools complete) + if last_active_step_title != "Synthesizing response": + completion_event = complete_current_step() + if completion_event: + yield completion_event + + # Reset the just_finished_tool flag since we're starting a new tool + just_finished_tool = False + + # Create thinking step for the tool call and store it for later update + tool_step_id = next_thinking_step_id() + tool_step_ids[run_id] = tool_step_id + last_active_step_id = tool_step_id + if tool_name == 
"search_knowledge_base": + query = ( + tool_input.get("query", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + last_active_step_title = "Searching knowledge base" + last_active_step_items = [ + f"Query: {query[:100]}{'...' if len(query) > 100 else ''}" + ] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Searching knowledge base", + status="in_progress", + items=last_active_step_items, + ) + elif tool_name == "link_preview": + url = ( + tool_input.get("url", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + last_active_step_title = "Fetching link preview" + last_active_step_items = [ + f"URL: {url[:80]}{'...' if len(url) > 80 else ''}" + ] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Fetching link preview", + status="in_progress", + items=last_active_step_items, + ) + elif tool_name == "display_image": + src = ( + tool_input.get("src", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + title = ( + tool_input.get("title", "") + if isinstance(tool_input, dict) + else "" + ) + last_active_step_title = "Displaying image" + last_active_step_items = [ + f"Image: {title[:50] if title else src[:50]}{'...' if len(title or src) > 50 else ''}" + ] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Displaying image", + status="in_progress", + items=last_active_step_items, + ) + elif tool_name == "scrape_webpage": + url = ( + tool_input.get("url", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + last_active_step_title = "Scraping webpage" + last_active_step_items = [ + f"URL: {url[:80]}{'...' 
if len(url) > 80 else ''}" + ] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Scraping webpage", + status="in_progress", + items=last_active_step_items, + ) + elif tool_name == "generate_podcast": + podcast_title = ( + tool_input.get("podcast_title", "SurfSense Podcast") + if isinstance(tool_input, dict) + else "SurfSense Podcast" + ) + # Get content length for context + content_len = len( + tool_input.get("source_content", "") + if isinstance(tool_input, dict) + else "" + ) + last_active_step_title = "Generating podcast" + last_active_step_items = [ + f"Title: {podcast_title}", + f"Content: {content_len:,} characters", + "Preparing audio generation...", + ] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Generating podcast", + status="in_progress", + items=last_active_step_items, + ) + else: + last_active_step_title = f"Using {tool_name.replace('_', ' ')}" + last_active_step_items = [] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title=last_active_step_title, + status="in_progress", + ) + # Stream tool info tool_call_id = ( f"call_{run_id[:32]}" @@ -163,22 +409,358 @@ async def stream_new_chat( f"Searching knowledge base: {query[:100]}{'...' if len(query) > 100 else ''}", "info", ) + elif tool_name == "link_preview": + url = ( + tool_input.get("url", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + yield streaming_service.format_terminal_info( + f"Fetching link preview: {url[:80]}{'...' if len(url) > 80 else ''}", + "info", + ) + elif tool_name == "display_image": + src = ( + tool_input.get("src", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + yield streaming_service.format_terminal_info( + f"Displaying image: {src[:60]}{'...' 
if len(src) > 60 else ''}", + "info", + ) + elif tool_name == "scrape_webpage": + url = ( + tool_input.get("url", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + yield streaming_service.format_terminal_info( + f"Scraping webpage: {url[:70]}{'...' if len(url) > 70 else ''}", + "info", + ) + elif tool_name == "generate_podcast": + title = ( + tool_input.get("podcast_title", "SurfSense Podcast") + if isinstance(tool_input, dict) + else "SurfSense Podcast" + ) + yield streaming_service.format_terminal_info( + f"Generating podcast: {title}", + "info", + ) elif event_type == "on_tool_end": run_id = event.get("run_id", "") - tool_output = event.get("data", {}).get("output", "") + tool_name = event.get("name", "unknown_tool") + raw_output = event.get("data", {}).get("output", "") + + # Extract content from ToolMessage if needed + # LangGraph may return a ToolMessage object instead of raw dict + if hasattr(raw_output, "content"): + # It's a ToolMessage object - extract the content + content = raw_output.content + # If content is a string that looks like JSON, try to parse it + if isinstance(content, str): + try: + tool_output = json.loads(content) + except (json.JSONDecodeError, TypeError): + tool_output = {"result": content} + elif isinstance(content, dict): + tool_output = content + else: + tool_output = {"result": str(content)} + elif isinstance(raw_output, dict): + tool_output = raw_output + else: + tool_output = { + "result": str(raw_output) if raw_output else "completed" + } tool_call_id = f"call_{run_id[:32]}" if run_id else "call_unknown" - # Don't stream the full output (can be very large), just acknowledge - yield streaming_service.format_tool_output_available( - tool_call_id, - {"status": "completed", "result_length": len(str(tool_output))}, + # Get the original tool step ID to update it (not create a new one) + original_step_id = tool_step_ids.get( + run_id, f"thinking-unknown-{run_id[:8]}" ) - yield streaming_service.format_terminal_info( - 
"Knowledge base search completed", "success" - ) + # Mark the tool thinking step as completed using the SAME step ID + # Also add to completed set so we don't try to complete it again + completed_step_ids.add(original_step_id) + if tool_name == "search_knowledge_base": + # Get result count if available + result_info = "Search completed" + if isinstance(tool_output, dict): + result_len = tool_output.get("result_length", 0) + if result_len > 0: + result_info = ( + f"Found relevant information ({result_len} chars)" + ) + # Include original query in completed items + completed_items = [*last_active_step_items, result_info] + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Searching knowledge base", + status="completed", + items=completed_items, + ) + elif tool_name == "link_preview": + # Build completion items based on link preview result + if isinstance(tool_output, dict): + title = tool_output.get("title", "Link") + domain = tool_output.get("domain", "") + has_error = "error" in tool_output + if has_error: + completed_items = [ + *last_active_step_items, + f"Error: {tool_output.get('error', 'Failed to fetch')}", + ] + else: + completed_items = [ + *last_active_step_items, + f"Title: {title[:60]}{'...' if len(title) > 60 else ''}", + f"Domain: {domain}" if domain else "Preview loaded", + ] + else: + completed_items = [*last_active_step_items, "Preview loaded"] + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Fetching link preview", + status="completed", + items=completed_items, + ) + elif tool_name == "display_image": + # Build completion items for image display + if isinstance(tool_output, dict): + title = tool_output.get("title", "") + alt = tool_output.get("alt", "Image") + display_name = title or alt + completed_items = [ + *last_active_step_items, + f"Showing: {display_name[:50]}{'...' 
if len(display_name) > 50 else ''}", + ] + else: + completed_items = [*last_active_step_items, "Image displayed"] + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Displaying image", + status="completed", + items=completed_items, + ) + elif tool_name == "scrape_webpage": + # Build completion items for webpage scraping + if isinstance(tool_output, dict): + title = tool_output.get("title", "Webpage") + word_count = tool_output.get("word_count", 0) + has_error = "error" in tool_output + if has_error: + completed_items = [ + *last_active_step_items, + f"Error: {tool_output.get('error', 'Failed to scrape')[:50]}", + ] + else: + completed_items = [ + *last_active_step_items, + f"Title: {title[:50]}{'...' if len(title) > 50 else ''}", + f"Extracted: {word_count:,} words", + ] + else: + completed_items = [*last_active_step_items, "Content extracted"] + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Scraping webpage", + status="completed", + items=completed_items, + ) + elif tool_name == "generate_podcast": + # Build detailed completion items based on podcast status + podcast_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + podcast_title = ( + tool_output.get("title", "Podcast") + if isinstance(tool_output, dict) + else "Podcast" + ) + + if podcast_status == "processing": + completed_items = [ + f"Title: {podcast_title}", + "Audio generation started", + "Processing in background...", + ] + elif podcast_status == "already_generating": + completed_items = [ + f"Title: {podcast_title}", + "Podcast already in progress", + "Please wait for it to complete", + ] + elif podcast_status == "error": + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed_items = [ + f"Title: {podcast_title}", + f"Error: {error_msg[:50]}", + ] + else: + completed_items = last_active_step_items + + 
yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Generating podcast", + status="completed", + items=completed_items, + ) + else: + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title=f"Using {tool_name.replace('_', ' ')}", + status="completed", + items=last_active_step_items, + ) + + # Mark that we just finished a tool - "Synthesizing response" will be created + # when text actually starts flowing (not immediately) + just_finished_tool = True + # Clear the active step since the tool is done + last_active_step_id = None + last_active_step_title = "" + last_active_step_items = [] + + # Handle different tool outputs + if tool_name == "generate_podcast": + # Stream the full podcast result so frontend can render the audio player + yield streaming_service.format_tool_output_available( + tool_call_id, + tool_output + if isinstance(tool_output, dict) + else {"result": tool_output}, + ) + # Send appropriate terminal message based on status + if ( + isinstance(tool_output, dict) + and tool_output.get("status") == "success" + ): + yield streaming_service.format_terminal_info( + f"Podcast generated successfully: {tool_output.get('title', 'Podcast')}", + "success", + ) + else: + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + yield streaming_service.format_terminal_info( + f"Podcast generation failed: {error_msg}", + "error", + ) + elif tool_name == "link_preview": + # Stream the full link preview result so frontend can render the MediaCard + yield streaming_service.format_tool_output_available( + tool_call_id, + tool_output + if isinstance(tool_output, dict) + else {"result": tool_output}, + ) + # Send appropriate terminal message + if isinstance(tool_output, dict) and "error" not in tool_output: + title = tool_output.get("title", "Link") + yield streaming_service.format_terminal_info( + f"Link preview loaded: {title[:50]}{'...' 
if len(title) > 50 else ''}", + "success", + ) + else: + error_msg = ( + tool_output.get("error", "Failed to fetch") + if isinstance(tool_output, dict) + else "Failed to fetch" + ) + yield streaming_service.format_terminal_info( + f"Link preview failed: {error_msg}", + "error", + ) + elif tool_name == "display_image": + # Stream the full image result so frontend can render the Image component + yield streaming_service.format_tool_output_available( + tool_call_id, + tool_output + if isinstance(tool_output, dict) + else {"result": tool_output}, + ) + # Send terminal message + if isinstance(tool_output, dict): + title = tool_output.get("title") or tool_output.get( + "alt", "Image" + ) + yield streaming_service.format_terminal_info( + f"Image displayed: {title[:40]}{'...' if len(title) > 40 else ''}", + "success", + ) + elif tool_name == "scrape_webpage": + # Stream the scrape result so frontend can render the Article component + # Note: We send metadata for display, but content goes to LLM for processing + if isinstance(tool_output, dict): + # Create a display-friendly output (without full content for the card) + display_output = { + k: v for k, v in tool_output.items() if k != "content" + } + # But keep a truncated content preview + if "content" in tool_output: + content = tool_output.get("content", "") + display_output["content_preview"] = ( + content[:500] + "..." if len(content) > 500 else content + ) + yield streaming_service.format_tool_output_available( + tool_call_id, + display_output, + ) + else: + yield streaming_service.format_tool_output_available( + tool_call_id, + {"result": tool_output}, + ) + # Send terminal message + if isinstance(tool_output, dict) and "error" not in tool_output: + title = tool_output.get("title", "Webpage") + word_count = tool_output.get("word_count", 0) + yield streaming_service.format_terminal_info( + f"Scraped: {title[:40]}{'...' 
if len(title) > 40 else ''} ({word_count:,} words)", + "success", + ) + else: + error_msg = ( + tool_output.get("error", "Failed to scrape") + if isinstance(tool_output, dict) + else "Failed to scrape" + ) + yield streaming_service.format_terminal_info( + f"Scrape failed: {error_msg}", + "error", + ) + elif tool_name == "search_knowledge_base": + # Don't stream the full output for search (can be very large), just acknowledge + yield streaming_service.format_tool_output_available( + tool_call_id, + {"status": "completed", "result_length": len(str(tool_output))}, + ) + yield streaming_service.format_terminal_info( + "Knowledge base search completed", "success" + ) + else: + # Default handling for other tools + yield streaming_service.format_tool_output_available( + tool_call_id, + {"status": "completed", "result_length": len(str(tool_output))}, + ) + yield streaming_service.format_terminal_info( + f"Tool {tool_name} completed", "success" + ) # Handle chain/agent end to close any open text blocks elif event_type in ("on_chain_end", "on_agent_end"): @@ -190,6 +772,11 @@ async def stream_new_chat( if current_text_id is not None: yield streaming_service.format_text_end(current_text_id) + # Mark the last active thinking step as completed using the same title + completion_event = complete_current_step() + if completion_event: + yield completion_event + # Finish the step and message yield streaming_service.format_finish_step() yield streaming_service.format_finish() diff --git a/surfsense_backend/app/tasks/podcast_tasks.py b/surfsense_backend/app/tasks/podcast_tasks.py deleted file mode 100644 index 2285c5d09..000000000 --- a/surfsense_backend/app/tasks/podcast_tasks.py +++ /dev/null @@ -1,211 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.ext.asyncio import AsyncSession - -from app.agents.podcaster.graph import graph as podcaster_graph -from app.agents.podcaster.state import State -from app.db import Chat, Podcast -from 
app.services.task_logging_service import TaskLoggingService - - -async def generate_chat_podcast( - session: AsyncSession, - chat_id: int, - search_space_id: int, - user_id: int, - podcast_title: str | None = None, - user_prompt: str | None = None, -): - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="generate_chat_podcast", - source="podcast_task", - message=f"Starting podcast generation for chat {chat_id}", - metadata={ - "chat_id": chat_id, - "search_space_id": search_space_id, - "podcast_title": podcast_title, - "user_id": str(user_id), - "user_prompt": user_prompt, - }, - ) - - try: - # Fetch the chat with the specified ID - await task_logger.log_task_progress( - log_entry, f"Fetching chat {chat_id} from database", {"stage": "fetch_chat"} - ) - - query = select(Chat).filter( - Chat.id == chat_id, Chat.search_space_id == search_space_id - ) - - result = await session.execute(query) - chat = result.scalars().first() - - if not chat: - await task_logger.log_task_failure( - log_entry, - f"Chat with id {chat_id} not found in search space {search_space_id}", - "Chat not found", - {"error_type": "ChatNotFound"}, - ) - raise ValueError( - f"Chat with id {chat_id} not found in search space {search_space_id}" - ) - - # Create chat history structure - await task_logger.log_task_progress( - log_entry, - f"Processing chat history for chat {chat_id}", - {"stage": "process_chat_history", "message_count": len(chat.messages)}, - ) - - chat_history_str = "" - - processed_messages = 0 - for message in chat.messages: - if message["role"] == "user": - chat_history_str += f"{message['content']}" - processed_messages += 1 - elif message["role"] == "assistant": - chat_history_str += ( - f"{message['content']}" - ) - processed_messages += 1 - - chat_history_str += "" - - # Pass it to the SurfSense Podcaster - await task_logger.log_task_progress( - log_entry, - f"Initializing podcast generation 
for chat {chat_id}", - { - "stage": "initialize_podcast_generation", - "processed_messages": processed_messages, - "content_length": len(chat_history_str), - }, - ) - - config = { - "configurable": { - "podcast_title": podcast_title or "SurfSense Podcast", - "user_id": str(user_id), - "search_space_id": search_space_id, - "user_prompt": user_prompt, - } - } - # Initialize state with database session and streaming service - initial_state = State(source_content=chat_history_str, db_session=session) - - # Run the graph directly - await task_logger.log_task_progress( - log_entry, - f"Running podcast generation graph for chat {chat_id}", - {"stage": "run_podcast_graph"}, - ) - - result = await podcaster_graph.ainvoke(initial_state, config=config) - - # Convert podcast transcript entries to serializable format - await task_logger.log_task_progress( - log_entry, - f"Processing podcast transcript for chat {chat_id}", - { - "stage": "process_transcript", - "transcript_entries": len(result["podcast_transcript"]), - }, - ) - - serializable_transcript = [] - for entry in result["podcast_transcript"]: - serializable_transcript.append( - {"speaker_id": entry.speaker_id, "dialog": entry.dialog} - ) - - # Create a new podcast entry - await task_logger.log_task_progress( - log_entry, - f"Creating podcast database entry for chat {chat_id}", - { - "stage": "create_podcast_entry", - "file_location": result.get("final_podcast_file_path"), - }, - ) - - # check if podcast already exists for this chat (re-generation) - existing_podcast = await session.execute( - select(Podcast).filter(Podcast.chat_id == chat_id) - ) - existing_podcast = existing_podcast.scalars().first() - - if existing_podcast: - existing_podcast.podcast_transcript = serializable_transcript - existing_podcast.file_location = result["final_podcast_file_path"] - existing_podcast.chat_state_version = chat.state_version - await session.commit() - await session.refresh(existing_podcast) - return existing_podcast - else: - 
podcast = Podcast( - title=f"{podcast_title}", - podcast_transcript=serializable_transcript, - file_location=result["final_podcast_file_path"], - search_space_id=search_space_id, - chat_state_version=chat.state_version, - chat_id=chat.id, - ) - - # Add to session and commit - session.add(podcast) - await session.commit() - await session.refresh(podcast) - - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully generated podcast for chat {chat_id}", - { - "podcast_id": podcast.id, - "podcast_title": podcast_title, - "transcript_entries": len(serializable_transcript), - "file_location": result.get("final_podcast_file_path"), - "processed_messages": processed_messages, - "content_length": len(chat_history_str), - }, - ) - - return podcast - - except ValueError as ve: - # ValueError is already logged above for chat not found - if "not found" not in str(ve): - await task_logger.log_task_failure( - log_entry, - f"Value error during podcast generation for chat {chat_id}", - str(ve), - {"error_type": "ValueError"}, - ) - raise ve - except SQLAlchemyError as db_error: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during podcast generation for chat {chat_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - raise db_error - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Unexpected error during podcast generation for chat {chat_id}", - str(e), - {"error_type": type(e).__name__}, - ) - raise RuntimeError( - f"Failed to generate podcast for chat {chat_id}: {e!s}" - ) from e diff --git a/surfsense_backend/main.py b/surfsense_backend/main.py index 6a86bbd37..9c1b5c342 100644 --- a/surfsense_backend/main.py +++ b/surfsense_backend/main.py @@ -1,9 +1,15 @@ import argparse +import asyncio import logging +import sys import uvicorn from dotenv import load_dotenv +# Fix for Windows: psycopg requires SelectorEventLoop, not ProactorEventLoop 
+if sys.platform == "win32": + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + from app.config.uvicorn import load_uvicorn_config logging.basicConfig( diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index bf9fc5580..25b74f600 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -54,6 +54,8 @@ dependencies = [ "trafilatura>=2.0.0", "fastapi-users[oauth,sqlalchemy]>=15.0.3", "chonkie[all]>=1.5.0", + "langgraph-checkpoint-postgres>=3.0.2", + "psycopg[binary,pool]>=3.3.2", ] [dependency-groups] diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index 48f8034bc..208509993 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -2983,6 +2983,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/e3/616e3a7ff737d98c1bbb5700dd62278914e2a9ded09a79a1fa93cf24ce12/langgraph_checkpoint-3.0.1-py3-none-any.whl", hash = "sha256:9b04a8d0edc0474ce4eaf30c5d731cee38f11ddff50a6177eead95b5c4e4220b", size = 46249 }, ] +[[package]] +name = "langgraph-checkpoint-postgres" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langgraph-checkpoint" }, + { name = "orjson" }, + { name = "psycopg" }, + { name = "psycopg-pool" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/4e/ffea5b0d667e10d408b3b2d6dd967ea79e208eef73fe6ee5622625496238/langgraph_checkpoint_postgres-3.0.2.tar.gz", hash = "sha256:448cb8ec245b6fe10171a0f90e9aa047e24a9d3febba6a914644b0c1323da158", size = 127766 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/e4/b4248e10289b6e2c2d33586c87c5eb421e566ef5f336ee45269223cc3b92/langgraph_checkpoint_postgres-3.0.2-py3-none-any.whl", hash = "sha256:15c0fb638edfbc54d496f1758d0327d1a081e0ef94dda8f0c91d4b307d6d8545", size = 42710 }, +] + [[package]] name = "langgraph-prebuilt" version = "1.0.5" @@ -4785,6 +4800,79 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 }, ] +[[package]] +name = "psycopg" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e0/1a/7d9ef4fdc13ef7f15b934c393edc97a35c281bb7d3c3329fbfcbe915a7c2/psycopg-3.3.2.tar.gz", hash = "sha256:707a67975ee214d200511177a6a80e56e654754c9afca06a7194ea6bbfde9ca7", size = 165630 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8c/51/2779ccdf9305981a06b21a6b27e8547c948d85c41c76ff434192784a4c93/psycopg-3.3.2-py3-none-any.whl", hash = "sha256:3e94bc5f4690247d734599af56e51bae8e0db8e4311ea413f801fef82b14a99b", size = 212774 }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] +pool = [ + { name = "psycopg-pool" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/1e/8614b01c549dd7e385dacdcd83fe194f6b3acb255a53cc67154ee6bf00e7/psycopg_binary-3.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a9387ab615f929e71ef0f4a8a51e986fa06236ccfa9f3ec98a88f60fbf230634", size = 4579832 }, + { url = "https://files.pythonhosted.org/packages/26/97/0bb093570fae2f4454d42c1ae6000f15934391867402f680254e4a7def54/psycopg_binary-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3ff7489df5e06c12d1829544eaec64970fe27fe300f7cf04c8495fe682064688", size = 4658786 }, + { url = 
"https://files.pythonhosted.org/packages/61/20/1d9383e3f2038826900a14137b0647d755f67551aab316e1021443105ed5/psycopg_binary-3.3.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:9742580ecc8e1ac45164e98d32ca6df90da509c2d3ff26be245d94c430f92db4", size = 5454896 }, + { url = "https://files.pythonhosted.org/packages/a6/62/513c80ad8bbb545e364f7737bf2492d34a4c05eef4f7b5c16428dc42260d/psycopg_binary-3.3.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d45acedcaa58619355f18e0f42af542fcad3fd84ace4b8355d3a5dea23318578", size = 5132731 }, + { url = "https://files.pythonhosted.org/packages/f3/28/ddf5f5905f088024bccb19857949467407c693389a14feb527d6171d8215/psycopg_binary-3.3.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d88f32ff8c47cb7f4e7e7a9d1747dcee6f3baa19ed9afa9e5694fd2fb32b61ed", size = 6724495 }, + { url = "https://files.pythonhosted.org/packages/6e/93/a1157ebcc650960b264542b547f7914d87a42ff0cc15a7584b29d5807e6b/psycopg_binary-3.3.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:59d0163c4617a2c577cb34afbed93d7a45b8c8364e54b2bd2020ff25d5f5f860", size = 4964979 }, + { url = "https://files.pythonhosted.org/packages/0e/27/65939ba6798f9c5be4a5d9cd2061ebaf0851798525c6811d347821c8132d/psycopg_binary-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e750afe74e6c17b2c7046d2c3e3173b5a3f6080084671c8aa327215323df155b", size = 4493648 }, + { url = "https://files.pythonhosted.org/packages/8a/c4/5e9e4b9b1c1e27026e43387b0ba4aaf3537c7806465dd3f1d5bde631752a/psycopg_binary-3.3.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f26f113013c4dcfbfe9ced57b5bad2035dda1a7349f64bf726021968f9bccad3", size = 4173392 }, + { url = "https://files.pythonhosted.org/packages/c6/81/cf43fb76993190cee9af1cbcfe28afb47b1928bdf45a252001017e5af26e/psycopg_binary-3.3.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = 
"sha256:8309ee4569dced5e81df5aa2dcd48c7340c8dee603a66430f042dfbd2878edca", size = 3909241 }, + { url = "https://files.pythonhosted.org/packages/9d/20/c6377a0d17434674351627489deca493ea0b137c522b99c81d3a106372c8/psycopg_binary-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c6464150e25b68ae3cb04c4e57496ea11ebfaae4d98126aea2f4702dd43e3c12", size = 4219746 }, + { url = "https://files.pythonhosted.org/packages/25/32/716c57b28eefe02a57a4c9d5bf956849597f5ea476c7010397199e56cfde/psycopg_binary-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:716a586f99bbe4f710dc58b40069fcb33c7627e95cc6fc936f73c9235e07f9cf", size = 3537494 }, + { url = "https://files.pythonhosted.org/packages/14/73/7ca7cb22b9ac7393fb5de7d28ca97e8347c375c8498b3bff2c99c1f38038/psycopg_binary-3.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fc5a189e89cbfff174588665bb18d28d2d0428366cc9dae5864afcaa2e57380b", size = 4579068 }, + { url = "https://files.pythonhosted.org/packages/f5/42/0cf38ff6c62c792fc5b55398a853a77663210ebd51ed6f0c4a05b06f95a6/psycopg_binary-3.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:083c2e182be433f290dc2c516fd72b9b47054fcd305cce791e0a50d9e93e06f2", size = 4657520 }, + { url = "https://files.pythonhosted.org/packages/3b/60/df846bc84cbf2231e01b0fff48b09841fe486fa177665e50f4995b1bfa44/psycopg_binary-3.3.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:ac230e3643d1c436a2dfb59ca84357dfc6862c9f372fc5dbd96bafecae581f9f", size = 5452086 }, + { url = "https://files.pythonhosted.org/packages/ab/85/30c846a00db86b1b53fd5bfd4b4edfbd0c00de8f2c75dd105610bd7568fc/psycopg_binary-3.3.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d8c899a540f6c7585cee53cddc929dd4d2db90fd828e37f5d4017b63acbc1a5d", size = 5131125 }, + { url = "https://files.pythonhosted.org/packages/6d/15/9968732013373f36f8a2a3fb76104dffc8efd9db78709caa5ae1a87b1f80/psycopg_binary-3.3.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash 
= "sha256:50ff10ab8c0abdb5a5451b9315538865b50ba64c907742a1385fdf5f5772b73e", size = 6722914 }, + { url = "https://files.pythonhosted.org/packages/b2/ba/29e361fe02143ac5ff5a1ca3e45697344cfbebe2eaf8c4e7eec164bff9a0/psycopg_binary-3.3.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:23d2594af848c1fd3d874a9364bef50730124e72df7bb145a20cb45e728c50ed", size = 4966081 }, + { url = "https://files.pythonhosted.org/packages/99/45/1be90c8f1a1a237046903e91202fb06708745c179f220b361d6333ed7641/psycopg_binary-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ea4fe6b4ead3bbbe27244ea224fcd1f53cb119afc38b71a2f3ce570149a03e30", size = 4493332 }, + { url = "https://files.pythonhosted.org/packages/2e/b5/bbdc07d5f0a5e90c617abd624368182aa131485e18038b2c6c85fc054aed/psycopg_binary-3.3.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:742ce48cde825b8e52fb1a658253d6d1ff66d152081cbc76aa45e2986534858d", size = 4170781 }, + { url = "https://files.pythonhosted.org/packages/d1/2a/0d45e4f4da2bd78c3237ffa03475ef3751f69a81919c54a6e610eb1a7c96/psycopg_binary-3.3.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:e22bf6b54df994aff37ab52695d635f1ef73155e781eee1f5fa75bc08b58c8da", size = 3910544 }, + { url = "https://files.pythonhosted.org/packages/3a/62/a8e0f092f4dbef9a94b032fb71e214cf0a375010692fbe7493a766339e47/psycopg_binary-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8db9034cde3bcdafc66980f0130813f5c5d19e74b3f2a19fb3cfbc25ad113121", size = 4220070 }, + { url = "https://files.pythonhosted.org/packages/09/e6/5fc8d8aff8afa114bb4a94a0341b9309311e8bf3ab32d816032f8b984d4e/psycopg_binary-3.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:df65174c7cf6b05ea273ce955927d3270b3a6e27b0b12762b009ce6082b8d3fc", size = 3540922 }, + { url = "https://files.pythonhosted.org/packages/bd/75/ad18c0b97b852aba286d06befb398cc6d383e9dfd0a518369af275a5a526/psycopg_binary-3.3.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = 
"sha256:9ca24062cd9b2270e4d77576042e9cc2b1d543f09da5aba1f1a3d016cea28390", size = 4596371 }, + { url = "https://files.pythonhosted.org/packages/5a/79/91649d94c8d89f84af5da7c9d474bfba35b08eb8f492ca3422b08f0a6427/psycopg_binary-3.3.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c749770da0947bc972e512f35366dd4950c0e34afad89e60b9787a37e97cb443", size = 4675139 }, + { url = "https://files.pythonhosted.org/packages/56/ac/b26e004880f054549ec9396594e1ffe435810b0673e428e619ed722e4244/psycopg_binary-3.3.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:03b7cd73fb8c45d272a34ae7249713e32492891492681e3cf11dff9531cf37e9", size = 5456120 }, + { url = "https://files.pythonhosted.org/packages/4b/8d/410681dccd6f2999fb115cc248521ec50dd2b0aba66ae8de7e81efdebbee/psycopg_binary-3.3.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:43b130e3b6edcb5ee856c7167ccb8561b473308c870ed83978ae478613764f1c", size = 5133484 }, + { url = "https://files.pythonhosted.org/packages/66/30/ebbab99ea2cfa099d7b11b742ce13415d44f800555bfa4ad2911dc645b71/psycopg_binary-3.3.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c1feba5a8c617922321aef945865334e468337b8fc5c73074f5e63143013b5a", size = 6731818 }, + { url = "https://files.pythonhosted.org/packages/70/02/d260646253b7ad805d60e0de47f9b811d6544078452579466a098598b6f4/psycopg_binary-3.3.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cabb2a554d9a0a6bf84037d86ca91782f087dfff2a61298d0b00c19c0bc43f6d", size = 4983859 }, + { url = "https://files.pythonhosted.org/packages/72/8d/e778d7bad1a7910aa36281f092bd85c5702f508fd9bb0ea2020ffbb6585c/psycopg_binary-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:74bc306c4b4df35b09bc8cecf806b271e1c5d708f7900145e4e54a2e5dedfed0", size = 4516388 }, + { url = 
"https://files.pythonhosted.org/packages/bd/f1/64e82098722e2ab3521797584caf515284be09c1e08a872551b6edbb0074/psycopg_binary-3.3.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:d79b0093f0fbf7a962d6a46ae292dc056c65d16a8ee9361f3cfbafd4c197ab14", size = 4192382 }, + { url = "https://files.pythonhosted.org/packages/fa/d0/c20f4e668e89494972e551c31be2a0016e3f50d552d7ae9ac07086407599/psycopg_binary-3.3.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:1586e220be05547c77afc326741dd41cc7fba38a81f9931f616ae98865439678", size = 3928660 }, + { url = "https://files.pythonhosted.org/packages/0f/e1/99746c171de22539fd5eb1c9ca21dc805b54cfae502d7451d237d1dbc349/psycopg_binary-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:458696a5fa5dad5b6fb5d5862c22454434ce4fe1cf66ca6c0de5f904cbc1ae3e", size = 4239169 }, + { url = "https://files.pythonhosted.org/packages/72/f7/212343c1c9cfac35fd943c527af85e9091d633176e2a407a0797856ff7b9/psycopg_binary-3.3.2-cp314-cp314-win_amd64.whl", hash = "sha256:04bb2de4ba69d6f8395b446ede795e8884c040ec71d01dd07ac2b2d18d4153d1", size = 3642122 }, +] + +[[package]] +name = "psycopg-pool" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/9a/9470d013d0d50af0da9c4251614aeb3c1823635cab3edc211e3839db0bcf/psycopg_pool-3.3.0.tar.gz", hash = "sha256:fa115eb2860bd88fce1717d75611f41490dec6135efb619611142b24da3f6db5", size = 31606 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/c3/26b8a0908a9db249de3b4169692e1c7c19048a9bc41a4d3209cee7dbb758/psycopg_pool-3.3.0-py3-none-any.whl", hash = "sha256:2e44329155c410b5e8666372db44276a8b1ebd8c90f1c3026ebba40d4bc81063", size = 39995 }, +] + [[package]] name = "psycopg2-binary" version = "2.9.11" @@ -6293,6 +6381,7 @@ dependencies = [ { name = "langchain-litellm" }, { name = "langchain-unstructured" }, { name = "langgraph" }, + { name = 
"langgraph-checkpoint-postgres" }, { name = "linkup-sdk" }, { name = "litellm" }, { name = "llama-cloud-services" }, @@ -6301,6 +6390,7 @@ dependencies = [ { name = "numpy" }, { name = "pgvector" }, { name = "playwright" }, + { name = "psycopg", extra = ["binary", "pool"] }, { name = "pypdf" }, { name = "python-ffmpeg" }, { name = "redis" }, @@ -6351,6 +6441,7 @@ requires-dist = [ { name = "langchain-litellm", specifier = ">=0.3.5" }, { name = "langchain-unstructured", specifier = ">=1.0.0" }, { name = "langgraph", specifier = ">=1.0.5" }, + { name = "langgraph-checkpoint-postgres", specifier = ">=3.0.2" }, { name = "linkup-sdk", specifier = ">=0.2.4" }, { name = "litellm", specifier = ">=1.80.10" }, { name = "llama-cloud-services", specifier = ">=0.6.25" }, @@ -6359,6 +6450,7 @@ requires-dist = [ { name = "numpy", specifier = ">=1.24.0" }, { name = "pgvector", specifier = ">=0.3.6" }, { name = "playwright", specifier = ">=1.50.0" }, + { name = "psycopg", extras = ["binary", "pool"], specifier = ">=3.3.2" }, { name = "pypdf", specifier = ">=5.1.0" }, { name = "python-ffmpeg", specifier = ">=2.0.12" }, { name = "redis", specifier = ">=5.2.1" }, diff --git a/surfsense_web/app/(home)/page.tsx b/surfsense_web/app/(home)/page.tsx index 8f85774ac..e0478fce3 100644 --- a/surfsense_web/app/(home)/page.tsx +++ b/surfsense_web/app/(home)/page.tsx @@ -3,10 +3,8 @@ import { CTAHomepage } from "@/components/homepage/cta"; import { FeaturesBentoGrid } from "@/components/homepage/features-bento-grid"; import { FeaturesCards } from "@/components/homepage/features-card"; -import { Footer } from "@/components/homepage/footer"; import { HeroSection } from "@/components/homepage/hero-section"; import ExternalIntegrations from "@/components/homepage/integrations"; -import { Navbar } from "@/components/homepage/navbar"; export default function HomePage() { return ( diff --git a/surfsense_web/app/api/contact/route.ts b/surfsense_web/app/api/contact/route.ts index 0af47dfe3..6ca16ca9d 
100644 --- a/surfsense_web/app/api/contact/route.ts +++ b/surfsense_web/app/api/contact/route.ts @@ -6,9 +6,9 @@ import { usersTable } from "@/app/db/schema"; // Define validation schema matching the database schema const contactSchema = z.object({ name: z.string().min(1, "Name is required").max(255, "Name is too long"), - email: z.string().email("Invalid email address").max(255, "Email is too long"), + email: z.email("Invalid email address").max(255, "Email is too long"), company: z.string().min(1, "Company is required").max(255, "Company name is too long"), - message: z.string().optional().default(""), + message: z.string().optional().prefault(""), }); export async function POST(request: NextRequest) { @@ -43,7 +43,7 @@ export async function POST(request: NextRequest) { { success: false, message: "Validation error", - errors: error.errors, + errors: error.issues, }, { status: 400 } ); diff --git a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx index c0f5bf0b0..bfe8599f6 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx @@ -1,24 +1,25 @@ "use client"; -import { useAtom, useAtomValue, useSetAtom } from "jotai"; -import { Loader2, PanelRight } from "lucide-react"; -import { AnimatePresence, motion } from "motion/react"; +import { useAtomValue, useSetAtom } from "jotai"; +import { Loader2 } from "lucide-react"; import { useParams, usePathname, useRouter } from "next/navigation"; import { useTranslations } from "next-intl"; import type React from "react"; -import { useCallback, useEffect, useMemo, useState } from "react"; -import { activeChathatUIAtom, activeChatIdAtom } from "@/atoms/chats/ui.atoms"; -import { llmPreferencesAtom } from "@/atoms/llm-config/llm-config-query.atoms"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { toast } from "sonner"; 
import { myAccessAtom } from "@/atoms/members/members-query.atoms"; +import { updateLLMPreferencesMutationAtom } from "@/atoms/new-llm-config/new-llm-config-mutation.atoms"; +import { + globalNewLLMConfigsAtom, + llmPreferencesAtom, +} from "@/atoms/new-llm-config/new-llm-config-query.atoms"; import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; -import { ChatPanelContainer } from "@/components/chat/ChatPanel/ChatPanelContainer"; import { DashboardBreadcrumb } from "@/components/dashboard-breadcrumb"; import { LanguageSwitcher } from "@/components/LanguageSwitcher"; import { AppSidebarProvider } from "@/components/sidebar/AppSidebarProvider"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { Separator } from "@/components/ui/separator"; import { SidebarInset, SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar"; -import { cn } from "@/lib/utils"; export function DashboardClientLayout({ children, @@ -34,43 +35,27 @@ export function DashboardClientLayout({ const t = useTranslations("dashboard"); const router = useRouter(); const pathname = usePathname(); - const searchSpaceIdNum = Number(searchSpaceId); - const { search_space_id, chat_id } = useParams(); - const [chatUIState, setChatUIState] = useAtom(activeChathatUIAtom); - const activeChatId = useAtomValue(activeChatIdAtom); + const { search_space_id } = useParams(); const setActiveSearchSpaceIdState = useSetAtom(activeSearchSpaceIdAtom); - const setActiveChatIdState = useSetAtom(activeChatIdAtom); - const [showIndicator, setShowIndicator] = useState(false); - const { isChatPannelOpen } = chatUIState; - - // Check if we're on the researcher page - const isResearcherPage = pathname?.includes("/researcher"); - - // Show indicator when chat becomes active and panel is closed - useEffect(() => { - if (activeChatId && !isChatPannelOpen) { - setShowIndicator(true); - // Hide indicator after 5 seconds - const timer = 
setTimeout(() => setShowIndicator(false), 5000); - return () => clearTimeout(timer); - } else { - setShowIndicator(false); - } - }, [activeChatId, isChatPannelOpen]); - - const { data: preferences = {}, isFetching: loading, error } = useAtomValue(llmPreferencesAtom); + const { + data: preferences = {}, + isFetching: loading, + error, + refetch: refetchPreferences, + } = useAtomValue(llmPreferencesAtom); + const { data: globalConfigs = [], isFetching: globalConfigsLoading } = + useAtomValue(globalNewLLMConfigsAtom); + const { mutateAsync: updatePreferences } = useAtomValue(updateLLMPreferencesMutationAtom); const isOnboardingComplete = useCallback(() => { - return !!( - preferences.long_context_llm_id && - preferences.fast_llm_id && - preferences.strategic_llm_id - ); + return !!(preferences.agent_llm_id && preferences.document_summary_llm_id); }, [preferences]); const { data: access = null, isLoading: accessLoading } = useAtomValue(myAccessAtom); const [hasCheckedOnboarding, setHasCheckedOnboarding] = useState(false); + const [isAutoConfiguring, setIsAutoConfiguring] = useState(false); + const hasAttemptedAutoConfig = useRef(false); // Skip onboarding check if we're already on the onboarding page const isOnboardingPage = pathname?.includes("/onboard"); @@ -115,27 +100,82 @@ export function DashboardClientLayout({ return; } - // Wait for both preferences and access data to load - if (!loading && !accessLoading && !hasCheckedOnboarding) { + // Wait for all data to load + if ( + !loading && + !accessLoading && + !globalConfigsLoading && + !hasCheckedOnboarding && + !isAutoConfiguring + ) { const onboardingComplete = isOnboardingComplete(); - // Only redirect to onboarding if user is the owner and onboarding is not complete - // Invited members (non-owners) should skip onboarding and use existing config - if (!onboardingComplete && isOwner) { - router.push(`/dashboard/${searchSpaceId}/onboard`); + // If onboarding is complete, nothing to do + if (onboardingComplete) { 
+ setHasCheckedOnboarding(true); + return; } + // Only handle onboarding for owners + if (!isOwner) { + setHasCheckedOnboarding(true); + return; + } + + // If global configs available, auto-configure without going to onboard page + if (globalConfigs.length > 0 && !hasAttemptedAutoConfig.current) { + hasAttemptedAutoConfig.current = true; + setIsAutoConfiguring(true); + + const autoConfigureWithGlobal = async () => { + try { + const firstGlobalConfig = globalConfigs[0]; + await updatePreferences({ + search_space_id: Number(searchSpaceId), + data: { + agent_llm_id: firstGlobalConfig.id, + document_summary_llm_id: firstGlobalConfig.id, + }, + }); + + await refetchPreferences(); + + toast.success("AI configured automatically!", { + description: `Using ${firstGlobalConfig.name}. Customize in Settings.`, + }); + + setHasCheckedOnboarding(true); + } catch (error) { + console.error("Auto-configuration failed:", error); + // Fall back to onboard page + router.push(`/dashboard/${searchSpaceId}/onboard`); + } finally { + setIsAutoConfiguring(false); + } + }; + + autoConfigureWithGlobal(); + return; + } + + // No global configs - redirect to onboard page + router.push(`/dashboard/${searchSpaceId}/onboard`); setHasCheckedOnboarding(true); } }, [ loading, accessLoading, + globalConfigsLoading, isOnboardingComplete, isOnboardingPage, isOwner, + isAutoConfiguring, + globalConfigs, router, searchSpaceId, hasCheckedOnboarding, + updatePreferences, + refetchPreferences, ]); // Synchronize active search space and chat IDs with URL @@ -148,27 +188,27 @@ export function DashboardClientLayout({ : ""; if (!activeSeacrhSpaceId) return; setActiveSearchSpaceIdState(activeSeacrhSpaceId); - }, [search_space_id]); + }, [search_space_id, setActiveSearchSpaceIdState]); - useEffect(() => { - const activeChatId = - typeof chat_id === "string" - ? chat_id - : Array.isArray(chat_id) && chat_id.length > 0 - ? 
chat_id[0] - : ""; - if (!activeChatId) return; - setActiveChatIdState(activeChatId); - }, [chat_id, search_space_id]); - - // Show loading screen while checking onboarding status (only on first load) - if (!hasCheckedOnboarding && (loading || accessLoading) && !isOnboardingPage) { + // Show loading screen while checking onboarding status or auto-configuring + if ( + (!hasCheckedOnboarding && + (loading || accessLoading || globalConfigsLoading) && + !isOnboardingPage) || + isAutoConfiguring + ) { return (
- {t("loading_config")} - {t("checking_llm_prefs")} + + {isAutoConfiguring ? "Setting up AI..." : t("loading_config")} + + + {isAutoConfiguring + ? "Auto-configuring with available settings" + : t("checking_llm_prefs")} + @@ -212,123 +252,20 @@ export function DashboardClientLayout({ navMain={translatedNavMain} /> -
-
-
-
-
- - - -
-
- - {/* Only show artifacts toggle on researcher page */} - {isResearcherPage && ( - - { - setChatUIState((prev) => ({ - ...prev, - isChatPannelOpen: !isChatPannelOpen, - })); - setShowIndicator(false); - }} - className={cn( - "shrink-0 rounded-full p-2 transition-all duration-300 relative", - showIndicator - ? "bg-primary/20 hover:bg-primary/30 shadow-lg shadow-primary/25" - : "hover:bg-muted", - activeChatId && !showIndicator && "hover:bg-primary/10" - )} - title="Toggle Artifacts Panel" - whileHover={{ scale: 1.05 }} - whileTap={{ scale: 0.95 }} - > - - - - - - {/* Pulsing indicator badge */} - - {showIndicator && ( - - -
- - - - )} - - - )} -
+
+
+
+
+ + +
-
-
{children}
-
- {/* Only render chat panel on researcher page */} - {isResearcherPage && } +
+ +
+
+
+
{children}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/layout.tsx b/surfsense_web/app/dashboard/[search_space_id]/layout.tsx index 814cf49f4..ce2c778c5 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/layout.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/layout.tsx @@ -29,7 +29,7 @@ export default function DashboardLayout({ const customNavMain = [ { title: "Chat", - url: `/dashboard/${search_space_id}/researcher`, + url: `/dashboard/${search_space_id}/new-chat`, icon: "SquareTerminal", items: [], }, diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx new file mode 100644 index 000000000..00514facb --- /dev/null +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -0,0 +1,637 @@ +"use client"; + +import { + type AppendMessage, + AssistantRuntimeProvider, + type ThreadMessageLike, + useExternalStoreRuntime, +} from "@assistant-ui/react"; +import { useParams, useRouter } from "next/navigation"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { toast } from "sonner"; +import { Thread } from "@/components/assistant-ui/thread"; +import { ChatHeader } from "@/components/new-chat/chat-header"; +import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; +import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; +import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; +import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; +import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage"; +import { getBearerToken } from "@/lib/auth-utils"; +import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter"; +import { + isPodcastGenerating, + looksLikePodcastRequest, + setActivePodcastTaskId, +} from "@/lib/chat/podcast-state"; +import { + appendMessage, 
+ createThread, + getThreadMessages, + type MessageRecord, +} from "@/lib/chat/thread-persistence"; + +/** + * Extract thinking steps from message content + */ +function extractThinkingSteps(content: unknown): ThinkingStep[] { + if (!Array.isArray(content)) return []; + + const thinkingPart = content.find( + (part: unknown) => + typeof part === "object" && + part !== null && + "type" in part && + (part as { type: string }).type === "thinking-steps" + ) as { type: "thinking-steps"; steps: ThinkingStep[] } | undefined; + + return thinkingPart?.steps || []; +} + +/** + * Convert backend message to assistant-ui ThreadMessageLike format + * Filters out 'thinking-steps' part as it's handled separately via messageThinkingSteps + */ +function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { + let content: ThreadMessageLike["content"]; + + if (typeof msg.content === "string") { + content = [{ type: "text", text: msg.content }]; + } else if (Array.isArray(msg.content)) { + // Filter out thinking-steps part - it's handled separately via messageThinkingSteps + const filteredContent = msg.content.filter( + (part: unknown) => + !( + typeof part === "object" && + part !== null && + "type" in part && + (part as { type: string }).type === "thinking-steps" + ) + ); + content = + filteredContent.length > 0 + ? (filteredContent as ThreadMessageLike["content"]) + : [{ type: "text", text: "" }]; + } else { + content = [{ type: "text", text: String(msg.content) }]; + } + + return { + id: `msg-${msg.id}`, + role: msg.role, + content, + createdAt: new Date(msg.created_at), + }; +} + +/** + * Tools that should render custom UI in the chat. 
+ */ +const TOOLS_WITH_UI = new Set([ + "generate_podcast", + "link_preview", + "display_image", + "scrape_webpage", +]); + +/** + * Type for thinking step data from the backend + */ +interface ThinkingStepData { + id: string; + title: string; + status: "pending" | "in_progress" | "completed"; + items: string[]; +} + +export default function NewChatPage() { + const params = useParams(); + const router = useRouter(); + const [isInitializing, setIsInitializing] = useState(true); + const [threadId, setThreadId] = useState(null); + const [messages, setMessages] = useState([]); + const [isRunning, setIsRunning] = useState(false); + // Store thinking steps per message ID - kept separate from content to avoid + // "unsupported part type" errors from assistant-ui + const [messageThinkingSteps, setMessageThinkingSteps] = useState>( + new Map() + ); + const abortControllerRef = useRef(null); + + // Create the attachment adapter for file processing + const attachmentAdapter = useMemo(() => createAttachmentAdapter(), []); + + // Extract search_space_id from URL params + const searchSpaceId = useMemo(() => { + const id = params.search_space_id; + const parsed = typeof id === "string" ? Number.parseInt(id, 10) : 0; + return Number.isNaN(parsed) ? 0 : parsed; + }, [params.search_space_id]); + + // Extract chat_id from URL params + const urlChatId = useMemo(() => { + const id = params.chat_id; + let parsed = 0; + if (Array.isArray(id) && id.length > 0) { + parsed = Number.parseInt(id[0], 10); + } else if (typeof id === "string") { + parsed = Number.parseInt(id, 10); + } + return Number.isNaN(parsed) ? 
0 : parsed; + }, [params.chat_id]); + + // Initialize thread and load messages + const initializeThread = useCallback(async () => { + setIsInitializing(true); + + try { + if (urlChatId > 0) { + // Thread exists - load messages + setThreadId(urlChatId); + const response = await getThreadMessages(urlChatId); + if (response.messages && response.messages.length > 0) { + const loadedMessages = response.messages.map(convertToThreadMessage); + setMessages(loadedMessages); + + // Extract and restore thinking steps from persisted messages + const restoredThinkingSteps = new Map(); + for (const msg of response.messages) { + if (msg.role === "assistant") { + const steps = extractThinkingSteps(msg.content); + if (steps.length > 0) { + restoredThinkingSteps.set(`msg-${msg.id}`, steps); + } + } + } + if (restoredThinkingSteps.size > 0) { + setMessageThinkingSteps(restoredThinkingSteps); + } + } + } else { + // Create new thread + const newThread = await createThread(searchSpaceId, "New Chat"); + setThreadId(newThread.id); + router.replace(`/dashboard/${searchSpaceId}/new-chat/${newThread.id}`); + } + } catch (error) { + console.error("[NewChatPage] Failed to initialize thread:", error); + // Keep threadId as null - don't use Date.now() as it creates an invalid ID + // that will cause 404 errors on subsequent API calls + setThreadId(null); + toast.error("Failed to initialize chat. 
Please try again."); + } finally { + setIsInitializing(false); + } + }, [urlChatId, searchSpaceId, router]); + + // Initialize on mount + useEffect(() => { + initializeThread(); + }, [initializeThread]); + + // Cancel ongoing request + const cancelRun = useCallback(async () => { + if (abortControllerRef.current) { + abortControllerRef.current.abort(); + abortControllerRef.current = null; + } + setIsRunning(false); + }, []); + + // Handle new message from user + const onNew = useCallback( + async (message: AppendMessage) => { + if (!threadId) return; + + // Extract user query text from content parts + let userQuery = ""; + for (const part of message.content) { + if (part.type === "text") { + userQuery += part.text; + } + } + + // Extract attachments from message + // AppendMessage.attachments contains the processed attachment objects (from adapter.send()) + const messageAttachments: Array> = []; + if (message.attachments && message.attachments.length > 0) { + for (const att of message.attachments) { + messageAttachments.push(att as unknown as Record); + } + } + + if (!userQuery.trim() && messageAttachments.length === 0) return; + + // Check if podcast is already generating + if (isPodcastGenerating() && looksLikePodcastRequest(userQuery)) { + toast.warning("A podcast is already being generated."); + return; + } + + const token = getBearerToken(); + if (!token) { + toast.error("Not authenticated. 
Please log in again."); + return; + } + + // Add user message to state + const userMsgId = `msg-user-${Date.now()}`; + const userMessage: ThreadMessageLike = { + id: userMsgId, + role: "user", + content: message.content, + createdAt: new Date(), + }; + setMessages((prev) => [...prev, userMessage]); + + // Persist user message (don't await, fire and forget) + appendMessage(threadId, { + role: "user", + content: message.content, + }).catch((err) => console.error("Failed to persist user message:", err)); + + // Start streaming response + setIsRunning(true); + const controller = new AbortController(); + abortControllerRef.current = controller; + + // Prepare assistant message + const assistantMsgId = `msg-assistant-${Date.now()}`; + const currentThinkingSteps = new Map(); + + // Ordered content parts to preserve inline tool call positions + // Each part is either a text segment or a tool call + type ContentPart = + | { type: "text"; text: string } + | { + type: "tool-call"; + toolCallId: string; + toolName: string; + args: Record; + result?: unknown; + }; + const contentParts: ContentPart[] = []; + + // Track the current text segment index (for appending text deltas) + let currentTextPartIndex = -1; + + // Map to track tool call indices for updating results + const toolCallIndices = new Map(); + + // Helper to get or create the current text part for appending text + const appendText = (delta: string) => { + if (currentTextPartIndex >= 0 && contentParts[currentTextPartIndex]?.type === "text") { + // Append to existing text part + (contentParts[currentTextPartIndex] as { type: "text"; text: string }).text += delta; + } else { + // Create new text part + contentParts.push({ type: "text", text: delta }); + currentTextPartIndex = contentParts.length - 1; + } + }; + + // Helper to add a tool call (this "breaks" the current text segment) + const addToolCall = (toolCallId: string, toolName: string, args: Record) => { + if (TOOLS_WITH_UI.has(toolName)) { + contentParts.push({ + 
type: "tool-call", + toolCallId, + toolName, + args, + }); + toolCallIndices.set(toolCallId, contentParts.length - 1); + // Reset text part index so next text creates a new segment + currentTextPartIndex = -1; + } + }; + + // Helper to update a tool call's args or result + const updateToolCall = ( + toolCallId: string, + update: { args?: Record; result?: unknown } + ) => { + const index = toolCallIndices.get(toolCallId); + if (index !== undefined && contentParts[index]?.type === "tool-call") { + const tc = contentParts[index] as ContentPart & { type: "tool-call" }; + if (update.args) tc.args = update.args; + if (update.result !== undefined) tc.result = update.result; + } + }; + + // Helper to build content for UI (without thinking-steps to avoid assistant-ui errors) + const buildContentForUI = (): ThreadMessageLike["content"] => { + // Filter to only include text parts with content and tool-calls with UI + const filtered = contentParts.filter((part) => { + if (part.type === "text") return part.text.length > 0; + if (part.type === "tool-call") return TOOLS_WITH_UI.has(part.toolName); + return false; + }); + return filtered.length > 0 + ? (filtered as ThreadMessageLike["content"]) + : [{ type: "text", text: "" }]; + }; + + // Helper to build content for persistence (includes thinking-steps for restoration) + const buildContentForPersistence = (): unknown[] => { + const parts: unknown[] = []; + + // Include thinking steps for persistence + if (currentThinkingSteps.size > 0) { + parts.push({ + type: "thinking-steps", + steps: Array.from(currentThinkingSteps.values()), + }); + } + + // Add content parts (filtered) + for (const part of contentParts) { + if (part.type === "text" && part.text.length > 0) { + parts.push(part); + } else if (part.type === "tool-call" && TOOLS_WITH_UI.has(part.toolName)) { + parts.push(part); + } + } + + return parts.length > 0 ? 
parts : [{ type: "text", text: "" }]; + }; + + // Add placeholder assistant message + setMessages((prev) => [ + ...prev, + { + id: assistantMsgId, + role: "assistant", + content: [{ type: "text", text: "" }], + createdAt: new Date(), + }, + ]); + + try { + const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + + // Build message history for context + const messageHistory = messages + .filter((m) => m.role === "user" || m.role === "assistant") + .map((m) => { + let text = ""; + for (const part of m.content) { + if (typeof part === "object" && part.type === "text" && "text" in part) { + text += part.text; + } + } + return { role: m.role, content: text }; + }) + .filter((m) => m.content.length > 0); + + // Extract attachment content to send with the request + const attachments = extractAttachmentContent(messageAttachments); + + const response = await fetch(`${backendUrl}/api/v1/new_chat`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${token}`, + }, + body: JSON.stringify({ + chat_id: threadId, + user_query: userQuery.trim(), + search_space_id: searchSpaceId, + messages: messageHistory, + attachments: attachments.length > 0 ? 
attachments : undefined, + }), + signal: controller.signal, + }); + + if (!response.ok) { + throw new Error(`Backend error: ${response.status}`); + } + + if (!response.body) { + throw new Error("No response body"); + } + + // Parse SSE stream + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const events = buffer.split(/\r?\n\r?\n/); + buffer = events.pop() || ""; + + for (const event of events) { + const lines = event.split(/\r?\n/); + for (const line of lines) { + if (!line.startsWith("data: ")) continue; + const data = line.slice(6).trim(); + if (!data || data === "[DONE]") continue; + + try { + const parsed = JSON.parse(data); + + switch (parsed.type) { + case "text-delta": + appendText(parsed.delta); + setMessages((prev) => + prev.map((m) => + m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m + ) + ); + break; + + case "tool-input-start": + // Add tool call inline - this breaks the current text segment + addToolCall(parsed.toolCallId, parsed.toolName, {}); + setMessages((prev) => + prev.map((m) => + m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m + ) + ); + break; + + case "tool-input-available": { + // Update existing tool call's args, or add if not exists + if (toolCallIndices.has(parsed.toolCallId)) { + updateToolCall(parsed.toolCallId, { args: parsed.input || {} }); + } else { + addToolCall(parsed.toolCallId, parsed.toolName, parsed.input || {}); + } + setMessages((prev) => + prev.map((m) => + m.id === assistantMsgId ? 
{ ...m, content: buildContentForUI() } : m + ) + ); + break; + } + + case "tool-output-available": { + // Update the tool call with its result + updateToolCall(parsed.toolCallId, { result: parsed.output }); + // Handle podcast-specific logic + if (parsed.output?.status === "processing" && parsed.output?.task_id) { + // Check if this is a podcast tool by looking at the content part + const idx = toolCallIndices.get(parsed.toolCallId); + if (idx !== undefined) { + const part = contentParts[idx]; + if (part?.type === "tool-call" && part.toolName === "generate_podcast") { + setActivePodcastTaskId(parsed.output.task_id); + } + } + } + setMessages((prev) => + prev.map((m) => + m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m + ) + ); + break; + } + + case "data-thinking-step": { + // Handle thinking step events for chain-of-thought display + const stepData = parsed.data as ThinkingStepData; + if (stepData?.id) { + currentThinkingSteps.set(stepData.id, stepData); + // Update thinking steps state for rendering + // The ThinkingStepsScrollHandler in Thread component + // will handle auto-scrolling when this state changes + setMessageThinkingSteps((prev) => { + const newMap = new Map(prev); + newMap.set(assistantMsgId, Array.from(currentThinkingSteps.values())); + return newMap; + }); + } + break; + } + + case "error": + throw new Error(parsed.errorText || "Server error"); + } + } catch (e) { + if (e instanceof SyntaxError) continue; + throw e; + } + } + } + } + } finally { + reader.releaseLock(); + } + + // Persist assistant message (with thinking steps for restoration on refresh) + const finalContent = buildContentForPersistence(); + if (contentParts.length > 0) { + appendMessage(threadId, { + role: "assistant", + content: finalContent, + }).catch((err) => console.error("Failed to persist assistant message:", err)); + } + } catch (error) { + if (error instanceof Error && error.name === "AbortError") { + // Request was cancelled + return; + } + 
console.error("[NewChatPage] Chat error:", error); + toast.error("Failed to get response. Please try again."); + // Update assistant message with error + setMessages((prev) => + prev.map((m) => + m.id === assistantMsgId + ? { + ...m, + content: [ + { + type: "text", + text: "Sorry, there was an error. Please try again.", + }, + ], + } + : m + ) + ); + } finally { + setIsRunning(false); + abortControllerRef.current = null; + // Note: We no longer clear thinking steps - they persist with the message + } + }, + [threadId, searchSpaceId, messages] + ); + + // Convert message (pass through since already in correct format) + const convertMessage = useCallback( + (message: ThreadMessageLike): ThreadMessageLike => message, + [] + ); + + // Handle editing a message - removes messages after the edited one and sends as new + const onEdit = useCallback( + async (message: AppendMessage) => { + // Find the message being edited by looking at the parentId + // The parentId tells us which message's response we're editing + // For now, we'll just treat edits like new messages + // A more sophisticated implementation would truncate the history + await onNew(message); + }, + [onNew] + ); + + // Create external store runtime with attachment support + const runtime = useExternalStoreRuntime({ + messages, + isRunning, + onNew, + onEdit, + convertMessage, + onCancel: cancelRun, + adapters: { + attachments: attachmentAdapter, + }, + }); + + // Show loading state + if (isInitializing) { + return ( +
+
Loading chat...
+
+ ); + } + + // Show error state if thread initialization failed + if (!threadId) { + return ( +
+
Failed to initialize chat
+ +
+ ); + } + + return ( + + + + + +
+ +
+ +
+
+
+ ); +} diff --git a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx index 1df54d8b2..25f189203 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx @@ -1,312 +1,268 @@ "use client"; import { useAtomValue } from "jotai"; -import { FileText, MessageSquare, UserPlus, Users } from "lucide-react"; +import { Loader2 } from "lucide-react"; import { motion } from "motion/react"; import { useParams, useRouter } from "next/navigation"; -import { useTranslations } from "next-intl"; -import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { useEffect, useRef, useState } from "react"; import { toast } from "sonner"; -import { updateLLMPreferencesMutationAtom } from "@/atoms/llm-config/llm-config-mutation.atoms"; import { - globalLLMConfigsAtom, - llmConfigsAtom, + createNewLLMConfigMutationAtom, + updateLLMPreferencesMutationAtom, +} from "@/atoms/new-llm-config/new-llm-config-mutation.atoms"; +import { + globalNewLLMConfigsAtom, llmPreferencesAtom, -} from "@/atoms/llm-config/llm-config-query.atoms"; -import { OnboardActionCard } from "@/components/onboard/onboard-action-card"; -import { OnboardAdvancedSettings } from "@/components/onboard/onboard-advanced-settings"; -import { OnboardHeader } from "@/components/onboard/onboard-header"; -import { OnboardLLMSetup } from "@/components/onboard/onboard-llm-setup"; -import { OnboardLoading } from "@/components/onboard/onboard-loading"; -import { OnboardStats } from "@/components/onboard/onboard-stats"; +} from "@/atoms/new-llm-config/new-llm-config-query.atoms"; +import { Logo } from "@/components/Logo"; +import { LLMConfigForm, type LLMConfigFormData } from "@/components/shared/llm-config-form"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { getBearerToken, redirectToLogin } from 
"@/lib/auth-utils"; -const OnboardPage = () => { - const t = useTranslations("onboard"); +export default function OnboardPage() { const router = useRouter(); const params = useParams(); const searchSpaceId = Number(params.search_space_id); + // Queries const { - data: llmConfigs = [], - isFetching: configsLoading, - refetch: refreshConfigs, - } = useAtomValue(llmConfigsAtom); - const { data: globalConfigs = [], isFetching: globalConfigsLoading } = - useAtomValue(globalLLMConfigsAtom); - const { - data: preferences = {}, - isFetching: preferencesLoading, - refetch: refreshPreferences, - } = useAtomValue(llmPreferencesAtom); - const { mutateAsync: updatePreferences } = useAtomValue(updateLLMPreferencesMutationAtom); + data: globalConfigs = [], + isFetching: globalConfigsLoading, + isSuccess: globalConfigsLoaded, + } = useAtomValue(globalNewLLMConfigsAtom); + const { data: preferences = {}, isFetching: preferencesLoading } = + useAtomValue(llmPreferencesAtom); - // Compute isOnboardingComplete - const isOnboardingComplete = useMemo(() => { - return !!( - preferences.long_context_llm_id && - preferences.fast_llm_id && - preferences.strategic_llm_id - ); - }, [preferences]); + // Mutations + const { mutateAsync: createConfig, isPending: isCreating } = useAtomValue( + createNewLLMConfigMutationAtom + ); + const { mutateAsync: updatePreferences, isPending: isUpdatingPreferences } = useAtomValue( + updateLLMPreferencesMutationAtom + ); + // State const [isAutoConfiguring, setIsAutoConfiguring] = useState(false); - const [autoConfigComplete, setAutoConfigComplete] = useState(false); - const [showAdvancedSettings, setShowAdvancedSettings] = useState(false); - const [showPromptSettings, setShowPromptSettings] = useState(false); - - const handleRefreshPreferences = useCallback(async () => { - await refreshPreferences(); - }, []); - - // Track if we've already attempted auto-configuration const hasAttemptedAutoConfig = useRef(false); - // Track if onboarding was complete on 
initial mount - const wasCompleteOnMount = useRef(null); - const hasCheckedInitialState = useRef(false); - - // Check if user is authenticated + // Check authentication useEffect(() => { const token = getBearerToken(); if (!token) { - // Save current path and redirect to login redirectToLogin(); - return; } }, []); - // Capture onboarding state on first load + // Check if onboarding is already complete + const isOnboardingComplete = preferences.agent_llm_id && preferences.document_summary_llm_id; + + // If onboarding is already complete, redirect immediately useEffect(() => { - if ( - !hasCheckedInitialState.current && - !preferencesLoading && - !configsLoading && - !globalConfigsLoading - ) { - wasCompleteOnMount.current = isOnboardingComplete; - hasCheckedInitialState.current = true; + if (!preferencesLoading && isOnboardingComplete) { + router.push(`/dashboard/${searchSpaceId}/new-chat`); } - }, [preferencesLoading, configsLoading, globalConfigsLoading, isOnboardingComplete]); + }, [preferencesLoading, isOnboardingComplete, router, searchSpaceId]); - // Redirect to dashboard if onboarding was already complete + // Auto-configure if global configs are available useEffect(() => { - if ( - wasCompleteOnMount.current === true && - !preferencesLoading && - !configsLoading && - !globalConfigsLoading - ) { - const timer = setTimeout(() => { - router.push(`/dashboard/${searchSpaceId}`); - }, 300); - return () => clearTimeout(timer); - } - }, [preferencesLoading, configsLoading, globalConfigsLoading, router, searchSpaceId]); + const autoConfigureWithGlobal = async () => { + if (hasAttemptedAutoConfig.current) return; + if (globalConfigsLoading || preferencesLoading) return; + if (!globalConfigsLoaded) return; + if (isOnboardingComplete) return; - // Auto-configure LLM roles if global configs are available - const autoConfigureLLMs = useCallback(async () => { - if (hasAttemptedAutoConfig.current) return; - if (globalConfigs.length === 0) return; - if 
(isOnboardingComplete) { - setAutoConfigComplete(true); - return; - } + // Only auto-configure if we have global configs + if (globalConfigs.length > 0) { + hasAttemptedAutoConfig.current = true; + setIsAutoConfiguring(true); - hasAttemptedAutoConfig.current = true; - setIsAutoConfiguring(true); + try { + const firstGlobalConfig = globalConfigs[0]; - try { - const allConfigs = [...globalConfigs, ...llmConfigs]; + await updatePreferences({ + search_space_id: searchSpaceId, + data: { + agent_llm_id: firstGlobalConfig.id, + document_summary_llm_id: firstGlobalConfig.id, + }, + }); - if (allConfigs.length === 0) { - setIsAutoConfiguring(false); - return; + toast.success("AI configured automatically!", { + description: `Using ${firstGlobalConfig.name}. You can customize this later in Settings.`, + }); + + // Redirect to new-chat + router.push(`/dashboard/${searchSpaceId}/new-chat`); + } catch (error) { + console.error("Auto-configuration failed:", error); + toast.error("Auto-configuration failed. 
Please add a configuration manually."); + setIsAutoConfiguring(false); + } } + }; - // Use first available config for all roles - const defaultConfigId = allConfigs[0].id; + autoConfigureWithGlobal(); + }, [ + globalConfigs, + globalConfigsLoading, + globalConfigsLoaded, + preferencesLoading, + isOnboardingComplete, + updatePreferences, + searchSpaceId, + router, + ]); - const newPreferences = { - long_context_llm_id: defaultConfigId, - fast_llm_id: defaultConfigId, - strategic_llm_id: defaultConfigId, - }; + // Handle form submission + const handleSubmit = async (formData: LLMConfigFormData) => { + try { + // Create the config + const newConfig = await createConfig(formData); + // Auto-assign to all roles await updatePreferences({ search_space_id: searchSpaceId, - data: newPreferences, + data: { + agent_llm_id: newConfig.id, + document_summary_llm_id: newConfig.id, + }, }); - await refreshPreferences(); - setAutoConfigComplete(true); - toast.success("AI models configured automatically!", { - description: "You can customize these in advanced settings.", + + toast.success("Configuration created!", { + description: "Redirecting to chat...", }); + + // Redirect to new-chat + router.push(`/dashboard/${searchSpaceId}/new-chat`); } catch (error) { - console.error("Auto-configuration failed:", error); - } finally { - setIsAutoConfiguring(false); + console.error("Failed to create config:", error); + if (error instanceof Error) { + toast.error(error.message || "Failed to create configuration"); + } } - }, [globalConfigs, llmConfigs, isOnboardingComplete, updatePreferences, refreshPreferences]); + }; - // Trigger auto-configuration once data is loaded - useEffect(() => { - if (!configsLoading && !globalConfigsLoading && !preferencesLoading) { - autoConfigureLLMs(); - } - }, [configsLoading, globalConfigsLoading, preferencesLoading, autoConfigureLLMs]); - - const allConfigs = [...globalConfigs, ...llmConfigs]; - const isReady = autoConfigComplete || isOnboardingComplete; + 
const isSubmitting = isCreating || isUpdatingPreferences; // Loading state - if (configsLoading || preferencesLoading || globalConfigsLoading || isAutoConfiguring) { + if (globalConfigsLoading || preferencesLoading || isAutoConfiguring) { return ( - - ); - } - - // Show LLM setup if no configs available OR if roles are not assigned yet - // This forces users to complete role assignment before seeing the final screen - if (allConfigs.length === 0 || !isOnboardingComplete) { - return ( - refreshConfigs()} - onConfigDeleted={() => refreshConfigs()} - onPreferencesUpdated={handleRefreshPreferences} - /> - ); - } - - // Main onboarding view - return ( -
-
+
+
+
+
+ +
+
+
+

+ {isAutoConfiguring ? "Setting up your AI..." : "Loading..."} +

+

+ {isAutoConfiguring + ? "Auto-configuring with available settings" + : "Please wait while we check your configuration"} +

+
+
+ {[0, 1, 2].map((i) => ( + + ))} +
+ +
+ ); + } + + // If global configs exist but auto-config failed, show simple message + if (globalConfigs.length > 0 && !isAutoConfiguring) { + return null; // Will redirect via useEffect + } + + // No global configs - show the config form + return ( +
+
+ {/* Header */} - +
+ + + - {/* Quick Stats */} - +
+

Configure Your AI

+

+ Add your LLM provider to get started with SurfSense +

+
+
- {/* Action Cards */} + {/* Config Form */} - router.push(`/dashboard/${searchSpaceId}/researcher`)} - colorScheme="violet" - delay={0.9} - /> - - router.push(`/dashboard/${searchSpaceId}/sources/add`)} - colorScheme="blue" - delay={0.8} - /> - - router.push(`/dashboard/${searchSpaceId}/team`)} - colorScheme="emerald" - delay={0.7} - /> + + + LLM Configuration + + + + + - {/* Advanced Settings */} - refreshConfigs()} - onConfigDeleted={() => refreshConfigs()} - onPreferencesUpdated={handleRefreshPreferences} - /> - - {/* Footer */} - -

- You can always adjust these settings later in{" "} - -

-
+ You can add more configurations and customize settings anytime in{" "} + +
); -}; - -export default OnboardPage; +} diff --git a/surfsense_web/app/dashboard/[search_space_id]/podcasts/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/podcasts/page.tsx deleted file mode 100644 index c8c724ee2..000000000 --- a/surfsense_web/app/dashboard/[search_space_id]/podcasts/page.tsx +++ /dev/null @@ -1,24 +0,0 @@ -import { Suspense } from "react"; -import PodcastsPageClient from "./podcasts-client"; - -interface PageProps { - params: { - search_space_id: string; - }; -} - -export default async function PodcastsPage({ params }: PageProps) { - const { search_space_id: searchSpaceId } = await Promise.resolve(params); - - return ( - -
-
- } - > - - - ); -} diff --git a/surfsense_web/app/dashboard/[search_space_id]/podcasts/podcasts-client.tsx b/surfsense_web/app/dashboard/[search_space_id]/podcasts/podcasts-client.tsx deleted file mode 100644 index 730defae8..000000000 --- a/surfsense_web/app/dashboard/[search_space_id]/podcasts/podcasts-client.tsx +++ /dev/null @@ -1,957 +0,0 @@ -"use client"; - -import { format } from "date-fns"; -import { useAtom, useAtomValue } from "jotai"; -import { - Calendar, - MoreHorizontal, - Pause, - Play, - Podcast as PodcastIcon, - Search, - SkipBack, - SkipForward, - Trash2, - Volume2, - VolumeX, - X, -} from "lucide-react"; -import { AnimatePresence, motion, type Variants } from "motion/react"; -import Image from "next/image"; -import { useEffect, useRef, useState } from "react"; -import { toast } from "sonner"; -import { deletePodcastMutationAtom } from "@/atoms/podcasts/podcast-mutation.atoms"; -import { podcastsAtom } from "@/atoms/podcasts/podcast-query.atoms"; -// UI Components -import { Button } from "@/components/ui/button"; -import { Card } from "@/components/ui/card"; -import { - Dialog, - DialogContent, - DialogDescription, - DialogFooter, - DialogHeader, - DialogTitle, -} from "@/components/ui/dialog"; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - DropdownMenuTrigger, -} from "@/components/ui/dropdown-menu"; -import { Input } from "@/components/ui/input"; -import { - Select, - SelectContent, - SelectGroup, - SelectItem, - SelectTrigger, - SelectValue, -} from "@/components/ui/select"; -import { Slider } from "@/components/ui/slider"; -import type { Podcast } from "@/contracts/types/podcast.types"; -import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; - -interface PodcastsPageClientProps { - searchSpaceId: string; -} - -const pageVariants: Variants = { - initial: { opacity: 0 }, - enter: { - opacity: 1, - transition: { duration: 0.4, ease: "easeInOut", staggerChildren: 0.1 }, - }, - exit: { opacity: 0, transition: 
{ duration: 0.3, ease: "easeInOut" } }, -}; - -const podcastCardVariants: Variants = { - initial: { scale: 0.95, y: 20, opacity: 0 }, - animate: { - scale: 1, - y: 0, - opacity: 1, - transition: { type: "spring", stiffness: 300, damping: 25 }, - }, - exit: { scale: 0.95, y: -20, opacity: 0 }, - hover: { y: -5, scale: 1.02, transition: { duration: 0.2 } }, -}; - -const MotionCard = motion(Card); - -export default function PodcastsPageClient({ searchSpaceId }: PodcastsPageClientProps) { - const [filteredPodcasts, setFilteredPodcasts] = useState([]); - const [searchQuery, setSearchQuery] = useState(""); - const [sortOrder, setSortOrder] = useState("newest"); - const [deleteDialogOpen, setDeleteDialogOpen] = useState(false); - const [podcastToDelete, setPodcastToDelete] = useState<{ - id: number; - title: string; - } | null>(null); - - // Audio player state - const [currentPodcast, setCurrentPodcast] = useState(null); - const [audioSrc, setAudioSrc] = useState(undefined); - const [isAudioLoading, setIsAudioLoading] = useState(false); - const [isPlaying, setIsPlaying] = useState(false); - const [currentTime, setCurrentTime] = useState(0); - const [duration, setDuration] = useState(0); - const [volume, setVolume] = useState(0.7); - const [isMuted, setIsMuted] = useState(false); - const audioRef = useRef(null); - const currentObjectUrlRef = useRef(null); - const [{ isPending: isDeletingPodcast, mutateAsync: deletePodcast, error: deleteError }] = - useAtom(deletePodcastMutationAtom); - const { - data: podcasts, - isLoading: isFetchingPodcasts, - error: fetchError, - } = useAtomValue(podcastsAtom); - - // Add podcast image URL constant - const PODCAST_IMAGE_URL = - "https://static.vecteezy.com/system/resources/thumbnails/002/157/611/small_2x/illustrations-concept-design-podcast-channel-free-vector.jpg"; - - useEffect(() => { - if (isFetchingPodcasts) return; - - if (fetchError) { - console.error("Error fetching podcasts:", fetchError); - setFilteredPodcasts([]); - return; - 
} - - if (!podcasts) { - setFilteredPodcasts([]); - return; - } - - setFilteredPodcasts(podcasts); - }, []); - - // Filter and sort podcasts based on search query and sort order - useEffect(() => { - if (!podcasts) return; - - let result = [...podcasts]; - - // Filter by search term - if (searchQuery) { - const query = searchQuery.toLowerCase(); - result = result.filter((podcast) => podcast.title.toLowerCase().includes(query)); - } - - // Filter by search space - result = result.filter((podcast) => podcast.search_space_id === parseInt(searchSpaceId)); - - // Sort podcasts - result.sort((a, b) => { - const dateA = new Date(a.created_at).getTime(); - const dateB = new Date(b.created_at).getTime(); - - return sortOrder === "newest" ? dateB - dateA : dateA - dateB; - }); - - setFilteredPodcasts(result); - }, [podcasts, searchQuery, sortOrder, searchSpaceId]); - - // Cleanup object URL on unmount or when currentPodcast changes - useEffect(() => { - return () => { - if (currentObjectUrlRef.current) { - URL.revokeObjectURL(currentObjectUrlRef.current); - currentObjectUrlRef.current = null; - } - }; - }, []); - - // Audio player time update handler - const handleTimeUpdate = () => { - if (audioRef.current) { - setCurrentTime(audioRef.current.currentTime); - } - }; - - // Audio player metadata loaded handler - const handleMetadataLoaded = () => { - if (audioRef.current) { - setDuration(audioRef.current.duration); - } - }; - - // Play/pause toggle - const togglePlayPause = () => { - if (audioRef.current) { - if (isPlaying) { - audioRef.current.pause(); - } else { - audioRef.current.play(); - } - setIsPlaying(!isPlaying); - } - }; - - // To close player - const closePlayer = () => { - if (isPlaying) { - audioRef.current?.pause(); - } - setIsPlaying(false); - setAudioSrc(undefined); - setCurrentTime(0); - setCurrentPodcast(null); - }; - - // Seek to position - const handleSeek = (value: number[]) => { - if (audioRef.current) { - audioRef.current.currentTime = value[0]; - 
setCurrentTime(value[0]); - } - }; - - // Volume change - const handleVolumeChange = (value: number[]) => { - if (audioRef.current) { - const newVolume = value[0]; - - // Set volume - audioRef.current.volume = newVolume; - setVolume(newVolume); - - // Handle mute state based on volume - if (newVolume === 0) { - audioRef.current.muted = true; - setIsMuted(true); - } else { - audioRef.current.muted = false; - setIsMuted(false); - } - } - }; - - // Toggle mute - const toggleMute = () => { - if (audioRef.current) { - const newMutedState = !isMuted; - audioRef.current.muted = newMutedState; - setIsMuted(newMutedState); - - // If unmuting, restore previous volume if it was 0 - if (!newMutedState && volume === 0) { - const restoredVolume = 0.5; - audioRef.current.volume = restoredVolume; - setVolume(restoredVolume); - } - } - }; - - // Skip forward 10 seconds - const skipForward = () => { - if (audioRef.current) { - audioRef.current.currentTime = Math.min( - audioRef.current.duration, - audioRef.current.currentTime + 10 - ); - } - }; - - // Skip backward 10 seconds - const skipBackward = () => { - if (audioRef.current) { - audioRef.current.currentTime = Math.max(0, audioRef.current.currentTime - 10); - } - }; - - // Format time in MM:SS - const formatTime = (time: number) => { - const minutes = Math.floor(time / 60); - const seconds = Math.floor(time % 60); - return `${minutes}:${seconds < 10 ? 
"0" : ""}${seconds}`; - }; - - // Play podcast - Fetch blob and set object URL - const playPodcast = async (podcast: Podcast) => { - // If the same podcast is selected, just toggle play/pause - if (currentPodcast && currentPodcast.id === podcast.id) { - togglePlayPause(); - return; - } - - // Prevent multiple simultaneous loading requests - if (isAudioLoading) { - return; - } - - try { - // Reset player state and show loading - setCurrentPodcast(podcast); - setAudioSrc(undefined); - setCurrentTime(0); - setDuration(0); - setIsPlaying(false); - setIsAudioLoading(true); - - // Revoke previous object URL if exists (only after we've started the new request) - if (currentObjectUrlRef.current) { - URL.revokeObjectURL(currentObjectUrlRef.current); - currentObjectUrlRef.current = null; - } - - // Use AbortController to handle timeout or cancellation - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 30000); // 30 second timeout - - try { - const response = await podcastsApiService.loadPodcast({ - request: { id: podcast.id }, - controller, - }); - const objectUrl = URL.createObjectURL(response); - currentObjectUrlRef.current = objectUrl; - - // Set audio source - setAudioSrc(objectUrl); - - // Wait for the audio to be ready before playing - // We'll handle actual playback in the onLoadedData event instead of here - } catch (error) { - if (error instanceof DOMException && error.name === "AbortError") { - throw new Error("Request timed out. Please try again."); - } - throw error; - } finally { - clearTimeout(timeoutId); - } - } catch (error) { - console.error("Error fetching or playing podcast:", error); - toast.error(error instanceof Error ? 
error.message : "Failed to load podcast audio."); - // Reset state on error - setCurrentPodcast(null); - setAudioSrc(undefined); - } finally { - setIsAudioLoading(false); - } - }; - - // Function to handle podcast deletion - const handleDeletePodcast = async () => { - if (!podcastToDelete) return; - - try { - await deletePodcast({ id: podcastToDelete.id }); - - // Close dialog - setDeleteDialogOpen(false); - setPodcastToDelete(null); - - // If the current playing podcast is deleted, stop playback - if (currentPodcast && currentPodcast.id === podcastToDelete.id) { - if (audioRef.current) { - audioRef.current.pause(); - } - setCurrentPodcast(null); - setIsPlaying(false); - } - } catch (error) { - console.error("Error deleting podcast:", error); - toast.error(error instanceof Error ? error.message : "Failed to delete podcast"); - } - }; - - return ( - -
-
-

Podcasts

-

Listen to generated podcasts.

-
- - {/* Filter and Search Bar */} -
-
-
- - setSearchQuery(e.target.value)} - /> -
-
- -
- -
-
- - {/* Status Messages */} - {isFetchingPodcasts && ( -
-
-
-

Loading podcasts...

-
-
- )} - - {fetchError && !isFetchingPodcasts && ( -
-

Error loading podcasts

-

{fetchError.message ?? "Failed to load podcasts"}

-
- )} - - {!isFetchingPodcasts && !fetchError && filteredPodcasts.length === 0 && ( -
- -

No podcasts found

-

- {searchQuery - ? "Try adjusting your search filters" - : "Generate podcasts from your chats to get started"} -

-
- )} - - {/* Podcast Grid */} - {!isFetchingPodcasts && !fetchError && filteredPodcasts.length > 0 && ( - - - {filteredPodcasts.map((podcast, index) => ( - playPodcast(podcast)} - > -
- {/* Podcast image with gradient overlay */} - Podcast illustration - - {/* Better overlay with gradient for improved text legibility */} -
- - {/* Loading indicator with improved animation */} - {currentPodcast?.id === podcast.id && isAudioLoading && ( - - -
-

Loading podcast...

-
-
- )} - - {/* Play button with animations */} - {!(currentPodcast?.id === podcast.id && (isPlaying || isAudioLoading)) && ( - - - - )} - - {/* Pause button with animations */} - {currentPodcast?.id === podcast.id && isPlaying && !isAudioLoading && ( - - - - )} - - {/* Now playing indicator */} - {currentPodcast?.id === podcast.id && !isAudioLoading && ( -
- - - - - Now Playing -
- )} -
- -
-

- {podcast.title || "Untitled Podcast"} -

-

- - {format(new Date(podcast.created_at), "MMM d, yyyy")} -

-
- - {currentPodcast?.id === podcast.id && !isAudioLoading && ( - - -
- {formatTime(currentTime)} - {formatTime(duration)} -
-
- )} - - {currentPodcast?.id === podcast.id && !isAudioLoading && ( - - - - - - - - - - - - )} - -
- - - - - - { - e.stopPropagation(); - setPodcastToDelete({ - id: podcast.id, - title: podcast.title, - }); - setDeleteDialogOpen(true); - }} - > - - Delete Podcast - - - -
-
- ))} -
-
- )} - - {/* Current Podcast Player (Fixed at bottom) */} - {currentPodcast && !isAudioLoading && audioSrc && ( - -
-
-
- - - -
- -
-

{currentPodcast.title}

- -
-
- - -
-
- {formatTime(currentTime)} / {formatTime(duration)} -
-
-
- -
- - - - - - - - - - - - -
- - - - -
- - -
-
- - - - -
-
-
-
- )} -
- - {/* Delete Confirmation Dialog */} - - - - - - Delete Podcast - - - Are you sure you want to delete{" "} - {podcastToDelete?.title}? This action cannot be - undone. - - - - - - - - - - {/* Hidden audio element for playback */} - -
- ); -} diff --git a/surfsense_web/app/dashboard/[search_space_id]/researcher/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/researcher/[[...chat_id]]/page.tsx deleted file mode 100644 index 60980f034..000000000 --- a/surfsense_web/app/dashboard/[search_space_id]/researcher/[[...chat_id]]/page.tsx +++ /dev/null @@ -1,291 +0,0 @@ -"use client"; - -import { type CreateMessage, type Message, useChat } from "@ai-sdk/react"; -import { useAtom, useAtomValue } from "jotai"; -import { useParams, useRouter } from "next/navigation"; -import { useEffect, useMemo, useRef } from "react"; -import { createChatMutationAtom, updateChatMutationAtom } from "@/atoms/chats/chat-mutation.atoms"; -import { activeChatAtom } from "@/atoms/chats/chat-query.atoms"; -import { activeChatIdAtom } from "@/atoms/chats/ui.atoms"; -import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms"; -import ChatInterface from "@/components/chat/ChatInterface"; -import type { Document } from "@/contracts/types/document.types"; -import { useChatState } from "@/hooks/use-chat"; -import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; - -export default function ResearcherPage() { - const { search_space_id } = useParams(); - const router = useRouter(); - const hasSetInitialConnectors = useRef(false); - const hasInitiatedResponse = useRef(null); - const activeChatId = useAtomValue(activeChatIdAtom); - const { data: activeChatState, isFetching: isChatLoading } = useAtomValue(activeChatAtom); - const { mutateAsync: createChat } = useAtomValue(createChatMutationAtom); - const { mutateAsync: updateChat } = useAtomValue(updateChatMutationAtom); - const isNewChat = !activeChatId; - - // Reset the flag when chat ID changes (but not hasInitiatedResponse - we need to remember if we already initiated) - useEffect(() => { - hasSetInitialConnectors.current = false; - }, [activeChatId]); - - const { - token, - researchMode, - selectedConnectors, - 
setSelectedConnectors, - selectedDocuments, - setSelectedDocuments, - topK, - setTopK, - } = useChatState({ - search_space_id: search_space_id as string, - chat_id: activeChatId ?? undefined, - }); - - // Fetch all available sources (document types + live search connectors) - // Use the documentTypeCountsAtom for fetching document types - const [documentTypeCountsQuery] = useAtom(documentTypeCountsAtom); - const { data: documentTypeCountsData } = documentTypeCountsQuery; - - // Transform the response into the expected format - const documentTypes = useMemo(() => { - if (!documentTypeCountsData) return []; - return Object.entries(documentTypeCountsData).map(([type, count]) => ({ - type, - count, - })); - }, [documentTypeCountsData]); - - const { connectors: searchConnectors } = useSearchSourceConnectors( - false, - Number(search_space_id) - ); - - // Filter for non-indexable connectors (live search) - const liveSearchConnectors = useMemo( - () => searchConnectors.filter((connector) => !connector.is_indexable), - [searchConnectors] - ); - - // Memoize document IDs to prevent infinite re-renders - const documentIds = useMemo(() => { - return selectedDocuments.map((doc) => doc.id); - }, [selectedDocuments]); - - // Memoize connector types to prevent infinite re-renders - const connectorTypes = useMemo(() => { - return selectedConnectors; - }, [selectedConnectors]); - - // Unified localStorage management for chat state - interface ChatState { - selectedDocuments: Document[]; - selectedConnectors: string[]; - researchMode: "QNA"; // Always QNA mode - topK: number; - } - - const getChatStateStorageKey = (searchSpaceId: string, chatId: string) => - `surfsense_chat_state_${searchSpaceId}_${chatId}`; - - const storeChatState = (searchSpaceId: string, chatId: string, state: ChatState) => { - const key = getChatStateStorageKey(searchSpaceId, chatId); - localStorage.setItem(key, JSON.stringify(state)); - }; - - const restoreChatState = (searchSpaceId: string, chatId: string): 
ChatState | null => { - const key = getChatStateStorageKey(searchSpaceId, chatId); - const stored = localStorage.getItem(key); - if (stored) { - localStorage.removeItem(key); // Clean up after restoration - try { - return JSON.parse(stored); - } catch (error) { - console.error("Error parsing stored chat state:", error); - return null; - } - } - return null; - }; - - const handler = useChat({ - api: `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/chat`, - streamProtocol: "data", - initialMessages: [], - headers: { - ...(token && { Authorization: `Bearer ${token}` }), - }, - body: { - data: { - search_space_id: search_space_id, - selected_connectors: connectorTypes, - research_mode: researchMode, - document_ids_to_add_in_context: documentIds, - top_k: topK, - }, - }, - onError: (error) => { - console.error("Chat error:", error); - }, - }); - - const customHandlerAppend = async ( - message: Message | CreateMessage, - chatRequestOptions?: { data?: any } - ) => { - // Use the first message content as the chat title (truncated to 100 chars) - const messageContent = typeof message.content === "string" ? 
message.content : ""; - const chatTitle = messageContent.slice(0, 100) || "Untitled Chat"; - - const newChat = await createChat({ - type: researchMode, - title: chatTitle, - initial_connectors: selectedConnectors, - messages: [ - { - role: "user", - content: message.content, - }, - ], - search_space_id: Number(search_space_id), - }); - if (newChat) { - // Store chat state before navigation - storeChatState(search_space_id as string, String(newChat.id), { - selectedDocuments, - selectedConnectors, - researchMode, - topK, - }); - router.replace(`/dashboard/${search_space_id}/researcher/${newChat.id}`); - } - return String(newChat.id); - }; - - useEffect(() => { - if (token && !isNewChat && activeChatId) { - const chatData = activeChatState?.chatDetails; - if (!chatData) return; - - // Update configuration from chat data - // researchMode is always "QNA", no need to set from chat data - - if (chatData.initial_connectors && Array.isArray(chatData.initial_connectors)) { - setSelectedConnectors(chatData.initial_connectors); - } - - // Load existing messages - if (chatData.messages && Array.isArray(chatData.messages)) { - if (chatData.messages.length === 1 && chatData.messages[0].role === "user") { - // Single user message - append to trigger LLM response - // Only if we haven't already initiated for this chat and handler doesn't have messages yet - if (hasInitiatedResponse.current !== activeChatId && handler.messages.length === 0) { - hasInitiatedResponse.current = activeChatId; - handler.append({ - role: "user", - content: chatData.messages[0].content, - }); - } - } else if (chatData.messages.length > 1) { - // Multiple messages - set them all - handler.setMessages(chatData.messages); - } - } - } - }, [token, isNewChat, activeChatId, isChatLoading]); - - // Restore chat state from localStorage on page load - useEffect(() => { - if (activeChatId && search_space_id) { - const restoredState = restoreChatState(search_space_id as string, activeChatId); - if (restoredState) { 
- setSelectedDocuments(restoredState.selectedDocuments); - setSelectedConnectors(restoredState.selectedConnectors); - setTopK(restoredState.topK); - // researchMode is always "QNA", no need to restore - } - } - }, [ - activeChatId, - isChatLoading, - search_space_id, - setSelectedDocuments, - setSelectedConnectors, - setTopK, - ]); - - // Set all sources as default for new chats (only once on initial mount) - useEffect(() => { - if ( - isNewChat && - !hasSetInitialConnectors.current && - selectedConnectors.length === 0 && - documentTypes.length > 0 - ) { - // Combine all document types and live search connectors - const allSourceTypes = [ - ...documentTypes.map((dt) => dt.type), - ...liveSearchConnectors.map((c) => c.connector_type), - ]; - - if (allSourceTypes.length > 0) { - setSelectedConnectors(allSourceTypes); - hasSetInitialConnectors.current = true; - } - } - }, [ - isNewChat, - documentTypes, - liveSearchConnectors, - selectedConnectors.length, - setSelectedConnectors, - ]); - - // Auto-update chat when messages change (only for existing chats) - useEffect(() => { - if ( - !isNewChat && - activeChatId && - handler.status === "ready" && - handler.messages.length > 0 && - handler.messages[handler.messages.length - 1]?.role === "assistant" - ) { - const userMessages = handler.messages.filter((msg) => msg.role === "user"); - if (userMessages.length === 0) return; - const title = userMessages[0].content; - - updateChat({ - type: researchMode, - title: title, - initial_connectors: selectedConnectors, - messages: handler.messages, - search_space_id: Number(search_space_id), - id: Number(activeChatId), - }); - } - }, [handler.messages, handler.status, activeChatId, isNewChat, isChatLoading]); - - if (isChatLoading) { - return ( -
-
Loading...
-
- ); - } - - return ( - - ); -} diff --git a/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx index dd68e1a18..ad96402a4 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx @@ -30,20 +30,20 @@ interface SettingsNavItem { const settingsNavItems: SettingsNavItem[] = [ { id: "models", - label: "Model Configs", - description: "Configure AI models and providers", + label: "Agent Configs", + description: "LLM models with prompts & citations", icon: Bot, }, { id: "roles", - label: "LLM Roles", - description: "Manage language model roles", + label: "Role Assignments", + description: "Assign configs to agent roles", icon: Brain, }, { id: "prompts", label: "System Instructions", - description: "Customize system prompts", + description: "SearchSpace-wide AI instructions", icon: MessageSquare, }, ]; @@ -236,9 +236,6 @@ function SettingsContent({

{activeItem?.label}

-

- {activeItem?.description} -

@@ -275,7 +272,7 @@ export default function SettingsPage() { const [isSidebarOpen, setIsSidebarOpen] = useState(false); const handleBackToApp = useCallback(() => { - router.push(`/dashboard/${searchSpaceId}/researcher`); + router.push(`/dashboard/${searchSpaceId}/new-chat`); }, [router, searchSpaceId]); return ( diff --git a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx index 49e1de2ab..ed67fa1f5 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx @@ -807,7 +807,6 @@ function RolesTab({ { // TODO: Implement edit role dialog/modal - console.log("Edit role not yet implemented", role); }} > diff --git a/surfsense_web/app/dashboard/page.tsx b/surfsense_web/app/dashboard/page.tsx index dbf5b7155..f133d0c9c 100644 --- a/surfsense_web/app/dashboard/page.tsx +++ b/surfsense_web/app/dashboard/page.tsx @@ -244,7 +244,7 @@ const DashboardPage = () => { />
- + {space.name} {
diff --git a/surfsense_web/app/globals.css b/surfsense_web/app/globals.css index 5aee982bb..4f4ab6de1 100644 --- a/surfsense_web/app/globals.css +++ b/surfsense_web/app/globals.css @@ -158,3 +158,4 @@ button { } @source '../node_modules/@llamaindex/chat-ui/**/*.{ts,tsx}'; +@source '../node_modules/streamdown/dist/*.js'; diff --git a/surfsense_web/app/layout.tsx b/surfsense_web/app/layout.tsx index 54086194b..871293875 100644 --- a/surfsense_web/app/layout.tsx +++ b/surfsense_web/app/layout.tsx @@ -1,7 +1,7 @@ import type { Metadata } from "next"; import "./globals.css"; import { GoogleAnalytics } from "@next/third-parties/google"; -import { RootProvider } from "fumadocs-ui/provider"; +import { RootProvider } from "fumadocs-ui/provider/next"; import { Roboto } from "next/font/google"; import { I18nProvider } from "@/components/providers/I18nProvider"; import { ThemeProvider } from "@/components/theme/theme-provider"; diff --git a/surfsense_web/atoms/chats/chat-mutation.atoms.ts b/surfsense_web/atoms/chats/chat-mutation.atoms.ts deleted file mode 100644 index c761a706f..000000000 --- a/surfsense_web/atoms/chats/chat-mutation.atoms.ts +++ /dev/null @@ -1,93 +0,0 @@ -import { atomWithMutation } from "jotai-tanstack-query"; -import { toast } from "sonner"; -import type { - ChatSummary, - CreateChatRequest, - DeleteChatRequest, - UpdateChatRequest, -} from "@/contracts/types/chat.types"; -import { chatsApiService } from "@/lib/apis/chats-api.service"; -import { getBearerToken } from "@/lib/auth-utils"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; -import { queryClient } from "@/lib/query-client/client"; -import { activeSearchSpaceIdAtom } from "../search-spaces/search-space-query.atoms"; -import { globalChatsQueryParamsAtom } from "./ui.atoms"; - -export const deleteChatMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - const authToken = getBearerToken(); - const chatsQueryParams = 
get(globalChatsQueryParamsAtom); - - return { - mutationKey: cacheKeys.chats.globalQueryParams(chatsQueryParams), - enabled: !!searchSpaceId && !!authToken, - mutationFn: async (request: DeleteChatRequest) => { - return chatsApiService.deleteChat(request); - }, - - onSuccess: (_, request: DeleteChatRequest) => { - toast.success("Chat deleted successfully"); - // Optimistically update the current query - queryClient.setQueryData( - cacheKeys.chats.globalQueryParams(chatsQueryParams), - (oldData: ChatSummary[]) => { - return oldData?.filter((chat) => chat.id !== request.id) ?? []; - } - ); - // Invalidate all chat queries to ensure consistency across components - queryClient.invalidateQueries({ - queryKey: ["chats"], - }); - // Also invalidate the "all-chats" query used by AllChatsSidebar - queryClient.invalidateQueries({ - queryKey: ["all-chats"], - }); - }, - }; -}); - -export const createChatMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - const authToken = getBearerToken(); - const chatsQueryParams = get(globalChatsQueryParamsAtom); - - return { - mutationKey: cacheKeys.chats.globalQueryParams(chatsQueryParams), - enabled: !!searchSpaceId && !!authToken, - mutationFn: async (request: CreateChatRequest) => { - return chatsApiService.createChat(request); - }, - - onSuccess: () => { - // Invalidate ALL chat queries to ensure sidebar and other components refresh - // Using a partial key match to avoid stale closure issues with specific query params - queryClient.invalidateQueries({ - queryKey: ["chats"], - }); - // Also invalidate the "all-chats" query used by AllChatsSidebar - queryClient.invalidateQueries({ - queryKey: ["all-chats"], - }); - }, - }; -}); - -export const updateChatMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - const authToken = getBearerToken(); - const chatsQueryParams = get(globalChatsQueryParamsAtom); - - return { - mutationKey: 
cacheKeys.chats.globalQueryParams(chatsQueryParams), - enabled: !!searchSpaceId && !!authToken, - mutationFn: async (request: UpdateChatRequest) => { - return chatsApiService.updateChat(request); - }, - - onSuccess: () => { - queryClient.invalidateQueries({ - queryKey: cacheKeys.chats.globalQueryParams(chatsQueryParams), - }); - }, - }; -}); diff --git a/surfsense_web/atoms/chats/chat-query.atoms.ts b/surfsense_web/atoms/chats/chat-query.atoms.ts deleted file mode 100644 index 5a1242ded..000000000 --- a/surfsense_web/atoms/chats/chat-query.atoms.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { atomWithQuery } from "jotai-tanstack-query"; -import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; -import { chatsApiService } from "@/lib/apis/chats-api.service"; -import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; -import { getBearerToken } from "@/lib/auth-utils"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; -import { activeChatIdAtom, globalChatsQueryParamsAtom } from "./ui.atoms"; - -export const activeChatAtom = atomWithQuery((get) => { - const activeChatId = get(activeChatIdAtom); - const authToken = getBearerToken(); - - return { - queryKey: cacheKeys.chats.activeChat(activeChatId ?? 
""), - enabled: !!activeChatId && !!authToken, - queryFn: async () => { - if (!authToken) { - throw new Error("No authentication token found"); - } - if (!activeChatId) { - throw new Error("No active chat id found"); - } - - const [podcast, chatDetails] = await Promise.all([ - podcastsApiService.getPodcastByChatId({ chat_id: Number(activeChatId) }), - chatsApiService.getChatDetails({ id: Number(activeChatId) }), - ]); - - return { chatId: activeChatId, chatDetails, podcast }; - }, - }; -}); - -export const chatsAtom = atomWithQuery((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - const authToken = getBearerToken(); - const queryParams = get(globalChatsQueryParamsAtom); - - return { - queryKey: cacheKeys.chats.globalQueryParams(queryParams), - enabled: !!searchSpaceId && !!authToken, - queryFn: async () => { - return chatsApiService.getChats({ - queryParams: queryParams, - }); - }, - }; -}); diff --git a/surfsense_web/atoms/chats/ui.atoms.ts b/surfsense_web/atoms/chats/ui.atoms.ts deleted file mode 100644 index c92365aef..000000000 --- a/surfsense_web/atoms/chats/ui.atoms.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { atom } from "jotai"; -import type { GetChatsRequest } from "@/contracts/types/chat.types"; - -type ActiveChathatUIState = { - isChatPannelOpen: boolean; -}; - -export const activeChathatUIAtom = atom({ - isChatPannelOpen: false, -}); - -export const activeChatIdAtom = atom(null); - -export const globalChatsQueryParamsAtom = atom({ - limit: 5, - skip: 0, -}); diff --git a/surfsense_web/atoms/llm-config/llm-config-mutation.atoms.ts b/surfsense_web/atoms/llm-config/llm-config-mutation.atoms.ts deleted file mode 100644 index f28b1d708..000000000 --- a/surfsense_web/atoms/llm-config/llm-config-mutation.atoms.ts +++ /dev/null @@ -1,110 +0,0 @@ -import { atomWithMutation } from "jotai-tanstack-query"; -import { toast } from "sonner"; -import type { - CreateLLMConfigRequest, - DeleteLLMConfigRequest, - GetLLMConfigsResponse, - 
UpdateLLMConfigRequest, - UpdateLLMConfigResponse, - UpdateLLMPreferencesRequest, -} from "@/contracts/types/llm-config.types"; -import { llmConfigApiService } from "@/lib/apis/llm-config-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; -import { queryClient } from "@/lib/query-client/client"; -import { activeSearchSpaceIdAtom } from "../search-spaces/search-space-query.atoms"; - -export const createLLMConfigMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - - return { - mutationKey: cacheKeys.llmConfigs.all(searchSpaceId!), - enabled: !!searchSpaceId, - mutationFn: async (request: CreateLLMConfigRequest) => { - return llmConfigApiService.createLLMConfig(request); - }, - - onSuccess: () => { - toast.success("LLM configuration created successfully"); - queryClient.invalidateQueries({ - queryKey: cacheKeys.llmConfigs.all(searchSpaceId!), - }); - queryClient.invalidateQueries({ - queryKey: cacheKeys.llmConfigs.global(), - }); - }, - }; -}); - -export const updateLLMConfigMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - - return { - mutationKey: cacheKeys.llmConfigs.all(searchSpaceId!), - enabled: !!searchSpaceId, - mutationFn: async (request: UpdateLLMConfigRequest) => { - return llmConfigApiService.updateLLMConfig(request); - }, - - onSuccess: (_: UpdateLLMConfigResponse, request: UpdateLLMConfigRequest) => { - toast.success("LLM configuration updated successfully"); - queryClient.invalidateQueries({ - queryKey: cacheKeys.llmConfigs.all(searchSpaceId!), - }); - queryClient.invalidateQueries({ - queryKey: cacheKeys.llmConfigs.byId(String(request.id)), - }); - queryClient.invalidateQueries({ - queryKey: cacheKeys.llmConfigs.global(), - }); - }, - }; -}); - -export const deleteLLMConfigMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - const authToken = localStorage.getItem("surfsense_bearer_token"); - 
- return { - mutationKey: cacheKeys.llmConfigs.all(searchSpaceId!), - enabled: !!searchSpaceId && !!authToken, - mutationFn: async (request: DeleteLLMConfigRequest) => { - return llmConfigApiService.deleteLLMConfig(request); - }, - - onSuccess: (_, request: DeleteLLMConfigRequest) => { - toast.success("LLM configuration deleted successfully"); - queryClient.setQueryData( - cacheKeys.llmConfigs.all(searchSpaceId!), - (oldData: GetLLMConfigsResponse | undefined) => { - if (!oldData) return oldData; - return oldData.filter((config) => config.id !== request.id); - } - ); - queryClient.invalidateQueries({ - queryKey: cacheKeys.llmConfigs.byId(String(request.id)), - }); - queryClient.invalidateQueries({ - queryKey: cacheKeys.llmConfigs.global(), - }); - }, - }; -}); - -export const updateLLMPreferencesMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - - return { - mutationKey: cacheKeys.llmConfigs.preferences(searchSpaceId!), - enabled: !!searchSpaceId, - mutationFn: async (request: UpdateLLMPreferencesRequest) => { - return llmConfigApiService.updateLLMPreferences(request); - }, - - onSuccess: () => { - toast.success("LLM preferences updated successfully"); - queryClient.invalidateQueries({ - queryKey: cacheKeys.llmConfigs.preferences(searchSpaceId!), - }); - }, - }; -}); diff --git a/surfsense_web/atoms/llm-config/llm-config-query.atoms.ts b/surfsense_web/atoms/llm-config/llm-config-query.atoms.ts deleted file mode 100644 index 22ae63d7f..000000000 --- a/surfsense_web/atoms/llm-config/llm-config-query.atoms.ts +++ /dev/null @@ -1,46 +0,0 @@ -import { atomWithQuery } from "jotai-tanstack-query"; -import { llmConfigApiService } from "@/lib/apis/llm-config-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; -import { activeSearchSpaceIdAtom } from "../search-spaces/search-space-query.atoms"; - -export const llmConfigsAtom = atomWithQuery((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - 
- return { - queryKey: cacheKeys.llmConfigs.all(searchSpaceId!), - enabled: !!searchSpaceId, - staleTime: 5 * 60 * 1000, // 5 minutes - queryFn: async () => { - return llmConfigApiService.getLLMConfigs({ - queryParams: { - search_space_id: searchSpaceId!, - }, - }); - }, - }; -}); - -export const globalLLMConfigsAtom = atomWithQuery(() => { - return { - queryKey: cacheKeys.llmConfigs.global(), - staleTime: 10 * 60 * 1000, // 10 minutes - queryFn: async () => { - return llmConfigApiService.getGlobalLLMConfigs(); - }, - }; -}); - -export const llmPreferencesAtom = atomWithQuery((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - - return { - queryKey: cacheKeys.llmConfigs.preferences(String(searchSpaceId)), - enabled: !!searchSpaceId, - staleTime: 5 * 60 * 1000, // 5 minutes - queryFn: async () => { - return llmConfigApiService.getLLMPreferences({ - search_space_id: Number(searchSpaceId), - }); - }, - }; -}); diff --git a/surfsense_web/atoms/new-llm-config/new-llm-config-mutation.atoms.ts b/surfsense_web/atoms/new-llm-config/new-llm-config-mutation.atoms.ts new file mode 100644 index 000000000..8f81b7475 --- /dev/null +++ b/surfsense_web/atoms/new-llm-config/new-llm-config-mutation.atoms.ts @@ -0,0 +1,116 @@ +import { atomWithMutation } from "jotai-tanstack-query"; +import { toast } from "sonner"; +import type { + CreateNewLLMConfigRequest, + DeleteNewLLMConfigRequest, + GetNewLLMConfigsResponse, + UpdateLLMPreferencesRequest, + UpdateNewLLMConfigRequest, + UpdateNewLLMConfigResponse, +} from "@/contracts/types/new-llm-config.types"; +import { newLLMConfigApiService } from "@/lib/apis/new-llm-config-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; +import { queryClient } from "@/lib/query-client/client"; +import { activeSearchSpaceIdAtom } from "../search-spaces/search-space-query.atoms"; + +/** + * Mutation atom for creating a new NewLLMConfig + */ +export const createNewLLMConfigMutationAtom = atomWithMutation((get) => { + 
const searchSpaceId = get(activeSearchSpaceIdAtom); + + return { + mutationKey: ["new-llm-configs", "create"], + enabled: !!searchSpaceId, + mutationFn: async (request: CreateNewLLMConfigRequest) => { + return newLLMConfigApiService.createConfig(request); + }, + onSuccess: () => { + toast.success("Configuration created successfully"); + queryClient.invalidateQueries({ + queryKey: cacheKeys.newLLMConfigs.all(Number(searchSpaceId)), + }); + }, + onError: (error: Error) => { + toast.error(error.message || "Failed to create configuration"); + }, + }; +}); + +/** + * Mutation atom for updating an existing NewLLMConfig + */ +export const updateNewLLMConfigMutationAtom = atomWithMutation((get) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + + return { + mutationKey: ["new-llm-configs", "update"], + enabled: !!searchSpaceId, + mutationFn: async (request: UpdateNewLLMConfigRequest) => { + return newLLMConfigApiService.updateConfig(request); + }, + onSuccess: (_: UpdateNewLLMConfigResponse, request: UpdateNewLLMConfigRequest) => { + toast.success("Configuration updated successfully"); + queryClient.invalidateQueries({ + queryKey: cacheKeys.newLLMConfigs.all(Number(searchSpaceId)), + }); + queryClient.invalidateQueries({ + queryKey: cacheKeys.newLLMConfigs.byId(request.id), + }); + }, + onError: (error: Error) => { + toast.error(error.message || "Failed to update configuration"); + }, + }; +}); + +/** + * Mutation atom for deleting a NewLLMConfig + */ +export const deleteNewLLMConfigMutationAtom = atomWithMutation((get) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + + return { + mutationKey: ["new-llm-configs", "delete"], + enabled: !!searchSpaceId, + mutationFn: async (request: DeleteNewLLMConfigRequest) => { + return newLLMConfigApiService.deleteConfig(request); + }, + onSuccess: (_, request: DeleteNewLLMConfigRequest) => { + toast.success("Configuration deleted successfully"); + queryClient.setQueryData( + 
cacheKeys.newLLMConfigs.all(Number(searchSpaceId)), + (oldData: GetNewLLMConfigsResponse | undefined) => { + if (!oldData) return oldData; + return oldData.filter((config) => config.id !== request.id); + } + ); + }, + onError: (error: Error) => { + toast.error(error.message || "Failed to delete configuration"); + }, + }; +}); + +/** + * Mutation atom for updating LLM preferences (role assignments) + */ +export const updateLLMPreferencesMutationAtom = atomWithMutation((get) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + + return { + mutationKey: ["llm-preferences", "update"], + enabled: !!searchSpaceId, + mutationFn: async (request: UpdateLLMPreferencesRequest) => { + return newLLMConfigApiService.updateLLMPreferences(request); + }, + onSuccess: () => { + queryClient.invalidateQueries({ + queryKey: cacheKeys.newLLMConfigs.preferences(Number(searchSpaceId)), + }); + }, + onError: (error: Error) => { + toast.error(error.message || "Failed to update LLM preferences"); + }, + }; +}); diff --git a/surfsense_web/atoms/new-llm-config/new-llm-config-query.atoms.ts b/surfsense_web/atoms/new-llm-config/new-llm-config-query.atoms.ts new file mode 100644 index 000000000..9f5085d33 --- /dev/null +++ b/surfsense_web/atoms/new-llm-config/new-llm-config-query.atoms.ts @@ -0,0 +1,64 @@ +import { atomWithQuery } from "jotai-tanstack-query"; +import { newLLMConfigApiService } from "@/lib/apis/new-llm-config-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; +import { activeSearchSpaceIdAtom } from "../search-spaces/search-space-query.atoms"; + +/** + * Query atom for fetching all NewLLMConfigs for the active search space + */ +export const newLLMConfigsAtom = atomWithQuery((get) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + + return { + queryKey: cacheKeys.newLLMConfigs.all(Number(searchSpaceId)), + enabled: !!searchSpaceId, + staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: async () => { + return newLLMConfigApiService.getConfigs({ 
+ search_space_id: Number(searchSpaceId), + }); + }, + }; +}); + +/** + * Query atom for fetching global NewLLMConfigs (from YAML, negative IDs) + */ +export const globalNewLLMConfigsAtom = atomWithQuery(() => { + return { + queryKey: cacheKeys.newLLMConfigs.global(), + staleTime: 10 * 60 * 1000, // 10 minutes - global configs rarely change + queryFn: async () => { + return newLLMConfigApiService.getGlobalConfigs(); + }, + }; +}); + +/** + * Query atom for fetching LLM preferences (role assignments) for the active search space + */ +export const llmPreferencesAtom = atomWithQuery((get) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + + return { + queryKey: cacheKeys.newLLMConfigs.preferences(Number(searchSpaceId)), + enabled: !!searchSpaceId, + staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: async () => { + return newLLMConfigApiService.getLLMPreferences(Number(searchSpaceId)); + }, + }; +}); + +/** + * Query atom for fetching default system instructions template + */ +export const defaultSystemInstructionsAtom = atomWithQuery(() => { + return { + queryKey: cacheKeys.newLLMConfigs.defaultInstructions(), + staleTime: 60 * 60 * 1000, // 1 hour - this rarely changes + queryFn: async () => { + return newLLMConfigApiService.getDefaultSystemInstructions(); + }, + }; +}); diff --git a/surfsense_web/atoms/podcasts/podcast-mutation.atoms.ts b/surfsense_web/atoms/podcasts/podcast-mutation.atoms.ts deleted file mode 100644 index cdb28ceb2..000000000 --- a/surfsense_web/atoms/podcasts/podcast-mutation.atoms.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { atomWithMutation } from "jotai-tanstack-query"; -import { toast } from "sonner"; -import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; -import type { - DeletePodcastRequest, - GeneratePodcastRequest, - Podcast, -} from "@/contracts/types/podcast.types"; -import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; -import { getBearerToken } from "@/lib/auth-utils"; 
-import { cacheKeys } from "@/lib/query-client/cache-keys"; -import { queryClient } from "@/lib/query-client/client"; -import { globalPodcastsQueryParamsAtom } from "./ui.atoms"; - -export const deletePodcastMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - const authToken = getBearerToken(); - const podcastsQueryParams = get(globalPodcastsQueryParamsAtom); - - return { - mutationKey: cacheKeys.podcasts.globalQueryParams(podcastsQueryParams), - enabled: !!searchSpaceId && !!authToken, - mutationFn: async (request: DeletePodcastRequest) => { - return podcastsApiService.deletePodcast(request); - }, - - onSuccess: (_, request: DeletePodcastRequest) => { - toast.success("Podcast deleted successfully"); - queryClient.setQueryData( - cacheKeys.podcasts.globalQueryParams(podcastsQueryParams), - (oldData: Podcast[]) => { - return oldData.filter((podcast) => podcast.id !== request.id); - } - ); - }, - }; -}); - -export const generatePodcastMutationAtom = atomWithMutation((get) => { - const searchSpaceId = get(activeSearchSpaceIdAtom); - const authToken = getBearerToken(); - const podcastsQueryParams = get(globalPodcastsQueryParamsAtom); - - return { - mutationKey: cacheKeys.podcasts.globalQueryParams(podcastsQueryParams), - enabled: !!searchSpaceId && !!authToken, - mutationFn: async (request: GeneratePodcastRequest) => { - return podcastsApiService.generatePodcast(request); - }, - }; -}); diff --git a/surfsense_web/atoms/podcasts/podcast-query.atoms.ts b/surfsense_web/atoms/podcasts/podcast-query.atoms.ts deleted file mode 100644 index ea5c1c104..000000000 --- a/surfsense_web/atoms/podcasts/podcast-query.atoms.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { atomWithQuery } from "jotai-tanstack-query"; -import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; -import { globalPodcastsQueryParamsAtom } from "./ui.atoms"; - -export const podcastsAtom = 
atomWithQuery((get) => { - const queryParams = get(globalPodcastsQueryParamsAtom); - - return { - queryKey: cacheKeys.podcasts.globalQueryParams(queryParams), - queryFn: async () => { - return podcastsApiService.getPodcasts({ - queryParams: queryParams, - }); - }, - }; -}); diff --git a/surfsense_web/atoms/podcasts/ui.atoms.ts b/surfsense_web/atoms/podcasts/ui.atoms.ts deleted file mode 100644 index 0f2701375..000000000 --- a/surfsense_web/atoms/podcasts/ui.atoms.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { atom } from "jotai"; -import type { GetPodcastsRequest } from "@/contracts/types/podcast.types"; - -export const globalPodcastsQueryParamsAtom = atom({ - limit: 5, - skip: 0, -}); diff --git a/surfsense_web/atoms/search-spaces/search-space-query.atoms.ts b/surfsense_web/atoms/search-spaces/search-space-query.atoms.ts index 4aa024e93..588466d90 100644 --- a/surfsense_web/atoms/search-spaces/search-space-query.atoms.ts +++ b/surfsense_web/atoms/search-spaces/search-space-query.atoms.ts @@ -25,13 +25,3 @@ export const searchSpacesAtom = atomWithQuery((get) => { }, }; }); - -export const communityPromptsAtom = atomWithQuery(() => { - return { - queryKey: cacheKeys.searchSpaces.communityPrompts, - staleTime: 30 * 60 * 1000, - queryFn: async () => { - return searchSpacesApiService.getCommunityPrompts(); - }, - }; -}); diff --git a/surfsense_web/components/assistant-ui/attachment.tsx b/surfsense_web/components/assistant-ui/attachment.tsx new file mode 100644 index 000000000..dfb63cbf3 --- /dev/null +++ b/surfsense_web/components/assistant-ui/attachment.tsx @@ -0,0 +1,252 @@ +"use client"; + +import { + AttachmentPrimitive, + ComposerPrimitive, + MessagePrimitive, + useAssistantApi, + useAssistantState, +} from "@assistant-ui/react"; +import { FileText, Loader2, PlusIcon, XIcon } from "lucide-react"; +import Image from "next/image"; +import { type FC, type PropsWithChildren, useEffect, useState } from "react"; +import { useShallow } from "zustand/shallow"; +import { 
TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; +import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; +import { Dialog, DialogContent, DialogTitle, DialogTrigger } from "@/components/ui/dialog"; +import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; +import { cn } from "@/lib/utils"; + +const useFileSrc = (file: File | undefined) => { + const [src, setSrc] = useState(undefined); + + useEffect(() => { + if (!file) { + setSrc(undefined); + return; + } + + const objectUrl = URL.createObjectURL(file); + setSrc(objectUrl); + + return () => { + URL.revokeObjectURL(objectUrl); + }; + }, [file]); + + return src; +}; + +const useAttachmentSrc = () => { + const { file, src } = useAssistantState( + useShallow(({ attachment }): { file?: File; src?: string } => { + if (!attachment || attachment.type !== "image") return {}; + if (attachment.file) return { file: attachment.file }; + // Only try to filter if content is an array (standard assistant-ui format) + // Our custom ChatAttachment has content as a string, so skip this + if (Array.isArray(attachment.content)) { + const src = attachment.content.filter((c) => c.type === "image")[0]?.image; + if (src) return { src }; + } + return {}; + }) + ); + + return useFileSrc(file) ?? src; +}; + +type AttachmentPreviewProps = { + src: string; +}; + +const AttachmentPreview: FC = ({ src }) => { + const [isLoaded, setIsLoaded] = useState(false); + return ( + Image Preview setIsLoaded(true)} + priority={false} + /> + ); +}; + +const AttachmentPreviewDialog: FC = ({ children }) => { + const src = useAttachmentSrc(); + + if (!src) return children; + + return ( + + + {children} + + + Image Attachment Preview +
+ +
+
+
+ ); +}; + +const AttachmentThumb: FC = () => { + const isImage = useAssistantState(({ attachment }) => attachment?.type === "image"); + // Check if actively processing (running AND progress < 100) + // When progress is 100, processing is done but waiting for send() + const isProcessing = useAssistantState(({ attachment }) => { + const status = attachment?.status; + if (status?.type !== "running") return false; + // If progress is defined and equals 100, processing is complete + const progress = (status as { type: "running"; progress?: number }).progress; + return progress === undefined || progress < 100; + }); + const src = useAttachmentSrc(); + + // Show loading spinner only when actively processing (not when done and waiting for send) + if (isProcessing) { + return ( +
+ +
+ ); + } + + return ( + + + + + + + ); +}; + +const AttachmentUI: FC = () => { + const api = useAssistantApi(); + const isComposer = api.attachment.source === "composer"; + + const isImage = useAssistantState(({ attachment }) => attachment?.type === "image"); + // Check if actively processing (running AND progress < 100) + // When progress is 100, processing is done but waiting for send() + const isProcessing = useAssistantState(({ attachment }) => { + const status = attachment?.status; + if (status?.type !== "running") return false; + const progress = (status as { type: "running"; progress?: number }).progress; + return progress === undefined || progress < 100; + }); + const typeLabel = useAssistantState(({ attachment }) => { + const type = attachment?.type; + switch (type) { + case "image": + return "Image"; + case "document": + return "Document"; + case "file": + return "File"; + default: + return "File"; // Default fallback for unknown types + } + }); + + return ( + + #attachment-tile]:size-24" + )} + > + + +
+ +
+
+
+ {isComposer && !isProcessing && } +
+ + {isProcessing ? ( + + + Processing... + + ) : ( + + )} + +
+ ); +}; + +const AttachmentRemove: FC = () => { + return ( + + + + + + ); +}; + +export const UserMessageAttachments: FC = () => { + return ( +
+ +
+ ); +}; + +export const ComposerAttachments: FC = () => { + return ( +
+ +
+ ); +}; + +export const ComposerAddAttachment: FC = () => { + return ( + + + + + + ); +}; diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx new file mode 100644 index 000000000..065f37e8e --- /dev/null +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -0,0 +1,41 @@ +"use client"; + +import type { FC } from "react"; +import { useState } from "react"; +import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel"; + +interface InlineCitationProps { + chunkId: number; + citationNumber: number; +} + +/** + * Inline citation component for the new chat. + * Renders a clickable numbered badge that opens the SourceDetailPanel with document chunk details. + */ +export const InlineCitation: FC = ({ chunkId, citationNumber }) => { + const [isOpen, setIsOpen] = useState(false); + + return ( + + setIsOpen(true)} + onKeyDown={(e) => e.key === "Enter" && setIsOpen(true)} + className="text-[10px] font-bold bg-primary/80 hover:bg-primary text-primary-foreground rounded-full min-w-4 h-4 px-1 inline-flex items-center justify-center align-super cursor-pointer transition-colors ml-0.5" + title={`View source #${citationNumber}`} + role="button" + tabIndex={0} + > + {citationNumber} + + + ); +}; diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx new file mode 100644 index 000000000..41d6143b9 --- /dev/null +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -0,0 +1,325 @@ +"use client"; + +import "@assistant-ui/react-markdown/styles/dot.css"; + +import { + type CodeHeaderProps, + MarkdownTextPrimitive, + unstable_memoizeMarkdownComponents as memoizeMarkdownComponents, + useIsMarkdownCodeBlock, +} from "@assistant-ui/react-markdown"; +import { CheckIcon, CopyIcon } from "lucide-react"; +import { type FC, memo, type ReactNode, useState } from "react"; +import remarkGfm from "remark-gfm"; 
+import { InlineCitation } from "@/components/assistant-ui/inline-citation"; +import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; +import { cn } from "@/lib/utils"; + +// Citation pattern: [citation:CHUNK_ID] +const CITATION_REGEX = /\[citation:(\d+)\]/g; + +// Track chunk IDs to citation numbers mapping for consistent numbering +// This map is reset when a new message starts rendering +let chunkIdToCitationNumber: Map = new Map(); +let nextCitationNumber = 1; + +/** + * Resets the citation counter - should be called at the start of each message + */ +export function resetCitationCounter() { + chunkIdToCitationNumber = new Map(); + nextCitationNumber = 1; +} + +/** + * Gets or assigns a citation number for a chunk ID + */ +function getCitationNumber(chunkId: number): number { + if (!chunkIdToCitationNumber.has(chunkId)) { + chunkIdToCitationNumber.set(chunkId, nextCitationNumber++); + } + return chunkIdToCitationNumber.get(chunkId)!; +} + +/** + * Parses text and replaces [citation:XXX] patterns with InlineCitation components + */ +function parseTextWithCitations(text: string): ReactNode[] { + const parts: ReactNode[] = []; + let lastIndex = 0; + let match: RegExpExecArray | null; + let instanceIndex = 0; + + // Reset regex state + CITATION_REGEX.lastIndex = 0; + + while ((match = CITATION_REGEX.exec(text)) !== null) { + // Add text before the citation + if (match.index > lastIndex) { + parts.push(text.substring(lastIndex, match.index)); + } + + // Add the citation component + const chunkId = Number.parseInt(match[1], 10); + const citationNumber = getCitationNumber(chunkId); + parts.push( + + ); + + lastIndex = match.index + match[0].length; + instanceIndex++; + } + + // Add any remaining text after the last citation + if (lastIndex < text.length) { + parts.push(text.substring(lastIndex)); + } + + return parts.length > 0 ? 
parts : [text]; +} + +const MarkdownTextImpl = () => { + // Reset citation counter at the start of each render + // This ensures consistent numbering as the message streams in + resetCitationCounter(); + + return ( + + ); +}; + +export const MarkdownText = memo(MarkdownTextImpl); + +const CodeHeader: FC = ({ language, code }) => { + const { isCopied, copyToClipboard } = useCopyToClipboard(); + const onCopy = () => { + if (!code || isCopied) return; + copyToClipboard(code); + }; + + return ( +
+ {language} + + {!isCopied && } + {isCopied && } + +
+ ); +}; + +const useCopyToClipboard = ({ copiedDuration = 3000 }: { copiedDuration?: number } = {}) => { + const [isCopied, setIsCopied] = useState(false); + + const copyToClipboard = (value: string) => { + if (!value) return; + + navigator.clipboard.writeText(value).then(() => { + setIsCopied(true); + setTimeout(() => setIsCopied(false), copiedDuration); + }); + }; + + return { isCopied, copyToClipboard }; +}; + +/** + * Helper to process children and replace citation patterns with components + */ +function processChildrenWithCitations(children: ReactNode): ReactNode { + if (typeof children === "string") { + const parsed = parseTextWithCitations(children); + return parsed.length === 1 && typeof parsed[0] === "string" ? children : <>{parsed}; + } + + if (Array.isArray(children)) { + return children.map((child, index) => { + if (typeof child === "string") { + const parsed = parseTextWithCitations(child); + return parsed.length === 1 && typeof parsed[0] === "string" ? ( + child + ) : ( + {parsed} + ); + } + return child; + }); + } + + return children; +} + +const defaultComponents = memoizeMarkdownComponents({ + h1: ({ className, children, ...props }) => ( +

+ {processChildrenWithCitations(children)} +

+ ), + h2: ({ className, children, ...props }) => ( +

+ {processChildrenWithCitations(children)} +

+ ), + h3: ({ className, children, ...props }) => ( +

+ {processChildrenWithCitations(children)} +

+ ), + h4: ({ className, children, ...props }) => ( +

+ {processChildrenWithCitations(children)} +

+ ), + h5: ({ className, children, ...props }) => ( +
+ {processChildrenWithCitations(children)} +
+ ), + h6: ({ className, children, ...props }) => ( +
+ {processChildrenWithCitations(children)} +
+ ), + p: ({ className, children, ...props }) => ( +

+ {processChildrenWithCitations(children)} +

+ ), + a: ({ className, children, ...props }) => ( + + {processChildrenWithCitations(children)} + + ), + blockquote: ({ className, children, ...props }) => ( +
+ {processChildrenWithCitations(children)} +
+ ), + ul: ({ className, ...props }) => ( +
    li]:mt-2", className)} {...props} /> + ), + ol: ({ className, ...props }) => ( +
      li]:mt-2", className)} {...props} /> + ), + li: ({ className, children, ...props }) => ( +
    1. + {processChildrenWithCitations(children)} +
    2. + ), + hr: ({ className, ...props }) => ( +
      + ), + table: ({ className, ...props }) => ( + + ), + th: ({ className, children, ...props }) => ( + + ), + td: ({ className, children, ...props }) => ( + + ), + tr: ({ className, ...props }) => ( + td:first-child]:rounded-bl-lg [&:last-child>td:last-child]:rounded-br-lg", + className + )} + {...props} + /> + ), + sup: ({ className, ...props }) => ( + a]:text-xs [&>a]:no-underline", className)} {...props} /> + ), + pre: ({ className, ...props }) => ( +
      +	),
      +	code: function Code({ className, ...props }) {
      +		const isCodeBlock = useIsMarkdownCodeBlock();
      +		return (
      +			
      +		);
      +	},
      +	strong: ({ className, children, ...props }) => (
      +		
      +			{processChildrenWithCitations(children)}
      +		
      +	),
      +	em: ({ className, children, ...props }) => (
      +		
      +			{processChildrenWithCitations(children)}
      +		
      +	),
      +	CodeHeader,
      +});
      diff --git a/surfsense_web/components/assistant-ui/thread-list.tsx b/surfsense_web/components/assistant-ui/thread-list.tsx
      new file mode 100644
      index 000000000..f65acd5c6
      --- /dev/null
      +++ b/surfsense_web/components/assistant-ui/thread-list.tsx
      @@ -0,0 +1,299 @@
      +"use client";
      +
      +import {
      +	ArchiveIcon,
      +	MessageSquareIcon,
      +	MoreVerticalIcon,
      +	PlusIcon,
      +	RotateCcwIcon,
      +	TrashIcon,
      +} from "lucide-react";
      +import { useRouter } from "next/navigation";
      +import { useCallback, useEffect, useState } from "react";
      +import { Button } from "@/components/ui/button";
      +import {
      +	DropdownMenu,
      +	DropdownMenuContent,
      +	DropdownMenuItem,
      +	DropdownMenuSeparator,
      +	DropdownMenuTrigger,
      +} from "@/components/ui/dropdown-menu";
      +import {
      +	createThreadListManager,
      +	type ThreadListItem,
      +	type ThreadListState,
      +} from "@/lib/chat/thread-persistence";
      +import { cn } from "@/lib/utils";
      +
      +interface ThreadListProps {
      +	searchSpaceId: number;
      +	currentThreadId?: number;
      +	className?: string;
      +}
      +
      +export function ThreadList({ searchSpaceId, currentThreadId, className }: ThreadListProps) {
      +	const router = useRouter();
      +	const [state, setState] = useState({
      +		threads: [],
      +		archivedThreads: [],
      +		isLoading: true,
      +		error: null,
      +	});
      +	const [showArchived, setShowArchived] = useState(false);
      +
      +	// Create the thread list manager
      +	const manager = useCallback(
      +		() =>
      +			createThreadListManager({
      +				searchSpaceId,
      +				currentThreadId: currentThreadId ?? null,
      +				onThreadSwitch: (threadId) => {
      +					router.push(`/dashboard/${searchSpaceId}/new-chat/${threadId}`);
      +				},
      +				onNewThread: (threadId) => {
      +					router.push(`/dashboard/${searchSpaceId}/new-chat/${threadId}`);
      +				},
      +			}),
      +		[searchSpaceId, currentThreadId, router]
      +	);
      +
      +	// Load threads on mount and when searchSpaceId changes
      +	const loadThreads = useCallback(async () => {
      +		setState((prev) => ({ ...prev, isLoading: true }));
      +		const newState = await manager().loadThreads();
      +		setState(newState);
      +	}, [manager]);
      +
      +	useEffect(() => {
      +		loadThreads();
      +	}, [loadThreads]);
      +
      +	// Handle new thread creation
      +	const handleNewThread = async () => {
      +		await manager().createNewThread();
      +		await loadThreads();
      +	};
      +
      +	// Handle thread actions
      +	const handleArchive = async (threadId: number) => {
      +		const success = await manager().archiveThread(threadId);
      +		if (success) await loadThreads();
      +	};
      +
      +	const handleUnarchive = async (threadId: number) => {
      +		const success = await manager().unarchiveThread(threadId);
      +		if (success) await loadThreads();
      +	};
      +
      +	const handleDelete = async (threadId: number) => {
      +		const success = await manager().deleteThread(threadId);
      +		if (success) {
      +			await loadThreads();
      +			// If we deleted the current thread, redirect to new chat
      +			if (threadId === currentThreadId) {
      +				router.push(`/dashboard/${searchSpaceId}/new-chat`);
      +			}
      +		}
      +	};
      +
      +	const handleSwitchToThread = (threadId: number) => {
      +		manager().switchToThread(threadId);
      +	};
      +
      +	const displayedThreads = showArchived ? state.archivedThreads : state.threads;
      +
      +	if (state.isLoading) {
      +		return (
      +			
      +
      + Loading threads... +
      +
      + ); + } + + if (state.error) { + return ( +
      +
      + {state.error} + +
      +
      + ); + } + + return ( +
      + {/* Header with New Chat button */} +
      +

      Conversations

      + +
      + + {/* Tab toggle for active/archived */} +
      + + +
      + + {/* Thread list */} +
      + {displayedThreads.length === 0 ? ( +
      + +

      + {showArchived ? "No archived conversations" : "No conversations yet"} +

      + {!showArchived && ( + + )} +
      + ) : ( +
      + {displayedThreads.map((thread) => ( + handleSwitchToThread(thread.id)} + onArchive={() => handleArchive(thread.id)} + onUnarchive={() => handleUnarchive(thread.id)} + onDelete={() => handleDelete(thread.id)} + /> + ))} +
      + )} +
      +
      + ); +} + +interface ThreadListItemComponentProps { + thread: ThreadListItem; + isActive: boolean; + isArchived: boolean; + onClick: () => void; + onArchive: () => void; + onUnarchive: () => void; + onDelete: () => void; +} + +function ThreadListItemComponent({ + thread, + isActive, + isArchived, + onClick, + onArchive, + onUnarchive, + onDelete, +}: ThreadListItemComponentProps) { + return ( +
      { + if (e.key === "Enter" || e.key === " ") onClick(); + }} + role="button" + tabIndex={0} + > + +
      +

      {thread.title || "New Chat"}

      +

      + {formatRelativeTime(new Date(thread.updatedAt))} +

      +
      + + + + + + {isArchived ? ( + + + Unarchive + + ) : ( + + + Archive + + )} + + + + Delete + + + +
      + ); +} + +/** + * Format a date as relative time (e.g., "2 hours ago", "Yesterday") + */ +function formatRelativeTime(date: Date): string { + const now = new Date(); + const diffMs = now.getTime() - date.getTime(); + const diffSecs = Math.floor(diffMs / 1000); + const diffMins = Math.floor(diffSecs / 60); + const diffHours = Math.floor(diffMins / 60); + const diffDays = Math.floor(diffHours / 24); + + if (diffSecs < 60) return "Just now"; + if (diffMins < 60) return `${diffMins} min${diffMins === 1 ? "" : "s"} ago`; + if (diffHours < 24) return `${diffHours} hour${diffHours === 1 ? "" : "s"} ago`; + if (diffDays === 1) return "Yesterday"; + if (diffDays < 7) return `${diffDays} days ago`; + + return date.toLocaleDateString(); +} diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx new file mode 100644 index 000000000..33d6a0cad --- /dev/null +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -0,0 +1,862 @@ +import { + ActionBarPrimitive, + AssistantIf, + BranchPickerPrimitive, + ComposerPrimitive, + ErrorPrimitive, + MessagePrimitive, + ThreadPrimitive, + useAssistantState, + useMessage, + useThreadViewport, +} from "@assistant-ui/react"; +import { useAtomValue } from "jotai"; +import { + AlertCircle, + ArrowDownIcon, + ArrowUpIcon, + Brain, + CheckCircle2, + CheckIcon, + ChevronLeftIcon, + ChevronRightIcon, + CopyIcon, + DownloadIcon, + Loader2, + PencilIcon, + Plug2, + Plus, + RefreshCwIcon, + Search, + Sparkles, + SquareIcon, +} from "lucide-react"; +import Link from "next/link"; +import { type FC, useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { getDocumentTypeLabel } from "@/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon"; +import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms"; +import { + globalNewLLMConfigsAtom, + llmPreferencesAtom, + newLLMConfigsAtom, +} from 
"@/atoms/new-llm-config/new-llm-config-query.atoms"; +import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; +import { currentUserAtom } from "@/atoms/user/user-query.atoms"; +import { + ComposerAddAttachment, + ComposerAttachments, + UserMessageAttachments, +} from "@/components/assistant-ui/attachment"; +import { MarkdownText } from "@/components/assistant-ui/markdown-text"; +import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; +import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; +import { + ChainOfThought, + ChainOfThoughtContent, + ChainOfThoughtItem, + ChainOfThoughtStep, + ChainOfThoughtTrigger, +} from "@/components/prompt-kit/chain-of-thought"; +import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; +import { Button } from "@/components/ui/button"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; +import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; +import { cn } from "@/lib/utils"; + +/** + * Props for the Thread component + */ +interface ThreadProps { + messageThinkingSteps?: Map; +} + +// Context to pass thinking steps to AssistantMessage +import { createContext, useContext } from "react"; + +const ThinkingStepsContext = createContext>(new Map()); + +/** + * Get icon based on step status and title + */ +function getStepIcon(status: "pending" | "in_progress" | "completed", title: string) { + const titleLower = title.toLowerCase(); + + if (status === "in_progress") { + return ; + } + + if (status === "completed") { + return ; + } + + if (titleLower.includes("search") || titleLower.includes("knowledge")) { + return ; + } + + if (titleLower.includes("analy") || titleLower.includes("understand")) { + return ; + } + + return ; +} + +/** + * Chain of thought display component with smart expand/collapse behavior + */ +const 
ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: boolean }> = ({ + steps, + isThreadRunning = true, +}) => { + // Track which steps the user has manually toggled (overrides auto behavior) + const [manualOverrides, setManualOverrides] = useState>({}); + // Track previous step statuses to detect changes + const prevStatusesRef = useRef>({}); + + // Derive effective status: if thread stopped and step is in_progress, treat as completed + const getEffectiveStatus = (step: ThinkingStep): "pending" | "in_progress" | "completed" => { + if (step.status === "in_progress" && !isThreadRunning) { + return "completed"; // Thread was stopped, so mark as completed + } + return step.status; + }; + + // Check if any step is effectively in progress + const hasInProgressStep = steps.some((step) => getEffectiveStatus(step) === "in_progress"); + + // Find the last completed step index (using effective status) + const lastCompletedIndex = steps + .map((s, i) => (getEffectiveStatus(s) === "completed" ? 
i : -1)) + .filter((i) => i !== -1) + .pop(); + + // Clear manual overrides when a step's status changes + useEffect(() => { + const currentStatuses: Record = {}; + steps.forEach((step) => { + currentStatuses[step.id] = step.status; + // If status changed, clear any manual override for this step + if (prevStatusesRef.current[step.id] && prevStatusesRef.current[step.id] !== step.status) { + setManualOverrides((prev) => { + const next = { ...prev }; + delete next[step.id]; + return next; + }); + } + }); + prevStatusesRef.current = currentStatuses; + }, [steps]); + + if (steps.length === 0) return null; + + const getStepOpenState = (step: ThinkingStep, index: number): boolean => { + const effectiveStatus = getEffectiveStatus(step); + // If user has manually toggled, respect that + if (manualOverrides[step.id] !== undefined) { + return manualOverrides[step.id]; + } + // Auto behavior: open if in progress + if (effectiveStatus === "in_progress") { + return true; + } + // Auto behavior: keep last completed step open if no in-progress step + if (!hasInProgressStep && index === lastCompletedIndex) { + return true; + } + // Default: collapsed + return false; + }; + + const handleToggle = (stepId: string, currentOpen: boolean) => { + setManualOverrides((prev) => ({ + ...prev, + [stepId]: !currentOpen, + })); + }; + + return ( +
      + + {steps.map((step, index) => { + const effectiveStatus = getEffectiveStatus(step); + const icon = getStepIcon(effectiveStatus, step.title); + const isOpen = getStepOpenState(step, index); + return ( + handleToggle(step.id, isOpen)} + > + + {step.title} + + {step.items && step.items.length > 0 && ( + + {step.items.map((item, idx) => ( + {item} + ))} + + )} + + ); + })} + +
      + ); +}; + +/** + * Component that handles auto-scroll when thinking steps update. + * Uses useThreadViewport to scroll to bottom when thinking steps change, + * ensuring the user always sees the latest content during streaming. + */ +const ThinkingStepsScrollHandler: FC = () => { + const thinkingStepsMap = useContext(ThinkingStepsContext); + const viewport = useThreadViewport(); + const isRunning = useAssistantState(({ thread }) => thread.isRunning); + // Track the serialized state to detect any changes + const prevStateRef = useRef(""); + + useEffect(() => { + // Only act during streaming + if (!isRunning) { + prevStateRef.current = ""; + return; + } + + // Serialize the thinking steps state to detect any changes + // This catches new steps, status changes, and item additions + let stateString = ""; + thinkingStepsMap.forEach((steps, msgId) => { + steps.forEach((step) => { + stateString += `${msgId}:${step.id}:${step.status}:${step.items?.length || 0};`; + }); + }); + + // If state changed at all during streaming, scroll + if (stateString !== prevStateRef.current && stateString !== "") { + prevStateRef.current = stateString; + + // Multiple attempts to ensure scroll happens after DOM updates + const scrollAttempt = () => { + try { + viewport.scrollToBottom(); + } catch (e) { + // Ignore errors - viewport might not be ready + } + }; + + // Delayed attempts to handle async DOM updates + requestAnimationFrame(scrollAttempt); + setTimeout(scrollAttempt, 100); + } + }, [thinkingStepsMap, viewport, isRunning]); + + return null; // This component doesn't render anything +}; + +export const Thread: FC = ({ messageThinkingSteps = new Map() }) => { + return ( + + + + {/* Auto-scroll handler for thinking steps - must be inside Viewport */} + + + thread.isEmpty}> + + + + + + + + !thread.isEmpty}> +
      + +
      +
      +
      +
      +
      +
      + ); +}; + +const ThreadScrollToBottom: FC = () => { + return ( + + + + + + ); +}; + +const getTimeBasedGreeting = (userEmail?: string): string => { + const hour = new Date().getHours(); + + // Extract first name from email if available + const firstName = userEmail + ? userEmail.split("@")[0].split(".")[0].charAt(0).toUpperCase() + + userEmail.split("@")[0].split(".")[0].slice(1) + : null; + + // Array of greeting variations for each time period + const morningGreetings = ["Good morning", "Rise and shine", "Morning", "Hey there"]; + + const afternoonGreetings = ["Good afternoon", "Afternoon", "Hey there", "Hi there"]; + + const eveningGreetings = ["Good evening", "Evening", "Hey there", "Hi there"]; + + const nightGreetings = ["Good night", "Evening", "Hey there", "Winding down"]; + + const lateNightGreetings = ["Still up", "Night owl mode", "The night is young", "Hi there"]; + + // Select a random greeting based on time + let greeting: string; + if (hour < 5) { + // Late night: midnight to 5 AM + greeting = lateNightGreetings[Math.floor(Math.random() * lateNightGreetings.length)]; + } else if (hour < 12) { + greeting = morningGreetings[Math.floor(Math.random() * morningGreetings.length)]; + } else if (hour < 18) { + greeting = afternoonGreetings[Math.floor(Math.random() * afternoonGreetings.length)]; + } else if (hour < 22) { + greeting = eveningGreetings[Math.floor(Math.random() * eveningGreetings.length)]; + } else { + // Night: 10 PM to midnight + greeting = nightGreetings[Math.floor(Math.random() * nightGreetings.length)]; + } + + // Add personalization with first name if available + if (firstName) { + return `${greeting}, ${firstName}!`; + } + + return `${greeting}!`; +}; + +const ThreadWelcome: FC = () => { + const { data: user } = useAtomValue(currentUserAtom); + + return ( +
      + {/* Greeting positioned above the composer - fixed position */} +
      +

      + {getTimeBasedGreeting(user?.email)} +

      +
      + {/* Composer - top edge fixed, expands downward only */} +
      + +
      +
      + ); +}; + +const Composer: FC = () => { + // Check if a model is configured - needed to disable input + const { data: userConfigs } = useAtomValue(newLLMConfigsAtom); + const { data: globalConfigs } = useAtomValue(globalNewLLMConfigsAtom); + const { data: preferences } = useAtomValue(llmPreferencesAtom); + + const hasModelConfigured = useMemo(() => { + if (!preferences) return false; + const agentLlmId = preferences.agent_llm_id; + if (agentLlmId === null || agentLlmId === undefined) return false; + + // Check if the configured model actually exists + if (agentLlmId < 0) { + return globalConfigs?.some((c) => c.id === agentLlmId) ?? false; + } + return userConfigs?.some((c) => c.id === agentLlmId) ?? false; + }, [preferences, globalConfigs, userConfigs]); + + return ( + + + + + + + + ); +}; + +const ConnectorIndicator: FC = () => { + const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom); + const { connectors, isLoading: connectorsLoading } = useSearchSourceConnectors( + false, + searchSpaceId ? Number(searchSpaceId) : undefined + ); + const { data: documentTypeCounts, isLoading: documentTypesLoading } = + useAtomValue(documentTypeCountsAtom); + const [isOpen, setIsOpen] = useState(false); + const closeTimeoutRef = useRef(null); + + const isLoading = connectorsLoading || documentTypesLoading; + + // Get document types that have documents in the search space + const activeDocumentTypes = documentTypeCounts + ? 
Object.entries(documentTypeCounts).filter(([_, count]) => count > 0) + : []; + + const hasConnectors = connectors.length > 0; + const hasSources = hasConnectors || activeDocumentTypes.length > 0; + const totalSourceCount = connectors.length + activeDocumentTypes.length; + + const handleMouseEnter = useCallback(() => { + // Clear any pending close timeout + if (closeTimeoutRef.current) { + clearTimeout(closeTimeoutRef.current); + closeTimeoutRef.current = null; + } + setIsOpen(true); + }, []); + + const handleMouseLeave = useCallback(() => { + // Delay closing by 150ms for better UX + closeTimeoutRef.current = setTimeout(() => { + setIsOpen(false); + }, 150); + }, []); + + if (!searchSpaceId) return null; + + return ( + + + + + + {hasSources ? ( +
      +
      +

      Connected Sources

      + + {totalSourceCount} + +
      +
      + {/* Document types from the search space */} + {activeDocumentTypes.map(([docType, count]) => ( +
      + {getConnectorIcon(docType, "size-3.5")} + {getDocumentTypeLabel(docType)} +
      + ))} + {/* Search source connectors */} + {connectors.map((connector) => ( +
      + {getConnectorIcon(connector.connector_type, "size-3.5")} + {connector.name} +
      + ))} +
      +
      + + + Add more sources + + +
      +
      + ) : ( +
      +

      No sources yet

      +

      + Add documents or connect data sources to enhance search results. +

      + + + Add Connector + +
      + )} +
      +
      + ); +}; + +const ComposerAction: FC = () => { + // Check if any attachments are still being processed (running AND progress < 100) + // When progress is 100, processing is done but waiting for send() + const hasProcessingAttachments = useAssistantState(({ composer }) => + composer.attachments?.some((att) => { + const status = att.status; + if (status?.type !== "running") return false; + const progress = (status as { type: "running"; progress?: number }).progress; + return progress === undefined || progress < 100; + }) + ); + + // Check if composer text is empty + const isComposerEmpty = useAssistantState(({ composer }) => { + const text = composer.text?.trim() || ""; + return text.length === 0; + }); + + // Check if a model is configured + const { data: userConfigs } = useAtomValue(newLLMConfigsAtom); + const { data: globalConfigs } = useAtomValue(globalNewLLMConfigsAtom); + const { data: preferences } = useAtomValue(llmPreferencesAtom); + + const hasModelConfigured = useMemo(() => { + if (!preferences) return false; + const agentLlmId = preferences.agent_llm_id; + if (agentLlmId === null || agentLlmId === undefined) return false; + + // Check if the configured model actually exists + if (agentLlmId < 0) { + return globalConfigs?.some((c) => c.id === agentLlmId) ?? false; + } + return userConfigs?.some((c) => c.id === agentLlmId) ?? false; + }, [preferences, globalConfigs, userConfigs]); + + const isSendDisabled = hasProcessingAttachments || isComposerEmpty || !hasModelConfigured; + + return ( +
      +
      + + +
      + + {/* Show processing indicator when attachments are being processed */} + {hasProcessingAttachments && ( +
      + + Processing... +
      + )} + + {/* Show warning when no model is configured */} + {!hasModelConfigured && !hasProcessingAttachments && ( +
      + + Select a model +
      + )} + + !thread.isRunning}> + + + + + + + + thread.isRunning}> + + + + +
      + ); +}; + +const MessageError: FC = () => { + return ( + + + + + + ); +}; + +/** + * Custom component to render thinking steps from Context + */ +const ThinkingStepsPart: FC = () => { + const thinkingStepsMap = useContext(ThinkingStepsContext); + + // Get the current message ID to look up thinking steps + const messageId = useMessage((m) => m.id); + const thinkingSteps = thinkingStepsMap.get(messageId) || []; + + // Check if thread is still running (for stopping the spinner when cancelled) + const isThreadRunning = useAssistantState(({ thread }) => thread.isRunning); + + if (thinkingSteps.length === 0) return null; + + return ( +
      + +
      + ); +}; + +const AssistantMessageInner: FC = () => { + return ( + <> + {/* Render thinking steps from message content - this ensures proper scroll tracking */} + + +
      + + +
      + +
      + + +
      + + ); +}; + +const AssistantMessage: FC = () => { + return ( + + + + ); +}; + +const AssistantActionBar: FC = () => { + return ( + + + + message.isCopied}> + + + !message.isCopied}> + + + + + + + + + + + + + + + + ); +}; + +const UserMessage: FC = () => { + return ( + + + +
      +
      + +
      +
      + +
      +
      + + +
      + ); +}; + +const UserActionBar: FC = () => { + return ( + + + + + + + + ); +}; + +const EditComposer: FC = () => { + return ( + + + +
      + + + + + + +
      +
      +
      + ); +}; + +const BranchPicker: FC = ({ className, ...rest }) => { + return ( + + + + + + + + / + + + + + + + + ); +}; diff --git a/surfsense_web/components/assistant-ui/tool-fallback.tsx b/surfsense_web/components/assistant-ui/tool-fallback.tsx new file mode 100644 index 000000000..636b43c36 --- /dev/null +++ b/surfsense_web/components/assistant-ui/tool-fallback.tsx @@ -0,0 +1,76 @@ +import type { ToolCallMessagePartComponent } from "@assistant-ui/react"; +import { CheckIcon, ChevronDownIcon, ChevronUpIcon, XCircleIcon } from "lucide-react"; +import { useState } from "react"; +import { Button } from "@/components/ui/button"; +import { cn } from "@/lib/utils"; + +export const ToolFallback: ToolCallMessagePartComponent = ({ + toolName, + argsText, + result, + status, +}) => { + const [isCollapsed, setIsCollapsed] = useState(true); + + const isCancelled = status?.type === "incomplete" && status.reason === "cancelled"; + const cancelledReason = + isCancelled && status.error + ? typeof status.error === "string" + ? status.error + : JSON.stringify(status.error) + : null; + + return ( +
      +
      + {isCancelled ? ( + + ) : ( + + )} +

      + {isCancelled ? "Cancelled tool: " : "Used tool: "} + {toolName} +

      + +
      + {!isCollapsed && ( +
      + {cancelledReason && ( +
      +

      + Cancelled reason: +

      +

      + {cancelledReason} +

      +
      + )} +
      +
      {argsText}
      +
      + {!isCancelled && result !== undefined && ( +
      +

      Result:

      +
      +								{typeof result === "string" ? result : JSON.stringify(result, null, 2)}
      +							
      +
      + )} +
      + )} +
      + ); +}; diff --git a/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx b/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx new file mode 100644 index 000000000..154240cb4 --- /dev/null +++ b/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx @@ -0,0 +1,36 @@ +"use client"; + +import { Slottable } from "@radix-ui/react-slot"; +import { type ComponentPropsWithRef, forwardRef } from "react"; +import { Button } from "@/components/ui/button"; +import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; +import { cn } from "@/lib/utils"; + +export type TooltipIconButtonProps = ComponentPropsWithRef & { + tooltip: string; + side?: "top" | "bottom" | "left" | "right"; +}; + +export const TooltipIconButton = forwardRef( + ({ children, tooltip, side = "bottom", className, ...rest }, ref) => { + return ( + + + + + {tooltip} + + ); + } +); + +TooltipIconButton.displayName = "TooltipIconButton"; diff --git a/surfsense_web/components/chat/AnimatedEmptyState.tsx b/surfsense_web/components/chat/AnimatedEmptyState.tsx deleted file mode 100644 index d04708aa5..000000000 --- a/surfsense_web/components/chat/AnimatedEmptyState.tsx +++ /dev/null @@ -1,151 +0,0 @@ -"use client"; - -import { useInView } from "motion/react"; -import { Manrope } from "next/font/google"; -import { useEffect, useMemo, useReducer, useRef } from "react"; -import { RoughNotation, RoughNotationGroup } from "react-rough-notation"; -import { useSidebar } from "@/components/ui/sidebar"; -import { cn } from "@/lib/utils"; - -// Font configuration - could be moved to a global font config file -const manrope = Manrope({ - subsets: ["latin"], - weight: ["400", "700"], - display: "swap", // Optimize font loading - variable: "--font-manrope", -}); - -// Constants for timing - makes it easier to adjust and more maintainable -const TIMING = { - SIDEBAR_TRANSITION: 300, // Wait for sidebar transition + buffer - LAYOUT_SETTLE: 100, // Small delay to ensure 
layout is fully settled -} as const; - -// Animation configuration -const ANIMATION_CONFIG = { - HIGHLIGHT: { - type: "highlight" as const, - animationDuration: 2000, - iterations: 3, - color: "#3b82f680", - multiline: true, - }, - UNDERLINE: { - type: "underline" as const, - animationDuration: 2000, - iterations: 3, - color: "#10b981", - }, -} as const; - -// State management with useReducer for better organization -interface HighlightState { - shouldShowHighlight: boolean; - layoutStable: boolean; -} - -type HighlightAction = - | { type: "SIDEBAR_CHANGED" } - | { type: "LAYOUT_STABILIZED" } - | { type: "SHOW_HIGHLIGHT" } - | { type: "HIDE_HIGHLIGHT" }; - -const highlightReducer = (state: HighlightState, action: HighlightAction): HighlightState => { - switch (action.type) { - case "SIDEBAR_CHANGED": - return { - shouldShowHighlight: false, - layoutStable: false, - }; - case "LAYOUT_STABILIZED": - return { - ...state, - layoutStable: true, - }; - case "SHOW_HIGHLIGHT": - return { - ...state, - shouldShowHighlight: true, - }; - case "HIDE_HIGHLIGHT": - return { - ...state, - shouldShowHighlight: false, - }; - default: - return state; - } -}; - -const initialState: HighlightState = { - shouldShowHighlight: false, - layoutStable: true, -}; - -export function AnimatedEmptyState() { - const ref = useRef(null); - const isInView = useInView(ref); - const [{ shouldShowHighlight, layoutStable }, dispatch] = useReducer( - highlightReducer, - initialState - ); - - // Memoize class names to prevent unnecessary recalculations - const headingClassName = useMemo( - () => - cn( - "text-3xl sm:text-4xl md:text-5xl lg:text-6xl font-bold tracking-tight text-neutral-900 dark:text-neutral-50 mb-6", - manrope.className - ), - [] - ); - - const paragraphClassName = useMemo( - () => "text-lg sm:text-xl text-neutral-600 dark:text-neutral-300 mb-8 max-w-2xl mx-auto", - [] - ); - - // Handle sidebar state changes - useEffect(() => { - dispatch({ type: "SIDEBAR_CHANGED" }); - - const 
stabilizeTimer = setTimeout(() => { - dispatch({ type: "LAYOUT_STABILIZED" }); - }, TIMING.SIDEBAR_TRANSITION); - - return () => clearTimeout(stabilizeTimer); - }, []); - - // Handle highlight visibility based on layout stability and viewport visibility - useEffect(() => { - if (!layoutStable || !isInView) { - dispatch({ type: "HIDE_HIGHLIGHT" }); - return; - } - - const showTimer = setTimeout(() => { - dispatch({ type: "SHOW_HIGHLIGHT" }); - }, TIMING.LAYOUT_SETTLE); - - return () => clearTimeout(showTimer); - }, [layoutStable, isInView]); - - return ( -
      -
      - -

      - - SurfSense - -

      - -

      - Let's Start Surfing{" "} - through your knowledge base. -

      -
      -
      -
      - ); -} diff --git a/surfsense_web/components/chat/ChatCitation.tsx b/surfsense_web/components/chat/ChatCitation.tsx deleted file mode 100644 index d8c681781..000000000 --- a/surfsense_web/components/chat/ChatCitation.tsx +++ /dev/null @@ -1,30 +0,0 @@ -"use client"; - -import type React from "react"; -import { useState } from "react"; -import { SheetTrigger } from "@/components/ui/sheet"; -import { SourceDetailSheet } from "./SourceDetailSheet"; - -export const CitationDisplay: React.FC<{ index: number; node: any }> = ({ index, node }) => { - const chunkId = Number(node?.id); - const sourceType = node?.metadata?.source_type; - const [isOpen, setIsOpen] = useState(false); - - return ( - - - - {index + 1} - - - - ); -}; diff --git a/surfsense_web/components/chat/ChatFurtherQuestions.tsx b/surfsense_web/components/chat/ChatFurtherQuestions.tsx deleted file mode 100644 index fbdc9edab..000000000 --- a/surfsense_web/components/chat/ChatFurtherQuestions.tsx +++ /dev/null @@ -1,36 +0,0 @@ -"use client"; - -import { getAnnotationData, type Message, useChatUI } from "@llamaindex/chat-ui"; -import { SuggestedQuestions } from "@llamaindex/chat-ui/widgets"; -import { - Accordion, - AccordionContent, - AccordionItem, - AccordionTrigger, -} from "@/components/ui/accordion"; - -export const ChatFurtherQuestions: React.FC<{ message: Message }> = ({ message }) => { - const annotations: string[][] = getAnnotationData(message, "FURTHER_QUESTIONS"); - const { append, requestData } = useChatUI(); - - if (annotations.length !== 1 || annotations[0].length === 0) { - return null; - } - - return ( - - - - Further Suggested Questions - - - - - - - ); -}; diff --git a/surfsense_web/components/chat/ChatInputGroup.tsx b/surfsense_web/components/chat/ChatInputGroup.tsx deleted file mode 100644 index 080bd65d4..000000000 --- a/surfsense_web/components/chat/ChatInputGroup.tsx +++ /dev/null @@ -1,851 +0,0 @@ -"use client"; - -import { ChatInput } from "@llamaindex/chat-ui"; -import { 
useAtom, useAtomValue } from "jotai"; -import { Brain, Check, FolderOpen, Minus, Plus, PlusCircle, Zap } from "lucide-react"; -import { useParams, useRouter } from "next/navigation"; -import React, { Suspense, useCallback, useMemo, useState } from "react"; -import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms"; -import { updateLLMPreferencesMutationAtom } from "@/atoms/llm-config/llm-config-mutation.atoms"; -import { - globalLLMConfigsAtom, - llmConfigsAtom, - llmPreferencesAtom, -} from "@/atoms/llm-config/llm-config-query.atoms"; -import { DocumentsDataTable } from "@/components/chat/DocumentsDataTable"; -import { Badge } from "@/components/ui/badge"; -import { Button } from "@/components/ui/button"; -import { - Dialog, - DialogContent, - DialogDescription, - DialogFooter, - DialogTitle, - DialogTrigger, -} from "@/components/ui/dialog"; -import { Input } from "@/components/ui/input"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from "@/components/ui/select"; -import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; -import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; -import type { Document } from "@/contracts/types/document.types"; -import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; - -const DocumentSelector = React.memo( - ({ - onSelectionChange, - selectedDocuments = [], - }: { - onSelectionChange?: (documents: Document[]) => void; - selectedDocuments?: Document[]; - }) => { - const { search_space_id } = useParams(); - const [isOpen, setIsOpen] = useState(false); - - const handleOpenChange = useCallback((open: boolean) => { - setIsOpen(open); - }, []); - - const handleSelectionChange = useCallback( - (documents: Document[]) => { - onSelectionChange?.(documents); - }, - [onSelectionChange] - ); - - const handleDone = useCallback(() => { - setIsOpen(false); - }, []); - - const selectedCount = 
React.useMemo(() => selectedDocuments.length, [selectedDocuments.length]); - - return ( - - - - - - -
      -
      - - Select Documents - - - Choose specific documents to include in your research context - -
      - -
      - -
      -
      -
      -
      - ); - } -); - -DocumentSelector.displayName = "DocumentSelector"; - -const ConnectorSelector = React.memo( - ({ - onSelectionChange, - selectedConnectors = [], - }: { - onSelectionChange?: (connectorTypes: string[]) => void; - selectedConnectors?: string[]; - }) => { - const { search_space_id } = useParams(); - const router = useRouter(); - const [isOpen, setIsOpen] = useState(false); - - // Use the documentTypeCountsAtom for fetching document types - const [documentTypeCountsQuery] = useAtom(documentTypeCountsAtom); - const { - data: documentTypeCountsData, - isLoading, - refetch: fetchDocumentTypes, - } = documentTypeCountsQuery; - - // Transform the response into the expected format - const documentTypes = useMemo(() => { - if (!documentTypeCountsData) return []; - return Object.entries(documentTypeCountsData).map(([type, count]) => ({ - type, - count, - })); - }, [documentTypeCountsData]); - - const isLoaded = !!documentTypeCountsData; - - // Fetch live search connectors immediately (non-indexable) - const { - connectors: searchConnectors, - isLoading: connectorsLoading, - isLoaded: connectorsLoaded, - fetchConnectors, - } = useSearchSourceConnectors(false, Number(search_space_id)); - - // Filter for non-indexable connectors (live search) - const liveSearchConnectors = React.useMemo( - () => searchConnectors.filter((connector) => !connector.is_indexable), - [searchConnectors] - ); - - const handleOpenChange = useCallback((open: boolean) => { - setIsOpen(open); - // Data is already loaded on mount, no need to fetch again - }, []); - - const handleConnectorToggle = useCallback( - (connectorType: string) => { - const isSelected = selectedConnectors.includes(connectorType); - const newSelection = isSelected - ? 
selectedConnectors.filter((type) => type !== connectorType) - : [...selectedConnectors, connectorType]; - onSelectionChange?.(newSelection); - }, - [selectedConnectors, onSelectionChange] - ); - - const handleSelectAll = useCallback(() => { - const allTypes = [ - ...documentTypes.map((dt) => dt.type), - ...liveSearchConnectors.map((c) => c.connector_type), - ]; - onSelectionChange?.(allTypes); - }, [documentTypes, liveSearchConnectors, onSelectionChange]); - - const handleClearAll = useCallback(() => { - onSelectionChange?.([]); - }, [onSelectionChange]); - - // Get display name for connector type - const getDisplayName = (type: string) => { - return type - .split("_") - .map((word) => word.charAt(0) + word.slice(1).toLowerCase()) - .join(" "); - }; - - // Get selected document types with their counts - const selectedDocTypes = documentTypes.filter((dt) => selectedConnectors.includes(dt.type)); - const selectedLiveConnectors = liveSearchConnectors.filter((c) => - selectedConnectors.includes(c.connector_type) - ); - - // Total selected count - const totalSelectedCount = selectedDocTypes.length + selectedLiveConnectors.length; - const totalAvailableCount = documentTypes.length + liveSearchConnectors.length; - - return ( - - - - - - -
      -
      - Select Sources - - Choose indexed document types and live search connectors to include in your search - -
      - - {isLoading || connectorsLoading ? ( -
      -
      -
      - ) : totalAvailableCount === 0 ? ( -
      -
      - -
      -

      No sources found

      -

      - Add documents or configure search connectors for this search space -

      - -
      - ) : ( - <> - {/* Live Search Connectors Section */} - {liveSearchConnectors.length > 0 && ( -
      -
      - -

      Live Search Connectors

      - - Real-time - -
      -
      - {liveSearchConnectors.map((connector) => { - const isSelected = selectedConnectors.includes(connector.connector_type); - - return ( - - ); - })} -
      -
      - )} - - {/* Document Types Section */} - {documentTypes.length > 0 && ( -
      -
      - -

      Indexed Document Types

      - - Stored - -
      -
      - {documentTypes.map((docType) => { - const isSelected = selectedConnectors.includes(docType.type); - - return ( - - ); - })} -
      -
      - )} - - )} -
      - - {totalAvailableCount > 0 && ( - - - - - )} - -
      - ); - } -); - -ConnectorSelector.displayName = "ConnectorSelector"; - -const TopKSelector = React.memo( - ({ topK = 10, onTopKChange }: { topK?: number; onTopKChange?: (topK: number) => void }) => { - const MIN_VALUE = 1; - const MAX_VALUE = 100; - - const handleIncrement = React.useCallback(() => { - if (topK < MAX_VALUE) { - onTopKChange?.(topK + 1); - } - }, [topK, onTopKChange]); - - const handleDecrement = React.useCallback(() => { - if (topK > MIN_VALUE) { - onTopKChange?.(topK - 1); - } - }, [topK, onTopKChange]); - - const handleInputChange = React.useCallback( - (e: React.ChangeEvent) => { - const value = e.target.value; - // Allow empty input for editing - if (value === "") { - return; - } - const numValue = parseInt(value, 10); - if (!isNaN(numValue) && numValue >= MIN_VALUE && numValue <= MAX_VALUE) { - onTopKChange?.(numValue); - } - }, - [onTopKChange] - ); - - const handleInputBlur = React.useCallback( - (e: React.FocusEvent) => { - const value = e.target.value; - if (value === "") { - // Reset to default if empty - onTopKChange?.(10); - return; - } - const numValue = parseInt(value, 10); - if (isNaN(numValue) || numValue < MIN_VALUE) { - onTopKChange?.(MIN_VALUE); - } else if (numValue > MAX_VALUE) { - onTopKChange?.(MAX_VALUE); - } - }, - [onTopKChange] - ); - - return ( - - - -
      - -
      - - Results -
      - -
      -
      - -
      -

      Results per Source

      -

      - Control how many results to fetch from each data source. Set a higher number to get - more information, or a lower number for faster, more focused results. -

      -
      - Recommended: 5-20 - - - Range: {MIN_VALUE}-{MAX_VALUE} - -
      -
      -
      -
      -
      - ); - } -); - -TopKSelector.displayName = "TopKSelector"; - -const LLMSelector = React.memo(() => { - const { search_space_id } = useParams(); - const searchSpaceId = Number(search_space_id); - - const { - data: llmConfigs = [], - isFetching: llmLoading, - isError: error, - } = useAtomValue(llmConfigsAtom); - const { - data: globalConfigs = [], - isFetching: globalConfigsLoading, - isError: globalConfigsError, - } = useAtomValue(globalLLMConfigsAtom); - - // Replace useLLMPreferences with jotai atoms - const { data: preferences = {}, isFetching: preferencesLoading } = - useAtomValue(llmPreferencesAtom); - const { mutateAsync: updatePreferences } = useAtomValue(updateLLMPreferencesMutationAtom); - - const isLoading = llmLoading || preferencesLoading || globalConfigsLoading; - - // Combine global and custom configs - const allConfigs = React.useMemo(() => { - return [...globalConfigs.map((config) => ({ ...config, is_global: true })), ...llmConfigs]; - }, [globalConfigs, llmConfigs]); - - // Memoize the selected config to avoid repeated lookups - const selectedConfig = React.useMemo(() => { - if (!preferences.fast_llm_id || !allConfigs.length) return null; - return allConfigs.find((config) => config.id === preferences.fast_llm_id) || null; - }, [preferences.fast_llm_id, allConfigs]); - - // Memoize the display value for the trigger - const displayValue = React.useMemo(() => { - if (!selectedConfig) return null; - return ( -
      - {selectedConfig.provider} - - - {selectedConfig.name} - - {"is_global" in selectedConfig && selectedConfig.is_global && ( - 🌐 - )} -
      - ); - }, [selectedConfig]); - - const handleValueChange = React.useCallback( - (value: string) => { - const llmId = value ? parseInt(value, 10) : undefined; - updatePreferences({ - search_space_id: searchSpaceId, - data: { fast_llm_id: llmId }, - }); - }, - [updatePreferences, searchSpaceId] - ); - - // Loading skeleton - if (isLoading) { - return ( -
      -
      -
      -
      -
      -
      - ); - } - - // Error state - if (error || globalConfigsError) { - return ( -
      - -
      - ); - } - - return ( -
      - -
      - ); -}); - -LLMSelector.displayName = "LLMSelector"; - -const CustomChatInputOptions = React.memo( - ({ - onDocumentSelectionChange, - selectedDocuments, - onConnectorSelectionChange, - selectedConnectors, - topK, - onTopKChange, - }: { - onDocumentSelectionChange?: (documents: Document[]) => void; - selectedDocuments?: Document[]; - onConnectorSelectionChange?: (connectorTypes: string[]) => void; - selectedConnectors?: string[]; - topK?: number; - onTopKChange?: (topK: number) => void; - }) => { - // Memoize the loading fallback to prevent recreation - const loadingFallback = React.useMemo( - () =>
      , - [] - ); - - return ( -
      -
      - - - - - - -
      -
      - -
      - -
      - ); - } -); - -CustomChatInputOptions.displayName = "CustomChatInputOptions"; - -export const ChatInputUI = React.memo( - ({ - onDocumentSelectionChange, - selectedDocuments, - onConnectorSelectionChange, - selectedConnectors, - topK, - onTopKChange, - }: { - onDocumentSelectionChange?: (documents: Document[]) => void; - selectedDocuments?: Document[]; - onConnectorSelectionChange?: (connectorTypes: string[]) => void; - selectedConnectors?: string[]; - topK?: number; - onTopKChange?: (topK: number) => void; - }) => { - return ( - - - - - - - - ); - } -); - -ChatInputUI.displayName = "ChatInputUI"; diff --git a/surfsense_web/components/chat/ChatInterface.tsx b/surfsense_web/components/chat/ChatInterface.tsx deleted file mode 100644 index 64852fa15..000000000 --- a/surfsense_web/components/chat/ChatInterface.tsx +++ /dev/null @@ -1,47 +0,0 @@ -"use client"; - -import { type ChatHandler, ChatSection as LlamaIndexChatSection } from "@llamaindex/chat-ui"; -import { useParams } from "next/navigation"; -import { ChatInputUI } from "@/components/chat/ChatInputGroup"; -import { ChatMessagesUI } from "@/components/chat/ChatMessages"; -import type { Document } from "@/contracts/types/document.types"; - -interface ChatInterfaceProps { - handler: ChatHandler; - onDocumentSelectionChange?: (documents: Document[]) => void; - selectedDocuments?: Document[]; - onConnectorSelectionChange?: (connectorTypes: string[]) => void; - selectedConnectors?: string[]; - topK?: number; - onTopKChange?: (topK: number) => void; -} - -export default function ChatInterface({ - handler, - onDocumentSelectionChange, - selectedDocuments = [], - onConnectorSelectionChange, - selectedConnectors = [], - topK = 10, - onTopKChange, -}: ChatInterfaceProps) { - const { chat_id, search_space_id } = useParams(); - - return ( - -
      - -
      - -
      -
      -
      - ); -} diff --git a/surfsense_web/components/chat/ChatMessages.tsx b/surfsense_web/components/chat/ChatMessages.tsx deleted file mode 100644 index 77c4b3e06..000000000 --- a/surfsense_web/components/chat/ChatMessages.tsx +++ /dev/null @@ -1,73 +0,0 @@ -"use client"; - -import { - ChatMessage as LlamaIndexChatMessage, - ChatMessages as LlamaIndexChatMessages, - type Message, - useChatUI, -} from "@llamaindex/chat-ui"; -import { useEffect, useRef } from "react"; -import { AnimatedEmptyState } from "@/components/chat/AnimatedEmptyState"; -import { CitationDisplay } from "@/components/chat/ChatCitation"; -import { ChatFurtherQuestions } from "@/components/chat/ChatFurtherQuestions"; -import ChatSourcesDisplay from "@/components/chat/ChatSources"; -import TerminalDisplay from "@/components/chat/ChatTerminal"; -import { languageRenderers } from "@/components/chat/CodeBlock"; - -export function ChatMessagesUI() { - const { messages } = useChatUI(); - - return ( - - - - - - {messages.map((message, index) => ( - - ))} - - - - ); -} - -function ChatMessageUI({ message, isLast }: { message: Message; isLast: boolean }) { - const bottomRef = useRef(null); - - useEffect(() => { - if (isLast && bottomRef.current) { - bottomRef.current.scrollIntoView({ behavior: "smooth" }); - } - }, [isLast]); - - return ( - - {message.role === "assistant" ? ( -
      - - - - - -
      -
      - {isLast && } - -
      -
      - ) : ( - - - - )} - - ); -} diff --git a/surfsense_web/components/chat/ChatPanel/ChatPanelContainer.tsx b/surfsense_web/components/chat/ChatPanel/ChatPanelContainer.tsx deleted file mode 100644 index cb0fcb33e..000000000 --- a/surfsense_web/components/chat/ChatPanel/ChatPanelContainer.tsx +++ /dev/null @@ -1,60 +0,0 @@ -"use client"; -import { useAtomValue } from "jotai"; -import { LoaderIcon, TriangleAlert } from "lucide-react"; -import { toast } from "sonner"; -import { activeChatAtom } from "@/atoms/chats/chat-query.atoms"; -import { activeChathatUIAtom, activeChatIdAtom } from "@/atoms/chats/ui.atoms"; -import { generatePodcastMutationAtom } from "@/atoms/podcasts/podcast-mutation.atoms"; -import type { GeneratePodcastRequest } from "@/contracts/types/podcast.types"; -import { cn } from "@/lib/utils"; -import { ChatPanelView } from "./ChatPanelView"; - -export function ChatPanelContainer() { - const { - data: activeChatState, - isLoading: isChatLoading, - error: chatError, - } = useAtomValue(activeChatAtom); - const activeChatIdState = useAtomValue(activeChatIdAtom); - const { isChatPannelOpen } = useAtomValue(activeChathatUIAtom); - const { mutateAsync: generatePodcast, error: generatePodcastError } = useAtomValue( - generatePodcastMutationAtom - ); - - const handleGeneratePodcast = async (request: GeneratePodcastRequest) => { - try { - generatePodcast(request); - toast.success(`Podcast generation started!`); - } catch (error) { - toast.error("Error generating podcast. Please try again later."); - console.error("Error generating podcast:", JSON.stringify(generatePodcastError)); - } - }; - - return activeChatIdState ? ( -
      - {isChatLoading || chatError ? ( -
      - {isChatLoading ? ( -
      - -
      - ) : chatError ? ( -
      - -
      - ) : null} -
      - ) : null} - - {!isChatLoading && !chatError && activeChatState?.chatDetails && ( - - )} -
      - ) : null; -} diff --git a/surfsense_web/components/chat/ChatPanel/ChatPanelView.tsx b/surfsense_web/components/chat/ChatPanel/ChatPanelView.tsx deleted file mode 100644 index 5eb428946..000000000 --- a/surfsense_web/components/chat/ChatPanel/ChatPanelView.tsx +++ /dev/null @@ -1,207 +0,0 @@ -"use client"; - -import { useAtom, useAtomValue } from "jotai"; -import { AlertCircle, Play, RefreshCw, Sparkles } from "lucide-react"; -import { motion } from "motion/react"; -import { useCallback } from "react"; -import { activeChatAtom } from "@/atoms/chats/chat-query.atoms"; -import { activeChathatUIAtom } from "@/atoms/chats/ui.atoms"; -import { cn } from "@/lib/utils"; -import { getPodcastStalenessMessage, isPodcastStale } from "../PodcastUtils"; -import type { GeneratePodcastRequest } from "./ChatPanelContainer"; -import { ConfigModal } from "./ConfigModal"; -import { PodcastPlayer } from "./PodcastPlayer"; - -interface ChatPanelViewProps { - generatePodcast: (request: GeneratePodcastRequest) => Promise; -} - -export function ChatPanelView(props: ChatPanelViewProps) { - const [chatUIState, setChatUIState] = useAtom(activeChathatUIAtom); - const { data: activeChatState } = useAtomValue(activeChatAtom); - - const { isChatPannelOpen } = chatUIState; - const podcast = activeChatState?.podcast; - const chatDetails = activeChatState?.chatDetails; - - const { generatePodcast } = props; - - // Check if podcast is stale - const podcastIsStale = - podcast && chatDetails && isPodcastStale(chatDetails.state_version, podcast.chat_state_version); - - const handleGeneratePost = useCallback(async () => { - if (!chatDetails) return; - await generatePodcast({ - type: "CHAT", - ids: [chatDetails.id], - search_space_id: chatDetails.search_space_id, - podcast_title: chatDetails.title, - }); - }, [chatDetails, generatePodcast]); - - // biome-ignore-start lint/a11y/useSemanticElements: using div for custom layout — will convert later - return ( -
      -
      - {isChatPannelOpen ? ( -
      - {/* Show stale podcast warning if applicable */} - {podcastIsStale && ( - -
      - - - -
      -

      Podcast Outdated

      -

      - {getPodcastStalenessMessage( - chatDetails?.state_version || 0, - podcast?.chat_state_version - )} -

      -
      -
      -
      - )} - - - - {/* ConfigModal positioned absolutely to avoid nesting buttons */} -
      - -
      -
      -
      - ) : ( - - setChatUIState((prev) => ({ - ...prev, - isChatPannelOpen: !isChatPannelOpen, - })) - } - whileHover={{ scale: 1.1 }} - whileTap={{ scale: 0.9 }} - className={cn( - "p-2.5 rounded-full transition-colors shadow-sm", - podcastIsStale - ? "bg-amber-500/20 hover:bg-amber-500/30 text-amber-600 dark:text-amber-400" - : "bg-primary/20 hover:bg-primary/30 text-primary" - )} - > - {podcastIsStale ? : } - - )} -
      - {podcast ? ( -
      - {isChatPannelOpen ? ( - - ) : podcast ? ( - setChatUIState((prev) => ({ ...prev, isChatPannelOpen: true }))} - whileHover={{ scale: 1.1 }} - whileTap={{ scale: 0.9 }} - className="p-2.5 rounded-full bg-green-500/20 hover:bg-green-500/30 text-green-600 dark:text-green-400 transition-colors shadow-sm" - > - - - ) : null} -
      - ) : null} -
      - ); - // biome-ignore-end lint/a11y/useSemanticElements : using div for custom layout — will convert later -} diff --git a/surfsense_web/components/chat/ChatPanel/ConfigModal.tsx b/surfsense_web/components/chat/ChatPanel/ConfigModal.tsx deleted file mode 100644 index 7c1497df9..000000000 --- a/surfsense_web/components/chat/ChatPanel/ConfigModal.tsx +++ /dev/null @@ -1,84 +0,0 @@ -"use client"; - -import { useAtomValue } from "jotai"; -import { Pencil } from "lucide-react"; -import { useCallback, useContext, useState } from "react"; -import { activeChatAtom } from "@/atoms/chats/chat-query.atoms"; -import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; -import type { GeneratePodcastRequest } from "./ChatPanelContainer"; - -interface ConfigModalProps { - generatePodcast: (request: GeneratePodcastRequest) => Promise; -} - -export function ConfigModal(props: ConfigModalProps) { - const { data: activeChatState } = useAtomValue(activeChatAtom); - - const chatDetails = activeChatState?.chatDetails; - const podcast = activeChatState?.podcast; - - const { generatePodcast } = props; - - const [userPromt, setUserPrompt] = useState(""); - - const handleGeneratePost = useCallback(async () => { - if (!chatDetails) return; - await generatePodcast({ - type: "CHAT", - ids: [chatDetails.id], - search_space_id: chatDetails.search_space_id, - podcast_title: podcast?.title || chatDetails.title, - user_prompt: userPromt, - }); - }, [chatDetails, userPromt]); - - return ( - - e.stopPropagation()} - > - - - e.stopPropagation()} align="end" className="bg-sidebar w-96 "> -
      - -

      - Leave empty to use the default prompt -

      -
      -

      Examples:

      -
        -
      • Make hosts speak in London street language
      • -
      • Use real-world analogies and metaphors
      • -
      • Add dramatic pauses like a late-night radio show
      • -
      • Include 90s pop culture references
      • -
      -
      - - - - - -
      -
      - ); -} diff --git a/surfsense_web/components/chat/ChatPanel/PodcastPlayer/PodcastPlayer.tsx b/surfsense_web/components/chat/ChatPanel/PodcastPlayer/PodcastPlayer.tsx deleted file mode 100644 index 63bd22c37..000000000 --- a/surfsense_web/components/chat/ChatPanel/PodcastPlayer/PodcastPlayer.tsx +++ /dev/null @@ -1,329 +0,0 @@ -"use client"; - -import { Pause, Play, SkipBack, SkipForward, Volume2, VolumeX, X } from "lucide-react"; -import { motion } from "motion/react"; -import { useEffect, useRef, useState } from "react"; -import { toast } from "sonner"; -import { Button } from "@/components/ui/button"; -import { Slider } from "@/components/ui/slider"; -import type { Podcast } from "@/contracts/types/podcast.types"; -import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; -import { PodcastPlayerCompactSkeleton } from "./PodcastPlayerCompactSkeleton"; - -interface PodcastPlayerProps { - podcast: Podcast | null; - isLoading?: boolean; - onClose?: () => void; - compact?: boolean; -} - -export function PodcastPlayer({ - podcast, - isLoading = false, - onClose, - compact = false, -}: PodcastPlayerProps) { - const [audioSrc, setAudioSrc] = useState(undefined); - const [isPlaying, setIsPlaying] = useState(false); - const [currentTime, setCurrentTime] = useState(0); - const [duration, setDuration] = useState(0); - const [volume, setVolume] = useState(0.7); - const [isMuted, setIsMuted] = useState(false); - const [isFetching, setIsFetching] = useState(false); - const audioRef = useRef(null); - const currentObjectUrlRef = useRef(null); - - // Cleanup object URL on unmount - useEffect(() => { - return () => { - if (currentObjectUrlRef.current) { - URL.revokeObjectURL(currentObjectUrlRef.current); - currentObjectUrlRef.current = null; - } - }; - }, []); - - // Load podcast audio when podcast changes - useEffect(() => { - if (!podcast) { - setAudioSrc(undefined); - setCurrentTime(0); - setDuration(0); - setIsPlaying(false); - setIsFetching(false); - return; 
- } - - const loadPodcast = async () => { - setIsFetching(true); - try { - // Revoke previous object URL if exists - if (currentObjectUrlRef.current) { - URL.revokeObjectURL(currentObjectUrlRef.current); - currentObjectUrlRef.current = null; - } - - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 30000); - - try { - const response = await podcastsApiService.loadPodcast({ - request: { id: podcast.id }, - controller, - }); - - const objectUrl = URL.createObjectURL(response); - currentObjectUrlRef.current = objectUrl; - setAudioSrc(objectUrl); - } catch (error) { - if (error instanceof DOMException && error.name === "AbortError") { - throw new Error("Request timed out. Please try again."); - } - throw error; - } finally { - clearTimeout(timeoutId); - } - } catch (error) { - console.error("Error fetching podcast:", error); - toast.error(error instanceof Error ? error.message : "Failed to load podcast audio."); - setAudioSrc(undefined); - } finally { - setIsFetching(false); - } - }; - - loadPodcast(); - }, [podcast]); - - const handleTimeUpdate = () => { - if (audioRef.current) { - setCurrentTime(audioRef.current.currentTime); - } - }; - - const handleMetadataLoaded = () => { - if (audioRef.current) { - setDuration(audioRef.current.duration); - } - }; - - const togglePlayPause = () => { - if (audioRef.current) { - if (isPlaying) { - audioRef.current.pause(); - } else { - audioRef.current.play(); - } - setIsPlaying(!isPlaying); - } - }; - - const handleSeek = (value: number[]) => { - if (audioRef.current) { - audioRef.current.currentTime = value[0]; - setCurrentTime(value[0]); - } - }; - - const handleVolumeChange = (value: number[]) => { - if (audioRef.current) { - const newVolume = value[0]; - audioRef.current.volume = newVolume; - setVolume(newVolume); - - if (newVolume === 0) { - audioRef.current.muted = true; - setIsMuted(true); - } else { - audioRef.current.muted = false; - setIsMuted(false); - } - } - }; - - const 
toggleMute = () => { - if (audioRef.current) { - const newMutedState = !isMuted; - audioRef.current.muted = newMutedState; - setIsMuted(newMutedState); - - if (!newMutedState && volume === 0) { - const restoredVolume = 0.5; - audioRef.current.volume = restoredVolume; - setVolume(restoredVolume); - } - } - }; - - const skipForward = () => { - if (audioRef.current) { - audioRef.current.currentTime = Math.min( - audioRef.current.duration, - audioRef.current.currentTime + 10 - ); - } - }; - - const skipBackward = () => { - if (audioRef.current) { - audioRef.current.currentTime = Math.max(0, audioRef.current.currentTime - 10); - } - }; - - const formatTime = (time: number) => { - const minutes = Math.floor(time / 60); - const seconds = Math.floor(time % 60); - return `${minutes}:${seconds < 10 ? "0" : ""}${seconds}`; - }; - - // Show skeleton while fetching - if (isFetching && compact) { - return ; - } - - if (!podcast || !audioSrc) { - return null; - } - - if (compact) { - return ( - <> -
      - {/* Audio Visualizer */} - - {isPlaying && ( - - )} - - - {/* Progress Bar with Time */} -
      - -
      - {formatTime(currentTime)} - {formatTime(duration)} -
      -
      - - {/* Controls */} -
      - {/* Left: Volume */} -
      - - - -
      - - {/* Center: Playback Controls */} -
      - - - - - - - - - - - -
      - - {/* Right: Placeholder for symmetry */} -
      -
      -
      - - - - ); - } - - return null; -} diff --git a/surfsense_web/components/chat/ChatPanel/PodcastPlayer/PodcastPlayerCompactSkeleton.tsx b/surfsense_web/components/chat/ChatPanel/PodcastPlayer/PodcastPlayerCompactSkeleton.tsx deleted file mode 100644 index d7007dadd..000000000 --- a/surfsense_web/components/chat/ChatPanel/PodcastPlayer/PodcastPlayerCompactSkeleton.tsx +++ /dev/null @@ -1,40 +0,0 @@ -"use client"; - -import { Podcast } from "lucide-react"; -import { motion } from "motion/react"; - -export function PodcastPlayerCompactSkeleton() { - return ( -
      - {/* Header with icon and title */} -
      - - - - {/* Title skeleton */} -
      -
      - - {/* Progress bar skeleton */} -
      -
      -
      -
      - - {/* Controls skeleton */} -
      -
      -
      -
      -
      -
      -
      - ); -} diff --git a/surfsense_web/components/chat/ChatPanel/PodcastPlayer/index.ts b/surfsense_web/components/chat/ChatPanel/PodcastPlayer/index.ts deleted file mode 100644 index 55c19f934..000000000 --- a/surfsense_web/components/chat/ChatPanel/PodcastPlayer/index.ts +++ /dev/null @@ -1,2 +0,0 @@ -export { PodcastPlayer } from "./PodcastPlayer"; -export { PodcastPlayerCompactSkeleton } from "./PodcastPlayerCompactSkeleton"; diff --git a/surfsense_web/components/chat/ChatSources.tsx b/surfsense_web/components/chat/ChatSources.tsx deleted file mode 100644 index 5f205d005..000000000 --- a/surfsense_web/components/chat/ChatSources.tsx +++ /dev/null @@ -1,226 +0,0 @@ -"use client"; - -import { getAnnotationData, type Message } from "@llamaindex/chat-ui"; -import { ExternalLink, FileText } from "lucide-react"; -import { useState } from "react"; -import { Badge } from "@/components/ui/badge"; -import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; -import { Sheet, SheetContent, SheetHeader, SheetTitle, SheetTrigger } from "@/components/ui/sheet"; -import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; -import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; -import { SourceDetailSheet } from "./SourceDetailSheet"; - -interface Source { - id: string; - title: string; - description: string; - url: string; - sourceType: string; -} - -interface SourceGroup { - id: number; - name: string; - type: string; - sources: Source[]; -} - -// New interfaces for the updated data format -interface NodeMetadata { - title: string; - source_type: string; - group_name: string; -} - -interface SourceNode { - id: string; - text: string; - url: string; - metadata: NodeMetadata; -} - -function getSourceIcon(type: string) { - // Handle USER_SELECTED_ prefix - const normalizedType = type.startsWith("USER_SELECTED_") - ? 
type.replace("USER_SELECTED_", "") - : type; - return getConnectorIcon(normalizedType, "h-4 w-4"); -} - -function SourceCard({ source }: { source: Source }) { - const hasUrl = source.url && source.url.trim() !== ""; - const chunkId = Number(source.id); - const sourceType = source.sourceType; - const [isOpen, setIsOpen] = useState(false); - - // Clean up the description for better display - const cleanDescription = source.description - .replace(/## Metadata\n\n/g, "") - .replace(/\n+/g, " ") - .trim(); - - const handleUrlClick = (e: React.MouseEvent, url: string) => { - e.preventDefault(); - e.stopPropagation(); - window.open(url, "_blank", "noopener,noreferrer"); - }; - - return ( - - - - -
      - - {source.title} - -
      - - #{chunkId} - - {hasUrl && ( - - )} -
      -
      -
      - - - {cleanDescription} - - -
      -
      -
      - ); -} - -export default function ChatSourcesDisplay({ message }: { message: Message }) { - const [open, setOpen] = useState(false); - const annotations = getAnnotationData(message, "sources"); - - // Transform the new data format to the expected SourceGroup format - const sourceGroups: SourceGroup[] = []; - - if (Array.isArray(annotations) && annotations.length > 0) { - // Extract all nodes from the response - const allNodes: SourceNode[] = []; - - annotations.forEach((item) => { - if (item && typeof item === "object" && "nodes" in item && Array.isArray(item.nodes)) { - allNodes.push(...item.nodes); - } - }); - - // Group nodes by source_type - const groupedByType = allNodes.reduce( - (acc, node) => { - const sourceType = node.metadata.source_type; - if (!acc[sourceType]) { - acc[sourceType] = []; - } - acc[sourceType].push(node); - return acc; - }, - {} as Record - ); - - // Convert grouped nodes to SourceGroup format - Object.entries(groupedByType).forEach(([sourceType, nodes], index) => { - if (nodes.length > 0) { - const firstNode = nodes[0]; - sourceGroups.push({ - id: index + 100, // Generate unique ID - name: firstNode.metadata.group_name, - type: sourceType, - sources: nodes.map((node) => ({ - id: node.id, - title: node.metadata.title, - description: node.text, - url: node.url || "", - sourceType: sourceType, - })), - }); - } - }); - } - - if (sourceGroups.length === 0) { - return null; - } - - const totalSources = sourceGroups.reduce((acc, group) => acc + group.sources.length, 0); - - return ( - - - - - - -
      - Sources - - {totalSources} {totalSources === 1 ? "source" : "sources"} - -
      -
      - -
      - - {sourceGroups.map((group) => ( - - {getSourceIcon(group.type)} - - {group.name} - - - {group.sources.length} - - - ))} - -
      - {sourceGroups.map((group) => ( - -
      -
      - {group.sources.map((source) => ( - - ))} -
      -
      -
      - ))} -
      -
      -
      - ); -} diff --git a/surfsense_web/components/chat/ChatTerminal.tsx b/surfsense_web/components/chat/ChatTerminal.tsx deleted file mode 100644 index 07415eaf9..000000000 --- a/surfsense_web/components/chat/ChatTerminal.tsx +++ /dev/null @@ -1,105 +0,0 @@ -"use client"; - -import { getAnnotationData, type Message } from "@llamaindex/chat-ui"; -import { useEffect, useRef, useState } from "react"; -import { Button } from "@/components/ui/button"; - -export default function TerminalDisplay({ message, open }: { message: Message; open: boolean }) { - const [isCollapsed, setIsCollapsed] = useState(!open); - - const bottomRef = useRef(null); - - useEffect(() => { - if (bottomRef.current) { - bottomRef.current.scrollTo({ - top: bottomRef.current.scrollHeight, - behavior: "smooth", - }); - } - }, []); - - // Get the last assistant message that's not being typed - if (!message) { - return null; - } - - interface TerminalInfo { - id: number; - text: string; - type: string; - } - - const events = getAnnotationData(message, "TERMINAL_INFO") as TerminalInfo[]; - - if (events.length === 0) { - return null; - } - - return ( -
      - {/* Terminal Header */} - - - {/* Terminal Content (animated expand/collapse) */} -
      -
      - {events.map((event, index) => ( -
      - $ - [{event.type || ""}] - - {event.text || ""}... - -
      - ))} - {events.length === 0 && ( -
      No agent events to display...
      - )} -
      -
      -
      - ); -} diff --git a/surfsense_web/components/chat/Citation.tsx b/surfsense_web/components/chat/Citation.tsx deleted file mode 100644 index 27b352e96..000000000 --- a/surfsense_web/components/chat/Citation.tsx +++ /dev/null @@ -1,118 +0,0 @@ -import { ExternalLink } from "lucide-react"; -import { memo, useState } from "react"; -import { Button } from "@/components/ui/button"; -import { Card } from "@/components/ui/card"; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuTrigger, -} from "@/components/ui/dropdown-menu"; -import { getConnectorIcon } from "./ConnectorComponents"; -import type { Source } from "./types"; - -type CitationProps = { - citationId: number; - citationText: string; - position: number; - source: Source | null; -}; - -/** - * Citation component to handle individual citations - */ -export const Citation = memo(({ citationId, citationText, position, source }: CitationProps) => { - const [open, setOpen] = useState(false); - const citationKey = `citation-${citationId}-${position}`; - - if (!source) return <>{citationText}; - - return ( - - - - - - {citationId} - - - - {open && ( - - -
      -
      - {getConnectorIcon(source.connectorType || "")} -
      -
      -
      -

      {source.title}

      -
      -

      {source.description}

      -
      - {source.url} -
      -
      - -
      -
      -
      - )} -
      -
      - ); -}); - -Citation.displayName = "Citation"; - -/** - * Function to render text with citations - */ -export const renderTextWithCitations = ( - text: string, - getCitationSource: (id: number) => Source | null -) => { - // Regular expression to find citation patterns like [1], [2], etc. - const citationRegex = /\[(\d+)\]/g; - const parts = []; - let lastIndex = 0; - let match: RegExpExecArray | null = citationRegex.exec(text); - let position = 0; - - while (match !== null) { - // Add text before the citation - if (match.index > lastIndex) { - parts.push(text.substring(lastIndex, match.index)); - } - - // Add the citation component - const citationId = parseInt(match[1], 10); - parts.push( - - ); - - lastIndex = match.index + match[0].length; - position++; - match = citationRegex.exec(text); - } - - // Add any remaining text after the last citation - if (lastIndex < text.length) { - parts.push(text.substring(lastIndex)); - } - - return parts; -}; diff --git a/surfsense_web/components/chat/CodeBlock.tsx b/surfsense_web/components/chat/CodeBlock.tsx deleted file mode 100644 index 7641a8b82..000000000 --- a/surfsense_web/components/chat/CodeBlock.tsx +++ /dev/null @@ -1,211 +0,0 @@ -"use client"; - -import { Check, Copy } from "lucide-react"; -import { useTheme } from "next-themes"; -import { memo, useCallback, useEffect, useMemo, useState } from "react"; -import { Prism as SyntaxHighlighter } from "react-syntax-highlighter"; -import { oneDark, oneLight } from "react-syntax-highlighter/dist/cjs/styles/prism"; - -// Constants for styling and configuration -const COPY_TIMEOUT = 2000; - -const BASE_CUSTOM_STYLE = { - margin: 0, - borderRadius: "0.375rem", - fontSize: "0.75rem", - lineHeight: "1.5rem", - border: "none", -} as const; - -const LINE_PROPS_STYLE = { - wordBreak: "break-all" as const, - whiteSpace: "pre-wrap" as const, - border: "none", - borderBottom: "none", - paddingLeft: 0, - paddingRight: 0, - margin: "0.25rem 0", -} as const; - -const 
CODE_TAG_PROPS = { - className: "font-mono", - style: { - border: "none", - background: "var(--syntax-bg)", - }, -} as const; - -// TypeScript interfaces -interface CodeBlockProps { - children: string; - language: string; -} - -type LanguageRenderer = (props: { code: string }) => React.JSX.Element; - -interface SyntaxStyle { - [key: string]: React.CSSProperties; -} - -// Memoized fallback component for SSR/hydration -const FallbackCodeBlock = memo(({ children }: { children: string }) => ( -
      -
      -			{children}
      -		
      -
      -)); - -FallbackCodeBlock.displayName = "FallbackCodeBlock"; - -// Code block component with syntax highlighting and copy functionality -export const CodeBlock = memo(({ children, language }) => { - const [copied, setCopied] = useState(false); - const { resolvedTheme, theme } = useTheme(); - const [mounted, setMounted] = useState(false); - - // Prevent hydration issues - useEffect(() => { - setMounted(true); - }, []); - - // Memoize theme detection - const isDarkTheme = useMemo( - () => mounted && (resolvedTheme === "dark" || theme === "dark"), - [mounted, resolvedTheme, theme] - ); - - // Memoize syntax theme selection - const syntaxTheme = useMemo(() => (isDarkTheme ? oneDark : oneLight), [isDarkTheme]); - - // Memoize enhanced style with theme-specific modifications - const enhancedStyle = useMemo( - () => ({ - ...syntaxTheme, - 'pre[class*="language-"]': { - ...syntaxTheme['pre[class*="language-"]'], - margin: 0, - border: "none", - borderRadius: "0.375rem", - background: "var(--syntax-bg)", - }, - 'code[class*="language-"]': { - ...syntaxTheme['code[class*="language-"]'], - border: "none", - background: "var(--syntax-bg)", - }, - }), - [syntaxTheme] - ); - - // Memoize custom style with background - const customStyle = useMemo( - () => ({ - ...BASE_CUSTOM_STYLE, - backgroundColor: "var(--syntax-bg)", - }), - [] - ); - - // Memoized copy handler - const handleCopy = useCallback(async () => { - try { - await navigator.clipboard.writeText(children); - setCopied(true); - const timeoutId = setTimeout(() => setCopied(false), COPY_TIMEOUT); - return () => clearTimeout(timeoutId); - } catch (error) { - console.warn("Failed to copy code to clipboard:", error); - } - }, [children]); - - // Memoized line props with style - const lineProps = useMemo( - () => ({ - style: LINE_PROPS_STYLE, - }), - [] - ); - - // Early return for non-mounted state - if (!mounted) { - return {children}; - } - - return ( -
      -
      - -
      - - {children} - -
      - ); -}); - -CodeBlock.displayName = "CodeBlock"; - -// Optimized language renderer factory with memoization -const createLanguageRenderer = (lang: string): LanguageRenderer => { - const renderer = ({ code }: { code: string }) => {code}; - renderer.displayName = `LanguageRenderer(${lang})`; - return renderer; -}; - -// Pre-defined supported languages for better maintainability -const SUPPORTED_LANGUAGES = [ - "javascript", - "typescript", - "python", - "java", - "csharp", - "cpp", - "c", - "php", - "ruby", - "go", - "rust", - "swift", - "kotlin", - "scala", - "sql", - "json", - "xml", - "yaml", - "bash", - "shell", - "powershell", - "dockerfile", - "html", - "css", - "scss", - "less", - "markdown", - "text", -] as const; - -// Generate language renderers efficiently -export const languageRenderers: Record = Object.fromEntries( - SUPPORTED_LANGUAGES.map((lang) => [lang, createLanguageRenderer(lang)]) -); diff --git a/surfsense_web/components/chat/ConnectorComponents.tsx b/surfsense_web/components/chat/ConnectorComponents.tsx deleted file mode 100644 index 3866d055e..000000000 --- a/surfsense_web/components/chat/ConnectorComponents.tsx +++ /dev/null @@ -1,109 +0,0 @@ -import { ChevronDown, Plus } from "lucide-react"; -import type React from "react"; -import { Button } from "@/components/ui/button"; -import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; -import type { Connector } from "./types"; - -/** - * Displays a small icon for a connector type - */ -export const ConnectorIcon = ({ type, index = 0 }: { type: string; index?: number }) => ( -
      - {getConnectorIcon(type)} -
      -); - -/** - * Displays a count indicator for additional connectors - */ -export const ConnectorCountBadge = ({ count }: { count: number }) => ( -
      - +{count} -
      -); - -type ConnectorButtonProps = { - selectedConnectors: string[]; - onClick: () => void; - connectorSources: Connector[]; -}; - -/** - * Button that displays selected connectors and opens connector selection dialog - */ -export const ConnectorButton = ({ - selectedConnectors, - onClick, - connectorSources, -}: ConnectorButtonProps) => { - const totalConnectors = connectorSources.length; - const selectedCount = selectedConnectors.length; - const progressPercentage = (selectedCount / totalConnectors) * 100; - - // Get the name of a single selected connector - const getSingleConnectorName = () => { - const connector = connectorSources.find((c) => c.type === selectedConnectors[0]); - return connector?.name || ""; - }; - - // Get display text based on selection count - const getDisplayText = () => { - if (selectedCount === totalConnectors) return "All Connectors"; - if (selectedCount === 1) return getSingleConnectorName(); - return `${selectedCount} Connectors`; - }; - - // Render the empty state (no connectors selected) - const renderEmptyState = () => ( - <> - - Select Connectors - - ); - - // Render the selected connectors preview - const renderSelectedConnectors = () => ( - <> -
      - {/* Show up to 3 connector icons */} - {selectedConnectors.slice(0, 3).map((type, index) => ( - - ))} - - {/* Show count indicator if more than 3 connectors are selected */} - {selectedCount > 3 && } -
      - - {/* Display text */} - {getDisplayText()} - - ); - - return ( - - ); -}; diff --git a/surfsense_web/components/chat/DocumentsDataTable.tsx b/surfsense_web/components/chat/DocumentsDataTable.tsx deleted file mode 100644 index 77f1a05bd..000000000 --- a/surfsense_web/components/chat/DocumentsDataTable.tsx +++ /dev/null @@ -1,604 +0,0 @@ -"use client"; - -import { useQuery } from "@tanstack/react-query"; -import { - type ColumnDef, - flexRender, - getCoreRowModel, - type SortingState, - useReactTable, -} from "@tanstack/react-table"; -import { useAtomValue } from "jotai"; -import { ArrowUpDown, Calendar, FileText, Filter, Plus, Search } from "lucide-react"; -import { useRouter } from "next/navigation"; -import { useCallback, useEffect, useMemo, useRef, useState } from "react"; -import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms"; -import { Button } from "@/components/ui/button"; -import { Checkbox } from "@/components/ui/checkbox"; -import { Input } from "@/components/ui/input"; -import { Label } from "@/components/ui/label"; -import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from "@/components/ui/select"; -import { - Table, - TableBody, - TableCell, - TableHead, - TableHeader, - TableRow, -} from "@/components/ui/table"; -import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; -import type { Document, DocumentTypeEnum } from "@/contracts/types/document.types"; -import { documentsApiService } from "@/lib/apis/documents-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; - -interface DocumentsDataTableProps { - searchSpaceId: number; - onSelectionChange: (documents: Document[]) => void; - onDone: () => void; - initialSelectedDocuments?: Document[]; -} - -function useDebounced(value: T, delay = 300) { - const [debounced, setDebounced] = useState(value); - useEffect(() => { - const 
t = setTimeout(() => setDebounced(value), delay); - return () => clearTimeout(t); - }, [value, delay]); - return debounced; -} - -const columns: ColumnDef[] = [ - { - id: "select", - header: ({ table }) => ( - table.toggleAllPageRowsSelected(!!value)} - aria-label="Select all" - /> - ), - cell: ({ row }) => ( - row.toggleSelected(!!value)} - aria-label="Select row" - /> - ), - enableSorting: false, - enableHiding: false, - size: 40, - }, - { - accessorKey: "title", - header: ({ column }) => ( - - ), - cell: ({ row }) => { - const title = row.getValue("title") as string; - return ( -
      - {title} -
      - ); - }, - }, - { - accessorKey: "document_type", - header: "Type", - cell: ({ row }) => { - const type = row.getValue("document_type") as DocumentType; - return ( -
      - {getConnectorIcon(String(type))} -
      - ); - }, - size: 80, - meta: { - className: "hidden sm:table-cell", - }, - }, - { - accessorKey: "content", - header: "Preview", - cell: ({ row }) => { - const content = row.getValue("content") as string; - return ( -
      - {content.substring(0, 30)}... - {content.substring(0, 100)}... -
      - ); - }, - enableSorting: false, - meta: { - className: "hidden md:table-cell", - }, - }, - { - accessorKey: "created_at", - header: ({ column }) => ( - - ), - cell: ({ row }) => { - const date = new Date(row.getValue("created_at")); - return ( -
      - - {date.toLocaleDateString("en-US", { - month: "short", - day: "numeric", - year: "numeric", - })} - - - {date.toLocaleDateString("en-US", { - month: "numeric", - day: "numeric", - })} - -
      - ); - }, - size: 80, - }, -]; - -export function DocumentsDataTable({ - searchSpaceId, - onSelectionChange, - onDone, - initialSelectedDocuments = [], -}: DocumentsDataTableProps) { - const router = useRouter(); - const [sorting, setSorting] = useState([]); - const [search, setSearch] = useState(""); - const debouncedSearch = useDebounced(search, 300); - const [documentTypeFilter, setDocumentTypeFilter] = useState([]); - const [pageIndex, setPageIndex] = useState(0); - const [pageSize, setPageSize] = useState(10); - const { data: typeCounts } = useAtomValue(documentTypeCountsAtom); - - const fetchQueryParams = useMemo( - () => ({ - search_space_id: searchSpaceId, - page: pageIndex, - page_size: pageSize, - ...(documentTypeFilter.length > 0 && { document_types: documentTypeFilter }), - }), - [searchSpaceId, pageIndex, pageSize, documentTypeFilter, debouncedSearch] - ); - - const searchQueryParams = useMemo(() => { - return { - search_space_id: searchSpaceId, - page: pageIndex, - page_size: pageSize, - ...(documentTypeFilter.length > 0 && { document_types: documentTypeFilter }), - title: debouncedSearch, - }; - }, [debouncedSearch, searchSpaceId, pageIndex, pageSize, documentTypeFilter, debouncedSearch]); - - // Use query for fetching documents - const { data: documents, isLoading: isDocumentsLoading } = useQuery({ - queryKey: cacheKeys.documents.withQueryParams(fetchQueryParams), - queryFn: () => documentsApiService.getDocuments({ queryParams: fetchQueryParams }), - staleTime: 3 * 60 * 1000, // 3 minutes - enabled: !!searchSpaceId && !debouncedSearch.trim(), - }); - - // Seaching - const { data: searchedDocuments, isLoading: isSearchedDocumentsLoading } = useQuery({ - queryKey: cacheKeys.documents.withQueryParams(searchQueryParams), - queryFn: () => documentsApiService.searchDocuments({ queryParams: searchQueryParams }), - staleTime: 3 * 60 * 1000, // 3 minutes - enabled: !!searchSpaceId && !!debouncedSearch.trim(), - }); - - // Use query data when not 
searching, otherwise use hook data - const actualDocuments = debouncedSearch.trim() - ? searchedDocuments?.items || [] - : documents?.items || []; - const actualTotal = debouncedSearch.trim() - ? searchedDocuments?.total || 0 - : documents?.total || 0; - const actualLoading = debouncedSearch.trim() ? isSearchedDocumentsLoading : isDocumentsLoading; - - // Memoize initial row selection to prevent infinite loops - const initialRowSelection = useMemo(() => { - if (!initialSelectedDocuments.length) return {}; - - const selection: Record = {}; - initialSelectedDocuments.forEach((selectedDoc) => { - selection[selectedDoc.id] = true; - }); - return selection; - }, [initialSelectedDocuments]); - - const [rowSelection, setRowSelection] = useState>( - () => initialRowSelection - ); - - // Maintain a separate state for actually selected documents (across all pages) - const [selectedDocumentsMap, setSelectedDocumentsMap] = useState>(() => { - const map = new Map(); - initialSelectedDocuments.forEach((doc) => map.set(doc.id, doc)); - return map; - }); - - // Track the last notified selection to avoid redundant parent calls - const lastNotifiedSelection = useRef(""); - - // Update row selection only when initialSelectedDocuments changes (not rowSelection itself) - useEffect(() => { - const initialKeys = Object.keys(initialRowSelection); - if (initialKeys.length === 0) return; - - const currentKeys = Object.keys(rowSelection); - // Quick length check before expensive comparison - if (currentKeys.length === initialKeys.length) { - // Check if all keys match (order doesn't matter for Sets) - const hasAllKeys = initialKeys.every((key) => rowSelection[key]); - if (hasAllKeys) return; - } - - setRowSelection(initialRowSelection); - }, [initialRowSelection]); // Remove rowSelection from dependencies to prevent loop - - // Update the selected documents map when row selection changes - useEffect(() => { - if (!actualDocuments || actualDocuments.length === 0) return; - - 
setSelectedDocumentsMap((prev) => { - const newMap = new Map(prev); - let hasChanges = false; - - // Process only current page documents - for (const doc of actualDocuments) { - const docId = doc.id; - const isSelected = rowSelection[docId.toString()]; - const wasInMap = newMap.has(docId); - - if (isSelected && !wasInMap) { - newMap.set(docId, doc); - hasChanges = true; - } else if (!isSelected && wasInMap) { - newMap.delete(docId); - hasChanges = true; - } - } - - // Return same reference if no changes to avoid unnecessary re-renders - return hasChanges ? newMap : prev; - }); - }, [rowSelection, documents]); - - // Memoize selected documents array - const selectedDocumentsArray = useMemo(() => { - return Array.from(selectedDocumentsMap.values()); - }, [selectedDocumentsMap]); - - // Notify parent of selection changes only when content actually changes - useEffect(() => { - // Create a stable string representation for comparison - const selectionKey = selectedDocumentsArray - .map((d) => d.id) - .sort() - .join(","); - - // Skip if selection hasn't actually changed - if (selectionKey === lastNotifiedSelection.current) return; - - lastNotifiedSelection.current = selectionKey; - onSelectionChange(selectedDocumentsArray); - }, [selectedDocumentsArray, onSelectionChange]); - - const table = useReactTable({ - data: actualDocuments || [], - columns, - getRowId: (row) => row.id.toString(), - onSortingChange: setSorting, - getCoreRowModel: getCoreRowModel(), - onRowSelectionChange: setRowSelection, - manualPagination: true, - pageCount: Math.ceil(actualTotal / pageSize), - state: { sorting, rowSelection, pagination: { pageIndex, pageSize } }, - }); - - const handleClearAll = useCallback(() => { - setRowSelection({}); - setSelectedDocumentsMap(new Map()); - }, []); - - const handleSelectPage = useCallback(() => { - const currentPageRows = table.getRowModel().rows; - const newSelection = { ...rowSelection }; - currentPageRows.forEach((row) => { - newSelection[row.id] = true; 
- }); - setRowSelection(newSelection); - }, [table, rowSelection]); - - const handleToggleType = useCallback((type: DocumentTypeEnum, checked: boolean) => { - setDocumentTypeFilter((prev) => { - if (checked) { - return [...prev, type]; - } - return prev.filter((t) => t !== type); - }); - setPageIndex(0); // Reset to first page when filter changes - }, []); - - const selectedCount = selectedDocumentsMap.size; - - // Get available document types from type counts (memoized) - const availableTypes = useMemo(() => { - const types = typeCounts ? (Object.keys(typeCounts) as DocumentTypeEnum[]) : []; - return types.length > 0 ? types.sort() : []; - }, [typeCounts]); - - return ( -
      - {/* Header Controls */} -
      - {/* Search and Filter Row */} -
      -
      - - { - setSearch(event.target.value); - setPageIndex(0); // Reset to first page on search - }} - className="pl-10 text-sm" - /> -
      - - - - - -
      -
      Filter by Type
      -
      - {availableTypes.map((type) => ( -
      - handleToggleType(type, !!checked)} - /> - -
      - ))} -
      - {documentTypeFilter.length > 0 && ( - - )} -
      -
      -
      -
      - - {/* Action Controls Row */} -
      -
      - - {selectedCount} selected {actualLoading && "· Loading..."} - -
      -
      - - - -
      -
      - -
      -
      - - {/* Table Container */} -
      -
      - {actualLoading ? ( -
      -
      -
      -

      Loading documents...

      -
      -
      - ) : ( -
      + {processChildrenWithCitations(children)} + + {processChildrenWithCitations(children)} +
      - - {table.getHeaderGroups().map((headerGroup) => ( - - {headerGroup.headers.map((header) => ( - - {header.isPlaceholder - ? null - : flexRender(header.column.columnDef.header, header.getContext())} - - ))} - - ))} - - - {table.getRowModel().rows?.length ? ( - table.getRowModel().rows.map((row) => ( - - {row.getVisibleCells().map((cell) => ( - - {flexRender(cell.column.columnDef.cell, cell.getContext())} - - ))} - - )) - ) : ( - - -
      -
      - -
      -
      -

      No documents found

      -

      - Get started by adding your first data source to build your knowledge - base. -

      -
      - -
      -
      -
      - )} -
      -
      - )} -
-
- - {/* Footer Pagination */} -
-
- Showing {pageIndex * pageSize + 1} to {Math.min((pageIndex + 1) * pageSize, actualTotal)}{" "} - of {actualTotal} documents -
-
- -
- Page - {pageIndex + 1} - of - {Math.ceil(actualTotal / pageSize)} -
- -
-
-
- ); -} diff --git a/surfsense_web/components/chat/PodcastUtils.ts b/surfsense_web/components/chat/PodcastUtils.ts deleted file mode 100644 index 5662f96b5..000000000 --- a/surfsense_web/components/chat/PodcastUtils.ts +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Determines if a podcast is stale compared to the current chat state. - * A podcast is considered stale if: - * - The chat's current state_version is greater than the podcast's chat_state_version - * - * @param chatVersion - The current state_version of the chat - * @param podcastVersion - The chat_state_version stored when the podcast was generated (nullable) - * @returns true if the podcast is stale, false otherwise - */ -export function isPodcastStale( - chatVersion: number, - podcastVersion: number | null | undefined -): boolean { - // If podcast has no version, it's stale (generated before this feature) - if (!podcastVersion) { - return true; - } - // If chat version is greater than podcast version, it's stale : We can change this condition to consider staleness after a huge number of updates - return chatVersion > podcastVersion; -} - -/** - * Gets a human-readable message about podcast staleness - * - * @param chatVersion - The current state_version of the chat - * @param podcastVersion - The chat_state_version stored when the podcast was generated - * @returns A descriptive message about the podcast's staleness status - */ -export function getPodcastStalenessMessage( - chatVersion: number, - podcastVersion: number | null | undefined -): string { - if (!podcastVersion) { - return "This podcast was generated before chat updates were tracked. Consider regenerating it."; - } - - if (chatVersion > podcastVersion) { - const versionDiff = chatVersion - podcastVersion; - return `This podcast is outdated. The chat has been updated ${versionDiff} time${versionDiff > 1 ? 
"s" : ""} since this podcast was generated.`; - } - - return "This podcast is up to date with the current chat."; -} diff --git a/surfsense_web/components/chat/ScrollUtils.tsx b/surfsense_web/components/chat/ScrollUtils.tsx deleted file mode 100644 index 3984246ae..000000000 --- a/surfsense_web/components/chat/ScrollUtils.tsx +++ /dev/null @@ -1,81 +0,0 @@ -import { type RefObject, useEffect } from "react"; - -/** - * Function to scroll to the bottom of a container - */ -export const scrollToBottom = (ref: RefObject) => { - ref.current?.scrollIntoView({ behavior: "smooth" }); -}; - -/** - * Hook to scroll to bottom when messages change - */ -export const useScrollToBottom = (ref: RefObject, dependencies: any[]) => { - useEffect(() => { - scrollToBottom(ref); - }, dependencies); -}; - -/** - * Function to check scroll position and update indicators - */ -export const updateScrollIndicators = ( - tabsListRef: RefObject, - setCanScrollLeft: (value: boolean) => void, - setCanScrollRight: (value: boolean) => void -) => { - if (tabsListRef.current) { - const { scrollLeft, scrollWidth, clientWidth } = tabsListRef.current; - setCanScrollLeft(scrollLeft > 0); - setCanScrollRight(scrollLeft + clientWidth < scrollWidth - 10); // 10px buffer - } -}; - -/** - * Hook to initialize scroll indicators and add resize listener - */ -export const useScrollIndicators = ( - tabsListRef: RefObject, - setCanScrollLeft: (value: boolean) => void, - setCanScrollRight: (value: boolean) => void -) => { - const updateIndicators = () => - updateScrollIndicators(tabsListRef, setCanScrollLeft, setCanScrollRight); - - useEffect(() => { - updateIndicators(); - // Add resize listener to update indicators when window size changes - window.addEventListener("resize", updateIndicators); - return () => window.removeEventListener("resize", updateIndicators); - }, [updateIndicators]); - - return updateIndicators; -}; - -/** - * Function to scroll tabs list left - */ -export const scrollTabsLeft = ( - 
tabsListRef: RefObject, - updateIndicators: () => void -) => { - if (tabsListRef.current) { - tabsListRef.current.scrollBy({ left: -200, behavior: "smooth" }); - // Update indicators after scrolling - setTimeout(updateIndicators, 300); - } -}; - -/** - * Function to scroll tabs list right - */ -export const scrollTabsRight = ( - tabsListRef: RefObject, - updateIndicators: () => void -) => { - if (tabsListRef.current) { - tabsListRef.current.scrollBy({ left: 200, behavior: "smooth" }); - // Update indicators after scrolling - setTimeout(updateIndicators, 300); - } -}; diff --git a/surfsense_web/components/chat/SegmentedControl.tsx b/surfsense_web/components/chat/SegmentedControl.tsx deleted file mode 100644 index 91314ae18..000000000 --- a/surfsense_web/components/chat/SegmentedControl.tsx +++ /dev/null @@ -1,41 +0,0 @@ -import type React from "react"; -import { Button } from "@/components/ui/button"; - -type SegmentedControlProps = { - value: T; - onChange: (value: T) => void; - options: Array<{ - value: T; - label: string; - icon: React.ReactNode; - }>; -}; - -/** - * A segmented control component for selecting between different options - */ -function SegmentedControl({ - value, - onChange, - options, -}: SegmentedControlProps) { - return ( -
- {options.map((option) => ( - - ))} -
- ); -} - -export default SegmentedControl; diff --git a/surfsense_web/components/chat/SourceDetailSheet.tsx b/surfsense_web/components/chat/SourceDetailSheet.tsx deleted file mode 100644 index 7f5ebbeab..000000000 --- a/surfsense_web/components/chat/SourceDetailSheet.tsx +++ /dev/null @@ -1,254 +0,0 @@ -"use client"; - -import { useQuery } from "@tanstack/react-query"; -import { ChevronDown, ChevronUp, ExternalLink, Loader2 } from "lucide-react"; -import type React from "react"; -import { type ReactNode, useEffect, useLayoutEffect, useRef, useState } from "react"; -import { MarkdownViewer } from "@/components/markdown-viewer"; -import { Button } from "@/components/ui/button"; -import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible"; -import { ScrollArea } from "@/components/ui/scroll-area"; -import { - Sheet, - SheetContent, - SheetDescription, - SheetHeader, - SheetTitle, -} from "@/components/ui/sheet"; -import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; -import { documentsApiService } from "@/lib/apis/documents-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; -import { cn } from "@/lib/utils"; - -interface SourceDetailSheetProps { - open: boolean; - onOpenChange: (open: boolean) => void; - chunkId: number; - sourceType: string; - title: string; - description?: string; - url?: string; - children?: ReactNode; -} - -const formatDocumentType = (type: string) => { - return type - .split("_") - .map((word) => word.charAt(0) + word.slice(1).toLowerCase()) - .join(" "); -}; - -export function SourceDetailSheet({ - open, - onOpenChange, - chunkId, - sourceType, - title, - description, - url, - children, -}: SourceDetailSheetProps) { - const chunksContainerRef = useRef(null); - const highlightedChunkRef = useRef(null); - const [summaryOpen, setSummaryOpen] = useState(false); - - const { - data: document, - isLoading: isDocumentByChunkFetching, - error: documentByChunkFetchingError, - } = 
useQuery({ - queryKey: cacheKeys.documents.byChunk(chunkId.toString()), - queryFn: () => documentsApiService.getDocumentByChunk({ chunk_id: chunkId }), - enabled: !!chunkId && open, - staleTime: 5 * 60 * 1000, // 5 minutes - }); - - // Check if this is a source type that should render directly from node - const isDirectRenderSource = - sourceType === "TAVILY_API" || - sourceType === "LINKUP_API" || - sourceType === "SEARXNG_API" || - sourceType === "BAIDU_SEARCH_API"; - - useEffect(() => { - // Scroll to highlighted chunk when document loads - if (document) { - setTimeout(() => { - highlightedChunkRef.current?.scrollIntoView({ - behavior: "smooth", - block: "start", - }); - }, 100); - } - }, [document, open]); - - const handleUrlClick = (e: React.MouseEvent, clickUrl: string) => { - e.preventDefault(); - e.stopPropagation(); - window.open(clickUrl, "_blank", "noopener,noreferrer"); - }; - - return ( - - {children} - - - - {getConnectorIcon(sourceType)} - {document?.title || title} - - - {document - ? formatDocumentType(document.document_type) - : sourceType && formatDocumentType(sourceType)} - - - - {!isDirectRenderSource && isDocumentByChunkFetching && ( -
- -
- )} - - {!isDirectRenderSource && documentByChunkFetchingError && ( -
-

- {documentByChunkFetchingError.message || "Failed to load document"} -

-
- )} - - {/* Direct render for web search providers */} - {isDirectRenderSource && ( - -
- {/* External Link */} - {url && ( -
- -
- )} - - {/* Source Information */} -
-

Source Information

-
- {title || "Untitled"} -
-
- {description || "No content available"} -
-
-
-
- )} - - {/* API-fetched document content */} - {!isDirectRenderSource && document && ( - -
- {/* Document Metadata */} - {document.document_metadata && Object.keys(document.document_metadata).length > 0 && ( -
-

Document Information

-
- {Object.entries(document.document_metadata).map(([key, value]) => ( -
-
- {key.replace(/_/g, " ")}: -
-
{String(value)}
-
- ))} -
-
- )} - - {/* External Link */} - {url && ( -
- -
- )} - - {/* Chunks */} -
-
- {/* Header row: header and button side by side */} -
-

Document Content

- {document.content && ( - - - Summary - {summaryOpen ? ( - - ) : ( - - )} - - - )} -
- {/* Expanded summary content: always full width, below the row */} - {document.content && ( - - -
- -
-
-
- )} -
- - {document.chunks.map((chunk, idx) => ( -
-
- - Chunk {idx + 1} of {document.chunks.length} - - {chunk.id === chunkId && ( - - Referenced Chunk - - )} -
-
- -
-
- ))} -
-
-
- )} -
-
- ); -} diff --git a/surfsense_web/components/chat/SourceUtils.tsx b/surfsense_web/components/chat/SourceUtils.tsx deleted file mode 100644 index a384a7b56..000000000 --- a/surfsense_web/components/chat/SourceUtils.tsx +++ /dev/null @@ -1,69 +0,0 @@ -import type { Connector, Source } from "./types"; - -/** - * Function to get sources for the main view - */ -export const getMainViewSources = (connector: Connector, initialSourcesDisplay: number) => { - return connector.sources?.slice(0, initialSourcesDisplay); -}; - -/** - * Function to get filtered sources for the dialog - */ -export const getFilteredSources = (connector: Connector, sourceFilter: string) => { - if (!sourceFilter.trim()) { - return connector.sources; - } - - const filter = sourceFilter.toLowerCase().trim(); - return connector.sources?.filter( - (source) => - source.title.toLowerCase().includes(filter) || - source.description.toLowerCase().includes(filter) - ); -}; - -/** - * Function to get paginated and filtered sources for the dialog - */ -export const getPaginatedDialogSources = ( - connector: Connector, - sourceFilter: string, - expandedSources: boolean, - sourcesPage: number, - sourcesPerPage: number -) => { - const filteredSources = getFilteredSources(connector, sourceFilter); - - if (expandedSources) { - return filteredSources; - } - return filteredSources?.slice(0, sourcesPage * sourcesPerPage); -}; - -/** - * Function to get the count of sources for a connector type - */ -export const getSourcesCount = (connectorSources: Connector[], connectorType: string) => { - const connector = connectorSources.find((c) => c.type === connectorType); - return connector?.sources?.length || 0; -}; - -/** - * Function to get a citation source by ID - */ -export const getCitationSource = ( - citationId: number, - connectorSources: Connector[] -): Source | null => { - for (const connector of connectorSources) { - const source = connector.sources?.find((s) => s.id === citationId); - if (source) { - return { - 
...source, - connectorType: connector.type, - }; - } - } - return null; -}; diff --git a/surfsense_web/components/chat/index.ts b/surfsense_web/components/chat/index.ts deleted file mode 100644 index 74bd5eea0..000000000 --- a/surfsense_web/components/chat/index.ts +++ /dev/null @@ -1,9 +0,0 @@ -// Export all components and utilities from the chat folder - -export * from "./Citation"; -export * from "./CodeBlock"; -export * from "./ConnectorComponents"; -export * from "./ScrollUtils"; -export { default as SegmentedControl } from "./SegmentedControl"; -export * from "./SourceUtils"; -export * from "./types"; diff --git a/surfsense_web/components/chat/types.ts b/surfsense_web/components/chat/types.ts deleted file mode 100644 index 8e4e1e295..000000000 --- a/surfsense_web/components/chat/types.ts +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Types for chat components - */ - -export type Source = { - id: number; - title: string; - description: string; - url: string; - connectorType?: string; -}; - -export type Connector = { - id: number; - type: string; - name: string; - sources?: Source[]; -}; - -export type StatusMessage = { - id: number; - message: string; - type: "info" | "success" | "error" | "warning"; - timestamp: string; -}; - -export type ChatMessage = { - id: string; - role: "user" | "assistant"; - content: string; - timestamp?: string; -}; - -// Define message types to match useChat() structure -export type MessageRole = "user" | "assistant" | "system" | "data"; - -export interface ToolInvocation { - state: "call" | "result"; - toolCallId: string; - toolName: string; - args: any; - result?: any; -} - -export interface ToolInvocationUIPart { - type: "tool-invocation"; - toolInvocation: ToolInvocation; -} - -export type ResearchMode = "QNA"; diff --git a/surfsense_web/components/contact/contact-form.tsx b/surfsense_web/components/contact/contact-form.tsx index 435ff1365..368f40c4b 100644 --- a/surfsense_web/components/contact/contact-form.tsx +++ 
b/surfsense_web/components/contact/contact-form.tsx @@ -14,9 +14,9 @@ import { cn } from "@/lib/utils"; // Define validation schema matching the database schema const contactFormSchema = z.object({ name: z.string().min(1, "Name is required").max(255, "Name is too long"), - email: z.string().email("Invalid email address").max(255, "Email is too long"), + email: z.email("Invalid email address").max(255, "Email is too long"), company: z.string().min(1, "Company is required").max(255, "Company name is too long"), - message: z.string().optional().default(""), + message: z.string().optional().prefault(""), }); type ContactFormData = z.infer; diff --git a/surfsense_web/components/dashboard-breadcrumb.tsx b/surfsense_web/components/dashboard-breadcrumb.tsx index e2115a002..df1021762 100644 --- a/surfsense_web/components/dashboard-breadcrumb.tsx +++ b/surfsense_web/components/dashboard-breadcrumb.tsx @@ -1,11 +1,9 @@ "use client"; import { useQuery } from "@tanstack/react-query"; -import { useAtomValue } from "jotai"; import { usePathname } from "next/navigation"; import { useTranslations } from "next-intl"; import React, { useEffect, useState } from "react"; -import { activeChatAtom } from "@/atoms/chats/chat-query.atoms"; import { Breadcrumb, BreadcrumbItem, @@ -26,7 +24,6 @@ interface BreadcrumbItemInterface { export function DashboardBreadcrumb() { const t = useTranslations("breadcrumb"); const pathname = usePathname(); - const { data: activeChatState } = useAtomValue(activeChatAtom); // Extract search space ID and chat ID from pathname const segments = pathname.split("/").filter(Boolean); const searchSpaceId = segments[0] === "dashboard" && segments[1] ? 
segments[1] : null; @@ -98,13 +95,11 @@ export function DashboardBreadcrumb() { // Map section names to more readable labels const sectionLabels: Record = { - researcher: t("researcher"), + "new-chat": t("chat") || "Chat", documents: t("documents"), connectors: t("connectors"), sources: "Sources", - podcasts: t("podcasts"), logs: t("logs"), - chats: t("chats"), settings: t("settings"), editor: t("editor"), }; @@ -169,15 +164,15 @@ export function DashboardBreadcrumb() { return breadcrumbs; } - // Handle researcher sub-sections (chat IDs) - if (section === "researcher") { - // Use the actual chat title if available, otherwise fall back to the ID - const chatLabel = activeChatState?.chatDetails?.title || subSection; + // Handle new-chat sub-sections (thread IDs) + if (section === "new-chat") { breadcrumbs.push({ - label: t("researcher"), - href: `/dashboard/${segments[1]}/researcher`, + label: t("chat") || "Chat", + href: `/dashboard/${segments[1]}/new-chat`, }); - breadcrumbs.push({ label: chatLabel }); + if (subSection) { + breadcrumbs.push({ label: `Thread ${subSection}` }); + } return breadcrumbs; } diff --git a/surfsense_web/components/homepage/footer-new.tsx b/surfsense_web/components/homepage/footer-new.tsx index bda528bf7..b31e61f2b 100644 --- a/surfsense_web/components/homepage/footer-new.tsx +++ b/surfsense_web/components/homepage/footer-new.tsx @@ -4,9 +4,7 @@ import { IconBrandLinkedin, IconBrandTwitter, } from "@tabler/icons-react"; -import Image from "next/image"; import Link from "next/link"; -import React from "react"; import { Logo } from "@/components/Logo"; export function FooterNew() { diff --git a/surfsense_web/components/homepage/footer.tsx b/surfsense_web/components/homepage/footer.tsx deleted file mode 100644 index 88e640e81..000000000 --- a/surfsense_web/components/homepage/footer.tsx +++ /dev/null @@ -1,97 +0,0 @@ -"use client"; -import { - IconBrandDiscord, - IconBrandGithub, - IconBrandLinkedin, - IconBrandTwitter, -} from 
"@tabler/icons-react"; -import Link from "next/link"; -import type React from "react"; -import { cn } from "@/lib/utils"; - -export function Footer() { - const pages = [ - { - title: "Privacy", - href: "/privacy", - }, - { - title: "Terms", - href: "/terms", - }, - ]; - - return ( -
-
-
-
-
- SurfSense -
-
- -
    - {pages.map((page) => ( -
  • - - {page.title} - -
  • - ))} -
- - -
-
-

- © SurfSense 2025 -

-
- - - - - - - - - - - - -
-
-
-
- ); -} - -const GridLineHorizontal = ({ className, offset }: { className?: string; offset?: string }) => { - return ( -
- ); -}; diff --git a/surfsense_web/components/homepage/integrations.tsx b/surfsense_web/components/homepage/integrations.tsx index 0a5d93a98..53aaf624a 100644 --- a/surfsense_web/components/homepage/integrations.tsx +++ b/surfsense_web/components/homepage/integrations.tsx @@ -16,7 +16,10 @@ const INTEGRATIONS: Integration[] = [ { name: "Elasticsearch", icon: "https://cdn.simpleicons.org/elastic/00A9E5" }, // Communication - { name: "Slack", icon: "https://cdn.simpleicons.org/slack/4A154B" }, + { + name: "Slack", + icon: "https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg", + }, { name: "Discord", icon: "https://cdn.simpleicons.org/discord/5865F2" }, { name: "Gmail", icon: "https://cdn.simpleicons.org/gmail/EA4335" }, diff --git a/surfsense_web/components/markdown-viewer.tsx b/surfsense_web/components/markdown-viewer.tsx index eea7ed2d8..5318ba5d1 100644 --- a/surfsense_web/components/markdown-viewer.tsx +++ b/surfsense_web/components/markdown-viewer.tsx @@ -1,14 +1,6 @@ -import { Check, Copy } from "lucide-react"; import Image from "next/image"; -import { useTheme } from "next-themes"; -import { useEffect, useMemo, useRef, useState } from "react"; -import ReactMarkdown from "react-markdown"; -import { Prism as SyntaxHighlighter } from "react-syntax-highlighter"; -import { oneDark, oneLight } from "react-syntax-highlighter/dist/cjs/styles/prism"; -import rehypeRaw from "rehype-raw"; -import rehypeSanitize from "rehype-sanitize"; -import remarkGfm from "remark-gfm"; -import { Button } from "@/components/ui/button"; +import type { Components } from "react-markdown"; +import { Streamdown } from "streamdown"; import { cn } from "@/lib/utils"; interface MarkdownViewerProps { @@ -17,203 +9,98 @@ interface MarkdownViewerProps { } export function MarkdownViewer({ content, className }: MarkdownViewerProps) { - const ref = useRef(null); - // Memoize the markdown components to prevent unnecessary re-renders - const components = useMemo(() => { - return { 
- // Define custom components for markdown elements - p: ({ node, children, ...props }: any) => ( -

- {children} -

- ), - a: ({ node, children, ...props }: any) => ( - - {children} - - ), - li: ({ node, children, ...props }: any) =>
  • {children}
  • , - ul: ({ node, ...props }: any) =>
      , - ol: ({ node, ...props }: any) =>
        , - h1: ({ node, children, ...props }: any) => ( -

        - {children} -

        - ), - h2: ({ node, children, ...props }: any) => ( -

        - {children} -

        - ), - h3: ({ node, children, ...props }: any) => ( -

        - {children} -

        - ), - h4: ({ node, children, ...props }: any) => ( -

        - {children} -

        - ), - blockquote: ({ node, ...props }: any) => ( -
        - ), - hr: ({ node, ...props }: any) =>
        , - img: ({ node, ...props }: any) => ( - markdown image - ), - table: ({ node, ...props }: any) => ( -
        - - - ), - th: ({ node, ...props }: any) => ( -
        - ), - td: ({ node, ...props }: any) => ( - - ), - code: ({ node, className, children, ...props }: any) => { - const match = /language-(\w+)/.exec(className || ""); - const language = match ? match[1] : ""; - const isInline = !match; + const components: Components = { + // Define custom components for markdown elements + p: ({ children, ...props }) => ( +

        + {children} +

        + ), + a: ({ children, ...props }) => ( + + {children} + + ), + li: ({ children, ...props }) =>
      1. {children}
      2. , + ul: ({ ...props }) =>
          , + ol: ({ ...props }) =>
            , + h1: ({ children, ...props }) => ( +

            + {children} +

            + ), + h2: ({ children, ...props }) => ( +

            + {children} +

            + ), + h3: ({ children, ...props }) => ( +

            + {children} +

            + ), + h4: ({ children, ...props }) => ( +

            + {children} +

            + ), + blockquote: ({ ...props }) => ( +
            + ), + hr: ({ ...props }) =>
            , + img: ({ src, alt, width: _w, height: _h, ...props }) => ( + {alt + ), + table: ({ ...props }) => ( +
            + + + ), + th: ({ ...props }) =>
            , + td: ({ ...props }) => , + code: ({ className, children, ...props }) => { + const match = /language-(\w+)/.exec(className || ""); + const isInline = !match; - if (isInline) { - return ( - - {children} - - ); - } - - // For code blocks, add syntax highlighting and copy functionality + if (isInline) { return ( - - {String(children).replace(/\n$/, "")} - + + {children} + ); - }, - }; - }, []); + } + + // For code blocks, let Streamdown handle syntax highlighting + return ( + + {children} + + ); + }, + }; return ( -
            - +
            + {content} - +
            ); } - -// Code block component with syntax highlighting and copy functionality -const CodeBlock = ({ children, language }: { children: string; language: string }) => { - const [copied, setCopied] = useState(false); - const { resolvedTheme, theme } = useTheme(); - const [mounted, setMounted] = useState(false); - - // Prevent hydration issues - useEffect(() => { - setMounted(true); - }, []); - - const handleCopy = async () => { - await navigator.clipboard.writeText(children); - setCopied(true); - setTimeout(() => setCopied(false), 2000); - }; - - // Choose theme based on current system/user preference - const isDarkTheme = mounted && (resolvedTheme === "dark" || theme === "dark"); - const syntaxTheme = isDarkTheme ? oneDark : oneLight; - - return ( -
            -
            - -
            - {mounted ? ( - - {children} - - ) : ( -
            -
            -						{children}
            -					
            -
            - )} -
            - ); -}; diff --git a/surfsense_web/components/new-chat/chat-header.tsx b/surfsense_web/components/new-chat/chat-header.tsx new file mode 100644 index 000000000..ef1533e23 --- /dev/null +++ b/surfsense_web/components/new-chat/chat-header.tsx @@ -0,0 +1,66 @@ +"use client"; + +import { useCallback, useState } from "react"; +import type { + GlobalNewLLMConfig, + NewLLMConfigPublic, +} from "@/contracts/types/new-llm-config.types"; +import { ModelConfigSidebar } from "./model-config-sidebar"; +import { ModelSelector } from "./model-selector"; + +interface ChatHeaderProps { + searchSpaceId: number; +} + +export function ChatHeader({ searchSpaceId }: ChatHeaderProps) { + const [sidebarOpen, setSidebarOpen] = useState(false); + const [selectedConfig, setSelectedConfig] = useState< + NewLLMConfigPublic | GlobalNewLLMConfig | null + >(null); + const [isGlobal, setIsGlobal] = useState(false); + const [sidebarMode, setSidebarMode] = useState<"create" | "edit" | "view">("view"); + + const handleEditConfig = useCallback( + (config: NewLLMConfigPublic | GlobalNewLLMConfig, global: boolean) => { + setSelectedConfig(config); + setIsGlobal(global); + setSidebarMode(global ? "view" : "edit"); + setSidebarOpen(true); + }, + [] + ); + + const handleAddNew = useCallback(() => { + setSelectedConfig(null); + setIsGlobal(false); + setSidebarMode("create"); + setSidebarOpen(true); + }, []); + + const handleSidebarClose = useCallback((open: boolean) => { + setSidebarOpen(open); + if (!open) { + // Reset state when closing + setSelectedConfig(null); + } + }, []); + + return ( + <> + {/* Header Bar */} +
            + +
            + + {/* Config Sidebar */} + + + ); +} diff --git a/surfsense_web/components/new-chat/model-config-sidebar.tsx b/surfsense_web/components/new-chat/model-config-sidebar.tsx new file mode 100644 index 000000000..f3d3c2dcd --- /dev/null +++ b/surfsense_web/components/new-chat/model-config-sidebar.tsx @@ -0,0 +1,369 @@ +"use client"; + +import { useAtomValue } from "jotai"; +import { AlertCircle, Bot, ChevronRight, Globe, User, X } from "lucide-react"; +import { AnimatePresence, motion } from "motion/react"; +import { useCallback, useEffect, useState } from "react"; +import { toast } from "sonner"; +import { + createNewLLMConfigMutationAtom, + updateLLMPreferencesMutationAtom, + updateNewLLMConfigMutationAtom, +} from "@/atoms/new-llm-config/new-llm-config-mutation.atoms"; +import { LLMConfigForm, type LLMConfigFormData } from "@/components/shared/llm-config-form"; +import { Alert, AlertDescription } from "@/components/ui/alert"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import type { + GlobalNewLLMConfig, + NewLLMConfigPublic, +} from "@/contracts/types/new-llm-config.types"; +import { cn } from "@/lib/utils"; + +interface ModelConfigSidebarProps { + open: boolean; + onOpenChange: (open: boolean) => void; + config: NewLLMConfigPublic | GlobalNewLLMConfig | null; + isGlobal: boolean; + searchSpaceId: number; + mode: "create" | "edit" | "view"; +} + +export function ModelConfigSidebar({ + open, + onOpenChange, + config, + isGlobal, + searchSpaceId, + mode, +}: ModelConfigSidebarProps) { + const [isSubmitting, setIsSubmitting] = useState(false); + + // Mutations - use mutateAsync from the atom value + const { mutateAsync: createConfig } = useAtomValue(createNewLLMConfigMutationAtom); + const { mutateAsync: updateConfig } = useAtomValue(updateNewLLMConfigMutationAtom); + const { mutateAsync: updatePreferences } = useAtomValue(updateLLMPreferencesMutationAtom); + + // Handle escape key + useEffect(() => { 
+ const handleEscape = (e: KeyboardEvent) => { + if (e.key === "Escape" && open) { + onOpenChange(false); + } + }; + window.addEventListener("keydown", handleEscape); + return () => window.removeEventListener("keydown", handleEscape); + }, [open, onOpenChange]); + + // Get title based on mode + const getTitle = () => { + if (mode === "create") return "Add New Configuration"; + if (isGlobal) return "View Global Configuration"; + return "Edit Configuration"; + }; + + // Handle form submit + const handleSubmit = useCallback( + async (data: LLMConfigFormData) => { + setIsSubmitting(true); + try { + if (mode === "create") { + // Create new config + const result = await createConfig({ + ...data, + search_space_id: searchSpaceId, + }); + + // Assign the new config to the agent role + if (result?.id) { + await updatePreferences({ + search_space_id: searchSpaceId, + data: { + agent_llm_id: result.id, + }, + }); + } + + toast.success("Configuration created and assigned!"); + onOpenChange(false); + } else if (!isGlobal && config) { + // Update existing user config + await updateConfig({ + id: config.id, + data: { + name: data.name, + description: data.description, + provider: data.provider, + custom_provider: data.custom_provider, + model_name: data.model_name, + api_key: data.api_key, + api_base: data.api_base, + litellm_params: data.litellm_params, + system_instructions: data.system_instructions, + use_default_system_instructions: data.use_default_system_instructions, + citations_enabled: data.citations_enabled, + }, + }); + toast.success("Configuration updated!"); + onOpenChange(false); + } + } catch (error) { + console.error("Failed to save configuration:", error); + toast.error("Failed to save configuration"); + } finally { + setIsSubmitting(false); + } + }, + [ + mode, + isGlobal, + config, + searchSpaceId, + createConfig, + updateConfig, + updatePreferences, + onOpenChange, + ] + ); + + // Handle "Use this model" for global configs + const handleUseGlobalConfig = 
useCallback(async () => { + if (!config || !isGlobal) return; + setIsSubmitting(true); + try { + await updatePreferences({ + search_space_id: searchSpaceId, + data: { + agent_llm_id: config.id, + }, + }); + toast.success(`Now using ${config.name}`); + onOpenChange(false); + } catch (error) { + console.error("Failed to set model:", error); + toast.error("Failed to set model"); + } finally { + setIsSubmitting(false); + } + }, [config, isGlobal, searchSpaceId, updatePreferences, onOpenChange]); + + return ( + + {open && ( + <> + {/* Backdrop */} + onOpenChange(false)} + /> + + {/* Sidebar Panel */} + + {/* Header */} +
            +
            +
            + +
            +
            +

            {getTitle()}

            +
            + {isGlobal ? ( + + + Global + + ) : mode !== "create" ? ( + + + Custom + + ) : null} + {config && ( + {config.model_name} + )} +
            +
            +
            + +
            + + {/* Content - use overflow-y-auto instead of ScrollArea for better compatibility */} +
            +
            + {/* Global config notice */} + {isGlobal && mode !== "create" && ( + + + + Global configurations are read-only. To customize settings, create a new + configuration based on this template. + + + )} + + {/* Form */} + {mode === "create" ? ( + onOpenChange(false)} + isSubmitting={isSubmitting} + mode="create" + submitLabel="Create & Use" + /> + ) : isGlobal && config ? ( + // Read-only view for global configs +
            + {/* Config Details */} +
            +
            +
            + +

            {config.name}

            +
            + {config.description && ( +
            + +

            {config.description}

            +
            + )} +
            + +
            + +
            +
            + +

            {config.provider}

            +
            +
            + +

            {config.model_name}

            +
            +
            + +
            + +
            +
            + + + {config.citations_enabled ? "Enabled" : "Disabled"} + +
            +
            + + {config.system_instructions && ( + <> +
            +
            + +
            +

            + {config.system_instructions} +

            +
            +
            + + )} +
            + + {/* Action Buttons */} +
            + + +
            +
            + ) : config ? ( + // Edit form for user configs + onOpenChange(false)} + isSubmitting={isSubmitting} + mode="edit" + submitLabel="Save Changes" + /> + ) : null} +
            +
            + + + )} + + ); +} diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx new file mode 100644 index 000000000..89390f957 --- /dev/null +++ b/surfsense_web/components/new-chat/model-selector.tsx @@ -0,0 +1,384 @@ +"use client"; + +import { useAtomValue } from "jotai"; +import { + Bot, + Check, + ChevronDown, + Cloud, + Edit3, + Globe, + Loader2, + Plus, + Settings2, + Sparkles, + User, + Zap, +} from "lucide-react"; +import { useCallback, useMemo, useState } from "react"; +import { toast } from "sonner"; +import { updateLLMPreferencesMutationAtom } from "@/atoms/new-llm-config/new-llm-config-mutation.atoms"; +import { + globalNewLLMConfigsAtom, + llmPreferencesAtom, + newLLMConfigsAtom, +} from "@/atoms/new-llm-config/new-llm-config-query.atoms"; +import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { + Command, + CommandEmpty, + CommandGroup, + CommandInput, + CommandItem, + CommandList, + CommandSeparator, +} from "@/components/ui/command"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import type { + GlobalNewLLMConfig, + NewLLMConfigPublic, +} from "@/contracts/types/new-llm-config.types"; +import { cn } from "@/lib/utils"; + +// Provider icons mapping +const getProviderIcon = (provider: string) => { + const iconClass = "size-4"; + switch (provider?.toUpperCase()) { + case "OPENAI": + return ; + case "ANTHROPIC": + return ; + case "GOOGLE": + return ; + case "GROQ": + return ; + case "OLLAMA": + return ; + case "XAI": + return ; + default: + return ; + } +}; + +interface ModelSelectorProps { + onEdit: (config: NewLLMConfigPublic | GlobalNewLLMConfig, isGlobal: boolean) => void; + onAddNew: () => void; + className?: string; +} + +export function ModelSelector({ onEdit, onAddNew, className }: 
ModelSelectorProps) { + const [open, setOpen] = useState(false); + const [searchQuery, setSearchQuery] = useState(""); + const [isSwitching, setIsSwitching] = useState(false); + + // Fetch configs + const { data: userConfigs, isLoading: userConfigsLoading } = useAtomValue(newLLMConfigsAtom); + const { data: globalConfigs, isLoading: globalConfigsLoading } = + useAtomValue(globalNewLLMConfigsAtom); + const { data: preferences, isLoading: preferencesLoading } = useAtomValue(llmPreferencesAtom); + const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom); + const { mutateAsync: updatePreferences } = useAtomValue(updateLLMPreferencesMutationAtom); + + const isLoading = userConfigsLoading || globalConfigsLoading || preferencesLoading; + + // Get current agent LLM config + const currentConfig = useMemo(() => { + if (!preferences) return null; + + const agentLlmId = preferences.agent_llm_id; + if (agentLlmId === null || agentLlmId === undefined) return null; + + // Check if it's a global config (negative ID) + if (agentLlmId < 0) { + return globalConfigs?.find((c) => c.id === agentLlmId) ?? null; + } + // Otherwise, check user configs + return userConfigs?.find((c) => c.id === agentLlmId) ?? 
null; + }, [preferences, globalConfigs, userConfigs]); + + // Filter configs based on search + const filteredGlobalConfigs = useMemo(() => { + if (!globalConfigs) return []; + if (!searchQuery) return globalConfigs; + const query = searchQuery.toLowerCase(); + return globalConfigs.filter( + (c) => + c.name.toLowerCase().includes(query) || + c.model_name.toLowerCase().includes(query) || + c.provider.toLowerCase().includes(query) + ); + }, [globalConfigs, searchQuery]); + + const filteredUserConfigs = useMemo(() => { + if (!userConfigs) return []; + if (!searchQuery) return userConfigs; + const query = searchQuery.toLowerCase(); + return userConfigs.filter( + (c) => + c.name.toLowerCase().includes(query) || + c.model_name.toLowerCase().includes(query) || + c.provider.toLowerCase().includes(query) + ); + }, [userConfigs, searchQuery]); + + const handleSelectConfig = useCallback( + async (config: NewLLMConfigPublic | GlobalNewLLMConfig) => { + // If already selected, just close + if (currentConfig?.id === config.id) { + setOpen(false); + return; + } + + if (!searchSpaceId) { + toast.error("No search space selected"); + return; + } + + setIsSwitching(true); + try { + await updatePreferences({ + search_space_id: Number(searchSpaceId), + data: { + agent_llm_id: config.id, + }, + }); + toast.success(`Switched to ${config.name}`); + setOpen(false); + } catch (error) { + console.error("Failed to switch model:", error); + toast.error("Failed to switch model"); + } finally { + setIsSwitching(false); + } + }, + [currentConfig, searchSpaceId, updatePreferences] + ); + + const handleEditConfig = useCallback( + (e: React.MouseEvent, config: NewLLMConfigPublic | GlobalNewLLMConfig, isGlobal: boolean) => { + e.stopPropagation(); + onEdit(config, isGlobal); + setOpen(false); + }, + [onEdit] + ); + + return ( + + + + + + + + {/* Switching overlay */} + {isSwitching && ( +
            +
            + + Switching model... +
            +
            + )} + +
            + + +
            + + + +
            + +

            No models found

            +

            Try a different search term

            +
            +
            + + {/* Global Configs Section */} + {filteredGlobalConfigs.length > 0 && ( + +
            + + Global Models +
            + {filteredGlobalConfigs.map((config) => { + const isSelected = currentConfig?.id === config.id; + return ( + handleSelectConfig(config)} + className={cn( + "mx-2 rounded-lg mb-1 cursor-pointer", + "aria-selected:bg-accent/50", + isSelected && "bg-accent/80" + )} + > +
            +
            +
            {getProviderIcon(config.provider)}
            +
            +
            + {config.name} + {isSelected && } +
            +
            + + {config.model_name} + + {config.citations_enabled && ( + + Citations + + )} +
            +
            +
            + +
            +
            + ); + })} +
            + )} + + {filteredGlobalConfigs.length > 0 && filteredUserConfigs.length > 0 && ( + + )} + + {/* User Configs Section */} + {filteredUserConfigs.length > 0 && ( + +
            + + Your Configurations +
            + {filteredUserConfigs.map((config) => { + const isSelected = currentConfig?.id === config.id; + return ( + handleSelectConfig(config)} + className={cn( + "mx-2 rounded-lg mb-1 cursor-pointer", + "aria-selected:bg-accent/50", + isSelected && "bg-accent/80" + )} + > +
            +
            +
            {getProviderIcon(config.provider)}
            +
            +
            + {config.name} + {isSelected && } +
            +
            + + {config.model_name} + + {config.citations_enabled && ( + + Citations + + )} +
            +
            +
            + +
            +
            + ); + })} +
            + )} + + {/* Add New Config Button */} +
            + +
            +
            +
            +
            +
            + ); +} diff --git a/surfsense_web/components/new-chat/source-detail-panel.tsx b/surfsense_web/components/new-chat/source-detail-panel.tsx new file mode 100644 index 000000000..6e3e7cce0 --- /dev/null +++ b/surfsense_web/components/new-chat/source-detail-panel.tsx @@ -0,0 +1,607 @@ +"use client"; + +import { useQuery } from "@tanstack/react-query"; +import { + BookOpen, + ChevronDown, + ChevronUp, + ExternalLink, + FileText, + Hash, + Loader2, + Sparkles, + X, +} from "lucide-react"; +import { AnimatePresence, motion, useReducedMotion } from "motion/react"; +import type React from "react"; +import { forwardRef, type ReactNode, useCallback, useEffect, useRef, useState } from "react"; +import { createPortal } from "react-dom"; +import { MarkdownViewer } from "@/components/markdown-viewer"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible"; +import { ScrollArea } from "@/components/ui/scroll-area"; +import { documentsApiService } from "@/lib/apis/documents-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; +import { cn } from "@/lib/utils"; + +interface SourceDetailPanelProps { + open: boolean; + onOpenChange: (open: boolean) => void; + chunkId: number; + sourceType: string; + title: string; + description?: string; + url?: string; + children?: ReactNode; +} + +const formatDocumentType = (type: string) => { + if (!type) return ""; + return type + .split("_") + .map((word) => word.charAt(0) + word.slice(1).toLowerCase()) + .join(" "); +}; + +// Chunk card component +// For large documents (>30 chunks), we disable animation to prevent layout shifts +// which break auto-scroll functionality +interface ChunkCardProps { + chunk: { id: number; content: string }; + index: number; + totalChunks: number; + isCited: boolean; + isActive: boolean; + disableLayoutAnimation?: boolean; +} + +const 
ChunkCard = forwardRef( + ({ chunk, index, totalChunks, isCited, isActive, disableLayoutAnimation }, ref) => { + return ( +
            + {/* Cited indicator glow effect */} + {isCited &&
            } + + {/* Header */} +
            +
            +
            + {index + 1} +
            + of {totalChunks} chunks +
            + {isCited && ( + + + Cited Source + + )} +
            + + {/* Content */} +
            + +
            +
            + ); + } +); +ChunkCard.displayName = "ChunkCard"; + +export function SourceDetailPanel({ + open, + onOpenChange, + chunkId, + sourceType, + title, + description, + url, + children, +}: SourceDetailPanelProps) { + const scrollAreaRef = useRef(null); + const hasScrolledRef = useRef(false); // Use ref to avoid stale closures + const [summaryOpen, setSummaryOpen] = useState(false); + const [activeChunkIndex, setActiveChunkIndex] = useState(null); + const [mounted, setMounted] = useState(false); + const [hasScrolledToCited, setHasScrolledToCited] = useState(false); + const shouldReduceMotion = useReducedMotion(); + + useEffect(() => { + setMounted(true); + }, []); + + const { + data: documentData, + isLoading: isDocumentByChunkFetching, + error: documentByChunkFetchingError, + } = useQuery({ + queryKey: cacheKeys.documents.byChunk(chunkId.toString()), + queryFn: () => documentsApiService.getDocumentByChunk({ chunk_id: chunkId }), + enabled: !!chunkId && open, + staleTime: 5 * 60 * 1000, + }); + + const isDirectRenderSource = + sourceType === "TAVILY_API" || + sourceType === "LINKUP_API" || + sourceType === "SEARXNG_API" || + sourceType === "BAIDU_SEARCH_API"; + + // Find cited chunk index + const citedChunkIndex = documentData?.chunks?.findIndex((chunk) => chunk.id === chunkId) ?? 
-1; + + // Simple scroll function that scrolls to a chunk by index + const scrollToChunkByIndex = useCallback( + (chunkIndex: number, smooth = true) => { + const scrollContainer = scrollAreaRef.current; + if (!scrollContainer) return; + + const viewport = scrollContainer.querySelector( + "[data-radix-scroll-area-viewport]" + ) as HTMLElement | null; + if (!viewport) return; + + const chunkElement = scrollContainer.querySelector( + `[data-chunk-index="${chunkIndex}"]` + ) as HTMLElement | null; + if (!chunkElement) return; + + // Get positions using getBoundingClientRect for accuracy + const viewportRect = viewport.getBoundingClientRect(); + const chunkRect = chunkElement.getBoundingClientRect(); + + // Calculate where to scroll to center the chunk + const currentScrollTop = viewport.scrollTop; + const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop; + const scrollTarget = + chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2; + + viewport.scrollTo({ + top: Math.max(0, scrollTarget), + behavior: smooth && !shouldReduceMotion ? 
"smooth" : "auto", + }); + + setActiveChunkIndex(chunkIndex); + }, + [shouldReduceMotion] + ); + + // Callback ref for the cited chunk - scrolls when the element mounts + const citedChunkRefCallback = useCallback( + (node: HTMLDivElement | null) => { + if (node && !hasScrolledRef.current && open) { + hasScrolledRef.current = true; // Mark immediately to prevent duplicate scrolls + + // Store the node reference for the delayed scroll + const scrollToCitedChunk = () => { + const scrollContainer = scrollAreaRef.current; + if (!scrollContainer || !node.isConnected) return false; + + const viewport = scrollContainer.querySelector( + "[data-radix-scroll-area-viewport]" + ) as HTMLElement | null; + if (!viewport) return false; + + // Get positions + const viewportRect = viewport.getBoundingClientRect(); + const chunkRect = node.getBoundingClientRect(); + + // Calculate scroll position to center the chunk + const currentScrollTop = viewport.scrollTop; + const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop; + const scrollTarget = + chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2; + + viewport.scrollTo({ + top: Math.max(0, scrollTarget), + behavior: "auto", // Instant scroll for initial positioning + }); + + return true; + }; + + // Scroll multiple times with delays to handle progressive content rendering + // Each subsequent scroll will correct for any layout shifts + const scrollAttempts = [50, 150, 300, 600, 1000]; + + scrollAttempts.forEach((delay) => { + setTimeout(() => { + scrollToCitedChunk(); + }, delay); + }); + + // After final attempt, mark state as scrolled + setTimeout( + () => { + setHasScrolledToCited(true); + setActiveChunkIndex(citedChunkIndex); + }, + scrollAttempts[scrollAttempts.length - 1] + 50 + ); + } + }, + [open, citedChunkIndex] + ); + + // Reset scroll state when panel closes + useEffect(() => { + if (!open) { + hasScrolledRef.current = false; + setHasScrolledToCited(false); + 
setActiveChunkIndex(null); + } + }, [open]); + + // Handle escape key + useEffect(() => { + const handleEscape = (e: KeyboardEvent) => { + if (e.key === "Escape" && open) { + onOpenChange(false); + } + }; + window.addEventListener("keydown", handleEscape); + return () => window.removeEventListener("keydown", handleEscape); + }, [open, onOpenChange]); + + // Prevent body scroll when open + useEffect(() => { + if (open) { + document.body.style.overflow = "hidden"; + } else { + document.body.style.overflow = ""; + } + return () => { + document.body.style.overflow = ""; + }; + }, [open]); + + const handleUrlClick = (e: React.MouseEvent, clickUrl: string) => { + e.preventDefault(); + e.stopPropagation(); + window.open(clickUrl, "_blank", "noopener,noreferrer"); + }; + + const scrollToChunk = useCallback( + (index: number) => { + scrollToChunkByIndex(index, true); + }, + [scrollToChunkByIndex] + ); + + const panelContent = ( + + {open && ( + <> + {/* Backdrop */} + onOpenChange(false)} + /> + + {/* Panel */} + + {/* Header */} + +
            +

            + {documentData?.title || title || "Source Document"} +

            +

            + {documentData + ? formatDocumentType(documentData.document_type) + : sourceType && formatDocumentType(sourceType)} + {documentData?.chunks && ( + + • {documentData.chunks.length} chunk + {documentData.chunks.length !== 1 ? "s" : ""} + + )} +

            +
            +
            + {url && ( + + )} + +
            +
            + + {/* Loading State */} + {!isDirectRenderSource && isDocumentByChunkFetching && ( +
            + +
            +
            + +
            +

            Loading document...

            + +
            + )} + + {/* Error State */} + {!isDirectRenderSource && documentByChunkFetchingError && ( +
            + +
            + +
            +
            +

            + Failed to load document +

            +

            + {documentByChunkFetchingError.message || + "An unexpected error occurred. Please try again."} +

            +
            + +
            +
            + )} + + {/* Direct render for web search providers */} + {isDirectRenderSource && ( + +
            + {url && ( + + )} + +

            + + Source Information +

            +
            + {title || "Untitled"} +
            +
            + {description || "No content available"} +
            +
            +
            +
            + )} + + {/* API-fetched document content */} + {!isDirectRenderSource && documentData && ( +
            + {/* Chunk Navigation Sidebar */} + {documentData.chunks.length > 1 && ( + + +
            + {documentData.chunks.map((chunk, idx) => { + const isCited = chunk.id === chunkId; + const isActive = activeChunkIndex === idx; + return ( + scrollToChunk(idx)} + initial={{ opacity: 0, scale: 0.8 }} + animate={{ opacity: 1, scale: 1 }} + transition={{ delay: Math.min(idx * 0.02, 0.2) }} + className={cn( + "relative w-11 h-9 mx-auto rounded-lg text-xs font-semibold transition-all duration-200 flex items-center justify-center", + isCited + ? "bg-primary text-primary-foreground shadow-md" + : isActive + ? "bg-muted text-foreground" + : "bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground" + )} + title={isCited ? `Chunk ${idx + 1} (Cited)` : `Chunk ${idx + 1}`} + > + {idx + 1} + {isCited && ( + + + + )} + + ); + })} +
            +
            +
            + )} + + {/* Main Content */} + +
            + {/* Document Metadata */} + {documentData.document_metadata && + Object.keys(documentData.document_metadata).length > 0 && ( + +

            + + Document Information +

            +
            + {Object.entries(documentData.document_metadata).map(([key, value]) => ( +
            +
            + {key.replace(/_/g, " ")} +
            +
            {String(value)}
            +
            + ))} +
            +
            + )} + + {/* Summary Collapsible */} + {documentData.content && ( + + + + + + Document Summary + + + + + + + + + + + + + )} + + {/* Chunks Header */} +
            +

            + + Content Chunks +

            + {citedChunkIndex !== -1 && ( + + )} +
            + + {/* Chunks */} +
            + {documentData.chunks.map((chunk, idx) => { + const isCited = chunk.id === chunkId; + return ( + 30} + /> + ); + })} +
            +
            +
            +
            + )} +
            + + )} + + ); + + if (!mounted) return <>{children}; + + return ( + <> + {children} + {createPortal(panelContent, globalThis.document.body)} + + ); +} diff --git a/surfsense_web/components/onboard/index.ts b/surfsense_web/components/onboard/index.ts deleted file mode 100644 index 607ba4e7d..000000000 --- a/surfsense_web/components/onboard/index.ts +++ /dev/null @@ -1,8 +0,0 @@ -export { OnboardActionCard } from "./onboard-action-card"; -export { OnboardAdvancedSettings } from "./onboard-advanced-settings"; -export { OnboardHeader } from "./onboard-header"; -export { OnboardLLMSetup } from "./onboard-llm-setup"; -export { OnboardLoading } from "./onboard-loading"; -export { OnboardStats } from "./onboard-stats"; -export { SetupLLMStep } from "./setup-llm-step"; -export { SetupPromptStep } from "./setup-prompt-step"; diff --git a/surfsense_web/components/onboard/onboard-action-card.tsx b/surfsense_web/components/onboard/onboard-action-card.tsx deleted file mode 100644 index c6bb41dbf..000000000 --- a/surfsense_web/components/onboard/onboard-action-card.tsx +++ /dev/null @@ -1,114 +0,0 @@ -"use client"; - -import { ArrowRight, CheckCircle, type LucideIcon } from "lucide-react"; -import { motion } from "motion/react"; -import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; -import { cn } from "@/lib/utils"; - -interface OnboardActionCardProps { - title: string; - description: string; - icon: LucideIcon; - features: string[]; - buttonText: string; - onClick: () => void; - colorScheme: "emerald" | "blue" | "violet"; - delay?: number; -} - -const colorSchemes = { - emerald: { - iconBg: "bg-emerald-500/10 dark:bg-emerald-500/20", - iconRing: "ring-emerald-500/20 dark:ring-emerald-500/30", - iconColor: "text-emerald-600 dark:text-emerald-400", - checkColor: "text-emerald-500", - buttonBg: "bg-emerald-600 hover:bg-emerald-500", - hoverBorder: 
"hover:border-emerald-500/50", - }, - blue: { - iconBg: "bg-blue-500/10 dark:bg-blue-500/20", - iconRing: "ring-blue-500/20 dark:ring-blue-500/30", - iconColor: "text-blue-600 dark:text-blue-400", - checkColor: "text-blue-500", - buttonBg: "bg-blue-600 hover:bg-blue-500", - hoverBorder: "hover:border-blue-500/50", - }, - violet: { - iconBg: "bg-violet-500/10 dark:bg-violet-500/20", - iconRing: "ring-violet-500/20 dark:ring-violet-500/30", - iconColor: "text-violet-600 dark:text-violet-400", - checkColor: "text-violet-500", - buttonBg: "bg-violet-600 hover:bg-violet-500", - hoverBorder: "hover:border-violet-500/50", - }, -}; - -export function OnboardActionCard({ - title, - description, - icon: Icon, - features, - buttonText, - onClick, - colorScheme, - delay = 0, -}: OnboardActionCardProps) { - const colors = colorSchemes[colorScheme]; - - return ( - - - - - - - {title} - {description} - - - -
            - {features.map((feature, index) => ( -
            - - {feature} -
            - ))} -
            - - -
            -
            -
            - ); -} diff --git a/surfsense_web/components/onboard/onboard-advanced-settings.tsx b/surfsense_web/components/onboard/onboard-advanced-settings.tsx deleted file mode 100644 index b2b9c5080..000000000 --- a/surfsense_web/components/onboard/onboard-advanced-settings.tsx +++ /dev/null @@ -1,144 +0,0 @@ -"use client"; - -import { ChevronDown, MessageSquare, Settings2 } from "lucide-react"; -import { AnimatePresence, motion } from "motion/react"; -import { SetupLLMStep } from "@/components/onboard/setup-llm-step"; -import { SetupPromptStep } from "@/components/onboard/setup-prompt-step"; -import { Card, CardContent } from "@/components/ui/card"; -import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible"; -import { cn } from "@/lib/utils"; - -interface OnboardAdvancedSettingsProps { - searchSpaceId: number; - showLLMSettings: boolean; - setShowLLMSettings: (show: boolean) => void; - showPromptSettings: boolean; - setShowPromptSettings: (show: boolean) => void; - onConfigCreated: () => void; - onConfigDeleted: () => void; - onPreferencesUpdated: () => Promise; -} - -export function OnboardAdvancedSettings({ - searchSpaceId, - showLLMSettings, - setShowLLMSettings, - showPromptSettings, - setShowPromptSettings, - onConfigCreated, - onConfigDeleted, - onPreferencesUpdated, -}: OnboardAdvancedSettingsProps) { - return ( - - {/* LLM Configuration */} - - - - -
            -
            -
            - -
            -
            -

            LLM Configuration

            -

            - Customize AI models and role assignments -

            -
            -
            - - - -
            -
            -
            -
            - - - - {showLLMSettings && ( - - - - - - - - )} - - -
            - - {/* Prompt Configuration */} - - - - -
            -
            -
            - -
            -
            -

            AI Response Settings

            -

            - Configure citations and custom instructions (Optional) -

            -
            -
            - - - -
            -
            -
            -
            - - - - {showPromptSettings && ( - - - - setShowPromptSettings(false)} - /> - - - - )} - - -
            -
            - ); -} diff --git a/surfsense_web/components/onboard/onboard-header.tsx b/surfsense_web/components/onboard/onboard-header.tsx deleted file mode 100644 index d84bb5adc..000000000 --- a/surfsense_web/components/onboard/onboard-header.tsx +++ /dev/null @@ -1,56 +0,0 @@ -"use client"; - -import { CheckCircle } from "lucide-react"; -import { motion } from "motion/react"; -import { Logo } from "@/components/Logo"; -import { Badge } from "@/components/ui/badge"; - -interface OnboardHeaderProps { - title: string; - subtitle: string; - isReady?: boolean; -} - -export function OnboardHeader({ title, subtitle, isReady }: OnboardHeaderProps) { - return ( - - - - - - -

            {title}

            -

            {subtitle}

            -
            - - {isReady && ( - - - - AI Configuration Complete - - - )} -
            - ); -} diff --git a/surfsense_web/components/onboard/onboard-llm-setup.tsx b/surfsense_web/components/onboard/onboard-llm-setup.tsx deleted file mode 100644 index b0b2d3fac..000000000 --- a/surfsense_web/components/onboard/onboard-llm-setup.tsx +++ /dev/null @@ -1,93 +0,0 @@ -"use client"; - -import { Bot } from "lucide-react"; -import { motion } from "motion/react"; -import { Logo } from "@/components/Logo"; -import { SetupLLMStep } from "@/components/onboard/setup-llm-step"; -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; - -interface OnboardLLMSetupProps { - searchSpaceId: number; - title: string; - configTitle: string; - configDescription: string; - onConfigCreated: () => void; - onConfigDeleted: () => void; - onPreferencesUpdated: () => Promise; -} - -export function OnboardLLMSetup({ - searchSpaceId, - title, - configTitle, - configDescription, - onConfigCreated, - onConfigDeleted, - onPreferencesUpdated, -}: OnboardLLMSetupProps) { - return ( -
            - - {/* Header */} -
            - - - - - {title} - - - Configure your AI model to get started - -
            - - {/* LLM Setup Card */} - - - -
            -
            - -
            - {configTitle} -
            - {configDescription} -
            - - - -
            -
            -
            -
            - ); -} diff --git a/surfsense_web/components/onboard/onboard-loading.tsx b/surfsense_web/components/onboard/onboard-loading.tsx deleted file mode 100644 index 4a85736d2..000000000 --- a/surfsense_web/components/onboard/onboard-loading.tsx +++ /dev/null @@ -1,47 +0,0 @@ -"use client"; - -import { Wand2 } from "lucide-react"; -import { motion } from "motion/react"; - -interface OnboardLoadingProps { - title: string; - subtitle: string; -} - -export function OnboardLoading({ title, subtitle }: OnboardLoadingProps) { - return ( -
            - -
            - - - -
            -

            {title}

            -

            {subtitle}

            -
            - {[0, 1, 2].map((i) => ( - - ))} -
            -
            -
            - ); -} diff --git a/surfsense_web/components/onboard/onboard-stats.tsx b/surfsense_web/components/onboard/onboard-stats.tsx deleted file mode 100644 index 0918c74e2..000000000 --- a/surfsense_web/components/onboard/onboard-stats.tsx +++ /dev/null @@ -1,38 +0,0 @@ -"use client"; - -import { Bot, Brain, Sparkles } from "lucide-react"; -import { motion } from "motion/react"; -import { Badge } from "@/components/ui/badge"; - -interface OnboardStatsProps { - globalConfigsCount: number; - userConfigsCount: number; -} - -export function OnboardStats({ globalConfigsCount, userConfigsCount }: OnboardStatsProps) { - return ( - - {globalConfigsCount > 0 && ( - - - {globalConfigsCount} Global Model{globalConfigsCount > 1 ? "s" : ""} - - )} - {userConfigsCount > 0 && ( - - - {userConfigsCount} Custom Config{userConfigsCount > 1 ? "s" : ""} - - )} - - - All Roles Assigned - - - ); -} diff --git a/surfsense_web/components/onboard/setup-llm-step.tsx b/surfsense_web/components/onboard/setup-llm-step.tsx deleted file mode 100644 index 97555c2f9..000000000 --- a/surfsense_web/components/onboard/setup-llm-step.tsx +++ /dev/null @@ -1,813 +0,0 @@ -"use client"; - -import { useAtomValue } from "jotai"; -import { - AlertCircle, - Bot, - Brain, - Check, - CheckCircle, - ChevronDown, - ChevronsUpDown, - ChevronUp, - Plus, - Trash2, - Zap, -} from "lucide-react"; -import { motion } from "motion/react"; -import { useTranslations } from "next-intl"; -import { useEffect, useState } from "react"; -import { toast } from "sonner"; -import { - createLLMConfigMutationAtom, - deleteLLMConfigMutationAtom, - updateLLMPreferencesMutationAtom, -} from "@/atoms/llm-config/llm-config-mutation.atoms"; -import { - globalLLMConfigsAtom, - llmConfigsAtom, - llmPreferencesAtom, -} from "@/atoms/llm-config/llm-config-query.atoms"; -import { Alert, AlertDescription } from "@/components/ui/alert"; -import { Badge } from "@/components/ui/badge"; -import { Button } from "@/components/ui/button"; 
-import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; -import { - Command, - CommandEmpty, - CommandGroup, - CommandInput, - CommandItem, - CommandList, -} from "@/components/ui/command"; -import { Input } from "@/components/ui/input"; -import { Label } from "@/components/ui/label"; -import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from "@/components/ui/select"; -import { Separator } from "@/components/ui/separator"; -import { LANGUAGES } from "@/contracts/enums/languages"; -import { getModelsByProvider } from "@/contracts/enums/llm-models"; -import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers"; -import { type CreateLLMConfigRequest, LLMConfig } from "@/contracts/types/llm-config.types"; -import { cn } from "@/lib/utils"; -import InferenceParamsEditor from "../inference-params-editor"; - -interface SetupLLMStepProps { - searchSpaceId: number; - onConfigCreated?: () => void; - onConfigDeleted?: () => void; - onPreferencesUpdated?: () => Promise; -} - -const ROLE_DESCRIPTIONS = { - long_context: { - icon: Brain, - key: "long_context_llm_id" as const, - titleKey: "long_context_llm_title", - descKey: "long_context_llm_desc", - examplesKey: "long_context_llm_examples", - color: - "bg-blue-100 text-blue-800 border-blue-200 dark:bg-blue-950 dark:text-blue-200 dark:border-blue-800", - }, - fast: { - icon: Zap, - key: "fast_llm_id" as const, - titleKey: "fast_llm_title", - descKey: "fast_llm_desc", - examplesKey: "fast_llm_examples", - color: - "bg-green-100 text-green-800 border-green-200 dark:bg-green-950 dark:text-green-200 dark:border-green-800", - }, - strategic: { - icon: Bot, - key: "strategic_llm_id" as const, - titleKey: "strategic_llm_title", - descKey: "strategic_llm_desc", - examplesKey: "strategic_llm_examples", - color: - "bg-purple-100 text-purple-800 border-purple-200 
dark:bg-purple-950 dark:text-purple-200 dark:border-purple-800", - }, -}; - -export function SetupLLMStep({ - searchSpaceId, - onConfigCreated, - onConfigDeleted, - onPreferencesUpdated, -}: SetupLLMStepProps) { - const { mutate: createLLMConfig, isPending: isCreatingLlmConfig } = useAtomValue( - createLLMConfigMutationAtom - ); - const t = useTranslations("onboard"); - const { mutateAsync: deleteLLMConfig } = useAtomValue(deleteLLMConfigMutationAtom); - const { data: llmConfigs = [] } = useAtomValue(llmConfigsAtom); - const { data: globalConfigs = [] } = useAtomValue(globalLLMConfigsAtom); - const { data: preferences = {} } = useAtomValue(llmPreferencesAtom); - const { mutateAsync: updatePreferences } = useAtomValue(updateLLMPreferencesMutationAtom); - - const [isAddingNew, setIsAddingNew] = useState(false); - const [formData, setFormData] = useState({ - name: "", - provider: "" as CreateLLMConfigRequest["provider"], // Allow it as Default - custom_provider: "", - model_name: "", - api_key: "", - api_base: "", - language: "English", - litellm_params: {}, - search_space_id: searchSpaceId, - }); - const [modelComboboxOpen, setModelComboboxOpen] = useState(false); - const [showProviderForm, setShowProviderForm] = useState(false); - - // Role assignments state - const [assignments, setAssignments] = useState({ - long_context_llm_id: preferences.long_context_llm_id || "", - fast_llm_id: preferences.fast_llm_id || "", - strategic_llm_id: preferences.strategic_llm_id || "", - }); - - // Combine global and user-specific configs - const allConfigs = [...globalConfigs, ...llmConfigs]; - - useEffect(() => { - setAssignments({ - long_context_llm_id: preferences.long_context_llm_id || "", - fast_llm_id: preferences.fast_llm_id || "", - strategic_llm_id: preferences.strategic_llm_id || "", - }); - }, [preferences]); - - const handleInputChange = (field: keyof CreateLLMConfigRequest, value: string) => { - setFormData((prev) => ({ ...prev, [field]: value })); - }; - - const 
handleSubmit = async (e: React.FormEvent) => { - e.preventDefault(); - if (!formData.name || !formData.provider || !formData.model_name || !formData.api_key) { - toast.error("Please fill in all required fields"); - return; - } - - createLLMConfig(formData, { - onError: (error) => { - console.error("Error creating LLM config:", error); - if (error instanceof Error) { - toast.error(error?.message || "Failed to create LLM config"); - } - }, - onSuccess: () => { - toast.success("LLM config created successfully"); - setFormData({ - name: "", - provider: "" as CreateLLMConfigRequest["provider"], - custom_provider: "", - model_name: "", - api_key: "", - api_base: "", - language: "English", - litellm_params: {}, - search_space_id: searchSpaceId, - }); - onConfigCreated?.(); - }, - onSettled: () => { - setIsAddingNew(false); - }, - }); - }; - - const handleRoleAssignment = async (role: string, configId: string) => { - const newAssignments = { - ...assignments, - [role]: configId === "" ? "" : parseInt(configId), - }; - - setAssignments(newAssignments); - - // Auto-save if this assignment completes all roles - const hasAllAssignments = - newAssignments.long_context_llm_id && - newAssignments.fast_llm_id && - newAssignments.strategic_llm_id; - - if (hasAllAssignments) { - const numericAssignments = { - long_context_llm_id: - typeof newAssignments.long_context_llm_id === "string" - ? parseInt(newAssignments.long_context_llm_id) - : newAssignments.long_context_llm_id, - fast_llm_id: - typeof newAssignments.fast_llm_id === "string" - ? parseInt(newAssignments.fast_llm_id) - : newAssignments.fast_llm_id, - strategic_llm_id: - typeof newAssignments.strategic_llm_id === "string" - ? 
parseInt(newAssignments.strategic_llm_id) - : newAssignments.strategic_llm_id, - }; - - await updatePreferences({ - search_space_id: searchSpaceId, - data: numericAssignments, - }); - - if (onPreferencesUpdated) { - await onPreferencesUpdated(); - } - } - }; - - const selectedProvider = LLM_PROVIDERS.find((p) => p.value === formData.provider); - const availableModels = formData.provider ? getModelsByProvider(formData.provider) : []; - - const handleParamsChange = (newParams: Record) => { - setFormData((prev) => ({ ...prev, litellm_params: newParams })); - }; - - const handleProviderChange = (value: string) => { - handleInputChange("provider", value); - setFormData((prev) => ({ ...prev, model_name: "" })); - }; - - const isAssignmentComplete = - assignments.long_context_llm_id && assignments.fast_llm_id && assignments.strategic_llm_id; - - return ( -
            - {/* Global Configs Notice - Prominent at top */} - {globalConfigs.length > 0 && ( - - - -
            -

            - {globalConfigs.length} global configuration(s) available! -

            -

            - You can skip adding your own LLM provider and use our pre-configured models in the - role assignment section below. -

            -

            - Or expand "Add LLM Provider" to add your own custom configurations. -

            -
            -
            -
            - )} - - {/* Section 1: Add LLM Providers */} -
            -
            -
            -

            - - {t("add_llm_provider")} -

            -

            {t("configure_first_provider")}

            -
            - -
            - - {showProviderForm && ( - - {/* Info Alert */} - - - {t("add_provider_instruction")} - - - {/* Existing Configurations */} - {llmConfigs.length > 0 && ( -
            -

            - {t("your_llm_configs")} -

            -
            - {llmConfigs.map((config) => ( - - - -
            -
            -
            - -

            {config.name}

            - - {config.provider} - -
            -

            - {t("model")}: {config.model_name} - {config.language && ` • ${t("language")}: ${config.language}`} - {config.api_base && ` • ${t("base")}: ${config.api_base}`} -

            -
            - -
            -
            -
            -
            - ))} -
            -
            - )} - - {/* Add New Provider */} - {!isAddingNew ? ( - - - -

            {t("add_provider_title")}

            -

            - {t("add_provider_subtitle")} -

            - -
            -
            - ) : ( - - - {t("add_new_llm_provider")} - {t("configure_new_provider")} - - -
            -
            -
            - - handleInputChange("name", e.target.value)} - required - /> -
            - -
            - - -
            - -
            - - -
            -
            - - {formData.provider === "CUSTOM" && ( -
            - - handleInputChange("custom_provider", e.target.value)} - required - /> -
            - )} - -
            - - - - - - - - handleInputChange("model_name", value)} - /> - - -
            - {formData.model_name - ? `Using custom model: "${formData.model_name}"` - : "Type your model name above"} -
            -
            - {availableModels.length > 0 && ( - - {availableModels - .filter( - (model) => - !formData.model_name || - model.value - .toLowerCase() - .includes(formData.model_name.toLowerCase()) || - model.label - .toLowerCase() - .includes(formData.model_name.toLowerCase()) - ) - .map((model) => ( - { - handleInputChange("model_name", currentValue); - setModelComboboxOpen(false); - }} - className="flex flex-col items-start py-3" - > -
            - -
            -
            {model.label}
            - {model.contextWindow && ( -
            - Context: {model.contextWindow} -
            - )} -
            -
            -
            - ))} -
            - )} -
            -
            -
            -
            -

            - {availableModels.length > 0 - ? `Type freely or select from ${availableModels.length} model suggestions` - : selectedProvider?.example - ? `${t("examples")}: ${selectedProvider.example}` - : "Type your model name freely"} -

            -
            - -
            - - handleInputChange("api_key", e.target.value)} - required - /> - {formData.provider === "OLLAMA" && ( -

            - 💡 Ollama doesn't require authentication — enter any value (e.g., - "ollama") -

            - )} -
            - -
            - - handleInputChange("api_base", e.target.value)} - /> - {/* Ollama-specific help */} - {formData.provider === "OLLAMA" && ( -
            -

            - 💡 Ollama API Base URL Examples: -

            -
            - - -
            -
            - )} -
            - -
            - -
            - -
            - - -
            -
            -
            -
            - )} -
            - )} -
            - - - - {/* Section 2: Assign Roles */} -
            -
            -

            - - {t("assign_llm_roles")} -

            -

            {t("assign_specific_roles")}

            -
            - - {allConfigs.length === 0 ? ( - - - {t("add_provider_before_roles")} - - ) : ( -
            - - - {t("assign_roles_instruction")} - - -
            - {Object.entries(ROLE_DESCRIPTIONS).map(([roleKey, role]) => { - const IconComponent = role.icon; - const currentAssignment = assignments[role.key]; - const assignedConfig = allConfigs.find((config) => config.id === currentAssignment); - - return ( - - - -
            -
            -
            - -
            -
            - {t(role.titleKey)} - - {t(role.descKey)} - -
            -
            - {currentAssignment && } -
            -
            - -
            - - -
            - - {assignedConfig && ( -
            -
            - - {t("assigned")}: - {"is_global" in assignedConfig && assignedConfig.is_global && ( - - 🌐 Global - - )} - - {assignedConfig.provider} - - {assignedConfig.name} -
            -
            - {t("model")}: {assignedConfig.model_name} -
            -
            - )} -
            -
            -
            - ); - })} -
            - - {/* Status Indicators */} -
            -
            - {t("progress")}: -
            - {Object.keys(ROLE_DESCRIPTIONS).map((key) => { - const roleKey = ROLE_DESCRIPTIONS[key as keyof typeof ROLE_DESCRIPTIONS].key; - return ( -
            - ); - })} -
            - - {t("roles_assigned", { - assigned: Object.values(assignments).filter(Boolean).length, - total: Object.keys(ROLE_DESCRIPTIONS).length, - })} - -
            - - {isAssignmentComplete && ( -
            - - {t("all_roles_assigned_saved")} -
            - )} -
            -
            - )} -
            -
            - ); -} diff --git a/surfsense_web/components/onboard/setup-prompt-step.tsx b/surfsense_web/components/onboard/setup-prompt-step.tsx deleted file mode 100644 index b53e49700..000000000 --- a/surfsense_web/components/onboard/setup-prompt-step.tsx +++ /dev/null @@ -1,340 +0,0 @@ -"use client"; - -import { useAtomValue } from "jotai"; -import { ChevronDown, ChevronUp, ExternalLink, Info, Sparkles, User } from "lucide-react"; -import { useEffect, useState } from "react"; -import { toast } from "sonner"; -import { communityPromptsAtom } from "@/atoms/search-spaces/search-space-query.atoms"; -import { Alert, AlertDescription } from "@/components/ui/alert"; -import { Badge } from "@/components/ui/badge"; -import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; -import { Label } from "@/components/ui/label"; -import { ScrollArea } from "@/components/ui/scroll-area"; -import { Switch } from "@/components/ui/switch"; -import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; -import { Textarea } from "@/components/ui/textarea"; -import { authenticatedFetch } from "@/lib/auth-utils"; - -interface SetupPromptStepProps { - searchSpaceId: number; - onComplete?: () => void; -} - -export function SetupPromptStep({ searchSpaceId, onComplete }: SetupPromptStepProps) { - const { data: prompts = [], isPending: loadingPrompts } = useAtomValue(communityPromptsAtom); - const [enableCitations, setEnableCitations] = useState(true); - const [customInstructions, setCustomInstructions] = useState(""); - const [saving, setSaving] = useState(false); - const [hasChanges, setHasChanges] = useState(false); - const [selectedPromptKey, setSelectedPromptKey] = useState(null); - const [expandedPrompts, setExpandedPrompts] = useState>(new Set()); - const [selectedCategory, setSelectedCategory] = useState("all"); - - // Mark that we have changes when user modifies anything - useEffect(() => 
{ - setHasChanges(true); - }, [enableCitations, customInstructions]); - - const handleSelectCommunityPrompt = (promptKey: string, promptValue: string) => { - setCustomInstructions(promptValue); - setSelectedPromptKey(promptKey); - toast.success("Community prompt applied"); - }; - - const toggleExpand = (promptKey: string) => { - const newExpanded = new Set(expandedPrompts); - if (newExpanded.has(promptKey)) { - newExpanded.delete(promptKey); - } else { - newExpanded.add(promptKey); - } - setExpandedPrompts(newExpanded); - }; - - // Get unique categories - const categories = Array.from(new Set(prompts.map((p) => p.category || "general"))); - const filteredPrompts = - selectedCategory === "all" - ? prompts - : prompts.filter((p) => (p.category || "general") === selectedCategory); - - const truncateText = (text: string, maxLength: number = 150) => { - if (text.length <= maxLength) return text; - return text.substring(0, maxLength) + "..."; - }; - - const handleSave = async () => { - try { - setSaving(true); - - // Prepare the update payload with simplified schema - const payload: any = { - citations_enabled: enableCitations, - qna_custom_instructions: customInstructions.trim() || "", - }; - - // Only send update if there's something to update - if (Object.keys(payload).length > 0) { - const response = await authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}`, - { - method: "PUT", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(payload), - } - ); - - if (!response.ok) { - const errorData = await response.json().catch(() => ({})); - throw new Error( - errorData.detail || `Failed to save prompt configuration (${response.status})` - ); - } - - toast.success("Prompt configuration saved successfully"); - } - - setHasChanges(false); - onComplete?.(); - } catch (error: any) { - console.error("Error saving prompt configuration:", error); - toast.error(error.message || "Failed to save prompt 
configuration"); - } finally { - setSaving(false); - } - }; - - const handleSkip = () => { - // Skip without saving - use defaults - onComplete?.(); - }; - - return ( -
            - - - - These settings are optional. You can skip this step and configure them later in settings. - - - - {/* Citation Toggle */} -
            -
            -
            - -

            - When enabled, AI responses will include citations to source documents using - [citation:id] format. -

            -
            - -
            - - {!enableCitations && ( - - - - Disabling citations means AI responses won't include source references. You can - re-enable this anytime in settings. - - - )} -
            - - {/* SearchSpace System Instructions */} -
            -
            - -

            - Add system instructions to guide how the AI should respond. Choose from community - prompts below or write your own. -

            - - {/* Community Prompts Section */} - {!loadingPrompts && prompts.length > 0 && ( - - - - - Community Prompts Library - - - Browse {prompts.length} curated prompts. Click to preview or apply directly - - - - - - - All ({prompts.length}) - - {categories.map((category) => ( - - {category} ( - {prompts.filter((p) => (p.category || "general") === category).length}) - - ))} - - - -
            - {filteredPrompts.map((prompt) => { - const isExpanded = expandedPrompts.has(prompt.key); - const isSelected = selectedPromptKey === prompt.key; - const displayText = isExpanded - ? prompt.value - : truncateText(prompt.value, 120); - - return ( -
            -
            -
            - - {prompt.key.replace(/_/g, " ")} - - {prompt.category && ( - - {prompt.category} - - )} - {isSelected && ( - - ✓ Selected - - )} -
            - {prompt.link && ( - - - - )} -
            - -

            - {displayText} -

            - -
            -
            - - {prompt.author} -
            - -
            - {prompt.value.length > 120 && ( - - )} - -
            -
            -
            - ); - })} -
            -
            -
            -
            -
            - )} - -