From 961d74165674062b8d9eab7d5e1c311d9d50fbeb Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 15:26:55 +0200 Subject: [PATCH 01/20] feat: add SurfsenseDocsDocument model --- surfsense_backend/app/db.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index d54254f9c..abca893fb 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -428,6 +428,28 @@ class Chunk(BaseModel, TimestampMixin): document = relationship("Document", back_populates="chunks") +class SurfsenseDocsDocument(BaseModel, TimestampMixin): + """ + Surfsense documentation storage. + Indexed at migration time from MDX files. + """ + + __tablename__ = "surfsense_docs_documents" + + source = Column(String, nullable=False, unique=True, index=True) # File path: "connectors/slack.mdx" + title = Column(String, nullable=False) + content = Column(Text, nullable=False) + content_hash = Column(String, nullable=False, index=True) # For detecting changes + embedding = Column(Vector(config.embedding_model_instance.dimension)) + updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True) + + chunks = relationship( + "SurfsenseDocsChunk", + back_populates="document", + cascade="all, delete-orphan", + ) + + class Podcast(BaseModel, TimestampMixin): """Podcast model for storing generated podcasts.""" From ba404cc1516dc90176637e1e01396b4dfeaf4856 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 15:28:36 +0200 Subject: [PATCH 02/20] feat: add SurfsenseDocsChunk model with relationship --- surfsense_backend/app/db.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index abca893fb..006d73358 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -450,6 +450,22 @@ class SurfsenseDocsDocument(BaseModel, TimestampMixin): ) +class SurfsenseDocsChunk(BaseModel, TimestampMixin): + """Chunk storage for Surfsense documentation.""" + + __tablename__ = "surfsense_docs_chunks" + + content = Column(Text, nullable=False) + embedding = Column(Vector(config.embedding_model_instance.dimension)) + + document_id = Column( + Integer, + ForeignKey("surfsense_docs_documents.id", ondelete="CASCADE"), + nullable=False, + ) + document = relationship("SurfsenseDocsDocument", back_populates="chunks") + + class Podcast(BaseModel, TimestampMixin): """Podcast model for storing generated podcasts.""" From fff851ae3fe07d6b2ac388296c4705083dc131bf Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 15:42:30 +0200 Subject: [PATCH 03/20] feat: create indexer module with MDX parsing --- .../app/tasks/surfsense_docs_indexer.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 surfsense_backend/app/tasks/surfsense_docs_indexer.py diff --git a/surfsense_backend/app/tasks/surfsense_docs_indexer.py b/surfsense_backend/app/tasks/surfsense_docs_indexer.py new file mode 100644 index 000000000..c5e846635 --- /dev/null +++ b/surfsense_backend/app/tasks/surfsense_docs_indexer.py @@ -0,0 +1,64 @@ +""" +Surfsense documentation indexer. +Indexes MDX documentation files at migration time. 
+""" + +import hashlib +import logging +import re +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Path to docs relative to project root +DOCS_DIR = Path(__file__).resolve().parent.parent.parent.parent / "surfsense_web" / "content" / "docs" + + +def parse_mdx_frontmatter(content: str) -> tuple[str, str]: + """ + Parse MDX file to extract frontmatter title and content. + + Args: + content: Raw MDX file content + + Returns: + Tuple of (title, content_without_frontmatter) + """ + # Match frontmatter between --- markers + frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n" + match = re.match(frontmatter_pattern, content, re.DOTALL) + + if match: + frontmatter = match.group(1) + content_without_frontmatter = content[match.end():] + + # Extract title from frontmatter + title_match = re.search(r"^title:\s*(.+)$", frontmatter, re.MULTILINE) + title = title_match.group(1).strip() if title_match else "Untitled" + + # Remove quotes if present + title = title.strip("\"'") + + return title, content_without_frontmatter.strip() + + return "Untitled", content.strip() + + +def get_all_mdx_files() -> list[Path]: + """ + Get all MDX files from the docs directory. + + Returns: + List of Path objects for each MDX file + """ + if not DOCS_DIR.exists(): + logger.warning(f"Docs directory not found: {DOCS_DIR}") + return [] + + return list(DOCS_DIR.rglob("*.mdx")) + + +def generate_surfsense_docs_content_hash(content: str) -> str: + """Generate SHA-256 hash for Surfsense docs content.""" + return hashlib.sha256(content.encode("utf-8")).hexdigest() + From 2e83ed8dcd7affb5d4e8570fdf48ca12031d4335 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 15:45:17 +0200 Subject: [PATCH 04/20] feat: add chunking and embedding logic to indexer --- .../app/tasks/surfsense_docs_indexer.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/surfsense_backend/app/tasks/surfsense_docs_indexer.py b/surfsense_backend/app/tasks/surfsense_docs_indexer.py index c5e846635..6b4c4c91c 100644 --- a/surfsense_backend/app/tasks/surfsense_docs_indexer.py +++ b/surfsense_backend/app/tasks/surfsense_docs_indexer.py @@ -8,6 +8,9 @@ import logging import re from pathlib import Path +from app.config import config +from app.db import SurfsenseDocsChunk + logger = logging.getLogger(__name__) # Path to docs relative to project root @@ -62,3 +65,22 @@ def generate_surfsense_docs_content_hash(content: str) -> str: """Generate SHA-256 hash for Surfsense docs content.""" return hashlib.sha256(content.encode("utf-8")).hexdigest() + +def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]: + """ + Create chunks from Surfsense documentation content. 
+ + Args: + content: Document content to chunk + + Returns: + List of SurfsenseDocsChunk objects with embeddings + """ + return [ + SurfsenseDocsChunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(content) + ] + From 105f4c5c9d8eaaeb7a33636441cb8aa31b41dd43 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 15:58:27 +0200 Subject: [PATCH 05/20] feat: add create/update/skip/delete logic to indexer --- .../app/tasks/surfsense_docs_indexer.py | 109 +++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/surfsense_docs_indexer.py b/surfsense_backend/app/tasks/surfsense_docs_indexer.py index 6b4c4c91c..51a1c0938 100644 --- a/surfsense_backend/app/tasks/surfsense_docs_indexer.py +++ b/surfsense_backend/app/tasks/surfsense_docs_indexer.py @@ -6,10 +6,14 @@ Indexes MDX documentation files at migration time. import hashlib import logging import re +from datetime import UTC, datetime from pathlib import Path +from sqlalchemy import select +from sqlalchemy.orm import Session, selectinload + from app.config import config -from app.db import SurfsenseDocsChunk +from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument logger = logging.getLogger(__name__) @@ -84,3 +88,106 @@ def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]: for chunk in config.chunker_instance.chunk(content) ] + +def index_surfsense_docs(session: Session) -> tuple[int, int, int, int]: + """ + Index all Surfsense documentation files. + + Args: + session: SQLAlchemy sync session + + Returns: + Tuple of (created, updated, skipped, deleted) counts + """ + created = 0 + updated = 0 + skipped = 0 + deleted = 0 + + # Get all existing docs from database + existing_docs_result = session.execute( + select(SurfsenseDocsDocument).options(selectinload(SurfsenseDocsDocument.chunks)) + ) + existing_docs = {doc.source: doc for doc in existing_docs_result.scalars().all()} + + # Track which sources we've processed + processed_sources = set() + + # Get all MDX files + mdx_files = get_all_mdx_files() + logger.info(f"Found {len(mdx_files)} MDX files to index") + + for mdx_file in mdx_files: + try: + source = str(mdx_file.relative_to(DOCS_DIR)) + processed_sources.add(source) + + # Read file content + raw_content = mdx_file.read_text(encoding="utf-8") + title, content = parse_mdx_frontmatter(raw_content) + content_hash = generate_surfsense_docs_content_hash(raw_content) + + if source in existing_docs: + existing_doc = existing_docs[source] + + # Check if content changed + if existing_doc.content_hash == content_hash: + logger.debug(f"Skipping unchanged: {source}") + skipped += 1 + continue + + # Content changed - update document + logger.info(f"Updating changed document: {source}") + + # Create new chunks + chunks = create_surfsense_docs_chunks(content) + + # Update document fields + existing_doc.title = title + existing_doc.content = content + existing_doc.content_hash = content_hash + existing_doc.embedding = config.embedding_model_instance.embed(content) + existing_doc.chunks = chunks + existing_doc.updated_at = datetime.now(UTC) + + updated += 1 + else: + # New document - create it + logger.info(f"Creating new document: {source}") + + chunks = create_surfsense_docs_chunks(content) + + document = SurfsenseDocsDocument( + source=source, + title=title, + content=content, + content_hash=content_hash, + embedding=config.embedding_model_instance.embed(content), + chunks=chunks, + 
updated_at=datetime.now(UTC), + ) + + session.add(document) + created += 1 + + except Exception as e: + logger.error(f"Error processing {mdx_file}: {e}", exc_info=True) + continue + + # Delete documents for removed files + for source, doc in existing_docs.items(): + if source not in processed_sources: + logger.info(f"Deleting removed document: {source}") + session.delete(doc) + deleted += 1 + + # Commit all changes + session.commit() + + logger.info( + f"Indexing complete: {created} created, {updated} updated, " + f"{skipped} skipped, {deleted} deleted" + ) + + return created, updated, skipped, deleted + From f30f39b5e960548c2ff267a30cdc9a9e56d13d0b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 16:39:39 +0200 Subject: [PATCH 06/20] feat: create migration for Surfsense docs tables --- .../versions/60_add_surfsense_docs_tables.py | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 surfsense_backend/alembic/versions/60_add_surfsense_docs_tables.py diff --git a/surfsense_backend/alembic/versions/60_add_surfsense_docs_tables.py b/surfsense_backend/alembic/versions/60_add_surfsense_docs_tables.py new file mode 100644 index 000000000..7e5aa9437 --- /dev/null +++ b/surfsense_backend/alembic/versions/60_add_surfsense_docs_tables.py @@ -0,0 +1,165 @@ +"""Add Surfsense docs tables for global documentation storage + +Revision ID: 60 +Revises: 59 +""" + +from collections.abc import Sequence + +from alembic import op + +from app.config import config + +# revision identifiers, used by Alembic. +revision: str = "60" +down_revision: str | None = "59" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +# Get embedding dimension from config +EMBEDDING_DIM = config.embedding_model_instance.dimension + + +def upgrade() -> None: + """Create surfsense_docs_documents and surfsense_docs_chunks tables.""" + + # Create surfsense_docs_documents table + op.execute( + f""" + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'surfsense_docs_documents' + ) THEN + CREATE TABLE surfsense_docs_documents ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + source VARCHAR NOT NULL UNIQUE, + title VARCHAR NOT NULL, + content TEXT NOT NULL, + content_hash VARCHAR NOT NULL, + embedding vector({EMBEDDING_DIM}), + updated_at TIMESTAMP WITH TIME ZONE + ); + END IF; + END$$; + """ + ) + + # Create indexes for surfsense_docs_documents + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'surfsense_docs_documents' AND indexname = 'ix_surfsense_docs_documents_source' + ) THEN + CREATE INDEX ix_surfsense_docs_documents_source ON surfsense_docs_documents(source); + END IF; + + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'surfsense_docs_documents' AND indexname = 'ix_surfsense_docs_documents_content_hash' + ) THEN + CREATE INDEX ix_surfsense_docs_documents_content_hash ON surfsense_docs_documents(content_hash); + END IF; + + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'surfsense_docs_documents' AND indexname = 'ix_surfsense_docs_documents_updated_at' + ) THEN + CREATE INDEX ix_surfsense_docs_documents_updated_at ON surfsense_docs_documents(updated_at); + END IF; + END$$; + """ + ) + + # Create surfsense_docs_chunks table + op.execute( + f""" + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'surfsense_docs_chunks' + ) THEN + CREATE TABLE 
surfsense_docs_chunks ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + content TEXT NOT NULL, + embedding vector({EMBEDDING_DIM}), + document_id INTEGER NOT NULL REFERENCES surfsense_docs_documents(id) ON DELETE CASCADE + ); + END IF; + END$$; + """ + ) + + # Create indexes for surfsense_docs_chunks + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'surfsense_docs_chunks' AND indexname = 'ix_surfsense_docs_chunks_document_id' + ) THEN + CREATE INDEX ix_surfsense_docs_chunks_document_id ON surfsense_docs_chunks(document_id); + END IF; + END$$; + """ + ) + + # Create vector indexes for similarity search + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_documents_vector_index + ON surfsense_docs_documents USING hnsw (embedding public.vector_cosine_ops); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_chunks_vector_index + ON surfsense_docs_chunks USING hnsw (embedding public.vector_cosine_ops); + """ + ) + + # Create full-text search indexes (same pattern as documents/chunks tables) + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_documents_search_index + ON surfsense_docs_documents USING gin (to_tsvector('english', content)); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS surfsense_docs_chunks_search_index + ON surfsense_docs_chunks USING gin (to_tsvector('english', content)); + """ + ) + + +def downgrade() -> None: + """Remove surfsense docs tables.""" + # Drop full-text search indexes + op.execute("DROP INDEX IF EXISTS surfsense_docs_chunks_search_index") + op.execute("DROP INDEX IF EXISTS surfsense_docs_documents_search_index") + + # Drop vector indexes + op.execute("DROP INDEX IF EXISTS surfsense_docs_chunks_vector_index") + op.execute("DROP INDEX IF EXISTS surfsense_docs_documents_vector_index") + + # Drop regular indexes + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_chunks_document_id") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_updated_at") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_content_hash") + op.execute("DROP INDEX IF EXISTS ix_surfsense_docs_documents_source") + + # Drop tables (chunks first due to FK) + op.execute("DROP TABLE IF EXISTS surfsense_docs_chunks") + op.execute("DROP TABLE IF EXISTS surfsense_docs_documents") + From ec145431f2ad1455585d32e1942d51fbb9fb9022 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 16:49:33 +0200 Subject: [PATCH 07/20] feat: add seeding script for Surfsense docs (run after migrations) --- .../scripts/seed_surfsense_docs.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 surfsense_backend/scripts/seed_surfsense_docs.py diff --git a/surfsense_backend/scripts/seed_surfsense_docs.py b/surfsense_backend/scripts/seed_surfsense_docs.py new file mode 100644 index 000000000..2e9eee649 --- /dev/null +++ b/surfsense_backend/scripts/seed_surfsense_docs.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +""" +Seed Surfsense documentation into the database. +Run this script after migrations to index MDX documentation files. 
+ +Usage: + python scripts/seed_surfsense_docs.py +""" + +import sys +from pathlib import Path + +# Add the parent directory to the path so we can import app modules +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from sqlalchemy import create_engine +from sqlalchemy.orm import Session + +from app.config import config +from app.tasks.surfsense_docs_indexer import index_surfsense_docs + + +def main(): + """Main entry point for seeding Surfsense docs.""" + print("Starting Surfsense docs seeding...") + + # Create sync engine from database URL + # Convert async URL to sync if needed + database_url = config.DATABASE_URL + if database_url.startswith("postgresql+asyncpg://"): + database_url = database_url.replace("postgresql+asyncpg://", "postgresql://") + + engine = create_engine(database_url) + + with Session(engine) as session: + created, updated, skipped, deleted = index_surfsense_docs(session) + + print(f"\nSurfsense docs seeding complete:") + print(f" Created: {created}") + print(f" Updated: {updated}") + print(f" Skipped: {skipped}") + print(f" Deleted: {deleted}") + + +if __name__ == "__main__": + main() + From 6f672361432ccccb3a9ba40f7ac3a1e31a76ed0a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 16:58:09 +0200 Subject: [PATCH 08/20] feat: add docs seeding function to all-in-one entrypoint --- scripts/docker/entrypoint-allinone.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh index 8248968ab..ab21b2658 100644 --- a/scripts/docker/entrypoint-allinone.sh +++ b/scripts/docker/entrypoint-allinone.sh @@ -145,9 +145,29 @@ run_migrations() { echo "✅ Database migrations complete" } +# ================================================ +# Seed Surfsense documentation +# ================================================ +seed_surfsense_docs() { + echo "📚 Seeding Surfsense documentation..." + + # Start PostgreSQL temporarily for seeding + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_seed.log start" + sleep 5 + + cd /app/backend + python scripts/seed_surfsense_docs.py || echo "⚠️ Docs seeding may have already been done" + + # Stop PostgreSQL + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop" + + echo "✅ Surfsense documentation seeded" +} + # Run migrations on first start or when explicitly requested if [ ! -f /data/.migrations_run ] || [ "${FORCE_MIGRATIONS:-false}" = "true" ]; then run_migrations + seed_surfsense_docs touch /data/.migrations_run fi From 4aa686480e9ce5db150353f0ee466c8ba3cf80f7 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 17:04:58 +0200 Subject: [PATCH 09/20] refactor: decouple docs seeding from migrations with separate flags --- scripts/docker/entrypoint-allinone.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh index ab21b2658..0888facf1 100644 --- a/scripts/docker/entrypoint-allinone.sh +++ b/scripts/docker/entrypoint-allinone.sh @@ -167,10 +167,15 @@ seed_surfsense_docs() { # Run migrations on first start or when explicitly requested if [ ! -f /data/.migrations_run ] || [ "${FORCE_MIGRATIONS:-false}" = "true" ]; then run_migrations - seed_surfsense_docs touch /data/.migrations_run fi +# Seed docs on first start or when explicitly requested +if [ ! 
-f /data/.docs_seeded ] || [ "${FORCE_SEED_DOCS:-false}" = "true" ]; then
+    seed_surfsense_docs
+    touch /data/.docs_seeded
+fi
+
 # ================================================
 # Environment Variables Info
 # ================================================

From 1be9de9c240415e12825a6d33e75d71144fcc5eb Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 9 Jan 2026 17:52:31 +0200
Subject: [PATCH 10/20] feat: add search_surfsense_docs tool with vector search

---
 .../new_chat/tools/search_surfsense_docs.py   | 160 ++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py

diff --git a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py
new file mode 100644
index 000000000..21f3942ab
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py
@@ -0,0 +1,160 @@
+"""
+Surfsense documentation search tool.
+
+This tool allows the agent to search the pre-indexed Surfsense documentation
+to help users with questions about how to use the application.
+
+The documentation is indexed at deployment time from MDX files and stored
+in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
+"""
+
+import json
+
+from langchain_core.tools import tool
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.config import config
+from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
+
+
+def format_surfsense_docs_results(results: list[tuple]) -> str:
+    """
+    Format search results into XML structure for the LLM context.
+
+    Uses the same XML structure as format_documents_for_context from knowledge_base.py
+    but with 'doc-' prefix on chunk IDs. This allows:
+    - LLM to use consistent [citation:doc-XXX] format
+    - Frontend to detect 'doc-' prefix and route to surfsense docs endpoint
+
+    Args:
+        results: List of (chunk, document) tuples from the database query
+
+    Returns:
+        Formatted XML string with documentation content and citation-ready chunks
+    """
+    if not results:
+        return "No relevant Surfsense documentation found for your query."
+
+    # Group chunks by document
+    grouped: dict[int, dict] = {}
+    for chunk, doc in results:
+        if doc.id not in grouped:
+            grouped[doc.id] = {
+                "document_id": f"doc-{doc.id}",
+                "document_type": "SURFSENSE_DOCS",
+                "title": doc.title,
+                "url": doc.source,
+                "metadata": {"source": doc.source},
+                "chunks": [],
+            }
+        grouped[doc.id]["chunks"].append({
+            "chunk_id": f"doc-{chunk.id}",
+            "content": chunk.content,
+        })
+
+    # Render XML matching format_documents_for_context structure
+    parts: list[str] = []
+    for g in grouped.values():
+        metadata_json = json.dumps(g["metadata"], ensure_ascii=False)
+
+        parts.append("<document>")
+        parts.append("    <metadata>")
+        parts.append(f"        <document_id>{g['document_id']}</document_id>")
+        parts.append(f"        <document_type>{g['document_type']}</document_type>")
+        parts.append(f"        <title><![CDATA[{g['title']}]]></title>")
+        parts.append(f"        <url><![CDATA[{g['url']}]]></url>")
+        parts.append(f"        <source_metadata><![CDATA[{metadata_json}]]></source_metadata>")
+        parts.append("    </metadata>")
+        parts.append("    <content>")
+        parts.append("        <chunks>")
+
+        for ch in g["chunks"]:
+            parts.append(f'            <chunk id="{ch["chunk_id"]}"><![CDATA[{ch["content"]}]]></chunk>')
+
+        parts.append("        </chunks>")
+        parts.append("    </content>")
+        parts.append("</document>")
+
+    return "\n".join(parts).strip()
+
+
+async def search_surfsense_docs_async(
+    query: str,
+    db_session: AsyncSession,
+    top_k: int = 5,
+) -> str:
+    """
+    Search Surfsense documentation using vector similarity.
+ + Args: + query: The search query about Surfsense usage + db_session: Database session for executing queries + top_k: Number of results to return + + Returns: + Formatted string with relevant documentation content + """ + # Get embedding for the query + query_embedding = config.embedding_model_instance.embed(query) + + # Vector similarity search on chunks, joining with documents + stmt = ( + select(SurfsenseDocsChunk, SurfsenseDocsDocument) + .join( + SurfsenseDocsDocument, + SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id, + ) + .order_by(SurfsenseDocsChunk.embedding.op("<=>")(query_embedding)) + .limit(top_k) + ) + + result = await db_session.execute(stmt) + rows = result.all() + + return format_surfsense_docs_results(rows) + + +def create_search_surfsense_docs_tool(db_session: AsyncSession): + """ + Factory function to create the search_surfsense_docs tool. + + Args: + db_session: Database session for executing queries + + Returns: + A configured tool function for searching Surfsense documentation + """ + + @tool + async def search_surfsense_docs(query: str, top_k: int = 5) -> str: + """ + Search Surfsense documentation for help with using the application. + + Use this tool when the user asks questions about: + - How to use Surfsense features + - Installation and setup instructions + - Configuration options and settings + - Troubleshooting common issues + - Available connectors and integrations + - Browser extension usage + - API documentation + + This searches the official Surfsense documentation that was indexed + at deployment time. It does NOT search the user's personal knowledge base. + + Args: + query: The search query about Surfsense usage or features + top_k: Number of documentation chunks to retrieve (default: 5) + + Returns: + Relevant documentation content formatted with chunk IDs for citations + """ + return await search_surfsense_docs_async( + query=query, + db_session=db_session, + top_k=top_k, + ) + + return search_surfsense_docs + From c4d214baa4c90a90dfba3da6627131e32a4e97ec Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 17:58:14 +0200 Subject: [PATCH 11/20] feat: register search_surfsense_docs tool in agent toolkit --- .../app/agents/new_chat/tools/__init__.py | 3 +++ .../app/agents/new_chat/tools/registry.py | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py index b89988327..b531d9b4d 100644 --- a/surfsense_backend/app/agents/new_chat/tools/__init__.py +++ b/surfsense_backend/app/agents/new_chat/tools/__init__.py @@ -6,6 +6,7 @@ To add a new tool, see the documentation in registry.py. 
Available tools: - search_knowledge_base: Search the user's personal knowledge base +- search_surfsense_docs: Search Surfsense documentation for usage help - generate_podcast: Generate audio podcasts from content - link_preview: Fetch rich previews for URLs - display_image: Display images in chat @@ -31,6 +32,7 @@ from .registry import ( get_tool_by_name, ) from .scrape_webpage import create_scrape_webpage_tool +from .search_surfsense_docs import create_search_surfsense_docs_tool __all__ = [ # Registry @@ -43,6 +45,7 @@ __all__ = [ "create_link_preview_tool", "create_scrape_webpage_tool", "create_search_knowledge_base_tool", + "create_search_surfsense_docs_tool", # Knowledge base utilities "format_documents_for_context", "get_all_tool_names", diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index bc305aecc..c7439bf8f 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -48,6 +48,7 @@ from .knowledge_base import create_search_knowledge_base_tool from .link_preview import create_link_preview_tool from .podcast import create_generate_podcast_tool from .scrape_webpage import create_scrape_webpage_tool +from .search_surfsense_docs import create_search_surfsense_docs_tool # ============================================================================= # Tool Definition @@ -126,6 +127,15 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ requires=[], # firecrawl_api_key is optional ), # Note: write_todos is now provided by TodoListMiddleware from deepagents + # Surfsense documentation search tool + ToolDefinition( + name="search_surfsense_docs", + description="Search Surfsense documentation for help with using the application", + factory=lambda deps: create_search_surfsense_docs_tool( + db_session=deps["db_session"], + ), + requires=["db_session"], + ), # ========================================================================= # ADD YOUR CUSTOM TOOLS BELOW # ========================================================================= From 3539b2a83da6be0997c7c63c74b862228fd41291 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 18:07:14 +0200 Subject: [PATCH 12/20] feat: add surfsense docs citation endpoint --- surfsense_backend/app/routes/__init__.py | 2 + .../app/routes/surfsense_docs_routes.py | 89 +++++++++++++++++++ .../app/schemas/surfsense_docs.py | 27 ++++++ 3 files changed, 118 insertions(+) create mode 100644 surfsense_backend/app/routes/surfsense_docs_routes.py create mode 100644 surfsense_backend/app/schemas/surfsense_docs.py diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index b4e94c732..4b6df350a 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -31,6 +31,7 @@ from .rbac_routes import router as rbac_router from .search_source_connectors_routes import router as search_source_connectors_router from .search_spaces_routes import router as search_spaces_router from .slack_add_connector_route import router as slack_add_connector_router +from .surfsense_docs_routes import router as surfsense_docs_router from .teams_add_connector_route import router as teams_add_connector_router router = APIRouter() @@ -59,3 +60,4 @@ router.include_router(clickup_add_connector_router) router.include_router(new_llm_config_router) # LLM configs with prompt configuration router.include_router(logs_router) 
router.include_router(circleback_webhook_router) # Circleback meeting webhooks +router.include_router(surfsense_docs_router) # Surfsense documentation for citations diff --git a/surfsense_backend/app/routes/surfsense_docs_routes.py b/surfsense_backend/app/routes/surfsense_docs_routes.py new file mode 100644 index 000000000..a2de65568 --- /dev/null +++ b/surfsense_backend/app/routes/surfsense_docs_routes.py @@ -0,0 +1,89 @@ +""" +Routes for Surfsense documentation. + +These endpoints support the citation system for Surfsense docs, +allowing the frontend to fetch document details when a user clicks +on a [citation:doc-XXX] link. +""" + +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload + +from app.db import ( + SurfsenseDocsChunk, + SurfsenseDocsDocument, + User, + get_async_session, +) +from app.schemas.surfsense_docs import ( + SurfsenseDocsChunkRead, + SurfsenseDocsDocumentWithChunksRead, +) +from app.users import current_active_user + +router = APIRouter() + + +@router.get( + "/surfsense-docs/by-chunk/{chunk_id}", + response_model=SurfsenseDocsDocumentWithChunksRead, +) +async def get_surfsense_doc_by_chunk_id( + chunk_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Retrieves a Surfsense documentation document based on a chunk ID. + + This endpoint is used by the frontend to resolve [citation:doc-XXX] links. + """ + try: + # Get the chunk + chunk_result = await session.execute( + select(SurfsenseDocsChunk).filter(SurfsenseDocsChunk.id == chunk_id) + ) + chunk = chunk_result.scalars().first() + + if not chunk: + raise HTTPException( + status_code=404, + detail=f"Surfsense docs chunk with id {chunk_id} not found", + ) + + # Get the associated document with all its chunks + document_result = await session.execute( + select(SurfsenseDocsDocument) + .options(selectinload(SurfsenseDocsDocument.chunks)) + .filter(SurfsenseDocsDocument.id == chunk.document_id) + ) + document = document_result.scalars().first() + + if not document: + raise HTTPException( + status_code=404, + detail="Surfsense docs document not found", + ) + + # Sort chunks by ID + sorted_chunks = sorted(document.chunks, key=lambda x: x.id) + + return SurfsenseDocsDocumentWithChunksRead( + id=document.id, + title=document.title, + source=document.source, + content=document.content, + chunks=[ + SurfsenseDocsChunkRead(id=c.id, content=c.content) + for c in sorted_chunks + ], + ) + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to retrieve Surfsense documentation: {e!s}", + ) from e diff --git a/surfsense_backend/app/schemas/surfsense_docs.py b/surfsense_backend/app/schemas/surfsense_docs.py new file mode 100644 index 000000000..7464df342 --- /dev/null +++ b/surfsense_backend/app/schemas/surfsense_docs.py @@ -0,0 +1,27 @@ +""" +Schemas for Surfsense documentation. 
+""" + +from pydantic import BaseModel, ConfigDict + + +class SurfsenseDocsChunkRead(BaseModel): + """Schema for a Surfsense docs chunk.""" + + id: int + content: str + + model_config = ConfigDict(from_attributes=True) + + +class SurfsenseDocsDocumentWithChunksRead(BaseModel): + """Schema for a Surfsense docs document with its chunks.""" + + id: int + title: str + source: str + content: str + chunks: list[SurfsenseDocsChunkRead] + + model_config = ConfigDict(from_attributes=True) + From abd3bace53e6280e84c723653b2f123ad17e8729 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 18:21:59 +0200 Subject: [PATCH 13/20] feat: add frontend support for surfsense docs citations --- .../assistant-ui/inline-citation.tsx | 9 ++++--- .../components/assistant-ui/markdown-text.tsx | 26 ++++++++++++------- .../new-chat/source-detail-panel.tsx | 11 ++++++-- .../lib/apis/documents-api.service.ts | 12 +++++++++ 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index 065f37e8e..9eab9a3c3 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -7,13 +7,15 @@ import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel"; interface InlineCitationProps { chunkId: number; citationNumber: number; + isDocsChunk?: boolean; } /** * Inline citation component for the new chat. * Renders a clickable numbered badge that opens the SourceDetailPanel with document chunk details. + * Supports both regular knowledge base chunks and Surfsense documentation chunks. */ -export const InlineCitation: FC = ({ chunkId, citationNumber }) => { +export const InlineCitation: FC = ({ chunkId, citationNumber, isDocsChunk = false }) => { const [isOpen, setIsOpen] = useState(false); return ( @@ -21,10 +23,11 @@ export const InlineCitation: FC = ({ chunkId, citationNumbe open={isOpen} onOpenChange={setIsOpen} chunkId={chunkId} - sourceType="" - title="Source" + sourceType={isDocsChunk ? "SURFSENSE_DOCS" : ""} + title={isDocsChunk ? "Surfsense Documentation" : "Source"} description="" url="" + isDocsChunk={isDocsChunk} > setIsOpen(true)} diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx index 41d6143b9..532ae7663 100644 --- a/surfsense_web/components/assistant-ui/markdown-text.tsx +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -15,8 +15,8 @@ import { InlineCitation } from "@/components/assistant-ui/inline-citation"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { cn } from "@/lib/utils"; -// Citation pattern: [citation:CHUNK_ID] -const CITATION_REGEX = /\[citation:(\d+)\]/g; +// Citation pattern: [citation:CHUNK_ID] or [citation:doc-CHUNK_ID] +const CITATION_REGEX = /\[citation:(doc-)?(\d+)\]/g; // Track chunk IDs to citation numbers mapping for consistent numbering // This map is reset when a new message starts rendering @@ -33,16 +33,20 @@ export function resetCitationCounter() { /** * Gets or assigns a citation number for a chunk ID + * Uses string key to differentiate between doc and regular chunks */ -function getCitationNumber(chunkId: number): number { - if (!chunkIdToCitationNumber.has(chunkId)) { - chunkIdToCitationNumber.set(chunkId, nextCitationNumber++); +function getCitationNumber(chunkId: number, isDocsChunk: boolean): number { + const key = isDocsChunk ? 
`doc-${chunkId}` : String(chunkId); + const existingNumber = chunkIdToCitationNumber.get(key as unknown as number); + if (existingNumber === undefined) { + chunkIdToCitationNumber.set(key as unknown as number, nextCitationNumber++); } - return chunkIdToCitationNumber.get(chunkId)!; + return chunkIdToCitationNumber.get(key as unknown as number)!; } /** * Parses text and replaces [citation:XXX] patterns with InlineCitation components + * Supports both regular chunks [citation:123] and docs chunks [citation:doc-123] */ function parseTextWithCitations(text: string): ReactNode[] { const parts: ReactNode[] = []; @@ -59,14 +63,16 @@ function parseTextWithCitations(text: string): ReactNode[] { parts.push(text.substring(lastIndex, match.index)); } - // Add the citation component - const chunkId = Number.parseInt(match[1], 10); - const citationNumber = getCitationNumber(chunkId); + // Check if this is a docs chunk (has "doc-" prefix) + const isDocsChunk = match[1] === "doc-"; + const chunkId = Number.parseInt(match[2], 10); + const citationNumber = getCitationNumber(chunkId, isDocsChunk); parts.push( ); diff --git a/surfsense_web/components/new-chat/source-detail-panel.tsx b/surfsense_web/components/new-chat/source-detail-panel.tsx index 35249dc50..dc0c3c3f8 100644 --- a/surfsense_web/components/new-chat/source-detail-panel.tsx +++ b/surfsense_web/components/new-chat/source-detail-panel.tsx @@ -34,6 +34,7 @@ interface SourceDetailPanelProps { description?: string; url?: string; children?: ReactNode; + isDocsChunk?: boolean; } const formatDocumentType = (type: string) => { @@ -114,6 +115,7 @@ export function SourceDetailPanel({ description, url, children, + isDocsChunk = false, }: SourceDetailPanelProps) { const scrollAreaRef = useRef(null); const hasScrolledRef = useRef(false); // Use ref to avoid stale closures @@ -132,8 +134,13 @@ export function SourceDetailPanel({ isLoading: isDocumentByChunkFetching, error: documentByChunkFetchingError, } = useQuery({ - queryKey: cacheKeys.documents.byChunk(chunkId.toString()), - queryFn: () => documentsApiService.getDocumentByChunk({ chunk_id: chunkId }), + queryKey: isDocsChunk + ? cacheKeys.documents.byChunk(`doc-${chunkId}`) + : cacheKeys.documents.byChunk(chunkId.toString()), + queryFn: () => + isDocsChunk + ? 
documentsApiService.getSurfsenseDocByChunk(chunkId) + : documentsApiService.getDocumentByChunk({ chunk_id: chunkId }), enabled: !!chunkId && open, staleTime: 5 * 60 * 1000, }); diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index cf7a4b778..372baee4d 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -209,6 +209,18 @@ class DocumentsApiService { ); }; + /** + * Get Surfsense documentation by chunk ID + * Used for resolving [citation:doc-XXX] citations + */ + getSurfsenseDocByChunk = async (chunkId: number) => { + // Response shape matches getDocumentByChunkResponse structure + return baseApiService.get( + `/api/v1/surfsense-docs/by-chunk/${chunkId}`, + getDocumentByChunkResponse + ); + }; + /** * Update a document */ From 2c3d625b35613a38bb3e58aba359af917afa163a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 9 Jan 2026 20:11:47 +0200 Subject: [PATCH 14/20] fix: increase top_k from 5 to 10 to match knowledge base --- .../app/agents/new_chat/tools/search_surfsense_docs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py index 21f3942ab..a34e16ff2 100644 --- a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py +++ b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py @@ -82,7 +82,7 @@ def format_surfsense_docs_results(results: list[tuple]) -> str: async def search_surfsense_docs_async( query: str, db_session: AsyncSession, - top_k: int = 5, + top_k: int = 10, ) -> str: """ Search Surfsense documentation using vector similarity. @@ -127,7 +127,7 @@ def create_search_surfsense_docs_tool(db_session: AsyncSession): """ @tool - async def search_surfsense_docs(query: str, top_k: int = 5) -> str: + async def search_surfsense_docs(query: str, top_k: int = 10) -> str: """ Search Surfsense documentation for help with using the application. 
@@ -145,7 +145,7 @@ def create_search_surfsense_docs_tool(db_session: AsyncSession): Args: query: The search query about Surfsense usage or features - top_k: Number of documentation chunks to retrieve (default: 5) + top_k: Number of documentation chunks to retrieve (default: 10) Returns: Relevant documentation content formatted with chunk IDs for citations From 42473fe4edb4a2872bd5c50dc87e24dd238d6e40 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 12 Jan 2026 18:06:51 +0200 Subject: [PATCH 15/20] feat: add Zod schemas for Surfsense docs and fix citation handling --- .../components/assistant-ui/markdown-text.tsx | 9 ++--- .../new-chat/source-detail-panel.tsx | 23 +++++++++---- .../contracts/types/document.types.ts | 34 +++++++++++++++++++ .../lib/apis/documents-api.service.ts | 4 +-- 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx index 532ae7663..5bc905645 100644 --- a/surfsense_web/components/assistant-ui/markdown-text.tsx +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -20,7 +20,8 @@ const CITATION_REGEX = /\[citation:(doc-)?(\d+)\]/g; // Track chunk IDs to citation numbers mapping for consistent numbering // This map is reset when a new message starts rendering -let chunkIdToCitationNumber: Map = new Map(); +// Uses string keys to differentiate between doc and regular chunks (e.g., "doc-123" vs "123") +let chunkIdToCitationNumber: Map = new Map(); let nextCitationNumber = 1; /** @@ -37,11 +38,11 @@ export function resetCitationCounter() { */ function getCitationNumber(chunkId: number, isDocsChunk: boolean): number { const key = isDocsChunk ? `doc-${chunkId}` : String(chunkId); - const existingNumber = chunkIdToCitationNumber.get(key as unknown as number); + const existingNumber = chunkIdToCitationNumber.get(key); if (existingNumber === undefined) { - chunkIdToCitationNumber.set(key as unknown as number, nextCitationNumber++); + chunkIdToCitationNumber.set(key, nextCitationNumber++); } - return chunkIdToCitationNumber.get(key as unknown as number)!; + return chunkIdToCitationNumber.get(key)!; } /** diff --git a/surfsense_web/components/new-chat/source-detail-panel.tsx b/surfsense_web/components/new-chat/source-detail-panel.tsx index dc0c3c3f8..df2809fdb 100644 --- a/surfsense_web/components/new-chat/source-detail-panel.tsx +++ b/surfsense_web/components/new-chat/source-detail-panel.tsx @@ -21,10 +21,16 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible"; import { ScrollArea } from "@/components/ui/scroll-area"; +import type { + GetDocumentByChunkResponse, + GetSurfsenseDocsByChunkResponse, +} from "@/contracts/types/document.types"; import { documentsApiService } from "@/lib/apis/documents-api.service"; import { cacheKeys } from "@/lib/query-client/cache-keys"; import { cn } from "@/lib/utils"; +type DocumentData = GetDocumentByChunkResponse | GetSurfsenseDocsByChunkResponse; + interface SourceDetailPanelProps { open: boolean; onOpenChange: (open: boolean) => void; @@ -133,14 +139,16 @@ export function SourceDetailPanel({ data: documentData, isLoading: isDocumentByChunkFetching, error: documentByChunkFetchingError, - } = useQuery({ + } = useQuery({ queryKey: isDocsChunk ? 
cacheKeys.documents.byChunk(`doc-${chunkId}`) : cacheKeys.documents.byChunk(chunkId.toString()), - queryFn: () => - isDocsChunk - ? documentsApiService.getSurfsenseDocByChunk(chunkId) - : documentsApiService.getDocumentByChunk({ chunk_id: chunkId }), + queryFn: async () => { + if (isDocsChunk) { + return documentsApiService.getSurfsenseDocByChunk(chunkId); + } + return documentsApiService.getDocumentByChunk({ chunk_id: chunkId }); + }, enabled: !!chunkId && open, staleTime: 5 * 60 * 1000, }); @@ -332,7 +340,7 @@ export function SourceDetailPanel({ {documentData?.title || title || "Source Document"}

- {documentData + {documentData && "document_type" in documentData ? formatDocumentType(documentData.document_type) : sourceType && formatDocumentType(sourceType)} {documentData?.chunks && ( @@ -498,7 +506,8 @@ export function SourceDetailPanel({

{/* Document Metadata */} - {documentData.document_metadata && + {"document_metadata" in documentData && + documentData.document_metadata && Object.keys(documentData.document_metadata).length > 0 && ( ; export type DeleteDocumentRequest = z.infer; export type DeleteDocumentResponse = z.infer; export type DocumentTypeEnum = z.infer; +export type SurfsenseDocsChunk = z.infer; +export type SurfsenseDocsDocument = z.infer; +export type SurfsenseDocsDocumentWithChunks = z.infer; +export type GetSurfsenseDocsByChunkRequest = z.infer; +export type GetSurfsenseDocsByChunkResponse = z.infer; diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index 372baee4d..2e7d18e44 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -17,6 +17,7 @@ import { getDocumentsResponse, getDocumentTypeCountsRequest, getDocumentTypeCountsResponse, + getSurfsenseDocsByChunkResponse, type SearchDocumentsRequest, searchDocumentsRequest, searchDocumentsResponse, @@ -214,10 +215,9 @@ class DocumentsApiService { * Used for resolving [citation:doc-XXX] citations */ getSurfsenseDocByChunk = async (chunkId: number) => { - // Response shape matches getDocumentByChunkResponse structure return baseApiService.get( `/api/v1/surfsense-docs/by-chunk/${chunkId}`, - getDocumentByChunkResponse + getSurfsenseDocsByChunkResponse ); }; From f6621f9a9a2235972f11b9f4f671459e4f650e5e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 12 Jan 2026 18:28:38 +0200 Subject: [PATCH 16/20] fix: copy docs content to Docker image for Surfsense docs indexer --- Dockerfile.allinone | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile.allinone b/Dockerfile.allinone index 1c04ffb99..33ae32023 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -165,6 +165,9 @@ COPY --from=frontend-builder /app/.next/standalone ./ COPY --from=frontend-builder /app/.next/static ./.next/static COPY --from=frontend-builder /app/public ./public +# Copy docs content for Surfsense docs indexer (used at runtime for seeding) +COPY surfsense_web/content/docs /app/surfsense_web/content/docs + # ==================== # Setup Backend # ==================== From 96545056cdf2a88d608e983cfe5e1c63e195e6f5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 12 Jan 2026 20:15:11 +0200 Subject: [PATCH 17/20] refactor: async docs seeding in FastAPI lifespan --- scripts/docker/entrypoint-allinone.sh | 25 +---------- surfsense_backend/app/app.py | 3 ++ .../app/tasks/surfsense_docs_indexer.py | 45 +++++++++++++++---- .../scripts/seed_surfsense_docs.py | 41 +++++++---------- 4 files changed, 58 insertions(+), 56 deletions(-) diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh index 0888facf1..ef0ef28ed 100644 --- a/scripts/docker/entrypoint-allinone.sh +++ b/scripts/docker/entrypoint-allinone.sh @@ -145,36 +145,13 @@ run_migrations() { echo "✅ Database migrations complete" } -# ================================================ -# Seed Surfsense documentation -# ================================================ -seed_surfsense_docs() { - echo "📚 Seeding Surfsense documentation..." 
- - # Start PostgreSQL temporarily for seeding - su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_seed.log start" - sleep 5 - - cd /app/backend - python scripts/seed_surfsense_docs.py || echo "⚠️ Docs seeding may have already been done" - - # Stop PostgreSQL - su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop" - - echo "✅ Surfsense documentation seeded" -} - # Run migrations on first start or when explicitly requested if [ ! -f /data/.migrations_run ] || [ "${FORCE_MIGRATIONS:-false}" = "true" ]; then run_migrations touch /data/.migrations_run fi -# Seed docs on first start or when explicitly requested -if [ ! -f /data/.docs_seeded ] || [ "${FORCE_SEED_DOCS:-false}" = "true" ]; then - seed_surfsense_docs - touch /data/.docs_seeded -fi +# Note: Surfsense docs seeding is now handled by FastAPI startup (app.py lifespan) # ================================================ # Environment Variables Info diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 993961148..3ad9d89bc 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -13,6 +13,7 @@ from app.config import config from app.db import User, create_db_and_tables, get_async_session from app.routes import router as crud_router from app.schemas import UserCreate, UserRead, UserUpdate +from app.tasks.surfsense_docs_indexer import seed_surfsense_docs from app.users import SECRET, auth_backend, current_active_user, fastapi_users @@ -22,6 +23,8 @@ async def lifespan(app: FastAPI): await create_db_and_tables() # Setup LangGraph checkpointer tables for conversation persistence await setup_checkpointer_tables() + # Seed Surfsense documentation + await seed_surfsense_docs() yield # Cleanup: close checkpointer connection on shutdown await close_checkpointer() diff --git a/surfsense_backend/app/tasks/surfsense_docs_indexer.py b/surfsense_backend/app/tasks/surfsense_docs_indexer.py index 51a1c0938..f2c1e69ba 100644 --- a/surfsense_backend/app/tasks/surfsense_docs_indexer.py +++ b/surfsense_backend/app/tasks/surfsense_docs_indexer.py @@ -1,6 +1,6 @@ """ Surfsense documentation indexer. -Indexes MDX documentation files at migration time. +Indexes MDX documentation files at startup. """ import hashlib @@ -10,10 +10,11 @@ from datetime import UTC, datetime from pathlib import Path from sqlalchemy import select -from sqlalchemy.orm import Session, selectinload +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload from app.config import config -from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument +from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker logger = logging.getLogger(__name__) @@ -89,12 +90,12 @@ def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]: ] -def index_surfsense_docs(session: Session) -> tuple[int, int, int, int]: +async def index_surfsense_docs(session: AsyncSession) -> tuple[int, int, int, int]: """ Index all Surfsense documentation files. 
Args: - session: SQLAlchemy sync session + session: SQLAlchemy async session Returns: Tuple of (created, updated, skipped, deleted) counts @@ -105,7 +106,7 @@ def index_surfsense_docs(session: Session) -> tuple[int, int, int, int]: deleted = 0 # Get all existing docs from database - existing_docs_result = session.execute( + existing_docs_result = await session.execute( select(SurfsenseDocsDocument).options(selectinload(SurfsenseDocsDocument.chunks)) ) existing_docs = {doc.source: doc for doc in existing_docs_result.scalars().all()} @@ -178,11 +179,11 @@ def index_surfsense_docs(session: Session) -> tuple[int, int, int, int]: for source, doc in existing_docs.items(): if source not in processed_sources: logger.info(f"Deleting removed document: {source}") - session.delete(doc) + await session.delete(doc) deleted += 1 # Commit all changes - session.commit() + await session.commit() logger.info( f"Indexing complete: {created} created, {updated} updated, " @@ -191,3 +192,31 @@ def index_surfsense_docs(session: Session) -> tuple[int, int, int, int]: return created, updated, skipped, deleted + +async def seed_surfsense_docs() -> tuple[int, int, int, int]: + """ + Seed Surfsense documentation into the database. + + This function indexes all MDX files from the docs directory. + It handles creating, updating, and deleting docs based on content changes. + + Returns: + Tuple of (created, updated, skipped, deleted) counts + Returns (0, 0, 0, 0) if an error occurs + """ + logger.info("Starting Surfsense docs indexing...") + + try: + async with async_session_maker() as session: + created, updated, skipped, deleted = await index_surfsense_docs(session) + + logger.info( + f"Surfsense docs indexing complete: " + f"created={created}, updated={updated}, skipped={skipped}, deleted={deleted}" + ) + + return created, updated, skipped, deleted + + except Exception as e: + logger.error(f"Failed to seed Surfsense docs: {e}", exc_info=True) + return 0, 0, 0, 0 diff --git a/surfsense_backend/scripts/seed_surfsense_docs.py b/surfsense_backend/scripts/seed_surfsense_docs.py index 2e9eee649..d9536bf91 100644 --- a/surfsense_backend/scripts/seed_surfsense_docs.py +++ b/surfsense_backend/scripts/seed_surfsense_docs.py @@ -1,47 +1,40 @@ #!/usr/bin/env python """ Seed Surfsense documentation into the database. -Run this script after migrations to index MDX documentation files. + +CLI wrapper for the seed_surfsense_docs function. +Can be run manually for debugging or re-indexing. 
Usage: python scripts/seed_surfsense_docs.py """ +import asyncio import sys from pathlib import Path # Add the parent directory to the path so we can import app modules sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) -from sqlalchemy import create_engine -from sqlalchemy.orm import Session - -from app.config import config -from app.tasks.surfsense_docs_indexer import index_surfsense_docs +from app.tasks.surfsense_docs_indexer import seed_surfsense_docs def main(): - """Main entry point for seeding Surfsense docs.""" - print("Starting Surfsense docs seeding...") + """CLI entry point for seeding Surfsense docs.""" + print("=" * 50) + print(" Surfsense Documentation Seeding") + print("=" * 50) - # Create sync engine from database URL - # Convert async URL to sync if needed - database_url = config.DATABASE_URL - if database_url.startswith("postgresql+asyncpg://"): - database_url = database_url.replace("postgresql+asyncpg://", "postgresql://") + created, updated, skipped, deleted = asyncio.run(seed_surfsense_docs()) - engine = create_engine(database_url) - - with Session(engine) as session: - created, updated, skipped, deleted = index_surfsense_docs(session) - - print(f"\nSurfsense docs seeding complete:") - print(f" Created: {created}") - print(f" Updated: {updated}") - print(f" Skipped: {skipped}") - print(f" Deleted: {deleted}") + print() + print("Results:") + print(f" Created: {created}") + print(f" Updated: {updated}") + print(f" Skipped: {skipped}") + print(f" Deleted: {deleted}") + print("=" * 50) if __name__ == "__main__": main() - From 19ef32539d33a2c30f757c062576b267abd426a6 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 12 Jan 2026 20:20:48 +0200 Subject: [PATCH 18/20] cleanup: remove obsolete comments --- Dockerfile.allinone | 1 - scripts/docker/entrypoint-allinone.sh | 2 -- 2 files changed, 3 deletions(-) diff --git a/Dockerfile.allinone b/Dockerfile.allinone index 33ae32023..95893c0b5 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -165,7 +165,6 @@ COPY --from=frontend-builder /app/.next/standalone ./ COPY --from=frontend-builder /app/.next/static ./.next/static COPY --from=frontend-builder /app/public ./public -# Copy docs content for Surfsense docs indexer (used at runtime for seeding) COPY surfsense_web/content/docs /app/surfsense_web/content/docs # ==================== diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh index ef0ef28ed..8248968ab 100644 --- a/scripts/docker/entrypoint-allinone.sh +++ b/scripts/docker/entrypoint-allinone.sh @@ -151,8 +151,6 @@ if [ ! 
-f /data/.migrations_run ] || [ "${FORCE_MIGRATIONS:-false}" = "true" ]; touch /data/.migrations_run fi -# Note: Surfsense docs seeding is now handled by FastAPI startup (app.py lifespan) - # ================================================ # Environment Variables Info # ================================================ From 842004e6170410c50021ad3d3286c669b8926781 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 12 Jan 2026 20:54:00 +0200 Subject: [PATCH 19/20] docs: add tool examples to system prompt --- .../app/agents/new_chat/system_prompt.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index f9dfdb025..169363fe9 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -26,6 +26,13 @@ SURFSENSE_TOOLS_INSTRUCTIONS = """ You have access to the following tools: +0. search_surfsense_docs: Search the official SurfSense documentation. + - Use this tool when the user asks anything about SurfSense itself (the application they are using). + - Args: + - query: The search query about SurfSense + - top_k: Number of documentation chunks to retrieve (default: 10) + - Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123]) + 1. search_knowledge_base: Search the user's personal knowledge base for relevant information. - Args: - query: The search query - be specific and include key terms @@ -152,6 +159,18 @@ You have access to the following tools: - Airtable/Notion: Check field values, apply mapping above +- User: "How do I install SurfSense?" + - Call: `search_surfsense_docs(query="installation setup")` + +- User: "What connectors does SurfSense support?" + - Call: `search_surfsense_docs(query="available connectors integrations")` + +- User: "How do I set up the Notion connector?" + - Call: `search_surfsense_docs(query="Notion connector setup configuration")` + +- User: "How do I use Docker to run SurfSense?" + - Call: `search_surfsense_docs(query="Docker installation setup")` + - User: "Fetch all my notes and what's in them?" - Call: `search_knowledge_base(query="*", top_k=50, connectors_to_search=["NOTE"])` From c0b97fcc68cae577d6273a8570ff7717d364d814 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 12 Jan 2026 21:00:06 +0200 Subject: [PATCH 20/20] fix: add doc- prefix examples to citation instructions --- surfsense_backend/app/agents/new_chat/system_prompt.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 169363fe9..15fc17022 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -327,7 +327,7 @@ The documents you receive are structured like this: -IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id. +IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124, doc-45). Do NOT cite document_id. @@ -338,11 +338,13 @@ IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite docume - NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format - NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". 
Always use plain square brackets only
 - NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
+- Copy the EXACT chunk id from the XML - if it says `<chunk id="doc-123">`, use [citation:doc-123]


 CORRECT citation formats:
 - [citation:5]
+- [citation:doc-123] (for Surfsense documentation chunks)
 - [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]

 INCORRECT citation formats (DO NOT use):
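
Taken together, the series wires up three pieces: indexing (docs seeding at FastAPI startup), retrieval (the search_surfsense_docs agent tool), and citation resolution (the by-chunk endpoint). A minimal sketch of exercising that flow end to end against a migrated database follows; the script name and the query string are illustrative, and the imports assume the modules introduced in the patches above:

#!/usr/bin/env python
# smoke_test_surfsense_docs.py - hypothetical sketch, not part of this series.
import asyncio

from app.agents.new_chat.tools.search_surfsense_docs import search_surfsense_docs_async
from app.db import async_session_maker
from app.tasks.surfsense_docs_indexer import seed_surfsense_docs


async def main() -> None:
    # Idempotent: unchanged MDX files are skipped via the content_hash check.
    created, updated, skipped, deleted = await seed_surfsense_docs()
    print(f"Seeded: {created} created, {updated} updated, {skipped} skipped, {deleted} deleted")

    async with async_session_maker() as session:
        # Chunk ids come back with the 'doc-' prefix, so the model can emit
        # [citation:doc-123] and the frontend resolves it through
        # GET /api/v1/surfsense-docs/by-chunk/123.
        xml = await search_surfsense_docs_async(
            query="How do I configure the Notion connector?",
            db_session=session,
            top_k=5,
        )
        print(xml)


if __name__ == "__main__":
    asyncio.run(main())

One caveat: seed_surfsense_docs() catches its own exceptions and returns (0, 0, 0, 0) on failure, so a real smoke test should also assert that at least one document exists after seeding rather than relying on the counts alone.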