diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 70d7fb07e..000000000 --- a/.dockerignore +++ /dev/null @@ -1,97 +0,0 @@ -# Git -.git -.gitignore -.gitattributes - -# Documentation -*.md -!README.md -docs/ -CONTRIBUTING.md -CODE_OF_CONDUCT.md -LICENSE - -# IDE -.vscode/ -.idea/ -*.swp -*.swo -.cursor/ - -# Node -**/node_modules/ -**/.next/ -**/dist/ -**/.turbo/ -**/.cache/ -**/coverage/ - -# Python -**/__pycache__/ -**/*.pyc -**/*.pyo -**/*.pyd -**/.Python -**/build/ -**/develop-eggs/ -**/downloads/ -**/eggs/ -**/.eggs/ -# Python venv lib folders (but not frontend lib folders) -surfsense_backend/lib/ -surfsense_backend/lib64/ -**/parts/ -**/sdist/ -**/var/ -**/wheels/ -**/*.egg-info/ -**/.installed.cfg -**/*.egg -**/pip-log.txt -**/.tox/ -**/.coverage -**/htmlcov/ -**/.pytest_cache/ -**/nosetests.xml -**/coverage.xml - -# Environment -**/.env -**/.env.* -!**/.env.example -**/*.local - -# Docker -**/Dockerfile -**/docker-compose*.yml -**/.docker/ - -# Testing -**/tests/ -**/test/ -**/__tests__/ -**/*.test.* -**/*.spec.* - -# Logs -**/*.log - -# Temporary files -**/tmp/ -**/temp/ -**/.tmp/ -**/.temp/ - -# Build artifacts from backend -surfsense_backend/podcasts/ -surfsense_backend/temp_audio/ -surfsense_backend/*.bak -surfsense_backend/*.dat -surfsense_backend/*.dir - -# GitHub -.github/ - -# Browser extension (not needed for main deployment) -surfsense_browser_extension/ - diff --git a/.github/workflows/docker_build.yaml b/.github/workflows/docker-build.yml similarity index 66% rename from .github/workflows/docker_build.yaml rename to .github/workflows/docker-build.yml index 15b89198e..a53a4b414 100644 --- a/.github/workflows/docker_build.yaml +++ b/.github/workflows/docker-build.yml @@ -26,6 +26,7 @@ permissions: jobs: tag_release: runs-on: ubuntu-latest + if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch' outputs: new_tag: ${{ steps.tag_version.outputs.next_version }} steps: @@ -86,6 +87,7 @@ jobs: build: needs: tag_release + if: always() && (needs.tag_release.result == 'success' || needs.tag_release.result == 'skipped') runs-on: ${{ matrix.os }} permissions: packages: write @@ -121,6 +123,12 @@ jobs: id: image run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ steps.image.outputs.name }} + - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: @@ -139,14 +147,15 @@ jobs: sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true docker system prune -af - - name: Build and push ${{ matrix.name }} (${{ matrix.suffix }}) + - name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }}) id: build uses: docker/build-push-action@v6 with: context: ${{ matrix.context }} file: ${{ matrix.file }} - push: true - tags: ${{ steps.image.outputs.name }}:${{ needs.tag_release.outputs.new_tag }}-${{ matrix.suffix }} + labels: ${{ steps.meta.outputs.labels }} + tags: ${{ steps.image.outputs.name }} + outputs: type=image,push-by-digest=true,name-canonical=true,push=true platforms: ${{ matrix.platform }} cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.suffix }} cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.suffix }} @@ -159,9 +168,24 @@ jobs: ${{ matrix.image == 'web' && 'NEXT_PUBLIC_ELECTRIC_AUTH_MODE=__NEXT_PUBLIC_ELECTRIC_AUTH_MODE__' || '' }} ${{ matrix.image == 'web' && 'NEXT_PUBLIC_DEPLOYMENT_MODE=__NEXT_PUBLIC_DEPLOYMENT_MODE__' || '' }} + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ matrix.image }}-${{ matrix.suffix }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + create_manifest: runs-on: ubuntu-latest needs: [tag_release, build] + if: always() && needs.build.result == 'success' permissions: packages: write contents: read @@ -170,7 +194,9 @@ jobs: matrix: include: - name: surfsense-backend + image: backend - name: surfsense-web + image: web env: REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }} @@ -179,6 +205,21 @@ jobs: id: image run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT + - name: Download amd64 digest + uses: actions/download-artifact@v4 + with: + name: digests-${{ matrix.image }}-amd64 + path: /tmp/digests + + - name: Download arm64 digest + uses: actions/download-artifact@v4 + with: + name: digests-${{ matrix.image }}-arm64 + path: /tmp/digests + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: @@ -186,35 +227,41 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Create and push multi-arch manifest + - name: Compute app version + id: appver run: | VERSION_TAG="${{ needs.tag_release.outputs.new_tag }}" - IMAGE="${{ steps.image.outputs.name }}" - APP_VERSION=$(echo "$VERSION_TAG" | rev | cut -d. -f2- | rev) - - docker manifest create ${IMAGE}:${VERSION_TAG} \ - ${IMAGE}:${VERSION_TAG}-amd64 \ - ${IMAGE}:${VERSION_TAG}-arm64 - - docker manifest push ${IMAGE}:${VERSION_TAG} - - if [[ "${{ github.ref }}" == "refs/heads/${{ github.event.repository.default_branch }}" ]] || [[ "${{ github.event.inputs.branch }}" == "${{ github.event.repository.default_branch }}" ]]; then - docker manifest create ${IMAGE}:${APP_VERSION} \ - ${IMAGE}:${VERSION_TAG}-amd64 \ - ${IMAGE}:${VERSION_TAG}-arm64 - - docker manifest push ${IMAGE}:${APP_VERSION} - - docker manifest create ${IMAGE}:latest \ - ${IMAGE}:${VERSION_TAG}-amd64 \ - ${IMAGE}:${VERSION_TAG}-arm64 - - docker manifest push ${IMAGE}:latest + if [ -n "$VERSION_TAG" ]; then + APP_VERSION=$(echo "$VERSION_TAG" | rev | cut -d. -f2- | rev) + else + APP_VERSION="" fi + echo "app_version=$APP_VERSION" >> $GITHUB_OUTPUT + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ steps.image.outputs.name }} + tags: | + type=raw,value=${{ needs.tag_release.outputs.new_tag }},enable=${{ needs.tag_release.outputs.new_tag != '' }} + type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.tag_release.outputs.new_tag != '' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }} + type=ref,event=branch + type=sha,prefix=git- + flavor: | + latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create \ + $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ steps.image.outputs.name }}@sha256:%s ' *) + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }} - name: Summary - run: | + run: | echo "Multi-arch manifest created for ${{ matrix.name }}!" - echo "Versioned: ${{ steps.image.outputs.name }}:${{ needs.tag_release.outputs.new_tag }}" - echo "App version: ${{ steps.image.outputs.name }}:$(echo '${{ needs.tag_release.outputs.new_tag }}' | rev | cut -d. -f2- | rev)" - echo "Latest: ${{ steps.image.outputs.name }}:latest" + echo "Tags: $(jq -cr '.tags | join(", ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")" diff --git a/docker/.env.example b/docker/.env.example index 7025cac52..c31b87185 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -33,9 +33,9 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 # Ports (change to avoid conflicts with other services on your machine) # ------------------------------------------------------------------------------ -# BACKEND_PORT=8000 -# FRONTEND_PORT=3000 -# ELECTRIC_PORT=5133 +# BACKEND_PORT=8929 +# FRONTEND_PORT=3929 +# ELECTRIC_PORT=5929 # FLOWER_PORT=5555 # ============================================================================== diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index b76f26b2d..4d602f584 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -8,7 +8,7 @@ # For production with prebuilt images, use docker/docker-compose.yml instead. # ============================================================================= -name: surfsense +name: surfsense-dev services: db: @@ -162,8 +162,9 @@ services: image: electricsql/electric:1.4.10 ports: - "${ELECTRIC_PORT:-5133}:3000" - # depends_on: - # - db + depends_on: + db: + condition: service_healthy environment: - DATABASE_URL=${ELECTRIC_DATABASE_URL:-postgresql://${ELECTRIC_DB_USER:-electric}:${ELECTRIC_DB_PASSWORD:-electric_password}@${DB_HOST:-db}:${DB_PORT:-5432}/${DB_NAME:-surfsense}?sslmode=${DB_SSLMODE:-disable}} - ELECTRIC_INSECURE=true @@ -197,10 +198,10 @@ services: volumes: postgres_data: - name: surfsense-postgres + name: surfsense-dev-postgres pgadmin_data: - name: surfsense-pgadmin + name: surfsense-dev-pgadmin redis_data: - name: surfsense-redis + name: surfsense-dev-redis shared_temp: - name: surfsense-shared-temp + name: surfsense-dev-shared-temp diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 9fca4dfb5..ca20e3ed4 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -45,7 +45,7 @@ services: backend: image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest} ports: - - "${BACKEND_PORT:-8000}:8000" + - "${BACKEND_PORT:-8929}:8000" volumes: - shared_temp:/shared_tmp env_file: @@ -61,7 +61,7 @@ services: UNSTRUCTURED_HAS_PATCHED_LOOP: "1" ELECTRIC_DB_USER: ${ELECTRIC_DB_USER:-electric} ELECTRIC_DB_PASSWORD: ${ELECTRIC_DB_PASSWORD:-electric_password} - NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3000}} + NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}} # Daytona Sandbox – uncomment and set credentials to enable cloud code execution # DAYTONA_SANDBOX_ENABLED: "TRUE" # DAYTONA_API_KEY: ${DAYTONA_API_KEY:-} @@ -151,7 +151,7 @@ services: electric: image: electricsql/electric:1.4.10 ports: - - "${ELECTRIC_PORT:-5133}:3000" + - "${ELECTRIC_PORT:-5929}:3000" environment: DATABASE_URL: ${ELECTRIC_DATABASE_URL:-postgresql://${ELECTRIC_DB_USER:-electric}:${ELECTRIC_DB_PASSWORD:-electric_password}@${DB_HOST:-db}:${DB_PORT:-5432}/${DB_NAME:-surfsense}?sslmode=${DB_SSLMODE:-disable}} ELECTRIC_INSECURE: "true" @@ -169,10 +169,10 @@ services: frontend: image: ghcr.io/modsetter/surfsense-web:${SURFSENSE_VERSION:-latest} ports: - - "${FRONTEND_PORT:-3000}:3000" + - "${FRONTEND_PORT:-3929}:3000" environment: - NEXT_PUBLIC_FASTAPI_BACKEND_URL: ${NEXT_PUBLIC_FASTAPI_BACKEND_URL:-http://localhost:${BACKEND_PORT:-8000}} - NEXT_PUBLIC_ELECTRIC_URL: ${NEXT_PUBLIC_ELECTRIC_URL:-http://localhost:${ELECTRIC_PORT:-5133}} + NEXT_PUBLIC_FASTAPI_BACKEND_URL: ${NEXT_PUBLIC_FASTAPI_BACKEND_URL:-http://localhost:${BACKEND_PORT:-8929}} + NEXT_PUBLIC_ELECTRIC_URL: ${NEXT_PUBLIC_ELECTRIC_URL:-http://localhost:${ELECTRIC_PORT:-5929}} NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${AUTH_TYPE:-LOCAL} NEXT_PUBLIC_ETL_SERVICE: ${ETL_SERVICE:-DOCLING} NEXT_PUBLIC_DEPLOYMENT_MODE: ${DEPLOYMENT_MODE:-self-hosted} diff --git a/docker/scripts/install.ps1 b/docker/scripts/install.ps1 index fc9c75a28..5f41ef7d6 100644 --- a/docker/scripts/install.ps1 +++ b/docker/scripts/install.ps1 @@ -321,9 +321,9 @@ Write-Host " OSS Alternative to NotebookLM for Teams [$versionDisplay]" Write-Host ("=" * 62) -ForegroundColor Cyan Write-Host "" -Write-Info " Frontend: http://localhost:3000" -Write-Info " Backend: http://localhost:8000" -Write-Info " API Docs: http://localhost:8000/docs" +Write-Info " Frontend: http://localhost:3929" +Write-Info " Backend: http://localhost:8929" +Write-Info " API Docs: http://localhost:8929/docs" Write-Info "" Write-Info " Config: $InstallDir\.env" Write-Info " Logs: cd $InstallDir; docker compose logs -f" diff --git a/docker/scripts/install.sh b/docker/scripts/install.sh index c4a0d5c9f..eb6aeb83d 100644 --- a/docker/scripts/install.sh +++ b/docker/scripts/install.sh @@ -304,9 +304,9 @@ _version_display="${_version_display:-latest}" printf " OSS Alternative to NotebookLM for Teams ${YELLOW}[%s]${NC}\n" "${_version_display}" printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n\n" -info " Frontend: http://localhost:3000" -info " Backend: http://localhost:8000" -info " API Docs: http://localhost:8000/docs" +info " Frontend: http://localhost:3929" +info " Backend: http://localhost:8929" +info " API Docs: http://localhost:8929/docs" info "" info " Config: ${INSTALL_DIR}/.env" info " Logs: cd ${INSTALL_DIR} && ${DC} logs -f" diff --git a/surfsense_backend/alembic/versions/105_add_chunks_document_id_index.py b/surfsense_backend/alembic/versions/105_add_chunks_document_id_index.py new file mode 100644 index 000000000..b2bd4edbc --- /dev/null +++ b/surfsense_backend/alembic/versions/105_add_chunks_document_id_index.py @@ -0,0 +1,41 @@ +"""105_add_chunks_document_id_index + +Revision ID: 105 +Revises: 104 +Create Date: 2026-03-09 + +Adds a B-tree index on chunks.document_id to speed up chunk lookups +during hybrid search (both retrievers fetch chunks by document_id +after RRF ranking selects the top documents). +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from alembic import op + +revision: str = "105" +down_revision: str | None = "104" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'chunks' AND indexname = 'ix_chunks_document_id' + ) THEN + CREATE INDEX ix_chunks_document_id ON chunks(document_id); + END IF; + END$$; + """ + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_chunks_document_id") diff --git a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py index ec86c3ffa..b8b1527c7 100644 --- a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py +++ b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py @@ -8,6 +8,7 @@ The documentation is indexed at deployment time from MDX files and stored in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks). """ +import asyncio import json from langchain_core.tools import tool @@ -100,7 +101,7 @@ async def search_surfsense_docs_async( Formatted string with relevant documentation content """ # Get embedding for the query - query_embedding = embed_text(query) + query_embedding = await asyncio.to_thread(embed_text, query) # Vector similarity search on chunks, joining with documents stmt = ( diff --git a/surfsense_backend/app/agents/new_chat/tools/shared_memory.py b/surfsense_backend/app/agents/new_chat/tools/shared_memory.py index ba69f1ce8..c826d808f 100644 --- a/surfsense_backend/app/agents/new_chat/tools/shared_memory.py +++ b/surfsense_backend/app/agents/new_chat/tools/shared_memory.py @@ -1,5 +1,6 @@ """Shared (team) memory backend for search-space-scoped AI context.""" +import asyncio import logging from typing import Any from uuid import UUID @@ -64,7 +65,7 @@ async def save_shared_memory( count = await get_shared_memory_count(db_session, search_space_id) if count >= MAX_MEMORIES_PER_SEARCH_SPACE: await delete_oldest_shared_memory(db_session, search_space_id) - embedding = embed_text(content) + embedding = await asyncio.to_thread(embed_text, content) row = SharedMemory( search_space_id=search_space_id, created_by_id=_to_uuid(created_by_id), @@ -108,7 +109,7 @@ async def recall_shared_memory( if category and category in valid_categories: stmt = stmt.where(SharedMemory.category == MemoryCategory(category)) if query: - query_embedding = embed_text(query) + query_embedding = await asyncio.to_thread(embed_text, query) stmt = stmt.order_by( SharedMemory.embedding.op("<=>")(query_embedding) ).limit(top_k) diff --git a/surfsense_backend/app/agents/new_chat/tools/user_memory.py b/surfsense_backend/app/agents/new_chat/tools/user_memory.py index 8aa516454..81e849856 100644 --- a/surfsense_backend/app/agents/new_chat/tools/user_memory.py +++ b/surfsense_backend/app/agents/new_chat/tools/user_memory.py @@ -9,6 +9,7 @@ Features: - recall_memory: Retrieve relevant memories using semantic search """ +import asyncio import logging from typing import Any from uuid import UUID @@ -177,8 +178,7 @@ def create_save_memory_tool( # Delete oldest memory to make room await delete_oldest_memory(db_session, user_id, search_space_id) - # Generate embedding for the memory - embedding = embed_text(content) + embedding = await asyncio.to_thread(embed_text, content) # Create new memory using ORM # The pgvector Vector column type handles embedding conversion automatically @@ -267,8 +267,7 @@ def create_recall_memory_tool( uuid_user_id = _to_uuid(user_id) if query: - # Semantic search using embeddings - query_embedding = embed_text(query) + query_embedding = await asyncio.to_thread(embed_text, query) # Build query with vector similarity stmt = ( diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index 1ddc54e2a..aaf77a54f 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -328,6 +328,7 @@ class Config: EMBEDDING_MODEL, **embedding_kwargs, ) + is_local_embedding_model = "://" not in (EMBEDDING_MODEL or "") chunker_instance = RecursiveChunker( chunk_size=getattr(embedding_model_instance, "max_seq_length", 512) ) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index dc355dd94..04d1328a6 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -960,7 +960,7 @@ class Chunk(BaseModel, TimestampMixin): embedding = Column(Vector(config.embedding_model_instance.dimension)) document_id = Column( - Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False + Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False, index=True ) document = relationship("Document", back_populates="chunks") diff --git a/surfsense_backend/app/indexing_pipeline/document_embedder.py b/surfsense_backend/app/indexing_pipeline/document_embedder.py index adec24434..f545d9097 100644 --- a/surfsense_backend/app/indexing_pipeline/document_embedder.py +++ b/surfsense_backend/app/indexing_pipeline/document_embedder.py @@ -1,3 +1,3 @@ -from app.utils.document_converters import embed_text +from app.utils.document_converters import embed_text, embed_texts -__all__ = ["embed_text"] +__all__ = ["embed_text", "embed_texts"] diff --git a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py index 9460f900c..0fadfc42f 100644 --- a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py +++ b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py @@ -9,7 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.db import Chunk, Document, DocumentStatus from app.indexing_pipeline.connector_document import ConnectorDocument from app.indexing_pipeline.document_chunker import chunk_text -from app.indexing_pipeline.document_embedder import embed_text +from app.indexing_pipeline.document_embedder import embed_texts from app.indexing_pipeline.document_hashing import ( compute_content_hash, compute_unique_identifier_hash, @@ -195,25 +195,23 @@ class IndexingPipelineService: else: content = connector_doc.source_markdown - t_step = time.perf_counter() - embedding = embed_text(content) - perf.debug( - "[indexing] embed_text (summary) doc=%d in %.3fs", - document.id, - time.perf_counter() - t_step, - ) - await self.session.execute( delete(Chunk).where(Chunk.document_id == document.id) ) t_step = time.perf_counter() + chunk_texts = chunk_text( + connector_doc.source_markdown, + use_code_chunker=connector_doc.should_use_code_chunker, + ) + + texts_to_embed = [content, *chunk_texts] + embeddings = embed_texts(texts_to_embed) + summary_embedding, *chunk_embeddings = embeddings + chunks = [ - Chunk(content=text, embedding=embed_text(text)) - for text in chunk_text( - connector_doc.source_markdown, - use_code_chunker=connector_doc.should_use_code_chunker, - ) + Chunk(content=text, embedding=emb) + for text, emb in zip(chunk_texts, chunk_embeddings) ] perf.info( "[indexing] chunk+embed doc=%d chunks=%d in %.3fs", @@ -223,7 +221,7 @@ class IndexingPipelineService: ) document.content = content - document.embedding = embedding + document.embedding = summary_embedding attach_chunks_to_document(document, chunks) document.updated_at = datetime.now(UTC) document.status = DocumentStatus.ready() diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index 9db0cc9e1..870e175d3 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -264,7 +264,9 @@ class ConnectorService: # Reuse caller-provided embedding or compute once for both retrievers. if query_embedding is None: t_embed = time.perf_counter() - query_embedding = config.embedding_model_instance.embed(query_text) + query_embedding = await asyncio.to_thread( + config.embedding_model_instance.embed, query_text + ) perf.info( "[connector_svc] _combined_rrf embedding in %.3fs type=%s", time.perf_counter() - t_embed, diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py index 8049b0de5..6a59990f5 100644 --- a/surfsense_backend/app/utils/document_converters.py +++ b/surfsense_backend/app/utils/document_converters.py @@ -55,6 +55,23 @@ def embed_text(text: str) -> np.ndarray: return config.embedding_model_instance.embed(truncate_for_embedding(text)) +def embed_texts(texts: list[str]) -> list[np.ndarray]: + """Batch-embed multiple texts in a single call. + + Each text is truncated to fit the model's context window before embedding. + For API-based models (``://`` in the model string) this uses + ``embed_batch`` to collapse many network round-trips into one. + For local models (SentenceTransformers) it falls back to sequential + ``embed`` calls to avoid padding overhead. + """ + if not texts: + return [] + truncated = [truncate_for_embedding(t) for t in texts] + if config.is_local_embedding_model: + return [config.embedding_model_instance.embed(t) for t in truncated] + return config.embedding_model_instance.embed_batch(truncated) + + def get_model_context_window(model_name: str) -> int: """Get the total context window size for a model (input + output tokens).""" try: @@ -209,12 +226,11 @@ async def create_document_chunks(content: str) -> list[Chunk]: Returns: List of Chunk objects with embeddings """ + chunk_texts = [c.text for c in config.chunker_instance.chunk(content)] + chunk_embeddings = embed_texts(chunk_texts) return [ - Chunk( - content=chunk.text, - embedding=embed_text(chunk.text), - ) - for chunk in config.chunker_instance.chunk(content) + Chunk(content=text, embedding=emb) + for text, emb in zip(chunk_texts, chunk_embeddings) ] diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index acad25330..909f9be5b 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -68,7 +68,6 @@ dependencies = [ "deepagents>=0.4.3", "langchain-daytona>=0.0.2", "pypandoc>=1.16.2", - "mmdc>=0.4.0", ] [dependency-groups] diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py index 8b92a5aa8..4e43ea302 100644 --- a/surfsense_backend/tests/integration/conftest.py +++ b/surfsense_backend/tests/integration/conftest.py @@ -129,10 +129,12 @@ def patched_summarize_raises(monkeypatch) -> AsyncMock: @pytest.fixture -def patched_embed_text(monkeypatch) -> MagicMock: - mock = MagicMock(return_value=[0.1] * _EMBEDDING_DIM) +def patched_embed_texts(monkeypatch) -> MagicMock: + mock = MagicMock( + side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts] + ) monkeypatch.setattr( - "app.indexing_pipeline.indexing_pipeline_service.embed_text", + "app.indexing_pipeline.indexing_pipeline_service.embed_texts", mock, ) return mock diff --git a/surfsense_backend/tests/integration/document_upload/conftest.py b/surfsense_backend/tests/integration/document_upload/conftest.py index 41639fc2f..45cfef7ac 100644 --- a/surfsense_backend/tests/integration/document_upload/conftest.py +++ b/surfsense_backend/tests/integration/document_upload/conftest.py @@ -265,8 +265,8 @@ def _mock_external_apis(monkeypatch): AsyncMock(return_value="Mocked summary."), ) monkeypatch.setattr( - "app.indexing_pipeline.indexing_pipeline_service.embed_text", - MagicMock(return_value=[0.1] * _EMBEDDING_DIM), + "app.indexing_pipeline.indexing_pipeline_service.embed_texts", + MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]), ) monkeypatch.setattr( "app.indexing_pipeline.indexing_pipeline_service.chunk_text", diff --git a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py index fa0fe5787..9fc802aa6 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py @@ -8,7 +8,7 @@ pytestmark = pytest.mark.integration @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_sets_status_ready(db_session, db_search_space, db_user, mocker): """Document status is READY after successful indexing.""" @@ -31,7 +31,7 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker): @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_content_is_summary(db_session, db_search_space, db_user, mocker): """Document content is set to the LLM-generated summary.""" @@ -55,7 +55,7 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker): @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker): """Chunks derived from the source markdown are persisted in the DB.""" @@ -84,7 +84,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker @pytest.mark.usefixtures( - "patched_summarize_raises", "patched_embed_text", "patched_chunk_text" + "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text" ) async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker): """RuntimeError is raised when the indexing step fails so the caller can fire a failure notification.""" @@ -107,7 +107,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker): """Document content is updated to the new summary after reindexing.""" @@ -136,7 +136,7 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_reindex_updates_content_hash( db_session, db_search_space, db_user, mocker @@ -168,7 +168,7 @@ async def test_reindex_updates_content_hash( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, mocker): """Document status is READY after successful reindexing.""" @@ -196,7 +196,7 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m assert DocumentStatus.is_state(document.status, DocumentStatus.READY) -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text") +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts") async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker): """Reindexing replaces old chunks with new content rather than appending.""" mocker.patch( @@ -235,7 +235,7 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_reindex_clears_reindexing_flag( db_session, db_search_space, db_user, mocker @@ -266,7 +266,7 @@ async def test_reindex_clears_reindexing_flag( assert document.content_needs_reindexing is False -@pytest.mark.usefixtures("patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text") async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, mocker): """RuntimeError is raised when reindexing fails so the caller can handle it.""" mocker.patch( diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py index 2e8ee4d92..a82148f96 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py @@ -11,7 +11,7 @@ pytestmark = pytest.mark.integration @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_sets_status_ready( db_session, @@ -38,7 +38,7 @@ async def test_sets_status_ready( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_content_is_summary_when_should_summarize_true( db_session, @@ -65,7 +65,7 @@ async def test_content_is_summary_when_should_summarize_true( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_content_is_source_markdown_when_should_summarize_false( db_session, @@ -95,7 +95,7 @@ async def test_content_is_source_markdown_when_should_summarize_false( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_chunks_written_to_db( db_session, @@ -123,7 +123,7 @@ async def test_chunks_written_to_db( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_embedding_written_to_db( db_session, @@ -151,7 +151,7 @@ async def test_embedding_written_to_db( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_updated_at_advances_after_indexing( db_session, @@ -183,7 +183,7 @@ async def test_updated_at_advances_after_indexing( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_no_llm_falls_back_to_source_markdown( db_session, @@ -214,7 +214,7 @@ async def test_no_llm_falls_back_to_source_markdown( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_fallback_summary_used_when_llm_unavailable( db_session, @@ -245,7 +245,7 @@ async def test_fallback_summary_used_when_llm_unavailable( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_reindex_replaces_old_chunks( db_session, @@ -282,7 +282,7 @@ async def test_reindex_replaces_old_chunks( @pytest.mark.usefixtures( - "patched_summarize_raises", "patched_embed_text", "patched_chunk_text" + "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text" ) async def test_llm_error_sets_status_failed( db_session, @@ -309,7 +309,7 @@ async def test_llm_error_sets_status_failed( @pytest.mark.usefixtures( - "patched_summarize_raises", "patched_embed_text", "patched_chunk_text" + "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text" ) async def test_llm_error_leaves_no_partial_data( db_session, diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py index 837b02c9f..776180b9a 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py @@ -33,7 +33,7 @@ async def test_new_document_is_persisted_with_pending_status( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_unchanged_ready_document_is_skipped( db_session, @@ -56,7 +56,7 @@ async def test_unchanged_ready_document_is_skipped( @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_text", "patched_chunk_text" + "patched_summarize", "patched_embed_texts", "patched_chunk_text" ) async def test_title_only_change_updates_title_in_db( db_session, @@ -339,7 +339,7 @@ async def test_same_content_from_different_source_is_skipped( @pytest.mark.usefixtures( - "patched_summarize_raises", "patched_embed_text", "patched_chunk_text" + "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text" ) async def test_failed_document_with_unchanged_content_is_requeued( db_session, diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index 41f312154..535e0c43f 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -4209,17 +4209,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ad/3f/3d42e9a78fe5edf792a83c074b13b9b770092a4fbf3462872f4303135f09/ml_dtypes-0.5.4-cp314-cp314t-win_arm64.whl", hash = "sha256:11942cbf2cf92157db91e5022633c0d9474d4dfd813a909383bd23ce828a4b7d", size = 168825 }, ] -[[package]] -name = "mmdc" -version = "0.4.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "phasma" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/39/3179a0e6684f91aae3f41cb80d99b97d40177f0fb8a08a971bb45ae4c115/mmdc-0.4.1-py3-none-any.whl", hash = "sha256:84bccd4ab7a473c40511043c243242ca88fbad806351f8161765589117afbbd3", size = 472040 }, -] - [[package]] name = "mmh3" version = "5.2.0" @@ -5430,17 +5419,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/81/f457d6d361e04d061bef413749a6e1ab04d98cfeec6d8abcfe40184750f3/pgvector-0.3.6-py3-none-any.whl", hash = "sha256:f6c269b3c110ccb7496bac87202148ed18f34b390a0189c783e351062400a75a", size = 24880 }, ] -[[package]] -name = "phasma" -version = "0.5.0" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/f2/c6f39c7a3b2ddcd6fea71c8158a27480a498de7e85d0165d44fb15087062/phasma-0.5.0-py3-none-macosx_10_9_universal2.whl", hash = "sha256:d209e9c2720fbc9ea1e07bd0ae67d1b90075a88886a7438121a084d07bda56c7", size = 17231073 }, - { url = "https://files.pythonhosted.org/packages/9d/46/d01f3b175a7bdf6cdfcc6b688aca00d4b77a545ab8ac00b29d696e5a915c/phasma-0.5.0-py3-none-manylinux_2_17_i686.whl", hash = "sha256:cd3772db3bc52ac8478fc23b82ea7636c1889b76f2394ceb03bdc1efac01e348", size = 27270663 }, - { url = "https://files.pythonhosted.org/packages/87/76/24065a7ab6e771575a74e071dfe39a17a9447ab8cd2e0b92ea31eeb60b70/phasma-0.5.0-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:768365d8035f48f574b6496924aaa33ee8b52d2862ccfae8255d7e37d4ceb340", size = 26241614 }, - { url = "https://files.pythonhosted.org/packages/3b/04/809333d3ad0676c982fa016df88e94d3830bf1413b07de679ae74930c9fa/phasma-0.5.0-py3-none-win_amd64.whl", hash = "sha256:99dc29b11d55b9574772c5a7c0feb407704e8e20f2f534791c3c125a5dda4cd9", size = 18220978 }, -] - [[package]] name = "phonemizer-fork" version = "3.3.2" @@ -7910,7 +7888,6 @@ dependencies = [ { name = "markdown" }, { name = "markdownify" }, { name = "mcp" }, - { name = "mmdc" }, { name = "notion-client" }, { name = "numpy" }, { name = "pgvector" }, @@ -7988,7 +7965,6 @@ requires-dist = [ { name = "markdown", specifier = ">=3.7" }, { name = "markdownify", specifier = ">=0.14.1" }, { name = "mcp", specifier = ">=1.25.0" }, - { name = "mmdc", specifier = ">=0.4.0" }, { name = "notion-client", specifier = ">=2.3.0" }, { name = "numpy", specifier = ">=1.24.0" }, { name = "pgvector", specifier = ">=0.3.6" }, diff --git a/surfsense_web/app/dashboard/[search_space_id]/more-pages/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/more-pages/page.tsx index 6779dc5d0..27c451d2f 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/more-pages/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/more-pages/page.tsx @@ -9,7 +9,14 @@ import { useEffect } from "react"; import { toast } from "sonner"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from "@/components/ui/card"; +import { + Card, + CardContent, + CardDescription, + CardFooter, + CardHeader, + CardTitle, +} from "@/components/ui/card"; import { Dialog, DialogContent, @@ -108,37 +115,26 @@ export default function MorePagesPage() {
- {task.completed ? ( - - ) : ( - - )} + {task.completed ? : }

{task.title}

-

- +{task.pages_reward} pages -

+

+{task.pages_reward} pages

- )} - - - {testResult.message} - {showDetails && testResult.tools.length > 0 && ( -
-

Available tools:

- -
- )} -
+
+ + {testResult.status === "success" ? "Connection Successful" : "Connection Failed"} + + {testResult.tools.length > 0 && ( + + )}
+ + {testResult.message} + {showDetails && testResult.tools.length > 0 && ( +
+

Available tools:

+ +
+ )} +
)} diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx index 3c4b64090..08c1dd30c 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx @@ -102,15 +102,13 @@ export const ObsidianConnectForm: FC = ({ onSubmit, isSubmitti return (
- - -
- Self-Hosted Only - - This connector requires direct file system access and only works with self-hosted - SurfSense installations. - -
+ + + Self-Hosted Only + + This connector requires direct file system access and only works with self-hosted + SurfSense installations. +
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/searxng-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/searxng-connect-form.tsx index ecf219924..5ff54fb9e 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/searxng-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/searxng-connect-form.tsx @@ -123,23 +123,21 @@ export const SearxngConnectForm: FC = ({ onSubmit, isSubmittin return (
- - -
- SearxNG Instance Required - - You need access to a running SearxNG instance. Refer to the{" "} - - SearxNG installation guide - {" "} - for setup instructions. If your instance requires an API key, include it below. - -
+ + + SearxNG Instance Required + + You need access to a running SearxNG instance. Refer to the{" "} + + SearxNG installation guide + {" "} + for setup instructions. If your instance requires an API key, include it below. +
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/tavily-api-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/tavily-api-connect-form.tsx index a8032e11a..57d183d44 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/tavily-api-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/tavily-api-connect-form.tsx @@ -70,22 +70,20 @@ export const TavilyApiConnectForm: FC = ({ onSubmit, isSubmitt return (
- - -
- API Key Required - - You'll need a Tavily API key to use this connector. You can get one by signing up at{" "} - - tavily.com - - -
+ + + API Key Required + + You'll need a Tavily API key to use this connector. You can get one by signing up at{" "} + + tavily.com + +
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx index 568b47d09..3ab9cba53 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx @@ -166,7 +166,7 @@ export const CirclebackConfig: FC = ({ connector, onNameC Configuration Instructions - + Configure this URL in Circleback Settings → Automations → Create automation → Send webhook request. The webhook will automatically send meeting notes, transcripts, and action items to this search space. diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx index 66ea22e92..ce6845c77 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx @@ -1,6 +1,8 @@ "use client"; import { + ChevronDown, + ChevronRight, File, FileSpreadsheet, FileText, @@ -12,7 +14,6 @@ import { import type { FC } from "react"; import { useEffect, useState } from "react"; import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree"; -import { Button } from "@/components/ui/button"; import { Label } from "@/components/ui/label"; import { Select, @@ -101,9 +102,11 @@ export const ComposioDriveConfig: FC = ({ const [selectedFolders, setSelectedFolders] = useState(existingFolders); const [selectedFiles, setSelectedFiles] = useState(existingFiles); - const [showFolderSelector, setShowFolderSelector] = useState(false); const [indexingOptions, setIndexingOptions] = useState(existingIndexingOptions); + const [isEditMode] = useState(() => existingFolders.length > 0 || existingFiles.length > 0); + const [isFolderTreeOpen, setIsFolderTreeOpen] = useState(!isEditMode); + useEffect(() => { const folders = (connector.config?.selected_folders as SelectedFolder[] | undefined) || []; const files = (connector.config?.selected_files as SelectedFolder[] | undefined) || []; @@ -232,8 +235,21 @@ export const ComposioDriveConfig: FC = ({
)} - {showFolderSelector ? ( -
+ {isEditMode ? ( +
+ + {isFolderTreeOpen && ( = ({ selectedFiles={selectedFiles} onSelectFiles={handleSelectFiles} /> - -
- ) : ( - - )} + )} +
+ ) : ( + + )}
{/* Indexing Options */} diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx index 500ee133a..3f38eb1e6 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx @@ -1,6 +1,8 @@ "use client"; import { + ChevronDown, + ChevronRight, File, FileSpreadsheet, FileText, diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/mcp-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/mcp-config.tsx index ac450677e..38d60d7bd 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/mcp-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/mcp-config.tsx @@ -235,55 +235,51 @@ export const MCPConfig: FC = ({ connector, onConfigChange, onNam ) : ( )} -
-
- - {testResult.status === "success" - ? "Connection Successful" - : "Connection Failed"} - - {testResult.tools.length > 0 && ( - - )} -
- - {testResult.message} - {showDetails && testResult.tools.length > 0 && ( -
-

Available tools:

-
    - {testResult.tools.map((tool) => ( -
  • {tool.name}
  • - ))} -
-
- )} -
+
+ + {testResult.status === "success" ? "Connection Successful" : "Connection Failed"} + + {testResult.tools.length > 0 && ( + + )}
+ + {testResult.message} + {showDetails && testResult.tools.length > 0 && ( +
+

Available tools:

+
    + {testResult.tools.map((tool) => ( +
  • {tool.name}
  • + ))} +
+
+ )} +
)}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/webcrawler-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/webcrawler-config.tsx index 22b14842b..164d78e09 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/webcrawler-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/webcrawler-config.tsx @@ -63,7 +63,8 @@ export const WebcrawlerConfig: FC = ({ connector, onConfig

- Want a quick answer from a webpage without indexing it? Just paste the URL directly into the chat instead. + Want a quick answer from a webpage without indexing it? Just paste the URL directly into + the chat instead.

@@ -123,9 +124,9 @@ export const WebcrawlerConfig: FC = ({ connector, onConfig
{/* Info Alert */} - - - + + + Configuration is saved when you start indexing. You can update these settings anytime from the connector management page. diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/youtube-crawler-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/youtube-crawler-view.tsx index c8e565df5..7ec85f4d3 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/views/youtube-crawler-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/views/youtube-crawler-view.tsx @@ -280,9 +280,7 @@ export const YouTubeCrawlerView: FC = ({ searchSpaceId,
-

- {t("chat_tip")} -

+

{t("chat_tip")}

diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 4281ca2a7..51a9b9275 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -14,7 +14,6 @@ import { AlertCircle, ArrowDownIcon, ArrowUpIcon, - Cable, CheckIcon, ChevronLeftIcon, ChevronRightIcon, @@ -23,17 +22,20 @@ import { PlusIcon, RefreshCwIcon, SquareIcon, - SquareLibrary, + Unplug, + Upload, + X, } from "lucide-react"; import { useParams } from "next/navigation"; import { type FC, useCallback, useContext, useEffect, useMemo, useRef, useState } from "react"; import { createPortal } from "react-dom"; import { chatSessionStateAtom } from "@/atoms/chat/chat-session-state.atom"; -import { showCommentsGutterAtom } from "@/atoms/chat/current-thread.atom"; import { mentionedDocumentsAtom, sidebarSelectedDocumentsAtom, } from "@/atoms/chat/mentioned-documents.atom"; +import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms"; +import { connectorsAtom } from "@/atoms/connectors/connector-query.atoms"; import { documentsSidebarOpenAtom } from "@/atoms/documents/ui.atoms"; import { membersAtom } from "@/atoms/members/members-query.atoms"; import { @@ -48,6 +50,7 @@ import { ConnectorIndicator, type ConnectorIndicatorHandle, } from "@/components/assistant-ui/connector-popup"; +import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup"; import { InlineMentionEditor, type InlineMentionEditorRef, @@ -60,13 +63,21 @@ import { import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { UserMessage } from "@/components/assistant-ui/user-message"; +import { SLIDEOUT_PANEL_OPENED_EVENT } from "@/components/layout/ui/sidebar/SidebarSlideOutPanel"; import { DocumentMentionPicker, type DocumentMentionPickerRef, } from "@/components/new-chat/document-mention-picker"; import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; +import { Avatar, AvatarFallback, AvatarGroup } from "@/components/ui/avatar"; import { Button } from "@/components/ui/button"; -import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; +import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import type { Document } from "@/contracts/types/document.types"; import { useBatchCommentsPreload } from "@/hooks/use-comments"; import { useCommentsElectric } from "@/hooks/use-comments-electric"; @@ -95,8 +106,6 @@ export const Thread: FC = ({ messageThinkingSteps = new Map() }) => }; const ThreadContent: FC = () => { - const showGutter = useAtomValue(showCommentsGutterAtom); - return ( { > thread.isEmpty}> @@ -228,6 +234,72 @@ const ThreadWelcome: FC = () => { ); }; +const BANNER_CONNECTORS = [ + { type: "GOOGLE_DRIVE_CONNECTOR", label: "Google Drive" }, + { type: "GOOGLE_GMAIL_CONNECTOR", label: "Gmail" }, + { type: "NOTION_CONNECTOR", label: "Notion" }, + { type: "YOUTUBE_CONNECTOR", label: "YouTube" }, + { type: "SLACK_CONNECTOR", label: "Slack" }, +] as const; + +const BANNER_DISMISSED_KEY = "surfsense-connect-tools-banner-dismissed"; + +const ConnectToolsBanner: FC = () => { + const { data: connectors } = useAtomValue(connectorsAtom); + const setConnectorDialogOpen = useSetAtom(connectorDialogOpenAtom); + const [dismissed, setDismissed] = useState(() => { + if (typeof window === "undefined") return false; + return localStorage.getItem(BANNER_DISMISSED_KEY) === "true"; + }); + + const hasConnectors = (connectors?.length ?? 0) > 0; + + if (dismissed || hasConnectors) return null; + + const handleDismiss = (e: React.MouseEvent) => { + e.stopPropagation(); + setDismissed(true); + localStorage.setItem(BANNER_DISMISSED_KEY, "true"); + }; + + return ( +
+ +
+ ); +}; + const Composer: FC = () => { // Document mention state (atoms persist across component remounts) const [mentionedDocuments, setMentionedDocuments] = useAtom(mentionedDocumentsAtom); @@ -312,6 +384,16 @@ const Composer: FC = () => { } }, [isThreadEmpty]); + // Close document picker when a slide-out panel (inbox, shared/private chats) opens + useEffect(() => { + const handler = () => { + setShowDocumentPopover(false); + setMentionQuery(""); + }; + window.addEventListener(SLIDEOUT_PANEL_OPENED_EVENT, handler); + return () => window.removeEventListener(SLIDEOUT_PANEL_OPENED_EVENT, handler); + }, []); + // Sync editor text with assistant-ui composer runtime const handleEditorChange = useCallback( (text: string) => { @@ -425,9 +507,9 @@ const Composer: FC = () => { currentUserId={currentUser?.id ?? null} members={members ?? []} /> -
+
{/* Inline editor with @mention support */} -
+
{ document.body )} +
); @@ -481,7 +564,9 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false const setDocumentsSidebarOpen = useSetAtom(documentsSidebarOpenAtom); const connectorRef = useRef(null); const [addMenuOpen, setAddMenuOpen] = useState(false); - + const { openDialog: openUploadDialog } = useDocumentUploadDialog(); + const { data: connectors } = useAtomValue(connectorsAtom); + const connectorCount = connectors?.length ?? 0; const isComposerTextEmpty = useAssistantState(({ composer }) => { const text = composer.text?.trim() || ""; return text.length === 0; @@ -506,55 +591,61 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false const isSendDisabled = isComposerEmpty || !hasModelConfigured || isBlockedByOtherUser; return ( -
+
- - + + - - + -
- - -
-
-
+ { + setAddMenuOpen(false); + openUploadDialog(); + }} + > + + Upload files + + { + setAddMenuOpen(false); + connectorRef.current?.open(); + }} + > + + {connectorCount > 0 ? "Manage tools" : "Connect your tools"} + {connectorCount > 0 && ( + {connectorCount} + )} + + + + {sidebarDocs.length > 0 && ( + + )}
{!hasModelConfigured && ( @@ -565,16 +656,6 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false )}
- {sidebarDocs.length > 0 && ( - - )} - !thread.isRunning}> ([]); @@ -257,44 +258,46 @@ export function CommentComposer({ }, [adjustTextareaHeight]); return ( -
- !open && closeMentionPicker()} - modal={false} - > - -