Merge pull request #1476 from MODSetter/dev

feat(0.0.27): bug fixes and optimizations
This commit is contained in:
Rohan Verma 2026-06-09 23:10:44 -07:00 committed by GitHub
commit 4c29938528
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
1169 changed files with 30332 additions and 38144 deletions

View file

@ -5,6 +5,9 @@ on:
branches: branches:
- main - main
- dev - dev
tags:
- 'v*'
- 'beta-v*'
paths: paths:
- 'surfsense_backend/**' - 'surfsense_backend/**'
- 'surfsense_web/**' - 'surfsense_web/**'
@ -24,11 +27,13 @@ permissions:
packages: write packages: write
jobs: jobs:
tag_release: compute_version:
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch' if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/tags/beta-v')
outputs: outputs:
new_tag: ${{ steps.tag_version.outputs.next_version }} new_tag: ${{ steps.tag_version.outputs.next_version }}
commit_sha: ${{ steps.tag_version.outputs.commit_sha }}
is_release_tag: ${{ steps.tag_version.outputs.is_release_tag }}
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v6 uses: actions/checkout@v6
@ -37,57 +42,65 @@ jobs:
ref: ${{ github.event.inputs.branch }} ref: ${{ github.event.inputs.branch }}
token: ${{ secrets.GITHUB_TOKEN }} token: ${{ secrets.GITHUB_TOKEN }}
# Compute-only: tag is pushed by finalize_release after everything succeeds.
- name: Read app version and calculate next Docker build version - name: Read app version and calculate next Docker build version
id: tag_version id: tag_version
run: | run: |
APP_VERSION=$(tr -d '[:space:]' < VERSION) if [[ "$GITHUB_REF" == refs/tags/beta-v* ]]; then
echo "App version from VERSION file: $APP_VERSION" VERSION="${GITHUB_REF#refs/tags/beta-v}"
NEXT_VERSION="beta-${VERSION}"
IS_RELEASE_TAG="true"
if [ -z "$APP_VERSION" ]; then if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
echo "Error: Could not read version from VERSION file" echo "::error::Version '$VERSION' is not valid semver (expected X.Y.Z). Fix your tag name."
exit 1 exit 1
fi fi
git fetch --tags echo "Docker beta release version from git tag: $NEXT_VERSION"
elif [[ "$GITHUB_REF" == refs/tags/v* ]]; then
NEXT_VERSION="${GITHUB_REF#refs/tags/v}"
IS_RELEASE_TAG="true"
LATEST_BUILD_TAG=$(git tag --list "${APP_VERSION}.*" --sort='-v:refname' | head -n 1) if ! echo "$NEXT_VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
echo "::error::Version '$NEXT_VERSION' is not valid semver (expected X.Y.Z). Fix your tag name."
exit 1
fi
if [ -z "$LATEST_BUILD_TAG" ]; then echo "Docker release version from git tag: $NEXT_VERSION"
echo "No previous Docker build tag found for version ${APP_VERSION}. Starting with ${APP_VERSION}.1"
NEXT_VERSION="${APP_VERSION}.1"
else else
echo "Latest Docker build tag found: $LATEST_BUILD_TAG" APP_VERSION=$(tr -d '[:space:]' < VERSION)
BUILD_NUMBER=$(echo "$LATEST_BUILD_TAG" | rev | cut -d. -f1 | rev) echo "App version from VERSION file: $APP_VERSION"
NEXT_BUILD=$((BUILD_NUMBER + 1))
NEXT_VERSION="${APP_VERSION}.${NEXT_BUILD}" if [ -z "$APP_VERSION" ]; then
echo "Error: Could not read version from VERSION file"
exit 1
fi
git fetch --tags
LATEST_BUILD_TAG=$(git tag --list "${APP_VERSION}.*" --sort='-v:refname' | head -n 1)
if [ -z "$LATEST_BUILD_TAG" ]; then
echo "No previous Docker build tag found for version ${APP_VERSION}. Starting with ${APP_VERSION}.1"
NEXT_VERSION="${APP_VERSION}.1"
else
echo "Latest Docker build tag found: $LATEST_BUILD_TAG"
BUILD_NUMBER=$(echo "$LATEST_BUILD_TAG" | rev | cut -d. -f1 | rev)
NEXT_BUILD=$((BUILD_NUMBER + 1))
NEXT_VERSION="${APP_VERSION}.${NEXT_BUILD}"
fi
IS_RELEASE_TAG="false"
echo "Calculated next Docker version: $NEXT_VERSION"
fi fi
echo "Calculated next Docker version: $NEXT_VERSION"
echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT
echo "commit_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
- name: Create and Push Tag echo "is_release_tag=$IS_RELEASE_TAG" >> $GITHUB_OUTPUT
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
NEXT_TAG="${{ steps.tag_version.outputs.next_version }}"
COMMIT_SHA=$(git rev-parse HEAD)
echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
git tag -a "$NEXT_TAG" -m "Docker build $NEXT_TAG"
echo "Pushing tag $NEXT_TAG to origin"
git push origin "$NEXT_TAG"
- name: Verify Tag Push
run: |
echo "Checking if tag ${{ steps.tag_version.outputs.next_version }} exists remotely..."
sleep 5
git ls-remote --tags origin | grep "refs/tags/${{ steps.tag_version.outputs.next_version }}" || (echo "Tag push verification failed!" && exit 1)
echo "Tag successfully pushed."
build: build:
needs: tag_release needs: compute_version
if: always() && (needs.tag_release.result == 'success' || needs.tag_release.result == 'skipped') if: always() && (needs.compute_version.result == 'success' || needs.compute_version.result == 'skipped')
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
permissions: permissions:
packages: write packages: write
@ -97,6 +110,12 @@ jobs:
matrix: matrix:
platform: [linux/amd64, linux/arm64] platform: [linux/amd64, linux/arm64]
image: [backend, web] image: [backend, web]
variant: [cpu, cuda, cuda126]
exclude:
- image: web
variant: cuda
- image: web
variant: cuda126
include: include:
- platform: linux/amd64 - platform: linux/amd64
suffix: amd64 suffix: amd64
@ -114,6 +133,18 @@ jobs:
context: ./surfsense_web context: ./surfsense_web
file: ./surfsense_web/Dockerfile file: ./surfsense_web/Dockerfile
target: runner target: runner
- variant: cpu
tag_suffix: ""
use_cuda: "false"
cuda_extra: cpu
- variant: cuda
tag_suffix: "-cuda"
use_cuda: "true"
cuda_extra: cu128
- variant: cuda126
tag_suffix: "-cuda126"
use_cuda: "true"
cuda_extra: cu126
env: env:
REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }} REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
@ -149,7 +180,7 @@ jobs:
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
docker system prune -af docker system prune -af
- name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }}) - name: Build and push by digest ${{ matrix.name }} (${{ matrix.variant }}, ${{ matrix.suffix }})
id: build id: build
uses: docker/build-push-action@v7 uses: docker/build-push-action@v7
with: with:
@ -160,10 +191,14 @@ jobs:
tags: ${{ steps.image.outputs.name }} tags: ${{ steps.image.outputs.name }}
outputs: type=image,push-by-digest=true,name-canonical=true,push=true outputs: type=image,push-by-digest=true,name-canonical=true,push=true
platforms: ${{ matrix.platform }} platforms: ${{ matrix.platform }}
cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.suffix }} cache-from: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }}
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.suffix }} cache-to: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }},mode=max,image-manifest=true,oci-mediatypes=true
secrets: |
HF_TOKEN=${{ secrets.HF_TOKEN }}
provenance: false provenance: false
build-args: | build-args: |
${{ matrix.image == 'backend' && format('USE_CUDA={0}', matrix.use_cuda) || '' }}
${{ matrix.image == 'backend' && format('CUDA_EXTRA={0}', matrix.cuda_extra) || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__' || '' }} ${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__' || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__' || '' }} ${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__' || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__' || '' }} ${{ matrix.image == 'web' && 'NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__' || '' }}
@ -179,15 +214,47 @@ jobs:
- name: Upload digest - name: Upload digest
uses: actions/upload-artifact@v7 uses: actions/upload-artifact@v7
with: with:
name: digests-${{ matrix.image }}-${{ matrix.suffix }} name: digests-${{ matrix.image }}-${{ matrix.variant }}-${{ matrix.suffix }}
path: /tmp/digests/* path: /tmp/digests/*
if-no-files-found: error if-no-files-found: error
retention-days: 1 retention-days: 1
# Release gate: require both arches for every variant, else block publishing.
# Release-only: the job requires compute_version to have succeeded and produced
# a tag, so runs where compute_version is skipped (e.g. dev pushes) skip this
# gate and keep the tolerant create_manifest path.
verify_digests:
  runs-on: ubuntu-latest
  needs: [compute_version, build]
  # !cancelled() (not always()) so the gate still evaluates when a build leg
  # failed, but does not run and report a spurious failure on a cancelled run.
  # This matches the status check used by create_manifest.
  if: ${{ !cancelled() && needs.compute_version.result == 'success' && needs.compute_version.outputs.new_tag != '' }}
  steps:
    - name: Download all digests
      uses: actions/download-artifact@v8
      with:
        pattern: digests-*
        path: /tmp/digests
        # Keep one directory per artifact so digests can be counted per
        # image/variant by directory name.
        merge-multiple: false
    - name: Require both arches for every required variant
      run: |
        fail=0
        # check <image>-<variant>: count digest files under the artifact
        # directories named digests-<image>-<variant>-<arch>; fewer than two
        # means at least one architecture did not build.
        check() {
          c=$(find /tmp/digests -type f -path "*/digests-$1-*/*" 2>/dev/null | wc -l | tr -d ' ')
          if [ "$c" -lt 2 ]; then
            echo "::error::$1 has $c/2 arch digests — blocking release"
            fail=1
          else
            echo "OK: $1 ($c/2)"
          fi
        }
        check backend-cpu
        check backend-cuda
        check backend-cuda126
        check web-cpu
        # Report every missing variant before failing, rather than stopping
        # at the first one.
        [ "$fail" -eq 0 ] || exit 1
create_manifest: create_manifest:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: [tag_release, build] needs: [compute_version, build, verify_digests]
if: always() && needs.build.result == 'success' if: ${{ !cancelled() && needs.verify_digests.result != 'failure' }}
permissions: permissions:
packages: write packages: write
contents: read contents: read
@ -197,8 +264,20 @@ jobs:
include: include:
- name: surfsense-backend - name: surfsense-backend
image: backend image: backend
variant: cpu
tag_suffix: ""
- name: surfsense-backend
image: backend
variant: cuda
tag_suffix: "-cuda"
- name: surfsense-backend
image: backend
variant: cuda126
tag_suffix: "-cuda126"
- name: surfsense-web - name: surfsense-web
image: web image: web
variant: cpu
tag_suffix: ""
env: env:
REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }} REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
@ -207,22 +286,33 @@ jobs:
id: image id: image
run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT
- name: Download amd64 digest - name: Download digests
id: download
uses: actions/download-artifact@v8 uses: actions/download-artifact@v8
with: with:
name: digests-${{ matrix.image }}-amd64 pattern: digests-${{ matrix.image }}-${{ matrix.variant }}-*
path: /tmp/digests path: /tmp/digests
merge-multiple: true
continue-on-error: true
- name: Download arm64 digest - name: Check digests
uses: actions/download-artifact@v8 id: check
with: run: |
name: digests-${{ matrix.image }}-arm64 count=$(find /tmp/digests -type f 2>/dev/null | wc -l | tr -d ' ')
path: /tmp/digests echo "digest_count=$count" >> $GITHUB_OUTPUT
if [ "$count" -lt 2 ]; then
echo "::warning::${{ matrix.variant }}: $count/2 digests, skipping merge"
echo "skip=true" >> $GITHUB_OUTPUT
else
echo "skip=false" >> $GITHUB_OUTPUT
fi
- name: Set up Docker Buildx - name: Set up Docker Buildx
if: steps.check.outputs.skip != 'true'
uses: docker/setup-buildx-action@v4 uses: docker/setup-buildx-action@v4
- name: Login to GitHub Container Registry - name: Login to GitHub Container Registry
if: steps.check.outputs.skip != 'true'
uses: docker/login-action@v4 uses: docker/login-action@v4
with: with:
registry: ghcr.io registry: ghcr.io
@ -230,9 +320,10 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }} password: ${{ secrets.GITHUB_TOKEN }}
- name: Compute app version - name: Compute app version
if: steps.check.outputs.skip != 'true'
id: appver id: appver
run: | run: |
VERSION_TAG="${{ needs.tag_release.outputs.new_tag }}" VERSION_TAG="${{ needs.compute_version.outputs.new_tag }}"
if [ -n "$VERSION_TAG" ]; then if [ -n "$VERSION_TAG" ]; then
APP_VERSION=$(echo "$VERSION_TAG" | rev | cut -d. -f2- | rev) APP_VERSION=$(echo "$VERSION_TAG" | rev | cut -d. -f2- | rev)
else else
@ -241,29 +332,69 @@ jobs:
echo "app_version=$APP_VERSION" >> $GITHUB_OUTPUT echo "app_version=$APP_VERSION" >> $GITHUB_OUTPUT
- name: Docker meta - name: Docker meta
if: steps.check.outputs.skip != 'true'
id: meta id: meta
uses: docker/metadata-action@v6 uses: docker/metadata-action@v6
with: with:
images: ${{ steps.image.outputs.name }} images: ${{ steps.image.outputs.name }}
tags: | tags: |
type=raw,value=${{ needs.tag_release.outputs.new_tag }},enable=${{ needs.tag_release.outputs.new_tag != '' }} type=raw,value=${{ needs.compute_version.outputs.new_tag }},enable=${{ needs.compute_version.outputs.new_tag != '' }}
type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.tag_release.outputs.new_tag != '' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }} type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.compute_version.outputs.new_tag != '' && needs.compute_version.outputs.is_release_tag != 'true' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }}
type=ref,event=branch type=ref,event=branch
type=sha,prefix=git- type=sha,prefix=git-
flavor: | flavor: |
latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch || startsWith(github.ref, 'refs/tags/v') }}
${{ matrix.tag_suffix != '' && format('suffix={0},onlatest=true', matrix.tag_suffix) || '' }}
- name: Create manifest list and push - name: Create manifest list and push
if: steps.check.outputs.skip != 'true'
working-directory: /tmp/digests working-directory: /tmp/digests
run: | run: |
docker buildx imagetools create \ docker buildx imagetools create \
$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ steps.image.outputs.name }}@sha256:%s ' *) $(printf '${{ steps.image.outputs.name }}@sha256:%s ' *)
- name: Inspect image - name: Inspect image
if: steps.check.outputs.skip != 'true'
run: | run: |
docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }} docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}
- name: Summary - name: Summary
if: steps.check.outputs.skip != 'true'
run: | run: |
echo "Multi-arch manifest created for ${{ matrix.name }}!" echo "Multi-arch manifest created for ${{ matrix.name }}!"
echo "Tags: $(jq -cr '.tags | join(", ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")" echo "Tags: $(jq -cr '.tags | join(", ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")"
# Push the git tag only after build, gate, and manifest publish all succeed.
# Skipped when the run was itself triggered by a release tag (is_release_tag),
# because that tag already exists.
finalize_release:
  runs-on: ubuntu-latest
  needs: [compute_version, create_manifest]
  if: ${{ success() && needs.compute_version.outputs.new_tag != '' && needs.compute_version.outputs.is_release_tag != 'true' }}
  permissions:
    contents: write
  # Pass job outputs through the environment instead of expanding ${{ }}
  # inside the scripts, so their contents are never parsed as shell code.
  env:
    NEXT_TAG: ${{ needs.compute_version.outputs.new_tag }}
    COMMIT_SHA: ${{ needs.compute_version.outputs.commit_sha }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        ref: ${{ github.event.inputs.branch }}
        token: ${{ secrets.GITHUB_TOKEN }}
    - name: Create and push git tag
      run: |
        git config --global user.name 'github-actions[bot]'
        git config --global user.email 'github-actions[bot]@users.noreply.github.com'
        # Tag the exact commit that compute_version saw, not whatever HEAD is now.
        echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
        git tag -a "$NEXT_TAG" "$COMMIT_SHA" -m "Docker build $NEXT_TAG"
        echo "Pushing tag $NEXT_TAG to origin"
        git push origin "$NEXT_TAG"
    - name: Verify tag push
      run: |
        echo "Checking if tag $NEXT_TAG exists remotely..."
        sleep 5
        # Ask the remote for this exact ref. An unanchored grep would accept
        # any tag that merely starts with the same text (0.0.27.1 vs 0.0.27.10)
        # and would treat the dots as regex wildcards.
        if ! git ls-remote --exit-code --tags origin "refs/tags/$NEXT_TAG"; then
          echo "Tag push verification failed!"
          exit 1
        fi
        echo "Tag successfully pushed."

View file

@ -1 +1 @@
0.0.26 0.0.27

View file

@ -7,6 +7,16 @@
# SurfSense version (use "latest" or a specific version like "0.0.14") # SurfSense version (use "latest" or a specific version like "0.0.14")
SURFSENSE_VERSION=latest SURFSENSE_VERSION=latest
# Image variant: empty = CPU (default), "cuda" = CUDA 12.8, "cuda126" = CUDA 12.6.
# GPU acceleration also requires the NVIDIA Container Toolkit on the host and
# the GPU overlay in COMPOSE_FILE. Linux/macOS use ":"; Windows uses ";".
# Example Linux/macOS: COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
# Example Windows: COMPOSE_FILE=docker-compose.yml;docker-compose.gpu.yml
# Use "cuda126" for older NVIDIA driver stacks; use "cuda" for newer drivers.
SURFSENSE_VARIANT=
# COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
# SURFSENSE_GPU_COUNT=1
# Deployment environment: dev or production # Deployment environment: dev or production
SURFSENSE_ENV=production SURFSENSE_ENV=production
@ -55,6 +65,9 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# -- Redis exposed port (dev only; Redis is internal-only in prod) -- # -- Redis exposed port (dev only; Redis is internal-only in prod) --
# REDIS_PORT=6379 # REDIS_PORT=6379
# -- WhatsApp bridge exposed port (dev/hybrid only; prod keeps it Docker-internal) --
# WHATSAPP_BRIDGE_PORT=9929
# -- Frontend Build Args -- # -- Frontend Build Args --
# In dev, the frontend is built from source and these are passed as build args. # In dev, the frontend is built from source and these are passed as build args.
# In prod, they are automatically derived from AUTH_TYPE, ETL_SERVICE, and the port settings above. # In prod, they are automatically derived from AUTH_TYPE, ETL_SERVICE, and the port settings above.
@ -67,7 +80,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# ONLY set these if you are serving SurfSense on a real domain via a reverse # ONLY set these if you are serving SurfSense on a real domain via a reverse
# proxy (e.g. Caddy, Nginx, Cloudflare Tunnel). # proxy (e.g. Caddy, Nginx, Cloudflare Tunnel).
# For standard localhost deployments, leave all of these commented out # For standard localhost deployments, leave all of these commented out.
# they are automatically derived from the port settings above. # They are automatically derived from the port settings above.
# #
# NEXT_FRONTEND_URL=https://app.yourdomain.com # NEXT_FRONTEND_URL=https://app.yourdomain.com
@ -89,7 +102,11 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# Only change this if you manage publications manually. # Only change this if you manage publications manually.
# ZERO_APP_PUBLICATIONS=zero_publication # ZERO_APP_PUBLICATIONS=zero_publication
# Sync worker tuning — zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number # Keep Zero's documented halt safety net enabled. If replication halts, Zero
# can wipe and re-sync its local SQLite replica without touching Postgres.
# ZERO_AUTO_RESET=true
# Sync worker tuning. zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number
# of CPU cores, which can exceed the connection pool limits on high-core machines. # of CPU cores, which can exceed the connection pool limits on high-core machines.
# Each sync worker needs at least 1 connection from both the UPSTREAM and CVR # Each sync worker needs at least 1 connection from both the UPSTREAM and CVR
# pools, so these constraints must hold: # pools, so these constraints must hold:
@ -134,7 +151,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# SSL mode for database connections: disable, require, verify-ca, verify-full # SSL mode for database connections: disable, require, verify-ca, verify-full
# DB_SSLMODE=disable # DB_SSLMODE=disable
# Full DATABASE_URL override — when set, takes precedence over the individual # Full DATABASE_URL override. When set, this takes precedence over the individual
# DB_USER / DB_PASSWORD / DB_NAME / DB_HOST / DB_PORT settings above. # DB_USER / DB_PASSWORD / DB_NAME / DB_HOST / DB_PORT settings above.
# Use this for managed databases (AWS RDS, GCP Cloud SQL, Supabase, etc.) # Use this for managed databases (AWS RDS, GCP Cloud SQL, Supabase, etc.)
# DATABASE_URL=postgresql+asyncpg://user:password@your-rds-host:5432/surfsense?sslmode=require # DATABASE_URL=postgresql+asyncpg://user:password@your-rds-host:5432/surfsense?sslmode=require
@ -149,7 +166,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# REDIS_URL=redis://redis:6379/0 # REDIS_URL=redis://redis:6379/0
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Stripe (pay-as-you-go page packs disabled by default) # Stripe (pay-as-you-go page packs, disabled by default)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Set TRUE to allow users to buy additional page packs via Stripe Checkout # Set TRUE to allow users to buy additional page packs via Stripe Checkout
@ -168,7 +185,6 @@ STRIPE_PAGE_BUYING_ENABLED=FALSE
# STRIPE_TOKEN_BUYING_ENABLED=FALSE # STRIPE_TOKEN_BUYING_ENABLED=FALSE
# STRIPE_PREMIUM_TOKEN_PRICE_ID=price_... # STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
# STRIPE_CREDIT_MICROS_PER_UNIT=1000000 # STRIPE_CREDIT_MICROS_PER_UNIT=1000000
# DEPRECATED — STRIPE_TOKENS_PER_UNIT=1000000
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# TTS & STT (Text-to-Speech / Speech-to-Text) # TTS & STT (Text-to-Speech / Speech-to-Text)
@ -263,7 +279,44 @@ STT_SERVICE=local/base
# COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback # COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# SearXNG (bundled web search — works out of the box, no config needed) # Messaging Channels (optional)
# ------------------------------------------------------------------------------
# Configure only the external chat channels you want to use.
# -- Telegram --
# TELEGRAM_SHARED_BOT_TOKEN=
# TELEGRAM_SHARED_BOT_USERNAME=
# TELEGRAM_WEBHOOK_SECRET=
# GATEWAY_BASE_URL=http://localhost:8929
# GATEWAY_TELEGRAM_INTAKE_MODE=webhook
# -- WhatsApp --
# GATEWAY_WHATSAPP_INTAKE_MODE=disabled
# WHATSAPP_SHARED_BUSINESS_TOKEN=
# WHATSAPP_SHARED_PHONE_NUMBER_ID=
# WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER=
# WHATSAPP_SHARED_WABA_ID=
# WHATSAPP_GRAPH_API_VERSION=v25.0
# WHATSAPP_WEBHOOK_VERIFY_TOKEN=
# WHATSAPP_WEBHOOK_APP_SECRET=
# WHATSAPP_BRIDGE_URL=http://whatsapp-bridge:9929
# -- Slack --
# Uses SLACK_CLIENT_ID and SLACK_CLIENT_SECRET from the Slack connector section.
#
# GATEWAY_SLACK_ENABLED=FALSE
# GATEWAY_SLACK_SIGNING_SECRET=
# GATEWAY_SLACK_REDIRECT_URI=http://localhost:8929/api/v1/gateway/slack/callback
# -- Discord --
# Uses DISCORD_CLIENT_ID, DISCORD_CLIENT_SECRET, and DISCORD_BOT_TOKEN from the
# Discord connector section.
#
# GATEWAY_DISCORD_ENABLED=FALSE
# GATEWAY_DISCORD_REDIRECT_URI=http://localhost:8929/api/v1/gateway/discord/callback
# ------------------------------------------------------------------------------
# SearXNG (bundled web search, works out of the box with no config needed)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# SearXNG provides web search to all search spaces automatically. # SearXNG provides web search to all search spaces automatically.
# To access the SearXNG UI directly: http://localhost:8888 # To access the SearXNG UI directly: http://localhost:8888
@ -273,7 +326,7 @@ STT_SERVICE=local/base
# SEARXNG_SECRET=surfsense-searxng-secret # SEARXNG_SECRET=surfsense-searxng-secret
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Daytona Sandbox (optional cloud code execution for the deep agent) # Daytona Sandbox (optional cloud code execution for the deep agent)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Set DAYTONA_SANDBOX_ENABLED=TRUE and provide credentials to give the agent # Set DAYTONA_SANDBOX_ENABLED=TRUE and provide credentials to give the agent
# an isolated code execution environment via the Daytona cloud API. # an isolated code execution environment via the Daytona cloud API.
@ -286,9 +339,6 @@ STT_SERVICE=local/base
# External API Keys (optional) # External API Keys (optional)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Firecrawl (web scraping)
# FIRECRAWL_API_KEY=
# Unstructured (if ETL_SERVICE=UNSTRUCTURED) # Unstructured (if ETL_SERVICE=UNSTRUCTURED)
# UNSTRUCTURED_API_KEY= # UNSTRUCTURED_API_KEY=
@ -364,7 +414,6 @@ SURFSENSE_ENABLE_DOOM_LOOP=true
# Premium turns are debited at the actual per-call provider cost reported # Premium turns are debited at the actual per-call provider cost reported
# by LiteLLM. Only applies to models with billing_tier=premium. # by LiteLLM. Only applies to models with billing_tier=premium.
# PREMIUM_CREDIT_MICROS_LIMIT=5000000 # PREMIUM_CREDIT_MICROS_LIMIT=5000000
# DEPRECATED — PREMIUM_TOKEN_LIMIT=5000000
# Safety ceiling on per-call premium reservation, in micro-USD ($1.00 default). # Safety ceiling on per-call premium reservation, in micro-USD ($1.00 default).
# QUOTA_MAX_RESERVE_MICROS=1000000 # QUOTA_MAX_RESERVE_MICROS=1000000
@ -376,10 +425,10 @@ SURFSENSE_ENABLE_DOOM_LOOP=true
# QUOTA_DEFAULT_PODCAST_RESERVE_MICROS=200000 # QUOTA_DEFAULT_PODCAST_RESERVE_MICROS=200000
# Per-video-presentation reservation for the video Celery task ($1.00 default). # Per-video-presentation reservation for the video Celery task ($1.00 default).
# Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp — raise with care. # Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp. Raise with care.
# QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS=1000000 # QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS=1000000
# No-login (anonymous) mode — public users can chat without an account # No-login (anonymous) mode. Public users can chat without an account
# Set TRUE to enable /free pages and anonymous chat API # Set TRUE to enable /free pages and anonymous chat API
NOLOGIN_MODE_ENABLED=FALSE NOLOGIN_MODE_ENABLED=FALSE
# ANON_TOKEN_LIMIT=1000000 # ANON_TOKEN_LIMIT=1000000

View file

@ -114,6 +114,7 @@ services:
- ZERO_REPLICA_FILE=/data/zero.db - ZERO_REPLICA_FILE=/data/zero.db
- ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin} - ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
- ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication} - ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
- ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
- ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4} - ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
- ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20} - ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
- ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30} - ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
@ -122,11 +123,30 @@ services:
volumes: volumes:
- zero_cache_data:/data - zero_cache_data:/data
restart: unless-stopped restart: unless-stopped
stop_grace_period: 300s
healthcheck: healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"] test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
interval: 10s interval: 10s
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 600s
# OPTIONAL — Azurite emulates Azure Blob Storage for testing the Azure
# original-file backend. The default filesystem backend needs none of this.
# To exercise it, set in surfsense_backend/.env:
# FILE_STORAGE_BACKEND=azure
# AZURE_STORAGE_CONTAINER=surfsense-documents
# AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:${AZURITE_BLOB_PORT:-10000}/devstoreaccount1;
# The backend creates blobs on upload; create the container once first
# (Azure CLI / Storage Explorer), then upload a document.
azurite:
  image: mcr.microsoft.com/azure-storage/azurite:3.33.0
  # Blob service only, listening on all interfaces inside the container.
  # --location points the emulator's workspace at the mounted volume; without
  # it Azurite writes to its working directory and azurite_data stays empty,
  # so stored blobs would be lost when the container is recreated.
  command: azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --location /data
  ports:
    - "${AZURITE_BLOB_PORT:-10000}:10000"
  volumes:
    - azurite_data:/data
  restart: unless-stopped
volumes: volumes:
postgres_data: postgres_data:
@ -137,3 +157,5 @@ volumes:
name: surfsense-deps-redis name: surfsense-deps-redis
zero_cache_data: zero_cache_data:
name: surfsense-deps-zero-cache name: surfsense-deps-zero-cache
azurite_data:
name: surfsense-deps-azurite

View file

@ -46,8 +46,6 @@ services:
- PYTHONPATH=/app - PYTHONPATH=/app
- SERVICE_ROLE=migrate - SERVICE_ROLE=migrate
- MIGRATION_TIMEOUT=${MIGRATION_TIMEOUT:-900} - MIGRATION_TIMEOUT=${MIGRATION_TIMEOUT:-900}
volumes:
- zero_init:/zero-init
depends_on: depends_on:
db: db:
condition: service_healthy condition: service_healthy
@ -126,6 +124,7 @@ services:
- AUTH_TYPE=${AUTH_TYPE:-LOCAL} - AUTH_TYPE=${AUTH_TYPE:-LOCAL}
- NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000} - NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000}
- SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080} - SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
- WHATSAPP_BRIDGE_URL=${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
# Daytona Sandbox — uncomment and set credentials to enable cloud code execution # Daytona Sandbox — uncomment and set credentials to enable cloud code execution
# - DAYTONA_SANDBOX_ENABLED=TRUE # - DAYTONA_SANDBOX_ENABLED=TRUE
# - DAYTONA_API_KEY=${DAYTONA_API_KEY:-} # - DAYTONA_API_KEY=${DAYTONA_API_KEY:-}
@ -148,6 +147,25 @@ services:
retries: 30 retries: 30
start_period: 200s start_period: 200s
# WhatsApp bridge, built from the backend's scripts directory. It belongs to
# the "whatsapp" profile, so it is only started when that profile is enabled.
whatsapp-bridge:
  build:
    context: ../surfsense_backend/scripts/whatsapp-bridge
  profiles: [whatsapp]
  restart: unless-stopped
  environment:
    PORT: "9929"
    WHATSAPP_MODE: "${WHATSAPP_MODE:-self-chat}"
    WHATSAPP_SESSION_DIR: /data/sessions
  volumes:
    # Mounted at the same path the bridge is given as WHATSAPP_SESSION_DIR.
    - whatsapp_sessions:/data/sessions
  ports:
    # Published on the host's loopback interface only.
    - "127.0.0.1:${WHATSAPP_BRIDGE_PORT:-9929}:9929"
  healthcheck:
    test: ["CMD", "wget", "-qO-", "http://localhost:9929/health"]
    interval: 30s
    timeout: 5s
    retries: 5
celery_worker: celery_worker:
build: *backend-build build: *backend-build
volumes: volumes:
@ -197,21 +215,6 @@ services:
celery_worker: celery_worker:
condition: service_started condition: service_started
# flower:
# build: *backend-build
# ports:
# - "${FLOWER_PORT:-5555}:5555"
# env_file:
# - ../surfsense_backend/.env
# environment:
# - CELERY_BROKER_URL=${REDIS_URL:-redis://redis:6379/0}
# - CELERY_RESULT_BACKEND=${REDIS_URL:-redis://redis:6379/0}
# - PYTHONPATH=/app
# command: celery -A app.celery_app flower --port=5555
# depends_on:
# - redis
# - celery_worker
zero-cache: zero-cache:
image: rocicorp/zero:1.4.0 image: rocicorp/zero:1.4.0
ports: ports:
@ -230,6 +233,7 @@ services:
- ZERO_REPLICA_FILE=/data/zero.db - ZERO_REPLICA_FILE=/data/zero.db
- ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin} - ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
- ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication} - ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
- ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
- ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4} - ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
- ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20} - ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
- ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30} - ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
@ -237,18 +241,14 @@ services:
- ZERO_MUTATE_URL=${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate} - ZERO_MUTATE_URL=${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
volumes: volumes:
- zero_cache_data:/data - zero_cache_data:/data
- zero_init:/zero-init
# Wrapper: see docker/docker-compose.yml `zero-cache` for rationale.
entrypoint: ["sh", "-c"]
# Pass the script as a single list element so Compose does not tokenize it.
command:
- 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
restart: unless-stopped restart: unless-stopped
stop_grace_period: 300s
healthcheck: healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"] test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
interval: 10s interval: 10s
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 600s
frontend: frontend:
build: build:
@ -280,5 +280,5 @@ volumes:
name: surfsense-dev-shared-temp name: surfsense-dev-shared-temp
zero_cache_data: zero_cache_data:
name: surfsense-dev-zero-cache name: surfsense-dev-zero-cache
zero_init: whatsapp_sessions:
name: surfsense-dev-zero-init name: surfsense-dev-whatsapp-sessions

View file

@ -0,0 +1,30 @@
# GPU override file: adds a GPU device reservation to the services that run
# the backend image. It only contains `deploy` settings, so it is meant to be
# layered on top of docker-compose.yml (the Windows installer does this by
# setting COMPOSE_FILE to both files).
#
# SURFSENSE_GPU_DRIVER  device driver name (default: nvidia)
# SURFSENSE_GPU_COUNT   number of GPUs to reserve (default: 1); the
#                       installers accept a number or `all`
services:
backend:
deploy:
resources:
reservations:
devices:
- driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
count: ${SURFSENSE_GPU_COUNT:-1}
capabilities:
- gpu
celery_worker:
deploy:
resources:
reservations:
devices:
- driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
count: ${SURFSENSE_GPU_COUNT:-1}
capabilities:
- gpu
# NOTE(review): celery_beat receives the same reservation as the worker;
# confirm the scheduler actually needs GPU access.
celery_beat:
deploy:
resources:
reservations:
devices:
- driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
count: ${SURFSENSE_GPU_COUNT:-1}
capabilities:
- gpu

View file

@ -29,12 +29,11 @@ services:
# Short-lived schema runner. Executes `alembic upgrade head` and verifies # Short-lived schema runner. Executes `alembic upgrade head` and verifies
# that the `zero_publication` Postgres logical-replication publication # that the `zero_publication` Postgres logical-replication publication
# exists, then exits 0. Downstream services (backend, celery_*, zero-cache) # matches the canonical shape, then exits 0. Downstream services gate on this
# gate on this with `condition: service_completed_successfully` so a failed # with `condition: service_completed_successfully` so a failed migration halts
# migration halts the whole stack instead of silently producing a half-built # the whole stack instead of booting zero-cache against a drifted publication.
# system that crash-loops zero-cache on missing publications.
migrations: migrations:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest} image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
env_file: env_file:
- .env - .env
environment: environment:
@ -42,8 +41,6 @@ services:
PYTHONPATH: /app PYTHONPATH: /app
SERVICE_ROLE: migrate SERVICE_ROLE: migrate
MIGRATION_TIMEOUT: ${MIGRATION_TIMEOUT:-900} MIGRATION_TIMEOUT: ${MIGRATION_TIMEOUT:-900}
volumes:
- zero_init:/zero-init
depends_on: depends_on:
db: db:
condition: service_healthy condition: service_healthy
@ -61,28 +58,28 @@ services:
timeout: 5s timeout: 5s
retries: 5 retries: 5
otel-collector: # otel-collector:
image: otel/opentelemetry-collector-contrib:0.152.1 # image: otel/opentelemetry-collector-contrib:0.152.1
profiles: # profiles:
- observability # - observability
command: ["--config=/etc/otelcol/config.yaml"] # command: ["--config=/etc/otelcol/config.yaml"]
volumes: # volumes:
- ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro # - ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro
environment: # environment:
GRAFANA_CLOUD_OTLP_ENDPOINT: ${GRAFANA_CLOUD_OTLP_ENDPOINT:-} # GRAFANA_CLOUD_OTLP_ENDPOINT: ${GRAFANA_CLOUD_OTLP_ENDPOINT:-}
GRAFANA_CLOUD_INSTANCE_ID: ${GRAFANA_CLOUD_INSTANCE_ID:-} # GRAFANA_CLOUD_INSTANCE_ID: ${GRAFANA_CLOUD_INSTANCE_ID:-}
GRAFANA_CLOUD_API_KEY: ${GRAFANA_CLOUD_API_KEY:-} # GRAFANA_CLOUD_API_KEY: ${GRAFANA_CLOUD_API_KEY:-}
ports: # ports:
- "${OTEL_GRPC_PORT:-4317}:4317" # - "${OTEL_GRPC_PORT:-4317}:4317"
- "${OTEL_HTTP_PORT:-4318}:4318" # - "${OTEL_HTTP_PORT:-4318}:4318"
- "${OTEL_HEALTH_PORT:-13133}:13133" # - "${OTEL_HEALTH_PORT:-13133}:13133"
mem_limit: 2g # mem_limit: 2g
restart: unless-stopped # restart: unless-stopped
healthcheck: # healthcheck:
test: ["CMD", "/otelcol-contrib", "--version"] # test: ["CMD", "/otelcol-contrib", "--version"]
interval: 30s # interval: 30s
timeout: 5s # timeout: 5s
retries: 3 # retries: 3
searxng: searxng:
image: searxng/searxng:2026.3.13-3c1f68c59 image: searxng/searxng:2026.3.13-3c1f68c59
@ -98,7 +95,7 @@ services:
retries: 5 retries: 5
backend: backend:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest} image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
ports: ports:
- "${BACKEND_PORT:-8929}:8000" - "${BACKEND_PORT:-8929}:8000"
volumes: volumes:
@ -118,6 +115,7 @@ services:
UNSTRUCTURED_HAS_PATCHED_LOOP: "1" UNSTRUCTURED_HAS_PATCHED_LOOP: "1"
NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}} NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}}
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080} SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
WHATSAPP_BRIDGE_URL: ${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
# Daytona Sandbox uncomment and set credentials to enable cloud code execution # Daytona Sandbox uncomment and set credentials to enable cloud code execution
# DAYTONA_SANDBOX_ENABLED: "TRUE" # DAYTONA_SANDBOX_ENABLED: "TRUE"
# DAYTONA_API_KEY: ${DAYTONA_API_KEY:-} # DAYTONA_API_KEY: ${DAYTONA_API_KEY:-}
@ -143,8 +141,28 @@ services:
retries: 30 retries: 30
start_period: 200s start_period: 200s
# whatsapp-bridge:
# build: ../surfsense_backend/scripts/whatsapp-bridge
# profiles:
# - whatsapp
# expose:
# - "9929"
# volumes:
# - whatsapp_sessions:/data/sessions
# environment:
# PORT: 9929
# WHATSAPP_MODE: ${WHATSAPP_MODE:-self-chat}
# WHATSAPP_SESSION_DIR: /data/sessions
# mem_limit: 512m
# restart: unless-stopped
# healthcheck:
# test: ["CMD", "wget", "-qO-", "http://localhost:9929/health"]
# interval: 30s
# timeout: 5s
# retries: 5
celery_worker: celery_worker:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest} image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
volumes: volumes:
- shared_temp:/shared_tmp - shared_temp:/shared_tmp
env_file: env_file:
@ -174,7 +192,7 @@ services:
restart: unless-stopped restart: unless-stopped
celery_beat: celery_beat:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest} image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
env_file: env_file:
- .env - .env
environment: environment:
@ -197,22 +215,6 @@ services:
- "com.centurylinklabs.watchtower.enable=true" - "com.centurylinklabs.watchtower.enable=true"
restart: unless-stopped restart: unless-stopped
# flower:
# image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
# ports:
# - "${FLOWER_PORT:-5555}:5555"
# env_file:
# - .env
# environment:
# CELERY_BROKER_URL: ${REDIS_URL:-redis://redis:6379/0}
# CELERY_RESULT_BACKEND: ${REDIS_URL:-redis://redis:6379/0}
# PYTHONPATH: /app
# command: celery -A app.celery_app flower --port=5555
# depends_on:
# - redis
# - celery_worker
# restart: unless-stopped
zero-cache: zero-cache:
image: rocicorp/zero:1.4.0 image: rocicorp/zero:1.4.0
ports: ports:
@ -226,6 +228,7 @@ services:
ZERO_REPLICA_FILE: /data/zero.db ZERO_REPLICA_FILE: /data/zero.db
ZERO_ADMIN_PASSWORD: ${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin} ZERO_ADMIN_PASSWORD: ${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
ZERO_APP_PUBLICATIONS: ${ZERO_APP_PUBLICATIONS:-zero_publication} ZERO_APP_PUBLICATIONS: ${ZERO_APP_PUBLICATIONS:-zero_publication}
ZERO_AUTO_RESET: ${ZERO_AUTO_RESET:-true}
ZERO_NUM_SYNC_WORKERS: ${ZERO_NUM_SYNC_WORKERS:-4} ZERO_NUM_SYNC_WORKERS: ${ZERO_NUM_SYNC_WORKERS:-4}
ZERO_UPSTREAM_MAX_CONNS: ${ZERO_UPSTREAM_MAX_CONNS:-20} ZERO_UPSTREAM_MAX_CONNS: ${ZERO_UPSTREAM_MAX_CONNS:-20}
ZERO_CVR_MAX_CONNS: ${ZERO_CVR_MAX_CONNS:-30} ZERO_CVR_MAX_CONNS: ${ZERO_CVR_MAX_CONNS:-30}
@ -233,16 +236,8 @@ services:
ZERO_MUTATE_URL: ${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate} ZERO_MUTATE_URL: ${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
volumes: volumes:
- zero_cache_data:/data - zero_cache_data:/data
- zero_init:/zero-init
# Wrapper: if the migrations service flagged a publication change via
# /zero-init/needs_reset, wipe the SQLite replica before starting so
# zero-cache does a clean initial sync. Recovers from the half-built
# replica state (`_zero.tableMetadata` missing) caused by earlier crashes.
entrypoint: ["sh", "-c"]
# Pass the script as a single list element so Compose does not tokenize it.
command:
- 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
restart: unless-stopped restart: unless-stopped
stop_grace_period: 300s
depends_on: depends_on:
db: db:
condition: service_healthy condition: service_healthy
@ -253,6 +248,7 @@ services:
interval: 10s interval: 10s
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 600s
frontend: frontend:
image: ghcr.io/modsetter/surfsense-web:${SURFSENSE_VERSION:-latest} image: ghcr.io/modsetter/surfsense-web:${SURFSENSE_VERSION:-latest}
@ -264,6 +260,7 @@ services:
NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${AUTH_TYPE:-LOCAL} NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${AUTH_TYPE:-LOCAL}
NEXT_PUBLIC_ETL_SERVICE: ${ETL_SERVICE:-DOCLING} NEXT_PUBLIC_ETL_SERVICE: ${ETL_SERVICE:-DOCLING}
NEXT_PUBLIC_DEPLOYMENT_MODE: ${DEPLOYMENT_MODE:-self-hosted} NEXT_PUBLIC_DEPLOYMENT_MODE: ${DEPLOYMENT_MODE:-self-hosted}
NEXT_PUBLIC_WHATSAPP_DISPLAY_PHONE_NUMBER: ${WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER:-}
FASTAPI_BACKEND_INTERNAL_URL: ${FASTAPI_BACKEND_INTERNAL_URL:-http://backend:8000} FASTAPI_BACKEND_INTERNAL_URL: ${FASTAPI_BACKEND_INTERNAL_URL:-http://backend:8000}
labels: labels:
- "com.centurylinklabs.watchtower.enable=true" - "com.centurylinklabs.watchtower.enable=true"
@ -283,5 +280,5 @@ volumes:
name: surfsense-shared-temp name: surfsense-shared-temp
zero_cache_data: zero_cache_data:
name: surfsense-zero-cache name: surfsense-zero-cache
zero_init: whatsapp_sessions:
name: surfsense-zero-init name: surfsense-whatsapp-sessions

View file

@ -7,6 +7,8 @@
# To pass flags, save and run locally: # To pass flags, save and run locally:
# .\install.ps1 -NoWatchtower # .\install.ps1 -NoWatchtower
# .\install.ps1 -WatchtowerInterval 3600 # .\install.ps1 -WatchtowerInterval 3600
# .\install.ps1 -Variant cuda
# .\install.ps1 -Variant cuda -GpuCount all
# #
# Handles two cases automatically: # Handles two cases automatically:
# 1. Fresh install — no prior SurfSense data detected # 1. Fresh install — no prior SurfSense data detected
@ -17,7 +19,11 @@
param( param(
[switch]$NoWatchtower, [switch]$NoWatchtower,
[int]$WatchtowerInterval = 86400 [int]$WatchtowerInterval = 86400,
[ValidateSet("cpu", "cuda", "cuda126")]
[string]$Variant,
[string]$GpuCount,
[switch]$Quiet
) )
$ErrorActionPreference = 'Stop' $ErrorActionPreference = 'Stop'
@ -34,6 +40,11 @@ $MigrationMode = $false
$SetupWatchtower = -not $NoWatchtower $SetupWatchtower = -not $NoWatchtower
$WatchtowerContainer = "watchtower" $WatchtowerContainer = "watchtower"
# Validate -GpuCount before doing any work: only digits or 'all' are accepted.
# -notmatch is case-insensitive, so 'ALL' also passes this check.
# Write-Host/exit are used directly because the Write-Err helper is defined
# further down in the script.
if ($GpuCount -and $GpuCount -notmatch '^([0-9]+|all)$') {
Write-Host "[SurfSense] ERROR: Invalid -GpuCount '$GpuCount'. Use a number or 'all'." -ForegroundColor Red
exit 1
}
# ── Output helpers ────────────────────────────────────────────────────────── # ── Output helpers ──────────────────────────────────────────────────────────
function Write-Info { param([string]$Msg) Write-Host "[SurfSense] " -ForegroundColor Cyan -NoNewline; Write-Host $Msg } function Write-Info { param([string]$Msg) Write-Host "[SurfSense] " -ForegroundColor Cyan -NoNewline; Write-Host $Msg }
@ -42,6 +53,27 @@ function Write-Warn { param([string]$Msg) Write-Host "[SurfSense] " -Foregrou
function Write-Step { param([string]$Msg) Write-Host "`n-- $Msg" -ForegroundColor Cyan } function Write-Step { param([string]$Msg) Write-Host "`n-- $Msg" -ForegroundColor Cyan }
function Write-Err { param([string]$Msg) Write-Host "[SurfSense] ERROR: $Msg" -ForegroundColor Red; exit 1 } function Write-Err { param([string]$Msg) Write-Host "[SurfSense] ERROR: $Msg" -ForegroundColor Red; exit 1 }
# Prints the installer banner: a blank line, the here-string logo, the
# tagline, a 62-character separator and a one-line summary naming $InstallDir.
# NOTE(review): the here-string body is empty in this view - presumably the
# ASCII-art lines were lost; confirm against the real file.
function Show-Banner {
Write-Host ""
Write-Host @"
"@ -ForegroundColor White
Write-Host " OSS Alternative to NotebookLM for Teams" -ForegroundColor Yellow
Write-Host ("=" * 62) -ForegroundColor Cyan
Write-Info "This installer will create $InstallDir\ and start SurfSense with Docker Compose."
}
# Show the banner immediately, before any prompts or checks run.
Show-Banner
function Invoke-NativeSafe { function Invoke-NativeSafe {
param([scriptblock]$Command) param([scriptblock]$Command)
$previousErrorActionPreference = $ErrorActionPreference $previousErrorActionPreference = $ErrorActionPreference
@ -53,6 +85,28 @@ function Invoke-NativeSafe {
} }
} }
# Asks the user whether Watchtower auto-updates should be enabled and stores
# the answer in $script:SetupWatchtower.
# No prompt is shown when -NoWatchtower or -Quiet was passed, or when the
# session is not interactive; in those cases the current value is kept.
function Resolve-WatchtowerPreference {
    if ($NoWatchtower) { return }
    if ($Quiet) { return }
    if (-not [Environment]::UserInteractive) { return }

    Write-Host ""
    Write-Host "Automatic updates" -ForegroundColor Cyan
    $answer = Read-Host "Enable automatic daily updates with Watchtower? (may download several GB in the background) [Y/n]"

    # An explicit "n"/"no" is the only answer that disables Watchtower.
    if ($answer -match '^(?i)n(o)?$') {
        $script:SetupWatchtower = $false
        return
    }

    # Anything other than Enter or "y"/"yes" is unrecognized: warn, then fall
    # through to the enabled default.
    if ($answer -ne "" -and $answer -notmatch '^(?i)y(es)?$') {
        Write-Warn "Unrecognized choice '$answer'; enabling Watchtower by default. Use -NoWatchtower to skip it."
    }
    $script:SetupWatchtower = $true
}

Resolve-WatchtowerPreference
# ── Pre-flight checks ────────────────────────────────────────────────────── # ── Pre-flight checks ──────────────────────────────────────────────────────
Write-Step "Checking prerequisites" Write-Step "Checking prerequisites"
@ -97,143 +151,11 @@ function Wait-ForPostgres {
Write-Ok "PostgreSQL is ready." Write-Ok "PostgreSQL is ready."
} }
# ── Stack health helpers ──────────────────────────────────────────────────── # ── Stack startup helper ────────────────────────────────────────────────────
function Get-ComposeServices {
Push-Location $InstallDir
try {
$raw = Invoke-NativeSafe { docker compose ps -a --format json 2>$null }
} finally {
Pop-Location
}
if ([string]::IsNullOrWhiteSpace($raw)) { return @() }
# Compose v2.21+ emits a JSON array; older versions emit one object per line.
try {
$parsed = $raw | ConvertFrom-Json
if ($parsed -is [System.Collections.IEnumerable] -and -not ($parsed -is [string])) {
return @($parsed)
}
return @($parsed)
} catch {
$services = @()
foreach ($line in ($raw -split "`r?`n")) {
$line = $line.Trim()
if (-not $line) { continue }
try { $services += ($line | ConvertFrom-Json) } catch { }
}
return $services
}
}
function Wait-StackHealthy {
param([int]$TimeoutSec = 300)
$deadline = (Get-Date).AddSeconds($TimeoutSec)
$lastReport = ""
while ((Get-Date) -lt $deadline) {
$services = Get-ComposeServices
if (-not $services -or $services.Count -eq 0) {
Start-Sleep -Seconds 3
continue
}
$bad = @()
$waiting = @()
$good = @()
foreach ($svc in $services) {
$name = $svc.Service
$state = $svc.State
$health = if ($svc.PSObject.Properties.Name -contains 'Health') { $svc.Health } else { '' }
$exit = if ($svc.PSObject.Properties.Name -contains 'ExitCode') { $svc.ExitCode } else { $null }
if ($name -eq 'migrations') {
if ($state -eq 'exited' -and $exit -eq 0) { $good += $name }
elseif ($state -eq 'exited') { $bad += "${name} (exit=${exit})" }
else { $waiting += "${name} (${state})" }
continue
}
if ($state -eq 'running') {
if ([string]::IsNullOrEmpty($health) -or $health -eq 'healthy') {
$good += $name
} elseif ($health -eq 'starting') {
$waiting += "${name} (starting)"
} elseif ($health -eq 'unhealthy') {
$bad += "${name} (unhealthy)"
} else {
$waiting += "${name} (${health})"
}
} elseif ($state -eq 'restarting') {
$bad += "${name} (restarting)"
} elseif ($state -eq 'exited') {
$bad += "${name} (exited, code=${exit})"
} else {
$waiting += "${name} (${state})"
}
}
if ($bad.Count -gt 0) {
return @{ Ok = $false; Reason = 'failure'; Bad = $bad; Waiting = $waiting; Good = $good }
}
if ($waiting.Count -eq 0) {
return @{ Ok = $true; Reason = 'all_healthy'; Good = $good }
}
$report = "Waiting on: " + ($waiting -join ', ')
if ($report -ne $lastReport) {
Write-Info $report
$lastReport = $report
}
Start-Sleep -Seconds 5
}
return @{ Ok = $false; Reason = 'timeout'; Bad = $bad; Waiting = $waiting; Good = $good }
}
function Test-StaleZeroCacheVolume {
$raw = Invoke-NativeSafe { docker volume ls --format '{{.Name}}' 2>$null }
if ([string]::IsNullOrWhiteSpace($raw)) { return $false }
$names = $raw -split "`r?`n" | ForEach-Object { $_.Trim() } | Where-Object { $_ }
$hasZeroCache = $names -contains 'surfsense-zero-cache'
$hasZeroInit = $names -contains 'surfsense-zero-init'
# Pre-fix installs created surfsense-zero-cache but never surfsense-zero-init.
# Such a volume may hold a half-initialized SQLite replica from an earlier
# crash-loop. Wiping it forces zero-cache to do a fresh initial sync.
return ($hasZeroCache -and -not $hasZeroInit)
}
function Invoke-StaleZeroCacheCleanup {
if (-not (Test-StaleZeroCacheVolume)) { return }
Write-Warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
Write-Warn "predates the migrations-service fix. It may contain a half-initialized"
Write-Warn "SQLite replica that would block zero-cache from starting."
Write-Warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
Start-Sleep -Seconds 5
Push-Location $InstallDir
Invoke-NativeSafe { docker compose down --remove-orphans 2>$null } | Out-Null
Pop-Location
Invoke-NativeSafe { docker volume rm surfsense-zero-cache 2>$null } | Out-Null
Write-Ok "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
}
function Write-Err-NoExit {
param([string]$Message)
Write-Host "[ERROR] $Message" -ForegroundColor Red
}
function Invoke-StackFailureReport { function Invoke-StackFailureReport {
param([hashtable]$Result)
Write-Host "" Write-Host ""
Write-Err-NoExit "Stack did not reach a healthy state." Write-Host "[ERROR] Stack did not reach a healthy state." -ForegroundColor Red
if ($Result.Bad.Count -gt 0) { Write-Host (" Failed: " + ($Result.Bad -join ', ')) }
if ($Result.Waiting.Count -gt 0) { Write-Host (" Stuck: " + ($Result.Waiting -join ', ')) }
Write-Host "" Write-Host ""
Write-Info "Recent logs from migrations / zero-cache / backend:" Write-Info "Recent logs from migrations / zero-cache / backend:"
Push-Location $InstallDir Push-Location $InstallDir
@ -247,11 +169,151 @@ function Invoke-StackFailureReport {
Write-Host "Recovery hints:" -ForegroundColor Yellow Write-Host "Recovery hints:" -ForegroundColor Yellow
Write-Host " 1. Inspect migrations: cd $InstallDir; docker compose logs migrations" Write-Host " 1. Inspect migrations: cd $InstallDir; docker compose logs migrations"
Write-Host " 2. Verify publication: cd $InstallDir; docker compose exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'" Write-Host " 2. Verify publication: cd $InstallDir; docker compose exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
Write-Host " 3. Hard reset zero db: cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d" Write-Host " 3. Hard reset zero db: cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d --wait"
Write-Host "" Write-Host ""
exit 1 exit 1
} }
# Runs `docker compose up -d --wait` from $InstallDir and, if the command
# exits non-zero, hands over to Invoke-StackFailureReport (which exits).
function Invoke-ComposeUpWait {
    Push-Location $InstallDir
    try {
        Invoke-NativeSafe { docker compose up -d --wait }
    }
    finally {
        # Always restore the caller's working directory.
        Pop-Location
    }

    # $LASTEXITCODE still holds the exit code of the docker command above.
    if ($LASTEXITCODE -eq 0) { return }
    Invoke-StackFailureReport
}
# ── Variant and .env helpers ────────────────────────────────────────────────
# Sets KEY=VALUE in a dotenv-style file.
#   -Path   file to update; it is created if it does not exist
#   -Key    variable name, matched literally at the start of a line
#   -Value  new value (may be empty, producing "KEY=")
# Every existing "KEY=..." line is replaced in place; if none exists, a new
# line is appended at the end. All other lines are kept unchanged.
function Set-EnvValue {
    param([string]$Path, [string]$Key, [string]$Value)

    $existing = @()
    if (Test-Path $Path) {
        $existing = @(Get-Content $Path)
    }

    $keyPattern = "^$([regex]::Escape($Key))="
    $updated = $false

    # The @(...) wrapper is required: without it a one-line file makes the
    # loop yield a single string, and the += below would concatenate onto
    # that string ("FOO=barKEY=value") instead of appending a new line.
    $newLines = @(
        foreach ($line in $existing) {
            if ($line -match $keyPattern) {
                $updated = $true
                "$Key=$Value"
            } else {
                $line
            }
        }
    )

    if (-not $updated) {
        $newLines += "$Key=$Value"
    }

    Set-Content -Path $Path -Value $newLines
}
# Deletes every "KEY=..." line from a dotenv-style file.
#   -Path  file to edit; nothing happens if it does not exist
#   -Key   variable name, matched literally at the start of a line
function Remove-EnvValue {
    param([string]$Path, [string]$Key)

    if (Test-Path $Path) {
        $keyPattern = '^' + [regex]::Escape($Key) + '='
        # The file is read completely before it is rewritten.
        $remaining = Get-Content $Path | Where-Object { $_ -notmatch $keyPattern }
        Set-Content -Path $Path -Value $remaining
    }
}
# Returns $true when `nvidia-smi` is on PATH and exits with code 0,
# otherwise $false.
function Test-NvidiaGpu {
    $smi = Get-Command nvidia-smi -ErrorAction SilentlyContinue
    if (-not $smi) {
        return $false
    }

    # Output is discarded; only the exit code matters.
    Invoke-NativeSafe { nvidia-smi *>$null } | Out-Null
    return ($LASTEXITCODE -eq 0)
}
# Returns $true when an NVIDIA container runtime appears to be available:
# either `docker info` mentions "nvidia", or one of the NVIDIA container
# tools is on PATH. Returns $false otherwise.
function Test-NvidiaRuntime {
    $dockerInfo = Invoke-NativeSafe { docker info 2>$null }
    if ($dockerInfo -match 'nvidia') {
        return $true
    }

    foreach ($tool in 'nvidia-ctk', 'nvidia-container-runtime') {
        if (Get-Command $tool -ErrorAction SilentlyContinue) {
            return $true
        }
    }
    return $false
}
# Picks the GPU image variant from the NVIDIA driver version reported by
# nvidia-smi (first GPU only): a major version between 1 and 569 yields
# "cuda126"; anything else, including an unreadable version, yields "cuda".
# NOTE(review): presumably older drivers cannot run the default CUDA build -
# confirm the 570 threshold against the image requirements.
function Get-RecommendedVariant {
    $driverVersion = Invoke-NativeSafe { nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>$null } |
        Select-Object -First 1

    if ($driverVersion -match '^(\d+)') {
        $driverMajor = [int]$Matches[1]
        if ($driverMajor -gt 0 -and $driverMajor -lt 570) {
            return "cuda126"
        }
    }
    return "cuda"
}
# Decides which backend image variant to install and returns its name
# ("cpu", "cuda" or "cuda126").
#   * An explicit -Variant is honoured only if a GPU and an NVIDIA container
#     runtime are both detected; otherwise it falls back to "cpu" with a
#     warning. An explicit "cpu" is always honoured.
#   * Without -Variant, the user is asked (interactive, non -Quiet sessions
#     only) whether to use the recommended GPU variant; every other path
#     returns "cpu".
function Resolve-Variant {
    # Probe the machine first; the runtime and recommendation are only
    # checked when a GPU is actually present.
    $gpuPresent = Test-NvidiaGpu
    $runtimePresent = $false
    $suggested = "cpu"
    if ($gpuPresent) {
        $suggested = Get-RecommendedVariant
        $runtimePresent = Test-NvidiaRuntime
    }

    # Explicit request from the command line.
    if ($Variant) {
        if ($Variant -eq "cpu") { return "cpu" }
        if ($gpuPresent -and $runtimePresent) { return $Variant }

        if (-not $gpuPresent) {
            Write-Warn "No NVIDIA GPU detected; falling back to CPU variant."
        } else {
            Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant."
            Write-Warn "Install the toolkit before enabling SurfSense GPU acceleration."
        }
        return "cpu"
    }

    # No explicit request: GPU is only offered when fully usable.
    if (-not $gpuPresent) { return "cpu" }
    if (-not $runtimePresent) {
        Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant."
        return "cpu"
    }
    if ($Quiet -or -not [Environment]::UserInteractive) { return "cpu" }

    Write-Host ""
    Write-Host "SurfSense detected an NVIDIA GPU." -ForegroundColor Cyan
    $answer = Read-Host "Use GPU acceleration? [Y/n]"

    # Enter or "y"/"yes" accepts the recommendation.
    if ($answer -eq "" -or $answer -match '^(?i)y(es)?$') {
        return $suggested
    }
    # Anything that is not "n"/"no" is unrecognized and treated as a refusal.
    if ($answer -notmatch '^(?i)n(o)?$') {
        Write-Warn "Unrecognized choice '$answer'; using CPU variant."
    }
    return "cpu"
}
# Writes the variant-related settings into the .env file.
#   -Path                 .env file to edit
#   -SelectedVariant      "cpu" or a GPU variant name
#   -AllowExistingUpdate  when $false and the file already exists, nothing is
#                         changed and the user is told how to switch manually
# CPU: SURFSENSE_VARIANT is set to empty and COMPOSE_FILE / SURFSENSE_GPU_COUNT
# are removed. GPU: SURFSENSE_VARIANT and COMPOSE_FILE are set, and
# SURFSENSE_GPU_COUNT is set only when -GpuCount was supplied.
# COMPOSE_PROFILES is removed in both cases.
function Set-VariantEnv {
    param([string]$Path, [string]$SelectedVariant, [bool]$AllowExistingUpdate)

    $keepExisting = (Test-Path $Path) -and -not $AllowExistingUpdate
    if ($keepExisting) {
        Write-Warn ".env already exists - keeping your existing configuration."
        Write-Info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in $Path, then run docker compose up -d --wait."
        return
    }

    $useGpu = $SelectedVariant -ne "cpu"
    if ($useGpu) {
        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value $SelectedVariant
        # ';' is the separator used between the two compose files here.
        Set-EnvValue -Path $Path -Key "COMPOSE_FILE" -Value "docker-compose.yml;docker-compose.gpu.yml"
        if ($GpuCount) {
            Set-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT" -Value $GpuCount
        }
    } else {
        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value ""
        foreach ($gpuOnlyKey in "COMPOSE_FILE", "SURFSENSE_GPU_COUNT") {
            Remove-EnvValue -Path $Path -Key $gpuOnlyKey
        }
    }

    Remove-EnvValue -Path $Path -Key "COMPOSE_PROFILES"
}

$SelectedVariant = Resolve-Variant
# ── Download files ────────────────────────────────────────────────────────── # ── Download files ──────────────────────────────────────────────────────────
Write-Step "Downloading SurfSense files" Write-Step "Downloading SurfSense files"
@ -262,6 +324,7 @@ New-Item -ItemType Directory -Path "$InstallDir\searxng" -Force | Out-Null
$Files = @( $Files = @(
@{ Src = "docker/docker-compose.yml"; Dest = "docker-compose.yml" } @{ Src = "docker/docker-compose.yml"; Dest = "docker-compose.yml" }
@{ Src = "docker/docker-compose.gpu.yml"; Dest = "docker-compose.gpu.yml" }
@{ Src = "docker/.env.example"; Dest = ".env.example" } @{ Src = "docker/.env.example"; Dest = ".env.example" }
@{ Src = "docker/postgresql.conf"; Dest = "postgresql.conf" } @{ Src = "docker/postgresql.conf"; Dest = "postgresql.conf" }
@{ Src = "docker/scripts/migrate-database.ps1"; Dest = "scripts/migrate-database.ps1" } @{ Src = "docker/scripts/migrate-database.ps1"; Dest = "scripts/migrate-database.ps1" }
@ -339,15 +402,19 @@ if (-not (Test-Path $envPath)) {
$content = $content -replace 'SECRET_KEY=replace_me_with_a_random_string', "SECRET_KEY=$SecretKey" $content = $content -replace 'SECRET_KEY=replace_me_with_a_random_string', "SECRET_KEY=$SecretKey"
Set-Content -Path $envPath -Value $content -NoNewline Set-Content -Path $envPath -Value $content -NoNewline
# The .env file was created just above, so Test-Path is already true here.
# Passing $false would make Set-VariantEnv take its "keep existing file" path:
# it would warn that .env already exists and never write the selected variant.
# A freshly generated file must therefore be updated unconditionally.
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true
Write-Info "Created $envPath" Write-Info "Created $envPath"
} else { } else {
Write-Warn ".env already exists - keeping your existing configuration." if ($PSBoundParameters.ContainsKey('Variant')) {
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true
Write-Info "Updated SurfSense image variant in existing $envPath"
} else {
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $false
}
} }
# ── Start containers ──────────────────────────────────────────────────────── # ── Start containers ────────────────────────────────────────────────────────
Invoke-StaleZeroCacheCleanup
if ($MigrationMode) { if ($MigrationMode) {
$envContent = Get-Content $envPath $envContent = Get-Content $envPath
$DbUser = ($envContent | Select-String '^DB_USER=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1 $DbUser = ($envContent | Select-String '^DB_USER=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
@ -405,31 +472,15 @@ if ($MigrationMode) {
} }
Write-Step "Starting all SurfSense services" Write-Step "Starting all SurfSense services"
Push-Location $InstallDir Invoke-ComposeUpWait
Invoke-NativeSafe { docker compose up -d } Write-Ok "All services started and healthy."
Pop-Location
Write-Ok "All containers started; waiting for stack to become healthy..."
$waitResult = Wait-StackHealthy -TimeoutSec 300
if (-not $waitResult.Ok) {
Invoke-StackFailureReport -Result $waitResult
}
Write-Ok "All services healthy."
Remove-Item $KeyFile -ErrorAction SilentlyContinue Remove-Item $KeyFile -ErrorAction SilentlyContinue
} else { } else {
Write-Step "Starting SurfSense" Write-Step "Starting SurfSense"
Push-Location $InstallDir Invoke-ComposeUpWait
Invoke-NativeSafe { docker compose up -d } Write-Ok "All services started and healthy."
Pop-Location
Write-Ok "All containers started; waiting for stack to become healthy..."
$waitResult = Wait-StackHealthy -TimeoutSec 300
if (-not $waitResult.Ok) {
Invoke-StackFailureReport -Result $waitResult
}
Write-Ok "All services healthy."
} }
# ── Watchtower (auto-update) ──────────────────────────────────────────────── # ── Watchtower (auto-update) ────────────────────────────────────────────────
@ -461,7 +512,7 @@ if ($SetupWatchtower) {
if ($LASTEXITCODE -eq 0) { if ($LASTEXITCODE -eq 0) {
Write-Ok "Watchtower started - labeled SurfSense containers will auto-update." Write-Ok "Watchtower started - labeled SurfSense containers will auto-update."
} else { } else {
Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d" Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d --wait"
} }
} }
} else { } else {
@ -471,39 +522,26 @@ if ($SetupWatchtower) {
# ── Done ──────────────────────────────────────────────────────────────────── # ── Done ────────────────────────────────────────────────────────────────────
Write-Host "" Write-Host ""
Write-Host @"
.d8888b. .d888 .d8888b.
d88P Y88b d88P" d88P Y88b
Y88b. 888 Y88b.
"Y888b. 888 888 888d888 888888 "Y888b. .d88b. 88888b. .d8888b .d88b.
"Y88b. 888 888 888P" 888 "Y88b. d8P Y8b 888 "88b 88K d8P Y8b
"888 888 888 888 888 "888 88888888 888 888 "Y8888b. 88888888
Y88b d88P Y88b 888 888 888 Y88b d88P Y8b. 888 888 X88 Y8b.
"Y8888P" "Y88888 888 888 "Y8888P" "Y8888 888 888 88888P' "Y8888
"@ -ForegroundColor White
$versionDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VERSION=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1 $versionDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VERSION=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
if (-not $versionDisplay) { $versionDisplay = "latest" } if (-not $versionDisplay) { $versionDisplay = "latest" }
Write-Host " OSS Alternative to NotebookLM for Teams [$versionDisplay]" -ForegroundColor Yellow $variantDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VARIANT=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
Write-Host ("=" * 62) -ForegroundColor Cyan if (-not $variantDisplay) { $variantDisplay = "cpu" }
Write-Host "" $wtHours = [math]::Floor($WatchtowerInterval / 3600)
Write-Step "SurfSense is now installed [$versionDisplay]"
Write-Info " Frontend: http://localhost:3929" Write-Info " Frontend: http://localhost:3929"
Write-Info " Backend: http://localhost:8929" Write-Info " Backend: http://localhost:8929"
Write-Info " API Docs: http://localhost:8929/docs" Write-Info " API Docs: http://localhost:8929/docs"
Write-Info "" Write-Info ""
Write-Info " Config: $InstallDir\.env" Write-Info " Config: $InstallDir\.env"
Write-Info " Variant: $variantDisplay"
Write-Info " Logs: cd $InstallDir; docker compose logs -f" Write-Info " Logs: cd $InstallDir; docker compose logs -f"
Write-Info " Stop: cd $InstallDir; docker compose down" Write-Info " Stop: cd $InstallDir; docker compose down"
Write-Info " Update: cd $InstallDir; docker compose pull; docker compose up -d" Write-Info " Update: cd $InstallDir; docker compose pull; docker compose up -d --wait"
Write-Info "" Write-Info ""
if ($SetupWatchtower) { if ($SetupWatchtower) {
Write-Info " Watchtower: auto-updates every ${wtHours}h (stop: docker rm -f $WatchtowerContainer)" Write-Info " Watchtower: auto-updates every ${wtHours}h (disable: docker rm -f $WatchtowerContainer)"
} else { } else {
Write-Warn " Watchtower skipped. For auto-updates, re-run without -NoWatchtower." Write-Warn " Watchtower skipped. For auto-updates, re-run without -NoWatchtower."
} }

View file

@ -8,6 +8,11 @@
# Flags: # Flags:
# --no-watchtower Skip automatic Watchtower setup # --no-watchtower Skip automatic Watchtower setup
# --watchtower-interval=SECS Check interval in seconds (default: 86400 = 24h) # --watchtower-interval=SECS Check interval in seconds (default: 86400 = 24h)
# --variant=cpu|cuda|cuda126 Select backend image variant
# --gpu Alias for --variant=cuda
# --cpu Alias for --variant=cpu
# --gpu-count=N|all Number of GPUs to reserve when GPU is enabled
# --quiet Skip interactive prompts
# #
# Handles two cases automatically: # Handles two cases automatically:
# 1. Fresh install — no prior SurfSense data detected # 1. Fresh install — no prior SurfSense data detected
@ -35,12 +40,22 @@ MIGRATION_MODE=false
SETUP_WATCHTOWER=true SETUP_WATCHTOWER=true
WATCHTOWER_INTERVAL=86400 WATCHTOWER_INTERVAL=86400
WATCHTOWER_CONTAINER="watchtower" WATCHTOWER_CONTAINER="watchtower"
WATCHTOWER_EXPLICIT=false
REQUESTED_VARIANT=""
VARIANT_EXPLICIT=false
GPU_COUNT=""
QUIET=false
# ── Parse flags ───────────────────────────────────────────────────────────── # ── Parse flags ─────────────────────────────────────────────────────────────
for arg in "$@"; do for arg in "$@"; do
case "$arg" in case "$arg" in
--no-watchtower) SETUP_WATCHTOWER=false ;; --no-watchtower) SETUP_WATCHTOWER=false; WATCHTOWER_EXPLICIT=true ;;
--watchtower-interval=*) WATCHTOWER_INTERVAL="${arg#*=}" ;; --watchtower-interval=*) WATCHTOWER_INTERVAL="${arg#*=}" ;;
--variant=*) REQUESTED_VARIANT="${arg#*=}"; VARIANT_EXPLICIT=true ;;
--gpu) REQUESTED_VARIANT="cuda"; VARIANT_EXPLICIT=true ;;
--cpu) REQUESTED_VARIANT="cpu"; VARIANT_EXPLICIT=true ;;
--gpu-count=*) GPU_COUNT="${arg#*=}" ;;
--quiet) QUIET=true ;;
esac esac
done done
@ -57,6 +72,57 @@ warn() { printf "${YELLOW}[SurfSense]${NC} %s\n" "$1"; }
error() { printf "${RED}[SurfSense]${NC} ERROR: %s\n" "$1" >&2; exit 1; } error() { printf "${RED}[SurfSense]${NC} ERROR: %s\n" "$1" >&2; exit 1; }
step() { printf "\n${BOLD}${CYAN}── %s${NC}\n" "$1"; } step() { printf "\n${BOLD}${CYAN}── %s${NC}\n" "$1"; }
# Print the installer banner: the SurfSense ASCII logo, the project tagline,
# a separator rule, and a one-line notice naming the directory (INSTALL_DIR)
# that the installer is about to create.
show_banner() {
echo ""
# ANSI SGR "1;37" = bold + white foreground; applies to the logo printed next.
printf '\033[1;37m'
# Quoted heredoc delimiter ('EOF') keeps the art literal (no expansion).
cat << 'EOF'
███████╗██╗ ██╗██████╗ ███████╗███████╗███████╗███╗ ██╗███████╗███████╗
██╔════╝██║ ██║██╔══██╗██╔════╝██╔════╝██╔════╝████╗ ██║██╔════╝██╔════╝
███████╗██║ ██║██████╔╝█████╗ ███████╗█████╗ ██╔██╗ ██║███████╗█████╗
╚════██║██║ ██║██╔══██╗██╔══╝ ╚════██║██╔══╝ ██║╚██╗██║╚════██║██╔══╝
███████║╚██████╔╝██║ ██║██║ ███████║███████╗██║ ╚████║███████║███████╗
╚══════╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚══════╝╚══════╝
EOF
# NOTE(review): YELLOW/CYAN/NC are colour variables defined elsewhere in the
# script; NC presumably resets the bold-white attribute set above — confirm.
printf "${YELLOW} OSS Alternative to NotebookLM for Teams${NC}\n"
printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n"
info "This installer will create ${INSTALL_DIR}/ and start SurfSense with Docker Compose."
}
show_banner
# Validate flag values before doing any work. An empty value means the flag
# was not supplied and is always accepted.
if [[ -n "${REQUESTED_VARIANT}" \
   && "${REQUESTED_VARIANT}" != "cpu" \
   && "${REQUESTED_VARIANT}" != "cuda" \
   && "${REQUESTED_VARIANT}" != "cuda126" ]]; then
  error "Invalid --variant='${REQUESTED_VARIANT}'. Use cpu, cuda, or cuda126."
fi

# --gpu-count must be a non-negative integer or the literal word "all".
if [[ -n "${GPU_COUNT}" ]]; then
  [[ "${GPU_COUNT}" =~ ^([0-9]+|all)$ ]] || error "Invalid --gpu-count='${GPU_COUNT}'. Use a number or 'all'."
fi
# Ask the user whether Watchtower auto-updates should be enabled.
#
# Reads/writes globals:
#   WATCHTOWER_EXPLICIT  true when --no-watchtower was passed (no prompt)
#   QUIET                true when --quiet was passed (no prompt)
#   SETUP_WATCHTOWER     updated with the user's answer
#
# SETUP_WATCHTOWER is left untouched when the choice was explicit, when
# --quiet is set, or when no terminal can be opened for the prompt.
resolve_watchtower_preference() {
  if $WATCHTOWER_EXPLICIT || $QUIET; then
    return 0
  fi

  # `-r /dev/tty` / `-w /dev/tty` only inspect permission bits, which are
  # normally rw for everyone even when the process has no controlling
  # terminal (CI, cron, `docker exec` without -t). Opening the device is the
  # only reliable probe; do it in a subshell so a failed redirect can neither
  # print an error nor abort the script.
  if ! ( : <> /dev/tty ) 2>/dev/null; then
    return 0
  fi

  local choice
  echo "" > /dev/tty
  printf "${BOLD}${CYAN}Automatic updates${NC}\n" > /dev/tty
  printf "Enable automatic daily updates with Watchtower? (may download several GB in the background) [Y/n]: " > /dev/tty
  # EOF / read error is treated like pressing Enter (accept the default).
  read -r choice < /dev/tty || choice=""

  case "$choice" in
    ""|[Yy]|[Yy][Ee][Ss]) SETUP_WATCHTOWER=true ;;
    [Nn]|[Nn][Oo]) SETUP_WATCHTOWER=false ;;
    *) warn "Unrecognized choice '${choice}', enabling Watchtower by default. Use --no-watchtower to skip it." >&2; SETUP_WATCHTOWER=true ;;
  esac
}
resolve_watchtower_preference
# ── Pre-flight checks ──────────────────────────────────────────────────────── # ── Pre-flight checks ────────────────────────────────────────────────────────
step "Checking prerequisites" step "Checking prerequisites"
@ -97,126 +163,11 @@ wait_for_pg() {
success "PostgreSQL is ready." success "PostgreSQL is ready."
} }
# ── Stack health helpers ───────────────────────────────────────────────────── # ── Stack startup helper ─────────────────────────────────────────────────────
# Enumerate compose services for project `surfsense` as `service|state|health|exitcode`
# lines. Uses `docker inspect` so we don't depend on `jq`, `python3`, or the
# exact ordering of fields in `docker compose ps --format json` output.
# Emit one `service|state|health|exitcode` line per container that belongs to
# the compose project `surfsense`. Containers without a compose service label
# are skipped; prints nothing when no containers are found.
get_compose_services() {
  local names
  names=$(docker ps -a --filter "label=com.docker.compose.project=surfsense" --format '{{.Names}}' 2>/dev/null) || true
  if [[ -z "$names" ]]; then
    return 0
  fi

  local cname service status health_status exit_code
  while IFS= read -r cname; do
    if [[ -z "$cname" ]]; then
      continue
    fi
    # Each field is inspected separately so a failure of one lookup only
    # affects that field's fallback value.
    service=$(docker inspect -f '{{index .Config.Labels "com.docker.compose.service"}}' "$cname" 2>/dev/null || echo "")
    status=$(docker inspect -f '{{.State.Status}}' "$cname" 2>/dev/null || echo "unknown")
    health_status=$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{end}}' "$cname" 2>/dev/null || echo "")
    exit_code=$(docker inspect -f '{{.State.ExitCode}}' "$cname" 2>/dev/null || echo "")
    if [[ -n "$service" ]]; then
      printf '%s|%s|%s|%s\n' "$service" "$status" "$health_status" "$exit_code"
    fi
  done <<< "$names"
}
# Globals populated by wait_stack_healthy / consumed by stack_failure_report.
STACK_BAD=()
STACK_WAITING=()
STACK_GOOD=()
STACK_TIMEOUT=false
# Poll the compose services until every one is healthy, one has failed, or the
# timeout elapses.
#
#   $1  timeout in seconds (default 300)
#
# Returns 0 when no service is failed or waiting; returns 1 as soon as any
# service is classified as failed, or when the deadline passes.
# Side effects: fills the globals STACK_BAD / STACK_WAITING / STACK_GOOD with
# the last classification and sets STACK_TIMEOUT=true on timeout.
#
# Classification rules:
#   - `migrations` is a one-shot service: exited with code 0 is good, exited
#     with any other code is bad, anything else is still waiting.
#   - other services: running with no healthcheck or `healthy` is good;
#     `unhealthy`, `restarting` and `exited` are bad; everything else waits.
wait_stack_healthy() {
  local timeout_sec=${1:-300}
  local deadline=$(($(date +%s) + timeout_sec))
  local last_report=""
  local bad=()
  local waiting=()
  local good=()
  # Keep the per-line fields out of the global namespace.
  local name state health code

  while [[ $(date +%s) -lt $deadline ]]; do
    local lines
    lines=$(get_compose_services)
    if [[ -z "$lines" ]]; then
      # No containers visible yet; give compose a moment to create them.
      sleep 3
      continue
    fi

    bad=()
    waiting=()
    good=()
    while IFS='|' read -r name state health code; do
      [[ -z "$name" ]] && continue
      if [[ "$name" == "migrations" ]]; then
        if [[ "$state" == "exited" && "$code" == "0" ]]; then
          good+=("$name")
        elif [[ "$state" == "exited" ]]; then
          bad+=("${name} (exit=${code})")
        else
          waiting+=("${name} (${state})")
        fi
        continue
      fi
      if [[ "$state" == "running" ]]; then
        if [[ -z "$health" || "$health" == "healthy" ]]; then
          good+=("$name")
        elif [[ "$health" == "starting" ]]; then
          waiting+=("${name} (starting)")
        elif [[ "$health" == "unhealthy" ]]; then
          bad+=("${name} (unhealthy)")
        else
          waiting+=("${name} (${health})")
        fi
      elif [[ "$state" == "restarting" ]]; then
        bad+=("${name} (restarting)")
      elif [[ "$state" == "exited" ]]; then
        bad+=("${name} (exited, code=${code})")
      else
        waiting+=("${name} (${state})")
      fi
    done <<< "$lines"

    if (( ${#bad[@]} > 0 )); then
      # Expanding an empty array as "${arr[@]}" is an "unbound variable"
      # error under `set -u` on bash < 4.4, so only copy non-empty arrays.
      STACK_BAD=("${bad[@]}")
      STACK_WAITING=()
      if (( ${#waiting[@]} > 0 )); then STACK_WAITING=("${waiting[@]}"); fi
      STACK_GOOD=()
      if (( ${#good[@]} > 0 )); then STACK_GOOD=("${good[@]}"); fi
      return 1
    fi
    if (( ${#waiting[@]} == 0 )); then
      STACK_GOOD=()
      if (( ${#good[@]} > 0 )); then STACK_GOOD=("${good[@]}"); fi
      return 0
    fi

    # Only log when the set of pending services changes, to avoid spamming.
    local report="Waiting on: ${waiting[*]}"
    if [[ "$report" != "$last_report" ]]; then
      info "$report"
      last_report="$report"
    fi
    sleep 5
  done

  # bad/waiting/good are declared at function scope so referencing them is
  # safe even if the polling loop never executed its body.
  STACK_BAD=()
  if (( ${#bad[@]} > 0 )); then STACK_BAD=("${bad[@]}"); fi
  STACK_WAITING=()
  if (( ${#waiting[@]} > 0 )); then STACK_WAITING=("${waiting[@]}"); fi
  STACK_GOOD=()
  if (( ${#good[@]} > 0 )); then STACK_GOOD=("${good[@]}"); fi
  STACK_TIMEOUT=true
  return 1
}
stack_failure_report() { stack_failure_report() {
echo "" echo ""
echo -e "\033[31m[ERROR]\033[0m Stack did not reach a healthy state." echo -e "\033[31m[ERROR]\033[0m Stack did not reach a healthy state."
if (( ${#STACK_BAD[@]} > 0 )) && [[ -n "${STACK_BAD[0]}" ]]; then
echo " Failed: ${STACK_BAD[*]}"
fi
if (( ${#STACK_WAITING[@]} > 0 )) && [[ -n "${STACK_WAITING[0]}" ]]; then
echo " Stuck: ${STACK_WAITING[*]}"
fi
echo "" echo ""
info "Recent logs from migrations / zero-cache / backend:" info "Recent logs from migrations / zero-cache / backend:"
(cd "${INSTALL_DIR}" && ${DC} logs --tail=60 migrations zero-cache backend 2>&1) || true (cd "${INSTALL_DIR}" && ${DC} logs --tail=60 migrations zero-cache backend 2>&1) || true
@ -224,36 +175,158 @@ stack_failure_report() {
echo "Recovery hints:" echo "Recovery hints:"
echo " 1. Inspect migrations: cd ${INSTALL_DIR} && ${DC} logs migrations" echo " 1. Inspect migrations: cd ${INSTALL_DIR} && ${DC} logs migrations"
echo " 2. Verify publication: cd ${INSTALL_DIR} && ${DC} exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'" echo " 2. Verify publication: cd ${INSTALL_DIR} && ${DC} exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
echo " 3. Hard reset zero db: cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d" echo " 3. Hard reset zero db: cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d --wait"
echo "" echo ""
exit 1 exit 1
} }
# True if `surfsense-zero-cache` exists but `surfsense-zero-init` does not. compose_up_wait() {
# That signals an install that predates the migrations-service fix; the old local service="${1:-}"
# replica may be half-initialized and would block zero-cache on next start. if [[ -n "$service" ]]; then
test_stale_zero_cache_volume() { (cd "${INSTALL_DIR}" && ${DC} up -d --wait "$service") < /dev/null
local has_zc has_zi else
has_zc=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-cache' || true) (cd "${INSTALL_DIR}" && ${DC} up -d --wait) < /dev/null
has_zi=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-init' || true) fi
[[ -n "$has_zc" && -z "$has_zi" ]]
} }
invoke_stale_zero_cache_cleanup() { # ── Variant and .env helpers ─────────────────────────────────────────────────
if ! test_stale_zero_cache_volume; then
# Set KEY=VALUE in a dotenv-style file.
#
#   $1  path to the env file (must exist)
#   $2  variable name
#   $3  value, written verbatim
#
# Every existing line that starts with "KEY=" is replaced; when none exists
# the pair is appended after a blank line. The file is rewritten through a
# temp file and moved into place.
set_env_value() {
  local file="$1"
  local key="$2"
  local value="$3"
  local tmp
  tmp=$(mktemp)

  # Key and value travel through the environment instead of `awk -v`:
  # -v assignments undergo escape-sequence processing, which would corrupt a
  # value containing backslashes. index(...) == 1 is a literal prefix test,
  # so characters such as "." in the key are never treated as regex syntax.
  if ENV_KEY="$key" awk '
       BEGIN { prefix = ENVIRON["ENV_KEY"] "=" }
       index($0, prefix) == 1 { found = 1; exit }
       END { exit (found ? 0 : 1) }
     ' "$file" 2>/dev/null; then
    ENV_KEY="$key" ENV_VALUE="$value" awk '
      BEGIN { prefix = ENVIRON["ENV_KEY"] "=" }
      index($0, prefix) == 1 { print prefix ENVIRON["ENV_VALUE"]; next }
      { print }
    ' "$file" > "$tmp"
  else
    cp "$file" "$tmp"
    printf '\n%s=%s\n' "$key" "$value" >> "$tmp"
  fi
  mv "$tmp" "$file"
}
# Delete every "KEY=..." line from a dotenv-style file.
#
#   $1  path to the env file
#   $2  variable name to remove
#
# The key is matched as a literal line prefix (not a regex), so removing
# "A.B" cannot accidentally delete "AxB=". The file is rewritten through a
# temp file and moved into place.
remove_env_value() {
  local file="$1"
  local key="$2"
  local tmp
  tmp=$(mktemp)
  ENV_KEY="$key" awk '
    BEGIN { prefix = ENVIRON["ENV_KEY"] "=" }
    index($0, prefix) != 1 { print }
  ' "$file" > "$tmp"
  mv "$tmp" "$file"
}
# Print the first dot-separated component of a version string,
# e.g. "570.124.06" -> "570".
version_major() {
  local full_version="$1"
  printf '%s' "$full_version" | cut -d '.' -f 1
}
# Print the CUDA image variant suited to the installed NVIDIA driver:
# "cuda126" when the driver's major version is a number below 570, otherwise
# "cuda" (including when the driver version cannot be determined).
recommend_cuda_variant() {
  local reported_version major
  # First GPU's driver version, with all whitespace stripped.
  reported_version=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -n 1 | tr -d '[:space:]' || true)
  major=$(version_major "$reported_version")

  # CUDA 12.8 generally requires an R570+ driver. CUDA 12.6 is the
  # compatibility fallback for older 12.x driver stacks and GPUs.
  if [[ "$major" =~ ^[0-9]+$ && "$major" -lt 570 ]]; then
    printf 'cuda126'
    return 0
  fi
  printf 'cuda'
}
# Succeed when an NVIDIA container runtime appears to be installed: either
# `docker info` mentions "nvidia" (case-insensitive) or one of the NVIDIA
# Container Toolkit binaries is on PATH.
gpu_runtime_available() {
  if docker info 2>/dev/null | grep -qi 'nvidia'; then
    return 0
  fi
  if command -v nvidia-ctk >/dev/null 2>&1; then
    return 0
  fi
  command -v nvidia-container-runtime >/dev/null 2>&1
}
# Succeed only when `nvidia-smi` exists and runs successfully; its exit
# status is passed through when it fails.
host_has_nvidia_gpu() {
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    return 1
  fi
  nvidia-smi >/dev/null 2>&1
}
resolve_variant() {
local detected_variant="cpu"
local has_gpu=false
local has_runtime=false
if host_has_nvidia_gpu; then
has_gpu=true
detected_variant=$(recommend_cuda_variant)
if gpu_runtime_available; then
has_runtime=true
fi
fi
if $VARIANT_EXPLICIT; then
if [[ "$REQUESTED_VARIANT" == "cpu" ]]; then
printf 'cpu'
return 0
fi
if ! $has_gpu; then
warn "No NVIDIA GPU detected; falling back to CPU variant." >&2
printf 'cpu'
return 0
fi
if ! $has_runtime; then
warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant." >&2
warn "Install the toolkit before enabling SurfSense GPU acceleration." >&2
printf 'cpu'
return 0
fi
printf '%s' "$REQUESTED_VARIANT"
return 0 return 0
fi fi
warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
warn "predates the migrations-service fix. It may contain a half-initialized"
warn "SQLite replica that would block zero-cache from starting."
warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
sleep 5
(cd "${INSTALL_DIR}" && ${DC} down --remove-orphans 2>/dev/null) || true if $has_gpu && ! $has_runtime; then
docker volume rm surfsense-zero-cache 2>/dev/null || true warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant." >&2
success "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start." fi
if $has_gpu && $has_runtime && ! $QUIET && [[ -r /dev/tty && -w /dev/tty ]]; then
local choice
echo "" > /dev/tty
printf "${BOLD}${CYAN}SurfSense detected an NVIDIA GPU.${NC}\n" > /dev/tty
printf "Use GPU acceleration? [Y/n]: " > /dev/tty
read -r choice < /dev/tty || choice=""
case "$choice" in
"") printf '%s' "$detected_variant" ;;
[Yy]|[Yy][Ee][Ss]) printf '%s' "$detected_variant" ;;
[Nn]|[Nn][Oo]) printf 'cpu' ;;
*) warn "Unrecognized choice '${choice}', using CPU variant." >&2; printf 'cpu' ;;
esac
return 0
fi
printf 'cpu'
} }
# Write the image-variant settings into an env file.
#
#   $1  path to the env file
#   $2  variant: "cpu" or a GPU variant name
#   $3  "true" to allow modifying a file that already exists
#
# When the file exists and $3 is not "true", nothing is changed and a hint on
# how to switch variants manually is printed instead.
# cpu: SURFSENSE_VARIANT is set to empty, COMPOSE_FILE and SURFSENSE_GPU_COUNT
#      are removed.
# gpu: SURFSENSE_VARIANT and COMPOSE_FILE (base + GPU override) are set;
#      SURFSENSE_GPU_COUNT is set only when the global GPU_COUNT is non-empty.
# COMPOSE_PROFILES is removed in both cases.
apply_variant_env() {
  local env_file="$1" variant="$2" allow_existing_update="$3"

  if [[ "$allow_existing_update" != "true" && -f "$env_file" ]]; then
    warn ".env already exists — keeping your existing configuration."
    info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in ${env_file}, then run ${DC} up -d --wait."
    return 0
  fi

  case "$variant" in
    cpu)
      set_env_value "$env_file" "SURFSENSE_VARIANT" ""
      remove_env_value "$env_file" "COMPOSE_FILE"
      remove_env_value "$env_file" "SURFSENSE_GPU_COUNT"
      ;;
    *)
      set_env_value "$env_file" "SURFSENSE_VARIANT" "$variant"
      set_env_value "$env_file" "COMPOSE_FILE" "docker-compose.yml:docker-compose.gpu.yml"
      if [[ -n "$GPU_COUNT" ]]; then
        set_env_value "$env_file" "SURFSENSE_GPU_COUNT" "$GPU_COUNT"
      fi
      ;;
  esac

  remove_env_value "$env_file" "COMPOSE_PROFILES"
}
SELECTED_VARIANT=$(resolve_variant)
# ── Download files ─────────────────────────────────────────────────────────── # ── Download files ───────────────────────────────────────────────────────────
step "Downloading SurfSense files" step "Downloading SurfSense files"
@ -263,6 +336,7 @@ mkdir -p "${INSTALL_DIR}/searxng"
FILES=( FILES=(
"docker/docker-compose.yml:docker-compose.yml" "docker/docker-compose.yml:docker-compose.yml"
"docker/docker-compose.gpu.yml:docker-compose.gpu.yml"
"docker/.env.example:.env.example" "docker/.env.example:.env.example"
"docker/postgresql.conf:postgresql.conf" "docker/postgresql.conf:postgresql.conf"
"docker/scripts/migrate-database.sh:scripts/migrate-database.sh" "docker/scripts/migrate-database.sh:scripts/migrate-database.sh"
@ -336,15 +410,19 @@ if [ ! -f "${INSTALL_DIR}/.env" ]; then
else else
sed -i "s|SECRET_KEY=replace_me_with_a_random_string|SECRET_KEY=${SECRET_KEY}|" "${INSTALL_DIR}/.env" sed -i "s|SECRET_KEY=replace_me_with_a_random_string|SECRET_KEY=${SECRET_KEY}|" "${INSTALL_DIR}/.env"
fi fi
# This branch has just created .env, so the file already exists here. Pass
# "true" so the selected variant is actually written; with "false" the
# keep-existing guard in apply_variant_env fires, prints a misleading
# ".env already exists" warning, and the chosen variant is silently dropped.
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
info "Created ${INSTALL_DIR}/.env" info "Created ${INSTALL_DIR}/.env"
else else
warn ".env already exists — keeping your existing configuration." if $VARIANT_EXPLICIT; then
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
info "Updated SurfSense image variant in existing ${INSTALL_DIR}/.env"
else
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "false"
fi
fi fi
# ── Start containers ───────────────────────────────────────────────────────── # ── Start containers ─────────────────────────────────────────────────────────
invoke_stale_zero_cache_cleanup
if $MIGRATION_MODE; then if $MIGRATION_MODE; then
# Read DB credentials from .env (fall back to defaults from docker-compose.yml) # Read DB credentials from .env (fall back to defaults from docker-compose.yml)
DB_USER=$(grep '^DB_USER=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true) DB_USER=$(grep '^DB_USER=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
@ -401,26 +479,20 @@ if $MIGRATION_MODE; then
fi fi
step "Starting all SurfSense services" step "Starting all SurfSense services"
(cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null if ! compose_up_wait; then
success "All containers started; waiting for stack to become healthy..."
if ! wait_stack_healthy 300; then
stack_failure_report stack_failure_report
fi fi
success "All services healthy." success "All services started and healthy."
# Key file is no longer needed — SECRET_KEY is now in .env # Key file is no longer needed — SECRET_KEY is now in .env
rm -f "${KEY_FILE}" rm -f "${KEY_FILE}"
else else
step "Starting SurfSense" step "Starting SurfSense"
(cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null if ! compose_up_wait; then
success "All containers started; waiting for stack to become healthy..."
if ! wait_stack_healthy 300; then
stack_failure_report stack_failure_report
fi fi
success "All services healthy." success "All services started and healthy."
fi fi
# ── Watchtower (auto-update) ───────────────────────────────────────────────── # ── Watchtower (auto-update) ─────────────────────────────────────────────────
@ -445,7 +517,7 @@ if $SETUP_WATCHTOWER; then
--label-enable \ --label-enable \
--interval "${WATCHTOWER_INTERVAL}" >/dev/null 2>&1 < /dev/null \ --interval "${WATCHTOWER_INTERVAL}" >/dev/null 2>&1 < /dev/null \
&& success "Watchtower started — labeled SurfSense containers will auto-update." \ && success "Watchtower started — labeled SurfSense containers will auto-update." \
|| warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d" || warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d --wait"
fi fi
else else
info "Skipping Watchtower setup (--no-watchtower flag)." info "Skipping Watchtower setup (--no-watchtower flag)."
@ -454,38 +526,25 @@ fi
# ── Done ───────────────────────────────────────────────────────────────────── # ── Done ─────────────────────────────────────────────────────────────────────
echo "" echo ""
printf '\033[1;37m'
cat << 'EOF'
.d8888b. .d888 .d8888b.
d88P Y88b d88P" d88P Y88b
Y88b. 888 Y88b.
"Y888b. 888 888 888d888 888888 "Y888b. .d88b. 88888b. .d8888b .d88b.
"Y88b. 888 888 888P" 888 "Y88b. d8P Y8b 888 "88b 88K d8P Y8b
"888 888 888 888 888 "888 88888888 888 888 "Y8888b. 88888888
Y88b d88P Y88b 888 888 888 Y88b d88P Y8b. 888 888 X88 Y8b.
"Y8888P" "Y88888 888 888 "Y8888P" "Y8888 888 888 88888P' "Y8888
EOF
_version_display=$(grep '^SURFSENSE_VERSION=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true) _version_display=$(grep '^SURFSENSE_VERSION=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
_version_display="${_version_display:-latest}" _version_display="${_version_display:-latest}"
printf " OSS Alternative to NotebookLM for Teams ${YELLOW}[%s]${NC}\n" "${_version_display}" _variant_display=$(grep '^SURFSENSE_VARIANT=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n\n" _variant_display="${_variant_display:-cpu}"
step "SurfSense is now installed [${_version_display}]"
info " Frontend: http://localhost:3929" info " Frontend: http://localhost:3929"
info " Backend: http://localhost:8929" info " Backend: http://localhost:8929"
info " API Docs: http://localhost:8929/docs" info " API Docs: http://localhost:8929/docs"
info "" info ""
info " Config: ${INSTALL_DIR}/.env" info " Config: ${INSTALL_DIR}/.env"
info " Variant: ${_variant_display}"
info " Logs: cd ${INSTALL_DIR} && ${DC} logs -f" info " Logs: cd ${INSTALL_DIR} && ${DC} logs -f"
info " Stop: cd ${INSTALL_DIR} && ${DC} down" info " Stop: cd ${INSTALL_DIR} && ${DC} down"
info " Update: cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d" info " Update: cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d --wait"
info "" info ""
if $SETUP_WATCHTOWER; then if $SETUP_WATCHTOWER; then
info " Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (stop: docker rm -f ${WATCHTOWER_CONTAINER})" info " Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (disable: docker rm -f ${WATCHTOWER_CONTAINER})"
else else
warn " Watchtower skipped. For auto-updates, re-run without --no-watchtower." warn " Watchtower skipped. For auto-updates, re-run without --no-watchtower."
fi fi

View file

@ -212,9 +212,9 @@ API Base URL: https://open.bigmodel.cn/api/paas/v4
| 字段 | 值 | 说明 | | 字段 | 值 | 说明 |
|------|-----|------| |------|-----|------|
| **Configuration Name** | `MiniMax M2.5` | 配置名称(自定义) | | **Configuration Name** | `MiniMax M3` | 配置名称(自定义) |
| **Provider** | `MINIMAX` | 选择 MiniMax | | **Provider** | `MINIMAX` | 选择 MiniMax |
| **Model Name** | `MiniMax-M2.5` | 推荐模型<br>其他选项: `MiniMax-M2.5-highspeed` | | **Model Name** | `MiniMax-M3` | 推荐模型<br>其他选项: `MiniMax-M2.7``MiniMax-M2.7-highspeed` |
| **API Key** | `eyJ...` | 你的 MiniMax API Key | | **API Key** | `eyJ...` | 你的 MiniMax API Key |
| **API Base URL** | `https://api.minimax.io/v1` | MiniMax API 地址 | | **API Base URL** | `https://api.minimax.io/v1` | MiniMax API 地址 |
| **Parameters** | `{"temperature": 1.0}` | 注意temperature 必须在 (0.0, 1.0] 范围内,不能为 0 | | **Parameters** | `{"temperature": 1.0}` | 注意temperature 必须在 (0.0, 1.0] 范围内,不能为 0 |
@ -222,22 +222,23 @@ API Base URL: https://open.bigmodel.cn/api/paas/v4
### 示例配置 ### 示例配置
``` ```
Configuration Name: MiniMax M2.5 Configuration Name: MiniMax M3
Provider: MINIMAX Provider: MINIMAX
Model Name: MiniMax-M2.5 Model Name: MiniMax-M3
API Key: eyJxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx API Key: eyJxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
API Base URL: https://api.minimax.io/v1 API Base URL: https://api.minimax.io/v1
``` ```
### 可用模型 ### 可用模型
- **MiniMax-M2.5**: 高性能通用模型204K 上下文窗口(推荐) - **MiniMax-M3**: 旗舰模型512K 上下文窗口(推荐)
- **MiniMax-M2.5-highspeed**: 高速推理版本204K 上下文窗口 - **MiniMax-M2.7**: 上一代通用模型204K 上下文窗口
- **MiniMax-M2.7-highspeed**: 上一代高速推理版本,204K 上下文窗口
### 注意事项 ### 注意事项
- **temperature 参数**: MiniMax 要求 temperature 必须在 (0.0, 1.0] 范围内,不能设置为 0。建议使用 1.0。 - **temperature 参数**: MiniMax 要求 temperature 必须在 (0.0, 1.0] 范围内,不能设置为 0。建议使用 1.0。
- 两个模型都支持 204K 超长上下文窗口,适合处理长文本任务 - M3 支持 512K 超长上下文M2.7 系列保留 204K适合按需求选择
### 定价 ### 定价
- 请访问 [MiniMax 定价页面](https://platform.minimaxi.com/document/Price) 查看最新价格 - 请访问 [MiniMax 定价页面](https://platform.minimaxi.com/document/Price) 查看最新价格
@ -315,8 +316,8 @@ docker compose logs backend | grep -i "error"
|---------|---------|------| |---------|---------|------|
| **文档摘要** | Qwen-Plus, GLM-4 | 平衡性能和成本 | | **文档摘要** | Qwen-Plus, GLM-4 | 平衡性能和成本 |
| **代码分析** | DeepSeek-Coder | 代码专用 | | **代码分析** | DeepSeek-Coder | 代码专用 |
| **长文本处理** | Kimi 128K, MiniMax-M2.5 (204K) | 超长上下文 | | **长文本处理** | Kimi 128K, MiniMax-M3 (512K) | 超长上下文 |
| **快速响应** | Qwen-Turbo, GLM-4-Flash, MiniMax-M2.5-highspeed | 速度优先 | | **快速响应** | Qwen-Turbo, GLM-4-Flash, MiniMax-M2.7-highspeed | 速度优先 |
### 2. 成本优化 ### 2. 成本优化

View file

@ -3,18 +3,46 @@ DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense
# Deployment environment: dev or production # Deployment environment: dev or production
SURFSENSE_ENV=dev SURFSENSE_ENV=dev
#Celery Config # Redis (single endpoint for Celery broker/result backend + app features)
CELERY_BROKER_URL=redis://localhost:6379/0 REDIS_URL=redis://localhost:6379/0
CELERY_RESULT_BACKEND=redis://localhost:6379/0 # Optional: override individually only to split Redis across instances.
# Each defaults to REDIS_URL when unset.
# CELERY_BROKER_URL=redis://localhost:6379/0
# CELERY_RESULT_BACKEND=redis://localhost:6379/0
# REDIS_APP_URL=redis://localhost:6379/0
# Optional: isolate queues when sharing Redis with other apps # Optional: isolate queues when sharing Redis with other apps
CELERY_TASK_DEFAULT_QUEUE=surfsense CELERY_TASK_DEFAULT_QUEUE=surfsense
# Redis for app-level features (heartbeats, podcast markers)
# Defaults to CELERY_BROKER_URL when not set
REDIS_APP_URL=redis://localhost:6379/0
# Optional: TTL in seconds for connector indexing lock key # Optional: TTL in seconds for connector indexing lock key
# CONNECTOR_INDEXING_LOCK_TTL_SECONDS=28800 # CONNECTOR_INDEXING_LOCK_TTL_SECONDS=28800
# Messaging Gateway (global)
# GATEWAY_ENABLED: master switch for ALL messaging gateway channels (Telegram, WhatsApp,
# Slack, Discord). When FALSE, no gateway background workers/supervisors start and all
# gateway HTTP routes (webhooks, OAuth callbacks, pairing) return 404. Set per-channel
# flags below to control individual platforms once the gateway is enabled.
GATEWAY_ENABLED=TRUE
# Telegram Gateway
# TELEGRAM_WEBHOOK_SECRET must be 1-256 chars and contain only A-Z, a-z, 0-9, _ or -
# GATEWAY_TELEGRAM_INTAKE_MODE: `webhook` for production, `longpoll` for single-replica self-host fallback, `disabled` to skip Telegram intake
TELEGRAM_SHARED_BOT_TOKEN=
TELEGRAM_SHARED_BOT_USERNAME=
TELEGRAM_WEBHOOK_SECRET=
GATEWAY_BASE_URL=http://localhost:8000
GATEWAY_TELEGRAM_INTAKE_MODE=webhook
# WhatsApp Gateway
# GATEWAY_WHATSAPP_INTAKE_MODE: `cloud` for Meta Cloud API, `baileys` for self-hosted bridge, `disabled` to skip WhatsApp intake
GATEWAY_WHATSAPP_INTAKE_MODE=disabled
WHATSAPP_SHARED_BUSINESS_TOKEN=
WHATSAPP_SHARED_PHONE_NUMBER_ID=
WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER=
WHATSAPP_SHARED_WABA_ID=
WHATSAPP_GRAPH_API_VERSION=v25.0
WHATSAPP_WEBHOOK_VERIFY_TOKEN=
WHATSAPP_WEBHOOK_APP_SECRET=
WHATSAPP_BRIDGE_URL=http://whatsapp-bridge:9929
# Platform Web Search (SearXNG) # Platform Web Search (SearXNG)
# Set this to enable built-in web search. Docker Compose sets it automatically. # Set this to enable built-in web search. Docker Compose sets it automatically.
# Only uncomment if running the backend outside Docker (e.g. uvicorn on host). # Only uncomment if running the backend outside Docker (e.g. uvicorn on host).
@ -64,8 +92,6 @@ STRIPE_PAGE_BUYING_ENABLED=TRUE
STRIPE_TOKEN_BUYING_ENABLED=FALSE STRIPE_TOKEN_BUYING_ENABLED=FALSE
STRIPE_PREMIUM_TOKEN_PRICE_ID=price_... STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
STRIPE_CREDIT_MICROS_PER_UNIT=1000000 STRIPE_CREDIT_MICROS_PER_UNIT=1000000
# DEPRECATED — use STRIPE_CREDIT_MICROS_PER_UNIT (1:1 numerical mapping):
# STRIPE_TOKENS_PER_UNIT=1000000
# Periodic Stripe safety net for purchases left in PENDING (minutes old) # Periodic Stripe safety net for purchases left in PENDING (minutes old)
STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10 STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
@ -98,11 +124,14 @@ CLICKUP_CLIENT_ID=your_clickup_client_id_here
CLICKUP_CLIENT_SECRET=your_clickup_client_secret_here CLICKUP_CLIENT_SECRET=your_clickup_client_secret_here
CLICKUP_REDIRECT_URI=http://localhost:8000/api/v1/auth/clickup/connector/callback CLICKUP_REDIRECT_URI=http://localhost:8000/api/v1/auth/clickup/connector/callback
# Discord OAuth Configuration # Discord OAuth / Gateway Configuration
# The Discord connector and Discord gateway use the same Discord application/bot.
DISCORD_CLIENT_ID=your_discord_client_id_here DISCORD_CLIENT_ID=your_discord_client_id_here
DISCORD_CLIENT_SECRET=your_discord_client_secret_here DISCORD_CLIENT_SECRET=your_discord_client_secret_here
DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/auth/discord/connector/callback DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/auth/discord/connector/callback
DISCORD_BOT_TOKEN=your_bot_token_from_developer_portal DISCORD_BOT_TOKEN=your_bot_token_from_developer_portal
GATEWAY_DISCORD_ENABLED=FALSE
GATEWAY_DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/gateway/discord/callback
# Atlassian OAuth Configuration (Jira & Confluence) # Atlassian OAuth Configuration (Jira & Confluence)
ATLASSIAN_CLIENT_ID=your_atlassian_client_id_here ATLASSIAN_CLIENT_ID=your_atlassian_client_id_here
@ -120,10 +149,14 @@ NOTION_CLIENT_ID=your_notion_client_id_here
NOTION_CLIENT_SECRET=your_notion_client_secret_here NOTION_CLIENT_SECRET=your_notion_client_secret_here
NOTION_REDIRECT_URI=http://localhost:8000/api/v1/auth/notion/connector/callback NOTION_REDIRECT_URI=http://localhost:8000/api/v1/auth/notion/connector/callback
# Slack OAuth Configuration # Slack OAuth / Gateway Configuration
# The Slack connector and Slack gateway can use the same Slack app client ID/secret.
SLACK_CLIENT_ID=your_slack_client_id_here SLACK_CLIENT_ID=your_slack_client_id_here
SLACK_CLIENT_SECRET=your_slack_client_secret_here SLACK_CLIENT_SECRET=your_slack_client_secret_here
SLACK_REDIRECT_URI=http://localhost:8000/api/v1/auth/slack/connector/callback SLACK_REDIRECT_URI=http://localhost:8000/api/v1/auth/slack/connector/callback
GATEWAY_SLACK_ENABLED=FALSE
GATEWAY_SLACK_SIGNING_SECRET=your_slack_signing_secret_here
GATEWAY_SLACK_REDIRECT_URI=http://localhost:8000/api/v1/gateway/slack/callback
# Microsoft OAuth (Teams & OneDrive) # Microsoft OAuth (Teams & OneDrive)
MICROSOFT_CLIENT_ID=your_microsoft_client_id_here MICROSOFT_CLIENT_ID=your_microsoft_client_id_here
@ -197,8 +230,6 @@ PAGES_LIMIT=500
# models bill proportionally. Applies only to models with # models bill proportionally. Applies only to models with
# billing_tier=premium in global_llm_config.yaml. # billing_tier=premium in global_llm_config.yaml.
PREMIUM_CREDIT_MICROS_LIMIT=5000000 PREMIUM_CREDIT_MICROS_LIMIT=5000000
# DEPRECATED — use PREMIUM_CREDIT_MICROS_LIMIT (1:1 numerical mapping):
# PREMIUM_TOKEN_LIMIT=5000000
# Safety ceiling on per-call premium reservation, in micro-USD. # Safety ceiling on per-call premium reservation, in micro-USD.
# stream_new_chat estimates an upper-bound cost from the model's # stream_new_chat estimates an upper-bound cost from the model's
@ -246,17 +277,19 @@ TURNSTILE_ENABLED=FALSE
TURNSTILE_SECRET_KEY= TURNSTILE_SECRET_KEY=
# Proxy provider selection. Selects a ProxyProvider implementation registered in
# app/utils/proxy/registry.py. Default: "anonymous_proxies". Add new vendors there.
# PROXY_PROVIDER=anonymous_proxies
# Residential Proxy Configuration (anonymous-proxies.net) # Residential Proxy Configuration (anonymous-proxies.net)
# Used for web crawling, link previews, and YouTube transcript fetching to avoid IP bans. # Used for web crawling, link previews, and YouTube transcript fetching to avoid IP bans.
# Leave commented out to disable proxying. # Consumed by the "anonymous_proxies" provider. Leave commented out to disable proxying.
# RESIDENTIAL_PROXY_USERNAME=your_proxy_username # RESIDENTIAL_PROXY_USERNAME=your_proxy_username
# RESIDENTIAL_PROXY_PASSWORD=your_proxy_password # RESIDENTIAL_PROXY_PASSWORD=your_proxy_password
# RESIDENTIAL_PROXY_HOSTNAME=rotating.dnsproxifier.com:31230 # RESIDENTIAL_PROXY_HOSTNAME=rotating.dnsproxifier.com:31230
# RESIDENTIAL_PROXY_LOCATION= # RESIDENTIAL_PROXY_LOCATION=
# RESIDENTIAL_PROXY_TYPE=1 # RESIDENTIAL_PROXY_TYPE=1
FIRECRAWL_API_KEY=fcr-01J0000000000000000000000
# File Parser Service # File Parser Service
ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD or DOCLING ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD or DOCLING
UNSTRUCTURED_API_KEY=Tpu3P0U8iy UNSTRUCTURED_API_KEY=Tpu3P0U8iy
@ -265,6 +298,16 @@ LLAMA_CLOUD_API_KEY=llx-nnn
# AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/ # AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
# AZURE_DI_KEY=your-key # AZURE_DI_KEY=your-key
# Original File Storage
# Where to persist the original bytes of uploaded documents (for download today,
# redaction / form-filling later). "local" needs no cloud creds and is the dev default.
FILE_STORAGE_BACKEND=local
# Local backend: directory for stored files (defaults to surfsense_backend/.local_object_store)
# FILE_STORAGE_LOCAL_PATH=/var/lib/surfsense/object-store
# Azure Blob backend (set FILE_STORAGE_BACKEND=azure):
# AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net
# AZURE_STORAGE_CONTAINER=surfsense-documents
# Daytona Sandbox (isolated code execution) # Daytona Sandbox (isolated code execution)
# DAYTONA_SANDBOX_ENABLED=FALSE # DAYTONA_SANDBOX_ENABLED=FALSE
# DAYTONA_API_KEY=your-daytona-api-key # DAYTONA_API_KEY=your-daytona-api-key
@ -285,9 +328,6 @@ LANGSMITH_PROJECT=surfsense
# ============================================================================= # =============================================================================
# OPTIONAL: New-chat agent feature flags # OPTIONAL: New-chat agent feature flags
# ============================================================================= # =============================================================================
# Multi-agent orchestrator switch for authenticated chat streaming.
# MULTI_AGENT_CHAT_ENABLED=false
# Master kill-switch — when true, every flag below is forced OFF. # Master kill-switch — when true, every flag below is forced OFF.
# SURFSENSE_DISABLE_NEW_AGENT_STACK=false # SURFSENSE_DISABLE_NEW_AGENT_STACK=false
@ -322,6 +362,13 @@ LANGSMITH_PROJECT=surfsense
# SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false # SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
# SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false # SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false
# KB retrieval mode (default OFF = lazy). When OFF, the main agent retrieves
# KB content on demand via the `search_knowledge_base` tool and skips the
# expensive per-turn pre-injection (planner LLM + embed + hybrid search,
# ~2.3s); explicit @-mentions are still surfaced cheaply. Set to true to
# restore the original eager `<priority_documents>` pre-injection.
# SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION=false
# Snapshot / revert # Snapshot / revert
# SURFSENSE_ENABLE_ACTION_LOG=false # SURFSENSE_ENABLE_ACTION_LOG=false
# SURFSENSE_ENABLE_REVERT_ROUTE=false # Backend-only; flip when UI ships # SURFSENSE_ENABLE_REVERT_ROUTE=false # Backend-only; flip when UI ships
@ -342,6 +389,15 @@ LANGSMITH_PROJECT=surfsense
# rollback if you suspect cache-related staleness. # rollback if you suspect cache-related staleness.
# SURFSENSE_ENABLE_AGENT_CACHE=true # SURFSENSE_ENABLE_AGENT_CACHE=true
# Cross-thread reuse (default ON). Drops thread_id from the cache key so a
# returning user's NEW chats (same user + search space + config + visibility)
# hit the already-compiled graph instead of paying a fresh ~4-5s compile —
# turning a cold first turn into a warm one. Safe because ActionLog,
# KB-persistence, and the deliverables tools now resolve the chat thread from
# the live RunnableConfig at call time rather than a build-time closure. Flip
# OFF to fall back to a per-thread cache key (instant rollback).
# SURFSENSE_ENABLE_CROSS_THREAD_AGENT_CACHE=true
# Cache capacity (max number of compiled-agent entries kept in memory) # Cache capacity (max number of compiled-agent entries kept in memory)
# and TTL per entry (seconds). Working set is typically one entry per # and TTL per entry (seconds). Working set is typically one entry per
# active thread on this replica; tune up for very large deployments. # active thread on this replica; tune up for very large deployments.

View file

@ -2,6 +2,7 @@
.venv .venv
venv/ venv/
data/ data/
.local_object_store/
__pycache__/ __pycache__/
.flashrank_cache .flashrank_cache
surf_new_backend.egg-info/ surf_new_backend.egg-info/

View file

@ -1,3 +1,4 @@
# syntax=docker.io/docker/dockerfile:1
# ============================================================================= # =============================================================================
# SurfSense Backend — Multi-stage Dockerfile # SurfSense Backend — Multi-stage Dockerfile
# ============================================================================= # =============================================================================
@ -61,15 +62,25 @@ COPY pyproject.toml uv.lock ./
# Exporting the lock to requirements.txt and feeding it to `uv pip install` # Exporting the lock to requirements.txt and feeding it to `uv pip install`
# pins every transitive package to the exact version captured in uv.lock. # pins every transitive package to the exact version captured in uv.lock.
# #
# Note on torch/CUDA: we do NOT install torch from a separate cu* index here. # Note on torch/CUDA: the export must always select either the cpu or CUDA
# PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull # extra declared in pyproject.toml. A no-extra export would resolve torch from
# nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all # PyPI on Linux, which currently pulls CUDA-enabled wheels and nvidia-* deps.
# captured in uv.lock). If a specific CUDA version is needed, wire it through # Keep CUDA version selection in [tool.uv.sources] so uv.lock remains the
# [tool.uv.sources] in pyproject.toml so the lock stays the source of truth. # source of truth. The install step also needs the matching PyTorch index,
# because requirements.txt preserves the +cpu/+cu wheel pins but not uv's
# package source metadata.
ARG USE_CUDA=false
ARG CUDA_EXTRA=cu128
RUN pip install --no-cache-dir uv && \ RUN pip install --no-cache-dir uv && \
if [ "$USE_CUDA" = "true" ]; then EXTRA="$CUDA_EXTRA"; else EXTRA="cpu"; fi && \
TORCH_INDEX="https://download.pytorch.org/whl/${EXTRA}" && \
uv export --frozen --no-dev --no-hashes --no-emit-project \ uv export --frozen --no-dev --no-hashes --no-emit-project \
--extra "$EXTRA" \
--format requirements-txt -o /tmp/requirements.txt && \ --format requirements-txt -o /tmp/requirements.txt && \
uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \ uv pip install --system --no-cache-dir \
--index "$TORCH_INDEX" \
--index-strategy unsafe-best-match \
-r /tmp/requirements.txt && \
rm /tmp/requirements.txt rm /tmp/requirements.txt
@ -94,10 +105,14 @@ RUN printf '%s\n' \
| python || true | python || true
ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
RUN python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')" RUN --mount=type=secret,id=HF_TOKEN \
HF_TOKEN="$(cat /run/secrets/HF_TOKEN 2>/dev/null || true)" \
python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"
# Install Playwright browsers (the playwright python package itself is in deps) # Install Scrapling's browser engines (patchright Chromium + Camoufox).
RUN playwright install chromium --with-deps # Scrapling pulls playwright/patchright via the `fetchers` extra; `scrapling install`
# downloads the matching browser binaries used by DynamicFetcher/StealthyFetcher.
RUN scrapling install
# Shared temp directory for file uploads between API and Worker containers. # Shared temp directory for file uploads between API and Worker containers.
# Python's tempfile module uses TMPDIR, so uploaded files land here. # Python's tempfile module uses TMPDIR, so uploaded files land here.

View file

@ -3,6 +3,7 @@ import os
import sys import sys
from logging.config import fileConfig from logging.config import fileConfig
import sqlalchemy as sa
from sqlalchemy import pool from sqlalchemy import pool
from sqlalchemy.engine import Connection from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config from sqlalchemy.ext.asyncio import async_engine_from_config
@ -36,6 +37,9 @@ if config.config_file_name is not None:
# target_metadata = mymodel.Base.metadata # target_metadata = mymodel.Base.metadata
target_metadata = Base.metadata target_metadata = Base.metadata
MIGRATION_ADVISORY_LOCK_NAMESPACE = "surfsense"
MIGRATION_ADVISORY_LOCK_NAME = "alembic_migrations"
# other values from the config, defined by the needs of env.py, # other values from the config, defined by the needs of env.py,
# can be acquired: # can be acquired:
# my_important_option = config.get_main_option("my_important_option") # my_important_option = config.get_main_option("my_important_option")
@ -73,8 +77,22 @@ def do_run_migrations(connection: Connection) -> None:
transaction_per_migration=True, transaction_per_migration=True,
) )
with context.begin_transaction(): lock_params = {
context.run_migrations() "namespace": MIGRATION_ADVISORY_LOCK_NAMESPACE,
"name": MIGRATION_ADVISORY_LOCK_NAME,
}
connection.execute(
sa.text("SELECT pg_advisory_lock(hashtext(:namespace), hashtext(:name))"),
lock_params,
)
try:
with context.begin_transaction():
context.run_migrations()
finally:
connection.execute(
sa.text("SELECT pg_advisory_unlock(hashtext(:namespace), hashtext(:name))"),
lock_params,
)
async def run_async_migrations() -> None: async def run_async_migrations() -> None:

View file

@ -47,7 +47,6 @@ depends_on: str | Sequence[str] | None = None
PUBLICATION_NAME = "zero_publication" PUBLICATION_NAME = "zero_publication"
# Must stay in sync with the column lists in migrations 117 / 139 / 140.
DOCUMENT_COLS = [ DOCUMENT_COLS = [
"id", "id",
"title", "title",

View file

@ -0,0 +1,175 @@
"""add automation_runs to zero_publication with thin column list
Publishes ``automation_runs`` so the dashboard can replace polling with a
live run status + per-step ticker. Only the columns the list and ticker
read are exposed (``id, automation_id, trigger_id, status, step_results,
started_at, finished_at, created_at``); heavy JSONB
(``definition_snapshot``, ``inputs``, ``output``, ``artifacts``, ``error``)
stays on REST and is fetched lazily on detail expand.
Uses the canonical ``ALTER PUBLICATION ... SET TABLE`` + ``COMMENT``
bookend pattern (see migration 143) -- the shape Zero ``>=1.0`` requires
to fire its schema-change hook. Existing tables are re-emitted unchanged.
Revision ID: 148
Revises: 147
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# Alembic revision identifiers for this migration.
revision: str = "148"
down_revision: str | None = "147"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Name of the PostgreSQL publication whose table list this migration rewrites.
PUBLICATION_NAME = "zero_publication"

# Mirrors migration 143. Kept in sync explicitly: any change to these lists
# must be re-emitted in a new resync migration with COMMENT bookends.
# Columns of ``documents`` listed in the publication's column list.
DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
]

# Columns of ``"user"`` listed in the publication's column list.
USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
]

# Thin set: status + lightweight progress only. Heavy JSONB stays on REST.
AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
]
def _has_zero_version(conn, table: str) -> bool:
    """Report whether ``table`` carries a ``_0_version`` column.

    The lookup goes through ``information_schema.columns`` and matches on the
    table name only (no schema filter).
    """
    lookup = sa.text(
        "SELECT 1 FROM information_schema.columns "
        "WHERE table_name = :tbl AND column_name = '_0_version'"
    )
    row = conn.execute(lookup, {"tbl": table}).fetchone()
    return row is not None
def _build_set_table_ddl(
    *, documents_has_zero_ver: bool, user_has_zero_ver: bool
) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for this revision.

    ``documents``, ``"user"`` and ``automation_runs`` are published with explicit
    column lists; the remaining tables are published without one. When the
    matching flag is true, ``"_0_version"`` is appended to the column list of
    ``documents`` / ``"user"``.
    """
    document_columns = list(DOCUMENT_COLS)
    if documents_has_zero_ver:
        document_columns.append('"_0_version"')

    user_columns = list(USER_COLS)
    if user_has_zero_ver:
        user_columns.append('"_0_version"')

    document_list = ", ".join(document_columns)
    user_list = ", ".join(user_columns)
    run_list = ", ".join(AUTOMATION_RUN_COLS)

    published = [
        "notifications",
        f"documents ({document_list})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        f'"user" ({user_list})',
        f"automation_runs ({run_list})",
    ]
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(published)
def upgrade() -> None:
    """Add ``automation_runs`` (thin column list) to the publication.

    Does nothing when the publication does not exist. Otherwise the table list
    is re-emitted between two ``COMMENT ON PUBLICATION`` statements.
    """
    bind = op.get_bind()

    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    set_table_ddl = _build_set_table_ddl(
        documents_has_zero_ver=_has_zero_version(bind, "documents"),
        user_has_zero_ver=_has_zero_version(bind, "user"),
    )
    statements = (
        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-148-resync'",
        set_table_ddl,
        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-148-resync'",
    )

    # COMMENT-ALTER-COMMENT trio must be one transaction so Zero observes
    # them as one schema-change event. Matches the SAVEPOINT pattern used
    # in migrations 117 / 139 / 140 / 143.
    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()
    with scope:
        for statement in statements:
            bind.execute(sa.text(statement))
def downgrade() -> None:
    """Re-emit migration 143's shape (no automation_runs)."""
    bind = op.get_bind()

    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    document_columns = list(DOCUMENT_COLS)
    if _has_zero_version(bind, "documents"):
        document_columns.append('"_0_version"')
    user_columns = list(USER_COLS)
    if _has_zero_version(bind, "user"):
        user_columns.append('"_0_version"')

    document_list = ", ".join(document_columns)
    user_list = ", ".join(user_columns)
    published = ", ".join(
        [
            "notifications",
            f"documents ({document_list})",
            "folders",
            "search_source_connectors",
            "new_chat_messages",
            "chat_comments",
            "chat_session_state",
            f'"user" ({user_list})',
        ]
    )

    statements = (
        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-148-downgrade'",
        f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE {published}",
        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-148-downgrade'",
    )

    # Same bookend pattern as upgrade(): all three statements share one
    # transaction (a SAVEPOINT when a transaction is already open).
    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()
    with scope:
        for statement in statements:
            bind.execute(sa.text(statement))

View file

@ -0,0 +1,667 @@
"""add external chat surface tables
Revision ID: 149
Revises: 148
Create Date: 2026-05-27
Adds the lean external chat surface schema:
* external_chat_accounts
* external_chat_bindings
* external_chat_inbound_events
External chat surfaces store Telegram-originated conversations in the existing
chat tables. This migration adds ``source`` to ``new_chat_threads`` and
``new_chat_messages`` as UI metadata while publishing all chat-message sources
through Zero so a future SurfSense UI layer can render external chats. External
chat adapter tables are served through REST in v1, so they are intentionally not
added to ``zero_publication``.
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from alembic import op
# Alembic revision identifiers for this migration.
revision: str = "149"
down_revision: str | None = "148"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Name of the PostgreSQL publication re-emitted by upgrade() / downgrade().
PUBLICATION_NAME = "zero_publication"

# Columns of ``documents`` listed in the publication's column list.
DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
]

# Columns of ``"user"`` listed in the publication's column list.
USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
]

# Columns of ``automation_runs`` listed in the publication's column list.
AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
]
def _has_zero_version(conn, table: str) -> bool:
    """Report whether ``table`` in the current schema has a ``_0_version`` column.

    Delegates to ``_column_exists`` so the lookup is scoped to
    ``current_schema()`` like every other existence helper in this migration.
    An unscoped ``information_schema.columns`` lookup would also match a
    same-named table in a different schema and could make the generated
    ``ALTER PUBLICATION`` reference a column the published table does not have.

    Args:
        conn: Connection used to run the catalog query.
        table: Unqualified table name to inspect.

    Returns:
        True when the column exists on ``table`` in the current schema.
    """
    return _column_exists(conn, table, "_0_version")
def _cols(columns: list[str]) -> str:
return ", ".join(columns)
def _table_exists(conn, table: str) -> bool:
    """Report whether ``table`` exists in the current schema."""
    query = sa.text(
        "SELECT 1 FROM information_schema.tables "
        "WHERE table_schema = current_schema() AND table_name = :tbl"
    )
    found = conn.execute(query, {"tbl": table}).fetchone()
    return found is not None
def _column_exists(conn, table: str, column: str) -> bool:
    """Report whether ``table`` in the current schema has a column ``column``."""
    query = sa.text(
        "SELECT 1 FROM information_schema.columns "
        "WHERE table_schema = current_schema() "
        "AND table_name = :tbl AND column_name = :col"
    )
    params = {"tbl": table, "col": column}
    found = conn.execute(query, params).fetchone()
    return found is not None
def _index_exists(conn, index_name: str) -> bool:
    """Report whether an index named ``index_name`` exists in the current schema."""
    query = sa.text(
        "SELECT 1 FROM pg_indexes "
        "WHERE schemaname = current_schema() AND indexname = :name"
    )
    found = conn.execute(query, {"name": index_name}).fetchone()
    return found is not None
def _constraint_exists(conn, table: str, constraint_name: str) -> bool:
    """Report whether ``table`` in the current schema has the named constraint."""
    query = sa.text(
        "SELECT 1 FROM information_schema.table_constraints "
        "WHERE table_schema = current_schema() "
        "AND table_name = :tbl AND constraint_name = :name"
    )
    params = {"tbl": table, "name": constraint_name}
    found = conn.execute(query, params).fetchone()
    return found is not None
def _drop_index_if_exists(index_name: str, table_name: str) -> None:
    """Drop ``index_name`` from ``table_name`` when the index is present."""
    bind = op.get_bind()
    if not _index_exists(bind, index_name):
        return
    op.drop_index(index_name, table_name=table_name)
def _drop_column_if_exists(table_name: str, column_name: str) -> None:
    """Drop ``column_name`` from ``table_name`` when the column is present."""
    bind = op.get_bind()
    if not _column_exists(bind, table_name, column_name):
        return
    op.drop_column(table_name, column_name)
def _build_set_table_ddl(
    *, documents_has_zero_ver: bool, user_has_zero_ver: bool
) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for this revision.

    ``documents``, ``"user"`` and ``automation_runs`` get explicit column
    lists; the other tables are published without one. ``"_0_version"`` is
    appended to the ``documents`` / ``"user"`` list when the matching flag is
    true.
    """
    document_columns = list(DOCUMENT_COLS)
    if documents_has_zero_ver:
        document_columns.append('"_0_version"')

    user_columns = list(USER_COLS)
    if user_has_zero_ver:
        user_columns.append('"_0_version"')

    document_list = _cols(document_columns)
    user_list = _cols(user_columns)
    run_list = _cols(AUTOMATION_RUN_COLS)

    published = [
        "notifications",
        f"documents ({document_list})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        f'"user" ({user_list})',
        f"automation_runs ({run_list})",
    ]
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(published)
def _create_enum(name: str, values: tuple[str, ...]) -> postgresql.ENUM:
    """Create the PostgreSQL enum ``name`` if missing and return a column type.

    The returned ``ENUM`` is built with ``create_type=False`` so that using it
    in a table definition does not try to create the type a second time.
    """
    postgresql.ENUM(*values, name=name).create(op.get_bind(), checkfirst=True)
    column_type = postgresql.ENUM(*values, name=name, create_type=False)
    return column_type
def upgrade() -> None:
    """Create the external chat surface schema and re-emit the publication.

    Steps, in order:

    1. Create the ``external_chat_*`` enum types (skipped when present).
    2. Create ``external_chat_accounts``, ``external_chat_bindings`` and
       ``external_chat_inbound_events`` when missing, plus their indexes
       (``if_not_exists=True`` keeps re-runs safe).
    3. Add ``source`` / ``external_chat_binding_id`` to ``new_chat_threads``
       and ``source`` / ``platform_metadata`` to ``new_chat_messages``.
    4. Switch ``new_chat_messages`` to ``REPLICA IDENTITY FULL``.
    5. If the publication exists, re-emit its table list between two
       ``COMMENT ON PUBLICATION`` bookends inside one transaction/savepoint.

    The bookend labels carry this migration's revision number (149).
    """
    conn = op.get_bind()

    # --- Enum types -------------------------------------------------------
    external_chat_platform_enum = _create_enum(
        "external_chat_platform", ("telegram", "whatsapp", "signal")
    )
    external_chat_account_mode_enum = _create_enum(
        "external_chat_account_mode", ("cloud_shared", "self_host_byo")
    )
    external_chat_health_status_enum = _create_enum(
        "external_chat_health_status", ("unknown", "ok", "failing")
    )
    external_chat_binding_state_enum = _create_enum(
        "external_chat_binding_state", ("pending", "bound", "revoked", "suspended")
    )
    external_chat_peer_kind_enum = _create_enum(
        "external_chat_peer_kind", ("direct", "group", "channel", "unknown")
    )
    external_chat_event_kind_enum = _create_enum(
        "external_chat_event_kind",
        ("message", "edited_message", "callback_query", "other"),
    )
    external_chat_event_status_enum = _create_enum(
        "external_chat_event_status",
        ("received", "processing", "processed", "ignored", "failed"),
    )

    # --- external_chat_accounts ------------------------------------------
    if not _table_exists(conn, "external_chat_accounts"):
        op.create_table(
            "external_chat_accounts",
            sa.Column("id", sa.BigInteger(), primary_key=True),
            sa.Column("platform", external_chat_platform_enum, nullable=False),
            sa.Column("mode", external_chat_account_mode_enum, nullable=False),
            sa.Column("owner_user_id", postgresql.UUID(as_uuid=True), nullable=True),
            sa.Column("owner_search_space_id", sa.Integer(), nullable=True),
            sa.Column(
                "is_system_account",
                sa.Boolean(),
                nullable=False,
                server_default="false",
            ),
            sa.Column("encrypted_credentials", sa.Text(), nullable=True),
            sa.Column("bot_username", sa.String(255), nullable=True),
            sa.Column("webhook_secret", sa.String(64), nullable=True),
            sa.Column(
                "cursor_state",
                postgresql.JSONB(astext_type=sa.Text()),
                nullable=False,
                server_default=sa.text("'{}'::jsonb"),
            ),
            sa.Column(
                "health_status",
                external_chat_health_status_enum,
                nullable=False,
                server_default="unknown",
            ),
            sa.Column(
                "last_health_check_at", sa.TIMESTAMP(timezone=True), nullable=True
            ),
            sa.Column("suspended_at", sa.TIMESTAMP(timezone=True), nullable=True),
            sa.Column("suspended_reason", sa.Text(), nullable=True),
            sa.Column(
                "created_at",
                sa.TIMESTAMP(timezone=True),
                nullable=False,
                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
            ),
            sa.Column(
                "updated_at",
                sa.TIMESTAMP(timezone=True),
                nullable=False,
                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
            ),
            # System accounts have no owner; every other account must have one.
            sa.CheckConstraint(
                "(is_system_account = true AND owner_user_id IS NULL) OR "
                "(is_system_account = false AND owner_user_id IS NOT NULL)",
                name="ck_external_chat_accounts_owner_shape",
            ),
            sa.ForeignKeyConstraint(["owner_user_id"], ["user.id"], ondelete="CASCADE"),
            sa.ForeignKeyConstraint(
                ["owner_search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
            ),
        )
    op.create_index(
        "uq_external_chat_accounts_owner_platform",
        "external_chat_accounts",
        ["owner_user_id", "platform"],
        unique=True,
        postgresql_where=sa.text("is_system_account = false"),
        if_not_exists=True,
    )
    op.create_index(
        "uq_external_chat_accounts_system_platform",
        "external_chat_accounts",
        ["platform"],
        unique=True,
        postgresql_where=sa.text("is_system_account = true"),
        if_not_exists=True,
    )
    op.create_index(
        "uq_external_chat_accounts_webhook_secret",
        "external_chat_accounts",
        ["webhook_secret"],
        unique=True,
        postgresql_where=sa.text("webhook_secret IS NOT NULL"),
        if_not_exists=True,
    )

    # --- external_chat_bindings ------------------------------------------
    if not _table_exists(conn, "external_chat_bindings"):
        op.create_table(
            "external_chat_bindings",
            sa.Column("id", sa.BigInteger(), primary_key=True),
            sa.Column("account_id", sa.BigInteger(), nullable=False),
            sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
            sa.Column("search_space_id", sa.Integer(), nullable=False),
            sa.Column(
                "state",
                external_chat_binding_state_enum,
                nullable=False,
                server_default="pending",
            ),
            sa.Column("pairing_code", sa.Text(), nullable=True),
            sa.Column(
                "pairing_code_expires_at", sa.TIMESTAMP(timezone=True), nullable=True
            ),
            sa.Column("external_peer_id", sa.Text(), nullable=True),
            sa.Column(
                "external_peer_kind",
                external_chat_peer_kind_enum,
                nullable=False,
                server_default="unknown",
            ),
            sa.Column(
                "external_thread_id",
                sa.Text(),
                nullable=True,
                comment="Reserved for Telegram message_thread_id when group/forum support lands.",
            ),
            sa.Column("external_display_name", sa.Text(), nullable=True),
            sa.Column("external_username", sa.Text(), nullable=True),
            sa.Column(
                "external_metadata",
                postgresql.JSONB(astext_type=sa.Text()),
                nullable=False,
                server_default=sa.text("'{}'::jsonb"),
            ),
            sa.Column("new_chat_thread_id", sa.Integer(), nullable=True),
            sa.Column("revoked_at", sa.TIMESTAMP(timezone=True), nullable=True),
            sa.Column("suspended_at", sa.TIMESTAMP(timezone=True), nullable=True),
            sa.Column("suspended_reason", sa.Text(), nullable=True),
            sa.Column(
                "created_at",
                sa.TIMESTAMP(timezone=True),
                nullable=False,
                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
            ),
            sa.Column(
                "updated_at",
                sa.TIMESTAMP(timezone=True),
                nullable=False,
                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
            ),
            sa.ForeignKeyConstraint(
                ["account_id"], ["external_chat_accounts.id"], ondelete="CASCADE"
            ),
            sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
            sa.ForeignKeyConstraint(
                ["search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
            ),
            sa.ForeignKeyConstraint(
                ["new_chat_thread_id"], ["new_chat_threads.id"], ondelete="SET NULL"
            ),
        )
    op.create_index(
        "uq_external_chat_bindings_account_peer_active",
        "external_chat_bindings",
        ["account_id", "external_peer_id"],
        unique=True,
        postgresql_where=sa.text(
            "state IN ('bound', 'suspended') AND external_peer_id IS NOT NULL"
        ),
        if_not_exists=True,
    )
    op.create_index(
        "uq_external_chat_bindings_pairing_code_pending",
        "external_chat_bindings",
        ["pairing_code"],
        unique=True,
        postgresql_where=sa.text("state = 'pending'"),
        if_not_exists=True,
    )
    op.create_index(
        "ix_external_chat_bindings_user_state",
        "external_chat_bindings",
        ["user_id", "state"],
        if_not_exists=True,
    )
    op.create_index(
        "ix_external_chat_bindings_search_space_state",
        "external_chat_bindings",
        ["search_space_id", "state"],
        if_not_exists=True,
    )

    # --- external_chat_inbound_events ------------------------------------
    if not _table_exists(conn, "external_chat_inbound_events"):
        op.create_table(
            "external_chat_inbound_events",
            sa.Column("id", sa.BigInteger(), primary_key=True),
            sa.Column("account_id", sa.BigInteger(), nullable=False),
            sa.Column("external_chat_binding_id", sa.BigInteger(), nullable=True),
            sa.Column("platform", external_chat_platform_enum, nullable=False),
            sa.Column("event_dedupe_key", sa.Text(), nullable=False),
            sa.Column("external_event_id", sa.Text(), nullable=True),
            sa.Column("external_message_id", sa.Text(), nullable=True),
            sa.Column("event_kind", external_chat_event_kind_enum, nullable=False),
            sa.Column(
                "raw_payload",
                postgresql.JSONB(astext_type=sa.Text()),
                nullable=True,
            ),
            sa.Column("request_id", sa.String(64), nullable=True),
            sa.Column(
                "status",
                external_chat_event_status_enum,
                nullable=False,
                server_default="received",
            ),
            sa.Column(
                "attempt_count", sa.Integer(), nullable=False, server_default="0"
            ),
            sa.Column("last_error", sa.Text(), nullable=True),
            sa.Column(
                "received_at",
                sa.TIMESTAMP(timezone=True),
                nullable=False,
                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
            ),
            sa.Column("processed_at", sa.TIMESTAMP(timezone=True), nullable=True),
            sa.Column(
                "created_at",
                sa.TIMESTAMP(timezone=True),
                nullable=False,
                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
            ),
            sa.ForeignKeyConstraint(
                ["account_id"], ["external_chat_accounts.id"], ondelete="CASCADE"
            ),
            sa.ForeignKeyConstraint(
                ["external_chat_binding_id"],
                ["external_chat_bindings.id"],
                ondelete="SET NULL",
            ),
            sa.UniqueConstraint(
                "account_id",
                "event_dedupe_key",
                name="uq_external_chat_inbound_account_dedupe_key",
            ),
        )
    op.create_index(
        "ix_external_chat_inbound_status_received_at",
        "external_chat_inbound_events",
        ["status", "received_at"],
        if_not_exists=True,
    )
    op.create_index(
        "ix_external_chat_inbound_binding_received_at",
        "external_chat_inbound_events",
        ["external_chat_binding_id", "received_at"],
        if_not_exists=True,
    )
    op.create_index(
        "ix_external_chat_inbound_request_id",
        "external_chat_inbound_events",
        ["request_id"],
        postgresql_where=sa.text("request_id IS NOT NULL"),
        if_not_exists=True,
    )

    # --- new_chat_threads additions --------------------------------------
    if not _column_exists(conn, "new_chat_threads", "source"):
        op.add_column(
            "new_chat_threads",
            sa.Column("source", sa.Text(), nullable=False, server_default="surfsense"),
        )
    # Runs whether or not the column was just added, so an already-present
    # ``source`` column also ends up typed as TEXT.
    op.alter_column("new_chat_threads", "source", type_=sa.Text())
    if not _column_exists(conn, "new_chat_threads", "external_chat_binding_id"):
        op.add_column(
            "new_chat_threads",
            sa.Column("external_chat_binding_id", sa.BigInteger(), nullable=True),
        )
    if not _constraint_exists(
        conn,
        "new_chat_threads",
        "fk_new_chat_threads_external_chat_external_chat_binding_id",
    ):
        op.create_foreign_key(
            "fk_new_chat_threads_external_chat_external_chat_binding_id",
            "new_chat_threads",
            "external_chat_bindings",
            ["external_chat_binding_id"],
            ["id"],
            ondelete="SET NULL",
        )
    op.create_index(
        "ix_new_chat_threads_source", "new_chat_threads", ["source"], if_not_exists=True
    )
    op.create_index(
        "ix_new_chat_threads_external_chat_binding_id",
        "new_chat_threads",
        ["external_chat_binding_id"],
        if_not_exists=True,
    )

    # --- new_chat_messages additions -------------------------------------
    if not _column_exists(conn, "new_chat_messages", "source"):
        op.add_column(
            "new_chat_messages",
            sa.Column("source", sa.Text(), nullable=False, server_default="surfsense"),
        )
    op.alter_column("new_chat_messages", "source", type_=sa.Text())
    if not _column_exists(conn, "new_chat_messages", "platform_metadata"):
        op.add_column(
            "new_chat_messages",
            sa.Column(
                "platform_metadata",
                postgresql.JSONB(astext_type=sa.Text()),
                nullable=True,
            ),
        )
    op.create_index(
        "ix_new_chat_messages_source",
        "new_chat_messages",
        ["source"],
        if_not_exists=True,
    )
    # Unique per (thread, platform, external message id), restricted to rows
    # whose platform_metadata marks them as inbound.
    op.create_index(
        "uq_new_chat_messages_inbound_platform",
        "new_chat_messages",
        [
            "thread_id",
            sa.text("(platform_metadata->>'platform')"),
            sa.text("(platform_metadata->>'external_message_id')"),
        ],
        unique=True,
        postgresql_where=sa.text(
            "platform_metadata IS NOT NULL "
            "AND platform_metadata->>'direction' = 'inbound'"
        ),
        if_not_exists=True,
    )
    op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY FULL")

    # --- Publication resync ----------------------------------------------
    exists = conn.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if exists:
        documents_has_zero_ver = _has_zero_version(conn, "documents")
        user_has_zero_ver = _has_zero_version(conn, "user")
        # Use a SAVEPOINT when a transaction is already open so the three
        # statements are applied together.
        tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
        with tx:
            conn.execute(
                sa.text(
                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-149-external-chat'"
                )
            )
            conn.execute(
                sa.text(
                    _build_set_table_ddl(
                        documents_has_zero_ver=documents_has_zero_ver,
                        user_has_zero_ver=user_has_zero_ver,
                    )
                )
            )
            conn.execute(
                sa.text(
                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-149-external-chat'"
                )
            )
def downgrade() -> None:
    """Undo migration 149.

    Steps, in order:

    1. If the publication exists, re-emit migration 148's table list between
       two ``COMMENT ON PUBLICATION`` bookends (labelled with this revision,
       149) inside one transaction/savepoint.
    2. Reset ``new_chat_messages`` to ``REPLICA IDENTITY DEFAULT`` and drop
       the indexes/columns added to ``new_chat_messages`` and
       ``new_chat_threads``.
    3. Drop the three ``external_chat_*`` tables (children first) and their
       indexes, then the enum types.

    Every drop is guarded by an existence check so a partially applied
    upgrade can still be rolled back.
    """
    conn = op.get_bind()

    exists = conn.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if exists:
        # Restore the publication shape from migration 148. Migration 149 does
        # not change the published table/column set, so that shape is exactly
        # what _build_set_table_ddl emits; reuse it instead of a second copy.
        ddl = _build_set_table_ddl(
            documents_has_zero_ver=_has_zero_version(conn, "documents"),
            user_has_zero_ver=_has_zero_version(conn, "user"),
        )
        tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
        with tx:
            conn.execute(
                sa.text(
                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-149-downgrade'"
                )
            )
            conn.execute(sa.text(ddl))
            conn.execute(
                sa.text(
                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-149-downgrade'"
                )
            )

    # --- new_chat_messages -----------------------------------------------
    if _column_exists(conn, "new_chat_messages", "source"):
        op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY DEFAULT")
    _drop_index_if_exists("uq_new_chat_messages_inbound_platform", "new_chat_messages")
    _drop_index_if_exists("ix_new_chat_messages_source", "new_chat_messages")
    _drop_column_if_exists("new_chat_messages", "platform_metadata")
    _drop_column_if_exists("new_chat_messages", "source")

    # --- new_chat_threads ------------------------------------------------
    _drop_index_if_exists(
        "ix_new_chat_threads_external_chat_binding_id", "new_chat_threads"
    )
    _drop_index_if_exists("ix_new_chat_threads_source", "new_chat_threads")
    if _constraint_exists(
        conn,
        "new_chat_threads",
        "fk_new_chat_threads_external_chat_external_chat_binding_id",
    ):
        op.drop_constraint(
            "fk_new_chat_threads_external_chat_external_chat_binding_id",
            "new_chat_threads",
            type_="foreignkey",
        )
    _drop_column_if_exists("new_chat_threads", "external_chat_binding_id")
    _drop_column_if_exists("new_chat_threads", "source")

    # --- external_chat_inbound_events ------------------------------------
    _drop_index_if_exists(
        "ix_external_chat_inbound_binding_received_at", "external_chat_inbound_events"
    )
    _drop_index_if_exists(
        "ix_external_chat_inbound_request_id", "external_chat_inbound_events"
    )
    _drop_index_if_exists(
        "ix_external_chat_inbound_status_received_at", "external_chat_inbound_events"
    )
    if _table_exists(conn, "external_chat_inbound_events"):
        op.drop_table("external_chat_inbound_events")

    # --- external_chat_bindings ------------------------------------------
    _drop_index_if_exists(
        "ix_external_chat_bindings_search_space_state",
        "external_chat_bindings",
    )
    _drop_index_if_exists(
        "ix_external_chat_bindings_user_state", "external_chat_bindings"
    )
    _drop_index_if_exists(
        "uq_external_chat_bindings_pairing_code_pending",
        "external_chat_bindings",
    )
    _drop_index_if_exists(
        "uq_external_chat_bindings_account_peer_active",
        "external_chat_bindings",
    )
    if _table_exists(conn, "external_chat_bindings"):
        op.drop_table("external_chat_bindings")

    # --- external_chat_accounts ------------------------------------------
    _drop_index_if_exists(
        "uq_external_chat_accounts_system_platform", "external_chat_accounts"
    )
    _drop_index_if_exists(
        "uq_external_chat_accounts_owner_platform", "external_chat_accounts"
    )
    _drop_index_if_exists(
        "uq_external_chat_accounts_webhook_secret", "external_chat_accounts"
    )
    if _table_exists(conn, "external_chat_accounts"):
        op.drop_table("external_chat_accounts")

    # --- Enum types (dropped last, once no table uses them) --------------
    for enum_name in (
        "external_chat_event_status",
        "external_chat_event_kind",
        "external_chat_peer_kind",
        "external_chat_binding_state",
        "external_chat_health_status",
        "external_chat_account_mode",
        "external_chat_platform",
    ):
        postgresql.ENUM(name=enum_name).drop(conn, checkfirst=True)

View file

@ -0,0 +1,102 @@
"""add slack gateway platform
Revision ID: 150
Revises: 149
Create Date: 2026-05-31
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "150"
down_revision: str | None = "149"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def _enum_value_exists(enum_name: str, value: str) -> bool:
    """Report whether the enum type ``enum_name`` already has the label ``value``."""
    query = sa.text(
        "SELECT 1 FROM pg_enum e "
        "JOIN pg_type t ON t.oid = e.enumtypid "
        "WHERE t.typname = :enum_name AND e.enumlabel = :value"
    )
    bind = op.get_bind()
    found = bind.execute(query, {"enum_name": enum_name, "value": value}).fetchone()
    return found is not None
def _index_exists(index_name: str) -> bool:
    """Report whether an index called ``index_name`` exists in the current schema.

    Consults ``pg_indexes`` filtered by ``current_schema()``.
    """
    lookup = sa.text(
        "SELECT 1 FROM pg_indexes "
        "WHERE schemaname = current_schema() AND indexname = :index_name"
    )
    row = op.get_bind().execute(lookup, {"index_name": index_name}).fetchone()
    return row is not None
def upgrade() -> None:
    """Add the ``slack`` platform value and make system-account uniqueness team-aware.

    The existing per-platform unique index is rebuilt so it only covers system
    accounts whose ``cursor_state`` has no ``team_id`` key; a second unique
    index covers ``(platform, team_id)`` for the accounts that do have one.
    """
    accounts_table = "external_chat_accounts"
    platform_index = "uq_external_chat_accounts_system_platform"

    slack_missing = not _enum_value_exists("external_chat_platform", "slack")
    if slack_missing:
        op.execute("ALTER TYPE external_chat_platform ADD VALUE 'slack'")

    # The predicate changes, so the old index is dropped before being recreated.
    if _index_exists(platform_index):
        op.drop_index(platform_index, table_name=accounts_table)

    op.create_index(
        platform_index,
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true AND NOT (cursor_state ? 'team_id')"
        ),
        if_not_exists=True,
    )

    # Expression index: one system account per (platform, Slack team id).
    team_id_expr = sa.text("(cursor_state ->> 'team_id')")
    op.create_index(
        "uq_external_chat_accounts_slack_team",
        accounts_table,
        ["platform", team_id_expr],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true AND cursor_state ? 'team_id'"
        ),
        if_not_exists=True,
    )
def downgrade() -> None:
    """Drop the Slack team index and restore the plain system-platform index.

    The restored index is unique on ``platform`` for every row with
    ``is_system_account = true``. The ``slack`` enum value is left in place.
    """
    accounts_table = "external_chat_accounts"
    platform_index = "uq_external_chat_accounts_system_platform"

    # Both drops are guarded so the downgrade can be re-run safely.
    for stale_index in ("uq_external_chat_accounts_slack_team", platform_index):
        if _index_exists(stale_index):
            op.drop_index(stale_index, table_name=accounts_table)

    op.create_index(
        platform_index,
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=sa.text("is_system_account = true"),
        if_not_exists=True,
    )
    # PostgreSQL enum values are intentionally not removed on downgrade.

View file

@ -0,0 +1,106 @@
"""add discord gateway platform
Revision ID: 151
Revises: 150
Create Date: 2026-06-01
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "151"
down_revision: str | None = "150"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def _enum_value_exists(enum_name: str, value: str) -> bool:
    """Tell whether the PostgreSQL enum ``enum_name`` already carries ``value``.

    Runs a ``pg_enum`` / ``pg_type`` lookup on the migration connection.
    """
    bind = op.get_bind()
    statement = sa.text(
        "SELECT 1 FROM pg_enum e "
        "JOIN pg_type t ON t.oid = e.enumtypid "
        "WHERE t.typname = :enum_name AND e.enumlabel = :value"
    )
    result = bind.execute(statement, {"enum_name": enum_name, "value": value})
    return result.fetchone() is not None
def _index_exists(index_name: str) -> bool:
    """Tell whether ``index_name`` is listed in ``pg_indexes`` for the current schema."""
    bind = op.get_bind()
    statement = sa.text(
        "SELECT 1 FROM pg_indexes "
        "WHERE schemaname = current_schema() AND indexname = :index_name"
    )
    result = bind.execute(statement, {"index_name": index_name})
    return result.fetchone() is not None
def upgrade() -> None:
    """Add the ``discord`` platform value and make system-account uniqueness guild-aware.

    The per-platform unique index is rebuilt to exclude system accounts whose
    ``cursor_state`` carries a ``team_id`` or a ``guild_id`` key, and a new
    unique index covers ``(platform, guild_id)`` for accounts with a guild.
    """
    accounts_table = "external_chat_accounts"
    platform_index = "uq_external_chat_accounts_system_platform"

    discord_missing = not _enum_value_exists("external_chat_platform", "discord")
    if discord_missing:
        op.execute("ALTER TYPE external_chat_platform ADD VALUE 'discord'")

    # Rebuild the index because its WHERE predicate gains the guild exclusion.
    if _index_exists(platform_index):
        op.drop_index(platform_index, table_name=accounts_table)

    op.create_index(
        platform_index,
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true "
            "AND NOT (cursor_state ? 'team_id') "
            "AND NOT (cursor_state ? 'guild_id')"
        ),
        if_not_exists=True,
    )

    # Expression index: one system account per (platform, Discord guild id).
    guild_id_expr = sa.text("(cursor_state ->> 'guild_id')")
    op.create_index(
        "uq_external_chat_accounts_discord_guild",
        accounts_table,
        ["platform", guild_id_expr],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true AND cursor_state ? 'guild_id'"
        ),
        if_not_exists=True,
    )
def downgrade() -> None:
    """Drop the Discord guild index and restore the previous system-platform index.

    The restored predicate excludes only accounts with a ``team_id`` key.
    The ``discord`` enum value is left in place.
    """
    accounts_table = "external_chat_accounts"
    platform_index = "uq_external_chat_accounts_system_platform"

    # Guarded drops keep the downgrade re-runnable.
    for stale_index in ("uq_external_chat_accounts_discord_guild", platform_index):
        if _index_exists(stale_index):
            op.drop_index(stale_index, table_name=accounts_table)

    op.create_index(
        platform_index,
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true AND NOT (cursor_state ? 'team_id')"
        ),
        if_not_exists=True,
    )
    # PostgreSQL enum values are intentionally not removed on downgrade.

View file

@ -0,0 +1,85 @@
"""add document_files table for stored original uploads
Revision ID: 152
Revises: 151
"""
from collections.abc import Sequence
from alembic import op
revision: str = "152"
down_revision: str | None = "151"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the ``document_file_kind`` enum, the ``document_files`` table and its indexes.

    Every statement is guarded (``IF NOT EXISTS``), so running the migration
    against a database that already has some of these objects does not fail.
    """
    # The enum type must precede the table that references it.
    create_enum_sql = """
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_type WHERE typname = 'document_file_kind'
) THEN
CREATE TYPE document_file_kind AS ENUM (
'ORIGINAL', 'REDACTED', 'FILLED_FORM'
);
END IF;
END
$$;
"""
    op.execute(create_enum_sql)

    create_table_sql = """
CREATE TABLE IF NOT EXISTS document_files (
id SERIAL PRIMARY KEY,
document_id INTEGER NOT NULL
REFERENCES documents(id) ON DELETE CASCADE,
search_space_id INTEGER NOT NULL
REFERENCES searchspaces(id) ON DELETE CASCADE,
kind document_file_kind NOT NULL DEFAULT 'ORIGINAL',
storage_backend VARCHAR(32) NOT NULL,
storage_key TEXT NOT NULL,
original_filename TEXT NOT NULL,
mime_type TEXT,
size_bytes BIGINT NOT NULL,
checksum_sha256 VARCHAR(64),
created_by_id UUID
REFERENCES "user"(id) ON DELETE SET NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
"""
    op.execute(create_table_sql)

    # One single-column index per lookup column, created in this order.
    indexed_columns = (
        "document_id",
        "search_space_id",
        "kind",
        "created_by_id",
        "created_at",
    )
    for column in indexed_columns:
        op.execute(
            f"CREATE INDEX IF NOT EXISTS ix_document_files_{column} "
            f"ON document_files({column});"
        )
def downgrade() -> None:
    """Remove the ``document_files`` indexes, table and enum type, in that order."""
    # Indexes go first, in reverse order of creation.
    index_names = (
        "ix_document_files_created_at",
        "ix_document_files_created_by_id",
        "ix_document_files_kind",
        "ix_document_files_search_space_id",
        "ix_document_files_document_id",
    )
    for index_name in index_names:
        op.execute(f"DROP INDEX IF EXISTS {index_name};")

    # The table must be gone before the enum type it references can be dropped.
    op.execute("DROP TABLE IF EXISTS document_files;")
    op.execute("DROP TYPE IF EXISTS document_file_kind;")

View file

@ -0,0 +1,121 @@
"""restore automation_runs to zero_publication
Migration 149's ``SET TABLE`` dropped ``automation_runs`` (added in 148),
breaking the dashboard live run ticker with a SchemaVersionNotSupported
reload loop. Re-emit the publication with ``automation_runs`` using the
``COMMENT`` bookend pattern so zero-cache fires its schema-change hook.
Revision ID: 153
Revises: 152
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "153"
down_revision: str | None = "152"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# Name of the PostgreSQL publication whose table list this migration rewrites.
PUBLICATION_NAME = "zero_publication"

# Columns of ``documents`` placed in the publication's column list.
DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
]

# Columns of ``"user"`` placed in the publication's column list.
USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
]

# Columns of ``automation_runs`` placed in the publication's column list.
AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
]
def _has_zero_version(conn, table: str) -> bool:
    """Return whether ``table`` in the current schema has a ``_0_version`` column.

    The lookup is restricted to ``current_schema()``. Without that filter a
    same-named table in any other schema would count as a match, and the
    generated ``ALTER PUBLICATION`` (which uses unqualified table names) could
    list a column the targeted table does not have.

    Args:
        conn: Connection the query is executed on.
        table: Unqualified table name to inspect.
    """
    row = conn.execute(
        sa.text(
            "SELECT 1 FROM information_schema.columns "
            "WHERE table_schema = current_schema() "
            "AND table_name = :tbl AND column_name = '_0_version'"
        ),
        {"tbl": table},
    ).fetchone()
    return row is not None
def _set_table_ddl(*, with_automation_runs: bool, conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for the publication.

    ``documents`` and ``"user"`` are published with explicit column lists, each
    extended with ``"_0_version"`` when that column exists on the table.
    ``automation_runs`` is appended only when ``with_automation_runs`` is true.
    """

    def _column_list(base_columns: list[str], table: str) -> str:
        # Append the quoted version column only when the table actually has it.
        extra = ['"_0_version"'] if _has_zero_version(conn, table) else []
        return ", ".join(base_columns + extra)

    document_columns = _column_list(DOCUMENT_COLS, "documents")
    user_columns = _column_list(USER_COLS, "user")

    published = [
        "notifications",
        f"documents ({document_columns})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        f'"user" ({user_columns})',
    ]
    if with_automation_runs:
        automation_columns = ", ".join(AUTOMATION_RUN_COLS)
        published.append(f"automation_runs ({automation_columns})")

    table_clause = ", ".join(published)
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + table_clause
def _resync(*, with_automation_runs: bool, tag: str) -> None:
    """Re-emit the publication's table list between two ``COMMENT`` statements.

    Does nothing when the publication does not exist. Otherwise the three
    statements run inside one transaction: a savepoint when the connection is
    already in a transaction, a fresh transaction if not.
    """
    bind = op.get_bind()
    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()

    def _comment(label: str) -> None:
        # Bookend marker written before and after the table-list change.
        bind.execute(
            sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS '{label}'")
        )

    with scope:
        _comment(f"pre-{tag}")
        ddl = _set_table_ddl(with_automation_runs=with_automation_runs, conn=bind)
        bind.execute(sa.text(ddl))
        _comment(f"post-{tag}")
def upgrade() -> None:
    """Resync the publication with ``automation_runs`` included."""
    _resync(with_automation_runs=True, tag="153-resync")
def downgrade() -> None:
    """Resync the publication without ``automation_runs``."""
    _resync(with_automation_runs=False, tag="153-downgrade")

View file

@ -0,0 +1,147 @@
"""remove document summary llm settings
Revision ID: 154
Revises: 153
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "154"
down_revision: str | None = "153"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# Name of the PostgreSQL publication re-emitted after the column changes.
PUBLICATION_NAME = "zero_publication"

# Columns of ``documents`` placed in the publication's column list.
DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
]

# Columns of ``"user"`` placed in the publication's column list.
USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
]

# Columns of ``automation_runs`` placed in the publication's column list.
AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
]
def _column_exists(conn, table: str, column: str) -> bool:
    """Return whether ``table`` in the current schema has a column named ``column``.

    The lookup is restricted to ``current_schema()``. This migration uses the
    answer to decide whether to drop or re-add columns on unqualified table
    names, so a same-named table in another schema must not count as a match:
    it would make ``upgrade`` try to drop an already-dropped column, or make
    ``downgrade`` skip restoring one.

    Args:
        conn: Connection the query is executed on.
        table: Unqualified table name to inspect.
        column: Column name to look for.
    """
    row = conn.execute(
        sa.text(
            "SELECT 1 FROM information_schema.columns "
            "WHERE table_schema = current_schema() "
            "AND table_name = :table AND column_name = :column"
        ),
        {"table": table, "column": column},
    ).fetchone()
    return row is not None
def _has_zero_version(conn, table: str) -> bool:
    """Return whether ``table`` has a ``_0_version`` column."""
    return _column_exists(conn, table, "_0_version")
def _set_table_ddl(conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for the publication.

    ``documents`` and ``"user"`` get explicit column lists, each extended with
    ``"_0_version"`` when the table has that column; ``automation_runs`` is
    always included with its fixed column list.
    """
    version_column = ['"_0_version"']

    document_columns = list(DOCUMENT_COLS)
    if _has_zero_version(conn, "documents"):
        document_columns = document_columns + version_column

    user_columns = list(USER_COLS)
    if _has_zero_version(conn, "user"):
        user_columns = user_columns + version_column

    published = (
        "notifications",
        f"documents ({', '.join(document_columns)})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        f'"user" ({", ".join(user_columns)})',
        f"automation_runs ({', '.join(AUTOMATION_RUN_COLS)})",
    )
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(published)
def _resync_zero_publication(tag: str) -> None:
    """Re-emit the publication's table list, bracketed by ``COMMENT`` statements.

    Returns immediately when the publication does not exist. The three
    statements share one transaction: a savepoint if the connection is already
    inside a transaction, a new transaction otherwise.
    """
    bind = op.get_bind()
    found = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not found:
        return

    comment_sql = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS "
    scope = bind.begin_nested() if bind.in_transaction() else bind.begin()
    with scope:
        bind.execute(sa.text(comment_sql + f"'pre-{tag}'"))
        bind.execute(sa.text(_set_table_ddl(bind)))
        bind.execute(sa.text(comment_sql + f"'post-{tag}'"))
def upgrade() -> None:
    """Drop the document-summary columns (when present) and resync the publication."""
    bind = op.get_bind()
    obsolete_columns = (
        ("searchspaces", "document_summary_llm_id"),
        ("search_source_connectors", "enable_summary"),
    )
    for table_name, column_name in obsolete_columns:
        # Guarded so the migration tolerates a column that is already gone.
        if _column_exists(bind, table_name, column_name):
            op.drop_column(table_name, column_name)
    _resync_zero_publication("154-summary-removal")
def downgrade() -> None:
    """Re-add the document-summary columns (when missing) and resync the publication.

    ``searchspaces.document_summary_llm_id`` comes back as a nullable integer
    with server default ``0``; ``search_source_connectors.enable_summary`` as
    a non-null boolean with server default ``false``.
    """
    bind = op.get_bind()

    llm_column_missing = not _column_exists(
        bind, "searchspaces", "document_summary_llm_id"
    )
    if llm_column_missing:
        llm_column = sa.Column(
            "document_summary_llm_id",
            sa.Integer(),
            nullable=True,
            server_default="0",
        )
        op.add_column("searchspaces", llm_column)

    summary_flag_missing = not _column_exists(
        bind, "search_source_connectors", "enable_summary"
    )
    if summary_flag_missing:
        summary_flag = sa.Column(
            "enable_summary",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        )
        op.add_column("search_source_connectors", summary_flag)

    _resync_zero_publication("154-summary-removal-downgrade")

View file

@ -0,0 +1,23 @@
"""reconcile zero_publication from canonical definition
Revision ID: 155
Revises: 154
"""
from collections.abc import Sequence
from alembic import op
from app.zero_publication import apply_publication
revision: str = "155"
down_revision: str | None = "154"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Apply the publication definition from ``app.zero_publication`` on the migration connection."""
    apply_publication(op.get_bind())
def downgrade() -> None:
    """Intentional no-op: historical publication shapes are immutable, so nothing is reverted."""

View file

@ -1,557 +0,0 @@
"""Vision autocomplete agent with scoped filesystem exploration.
Converts the stateless single-shot vision autocomplete into an agent that
seeds a virtual filesystem from KB search results and lets the vision LLM
explore documents via ``ls``, ``read_file``, ``glob``, ``grep``, etc.
before generating the final completion.
Performance: KB search and agent graph compilation run in parallel so
the only sequential latency is KB-search (or agent compile, whichever is
slower) + the agent's LLM turns. There is no separate "query extraction"
LLM call — the window title is used directly as the KB search query.
"""
from __future__ import annotations
import asyncio
import json
import logging
import re
import uuid
from collections.abc import AsyncGenerator
from typing import Any
from deepagents.graph import BASE_AGENT_PROMPT
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from langchain.agents import create_agent
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, ToolMessage
from app.agents.new_chat.document_xml import build_document_xml
from app.agents.new_chat.middleware.filesystem import SurfSenseFilesystemMiddleware
from app.agents.new_chat.middleware.knowledge_search import (
search_knowledge_base,
)
from app.agents.new_chat.path_resolver import (
DOCUMENTS_ROOT,
build_path_index,
doc_to_virtual_path,
)
from app.db import shielded_async_session
from app.services.new_streaming_service import VercelStreamingService
try:
    from deepagents.backends.utils import create_file_data
except Exception:  # pragma: no cover - defensive
    # Local stand-in used only when the deepagents helper cannot be imported.

    def create_file_data(content: str) -> dict[str, Any]:
        """Wrap ``content`` as a dict holding its newline-split lines under ``"content"``.

        NOTE(review): presumably mirrors the shape produced by the deepagents
        helper — confirm against that library.
        """
        return {"content": content.split("\n")}
async def _build_autocomplete_filesystem(
    *,
    documents: Any,
    search_space_id: int,
) -> tuple[dict[str, Any], dict[int, str]]:
    """Turn KB search results into a ``state['files']``-shaped mapping.

    Each usable result (a dict whose ``"document"`` entry has an integer
    ``id``) is rendered to XML and stored under the virtual path given by the
    canonical path resolver. Returns ``(files, doc_id_to_path)``; both are
    empty when ``documents`` is falsy.
    """
    virtual_files: dict[str, Any] = {}
    path_by_doc_id: dict[int, str] = {}
    if not documents:
        return virtual_files, path_by_doc_id

    async with shielded_async_session() as session:
        index = await build_path_index(session, search_space_id)

    for entry in documents:
        if not isinstance(entry, dict):
            continue
        meta = entry.get("document") or {}
        doc_id = meta.get("id")
        if not isinstance(doc_id, int):
            continue

        virtual_path = doc_to_virtual_path(
            doc_id=doc_id,
            title=str(meta.get("title") or "untitled"),
            folder_id=meta.get("folder_id"),
            index=index,
        )

        # Chunk ids that cannot be coerced to int disable highlighting entirely.
        raw_chunk_ids = entry.get("matched_chunk_ids") or []
        try:
            matched_chunks = {int(chunk_id) for chunk_id in raw_chunk_ids}
        except (TypeError, ValueError):
            matched_chunks = set()

        document_xml = build_document_xml(entry, matched_chunk_ids=matched_chunks)
        virtual_files[virtual_path] = create_file_data(document_xml)
        path_by_doc_id[doc_id] = virtual_path

    if not virtual_files:
        # Ensure the synthetic /documents folder is visible even when empty.
        virtual_files.setdefault(
            f"{DOCUMENTS_ROOT}/.placeholder", create_file_data("")
        )
    return virtual_files, path_by_doc_id
logger = logging.getLogger(__name__)
KB_TOP_K = 10
# ---------------------------------------------------------------------------
# System prompt
# ---------------------------------------------------------------------------
AUTOCOMPLETE_SYSTEM_PROMPT = """You are a smart writing assistant that analyzes the user's screen to draft or complete text.
You will receive a screenshot of the user's screen. Your PRIMARY source of truth is the screenshot itself — the visual context determines what to write.
Your job:
1. Analyze the ENTIRE screenshot to understand what the user is working on (email thread, chat conversation, document, code editor, form, etc.).
2. Identify the text area where the user will type.
3. Generate the text the user most likely wants to write based on the visual context.
You also have access to the user's knowledge base documents via filesystem tools. However:
- ONLY consult the knowledge base if the screenshot clearly involves a topic where your KB documents are DIRECTLY relevant (e.g., the user is writing about a specific project/topic that matches a document title).
- Do NOT explore documents just because they exist. Most autocomplete requests can be answered purely from the screenshot.
- If you do read a document, only incorporate information that is 100% relevant to what the user is typing RIGHT NOW. Do not add extra details, background, or tangential information from the KB.
- Keep your output SHORT autocomplete should feel like a natural continuation, not an essay.
Key behavior:
- If the text area is EMPTY, draft a concise response or message based on what you see on screen (e.g., reply to an email, respond to a chat message, continue a document).
- If the text area already has text, continue it naturally typically just a sentence or two.
Rules:
- Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft.
- Match the tone and formality of the surrounding context.
- If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
- Do NOT describe the screenshot or explain your reasoning.
- Do NOT cite or reference documents explicitly just let the knowledge inform your writing naturally.
- If you cannot determine what to write, output an empty JSON array: []
## Output Format
You MUST provide exactly 3 different suggestion options. Each should be a distinct, plausible completion vary the tone, detail level, or angle.
Return your suggestions as a JSON array of exactly 3 strings. Output ONLY the JSON array, nothing else no markdown fences, no explanation, no commentary.
Example format:
["First suggestion text here.", "Second suggestion — a different take.", "Third option with another approach."]
## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`
All file paths must start with a `/`.
- ls: list files and directories at a given path.
- read_file: read a file from the filesystem.
- write_file: create a temporary file in the session (not persisted).
- edit_file: edit a file in the session (not persisted for /documents/ files).
- glob: find files matching a pattern (e.g., "**/*.xml").
- grep: search for text within files.
## When to Use Filesystem Tools
BEFORE reaching for any tool, ask yourself: "Can I write a good completion purely from the screenshot?" If yes, just write it do NOT explore the KB.
Only use tools when:
- The user is clearly writing about a specific topic that likely has detailed information in their KB.
- You need a specific fact, name, number, or reference that the screenshot doesn't provide.
When you do use tools, be surgical:
- Check the `ls` output first. If no document title looks relevant, stop do not read files just to see what's there.
- If a title looks relevant, read only the `<chunk_index>` (first ~20 lines) and jump to matched chunks. Do not read entire documents.
- Extract only the specific information you need and move on to generating the completion.
## Reading Documents Efficiently
Documents are formatted as XML. Each document contains:
- `<document_metadata>` title, type, URL, etc.
- `<chunk_index>` a table of every chunk with its **line range** and a
`matched="true"` flag for chunks that matched the search query.
- `<document_content>` the actual chunks in original document order.
**Workflow**: read the first ~20 lines to see the `<chunk_index>`, identify
chunks marked `matched="true"`, then use `read_file(path, offset=<start_line>,
limit=<lines>)` to jump directly to those sections."""
APP_CONTEXT_BLOCK = """
The user is currently working in "{app_name}" (window: "{window_title}"). Use this to understand the type of application and adapt your tone and format accordingly."""
def _build_autocomplete_system_prompt(app_name: str, window_title: str) -> str:
    """Return the autocomplete system prompt, plus the app-context block when ``app_name`` is set."""
    if not app_name:
        return AUTOCOMPLETE_SYSTEM_PROMPT
    app_context = APP_CONTEXT_BLOCK.format(
        app_name=app_name, window_title=window_title
    )
    return AUTOCOMPLETE_SYSTEM_PROMPT + app_context
# ---------------------------------------------------------------------------
# Pre-compute KB filesystem (runs in parallel with agent compilation)
# ---------------------------------------------------------------------------
class _KBResult:
"""Container for pre-computed KB filesystem results."""
__slots__ = ("files", "ls_ai_msg", "ls_tool_msg")
def __init__(
self,
files: dict[str, Any] | None = None,
ls_ai_msg: AIMessage | None = None,
ls_tool_msg: ToolMessage | None = None,
) -> None:
self.files = files
self.ls_ai_msg = ls_ai_msg
self.ls_tool_msg = ls_tool_msg
@property
def has_documents(self) -> bool:
return bool(self.files)
async def precompute_kb_filesystem(
    search_space_id: int,
    query: str,
    top_k: int = KB_TOP_K,
) -> _KBResult:
    """Search the KB for ``query`` and package the hits as a virtual filesystem.

    Meant to be awaited alongside agent compilation (e.g. via
    ``asyncio.gather``). The result also carries a synthetic ``ls /documents``
    tool call and its tool message. An empty ``_KBResult`` is returned when
    the query is empty, nothing is found, or any step raises.
    """
    if not query:
        return _KBResult()

    try:
        hits = await search_knowledge_base(
            query=query,
            search_space_id=search_space_id,
            top_k=top_k,
        )
        if not hits:
            return _KBResult()

        kb_files, _unused_index = await _build_autocomplete_filesystem(
            documents=hits,
            search_space_id=search_space_id,
        )
        if not kb_files:
            return _KBResult()

        listed_paths = [
            path
            for path, data in kb_files.items()
            if path.startswith("/documents/") and data is not None
        ]
        listing = str(listed_paths) if listed_paths else "No documents found."

        # The fake call and its result share one id so they pair up as a tool exchange.
        call_id = f"auto_ls_{uuid.uuid4().hex[:12]}"
        ls_call = AIMessage(
            content="",
            tool_calls=[
                {"name": "ls", "args": {"path": "/documents"}, "id": call_id}
            ],
        )
        ls_result = ToolMessage(content=listing, tool_call_id=call_id)
        return _KBResult(files=kb_files, ls_ai_msg=ls_call, ls_tool_msg=ls_result)
    except Exception:
        # Best effort: autocomplete still works from the screenshot alone.
        logger.warning(
            "KB pre-computation failed, proceeding without KB", exc_info=True
        )
        return _KBResult()
# ---------------------------------------------------------------------------
# Filesystem middleware — no save_document, no persistence
# ---------------------------------------------------------------------------
class AutocompleteFilesystemMiddleware(SurfSenseFilesystemMiddleware):
    """Filesystem middleware for autocomplete — read-only exploration only.

    Passes ``search_space_id=None`` so the new persistence pipeline is
    bypassed; the autocomplete flow only reads, never commits to Postgres.
    """

    def __init__(self) -> None:
        """Initialise the base middleware with no search space and no creator."""
        # NOTE(review): the no-persistence behaviour is implemented by the base
        # class and is not visible here — confirm ``None`` still disables it.
        super().__init__(search_space_id=None, created_by_id=None)
# ---------------------------------------------------------------------------
# Agent factory
# ---------------------------------------------------------------------------
async def _compile_agent(
    llm: BaseChatModel,
    app_name: str,
    window_title: str,
) -> Any:
    """Build the autocomplete agent graph without blocking the event loop.

    ``create_agent`` is run through ``asyncio.to_thread``. The system prompt is
    the autocomplete prompt followed by ``BASE_AGENT_PROMPT``, and the returned
    agent is configured with a recursion limit of 200.
    """
    prompt_sections = (
        _build_autocomplete_system_prompt(app_name, window_title),
        BASE_AGENT_PROMPT,
    )
    compiled = await asyncio.to_thread(
        create_agent,
        llm,
        system_prompt="\n\n".join(prompt_sections),
        tools=[],
        middleware=[
            AutocompleteFilesystemMiddleware(),
            PatchToolCallsMiddleware(),
            AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
        ],
    )
    return compiled.with_config({"recursion_limit": 200})
async def create_autocomplete_agent(
    llm: BaseChatModel,
    *,
    search_space_id: int,
    kb_query: str,
    app_name: str = "",
    window_title: str = "",
) -> tuple[Any, _KBResult]:
    """Compile the agent and pre-compute the KB filesystem concurrently.

    Returns ``(agent, kb_result)`` so the caller can seed the agent's initial
    state with the pre-computed files.
    """
    compile_job = _compile_agent(llm, app_name, window_title)
    kb_job = precompute_kb_filesystem(search_space_id, kb_query)
    agent, kb_result = await asyncio.gather(compile_job, kb_job)
    return agent, kb_result
# ---------------------------------------------------------------------------
# JSON suggestion parsing (with fallback)
# ---------------------------------------------------------------------------
def _parse_suggestions(raw: str) -> list[str]:
    """Turn the agent's raw output into a list of suggestion strings.

    Each candidate from ``_json_candidates`` is tried in order; the first one
    that decodes to a JSON list of strings wins, with blank entries removed.
    If none qualifies, the stripped raw text is returned as a single
    suggestion. Blank input yields an empty list.
    """
    stripped = raw.strip()
    if not stripped:
        return []

    for candidate in _json_candidates(stripped):
        try:
            decoded = json.loads(candidate)
        except (json.JSONDecodeError, ValueError):
            continue
        if not isinstance(decoded, list):
            continue
        if any(not isinstance(item, str) for item in decoded):
            continue
        return [item for item in decoded if item.strip()]

    return [stripped]
def _json_candidates(text: str) -> list[str]:
"""Yield candidate JSON strings from raw text."""
candidates = [text]
fence = re.search(r"```(?:json)?\s*\n?(.*?)```", text, re.DOTALL)
if fence:
candidates.append(fence.group(1).strip())
bracket = re.search(r"\[.*]", text, re.DOTALL)
if bracket:
candidates.append(bracket.group(0))
return candidates
# ---------------------------------------------------------------------------
# Streaming helper
# ---------------------------------------------------------------------------
async def stream_autocomplete_agent(
    agent: Any,
    input_data: dict[str, Any],
    streaming_service: VercelStreamingService,
    *,
    emit_message_start: bool = True,
) -> AsyncGenerator[str, None]:
    """Stream agent events as Vercel SSE, with thinking steps for tool calls.

    When ``emit_message_start`` is False the caller has already sent the
    ``message_start`` event (e.g. to show preparation steps before the agent
    runs).

    Model text is buffered rather than streamed; once the agent finishes, the
    buffer is parsed into suggestions and emitted as one ``suggestions`` data
    event followed by finish/done. Any exception is logged and reported as a
    generic error event followed by done.

    Args:
        agent: Object exposing ``astream_events(input, config=..., version=...)``.
        input_data: Initial input passed to the agent.
        streaming_service: Formatter producing the SSE payload strings.
        emit_message_start: Whether to emit the ``message_start`` event here.

    Yields:
        Formatted event strings produced by ``streaming_service``.
    """
    # A fresh thread id per call keeps each autocomplete run isolated.
    thread_id = uuid.uuid4().hex
    config = {"configurable": {"thread_id": thread_id}}
    text_buffer: list[str] = []
    # Number of tools currently running; model output is ignored while > 0.
    active_tool_depth = 0
    thinking_step_counter = 0
    # run_id of a tool invocation -> id of the thinking step shown for it.
    tool_step_ids: dict[str, str] = {}
    step_titles: dict[str, str] = {}
    completed_step_ids: set[str] = set()
    last_active_step_id: str | None = None

    def next_thinking_step_id() -> str:
        """Return the next sequential ``autocomplete-step-N`` id."""
        nonlocal thinking_step_counter
        thinking_step_counter += 1
        return f"autocomplete-step-{thinking_step_counter}"

    def complete_current_step() -> str | None:
        """Mark the active step complete and return its event, or None if there is none."""
        nonlocal last_active_step_id
        if last_active_step_id and last_active_step_id not in completed_step_ids:
            completed_step_ids.add(last_active_step_id)
            title = step_titles.get(last_active_step_id, "Done")
            event = streaming_service.format_thinking_step(
                step_id=last_active_step_id,
                title=title,
                status="complete",
            )
            last_active_step_id = None
            return event
        return None

    if emit_message_start:
        yield streaming_service.format_message_start()

    # Initial step, shown until the first tool call or the end of the run.
    gen_step_id = next_thinking_step_id()
    last_active_step_id = gen_step_id
    step_titles[gen_step_id] = "Generating suggestions"
    yield streaming_service.format_thinking_step(
        step_id=gen_step_id,
        title="Generating suggestions",
        status="in_progress",
    )

    try:
        async for event in agent.astream_events(
            input_data, config=config, version="v2"
        ):
            event_type = event.get("event", "")

            if event_type == "on_chat_model_stream":
                # Skip model output produced inside a tool or tagged internal.
                if active_tool_depth > 0:
                    continue
                if "surfsense:internal" in event.get("tags", []):
                    continue
                chunk = event.get("data", {}).get("chunk")
                if chunk and hasattr(chunk, "content"):
                    content = chunk.content
                    # NOTE(review): chunks streamed during a turn that ends in
                    # tool calls also land in text_buffer — confirm intended.
                    if content and isinstance(content, str):
                        text_buffer.append(content)

            elif event_type == "on_chat_model_end":
                if active_tool_depth > 0:
                    continue
                if "surfsense:internal" in event.get("tags", []):
                    continue
                output = event.get("data", {}).get("output")
                if output and hasattr(output, "content"):
                    # A turn that requests tools is not a final answer.
                    if getattr(output, "tool_calls", None):
                        continue
                    content = output.content
                    # Fallback: use the final message only if nothing was streamed.
                    if content and isinstance(content, str) and not text_buffer:
                        text_buffer.append(content)

            elif event_type == "on_tool_start":
                active_tool_depth += 1
                tool_name = event.get("name", "unknown_tool")
                run_id = event.get("run_id", "")
                tool_input = event.get("data", {}).get("input", {})

                # Close the previous step before opening the tool's own step.
                step_event = complete_current_step()
                if step_event:
                    yield step_event

                tool_step_id = next_thinking_step_id()
                tool_step_ids[run_id] = tool_step_id
                last_active_step_id = tool_step_id

                title, items = _describe_tool_call(tool_name, tool_input)
                step_titles[tool_step_id] = title
                yield streaming_service.format_thinking_step(
                    step_id=tool_step_id,
                    title=title,
                    status="in_progress",
                    items=items,
                )

            elif event_type == "on_tool_end":
                # Clamped at zero so an unmatched end event cannot go negative.
                active_tool_depth = max(0, active_tool_depth - 1)
                run_id = event.get("run_id", "")
                step_id = tool_step_ids.pop(run_id, None)
                if step_id and step_id not in completed_step_ids:
                    completed_step_ids.add(step_id)
                    title = step_titles.get(step_id, "Done")
                    yield streaming_service.format_thinking_step(
                        step_id=step_id,
                        title=title,
                        status="complete",
                    )
                    if last_active_step_id == step_id:
                        last_active_step_id = None

        # Close whichever step is still open once the agent has finished.
        step_event = complete_current_step()
        if step_event:
            yield step_event

        raw_text = "".join(text_buffer)
        suggestions = _parse_suggestions(raw_text)

        yield streaming_service.format_data("suggestions", {"options": suggestions})
        yield streaming_service.format_finish()
        yield streaming_service.format_done()
    except Exception as e:
        # The client only sees a generic message; details go to the log.
        logger.error(f"Autocomplete agent streaming error: {e}", exc_info=True)
        yield streaming_service.format_error("Autocomplete failed. Please try again.")
        yield streaming_service.format_done()
def _describe_tool_call(tool_name: str, tool_input: Any) -> tuple[str, list[str]]:
"""Return a human-readable (title, items) for a tool call thinking step."""
inp = tool_input if isinstance(tool_input, dict) else {}
if tool_name == "ls":
path = inp.get("path", "/")
return "Listing files", [path]
if tool_name == "read_file":
fp = inp.get("file_path", "")
display = fp if len(fp) <= 80 else "" + fp[-77:]
return "Reading file", [display]
if tool_name == "write_file":
fp = inp.get("file_path", "")
display = fp if len(fp) <= 80 else "" + fp[-77:]
return "Writing file", [display]
if tool_name == "edit_file":
fp = inp.get("file_path", "")
display = fp if len(fp) <= 80 else "" + fp[-77:]
return "Editing file", [display]
if tool_name == "glob":
pat = inp.get("pattern", "")
base = inp.get("path", "/")
return "Searching files", [f"{pat} in {base}"]
if tool_name == "grep":
pat = inp.get("pattern", "")
path = inp.get("path", "")
display_pat = pat[:60] + ("" if len(pat) > 60 else "")
return "Searching content", [
f'"{display_pat}"' + (f" in {path}" if path else "")
]
return f"Using {tool_name}", []

View file

@ -0,0 +1,5 @@
"""Chat agents category.
Groups the conversational agents that share a kernel: ``anonymous_chat`` and
``multi_agent_chat``. Code shared by *both* lives in ``chat/shared/``.
"""

View file

@ -0,0 +1,14 @@
"""Anonymous / free-chat agent.
The no-login chat experience: a deliberately minimal agent that bypasses the
full SurfSense deep-agent stack (filesystem, knowledge-base persistence,
subagents, skills, memory) and answers with an optional ``web_search`` tool and
an optional read-only uploaded document. See :mod:`.agent` for details.
"""
from app.agents.chat.anonymous_chat.agent import (
build_anonymous_system_prompt,
create_anonymous_chat_agent,
)
__all__ = ["build_anonymous_system_prompt", "create_anonymous_chat_agent"]

View file

@ -27,12 +27,12 @@ from langchain.agents.middleware import (
from langchain_core.language_models import BaseChatModel from langchain_core.language_models import BaseChatModel
from langgraph.types import Checkpointer from langgraph.types import Checkpointer
from app.agents.new_chat.context import SurfSenseContextSchema from app.agents.chat.shared.context import SurfSenseContextSchema
from app.agents.new_chat.middleware import ( from app.agents.chat.shared.middleware import (
RetryAfterMiddleware, RetryAfterMiddleware,
create_surfsense_compaction_middleware, create_surfsense_compaction_middleware,
) )
from app.agents.new_chat.tools.web_search import create_web_search_tool from app.agents.chat.shared.tools.web_search import create_web_search_tool
# Cap how much of an uploaded document we inline into the system prompt. The # Cap how much of an uploaded document we inline into the system prompt. The
# upload endpoint allows files up to several MB, but the doc is re-sent on # upload endpoint allows files up to several MB, but the doc is re-sent on

View file

@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import time
from collections.abc import Sequence from collections.abc import Sequence
from typing import Any from typing import Any
@ -11,13 +12,16 @@ from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.middleware.stack import ( from app.agents.chat.multi_agent_chat.main_agent.middleware.stack import (
build_main_agent_deepagent_middleware, build_main_agent_deepagent_middleware,
) )
from app.agents.new_chat.context import SurfSenseContextSchema from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.chat.shared.context import SurfSenseContextSchema
from app.db import ChatVisibility from app.db import ChatVisibility
from app.utils.perf import get_perf_logger
_perf_log = get_perf_logger()
def build_compiled_agent_graph_sync( def build_compiled_agent_graph_sync(
@ -43,6 +47,7 @@ def build_compiled_agent_graph_sync(
disabled_tools: list[str] | None = None, disabled_tools: list[str] | None = None,
): ):
"""Sync compile: middleware + ``create_agent`` (run via ``asyncio.to_thread``).""" """Sync compile: middleware + ``create_agent`` (run via ``asyncio.to_thread``)."""
mw_start = time.perf_counter()
main_agent_middleware = build_main_agent_deepagent_middleware( main_agent_middleware = build_main_agent_deepagent_middleware(
llm=llm, llm=llm,
tools=tools, tools=tools,
@ -63,7 +68,9 @@ def build_compiled_agent_graph_sync(
mcp_tools_by_agent=mcp_tools_by_agent, mcp_tools_by_agent=mcp_tools_by_agent,
disabled_tools=disabled_tools, disabled_tools=disabled_tools,
) )
mw_elapsed = time.perf_counter() - mw_start
create_start = time.perf_counter()
agent = create_agent( agent = create_agent(
llm, llm,
system_prompt=final_system_prompt, system_prompt=final_system_prompt,
@ -72,6 +79,15 @@ def build_compiled_agent_graph_sync(
context_schema=SurfSenseContextSchema, context_schema=SurfSenseContextSchema,
checkpointer=checkpointer, checkpointer=checkpointer,
) )
create_elapsed = time.perf_counter() - create_start
_perf_log.info(
"[graph_compile] middleware_build=%.3fs main_create_agent=%.3fs "
"total=%.3fs mw_count=%d",
mw_elapsed,
create_elapsed,
mw_elapsed + create_elapsed,
len(main_agent_middleware),
)
return agent.with_config( return agent.with_config(
{ {
"recursion_limit": 10_000, "recursion_limit": 10_000,

View file

@ -0,0 +1,10 @@
"""Action-log middleware: audit row per tool call (impl + builder)."""
from .builder import build_action_log_mw
from .middleware import ActionLogMiddleware, ToolDefinition
__all__ = [
"ActionLogMiddleware",
"ToolDefinition",
"build_action_log_mw",
]

View file

@ -4,11 +4,10 @@ from __future__ import annotations
import logging import logging
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ActionLogMiddleware from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from app.agents.new_chat.tools.registry import BUILTIN_TOOLS
from ..shared.flags import enabled from .middleware import ActionLogMiddleware
def build_action_log_mw( def build_action_log_mw(
@ -21,12 +20,13 @@ def build_action_log_mw(
if not enabled(flags, "enable_action_log") or thread_id is None: if not enabled(flags, "enable_action_log") or thread_id is None:
return None return None
try: try:
tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS} # No built-in tool declares a ``reverse`` callable yet, so the action
# log runs without a tool_definitions map. Reversibility is opt-in per
# tool via ``ToolDefinition.reverse`` and can be wired here when used.
return ActionLogMiddleware( return ActionLogMiddleware(
thread_id=thread_id, thread_id=thread_id,
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=user_id, user_id=user_id,
tool_definitions=tool_defs_by_name,
) )
except Exception: # pragma: no cover - defensive except Exception: # pragma: no cover - defensive
logging.warning( logging.warning(

View file

@ -1,25 +1,15 @@
"""Append-only action-log middleware for the SurfSense agent. """Append-only action-log middleware for the SurfSense agent.
Wraps every tool call via :meth:`AgentMiddleware.awrap_tool_call` and writes Wraps every tool call and writes a row to :class:`~app.db.AgentActionLog`
a row to :class:`~app.db.AgentActionLog` after the tool returns. Tools opt after the tool returns. Tools opt into reversibility via a ``reverse``
into reversibility by declaring a ``reverse`` callable on their callable on their :class:`ToolDefinition`; the rendered descriptor powers
:class:`~app.agents.new_chat.tools.registry.ToolDefinition`; the rendered
descriptor is persisted in ``reverse_descriptor`` for use by
``/api/threads/{thread_id}/revert/{action_id}``. ``/api/threads/{thread_id}/revert/{action_id}``.
Design points: Logging is fully defensive DB-write failures are swallowed so the tool's
result is always returned untouched. Only metadata (name, capped args,
* **Defensive.** Logging never blocks the agent. We catch every exception result_id, reverse_descriptor) is stored; tool output stays in the
on the DB write path and emit a warning; the tool's ``ToolMessage`` checkpoint. Reversibility is best-effort: a reverse callable that raises
result is always returned untouched. just leaves the action non-reversible.
* **Lightweight payload.** Only the tool ``name`` + ``args`` (capped) +
``result_id`` + ``reverse_descriptor`` are stored. Tool output text
remains in the LangGraph checkpoint / spilled tool-output files.
* **Best-effort reversibility.** We invoke ``reverse(args, result_obj)``
with the parsed JSON result when the tool's content is a JSON object;
otherwise the raw text is passed. Exceptions in the reverse callable
are swallowed and logged a failed descriptor render simply means the
action is NOT marked reversible.
""" """
from __future__ import annotations from __future__ import annotations
@ -27,14 +17,14 @@ from __future__ import annotations
import json import json
import logging import logging
from collections.abc import Awaitable, Callable from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
from langchain.agents.middleware import AgentMiddleware from langchain.agents.middleware import AgentMiddleware
from langchain_core.callbacks import adispatch_custom_event from langchain_core.callbacks import adispatch_custom_event
from langchain_core.messages import ToolMessage from langchain_core.messages import ToolMessage
from app.agents.new_chat.feature_flags import get_flags from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
from app.agents.new_chat.tools.registry import ToolDefinition
if TYPE_CHECKING: # pragma: no cover - type-only if TYPE_CHECKING: # pragma: no cover - type-only
from langchain.agents.middleware.types import ToolCallRequest from langchain.agents.middleware.types import ToolCallRequest
@ -44,6 +34,31 @@ if TYPE_CHECKING: # pragma: no cover - type-only
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@dataclass
class ToolDefinition:
"""Reversibility descriptor consumed by :class:`ActionLogMiddleware`.
Only ``name`` and ``reverse`` are read by the middleware; the remaining
fields let callers and tests describe a tool declaratively. A tool is
marked reversible in the action log when ``reverse`` is set and renders a
descriptor without raising.
Attributes:
name: Unique identifier for the tool.
description: Human-readable description of what the tool does.
factory: Optional callable that builds the tool (unused by the
middleware; retained for declarative call sites/tests).
reverse: Optional callable that, given the tool's ``(args, result)``,
returns a ``ReverseDescriptor`` describing the inverse invocation.
"""
name: str
description: str = ""
factory: Callable[[dict[str, Any]], Any] | None = None
reverse: Callable[[dict[str, Any], Any], dict[str, Any]] | None = None
# Cap for the persisted ``args`` JSON to avoid bloating the action log with # Cap for the persisted ``args`` JSON to avoid bloating the action log with
# accidentally-huge inputs. Values are truncated and a flag is set in the # accidentally-huge inputs. Values are truncated and a flag is set in the
# stored payload so consumers can detect truncation. # stored payload so consumers can detect truncation.
@ -93,18 +108,32 @@ class ActionLogMiddleware(AgentMiddleware):
self._user_id = user_id self._user_id = user_id
self._tool_definitions = dict(tool_definitions or {}) self._tool_definitions = dict(tool_definitions or {})
def _enabled(self) -> bool: def _enabled(self, thread_id: int | None) -> bool:
flags = get_flags() flags = get_flags()
if flags.disable_new_agent_stack: if flags.disable_new_agent_stack:
return False return False
return bool(flags.enable_action_log) and self._thread_id is not None return bool(flags.enable_action_log) and thread_id is not None
def _resolve_thread_id(self, request: ToolCallRequest) -> int | None:
    """Resolve the live thread id, preferring the runtime config.

    Reading ``configurable.thread_id`` from the active ``RunnableConfig``
    (rather than the value captured at ``__init__``) lets a single cached
    compiled graph safely serve many threads; without it, a cache hit
    would attribute action-log rows to whichever thread first built the
    graph. Falls back to the constructor value for legacy/test runtimes
    that don't surface a config.

    Args:
        request: The tool-call request being wrapped.

    Returns:
        The thread id parsed from the request's runtime config when one is
        available, otherwise the ``thread_id`` stored on this instance
        (which may itself be ``None``).
    """
    # Delegates to the module-level helper of the same name; inside a method
    # body the bare name resolves to the module global, not to this method.
    resolved = _resolve_thread_id(request)
    return resolved if resolved is not None else self._thread_id
async def awrap_tool_call( async def awrap_tool_call(
self, self,
request: ToolCallRequest, request: ToolCallRequest,
handler: Callable[[ToolCallRequest], Awaitable[ToolMessage | Command[Any]]], handler: Callable[[ToolCallRequest], Awaitable[ToolMessage | Command[Any]]],
) -> ToolMessage | Command[Any]: ) -> ToolMessage | Command[Any]:
if not self._enabled(): thread_id = self._resolve_thread_id(request)
if not self._enabled(thread_id):
return await handler(request) return await handler(request)
result: ToolMessage | Command[Any] result: ToolMessage | Command[Any]
@ -119,10 +148,16 @@ class ActionLogMiddleware(AgentMiddleware):
request=request, request=request,
result=None, result=None,
error_payload=error_payload, error_payload=error_payload,
thread_id=thread_id,
) )
raise raise
await self._record(request=request, result=result, error_payload=None) await self._record(
request=request,
result=result,
error_payload=None,
thread_id=thread_id,
)
return result return result
async def _record( async def _record(
@ -131,6 +166,7 @@ class ActionLogMiddleware(AgentMiddleware):
request: ToolCallRequest, request: ToolCallRequest,
result: ToolMessage | Command[Any] | None, result: ToolMessage | Command[Any] | None,
error_payload: dict[str, Any] | None, error_payload: dict[str, Any] | None,
thread_id: int | None,
) -> None: ) -> None:
"""Persist one ``agent_action_log`` row. Defensive: never raises.""" """Persist one ``agent_action_log`` row. Defensive: never raises."""
try: try:
@ -149,7 +185,7 @@ class ActionLogMiddleware(AgentMiddleware):
chat_turn_id = _resolve_chat_turn_id(request) chat_turn_id = _resolve_chat_turn_id(request)
row = AgentActionLog( row = AgentActionLog(
thread_id=self._thread_id, thread_id=thread_id,
user_id=self._user_id, user_id=self._user_id,
search_space_id=self._search_space_id, search_space_id=self._search_space_id,
# ``turn_id`` is the deprecated alias of ``tool_call_id`` # ``turn_id`` is the deprecated alias of ``tool_call_id``
@ -178,11 +214,9 @@ class ActionLogMiddleware(AgentMiddleware):
) )
return return
# Surface a side-channel SSE event so the chat tool card can # Side-channel event (relayed by ``stream_new_chat`` as a
# render a Revert button immediately after the row is durable. # ``data-action-log`` SSE) so the tool card can show a Revert button
# ``stream_new_chat`` translates this into a # once the row is durable. Carries a presence flag, not the descriptor.
# ``data-action-log`` SSE event. We DO NOT include the
# ``reverse_descriptor`` payload here; only a presence flag.
try: try:
await adispatch_custom_event( await adispatch_custom_event(
"action_log", "action_log",
@ -337,6 +371,36 @@ def _resolve_chat_turn_id(request: Any) -> str | None:
return None return None
def _resolve_thread_id(request: Any) -> int | None:
"""Return ``configurable.thread_id`` (as int) for this request, if accessible.
Mirrors :func:`_resolve_chat_turn_id`: ``ToolRuntime.config`` is exposed by
LangGraph at ``request.runtime.config``, and the chat thread id lives at
``configurable.thread_id`` (a stringified ``chat_id`` at the main-graph
level). Returns ``None`` when absent or unparseable so the caller can fall
back to the constructor value.
"""
try:
runtime = getattr(request, "runtime", None)
if runtime is None:
return None
config = getattr(runtime, "config", None)
if not isinstance(config, dict):
return None
configurable = config.get("configurable")
if not isinstance(configurable, dict):
return None
value = configurable.get("thread_id")
if value is None:
return None
try:
return int(value)
except (TypeError, ValueError):
return None
except Exception: # pragma: no cover - defensive
return None
def _resolve_message_id(request: Any) -> str | None: def _resolve_message_id(request: Any) -> str | None:
"""Tool-call IDs serve as best-available message correlator at this layer.""" """Tool-call IDs serve as best-available message correlator at this layer."""
return _resolve_tool_call_id(request) return _resolve_tool_call_id(request)

View file

@ -0,0 +1,9 @@
"""Anonymous-document middleware: Redis hydration, cloud only (impl + builder)."""
from .builder import build_anonymous_doc_mw
from .middleware import AnonymousDocumentMiddleware
__all__ = [
"AnonymousDocumentMiddleware",
"build_anonymous_doc_mw",
]

View file

@ -2,8 +2,9 @@
from __future__ import annotations from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import AnonymousDocumentMiddleware
from .middleware import AnonymousDocumentMiddleware
def build_anonymous_doc_mw( def build_anonymous_doc_mw(

View file

@ -24,8 +24,13 @@ from typing import Any
from langchain.agents.middleware import AgentMiddleware, AgentState from langchain.agents.middleware import AgentMiddleware, AgentState
from langgraph.runtime import Runtime from langgraph.runtime import Runtime
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT, safe_filename SurfSenseFilesystemState,
)
from app.agents.chat.runtime.path_resolver import (
DOCUMENTS_ROOT,
safe_filename,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View file

@ -0,0 +1,25 @@
"""Per-turn cooperative busy-lock middleware + cancel primitives (main-agent)."""
from .builder import build_busy_mutex_mw
from .middleware import (
BusyMutexMiddleware,
end_turn,
get_cancel_event,
get_cancel_state,
is_cancel_requested,
manager,
request_cancel,
reset_cancel,
)
__all__ = [
"BusyMutexMiddleware",
"build_busy_mutex_mw",
"end_turn",
"get_cancel_event",
"get_cancel_state",
"is_cancel_requested",
"manager",
"request_cancel",
"reset_cancel",
]

View file

@ -2,10 +2,12 @@
from __future__ import annotations from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import BusyMutexMiddleware from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled from .middleware import (
BusyMutexMiddleware,
)
def build_busy_mutex_mw(flags: AgentFeatureFlags) -> BusyMutexMiddleware | None: def build_busy_mutex_mw(flags: AgentFeatureFlags) -> BusyMutexMiddleware | None:

View file

@ -1,32 +1,12 @@
""" """Per-thread asyncio lock + cooperative cancel token, keyed by ``thread_id``.
BusyMutexMiddleware per-thread asyncio lock + cancel token.
LangChain has no built-in concept of "this thread is already running a Refuses a second concurrent turn on the same thread (e.g. double-clicked
turn refuse the second concurrent request". Without it, a user "send") that would otherwise race on the same checkpoint and duplicate tool
double-clicking "send" or refreshing the page mid-stream can spawn two calls. Also exposes a per-thread cancel event that long-running tools poll
turns racing on the same checkpoint, producing duplicated tool calls via ``runtime.context.cancel_event.is_set()`` to abort cooperatively.
and mangled state.
Ported from OpenCode's ``Stream.scoped(AbortController)`` pattern: a Process-local and in-memory; multi-worker deployments need a distributed lock
single-process, in-memory lock + cooperative cancellation token keyed by (Redis / PostgreSQL advisory locks) as a follow-up.
``thread_id``. For multi-worker deployments a distributed lock backend
(Redis or PostgreSQL advisory locks) is a phase-2 follow-up.
What this provides:
- A ``WeakValueDictionary[str, asyncio.Lock]`` keyed by ``thread_id``;
acquiring the lock during ``before_agent`` blocks any concurrent
prompt on the same thread until release.
- A per-thread ``asyncio.Event`` (``cancel_event``) that long-running
tools can poll to abort cooperatively. The event is reset between
turns. Tools should check ``runtime.context.cancel_event.is_set()``
in tight inner loops.
- A typed :class:`~app.agents.new_chat.errors.BusyError` raised when a
second turn arrives while the lock is held.
Note: SurfSense's ``stream_new_chat`` is the call site that should
acquire/release. Wiring this as middleware means the contract is
explicit and the lock manager is shared with subagents that compile
their own ``create_agent`` runnables.
""" """
from __future__ import annotations from __future__ import annotations
@ -46,7 +26,7 @@ from langchain.agents.middleware.types import (
from langgraph.config import get_config from langgraph.config import get_config
from langgraph.runtime import Runtime from langgraph.runtime import Runtime
from app.agents.new_chat.errors import BusyError from app.agents.chat.runtime.errors import BusyError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -152,9 +132,8 @@ class _ThreadLockManager:
return True return True
# Module-level singleton — process-local but reused across all agent # Process-local singleton shared across all agents/subagents built in this
# instances built in this process. Subagents created in nested # process so per-thread locks stay coherent.
# ``create_agent`` calls also get this so locks are coherent.
manager = _ThreadLockManager() manager = _ThreadLockManager()
@ -266,7 +245,6 @@ class BusyMutexMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respo
await lock.acquire() await lock.acquire()
epoch = manager.bump_turn_epoch(thread_id) epoch = manager.bump_turn_epoch(thread_id)
self._held_locks[thread_id] = (lock, epoch) self._held_locks[thread_id] = (lock, epoch)
# Reset the cancel event so this turn starts fresh
reset_cancel(thread_id) reset_cancel(thread_id)
return None return None
@ -289,17 +267,14 @@ class BusyMutexMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respo
return None return None
if lock.locked(): if lock.locked():
lock.release() lock.release()
# Always clear cancel event between turns so a stale signal # Clear cancel event so a stale signal doesn't leak into the next turn.
# doesn't leak into the next request.
reset_cancel(thread_id) reset_cancel(thread_id)
return None return None
# Provide sync no-ops because the middleware base class allows them
def before_agent( # type: ignore[override] def before_agent( # type: ignore[override]
self, state: AgentState[Any], runtime: Runtime[ContextT] self, state: AgentState[Any], runtime: Runtime[ContextT]
) -> dict[str, Any] | None: ) -> dict[str, Any] | None:
# Sync path: no asyncio.Lock to acquire. Best we can do is reject # Sync path can't await an asyncio.Lock; only reject if one is in flight.
# if anyone else is in flight.
thread_id = self._thread_id(runtime) thread_id = self._thread_id(runtime)
if thread_id is None: if thread_id is None:
if self._require_thread_id: if self._require_thread_id:

View file

@ -1,7 +1,9 @@
"""RunnableConfig wiring for nested subagent invocations. """HITL resume side-channel for nested subagent invocations.
Forwards the parent's ``runtime.config`` (thread_id, …) into the subagent and Exposes the configurable side-channel ``stream_resume_chat`` uses to ferry
exposes the side-channel ``stream_resume_chat`` uses to ferry resume payloads. resume payloads into a mid-flight subagent. The ``RunnableConfig`` builder and
state-key filter shared with subagents live in
``app.agents.chat.multi_agent_chat.subagents.shared.invocation``.
""" """
from __future__ import annotations from __future__ import annotations
@ -11,8 +13,6 @@ from typing import Any
from langchain.tools import ToolRuntime from langchain.tools import ToolRuntime
from .constants import DEFAULT_SUBAGENT_RECURSION_LIMIT
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# langgraph stores the parent task's scratchpad under this configurable key; # langgraph stores the parent task's scratchpad under this configurable key;
@ -20,39 +20,6 @@ logger = logging.getLogger(__name__)
_LANGGRAPH_SCRATCHPAD_KEY = "__pregel_scratchpad" _LANGGRAPH_SCRATCHPAD_KEY = "__pregel_scratchpad"
def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
"""RunnableConfig for the nested invoke; raises ``recursion_limit`` and isolates ``thread_id``.
Each parallel subagent invocation lands in its own checkpoint slot keyed
by an extended ``thread_id`` of the form ``{parent_thread}::task:{tool_call_id}``.
The same call across the resume cycle keeps reading from the same snapshot
(``tool_call_id`` is stable per LLM-emitted call).
We namespace via ``thread_id`` rather than ``checkpoint_ns`` because
langgraph's ``aget_state`` interprets a non-empty ``checkpoint_ns`` as a
subgraph path and raises ``ValueError("Subgraph X not found")``.
"""
merged: dict[str, Any] = dict(runtime.config) if runtime.config else {}
current_limit = merged.get("recursion_limit")
try:
current_int = int(current_limit) if current_limit is not None else 0
except (TypeError, ValueError):
current_int = 0
if current_int < DEFAULT_SUBAGENT_RECURSION_LIMIT:
merged["recursion_limit"] = DEFAULT_SUBAGENT_RECURSION_LIMIT
configurable: dict[str, Any] = dict(merged.get("configurable") or {})
parent_thread_id = configurable.get("thread_id")
per_call_suffix = f"task:{runtime.tool_call_id}"
configurable["thread_id"] = (
f"{parent_thread_id}::{per_call_suffix}"
if parent_thread_id
else per_call_suffix
)
merged["configurable"] = configurable
return merged
def consume_surfsense_resume(runtime: ToolRuntime) -> Any: def consume_surfsense_resume(runtime: ToolRuntime) -> Any:
"""Pop the resume payload for *this* call's ``tool_call_id``. """Pop the resume payload for *this* call's ``tool_call_id``.

View file

@ -1,24 +1,14 @@
"""Constants shared by the checkpointed subagent middleware.""" """Tuning constants for the checkpointed subagent middleware.
``EXCLUDED_STATE_KEYS`` and ``DEFAULT_SUBAGENT_RECURSION_LIMIT`` are part of the
subagent-invocation contract shared with subagents and now live in
``app.agents.chat.multi_agent_chat.subagents.shared.invocation``.
"""
from __future__ import annotations from __future__ import annotations
import os import os
# Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
EXCLUDED_STATE_KEYS = frozenset(
{
"messages",
"todos",
"structured_response",
"skills_metadata",
"memory_contents",
}
)
# Match the parent graph's budget; the LangGraph default of 25 trips on
# multi-step subagent runs.
DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000
def _read_timeout_env(name: str, default: float) -> float: def _read_timeout_env(name: str, default: float) -> float:
"""Parse ``name`` from the environment; fall back to ``default`` on bad values. """Parse ``name`` from the environment; fall back to ``default`` on bad values.

View file

@ -0,0 +1,188 @@
"""SubAgent middleware that compiles each subagent against the parent checkpointer."""
from __future__ import annotations
import time
from collections.abc import Callable
from typing import Any, cast
from deepagents.backends.protocol import BackendFactory, BackendProtocol
from deepagents.middleware.subagents import (
TASK_SYSTEM_PROMPT,
CompiledSubAgent,
SubAgent,
SubAgentMiddleware,
)
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain_core.runnables import Runnable
from langgraph.types import Checkpointer
from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
SURF_CONTEXT_HINT_PROVIDER_KEY,
SURF_LAZY_SPEC_FACTORY_KEY,
)
from app.utils.perf import get_perf_logger
from .task_tool import build_task_tool_with_parent_config
_perf_log = get_perf_logger()
class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
    """``SubAgentMiddleware`` variant that compiles each subagent against the parent checkpointer.

    Subagent graphs are compiled lazily: the constructor only indexes the
    specs and builds the ``task`` tool, and each graph is compiled (and
    memoized) the first time it is resolved by name.
    """

    def __init__(
        self,
        *,
        checkpointer: Checkpointer,
        backend: BackendProtocol | BackendFactory,
        subagents: list[SubAgent | CompiledSubAgent],
        system_prompt: str | None = TASK_SYSTEM_PROMPT,
        task_description: str | None = None,
        search_space_id: int | None = None,
    ) -> None:
        """Index the subagent specs and build the ``task`` tool.

        Args:
            checkpointer: Checkpointer passed to ``create_agent`` for every
                subagent compiled by this middleware.
            backend: Backend (or backend factory); stored on the instance.
            subagents: Subagent specs. Each is either a pre-compiled spec
                (has ``runnable``), a lazy-spec entry (carries a spec
                factory), or a raw spec with ``model`` and ``tools``.
            system_prompt: Base system prompt. When truthy and at least one
                subagent is registered, a list of available subagent types
                is appended to it.
            task_description: Forwarded to the ``task`` tool builder.
            search_space_id: Stored on the instance and forwarded to the
                ``task`` tool builder.

        Raises:
            ValueError: If ``subagents`` is empty, or a raw spec is missing
                ``model`` or ``tools``.
        """
        self._surf_checkpointer = checkpointer
        # Starts the MRO lookup *after* ``SubAgentMiddleware``, so that
        # class's own ``__init__`` is not run here.
        super(SubAgentMiddleware, self).__init__()
        if not subagents:
            raise ValueError(
                "At least one subagent must be specified when using the new API"
            )
        self._backend = backend
        self._subagents = subagents

        # Search-space id is captured at build time (the orchestrator runs in
        # exactly one search space for its lifetime). The spawn-paused kill
        # switch keys on it so an operator can quarantine one workspace
        # without affecting the rest of the deployment.
        self._search_space_id = search_space_id

        # Lazy subagent compilation. Compiling a subagent graph via
        # ``create_agent`` is expensive (~250-400ms each) and there can be up
        # to ~17 of them. Doing it all in ``__init__`` put the full cost on
        # every cold ``agent_cache`` miss (i.e. on time-to-first-token), even
        # though a turn usually invokes zero or one subagent. We instead index
        # the raw specs here and compile each graph on first ``task(name)``
        # use, memoizing the result for the life of this (cached) instance.
        self._compiled: dict[str, Runnable] = {}
        self._lazy_specs: dict[str, dict[str, Any]] = {}

        # Subagents whose *spec itself* is built lazily (not just compiled).
        # Keyed by name → zero-arg factory returning the full spec dict. Used
        # for the write knowledge_base subagent, whose filesystem middleware
        # builds ~13 tool schemas (~2s) that almost never matter on turn 1.
        self._lazy_spec_factories: dict[str, Callable[[], dict[str, Any]]] = {}

        descriptors = self._build_subagent_registry()
        task_tool = build_task_tool_with_parent_config(
            descriptors,
            task_description,
            search_space_id=search_space_id,
            resolve_subagent=self._resolve_subagent,
        )

        if system_prompt and descriptors:
            agents_desc = "\n".join(
                f"- {s['name']}: {s['description']}" for s in descriptors
            )
            self.system_prompt = (
                system_prompt + "\n\nAvailable subagent types:\n" + agents_desc
            )
        else:
            self.system_prompt = system_prompt

        self.tools = [task_tool]

    def _build_subagent_registry(self) -> list[dict[str, Any]]:
        """Index subagents for lazy compilation; return lightweight descriptors.

        Pre-compiled specs (those carrying a ``runnable``) are seeded directly
        into the memo. Lazy specs are stashed by name and compiled on first
        ``task(...)`` use via :meth:`_resolve_subagent`. The returned
        descriptors carry only ``name``/``description`` plus the optional
        context-hint provider — everything the ``task`` tool needs to validate
        names, render its catalog, and run hints, without paying the
        ``create_agent`` cost up front.

        Returns:
            One descriptor dict per entry in ``self._subagents``, in order.

        Raises:
            ValueError: If a raw (non-factory, non-compiled) spec lacks
                ``model`` or ``tools``.
        """
        descriptors: list[dict[str, Any]] = []
        for spec in self._subagents:
            # Provider may be ``None`` (no hint), in which case task_tool skips
            # the prepend step. We forward the key unconditionally so the
            # descriptor shape is uniform.
            hint_provider = cast(dict, spec).get(SURF_CONTEXT_HINT_PROVIDER_KEY)
            name = spec["name"]

            spec_factory = cast(dict, spec).get(SURF_LAZY_SPEC_FACTORY_KEY)
            if spec_factory is not None:
                # Descriptor-only entry: the spec dict is built on first use.
                self._lazy_spec_factories[name] = spec_factory
            elif "runnable" in spec:
                compiled = cast(CompiledSubAgent, spec)
                self._compiled[name] = compiled["runnable"]
            else:
                if "model" not in spec:
                    msg = f"SubAgent '{name}' must specify 'model'"
                    raise ValueError(msg)
                if "tools" not in spec:
                    msg = f"SubAgent '{name}' must specify 'tools'"
                    raise ValueError(msg)
                self._lazy_specs[name] = cast(dict, spec)

            descriptors.append(
                {
                    "name": name,
                    "description": spec["description"],
                    SURF_CONTEXT_HINT_PROVIDER_KEY: hint_provider,
                }
            )
        return descriptors

    def _resolve_subagent(self, name: str) -> Runnable:
        """Return the compiled subagent graph for ``name``, compiling on first use.

        Memoized: the ``create_agent`` cost is paid once per subagent per
        cached middleware instance. Raises ``KeyError`` for unknown names
        (callers in the ``task`` tool validate membership before resolving).

        Args:
            name: Subagent name as registered by :meth:`_build_subagent_registry`.

        Returns:
            The compiled (or pre-compiled) runnable for ``name``.

        Raises:
            KeyError: If ``name`` is not a registered subagent.
        """
        cached = self._compiled.get(name)
        if cached is not None:
            return cached

        spec = self._lazy_specs.get(name)
        if spec is None:
            factory = self._lazy_spec_factories.get(name)
            if factory is None:
                raise KeyError(name)
            # Build the spec on first use (pays the deferred construction cost
            # here, off the cold agent-build path), then compile and memoize.
            build_start = time.perf_counter()
            spec = factory()
            _perf_log.info(
                "[subagent_spec_lazy] name=%s (deferred spec build) in %.3fs",
                name,
                time.perf_counter() - build_start,
            )

        runnable = self._compile_one(spec)
        self._compiled[name] = runnable
        return runnable

    def _compile_one(self, spec: dict[str, Any]) -> Runnable:
        """Compile a single subagent graph against the parent checkpointer.

        Args:
            spec: Subagent spec; ``model``, ``system_prompt``, ``tools`` and
                ``name`` are required keys, ``middleware`` is optional.

        Returns:
            The runnable produced by ``create_agent``.
        """
        model = spec["model"]
        if isinstance(model, str):
            # String model identifiers are resolved to a chat model instance.
            model = init_chat_model(model)

        middleware: list[Any] = list(spec.get("middleware", []))
        tools_count = len(spec.get("tools") or [])
        mw_count = len(middleware)

        compile_start = time.perf_counter()
        runnable = create_agent(
            model,
            system_prompt=spec["system_prompt"],
            tools=spec["tools"],
            middleware=middleware,
            name=spec["name"],
            checkpointer=self._surf_checkpointer,
        )
        _perf_log.info(
            "[subagent_compile_lazy] name=%s in %.3fs tools=%d mw=%d",
            spec["name"],
            time.perf_counter() - compile_start,
            tools_count,
            mw_count,
        )
        return runnable

View file

@ -6,7 +6,7 @@ and the ``<tools>`` block render from the same source.
from __future__ import annotations from __future__ import annotations
from app.agents.multi_agent_chat.main_agent.system_prompt.builder.load_md import ( from app.agents.chat.multi_agent_chat.main_agent.system_prompt.builder.load_md import (
read_prompt_md, read_prompt_md,
) )

View file

@ -12,7 +12,7 @@ import asyncio
import json import json
import logging import logging
import time import time
from collections.abc import Awaitable from collections.abc import Awaitable, Callable
from typing import Annotated, Any, NoReturn, TypeVar from typing import Annotated, Any, NoReturn, TypeVar
from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
@ -23,7 +23,11 @@ from langchain_core.tools import StructuredTool
from langgraph.errors import GraphInterrupt from langgraph.errors import GraphInterrupt
from langgraph.types import Command, Interrupt from langgraph.types import Command, Interrupt
from app.agents.multi_agent_chat.subagents.shared.spec import ( from app.agents.chat.multi_agent_chat.subagents.shared.invocation import (
EXCLUDED_STATE_KEYS,
subagent_invoke_config,
)
from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
SURF_CONTEXT_HINT_PROVIDER_KEY, SURF_CONTEXT_HINT_PROVIDER_KEY,
ContextHintProvider, ContextHintProvider,
) )
@ -34,13 +38,11 @@ from .config import (
consume_surfsense_resume, consume_surfsense_resume,
drain_parent_null_resume, drain_parent_null_resume,
has_surfsense_resume, has_surfsense_resume,
subagent_invoke_config,
) )
from .constants import ( from .constants import (
DEFAULT_SUBAGENT_BATCH_CONCURRENCY, DEFAULT_SUBAGENT_BATCH_CONCURRENCY,
DEFAULT_SUBAGENT_BILLABLE_THRESHOLD, DEFAULT_SUBAGENT_BILLABLE_THRESHOLD,
DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS, DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS,
EXCLUDED_STATE_KEYS,
MAX_SUBAGENT_BATCH_SIZE, MAX_SUBAGENT_BATCH_SIZE,
) )
from .propagation import wrap_with_tool_call_id from .propagation import wrap_with_tool_call_id
@ -80,13 +82,10 @@ _T = TypeVar("_T")
async def _ainvoke_with_timeout[T]( async def _ainvoke_with_timeout[T](
coro: Awaitable[_T], *, subagent_type: str, started_at: float coro: Awaitable[_T], *, subagent_type: str, started_at: float
) -> _T: ) -> _T:
"""Apply :data:`DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS` to ``coro``. """Apply the subagent invoke timeout to ``coro`` (non-positive disables it).
A non-positive timeout disables the cap (configurable via the On expiry the task is cancelled and :class:`SubagentInvokeTimeoutError` is
``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` env var). On expiry the raised for the caller to turn into a synthetic ToolMessage.
underlying task is cancelled and :class:`SubagentInvokeTimeoutError` is
raised — the caller wraps it into a synthetic ToolMessage so the
orchestrator can decide what to do.
""" """
timeout = DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS timeout = DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS
if timeout <= 0: if timeout <= 0:
@ -144,17 +143,31 @@ def build_task_tool_with_parent_config(
task_description: str | None = None, task_description: str | None = None,
*, *,
search_space_id: int | None = None, search_space_id: int | None = None,
resolve_subagent: Callable[[str], Runnable] | None = None,
) -> BaseTool: ) -> BaseTool:
"""Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging.""" """Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging.
subagent_graphs: dict[str, Runnable] = {
spec["name"]: spec["runnable"] for spec in subagents ``subagents`` are lightweight descriptors (``name``/``description`` + the
} optional context-hint provider); the actual compiled graph is fetched
# Per-subagent context-hint providers (see ``SurfSenseSubagentSpec``). lazily via ``resolve_subagent(name)`` so subagent ``create_agent`` cost is
# The mapping is sparse: only routes that opted in via ``pack_subagent`` paid on first ``task(name)`` use rather than at graph-build time.
# appear here, and the value is invoked once per ``task(...)`` call to
# generate a short string prepended to the subagent's first For backward compatibility (and tests), ``resolve_subagent`` may be omitted
# ``HumanMessage``. Failures are logged and swallowed — a broken hint when every descriptor already carries a pre-compiled ``runnable``; in that
# provider must never prevent the underlying task from running. case a trivial dict-backed resolver is used.
"""
subagent_names: set[str] = {spec["name"] for spec in subagents}
if resolve_subagent is None:
_eager_graphs: dict[str, Runnable] = {
spec["name"]: spec["runnable"] for spec in subagents if "runnable" in spec
}
def resolve_subagent(name: str) -> Runnable:
return _eager_graphs[name]
# Sparse map of opt-in context-hint providers; each runs once per task()
# call to prepend a string to the subagent's first HumanMessage. Failures
# are swallowed so a broken hint never blocks the task.
subagent_hint_providers: dict[str, ContextHintProvider] = { subagent_hint_providers: dict[str, ContextHintProvider] = {
spec["name"]: provider spec["name"]: provider
for spec in subagents for spec in subagents
@ -176,24 +189,18 @@ def build_task_tool_with_parent_config(
def _billable_call_update( def _billable_call_update(
subagent_type: str, runtime: ToolRuntime subagent_type: str, runtime: ToolRuntime
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Build the per-call ``billable_calls`` delta + an optional warning. """Build the per-call ``billable_calls`` delta plus an optional soft-cap warning.
The orchestrator's ``billable_calls`` map is summed by Always emits ``{subagent_type: 1}`` (a reducer accumulates it); when this
:func:`_int_counter_merge_reducer`, so we always emit call would cross the threshold, also adds a soft ``messages`` entry so the
``{subagent_type: 1}`` and let the reducer accumulate. If the orchestrator self-limits on its next step.
cumulative count *after* this call would cross the configured
threshold, we also slip a soft ``messages`` entry into the update
so the orchestrator can read it on its next step and self-limit.
Returning a plain ``dict`` (vs. an extra :class:`Command`) keeps
the helper composable with the existing single/batch return paths.
""" """
delta: dict[str, Any] = {"billable_calls": {subagent_type: 1}} delta: dict[str, Any] = {"billable_calls": {subagent_type: 1}}
threshold = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD threshold = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD
if threshold <= 0: if threshold <= 0:
return delta return delta
prior = runtime.state.get("billable_calls") or {} prior = runtime.state.get("billable_calls") or {}
# ``prior`` may be a plain dict or a reducer-managed mapping; only # Count int values only so a malformed checkpoint can't crash us.
# int values are counted so a malformed checkpoint can't crash us.
prior_total = sum(v for v in prior.values() if isinstance(v, int)) prior_total = sum(v for v in prior.values() if isinstance(v, int))
new_total = prior_total + 1 new_total = prior_total + 1
if prior_total < threshold <= new_total: if prior_total < threshold <= new_total:
@ -212,8 +219,7 @@ def build_task_tool_with_parent_config(
"""Merge the per-call billable counter (and warning) into ``cmd``.""" """Merge the per-call billable counter (and warning) into ``cmd``."""
delta = _billable_call_update(subagent_type, runtime) delta = _billable_call_update(subagent_type, runtime)
warn_text = delta.pop("_billable_warn_text", None) warn_text = delta.pop("_billable_warn_text", None)
# ``cmd.update`` may be a dict or LangGraph ``UpdateDict``; defensively # Copy so we don't mutate state shared with other tool returns.
# copy so we don't mutate state shared across other tool returns.
update = dict(getattr(cmd, "update", {}) or {}) update = dict(getattr(cmd, "update", {}) or {})
for key, value in delta.items(): for key, value in delta.items():
update[key] = value update[key] = value
@ -226,14 +232,10 @@ def build_task_tool_with_parent_config(
return Command(update=update) return Command(update=update)
def _safe_message_text(msg: Any) -> str: def _safe_message_text(msg: Any) -> str:
"""Pull text out of a BaseMessage without trusting the ``.text`` property. """Pull text out of a BaseMessage without using the ``.text`` property.
``BaseMessage.text`` walks ``content_blocks`` and crashes with ``.text`` crashes when ``content`` is ``None`` (common for tool-call
``TypeError: 'NoneType' object is not iterable`` when ``content`` is AIMessages), and ``getattr`` won't catch it, so read ``content`` directly.
``None`` (common for tool-call AIMessages whose payload is purely
structured). ``getattr(msg, "text", None)`` does not catch this
because Python evaluates the property body before falling back to
the default. Read ``content`` directly and coerce defensively.
""" """
try: try:
content = getattr(msg, "content", None) content = getattr(msg, "content", None)
@ -256,23 +258,18 @@ def build_task_tool_with_parent_config(
return str(content) return str(content)
def _build_tool_trace(messages: list[Any]) -> list[dict[str, Any]]: def _build_tool_trace(messages: list[Any]) -> list[dict[str, Any]]:
"""Compress the subagent's message stream into a compact tool trace. """Compress the subagent's messages into a compact tool trace.
Each entry is ``{"tool": <name>, "status": "ok"|"error", "preview": Entries (``{tool, status, preview}``) ride on the ToolMessage's
<120 chars>}`` so the orchestrator can show "this is what your ``additional_kwargs["surf_tool_trace"]`` for UI/observability; the LLM
specialist actually did" without dumping the full message stream never sees them.
back through the prompt. The list is attached to the returned
ToolMessage's ``additional_kwargs`` (under ``"surf_tool_trace"``);
the LLM never sees it, but UI / observability code can pluck it
out of the checkpoint.
""" """
trace: list[dict[str, Any]] = [] trace: list[dict[str, Any]] = []
for msg in messages: for msg in messages:
tool_name = getattr(msg, "name", None) tool_name = getattr(msg, "name", None)
tool_call_id_attr = getattr(msg, "tool_call_id", None) tool_call_id_attr = getattr(msg, "tool_call_id", None)
if not tool_name and not tool_call_id_attr: if not tool_name and not tool_call_id_attr:
# Only ToolMessages have either field; skip AIMessage / # Only ToolMessages carry either field.
# HumanMessage / SystemMessage frames.
continue continue
status = getattr(msg, "status", None) or "ok" status = getattr(msg, "status", None) or "ok"
preview = _safe_message_text(msg).strip().replace("\n", " ") preview = _safe_message_text(msg).strip().replace("\n", " ")
@ -306,8 +303,7 @@ def build_task_tool_with_parent_config(
) )
raise ValueError(msg) raise ValueError(msg)
message_text = _safe_message_text(messages[-1]).rstrip() message_text = _safe_message_text(messages[-1]).rstrip()
# Tool-trace is purely observability — wrap defensively so a single # Trace is observability-only; never let a bad frame kill the turn.
# malformed frame never bubbles up and kills the whole user turn.
try: try:
tool_trace = _build_tool_trace(messages) tool_trace = _build_tool_trace(messages)
except Exception: except Exception:
@ -318,10 +314,7 @@ def build_task_tool_with_parent_config(
tool_trace = [] tool_trace = []
tool_msg = ToolMessage(message_text, tool_call_id=tool_call_id) tool_msg = ToolMessage(message_text, tool_call_id=tool_call_id)
if tool_trace: if tool_trace:
# ``additional_kwargs`` is a free-form dict on BaseMessage; using # surf_ prefix avoids collision with provider keys (e.g. cache_control).
# a ``surf_`` prefix avoids collision with provider-specific keys
# (e.g. Anthropic's ``cache_control``). The LLM doesn't see it;
# consumers (UI, observability) read it off the checkpoint.
tool_msg.additional_kwargs["surf_tool_trace"] = tool_trace tool_msg.additional_kwargs["surf_tool_trace"] = tool_trace
return Command( return Command(
update={ update={
@ -353,15 +346,13 @@ def build_task_tool_with_parent_config(
def _validate_and_prepare_state( def _validate_and_prepare_state(
subagent_type: str, description: str, runtime: ToolRuntime subagent_type: str, description: str, runtime: ToolRuntime
) -> tuple[Runnable, dict]: ) -> tuple[Runnable, dict]:
subagent = subagent_graphs[subagent_type] subagent = resolve_subagent(subagent_type)
subagent_state = { subagent_state = {
k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
} }
hint = _resolve_context_hint(subagent_type, description, runtime) hint = _resolve_context_hint(subagent_type, description, runtime)
if hint: if hint:
# Prepend as a tagged block so the subagent prompt can pattern-match # Tagged block so the subagent prompt can pattern-match the section.
# on the section (and a future change can lift it into its own
# ``SystemMessage`` if needed).
payload = f"<context_hint>\n{hint}\n</context_hint>\n\n{description}" payload = f"<context_hint>\n{hint}\n</context_hint>\n\n{description}"
else: else:
payload = description payload = description
@ -372,16 +363,12 @@ def build_task_tool_with_parent_config(
results: list[tuple[int, str, dict | str, dict | None]], results: list[tuple[int, str, dict | str, dict | None]],
runtime: ToolRuntime, runtime: ToolRuntime,
) -> Command: ) -> Command:
"""Combine per-child results into one Command with a combined ToolMessage. """Combine per-child results into one Command with an aggregate ToolMessage.
``results`` is a list of ``(task_index, subagent_type, ``results`` tuples are ``(task_index, subagent_type, payload_or_error,
payload_or_error_text, child_state_update)`` tuples preserving the child_state_update)``; output blocks are sorted by index so the LLM can
input order so the orchestrator can map each block back to the task map them back to dispatch order, and each child contributes a
it dispatched. State updates are merged by reducer for keys outside ``billable_calls`` increment to match single-mode accounting.
:data:`EXCLUDED_STATE_KEYS`; everything else (``messages``, ``todos``,
etc.) is replaced by the synthesized aggregate ToolMessage. Every
child also contributes a ``billable_calls`` increment so cost
accounting matches single-mode dispatch.
""" """
results.sort(key=lambda r: r[0]) results.sort(key=lambda r: r[0])
merged_state: dict[str, Any] = {} merged_state: dict[str, Any] = {}
@ -422,8 +409,8 @@ def build_task_tool_with_parent_config(
} }
) )
if state_update: if state_update:
# Naive merge: later tasks win on scalar collisions; reducer-backed # Later tasks win on scalar collisions; reducer-backed fields
# fields (``receipts``, ``files`` etc.) accumulate at apply time. # accumulate at apply time.
merged_state.update(state_update) merged_state.update(state_update)
aggregate = "\n\n".join(message_blocks) aggregate = "\n\n".join(message_blocks)
aggregate_msg = ToolMessage( aggregate_msg = ToolMessage(
@ -467,15 +454,13 @@ def build_task_tool_with_parent_config(
) -> tuple[int, str, dict | str, dict | None]: ) -> tuple[int, str, dict | str, dict | None]:
"""Run one child of a batched ``task`` call under the concurrency cap. """Run one child of a batched ``task`` call under the concurrency cap.
Errors are returned as plain text in slot 2 so a single child's Errors are returned as text (slot 2) so one child's failure doesn't abort
failure does not abort the whole batch. ``GraphInterrupt`` from a the batch. A child's ``GraphInterrupt`` is a hard failure for that child:
batched child is currently treated as a hard failure for that child batched HITL is intentionally out of scope.
only — batched HITL is intentionally out of scope for the v1
rollout (see plan tier 2 item 4 risks).
""" """
async with semaphore: async with semaphore:
if subagent_type not in subagent_graphs: if subagent_type not in subagent_names:
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs]) allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
return ( return (
task_index, task_index,
subagent_type, subagent_type,
@ -505,8 +490,7 @@ def build_task_tool_with_parent_config(
) )
return (task_index, subagent_type, str(exc), None) return (task_index, subagent_type, str(exc), None)
except GraphInterrupt: except GraphInterrupt:
# Batched HITL is unsupported in v1 — surface as a failure # Batched HITL unsupported; fail this child so the batch finishes.
# for this child so the rest of the batch still completes.
logger.warning( logger.warning(
"Batch child %d (%s) raised GraphInterrupt; batched HITL " "Batch child %d (%s) raised GraphInterrupt; batched HITL "
"is not supported. Re-dispatch this task as a single " "is not supported. Re-dispatch this task as a single "
@ -543,14 +527,11 @@ def build_task_tool_with_parent_config(
return (task_index, subagent_type, result, child_state_update) return (task_index, subagent_type, result, child_state_update)
def _coerce_batch_arg(tasks: Any) -> list[dict] | str: def _coerce_batch_arg(tasks: Any) -> list[dict] | str:
"""Rescue common LLM-side malformations of the ``tasks`` argument. """Rescue common LLM malformations of the ``tasks`` argument.
Some providers serialise an array argument as a JSON-encoded string, Recovers a JSON-encoded array string and a single dict (instead of a
and small models occasionally hand back a single ``{description, 1-element array), logging a WARN. Unrecoverable shapes return a string
subagent_type}`` dict instead of a one-element array. Both are the caller surfaces as the tool error.
recovered here with a WARN log so the issue is visible in metrics
but the user's turn still completes; truly broken shapes return a
plain string that the caller surfaces as the tool error.
""" """
if isinstance(tasks, list): if isinstance(tasks, list):
return tasks return tasks
@ -585,13 +566,10 @@ def build_task_tool_with_parent_config(
async def _adispatch_batch( async def _adispatch_batch(
tasks: list[dict], runtime: ToolRuntime tasks: list[dict], runtime: ToolRuntime
) -> Command | str: ) -> Command | str:
"""Fan-out helper for the ``tasks`` array shape. """Fan out the ``tasks`` array (size- and concurrency-capped).
Bounded by :data:`MAX_SUBAGENT_BATCH_SIZE` and concurrency-capped Returns one Command; the LLM sees one ``[task <index>]``-prefixed block
at :data:`DEFAULT_SUBAGENT_BATCH_CONCURRENCY`. Returns a single per child, in input order.
:class:`Command` that the LLM sees as one ToolMessage per child,
prefixed with ``[task <index>]`` so it can map back to the input
order.
""" """
if not tasks: if not tasks:
return "tasks: array is empty; nothing to dispatch." return "tasks: array is empty; nothing to dispatch."
@ -657,8 +635,8 @@ def build_task_tool_with_parent_config(
"task: must provide either single-mode (`description`+`subagent_type`) " "task: must provide either single-mode (`description`+`subagent_type`) "
"or batch-mode (`tasks`)." "or batch-mode (`tasks`)."
) )
if subagent_type not in subagent_graphs: if subagent_type not in subagent_names:
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs]) allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
return ( return (
f"We cannot invoke subagent {subagent_type} because it does not exist, " f"We cannot invoke subagent {subagent_type} because it does not exist, "
f"the only allowed types are {allowed_types}" f"the only allowed types are {allowed_types}"
@ -701,17 +679,16 @@ def build_task_tool_with_parent_config(
if pending_value is not None: if pending_value is not None:
resume_value = consume_surfsense_resume(runtime) resume_value = consume_surfsense_resume(runtime)
if resume_value is None: if resume_value is None:
# Bridge invariant: a queued resume must accompany any pending # A pending interrupt must have a queued resume; otherwise replay
# subagent interrupt. Fall-through replay would silently re-prompt # would silently re-prompt the user. Raise instead.
# the user; raise so the streaming layer surfaces a clear error.
raise RuntimeError( raise RuntimeError(
f"Subagent {subagent_type!r} has a pending interrupt but no " f"Subagent {subagent_type!r} has a pending interrupt but no "
"surfsense_resume_value on config; resume bridge is broken." "surfsense_resume_value on config; resume bridge is broken."
) )
expected = hitlrequest_action_count(pending_value) expected = hitlrequest_action_count(pending_value)
resume_value = fan_out_decisions_to_match(resume_value, expected) resume_value = fan_out_decisions_to_match(resume_value, expected)
# Prevent the parent's resume payload from leaking into subagent # Stop the parent's resume leaking into subagent interrupts via
# interrupts via langgraph's parent_scratchpad fallback. # langgraph's parent_scratchpad fallback.
drain_parent_null_resume(runtime) drain_parent_null_resume(runtime)
with ot.subagent_invoke_span( with ot.subagent_invoke_span(
subagent_type=subagent_type, path=invoke_path subagent_type=subagent_type, path=invoke_path
@ -827,10 +804,8 @@ def build_task_tool_with_parent_config(
] = None, ] = None,
) -> str | Command: ) -> str | Command:
atask_start = time.perf_counter() atask_start = time.perf_counter()
# Kill switch: when ops flips the spawn-paused flag for this # Ops kill switch: short-circuit every task() call for this workspace
# workspace, every ``task(...)`` invocation (single- or batch-mode) # so the orchestrator stops hammering downstream APIs.
# short-circuits with a clear ToolMessage so the orchestrator can
# tell the user what happened and stop hammering downstream APIs.
if await is_spawn_paused(search_space_id): if await is_spawn_paused(search_space_id):
logger.warning( logger.warning(
"[hitl_route] atask SPAWN_PAUSED: search_space_id=%s tool_call_id=%s", "[hitl_route] atask SPAWN_PAUSED: search_space_id=%s tool_call_id=%s",
@ -869,8 +844,8 @@ def build_task_tool_with_parent_config(
subagent_type, subagent_type,
runtime.tool_call_id, runtime.tool_call_id,
) )
if subagent_type not in subagent_graphs: if subagent_type not in subagent_names:
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs]) allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
return ( return (
f"We cannot invoke subagent {subagent_type} because it does not exist, " f"We cannot invoke subagent {subagent_type} because it does not exist, "
f"the only allowed types are {allowed_types}" f"the only allowed types are {allowed_types}"
@ -921,8 +896,8 @@ def build_task_tool_with_parent_config(
) )
expected = hitlrequest_action_count(pending_value) expected = hitlrequest_action_count(pending_value)
resume_value = fan_out_decisions_to_match(resume_value, expected) resume_value = fan_out_decisions_to_match(resume_value, expected)
# Prevent the parent's resume payload from leaking into subagent # Stop the parent's resume leaking into subagent interrupts via
# interrupts via langgraph's parent_scratchpad fallback. # langgraph's parent_scratchpad fallback.
drain_parent_null_resume(runtime) drain_parent_null_resume(runtime)
with ot.subagent_invoke_span( with ot.subagent_invoke_span(
subagent_type=subagent_type, path=invoke_path subagent_type=subagent_type, path=invoke_path

View file

@ -0,0 +1,15 @@
"""Context-editing middleware: spill + clear-tool-uses passes (impl + builder)."""
from .builder import build_context_editing_mw
from .middleware import (
ClearToolUsesEdit,
SpillingContextEditingMiddleware,
SpillToBackendEdit,
)
__all__ = [
"ClearToolUsesEdit",
"SpillToBackendEdit",
"SpillingContextEditingMiddleware",
"build_context_editing_mw",
]

View file

@ -7,18 +7,18 @@ from typing import Any
from langchain_core.tools import BaseTool from langchain_core.tools import BaseTool
from app.agents.multi_agent_chat.main_agent.context_prune.prune_tool_names import ( from app.agents.chat.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
safe_exclude_tools, safe_exclude_tools,
) )
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ( from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from .middleware import (
ClearToolUsesEdit, ClearToolUsesEdit,
SpillingContextEditingMiddleware, SpillingContextEditingMiddleware,
SpillToBackendEdit, SpillToBackendEdit,
) )
from ..shared.flags import enabled
def build_context_editing_mw( def build_context_editing_mw(
*, *,

View file

@ -1,4 +1,4 @@
"""Middleware that deduplicates HITL tool calls within a single LLM response. """Drop duplicate HITL tool calls before execution.
When the LLM emits multiple calls to the same HITL tool with the same When the LLM emits multiple calls to the same HITL tool with the same
primary argument (e.g. two ``delete_calendar_event("Doctor Appointment")``), primary argument (e.g. two ``delete_calendar_event("Doctor Appointment")``),
@ -9,72 +9,33 @@ the duplicate call is stripped from the AIMessage that gets checkpointed.
That means it is also safe across LangGraph ``interrupt()`` boundaries: That means it is also safe across LangGraph ``interrupt()`` boundaries:
the removed call will never appear on graph resume. the removed call will never appear on graph resume.
Dedup-key resolution order: Dedup-key resolution order (read from each tool's own ``metadata``):
1. :class:`ToolDefinition.dedup_key` callable provided by the registry 1. ``tool.metadata["dedup_key"]`` callable mapping the args dict to a
entry. This is the canonical mechanism. stable signature string. This is the canonical mechanism.
2. ``tool.metadata["hitl_dedup_key"]`` string with a primary arg name; 2. ``tool.metadata["hitl_dedup_key"]`` string naming a primary arg;
used by MCP / Composio tools whose schemas the registry doesn't see. used by MCP / Composio tools that only expose a single key field.
A tool with no resolver from either path simply opts out of dedup. A tool with no resolver from either path simply opts out of dedup.
""" """
from __future__ import annotations from __future__ import annotations
import json
import logging import logging
from collections.abc import Callable from collections.abc import Sequence
from typing import Any from typing import Any
from langchain.agents.middleware import AgentMiddleware, AgentState from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.tools import BaseTool
from langgraph.runtime import Runtime from langgraph.runtime import Runtime
from app.agents.chat.multi_agent_chat.shared.middleware.dedup_tool_calls import (
DedupResolver,
wrap_dedup_key_by_arg_name,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Resolver type — given the tool ``args`` dict returns a stable
# string used to dedupe consecutive calls. ``None`` means no dedup.
DedupResolver = Callable[[dict[str, Any]], str]
def wrap_dedup_key_by_arg_name(arg_name: str) -> DedupResolver:
"""Adapt a string-arg name into a :data:`DedupResolver`.
Convenience helper used by registry entries that just want to dedupe
on a single arg's lowercased value (the most common case for native
HITL tools like ``send_gmail_email`` keyed on ``subject``).
Example::
ToolDefinition(
name="send_gmail_email",
...,
dedup_key=wrap_dedup_key_by_arg_name("subject"),
)
"""
def _resolver(args: dict[str, Any]) -> str:
return str(args.get(arg_name, "")).lower()
return _resolver
def dedup_key_full_args(args: dict[str, Any]) -> str:
"""Resolver that collapses calls only when **every** argument is identical.
Safe default for tools where no single field uniquely identifies a call
(e.g. MCP tools whose first required field is a shared workspace id).
"""
try:
return json.dumps(args, sort_keys=True, default=str)
except (TypeError, ValueError):
return repr(sorted(args.items())) if isinstance(args, dict) else repr(args)
# Backwards-compatible alias for code that imported the original
# private name. New callers should use :func:`wrap_dedup_key_by_arg_name`.
_wrap_string_key = wrap_dedup_key_by_arg_name
class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg] class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg]
"""Remove duplicate HITL tool calls from a single LLM response. """Remove duplicate HITL tool calls from a single LLM response.
@ -84,9 +45,8 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg]
The dedup-resolver map is built from two sources, in priority order: The dedup-resolver map is built from two sources, in priority order:
1. ``tool.metadata["dedup_key"]`` callable provided by the registry's 1. ``tool.metadata["dedup_key"]`` callable that receives the args dict
``ToolDefinition.dedup_key``. Receives the args dict and returns and returns a string signature. This is the canonical mechanism.
a string signature. This is the canonical mechanism.
2. ``tool.metadata["hitl_dedup_key"]`` string with a primary arg 2. ``tool.metadata["hitl_dedup_key"]`` string with a primary arg
name; primarily used by MCP / Composio tools. name; primarily used by MCP / Composio tools.
""" """
@ -162,3 +122,7 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg]
updated_msg = last_msg.model_copy(update={"tool_calls": deduped}) updated_msg = last_msg.model_copy(update={"tool_calls": deduped})
return {"messages": [updated_msg]} return {"messages": [updated_msg]}
def build_dedup_hitl_mw(tools: Sequence[BaseTool]) -> DedupHITLToolCallsMiddleware:
return DedupHITLToolCallsMiddleware(agent_tools=list(tools))

View file

@ -0,0 +1,9 @@
"""Doom-loop middleware: detect repeated identical tool calls (impl + builder)."""
from .builder import build_doom_loop_mw
from .middleware import DoomLoopMiddleware
__all__ = [
"DoomLoopMiddleware",
"build_doom_loop_mw",
]

View file

@ -2,10 +2,10 @@
from __future__ import annotations from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import DoomLoopMiddleware from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled from .middleware import DoomLoopMiddleware
def build_doom_loop_mw(flags: AgentFeatureFlags) -> DoomLoopMiddleware | None: def build_doom_loop_mw(flags: AgentFeatureFlags) -> DoomLoopMiddleware | None:

View file

@ -16,7 +16,7 @@ This ships **OFF by default** until the frontend explicitly handles
``context.permission == "doom_loop"`` interrupts. ``context.permission == "doom_loop"`` interrupts.
Wire format: uses SurfSense's existing ``interrupt()`` payload shape Wire format: uses SurfSense's existing ``interrupt()`` payload shape
(see ``app/agents/new_chat/tools/hitl.py``): (see ``app/agents/shared/tools/hitl.py``):
{ {
"type": "permission_ask", "type": "permission_ask",

View file

@ -0,0 +1,13 @@
"""End-of-turn KB persistence middleware (main-agent only)."""
from .builder import build_kb_persistence_mw
from .middleware import (
KnowledgeBasePersistenceMiddleware,
commit_staged_filesystem_state,
)
__all__ = [
"KnowledgeBasePersistenceMiddleware",
"build_kb_persistence_mw",
"commit_staged_filesystem_state",
]

View file

@ -2,8 +2,11 @@
from __future__ import annotations from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgeBasePersistenceMiddleware
from .middleware import (
KnowledgeBasePersistenceMiddleware,
)
def build_kb_persistence_mw( def build_kb_persistence_mw(

View file

@ -1,33 +1,19 @@
"""End-of-turn persistence for the cloud-mode SurfSense filesystem. """End-of-turn persistence for the cloud-mode SurfSense filesystem.
This middleware runs ``aafter_agent`` once per turn (cloud only). It commits Runs ``aafter_agent`` once per turn (cloud only), committing staged folder
all staged folder creations, file moves, content writes/edits, file deletes creates, moves, writes/edits, and ``rm``/``rmdir`` to Postgres in one ordered
(``rm``), and directory deletes (``rmdir``) to Postgres in a single ordered pass. Order matters: moves resolve before writes (so write-then-move lands at
pass: the final path), and file deletes run before directory deletes (so a same-turn
``rm /a/x.md`` + ``rmdir /a`` works).
1. Materialize ``staged_dirs`` into ``Folder`` rows. When ``flags.enable_action_log`` is on, each destructive op also snapshots a
2. Apply ``pending_moves`` in order (chained moves resolved via ``DocumentRevision`` / ``FolderRevision`` for revert. For ``rm``/``rmdir`` the
``doc_id_by_path``). snapshot and DELETE share a SAVEPOINT, so a failed snapshot aborts the delete
3. Normalize ``dirty_paths`` through ``pending_moves`` so write-then-move rather than making the data silently irreversible.
sequences commit at the final path. Paths queued for ``rm`` this turn
are dropped here so a write+rm sequence doesn't recreate the doc.
4. Commit content writes / edits for ``/documents/*`` paths, skipping
``temp_*`` basenames.
5. Apply ``pending_deletes`` (``rm``) — file deletes run BEFORE directory
deletes so a same-turn ``rm /a/x.md`` + ``rmdir /a`` sequence works.
6. Apply ``pending_dir_deletes`` (``rmdir``); re-verifies emptiness against
the post-step-5 DB state.
When ``flags.enable_action_log`` is on every destructive op also writes a The commit body is a free function (``commit_staged_filesystem_state``) so the
``DocumentRevision`` / ``FolderRevision`` snapshot bound to the stream-task fallback can run the identical routine when ``aafter_agent`` was
originating ``AgentActionLog`` row via ``tool_call_id``. ``rm``/``rmdir`` skipped (e.g. client disconnect).
share a single ``SAVEPOINT`` with their snapshot — if the snapshot fails
the DELETE rolls back and we surface the error rather than silently
making the data irreversible.
The commit body is exposed as a free function ``commit_staged_filesystem_state``
so the optional stream-task fallback (``stream_new_chat.py``) can call the
exact same routine when ``aafter_agent`` was skipped (e.g. client disconnect).
""" """
from __future__ import annotations from __future__ import annotations
@ -40,22 +26,28 @@ from typing import Any
from fractional_indexing import generate_key_between from fractional_indexing import generate_key_between
from langchain.agents.middleware import AgentMiddleware, AgentState from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.callbacks import adispatch_custom_event, dispatch_custom_event from langchain_core.callbacks import adispatch_custom_event, dispatch_custom_event
from langgraph.config import get_config
from langgraph.runtime import Runtime from langgraph.runtime import Runtime
from sqlalchemy import delete, select, update from sqlalchemy import delete, select, update
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.feature_flags import get_flags from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState from app.agents.chat.multi_agent_chat.shared.receipts.receipt import (
from app.agents.new_chat.path_resolver import ( Receipt,
make_receipt,
)
from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
SurfSenseFilesystemState,
)
from app.agents.chat.multi_agent_chat.shared.state.reducers import _CLEAR
from app.agents.chat.runtime.path_resolver import (
DOCUMENTS_ROOT, DOCUMENTS_ROOT,
parse_documents_path, parse_documents_path,
safe_folder_segment, safe_folder_segment,
virtual_path_to_doc, virtual_path_to_doc,
) )
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.receipt import Receipt, make_receipt
from app.db import ( from app.db import (
AgentActionLog, AgentActionLog,
Chunk, Chunk,
@ -211,11 +203,9 @@ async def _create_document(
virtual_path, virtual_path,
search_space_id, search_space_id,
) )
# Filesystem-parity invariant: the only thing that *must* be unique is # Pre-check the path-derived unique_identifier_hash so a duplicate path
# the path. Two notes can legitimately share content (e.g. ``cp a b``). # surfaces as a clean ValueError instead of an INSERT IntegrityError that
# Guard against the path-derived ``unique_identifier_hash`` constraint # poisons the session. Content is intentionally not unique (cp a b).
# so we surface a clean ValueError instead of letting the INSERT poison
# the session with an IntegrityError.
path_collision = await session.execute( path_collision = await session.execute(
select(Document.id).where( select(Document.id).where(
Document.search_space_id == search_space_id, Document.search_space_id == search_space_id,
@ -227,13 +217,6 @@ async def _create_document(
f"a document already exists at path '{virtual_path}' " f"a document already exists at path '{virtual_path}' "
"(unique_identifier_hash collision)" "(unique_identifier_hash collision)"
) )
# ``content_hash`` is intentionally NOT checked for uniqueness here.
# In a real filesystem two files at different paths can hold identical
# bytes, and the agent's ``write_file`` path needs that semantic to
# support copy/duplicate operations. The hash remains useful as a
# change-detection hint for connector indexers, which still consult it
# via :func:`check_duplicate_document` but do so with a non-unique
# lookup (``.first()``).
content_hash = generate_content_hash(content, search_space_id) content_hash = generate_content_hash(content, search_space_id)
doc = Document( doc = Document(
title=title, title=title,
@ -430,15 +413,9 @@ async def _mark_action_reversible(
) -> None: ) -> None:
"""Flip ``agent_action_log.reversible = TRUE`` for ``action_id``. """Flip ``agent_action_log.reversible = TRUE`` for ``action_id``.
Best-effort: caller may invoke from inside a SAVEPOINT and treat Pair with ``_dispatch_reversibility_update`` *after* the enclosing
failure as a soft demotion (snapshot persists, just no Revert button). SAVEPOINT commits, so the UI never sees ``reversible=true`` for a row whose
update later rolls back.
Callers should also call ``_dispatch_reversibility_update`` (defined
below) AFTER the enclosing SAVEPOINT block exits successfully so the
chat tool card can light up its Revert button without
re-fetching ``GET /threads/.../actions``. Dispatching from inside the
SAVEPOINT would risk emitting "reversible=true" for rows whose
update gets rolled back if the surrounding destructive op fails.
""" """
if action_id is None: if action_id is None:
return return
@ -450,22 +427,11 @@ async def _mark_action_reversible(
async def _dispatch_reversibility_update(action_id: int | None) -> None: async def _dispatch_reversibility_update(action_id: int | None) -> None:
"""Best-effort dispatch of an ``action_log_updated`` custom event. """Emit an ``action_log_updated`` SSE event so the Revert button lights up.
Surfaces the post-SAVEPOINT reversibility flip to the SSE layer so Best-effort (failures swallowed; the REST actions endpoint is
the chat tool card can flip its Revert button live. Defensive: authoritative). Inside :func:`commit_staged_filesystem_state` this is
failures are logged at debug level and swallowed; the deferred until after the outer commit via ``deferred_dispatches``.
REST endpoint ``GET /threads/.../actions`` is still authoritative.
.. warning::
Inside :func:`commit_staged_filesystem_state` we DEFER all
dispatches until the outer ``session.commit()`` succeeds — see
the ``deferred_dispatches`` queue in that function. Dispatching
from inside a SAVEPOINT block while the outer transaction is
still pending would emit ``reversible=true`` for rows whose
snapshots get rolled back if the outer commit fails. Direct
callers (e.g. the optional stream-task fallback) that own the
full session lifetime can still call this helper inline.
""" """
if action_id is None: if action_id is None:
return return
@ -484,12 +450,9 @@ async def _dispatch_reversibility_update(action_id: int | None) -> None:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Snapshot helpers # Snapshot helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# # Best-effort variants (write/edit/move/mkdir) swallow failures. Strict
# Best-effort helpers swallow + log so a snapshot failure can never break # variants (rm/rmdir) share the destructive op's SAVEPOINT so a snapshot
# the destructive op for non-destructive tools (write/edit/move/mkdir). # failure aborts the delete instead of making it silently irreversible.
# Strict helpers run inside the SAME ``begin_nested()`` SAVEPOINT as the
# destructive DELETE — failure aborts the savepoint and leaves the doc /
# folder intact, so revertable ops never become irreversible silently.
def _doc_revision_payload( def _doc_revision_payload(
@ -699,15 +662,9 @@ async def commit_staged_filesystem_state(
) -> dict[str, Any] | None: ) -> dict[str, Any] | None:
"""Commit all staged filesystem changes; return the state delta for reducers. """Commit all staged filesystem changes; return the state delta for reducers.
Shared between :class:`KnowledgeBasePersistenceMiddleware.aafter_agent` Shared between :class:`KnowledgeBasePersistenceMiddleware.aafter_agent` and
and the optional stream-task fallback. the stream-task fallback. See the module docstring for ordering and the
action-log snapshot/revert semantics.
When ``flags.enable_action_log`` is on every destructive op also writes
a ``DocumentRevision`` / ``FolderRevision`` snapshot bound to the
originating ``AgentActionLog`` row via ``tool_call_id``. Snapshot
durability is best-effort for non-destructive ops and STRICT for
``rm``/``rmdir`` (snapshot + DELETE share a SAVEPOINT — snapshot
failure aborts the delete).
""" """
if filesystem_mode != FilesystemMode.CLOUD: if filesystem_mode != FilesystemMode.CLOUD:
return None return None
@ -766,8 +723,7 @@ async def commit_staged_filesystem_state(
flags = get_flags() flags = get_flags()
snapshot_enabled = flags.enable_action_log snapshot_enabled = flags.enable_action_log
# De-duplicate pending deletes per-path while preserving the latest # De-dup deletes per-path, keeping the latest tool_call_id (likeliest revert).
# tool_call_id (the one the user is most likely to revert via the UI).
file_delete_paths: dict[str, str] = {} file_delete_paths: dict[str, str] = {}
for entry in pending_deletes: for entry in pending_deletes:
if not isinstance(entry, dict): if not isinstance(entry, dict):
@ -791,22 +747,14 @@ async def commit_staged_filesystem_state(
applied_moves: list[dict[str, Any]] = [] applied_moves: list[dict[str, Any]] = []
doc_id_path_tombstones: dict[str, int | None] = {} doc_id_path_tombstones: dict[str, int | None] = {}
tree_changed = False tree_changed = False
# Reversibility-flip dispatches are deferred until AFTER the outer # Reversibility-flip dispatches are drained only after the outer commit
# ``session.commit()`` succeeds. Dispatching from inside the # succeeds (and abandoned on rollback), so the UI never sees reversible=true
# SAVEPOINT chain while the outer transaction is still pending # for a snapshot that didn't durably land.
# would emit ``reversible=true`` for rows whose snapshots get rolled
# back if the final commit raises. Snapshot helpers append on
# success; we drain this list after commit and silently abandon it
# on rollback so the UI stays consistent with durable state.
deferred_dispatches: list[int] = [] deferred_dispatches: list[int] = []
try: try:
async with shielded_async_session() as session: async with shielded_async_session() as session:
# ------------------------------------------------------------------ # Resolve all action-id bindings in one SELECT per turn, not per op.
# Resolve action-id bindings up front. One SELECT per turn for all
# tool_call_ids, NOT one per op — important because a turn that
# touches 50 paths would otherwise issue 50 lookups.
# ------------------------------------------------------------------
action_id_by_call: dict[str, int] = {} action_id_by_call: dict[str, int] = {}
if snapshot_enabled and thread_id is not None: if snapshot_enabled and thread_id is not None:
tool_call_ids: set[str] = set() tool_call_ids: set[str] = set()
@ -839,10 +787,7 @@ async def commit_staged_filesystem_state(
next(iter(action_id_by_call), None) if action_id_by_call else None next(iter(action_id_by_call), None) if action_id_by_call else None
) )
# ------------------------------------------------------------------ # 1. staged_dirs -> Folder rows (snapshot post-flush for the FK).
# 1. staged_dirs -> Folder rows. Snapshot post-flush so the new
# folder_id is available for the FK.
# ------------------------------------------------------------------
for folder_path in staged_dirs: for folder_path in staged_dirs:
if not isinstance(folder_path, str): if not isinstance(folder_path, str):
continue continue
@ -863,7 +808,6 @@ async def commit_staged_filesystem_state(
tcid = staged_dir_tool_calls.get(folder_path) tcid = staged_dir_tool_calls.get(folder_path)
action_id = _action_id_for(tcid) action_id = _action_id_for(tcid)
if action_id is not None: if action_id is not None:
# Re-read the folder for the snapshot.
result = await session.execute( result = await session.execute(
select(Folder).where(Folder.id == folder_id) select(Folder).where(Folder.id == folder_id)
) )
@ -878,16 +822,13 @@ async def commit_staged_filesystem_state(
deferred_dispatches=deferred_dispatches, deferred_dispatches=deferred_dispatches,
) )
# ------------------------------------------------------------------ # 2. pending_moves (snapshot pre-move for in-place restore on revert).
# 2. pending_moves. Snapshot pre-move (in-place restore on revert).
# ------------------------------------------------------------------
for move in pending_moves: for move in pending_moves:
source = str(move.get("source") or "") source = str(move.get("source") or "")
if snapshot_enabled and source: if snapshot_enabled and source:
tcid = str(move.get("tool_call_id") or "") tcid = str(move.get("tool_call_id") or "")
action_id = _action_id_for(tcid) action_id = _action_id_for(tcid)
if action_id is not None: if action_id is not None:
# Resolve the doc to snapshot BEFORE we mutate it.
doc_id_pre = doc_id_by_path.get(source) doc_id_pre = doc_id_by_path.get(source)
document_pre: Document | None = None document_pre: Document | None = None
if doc_id_pre is not None: if doc_id_pre is not None:
@ -937,10 +878,8 @@ async def commit_staged_filesystem_state(
path = move_alias[path] path = move_alias[path]
return path return path
# ------------------------------------------------------------------ # 3. dirty_paths -> writes/edits. Paths queued for rm this turn are
# 3. dirty_paths -> writes/edits. Skip any path queued for ``rm`` # skipped so a write+rm sequence doesn't recreate the doc.
# this turn so a write+rm sequence doesn't recreate the doc.
# ------------------------------------------------------------------
kb_dirty_seen: set[str] = set() kb_dirty_seen: set[str] = set()
kb_dirty: list[str] = [] kb_dirty: list[str] = []
kb_dirty_origin: dict[str, str] = {} kb_dirty_origin: dict[str, str] = {}
@ -969,9 +908,7 @@ async def commit_staged_filesystem_state(
continue continue
content = "\n".join(file_data.get("content") or []) content = "\n".join(file_data.get("content") or [])
doc_id = doc_id_by_path.get(path) doc_id = doc_id_by_path.get(path)
# Path ↔ tool_call_id binding: the dirty_paths list dedupes via # Look up tool_call_id by final path or its pre-rename origin.
# _add_unique_reducer, so we look up the latest tool_call_id by
# path (or by the un-renamed origin).
origin = kb_dirty_origin.get(path, path) origin = kb_dirty_origin.get(path, path)
tcid = dirty_path_tool_calls.get(path) or dirty_path_tool_calls.get( tcid = dirty_path_tool_calls.get(path) or dirty_path_tool_calls.get(
origin origin
@ -979,12 +916,9 @@ async def commit_staged_filesystem_state(
action_id = _action_id_for(tcid) action_id = _action_id_for(tcid)
if doc_id is None: if doc_id is None:
# The in-memory ``doc_id_by_path`` is per-thread and starts # doc_id_by_path is per-thread and empty in a new chat, so a
# empty in every new chat. If the agent writes to a path # write to a path already in the DB must update in place, not
# that already exists in the DB (e.g. a previous chat's # INSERT (which would hit the path-derived unique hash).
# ``notes.md``), we must NOT try to INSERT — it would hit
# ``unique_identifier_hash`` (path-derived). Look up the
# existing doc and update it in place instead.
existing = await virtual_path_to_doc( existing = await virtual_path_to_doc(
session, session,
search_space_id=search_space_id, search_space_id=search_space_id,
@ -1033,12 +967,9 @@ async def commit_staged_filesystem_state(
} }
) )
else: else:
# Fresh create. Wrap each create in a SAVEPOINT so a # Fresh create, wrapped in a SAVEPOINT so a residual
# residual ``IntegrityError`` (e.g. a deployment that # IntegrityError (e.g. pre-migration-133 content_hash UNIQUE)
# hasn't run migration 133 yet, where # rolls back only this create, not the whole turn.
# ``documents.content_hash`` still carries its legacy
# global UNIQUE constraint) rolls back only this one
# create instead of poisoning the whole turn.
placeholder_revision_id: int | None = None placeholder_revision_id: int | None = None
if snapshot_enabled and action_id is not None: if snapshot_enabled and action_id is not None:
placeholder_revision_id = await _snapshot_document_pre_create( placeholder_revision_id = await _snapshot_document_pre_create(
@ -1061,8 +992,7 @@ async def commit_staged_filesystem_state(
logger.warning( logger.warning(
"kb_persistence: skipping %s create: %s", path, exc "kb_persistence: skipping %s create: %s", path, exc
) )
# Roll back the placeholder revision since the create # Create never happened; drop its placeholder revision.
# never happened.
if placeholder_revision_id is not None: if placeholder_revision_id is not None:
await session.execute( await session.execute(
delete(DocumentRevision).where( delete(DocumentRevision).where(
@ -1109,19 +1039,14 @@ async def commit_staged_filesystem_state(
) )
tree_changed = True tree_changed = True
# ------------------------------------------------------------------ # 4. pending_deletes -> rm. Strict: snapshot + DELETE share a
# 4. pending_deletes -> ``rm``. STRICT durability: snapshot + DELETE # SAVEPOINT, so a failed snapshot rolls the delete back too.
# share a SAVEPOINT. If the snapshot insert fails, the DELETE
# rolls back too and we surface the error rather than silently
# making the data irreversible.
# ------------------------------------------------------------------
for raw_path, tcid in file_delete_paths.items(): for raw_path, tcid in file_delete_paths.items():
final = _final_path(raw_path) final = _final_path(raw_path)
if not final.startswith(DOCUMENTS_ROOT + "/"): if not final.startswith(DOCUMENTS_ROOT + "/"):
continue continue
action_id = _action_id_for(tcid) action_id = _action_id_for(tcid)
# Resolve the doc.
doc_id_for_delete = doc_id_by_path.get(final) doc_id_for_delete = doc_id_by_path.get(final)
document_to_delete: Document | None = None document_to_delete: Document | None = None
if doc_id_for_delete is not None: if doc_id_for_delete is not None:
@ -1150,7 +1075,6 @@ async def commit_staged_filesystem_state(
try: try:
async with session.begin_nested(): async with session.begin_nested():
# Strict: snapshot first; failure aborts the delete.
if snapshot_enabled and action_id is not None: if snapshot_enabled and action_id is not None:
chunks = await _load_chunks_for_snapshot( chunks = await _load_chunks_for_snapshot(
session, doc_id=doc_pk session, doc_id=doc_pk
@ -1179,10 +1103,7 @@ async def commit_staged_filesystem_state(
) )
continue continue
# B1 — SAVEPOINT released. Defer the reversibility-flip # Defer the reversibility flip until after the outer commit.
# dispatch until AFTER the outer commit succeeds so we
# never tell the UI a row is reversible if its snapshot
# gets rolled back.
if snapshot_enabled and action_id is not None: if snapshot_enabled and action_id is not None:
deferred_dispatches.append(int(action_id)) deferred_dispatches.append(int(action_id))
@ -1201,11 +1122,8 @@ async def commit_staged_filesystem_state(
) )
tree_changed = True tree_changed = True
# ------------------------------------------------------------------ # 5. pending_dir_deletes -> rmdir. Strict, and re-checks emptiness
# 5. pending_dir_deletes -> ``rmdir``. STRICT durability + final # against post-step-4 DB state.
# emptiness check (after step 4's deletes have run, an "empty
# mid-turn" directory really IS empty in DB now).
# ------------------------------------------------------------------
for raw_path, tcid in dir_delete_paths.items(): for raw_path, tcid in dir_delete_paths.items():
final = _final_path(raw_path) final = _final_path(raw_path)
if not final.startswith(DOCUMENTS_ROOT + "/"): if not final.startswith(DOCUMENTS_ROOT + "/"):
@ -1226,7 +1144,6 @@ async def commit_staged_filesystem_state(
) )
continue continue
# Re-check emptiness against in-DB state.
docs_in_folder = await session.execute( docs_in_folder = await session.execute(
select(Document.id) select(Document.id)
.where(Document.folder_id == folder_id) .where(Document.folder_id == folder_id)
@ -1291,10 +1208,7 @@ async def commit_staged_filesystem_state(
) )
continue continue
# B1 — SAVEPOINT released. Defer the reversibility-flip # Defer the reversibility flip until after the outer commit.
# dispatch until AFTER the outer commit succeeds so we
# never tell the UI a row is reversible if its snapshot
# gets rolled back.
if snapshot_enabled and action_id is not None: if snapshot_enabled and action_id is not None:
deferred_dispatches.append(int(action_id)) deferred_dispatches.append(int(action_id))
@ -1314,18 +1228,13 @@ async def commit_staged_filesystem_state(
logger.exception( logger.exception(
"kb_persistence: commit failed (search_space=%s)", search_space_id "kb_persistence: commit failed (search_space=%s)", search_space_id
) )
# Outer commit raised — every SAVEPOINT-released change above # Outer commit raised: everything above rolled back, so drop the
# (snapshots + reversibility flips) is now rolled back. Drop # deferred dispatches.
# the deferred SSE dispatches so the UI stays consistent with
# durable state.
deferred_dispatches.clear() deferred_dispatches.clear()
return None return None
# Outer commit succeeded; flush deferred reversibility-flip # Commit succeeded; flush deferred reversibility flips (de-duped, since
# dispatches now so the chat tool card can light up its Revert # write-then-rm in one turn appends an id per snapshot site).
# button without re-fetching ``GET /threads/.../actions``. De-dup
# to avoid emitting the same id twice (e.g. write-then-rm in the
# same turn dispatches once for each snapshot site).
if deferred_dispatches and dispatch_events: if deferred_dispatches and dispatch_events:
for action_id in dict.fromkeys(deferred_dispatches): for action_id in dict.fromkeys(deferred_dispatches):
try: try:
@ -1371,9 +1280,8 @@ async def commit_staged_filesystem_state(
p for p in files if isinstance(p, str) and _basename(p).startswith(_TEMP_PREFIX) p for p in files if isinstance(p, str) and _basename(p).startswith(_TEMP_PREFIX)
] ]
# Tombstone every committed-delete path so a stale ``state["files"]`` entry # Tombstone committed-delete paths so a stale state["files"] entry can't
# (which als_info would otherwise interpret as content) cannot survive into # survive into the next turn and make a now-empty folder look non-empty.
# the next turn and make a now-empty folder look non-empty.
deleted_file_paths = [ deleted_file_paths = [
str(payload.get("virtualPath") or "") str(payload.get("virtualPath") or "")
for payload in committed_deletes for payload in committed_deletes
@ -1394,11 +1302,8 @@ async def commit_staged_filesystem_state(
"dirty_path_tool_calls": {_CLEAR: True}, "dirty_path_tool_calls": {_CLEAR: True},
} }
# Emit one Receipt per committed mutation, folded into ``state['receipts']`` # One Receipt per committed mutation: ground truth (post-savepoint) for the
# via ``_list_append_reducer``. The receipts surface what actually committed # orchestrator's <verification> teaching. KB writes have no public URL.
# (post-savepoint) rather than what the LLM intended; the orchestrator uses
# them as ground truth in the ``<verification>`` teaching. KB writes do not
# have public verifiable URLs, so ``verifiable_url`` stays unset.
receipts: list[Receipt] = [] receipts: list[Receipt] = []
def _kb_receipt( def _kb_receipt(
@ -1439,8 +1344,6 @@ async def commit_staged_filesystem_state(
external_id=payload.get("id"), external_id=payload.get("id"),
) )
for payload in applied_moves: for payload in applied_moves:
# ``applied_moves`` rows carry the destination ``virtualPath`` because
# the move has already landed in the DB by the time we reach this code.
path = str(payload.get("virtualPath") or "") path = str(payload.get("virtualPath") or "")
_kb_receipt( _kb_receipt(
type="file", type="file",
@ -1480,9 +1383,7 @@ async def commit_staged_filesystem_state(
if tree_changed: if tree_changed:
delta["tree_version"] = int(state_dict.get("tree_version") or 0) + 1 delta["tree_version"] = int(state_dict.get("tree_version") or 0) + 1
# Avoid 'unused' lint when turn_id_for_revision was only useful for _ = turn_id_for_revision # diagnostic-only; silence unused lint
# diagnostic purposes inside the SAVEPOINT chain above.
_ = turn_id_for_revision
logger.info( logger.info(
"kb_persistence: commit (search_space=%s) creates=%d updates=%d " "kb_persistence: commit (search_space=%s) creates=%d updates=%d "
@ -1536,9 +1437,33 @@ class KnowledgeBasePersistenceMiddleware(AgentMiddleware): # type: ignore[type-
search_space_id=self.search_space_id, search_space_id=self.search_space_id,
created_by_id=self.created_by_id, created_by_id=self.created_by_id,
filesystem_mode=self.filesystem_mode, filesystem_mode=self.filesystem_mode,
thread_id=self.thread_id, thread_id=self._resolve_thread_id(),
) )
def _resolve_thread_id(self) -> int | None:
    """Return the thread id of the run that is currently executing.

    ``aafter_agent`` only receives a ``Runtime`` (which does NOT carry the
    config), so ``configurable.thread_id`` is read from the active
    ``RunnableConfig`` via :func:`langgraph.config.get_config` — the same
    node-context pattern used by ``BusyMutexMiddleware``. Looking the id up
    per call (rather than trusting the value captured at ``__init__``) lets
    one cached compiled graph commit staged writes against the correct
    thread across many chats.

    Returns:
        * ``int(configurable["thread_id"])`` when the active config is a
          dict carrying a non-``None`` thread id;
        * ``None`` when that id is present but cannot be converted to an
          ``int``;
        * ``self.thread_id`` (the constructor value) when no config can be
          obtained, the config is not a dict, or it has no thread id —
          the legacy/test-runtime fallback.
    """
    try:
        active_config = get_config()
    except Exception:
        # No readable config in this context: use the constructor value.
        return self.thread_id

    if not isinstance(active_config, dict):
        return self.thread_id

    # A missing or falsy ``configurable`` is treated as an empty mapping.
    configurable = active_config.get("configurable") or {}
    raw_thread_id = configurable.get("thread_id")
    if raw_thread_id is None:
        return self.thread_id

    try:
        return int(raw_thread_id)
    except (TypeError, ValueError):
        # A thread id is configured but is not int-convertible; report
        # "unknown" instead of falling back to the constructor value.
        return None
__all__ = [ __all__ = [
"KnowledgeBasePersistenceMiddleware", "KnowledgeBasePersistenceMiddleware",

View file

@ -4,8 +4,10 @@ from __future__ import annotations
from langchain_core.language_models import BaseChatModel from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgePriorityMiddleware from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
KnowledgePriorityMiddleware,
)
from app.services.llm_service import get_planner_llm from app.services.llm_service import get_planner_llm
@ -17,7 +19,16 @@ def build_knowledge_priority_mw(
available_connectors: list[str] | None, available_connectors: list[str] | None,
available_document_types: list[str] | None, available_document_types: list[str] | None,
mentioned_document_ids: list[int] | None, mentioned_document_ids: list[int] | None,
preinjection_enabled: bool = True,
) -> KnowledgePriorityMiddleware: ) -> KnowledgePriorityMiddleware:
"""Build the KB priority middleware.
When ``preinjection_enabled`` is False (lazy mode — note that this
parameter itself defaults to True, so callers must pass False to opt in),
the middleware runs in mentions-only mode: it skips the expensive planner
LLM + embedding + hybrid search and only surfaces explicit @-mentions. The
main agent is expected to pull relevant KB content on demand via the
"""
return KnowledgePriorityMiddleware( return KnowledgePriorityMiddleware(
llm=llm, llm=llm,
planner_llm=get_planner_llm(), planner_llm=get_planner_llm(),
@ -27,4 +38,5 @@ def build_knowledge_priority_mw(
available_document_types=available_document_types, available_document_types=available_document_types,
mentioned_document_ids=mentioned_document_ids, mentioned_document_ids=mentioned_document_ids,
inject_system_message=False, inject_system_message=False,
mentions_only=not preinjection_enabled,
) )

View file

@ -0,0 +1,9 @@
"""Knowledge-tree middleware: <workspace_tree> injection, cloud only (impl + builder)."""
from .builder import build_knowledge_tree_mw
from .middleware import KnowledgeTreeMiddleware
__all__ = [
"KnowledgeTreeMiddleware",
"build_knowledge_tree_mw",
]

View file

@ -4,8 +4,9 @@ from __future__ import annotations
from langchain_core.language_models import BaseChatModel from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgeTreeMiddleware
from .middleware import KnowledgeTreeMiddleware
def build_knowledge_tree_mw( def build_knowledge_tree_mw(

View file

@ -33,9 +33,11 @@ from langchain_core.messages import SystemMessage
from langgraph.runtime import Runtime from langgraph.runtime import Runtime
from sqlalchemy import select from sqlalchemy import select
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
from app.agents.new_chat.path_resolver import ( SurfSenseFilesystemState,
)
from app.agents.chat.runtime.path_resolver import (
DOCUMENTS_ROOT, DOCUMENTS_ROOT,
PathIndex, PathIndex,
build_path_index, build_path_index,

View file

@ -0,0 +1,5 @@
"""User/team memory injection middleware (main-agent only)."""
from .builder import build_memory_mw
__all__ = ["build_memory_mw"]

View file

@ -2,9 +2,10 @@
from __future__ import annotations from __future__ import annotations
from app.agents.new_chat.middleware import MemoryInjectionMiddleware
from app.db import ChatVisibility from app.db import ChatVisibility
from .middleware import MemoryInjectionMiddleware
def build_memory_mw( def build_memory_mw(
*, *,

View file

@ -0,0 +1,9 @@
"""Noop-injection middleware: provider-compat _noop tool (impl + builder)."""
from .builder import build_noop_injection_mw
from .middleware import NoopInjectionMiddleware
__all__ = [
"NoopInjectionMiddleware",
"build_noop_injection_mw",
]

View file

@ -2,10 +2,10 @@
from __future__ import annotations from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import NoopInjectionMiddleware from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled from .middleware import NoopInjectionMiddleware
def build_noop_injection_mw(flags: AgentFeatureFlags) -> NoopInjectionMiddleware | None: def build_noop_injection_mw(flags: AgentFeatureFlags) -> NoopInjectionMiddleware | None:

View file

@ -0,0 +1,9 @@
"""OTel-span middleware: spans on model and tool calls (impl + builder)."""
from .builder import build_otel_mw
from .middleware import OtelSpanMiddleware
__all__ = [
"OtelSpanMiddleware",
"build_otel_mw",
]

View file

@ -2,10 +2,10 @@
from __future__ import annotations from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import OtelSpanMiddleware from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled from .middleware import OtelSpanMiddleware
def build_otel_mw(flags: AgentFeatureFlags) -> OtelSpanMiddleware | None: def build_otel_mw(flags: AgentFeatureFlags) -> OtelSpanMiddleware | None:

View file

@ -24,6 +24,7 @@ from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import AIMessage, ToolMessage from langchain_core.messages import AIMessage, ToolMessage
from app.observability import metrics as ot_metrics, otel as ot from app.observability import metrics as ot_metrics, otel as ot
from app.utils.perf import get_perf_logger
if TYPE_CHECKING: # pragma: no cover — type-only if TYPE_CHECKING: # pragma: no cover — type-only
from langchain.agents.middleware.types import ( from langchain.agents.middleware.types import (
@ -34,6 +35,7 @@ if TYPE_CHECKING: # pragma: no cover — type-only
from langgraph.types import Command from langgraph.types import Command
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_perf_log = get_perf_logger()
class OtelSpanMiddleware(AgentMiddleware): class OtelSpanMiddleware(AgentMiddleware):
@ -60,7 +62,23 @@ class OtelSpanMiddleware(AgentMiddleware):
handler: Callable[[ModelRequest], Awaitable[ModelResponse | AIMessage | Any]], handler: Callable[[ModelRequest], Awaitable[ModelResponse | AIMessage | Any]],
) -> ModelResponse | AIMessage | Any: ) -> ModelResponse | AIMessage | Any:
if not ot.is_enabled(): if not ot.is_enabled():
return await handler(request) # Always emit a [PERF] line for the model step even when OTel is
# disabled. This isolates provider/model latency from the agent's
# pre-flight (before_agent KB-priority/memory/tree) work, which is
# the usual culprit when the multi-agent path feels slow to start.
# ``perf_counter`` at entry doubles as the "before_agent finished /
# model call started" marker on the first step of a turn.
model_id, _provider = _resolve_model_attrs(request)
_t0 = time.perf_counter()
_perf_log.info("[model_call] start model=%s", model_id)
try:
return await handler(request)
finally:
_perf_log.info(
"[model_call] done model=%s elapsed=%.3fs",
model_id,
time.perf_counter() - _t0,
)
model_id, provider = _resolve_model_attrs(request) model_id, provider = _resolve_model_attrs(request)
t0 = time.perf_counter() t0 = time.perf_counter()

View file

@ -7,15 +7,15 @@ from typing import Any
from langchain_core.language_models import BaseChatModel from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.plugin_loader import ( from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from app.db import ChatVisibility
from ..plugins.loader import (
PluginContext, PluginContext,
load_allowed_plugin_names_from_env, load_allowed_plugin_names_from_env,
load_plugin_middlewares, load_plugin_middlewares,
) )
from app.db import ChatVisibility
from ..shared.flags import enabled
def build_plugin_middlewares( def build_plugin_middlewares(

View file

@ -6,14 +6,11 @@ import logging
from deepagents.middleware.skills import SkillsMiddleware from deepagents.middleware.skills import SkillsMiddleware
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import ( from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
build_skills_backend_factory,
default_skills_sources,
)
from ..shared.flags import enabled from ..skills.backends import build_skills_backend_factory, default_skills_sources
def build_skills_mw( def build_skills_mw(

View file

@ -0,0 +1,314 @@
"""Main-agent middleware list assembly: one line per slot.
The main agent is a pure router — filesystem reads/writes are owned by the
``knowledge_base`` subagent and delegated via the ``task`` tool. The stack
here only renders KB context (workspace tree + priority docs), projects it
into system messages, and commits any subagent-side staged writes at end of
turn (cloud mode).
"""
from __future__ import annotations
import logging
import time
from collections.abc import Sequence
from typing import Any, cast
from deepagents import SubAgent
from deepagents.backends import StateBackend
from langchain.agents import create_agent
from langchain_core.language_models import BaseChatModel
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.chat.multi_agent_chat.main_agent.middleware.memory import (
build_memory_mw,
)
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.chat.multi_agent_chat.shared.middleware.anthropic_cache import (
build_anthropic_cache_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.compaction import (
build_compaction_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.kb_context_projection import (
build_kb_context_projection_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.patch_tool_calls import (
build_patch_tool_calls_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.resilience import (
build_resilience_middlewares,
)
from app.agents.chat.multi_agent_chat.shared.middleware.todos import build_todos_mw
from app.agents.chat.multi_agent_chat.shared.permissions import (
build_permission_mw,
)
from app.agents.chat.multi_agent_chat.subagents import (
build_subagents,
get_subagents_to_exclude,
)
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.agent import (
NAME as KB_WRITE_NAME,
READONLY_NAME as KB_READONLY_NAME,
build_readonly_subagent as build_kb_readonly_subagent,
build_subagent as build_kb_write_subagent,
)
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.ask_knowledge_base_tool import (
build_ask_knowledge_base_tool,
)
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.prompts import (
load_description as load_kb_write_description,
)
from app.agents.chat.multi_agent_chat.subagents.middleware_stack import (
build_subagent_middleware_stack,
)
from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
SURF_LAZY_SPEC_FACTORY_KEY,
)
from app.db import ChatVisibility
from app.utils.perf import get_perf_logger
from .action_log import build_action_log_mw
from .anonymous_document import build_anonymous_doc_mw
from .busy_mutex import build_busy_mutex_mw
from .checkpointed_subagent_middleware import (
SurfSenseCheckpointedSubAgentMiddleware,
)
from .checkpointed_subagent_middleware.task_description import (
TASK_TOOL_DESCRIPTION,
)
from .context_editing import build_context_editing_mw
from .dedup_hitl import build_dedup_hitl_mw
from .doom_loop import build_doom_loop_mw
from .kb_persistence import build_kb_persistence_mw
from .knowledge_priority import build_knowledge_priority_mw
from .knowledge_tree import build_knowledge_tree_mw
from .noop_injection import build_noop_injection_mw
from .otel_span import build_otel_mw
from .plugins import build_plugin_middlewares
from .skills import build_skills_mw
from .tool_call_repair import build_repair_mw
_perf_log = get_perf_logger()
def build_main_agent_deepagent_middleware(
    *,
    llm: BaseChatModel,
    tools: Sequence[BaseTool],
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
    visibility: ChatVisibility,
    anon_session_id: str | None,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    max_input_tokens: int | None,
    flags: AgentFeatureFlags,
    subagent_dependencies: dict[str, Any],
    checkpointer: Checkpointer,
    mcp_tools_by_agent: dict[str, list[BaseTool]] | None = None,
    disabled_tools: list[str] | None = None,
) -> list[Any]:
    """Ordered middleware for ``create_agent`` (None entries already stripped).

    The function runs in four phases, each timed for the ``[stack_build]``
    perf line emitted just before returning:

    1. Build the resilience bundle, the memory middleware and the middleware
       stack shared by every subagent.
    2. Register the two knowledge-base subagents lazily: the read-only graph
       behind the ``ask_knowledge_base`` tool and the write spec behind a
       lazy descriptor keyed by ``SURF_LAZY_SPEC_FACTORY_KEY``.
    3. Eagerly build the remaining subagents via ``build_subagents``.
    4. Assemble the main-agent stack in slot order and drop every slot whose
       builder returned ``None``.

    Args:
        llm: Chat model handed to the subagent builders, to ``create_agent``
            for the read-only KB graph, and to the model-aware middleware
            builders.
        tools: Main-agent tools; forwarded to the context-editing, repair and
            dedup-HITL builders.
        backend_resolver: Forwarded to the context-editing builder and
            injected into ``subagent_dependencies``.
        filesystem_mode: Forwarded to the filesystem-aware builders and
            injected into ``subagent_dependencies``.
        search_space_id: Search space the stack is built for.
        user_id: Forwarded to the memory, KB-persistence, action-log and
            plugin builders.
        thread_id: Forwarded to the KB-persistence and action-log builders.
        visibility: Forwarded to the memory and plugin builders.
        anon_session_id: Forwarded to the anonymous-document builder.
        available_connectors: Used to compute the subagents to exclude and
            forwarded to the knowledge-priority builder.
        available_document_types: Forwarded to the knowledge-priority builder.
        mentioned_document_ids: Forwarded to the knowledge-priority builder.
        max_input_tokens: Forwarded to the context-editing builder.
        flags: Feature flags consulted by most builders; also injected into
            ``subagent_dependencies``.
        subagent_dependencies: Base dependency mapping for subagents. It is
            copied, not mutated, before the extra keys are added.
        checkpointer: Shared by the read-only KB graph and the checkpointed
            subagent middleware.
        mcp_tools_by_agent: Optional MCP tools per subagent name; ``None`` is
            treated as an empty mapping.
        disabled_tools: Optional tool names to omit from the subagents.

    Returns:
        The middleware instances in slot order, without ``None`` entries.
    """
    stack_build_start = time.perf_counter()

    resilience = build_resilience_middlewares(flags)
    memory_mw = build_memory_mw(
        user_id=user_id,
        search_space_id=search_space_id,
        visibility=visibility,
    )

    # Rebind to a fresh dict so the caller's mapping is never mutated.
    subagent_dependencies = {
        **subagent_dependencies,
        "backend_resolver": backend_resolver,
        "filesystem_mode": filesystem_mode,
        "flags": flags,
    }

    shared_mw_start = time.perf_counter()
    shared_subagent_middleware = build_subagent_middleware_stack(
        resilience=resilience,
        flags=flags,
    )
    shared_mw_elapsed = time.perf_counter() - shared_mw_start

    def _compile_kb_readonly() -> Runnable:
        """Build *and* compile the read-only KB graph on first ``ask_knowledge_base`` use.

        Both the spec build (``build_kb_readonly_subagent`` middleware +
        tool-schema construction, ~the same cost as one regular subagent) and
        the ``create_agent`` compile are deferred here (memoized by
        ``build_ask_knowledge_base_tool``) so neither is paid on the cold
        agent-build / TTFT path; most first turns never call a subagent.
        """
        build_start = time.perf_counter()
        kb_readonly_spec = build_kb_readonly_subagent(
            dependencies=subagent_dependencies,
            model=llm,
            middleware_stack=shared_subagent_middleware,
        ).spec
        runnable = create_agent(
            llm,
            system_prompt=kb_readonly_spec["system_prompt"],
            tools=kb_readonly_spec["tools"],
            middleware=kb_readonly_spec["middleware"],
            name=KB_READONLY_NAME,
            checkpointer=checkpointer,
        )
        _perf_log.info(
            "[subagent_compile_lazy] name=%s (spec+compile) in %.3fs",
            KB_READONLY_NAME,
            time.perf_counter() - build_start,
        )
        return runnable

    ask_kb_tool = build_ask_knowledge_base_tool(_compile_kb_readonly)

    def _build_kb_write_spec() -> dict[str, Any]:
        """Build the *write* knowledge_base subagent spec on first ``task`` use.

        The KB filesystem middleware builds ~13 tool schemas at ~150ms each
        (~2s total), all of which used to land on the cold agent-build / TTFT
        path even though ``task("knowledge_base")`` is essentially never the
        first thing a turn does. Deferring the whole spec build here (memoized
        by the checkpointed subagent middleware) moves that cost to the first
        actual KB-write delegation. Captures the same ``subagent_dependencies``
        the eager build would have used, so cross-thread cache behaviour is
        unchanged.
        """
        spec = build_kb_write_subagent(
            dependencies=subagent_dependencies,
            model=llm,
            middleware_stack=shared_subagent_middleware,
        ).spec
        if disabled_tools:
            disabled = frozenset(disabled_tools)
            # Named ``spec_tools`` so it does not shadow the enclosing
            # function's ``tools`` parameter (the main-agent tool list).
            spec_tools = spec.get("tools")  # type: ignore[typeddict-item]
            if isinstance(spec_tools, list):
                spec["tools"] = [  # type: ignore[typeddict-unknown-key]
                    t for t in spec_tools if getattr(t, "name", None) not in disabled
                ]
        return cast(dict[str, Any], spec)

    subagents_start = time.perf_counter()
    # The write knowledge_base subagent is excluded from the eager build and
    # registered as a lazy descriptor (name + description cheap; spec built on
    # first ``task("knowledge_base")`` use) — see ``_build_kb_write_spec``.
    exclude_names = [*get_subagents_to_exclude(available_connectors), KB_WRITE_NAME]
    subagents: list[SubAgent] = build_subagents(
        dependencies=subagent_dependencies,
        model=llm,
        middleware_stack=shared_subagent_middleware,
        mcp_tools_by_agent=mcp_tools_by_agent or {},
        exclude=exclude_names,
        disabled_tools=disabled_tools,
        ask_kb_tool=ask_kb_tool,
    )
    kb_write_descriptor = cast(
        SubAgent,
        {
            "name": KB_WRITE_NAME,
            "description": load_kb_write_description(),
            SURF_LAZY_SPEC_FACTORY_KEY: _build_kb_write_spec,
        },
    )
    subagents.append(kb_write_descriptor)
    subagents_elapsed = time.perf_counter() - subagents_start

    # Log through a module-scoped logger. The module-level ``logging.debug``
    # helper targets the root logger and implicitly calls ``basicConfig()``
    # when the root logger has no handlers, which would silently alter the
    # application's logging setup.
    logging.getLogger(__name__).debug(
        "Subagents registry: %s", [s["name"] for s in subagents]
    )

    assembly_start = time.perf_counter()
    # One entry per slot; builders may return ``None`` and such slots are
    # filtered out below. The list order is the middleware order.
    stack: list[Any] = [
        build_busy_mutex_mw(flags),
        build_otel_mw(flags),
        build_todos_mw(system_prompt=""),
        memory_mw,
        build_anonymous_doc_mw(
            filesystem_mode=filesystem_mode, anon_session_id=anon_session_id
        ),
        build_knowledge_tree_mw(
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            llm=llm,
        ),
        build_knowledge_priority_mw(
            llm=llm,
            search_space_id=search_space_id,
            filesystem_mode=filesystem_mode,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
            mentioned_document_ids=mentioned_document_ids,
            preinjection_enabled=flags.enable_kb_priority_preinjection,
        ),
        build_kb_context_projection_mw(),
        build_kb_persistence_mw(
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            user_id=user_id,
            thread_id=thread_id,
        ),
        build_skills_mw(
            flags=flags,
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
        ),
        SurfSenseCheckpointedSubAgentMiddleware(
            checkpointer=checkpointer,
            backend=StateBackend,
            subagents=subagents,
            system_prompt=None,
            task_description=TASK_TOOL_DESCRIPTION,
            search_space_id=search_space_id,
        ),
        resilience.model_call_limit,
        resilience.tool_call_limit,
        build_context_editing_mw(
            flags=flags,
            max_input_tokens=max_input_tokens,
            tools=tools,
            backend_resolver=backend_resolver,
        ),
        build_compaction_mw(llm),
        build_noop_injection_mw(flags),
        resilience.retry,
        resilience.fallback,
        build_repair_mw(flags=flags, tools=tools),
        build_permission_mw(flags=flags),
        build_doom_loop_mw(flags),
        build_action_log_mw(
            flags=flags,
            thread_id=thread_id,
            search_space_id=search_space_id,
            user_id=user_id,
        ),
        build_patch_tool_calls_mw(),
        build_dedup_hitl_mw(tools),
        *build_plugin_middlewares(
            flags=flags,
            search_space_id=search_space_id,
            user_id=user_id,
            visibility=visibility,
            llm=llm,
        ),
        build_anthropic_cache_mw(),
    ]
    result = [m for m in stack if m is not None]
    assembly_elapsed = time.perf_counter() - assembly_start

    _perf_log.info(
        "[stack_build] total=%.3fs shared_subagent_mw=%.3fs "
        "build_subagents=%.3fs stack_assembly=%.3fs subagents=%d mw=%d "
        "(kb_readonly deferred to first ask_knowledge_base)",
        time.perf_counter() - stack_build_start,
        shared_mw_elapsed,
        subagents_elapsed,
        assembly_elapsed,
        len(subagents),
        len(result),
    )
    return result

View file

@ -0,0 +1,9 @@
"""Tool-call-repair middleware: fix miscased/unknown tool names (impl + builder)."""
from .builder import build_repair_mw
from .middleware import ToolCallNameRepairMiddleware
__all__ = [
"ToolCallNameRepairMiddleware",
"build_repair_mw",
]

View file

@ -6,10 +6,10 @@ from collections.abc import Sequence
from langchain_core.tools import BaseTool from langchain_core.tools import BaseTool
from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ToolCallNameRepairMiddleware from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled from .middleware import ToolCallNameRepairMiddleware
# deepagents-built-in tool names the repair pass treats as known. # deepagents-built-in tool names the repair pass treats as known.
_DEEPAGENT_BUILTIN_TOOL_NAMES: frozenset[str] = frozenset( _DEEPAGENT_BUILTIN_TOOL_NAMES: frozenset[str] = frozenset(

View file

@ -34,8 +34,6 @@ from langchain.agents.middleware.types import (
from langchain_core.messages import AIMessage from langchain_core.messages import AIMessage
from langgraph.runtime import Runtime from langgraph.runtime import Runtime
from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -120,6 +118,12 @@ class ToolCallNameRepairMiddleware(
return call return call
# Stage 2 — invalid fallback # Stage 2 — invalid fallback
# Local import keeps the middleware module import-light and avoids any
# tools <-> middleware import-order coupling at module scope.
from app.agents.chat.multi_agent_chat.main_agent.tools.invalid_tool import (
INVALID_TOOL_NAME,
)
if INVALID_TOOL_NAME in registered: if INVALID_TOOL_NAME in registered:
original_args = call.get("args") or {} original_args = call.get("args") or {}
error_msg = ( error_msg = (

View file

@ -17,7 +17,7 @@ Wire-up in ``pyproject.toml`` (illustrative; the in-repo plugin doesn't
need this -- it's already on the import path):: need this -- it's already on the import path)::
[project.entry-points."surfsense.plugins"] [project.entry-points."surfsense.plugins"]
year_substituter = "app.agents.new_chat.plugins.year_substituter:make_middleware" year_substituter = "app.agents.chat.multi_agent_chat.main_agent.plugins.year_substituter:make_middleware"
""" """
from __future__ import annotations from __future__ import annotations
@ -34,7 +34,7 @@ if TYPE_CHECKING: # pragma: no cover - type-only
from langchain_core.messages import ToolMessage from langchain_core.messages import ToolMessage
from langgraph.types import Command from langgraph.types import Command
from app.agents.new_chat.plugin_loader import PluginContext from .loader import PluginContext
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View file

@ -10,18 +10,18 @@ from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer from langgraph.types import Checkpointer
from app.agents.new_chat.agent_cache import ( from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.db import ChatVisibility
from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
from .agent_cache_store import (
flags_signature, flags_signature,
get_cache, get_cache,
stable_hash, stable_hash,
system_prompt_hash, system_prompt_hash,
tools_signature, tools_signature,
) )
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.db import ChatVisibility
from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
def mcp_signature(mcp_tools_by_agent: dict[str, list[BaseTool]]) -> str: def mcp_signature(mcp_tools_by_agent: dict[str, list[BaseTool]]) -> str:
@ -91,10 +91,18 @@ async def build_agent_with_cache(
# Every per-request value any middleware closes over at __init__ must be in # Every per-request value any middleware closes over at __init__ must be in
# the key, otherwise a hit will leak state across threads. Bump the schema # the key, otherwise a hit will leak state across threads. Bump the schema
# version when the component list changes shape. # version when the component list changes shape.
#
# Cross-thread reuse: when enabled, ``thread_id`` is dropped from the key so
# one compiled graph serves all of a user's (same space/config/visibility)
# chats. This is only safe because ActionLog, KB-persistence, and the
# deliverables tools now resolve the chat thread from the live
# RunnableConfig instead of a constructor closure; the schema tag is bumped
# so v2 (per-thread) entries are never confused with v3 (shared) ones.
cross_thread = flags.enable_cross_thread_agent_cache
cache_key = stable_hash( cache_key = stable_hash(
"multi-agent-v2", "multi-agent-v3" if cross_thread else "multi-agent-v2",
config_id, config_id,
thread_id, None if cross_thread else thread_id,
user_id, user_id,
search_space_id, search_space_id,
visibility, visibility,

View file

@ -67,13 +67,13 @@ from __future__ import annotations
import asyncio import asyncio
import hashlib import hashlib
import logging import logging
import os
import time import time
from collections import OrderedDict from collections import OrderedDict
from collections.abc import Awaitable, Callable from collections.abc import Awaitable, Callable
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any from typing import Any
from app.config import config
from app.utils.perf import get_perf_logger from app.utils.perf import get_perf_logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -113,12 +113,11 @@ def tools_signature(
MCP tools loaded for the user changes, gating rules flip, etc.). MCP tools loaded for the user changes, gating rules flip, etc.).
* The available connectors / document types for the search space * The available connectors / document types for the search space
change (new connector added, last connector removed, new document change (new connector added, last connector removed, new document
type indexed). Because :func:`get_connector_gated_tools` derives type indexed). Connector gating derives disabled tools from
``modified_disabled_tools`` from ``available_connectors``, the ``available_connectors``, so the tool surface is technically already
tool surface is technically already covered but we hash the covered but we hash the connector list separately so an empty-list
connector list separately so an empty-list "no tools changed" "no tools changed" situation still rotates the key when, say, the user
situation still rotates the key when, say, the user re-adds a re-adds a connector that gates a tool we were already not exposing.
connector that gates a tool we were already not exposing.
Stays stable across: Stays stable across:
@ -329,8 +328,8 @@ def _short(key: str, n: int = 16) -> str:
# Module-level singleton # Module-level singleton
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256")) _DEFAULT_MAXSIZE = config.AGENT_CACHE_MAXSIZE
_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800")) _DEFAULT_TTL = config.AGENT_CACHE_TTL_SECONDS
_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL) _cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)

View file

@ -0,0 +1,100 @@
"""Map configured connectors to the searchable document/connector types.
This is agent-agnostic infrastructure shared by every agent factory (single-
and multi-agent). It translates the connectors a search space has enabled into
the set of searchable type strings that pre-search middleware and ``web_search``
understand, and always layers in the document types that exist independently of
any connector (uploads, notes, extension captures, YouTube).
It lives in its own module rather than inside a specific agent factory so
that retiring or moving any single agent never disturbs the others' access to
this mapping.
"""
from __future__ import annotations
from typing import Any
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
# used by pre-search middleware and web_search.
# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
# the web_search tool; all others are considered local/indexed data.
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
    # Live search connectors (handled by web_search tool)
    "TAVILY_API": "TAVILY_API",
    "LINKUP_API": "LINKUP_API",
    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
    # Local/indexed connectors (handled by KB pre-search middleware)
    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
    "DROPBOX_CONNECTOR": "DROPBOX_FILE",  # Connector type differs from document type
    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",  # Connector type differs from document type
    # Composio connectors (unified to native document types).
    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
}

# Document types that don't come from SearchSourceConnector but should always be searchable
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
    "EXTENSION",  # Browser extension data
    "FILE",  # Uploaded files
    "NOTE",  # User notes
    "YOUTUBE_VIDEO",  # YouTube videos
]


def map_connectors_to_searchable_types(
    connector_types: list[Any] | None,
) -> list[str]:
    """Map SearchSourceConnectorType enums to searchable document/connector types.

    The result always starts with the always-available document types
    (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO), followed by the searchable type of
    each given connector in input order. Duplicates are dropped, keeping the
    first occurrence (several connectors can map to the same searchable type,
    e.g. the native and Composio Google Drive connectors). Connector types with
    no entry in ``_CONNECTOR_TYPE_TO_SEARCHABLE`` are ignored.

    Args:
        connector_types: SearchSourceConnectorType enum values or their plain
            string names. ``None`` is treated like an empty list.

    Returns:
        List of searchable connector/document type strings.
    """
    # An insertion-ordered dict gives "dedupe, keep first occurrence" in one
    # structure instead of a parallel set + list.
    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)

    for ct in connector_types or ():
        # Handle both enum members (use ``.value``) and plain strings.
        ct_str = ct.value if hasattr(ct, "value") else str(ct)
        searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
        if searchable:
            ordered.setdefault(searchable, None)

    return list(ordered)

View file

@ -12,21 +12,28 @@ from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer from langgraph.types import Checkpointer
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.multi_agent_chat.subagents import ( from app.agents.chat.multi_agent_chat.shared.feature_flags import (
AgentFeatureFlags,
get_flags,
)
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import (
FilesystemMode,
FilesystemSelection,
)
from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.resolver import (
build_backend_resolver,
)
from app.agents.chat.multi_agent_chat.subagents import (
get_subagents_to_exclude, get_subagents_to_exclude,
main_prompt_registry_subagent_lines, main_prompt_registry_subagent_lines,
) )
from app.agents.multi_agent_chat.subagents.mcp_tools.index import ( from app.agents.chat.multi_agent_chat.subagents.mcp_tools.index import (
load_mcp_tools_by_connector, load_mcp_tools_by_connector,
) )
from app.agents.new_chat.chat_deepagent import _map_connectors_to_searchable_types from app.agents.chat.runtime.llm_config import AgentConfig
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags from app.agents.chat.runtime.prompt_caching import (
from app.agents.new_chat.filesystem_backends import build_backend_resolver apply_litellm_prompt_caching,
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection )
from app.agents.new_chat.llm_config import AgentConfig
from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching
from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
from app.agents.new_chat.tools.registry import build_tools_async
from app.db import ChatVisibility from app.db import ChatVisibility
from app.services.connector_service import ConnectorService from app.services.connector_service import ConnectorService
from app.services.user_tool_allowlist import ( from app.services.user_tool_allowlist import (
@ -40,7 +47,10 @@ from ..tools import (
MAIN_AGENT_SURFSENSE_TOOL_NAMES, MAIN_AGENT_SURFSENSE_TOOL_NAMES,
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED, MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
) )
from ..tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
from ..tools.registry import build_main_agent_tools
from .agent_cache import build_agent_with_cache from .agent_cache import build_agent_with_cache
from .connector_searchable_types import map_connectors_to_searchable_types
_perf_log = get_perf_logger() _perf_log = get_perf_logger()
@ -90,7 +100,7 @@ async def create_multi_agent_chat_deep_agent(
connector_types = await connector_service.get_available_connectors( connector_types = await connector_service.get_available_connectors(
search_space_id search_space_id
) )
available_connectors = _map_connectors_to_searchable_types(connector_types) available_connectors = map_connectors_to_searchable_types(connector_types)
available_document_types = await connector_service.get_available_document_types( available_document_types = await connector_service.get_available_document_types(
search_space_id search_space_id
@ -199,9 +209,6 @@ async def create_multi_agent_chat_deep_agent(
modified_disabled_tools = list(disabled_tools) if disabled_tools else [] modified_disabled_tools = list(disabled_tools) if disabled_tools else []
if "search_knowledge_base" not in modified_disabled_tools:
modified_disabled_tools.append("search_knowledge_base")
if enabled_tools is not None: if enabled_tools is not None:
main_agent_enabled_tools = [ main_agent_enabled_tools = [
n for n in enabled_tools if n in MAIN_AGENT_SURFSENSE_TOOL_NAMES n for n in enabled_tools if n in MAIN_AGENT_SURFSENSE_TOOL_NAMES
@ -210,12 +217,14 @@ async def create_multi_agent_chat_deep_agent(
main_agent_enabled_tools = list(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED) main_agent_enabled_tools = list(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)
_t0 = time.perf_counter() _t0 = time.perf_counter()
tools = await build_tools_async( # Main agent builds only its own small SurfSense toolset via the SRP
# main-agent registry; connectors/MCP/deliverables are delegated to
# subagents, so no MCP loading or connector construction happens here.
tools = build_main_agent_tools(
dependencies=dependencies, dependencies=dependencies,
enabled_tools=main_agent_enabled_tools, enabled_tools=main_agent_enabled_tools,
disabled_tools=modified_disabled_tools, disabled_tools=modified_disabled_tools,
additional_tools=list(additional_tools) if additional_tools else None, additional_tools=list(additional_tools) if additional_tools else None,
include_mcp_tools=False,
) )
_flags: AgentFeatureFlags = get_flags() _flags: AgentFeatureFlags = get_flags()

View file

@ -16,7 +16,7 @@ prompt at agent build time, not edited at runtime.
Two backends are provided: Two backends are provided:
* :class:`BuiltinSkillsBackend` disk-backed read of bundled skills from * :class:`BuiltinSkillsBackend` disk-backed read of bundled skills from
``app/agents/new_chat/skills/builtin/``. ``app/agents/shared/skills/builtin/``.
* :class:`SearchSpaceSkillsBackend` a thin read-only wrapper over * :class:`SearchSpaceSkillsBackend` a thin read-only wrapper over
:class:`KBPostgresBackend` that filters notes under the privileged folder :class:`KBPostgresBackend` that filters notes under the privileged folder
``/documents/_skills/``. ``/documents/_skills/``.
@ -47,7 +47,9 @@ from deepagents.backends.state import StateBackend
if TYPE_CHECKING: if TYPE_CHECKING:
from langchain.tools import ToolRuntime from langchain.tools import ToolRuntime
from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
KBPostgresBackend,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -59,9 +61,10 @@ _MAX_SKILL_FILE_SIZE = 10 * 1024 * 1024
def _default_builtin_root() -> Path: def _default_builtin_root() -> Path:
"""Return the absolute path to the bundled builtin skills directory. """Return the absolute path to the bundled builtin skills directory.
Located at ``app/agents/new_chat/skills/builtin/`` relative to this module. Located at ``builtin/`` next to this module (this module lives at
``app/agents/multi_agent_chat/main_agent/skills/backends.py``).
""" """
return (Path(__file__).resolve().parent.parent / "skills" / "builtin").resolve() return (Path(__file__).resolve().parent / "builtin").resolve()
class BuiltinSkillsBackend(BackendProtocol): class BuiltinSkillsBackend(BackendProtocol):
@ -121,6 +124,8 @@ class BuiltinSkillsBackend(BackendProtocol):
else ("/" + str(target.relative_to(self.root)).replace("\\", "/")) else ("/" + str(target.relative_to(self.root)).replace("\\", "/"))
) )
for child in sorted(target.iterdir()): for child in sorted(target.iterdir()):
if child.name == "__pycache__" or child.name.startswith("."):
continue
child_virtual = ( child_virtual = (
target_virtual.rstrip("/") + "/" + child.name target_virtual.rstrip("/") + "/" + child.name
if target_virtual != "/" if target_virtual != "/"
@ -305,7 +310,7 @@ def build_skills_backend_factory(
# Imported lazily to avoid a hard dependency at module import time: # Imported lazily to avoid a hard dependency at module import time:
# ``KBPostgresBackend`` pulls in DB models, which are unnecessary for # ``KBPostgresBackend`` pulls in DB models, which are unnecessary for
# the unit-tested builtin path. # the unit-tested builtin path.
from app.agents.new_chat.middleware.kb_postgres_backend import ( from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
KBPostgresBackend, KBPostgresBackend,
) )

Some files were not shown because too many files have changed in this diff Show more