Merge pull request #1476 from MODSetter/dev

feat(0.0.27): bug fixes and optimizations
This commit is contained in:
Rohan Verma 2026-06-09 23:10:44 -07:00 committed by GitHub
commit 4c29938528
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
1169 changed files with 30332 additions and 38144 deletions

View file

@ -5,6 +5,9 @@ on:
branches:
- main
- dev
tags:
- 'v*'
- 'beta-v*'
paths:
- 'surfsense_backend/**'
- 'surfsense_web/**'
@ -24,11 +27,13 @@ permissions:
packages: write
jobs:
tag_release:
compute_version:
runs-on: ubuntu-latest
if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch'
if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/tags/beta-v')
outputs:
new_tag: ${{ steps.tag_version.outputs.next_version }}
commit_sha: ${{ steps.tag_version.outputs.commit_sha }}
is_release_tag: ${{ steps.tag_version.outputs.is_release_tag }}
steps:
- name: Checkout code
uses: actions/checkout@v6
@ -37,57 +42,65 @@ jobs:
ref: ${{ github.event.inputs.branch }}
token: ${{ secrets.GITHUB_TOKEN }}
# Compute-only: tag is pushed by finalize_release after everything succeeds.
- name: Read app version and calculate next Docker build version
id: tag_version
run: |
APP_VERSION=$(tr -d '[:space:]' < VERSION)
echo "App version from VERSION file: $APP_VERSION"
if [[ "$GITHUB_REF" == refs/tags/beta-v* ]]; then
VERSION="${GITHUB_REF#refs/tags/beta-v}"
NEXT_VERSION="beta-${VERSION}"
IS_RELEASE_TAG="true"
if [ -z "$APP_VERSION" ]; then
echo "Error: Could not read version from VERSION file"
exit 1
fi
if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
echo "::error::Version '$VERSION' is not valid semver (expected X.Y.Z). Fix your tag name."
exit 1
fi
git fetch --tags
echo "Docker beta release version from git tag: $NEXT_VERSION"
elif [[ "$GITHUB_REF" == refs/tags/v* ]]; then
NEXT_VERSION="${GITHUB_REF#refs/tags/v}"
IS_RELEASE_TAG="true"
LATEST_BUILD_TAG=$(git tag --list "${APP_VERSION}.*" --sort='-v:refname' | head -n 1)
if ! echo "$NEXT_VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
echo "::error::Version '$NEXT_VERSION' is not valid semver (expected X.Y.Z). Fix your tag name."
exit 1
fi
if [ -z "$LATEST_BUILD_TAG" ]; then
echo "No previous Docker build tag found for version ${APP_VERSION}. Starting with ${APP_VERSION}.1"
NEXT_VERSION="${APP_VERSION}.1"
echo "Docker release version from git tag: $NEXT_VERSION"
else
echo "Latest Docker build tag found: $LATEST_BUILD_TAG"
BUILD_NUMBER=$(echo "$LATEST_BUILD_TAG" | rev | cut -d. -f1 | rev)
NEXT_BUILD=$((BUILD_NUMBER + 1))
NEXT_VERSION="${APP_VERSION}.${NEXT_BUILD}"
APP_VERSION=$(tr -d '[:space:]' < VERSION)
echo "App version from VERSION file: $APP_VERSION"
if [ -z "$APP_VERSION" ]; then
echo "Error: Could not read version from VERSION file"
exit 1
fi
git fetch --tags
LATEST_BUILD_TAG=$(git tag --list "${APP_VERSION}.*" --sort='-v:refname' | head -n 1)
if [ -z "$LATEST_BUILD_TAG" ]; then
echo "No previous Docker build tag found for version ${APP_VERSION}. Starting with ${APP_VERSION}.1"
NEXT_VERSION="${APP_VERSION}.1"
else
echo "Latest Docker build tag found: $LATEST_BUILD_TAG"
BUILD_NUMBER=$(echo "$LATEST_BUILD_TAG" | rev | cut -d. -f1 | rev)
NEXT_BUILD=$((BUILD_NUMBER + 1))
NEXT_VERSION="${APP_VERSION}.${NEXT_BUILD}"
fi
IS_RELEASE_TAG="false"
echo "Calculated next Docker version: $NEXT_VERSION"
fi
echo "Calculated next Docker version: $NEXT_VERSION"
echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT
- name: Create and Push Tag
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
NEXT_TAG="${{ steps.tag_version.outputs.next_version }}"
COMMIT_SHA=$(git rev-parse HEAD)
echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
git tag -a "$NEXT_TAG" -m "Docker build $NEXT_TAG"
echo "Pushing tag $NEXT_TAG to origin"
git push origin "$NEXT_TAG"
- name: Verify Tag Push
run: |
echo "Checking if tag ${{ steps.tag_version.outputs.next_version }} exists remotely..."
sleep 5
git ls-remote --tags origin | grep "refs/tags/${{ steps.tag_version.outputs.next_version }}" || (echo "Tag push verification failed!" && exit 1)
echo "Tag successfully pushed."
echo "commit_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
echo "is_release_tag=$IS_RELEASE_TAG" >> $GITHUB_OUTPUT
build:
needs: tag_release
if: always() && (needs.tag_release.result == 'success' || needs.tag_release.result == 'skipped')
needs: compute_version
if: always() && (needs.compute_version.result == 'success' || needs.compute_version.result == 'skipped')
runs-on: ${{ matrix.os }}
permissions:
packages: write
@ -97,6 +110,12 @@ jobs:
matrix:
platform: [linux/amd64, linux/arm64]
image: [backend, web]
variant: [cpu, cuda, cuda126]
exclude:
- image: web
variant: cuda
- image: web
variant: cuda126
include:
- platform: linux/amd64
suffix: amd64
@ -114,6 +133,18 @@ jobs:
context: ./surfsense_web
file: ./surfsense_web/Dockerfile
target: runner
- variant: cpu
tag_suffix: ""
use_cuda: "false"
cuda_extra: cpu
- variant: cuda
tag_suffix: "-cuda"
use_cuda: "true"
cuda_extra: cu128
- variant: cuda126
tag_suffix: "-cuda126"
use_cuda: "true"
cuda_extra: cu126
env:
REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
@ -149,7 +180,7 @@ jobs:
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
docker system prune -af
- name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }})
- name: Build and push by digest ${{ matrix.name }} (${{ matrix.variant }}, ${{ matrix.suffix }})
id: build
uses: docker/build-push-action@v7
with:
@ -160,10 +191,14 @@ jobs:
tags: ${{ steps.image.outputs.name }}
outputs: type=image,push-by-digest=true,name-canonical=true,push=true
platforms: ${{ matrix.platform }}
cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.suffix }}
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.suffix }}
cache-from: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }}
cache-to: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }},mode=max,image-manifest=true,oci-mediatypes=true
secrets: |
HF_TOKEN=${{ secrets.HF_TOKEN }}
provenance: false
build-args: |
${{ matrix.image == 'backend' && format('USE_CUDA={0}', matrix.use_cuda) || '' }}
${{ matrix.image == 'backend' && format('CUDA_EXTRA={0}', matrix.cuda_extra) || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__' || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__' || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__' || '' }}
@ -179,15 +214,47 @@ jobs:
- name: Upload digest
uses: actions/upload-artifact@v7
with:
name: digests-${{ matrix.image }}-${{ matrix.suffix }}
name: digests-${{ matrix.image }}-${{ matrix.variant }}-${{ matrix.suffix }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
# Release gate: require both arches for every variant, else block publishing.
# Release-only; skipped on dev so the tolerant create_manifest path is kept.
verify_digests:
runs-on: ubuntu-latest
needs: [compute_version, build]
if: ${{ always() && needs.compute_version.result == 'success' && needs.compute_version.outputs.new_tag != '' }}
steps:
- name: Download all digests
uses: actions/download-artifact@v8
with:
pattern: digests-*
path: /tmp/digests
merge-multiple: false
- name: Require both arches for every required variant
run: |
fail=0
check() {
c=$(find /tmp/digests -type f -path "*/digests-$1-*/*" 2>/dev/null | wc -l | tr -d ' ')
if [ "$c" -lt 2 ]; then
echo "::error::$1 has $c/2 arch digests — blocking release"
fail=1
else
echo "OK: $1 ($c/2)"
fi
}
check backend-cpu
check backend-cuda
check backend-cuda126
check web-cpu
[ "$fail" -eq 0 ] || exit 1
create_manifest:
runs-on: ubuntu-latest
needs: [tag_release, build]
if: always() && needs.build.result == 'success'
needs: [compute_version, build, verify_digests]
if: ${{ !cancelled() && needs.verify_digests.result != 'failure' }}
permissions:
packages: write
contents: read
@ -197,8 +264,20 @@ jobs:
include:
- name: surfsense-backend
image: backend
variant: cpu
tag_suffix: ""
- name: surfsense-backend
image: backend
variant: cuda
tag_suffix: "-cuda"
- name: surfsense-backend
image: backend
variant: cuda126
tag_suffix: "-cuda126"
- name: surfsense-web
image: web
variant: cpu
tag_suffix: ""
env:
REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
@ -207,22 +286,33 @@ jobs:
id: image
run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT
- name: Download amd64 digest
- name: Download digests
id: download
uses: actions/download-artifact@v8
with:
name: digests-${{ matrix.image }}-amd64
pattern: digests-${{ matrix.image }}-${{ matrix.variant }}-*
path: /tmp/digests
merge-multiple: true
continue-on-error: true
- name: Download arm64 digest
uses: actions/download-artifact@v8
with:
name: digests-${{ matrix.image }}-arm64
path: /tmp/digests
- name: Check digests
id: check
run: |
count=$(find /tmp/digests -type f 2>/dev/null | wc -l | tr -d ' ')
echo "digest_count=$count" >> $GITHUB_OUTPUT
if [ "$count" -lt 2 ]; then
echo "::warning::${{ matrix.variant }}: $count/2 digests, skipping merge"
echo "skip=true" >> $GITHUB_OUTPUT
else
echo "skip=false" >> $GITHUB_OUTPUT
fi
- name: Set up Docker Buildx
if: steps.check.outputs.skip != 'true'
uses: docker/setup-buildx-action@v4
- name: Login to GitHub Container Registry
if: steps.check.outputs.skip != 'true'
uses: docker/login-action@v4
with:
registry: ghcr.io
@ -230,9 +320,10 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Compute app version
if: steps.check.outputs.skip != 'true'
id: appver
run: |
VERSION_TAG="${{ needs.tag_release.outputs.new_tag }}"
VERSION_TAG="${{ needs.compute_version.outputs.new_tag }}"
if [ -n "$VERSION_TAG" ]; then
APP_VERSION=$(echo "$VERSION_TAG" | rev | cut -d. -f2- | rev)
else
@ -241,29 +332,69 @@ jobs:
echo "app_version=$APP_VERSION" >> $GITHUB_OUTPUT
- name: Docker meta
if: steps.check.outputs.skip != 'true'
id: meta
uses: docker/metadata-action@v6
with:
images: ${{ steps.image.outputs.name }}
tags: |
type=raw,value=${{ needs.tag_release.outputs.new_tag }},enable=${{ needs.tag_release.outputs.new_tag != '' }}
type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.tag_release.outputs.new_tag != '' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }}
type=raw,value=${{ needs.compute_version.outputs.new_tag }},enable=${{ needs.compute_version.outputs.new_tag != '' }}
type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.compute_version.outputs.new_tag != '' && needs.compute_version.outputs.is_release_tag != 'true' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }}
type=ref,event=branch
type=sha,prefix=git-
flavor: |
latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }}
latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch || startsWith(github.ref, 'refs/tags/v') }}
${{ matrix.tag_suffix != '' && format('suffix={0},onlatest=true', matrix.tag_suffix) || '' }}
- name: Create manifest list and push
if: steps.check.outputs.skip != 'true'
working-directory: /tmp/digests
run: |
docker buildx imagetools create \
$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ steps.image.outputs.name }}@sha256:%s ' *)
- name: Inspect image
if: steps.check.outputs.skip != 'true'
run: |
docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}
- name: Summary
if: steps.check.outputs.skip != 'true'
run: |
echo "Multi-arch manifest created for ${{ matrix.name }}!"
echo "Tags: $(jq -cr '.tags | join(", ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")"
# Push the git tag only after build, gate, and manifest publish all succeed.
finalize_release:
runs-on: ubuntu-latest
needs: [compute_version, create_manifest]
if: ${{ success() && needs.compute_version.outputs.new_tag != '' && needs.compute_version.outputs.is_release_tag != 'true' }}
permissions:
contents: write
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 0
ref: ${{ github.event.inputs.branch }}
token: ${{ secrets.GITHUB_TOKEN }}
- name: Create and push git tag
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
NEXT_TAG="${{ needs.compute_version.outputs.new_tag }}"
COMMIT_SHA="${{ needs.compute_version.outputs.commit_sha }}"
echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
git tag -a "$NEXT_TAG" "$COMMIT_SHA" -m "Docker build $NEXT_TAG"
echo "Pushing tag $NEXT_TAG to origin"
git push origin "$NEXT_TAG"
- name: Verify tag push
run: |
echo "Checking if tag ${{ needs.compute_version.outputs.new_tag }} exists remotely..."
sleep 5
git ls-remote --tags origin | grep "refs/tags/${{ needs.compute_version.outputs.new_tag }}" || (echo "Tag push verification failed!" && exit 1)
echo "Tag successfully pushed."

View file

@ -1 +1 @@
0.0.26
0.0.27

View file

@ -7,6 +7,16 @@
# SurfSense version (use "latest" or a specific version like "0.0.14")
SURFSENSE_VERSION=latest
# Image variant: empty = CPU (default), "cuda" = CUDA 12.8, "cuda126" = CUDA 12.6.
# GPU acceleration also requires the NVIDIA Container Toolkit on the host and
# the GPU overlay in COMPOSE_FILE. Separate the files listed in COMPOSE_FILE
# with ":" on Linux/macOS and ";" on Windows.
# Example Linux/macOS: COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
# Example Windows: COMPOSE_FILE=docker-compose.yml;docker-compose.gpu.yml
# Use "cuda126" for older NVIDIA driver stacks; use "cuda" for newer drivers.
SURFSENSE_VARIANT=
# COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
# SURFSENSE_GPU_COUNT=1
# Deployment environment: dev or production
SURFSENSE_ENV=production
@ -55,6 +65,9 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# -- Redis exposed port (dev only; Redis is internal-only in prod) --
# REDIS_PORT=6379
# -- WhatsApp bridge exposed port (dev/hybrid only; prod keeps it Docker-internal) --
# WHATSAPP_BRIDGE_PORT=9929
# -- Frontend Build Args --
# In dev, the frontend is built from source and these are passed as build args.
# In prod, they are automatically derived from AUTH_TYPE, ETL_SERVICE, and the port settings above.
@ -67,7 +80,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# ------------------------------------------------------------------------------
# ONLY set these if you are serving SurfSense on a real domain via a reverse
# proxy (e.g. Caddy, Nginx, Cloudflare Tunnel).
# For standard localhost deployments, leave all of these commented out
# For standard localhost deployments, leave all of these commented out.
# They are automatically derived from the port settings above.
#
# NEXT_FRONTEND_URL=https://app.yourdomain.com
@ -89,7 +102,11 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# Only change this if you manage publications manually.
# ZERO_APP_PUBLICATIONS=zero_publication
# Sync worker tuning — zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number
# Keep Zero's documented halt safety net enabled. If replication halts, Zero
# can wipe and re-sync its local SQLite replica without touching Postgres.
# ZERO_AUTO_RESET=true
# Sync worker tuning. zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number
# of CPU cores, which can exceed the connection pool limits on high-core machines.
# Each sync worker needs at least 1 connection from both the UPSTREAM and CVR
# pools, so these constraints must hold:
@ -134,7 +151,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# SSL mode for database connections: disable, require, verify-ca, verify-full
# DB_SSLMODE=disable
# Full DATABASE_URL override — when set, takes precedence over the individual
# Full DATABASE_URL override. When set, this takes precedence over the individual
# DB_USER / DB_PASSWORD / DB_NAME / DB_HOST / DB_PORT settings above.
# Use this for managed databases (AWS RDS, GCP Cloud SQL, Supabase, etc.)
# DATABASE_URL=postgresql+asyncpg://user:password@your-rds-host:5432/surfsense?sslmode=require
@ -149,7 +166,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# REDIS_URL=redis://redis:6379/0
# ------------------------------------------------------------------------------
# Stripe (pay-as-you-go page packs disabled by default)
# Stripe (pay-as-you-go page packs, disabled by default)
# ------------------------------------------------------------------------------
# Set TRUE to allow users to buy additional page packs via Stripe Checkout
@ -168,7 +185,6 @@ STRIPE_PAGE_BUYING_ENABLED=FALSE
# STRIPE_TOKEN_BUYING_ENABLED=FALSE
# STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
# STRIPE_CREDIT_MICROS_PER_UNIT=1000000
# DEPRECATED — STRIPE_TOKENS_PER_UNIT=1000000
# ------------------------------------------------------------------------------
# TTS & STT (Text-to-Speech / Speech-to-Text)
@ -263,7 +279,44 @@ STT_SERVICE=local/base
# COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback
# ------------------------------------------------------------------------------
# SearXNG (bundled web search — works out of the box, no config needed)
# Messaging Channels (optional)
# ------------------------------------------------------------------------------
# Configure only the external chat channels you want to use.
# -- Telegram --
# TELEGRAM_SHARED_BOT_TOKEN=
# TELEGRAM_SHARED_BOT_USERNAME=
# TELEGRAM_WEBHOOK_SECRET=
# GATEWAY_BASE_URL=http://localhost:8929
# GATEWAY_TELEGRAM_INTAKE_MODE=webhook
# -- WhatsApp --
# GATEWAY_WHATSAPP_INTAKE_MODE=disabled
# WHATSAPP_SHARED_BUSINESS_TOKEN=
# WHATSAPP_SHARED_PHONE_NUMBER_ID=
# WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER=
# WHATSAPP_SHARED_WABA_ID=
# WHATSAPP_GRAPH_API_VERSION=v25.0
# WHATSAPP_WEBHOOK_VERIFY_TOKEN=
# WHATSAPP_WEBHOOK_APP_SECRET=
# WHATSAPP_BRIDGE_URL=http://whatsapp-bridge:9929
# -- Slack --
# Uses SLACK_CLIENT_ID and SLACK_CLIENT_SECRET from the Slack connector section.
#
# GATEWAY_SLACK_ENABLED=FALSE
# GATEWAY_SLACK_SIGNING_SECRET=
# GATEWAY_SLACK_REDIRECT_URI=http://localhost:8929/api/v1/gateway/slack/callback
# -- Discord --
# Uses DISCORD_CLIENT_ID, DISCORD_CLIENT_SECRET, and DISCORD_BOT_TOKEN from the
# Discord connector section.
#
# GATEWAY_DISCORD_ENABLED=FALSE
# GATEWAY_DISCORD_REDIRECT_URI=http://localhost:8929/api/v1/gateway/discord/callback
# ------------------------------------------------------------------------------
# SearXNG (bundled web search, works out of the box with no config needed)
# ------------------------------------------------------------------------------
# SearXNG provides web search to all search spaces automatically.
# To access the SearXNG UI directly: http://localhost:8888
@ -273,7 +326,7 @@ STT_SERVICE=local/base
# SEARXNG_SECRET=surfsense-searxng-secret
# ------------------------------------------------------------------------------
# Daytona Sandbox (optional cloud code execution for the deep agent)
# Daytona Sandbox (optional cloud code execution for the deep agent)
# ------------------------------------------------------------------------------
# Set DAYTONA_SANDBOX_ENABLED=TRUE and provide credentials to give the agent
# an isolated code execution environment via the Daytona cloud API.
@ -286,9 +339,6 @@ STT_SERVICE=local/base
# External API Keys (optional)
# ------------------------------------------------------------------------------
# Firecrawl (web scraping)
# FIRECRAWL_API_KEY=
# Unstructured (if ETL_SERVICE=UNSTRUCTURED)
# UNSTRUCTURED_API_KEY=
@ -364,7 +414,6 @@ SURFSENSE_ENABLE_DOOM_LOOP=true
# Premium turns are debited at the actual per-call provider cost reported
# by LiteLLM. Only applies to models with billing_tier=premium.
# PREMIUM_CREDIT_MICROS_LIMIT=5000000
# DEPRECATED — PREMIUM_TOKEN_LIMIT=5000000
# Safety ceiling on per-call premium reservation, in micro-USD ($1.00 default).
# QUOTA_MAX_RESERVE_MICROS=1000000
@ -376,10 +425,10 @@ SURFSENSE_ENABLE_DOOM_LOOP=true
# QUOTA_DEFAULT_PODCAST_RESERVE_MICROS=200000
# Per-video-presentation reservation for the video Celery task ($1.00 default).
# Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp — raise with care.
# Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp. Raise with care.
# QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS=1000000
# No-login (anonymous) mode — public users can chat without an account
# No-login (anonymous) mode. Public users can chat without an account
# Set TRUE to enable /free pages and anonymous chat API
NOLOGIN_MODE_ENABLED=FALSE
# ANON_TOKEN_LIMIT=1000000

View file

@ -114,6 +114,7 @@ services:
- ZERO_REPLICA_FILE=/data/zero.db
- ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
- ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
- ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
- ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
- ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
- ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
@ -122,11 +123,30 @@ services:
volumes:
- zero_cache_data:/data
restart: unless-stopped
stop_grace_period: 300s
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
interval: 10s
timeout: 5s
retries: 5
start_period: 600s
# OPTIONAL — Azurite emulates Azure Blob Storage for testing the Azure
# original-file backend. The default filesystem backend needs none of this.
# To exercise it, set in surfsense_backend/.env:
# FILE_STORAGE_BACKEND=azure
# AZURE_STORAGE_CONTAINER=surfsense-documents
# AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:${AZURITE_BLOB_PORT:-10000}/devstoreaccount1;
# The backend creates blobs on upload; create the container once first
# (Azure CLI / Storage Explorer), then upload a document.
azurite:
image: mcr.microsoft.com/azure-storage/azurite:3.33.0
command: azurite-blob --blobHost 0.0.0.0 --blobPort 10000
ports:
- "${AZURITE_BLOB_PORT:-10000}:10000"
volumes:
- azurite_data:/data
restart: unless-stopped
volumes:
postgres_data:
@ -137,3 +157,5 @@ volumes:
name: surfsense-deps-redis
zero_cache_data:
name: surfsense-deps-zero-cache
azurite_data:
name: surfsense-deps-azurite

View file

@ -46,8 +46,6 @@ services:
- PYTHONPATH=/app
- SERVICE_ROLE=migrate
- MIGRATION_TIMEOUT=${MIGRATION_TIMEOUT:-900}
volumes:
- zero_init:/zero-init
depends_on:
db:
condition: service_healthy
@ -126,6 +124,7 @@ services:
- AUTH_TYPE=${AUTH_TYPE:-LOCAL}
- NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000}
- SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
- WHATSAPP_BRIDGE_URL=${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
# Daytona Sandbox: uncomment and set credentials to enable cloud code execution
# - DAYTONA_SANDBOX_ENABLED=TRUE
# - DAYTONA_API_KEY=${DAYTONA_API_KEY:-}
@ -148,6 +147,25 @@ services:
retries: 30
start_period: 200s
whatsapp-bridge:
build: ../surfsense_backend/scripts/whatsapp-bridge
profiles:
- whatsapp
ports:
- "127.0.0.1:${WHATSAPP_BRIDGE_PORT:-9929}:9929"
volumes:
- whatsapp_sessions:/data/sessions
environment:
- PORT=9929
- WHATSAPP_MODE=${WHATSAPP_MODE:-self-chat}
- WHATSAPP_SESSION_DIR=/data/sessions
restart: unless-stopped
healthcheck:
test: ["CMD", "wget", "-qO-", "http://localhost:9929/health"]
interval: 30s
timeout: 5s
retries: 5
celery_worker:
build: *backend-build
volumes:
@ -197,21 +215,6 @@ services:
celery_worker:
condition: service_started
# flower:
# build: *backend-build
# ports:
# - "${FLOWER_PORT:-5555}:5555"
# env_file:
# - ../surfsense_backend/.env
# environment:
# - CELERY_BROKER_URL=${REDIS_URL:-redis://redis:6379/0}
# - CELERY_RESULT_BACKEND=${REDIS_URL:-redis://redis:6379/0}
# - PYTHONPATH=/app
# command: celery -A app.celery_app flower --port=5555
# depends_on:
# - redis
# - celery_worker
zero-cache:
image: rocicorp/zero:1.4.0
ports:
@ -230,6 +233,7 @@ services:
- ZERO_REPLICA_FILE=/data/zero.db
- ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
- ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
- ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
- ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
- ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
- ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
@ -237,18 +241,14 @@ services:
- ZERO_MUTATE_URL=${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
volumes:
- zero_cache_data:/data
- zero_init:/zero-init
# Wrapper: see docker/docker-compose.yml `zero-cache` for rationale.
entrypoint: ["sh", "-c"]
# Pass the script as a single list element so Compose does not tokenize it.
command:
- 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
restart: unless-stopped
stop_grace_period: 300s
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
interval: 10s
timeout: 5s
retries: 5
start_period: 600s
frontend:
build:
@ -280,5 +280,5 @@ volumes:
name: surfsense-dev-shared-temp
zero_cache_data:
name: surfsense-dev-zero-cache
zero_init:
name: surfsense-dev-zero-init
whatsapp_sessions:
name: surfsense-dev-whatsapp-sessions

View file

@ -0,0 +1,30 @@
services:
backend:
deploy:
resources:
reservations:
devices:
- driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
count: ${SURFSENSE_GPU_COUNT:-1}
capabilities:
- gpu
celery_worker:
deploy:
resources:
reservations:
devices:
- driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
count: ${SURFSENSE_GPU_COUNT:-1}
capabilities:
- gpu
celery_beat:
deploy:
resources:
reservations:
devices:
- driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
count: ${SURFSENSE_GPU_COUNT:-1}
capabilities:
- gpu

View file

@ -29,12 +29,11 @@ services:
# Short-lived schema runner. Executes `alembic upgrade head` and verifies
# that the `zero_publication` Postgres logical-replication publication
# exists, then exits 0. Downstream services (backend, celery_*, zero-cache)
# gate on this with `condition: service_completed_successfully` so a failed
# migration halts the whole stack instead of silently producing a half-built
# system that crash-loops zero-cache on missing publications.
# matches the canonical shape, then exits 0. Downstream services gate on this
# with `condition: service_completed_successfully` so a failed migration halts
# the whole stack instead of booting zero-cache against a drifted publication.
migrations:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
env_file:
- .env
environment:
@ -42,8 +41,6 @@ services:
PYTHONPATH: /app
SERVICE_ROLE: migrate
MIGRATION_TIMEOUT: ${MIGRATION_TIMEOUT:-900}
volumes:
- zero_init:/zero-init
depends_on:
db:
condition: service_healthy
@ -61,28 +58,28 @@ services:
timeout: 5s
retries: 5
otel-collector:
image: otel/opentelemetry-collector-contrib:0.152.1
profiles:
- observability
command: ["--config=/etc/otelcol/config.yaml"]
volumes:
- ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro
environment:
GRAFANA_CLOUD_OTLP_ENDPOINT: ${GRAFANA_CLOUD_OTLP_ENDPOINT:-}
GRAFANA_CLOUD_INSTANCE_ID: ${GRAFANA_CLOUD_INSTANCE_ID:-}
GRAFANA_CLOUD_API_KEY: ${GRAFANA_CLOUD_API_KEY:-}
ports:
- "${OTEL_GRPC_PORT:-4317}:4317"
- "${OTEL_HTTP_PORT:-4318}:4318"
- "${OTEL_HEALTH_PORT:-13133}:13133"
mem_limit: 2g
restart: unless-stopped
healthcheck:
test: ["CMD", "/otelcol-contrib", "--version"]
interval: 30s
timeout: 5s
retries: 3
# otel-collector:
# image: otel/opentelemetry-collector-contrib:0.152.1
# profiles:
# - observability
# command: ["--config=/etc/otelcol/config.yaml"]
# volumes:
# - ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro
# environment:
# GRAFANA_CLOUD_OTLP_ENDPOINT: ${GRAFANA_CLOUD_OTLP_ENDPOINT:-}
# GRAFANA_CLOUD_INSTANCE_ID: ${GRAFANA_CLOUD_INSTANCE_ID:-}
# GRAFANA_CLOUD_API_KEY: ${GRAFANA_CLOUD_API_KEY:-}
# ports:
# - "${OTEL_GRPC_PORT:-4317}:4317"
# - "${OTEL_HTTP_PORT:-4318}:4318"
# - "${OTEL_HEALTH_PORT:-13133}:13133"
# mem_limit: 2g
# restart: unless-stopped
# healthcheck:
# test: ["CMD", "/otelcol-contrib", "--version"]
# interval: 30s
# timeout: 5s
# retries: 3
searxng:
image: searxng/searxng:2026.3.13-3c1f68c59
@ -98,7 +95,7 @@ services:
retries: 5
backend:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
ports:
- "${BACKEND_PORT:-8929}:8000"
volumes:
@ -118,6 +115,7 @@ services:
UNSTRUCTURED_HAS_PATCHED_LOOP: "1"
NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}}
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
WHATSAPP_BRIDGE_URL: ${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
# Daytona Sandbox: uncomment and set credentials to enable cloud code execution
# DAYTONA_SANDBOX_ENABLED: "TRUE"
# DAYTONA_API_KEY: ${DAYTONA_API_KEY:-}
@ -143,8 +141,28 @@ services:
retries: 30
start_period: 200s
# whatsapp-bridge:
# build: ../surfsense_backend/scripts/whatsapp-bridge
# profiles:
# - whatsapp
# expose:
# - "9929"
# volumes:
# - whatsapp_sessions:/data/sessions
# environment:
# PORT: 9929
# WHATSAPP_MODE: ${WHATSAPP_MODE:-self-chat}
# WHATSAPP_SESSION_DIR: /data/sessions
# mem_limit: 512m
# restart: unless-stopped
# healthcheck:
# test: ["CMD", "wget", "-qO-", "http://localhost:9929/health"]
# interval: 30s
# timeout: 5s
# retries: 5
celery_worker:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
volumes:
- shared_temp:/shared_tmp
env_file:
@ -174,7 +192,7 @@ services:
restart: unless-stopped
celery_beat:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
env_file:
- .env
environment:
@ -197,22 +215,6 @@ services:
- "com.centurylinklabs.watchtower.enable=true"
restart: unless-stopped
# flower:
# image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
# ports:
# - "${FLOWER_PORT:-5555}:5555"
# env_file:
# - .env
# environment:
# CELERY_BROKER_URL: ${REDIS_URL:-redis://redis:6379/0}
# CELERY_RESULT_BACKEND: ${REDIS_URL:-redis://redis:6379/0}
# PYTHONPATH: /app
# command: celery -A app.celery_app flower --port=5555
# depends_on:
# - redis
# - celery_worker
# restart: unless-stopped
zero-cache:
image: rocicorp/zero:1.4.0
ports:
@ -226,6 +228,7 @@ services:
ZERO_REPLICA_FILE: /data/zero.db
ZERO_ADMIN_PASSWORD: ${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
ZERO_APP_PUBLICATIONS: ${ZERO_APP_PUBLICATIONS:-zero_publication}
ZERO_AUTO_RESET: ${ZERO_AUTO_RESET:-true}
ZERO_NUM_SYNC_WORKERS: ${ZERO_NUM_SYNC_WORKERS:-4}
ZERO_UPSTREAM_MAX_CONNS: ${ZERO_UPSTREAM_MAX_CONNS:-20}
ZERO_CVR_MAX_CONNS: ${ZERO_CVR_MAX_CONNS:-30}
@ -233,16 +236,8 @@ services:
ZERO_MUTATE_URL: ${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
volumes:
- zero_cache_data:/data
- zero_init:/zero-init
# Wrapper: if the migrations service flagged a publication change via
# /zero-init/needs_reset, wipe the SQLite replica before starting so
# zero-cache does a clean initial sync. Recovers from the half-built
# replica state (`_zero.tableMetadata` missing) caused by earlier crashes.
entrypoint: ["sh", "-c"]
# Pass the script as a single list element so Compose does not tokenize it.
command:
- 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
restart: unless-stopped
stop_grace_period: 300s
depends_on:
db:
condition: service_healthy
@ -253,6 +248,7 @@ services:
interval: 10s
timeout: 5s
retries: 5
start_period: 600s
frontend:
image: ghcr.io/modsetter/surfsense-web:${SURFSENSE_VERSION:-latest}
@ -264,6 +260,7 @@ services:
NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${AUTH_TYPE:-LOCAL}
NEXT_PUBLIC_ETL_SERVICE: ${ETL_SERVICE:-DOCLING}
NEXT_PUBLIC_DEPLOYMENT_MODE: ${DEPLOYMENT_MODE:-self-hosted}
NEXT_PUBLIC_WHATSAPP_DISPLAY_PHONE_NUMBER: ${WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER:-}
FASTAPI_BACKEND_INTERNAL_URL: ${FASTAPI_BACKEND_INTERNAL_URL:-http://backend:8000}
labels:
- "com.centurylinklabs.watchtower.enable=true"
@ -283,5 +280,5 @@ volumes:
name: surfsense-shared-temp
zero_cache_data:
name: surfsense-zero-cache
zero_init:
name: surfsense-zero-init
whatsapp_sessions:
name: surfsense-whatsapp-sessions

View file

@ -7,6 +7,8 @@
# To pass flags, save and run locally:
# .\install.ps1 -NoWatchtower
# .\install.ps1 -WatchtowerInterval 3600
# .\install.ps1 -Variant cuda
# .\install.ps1 -Variant cuda -GpuCount all
#
# Handles two cases automatically:
# 1. Fresh install — no prior SurfSense data detected
@ -17,7 +19,11 @@
param(
[switch]$NoWatchtower,
[int]$WatchtowerInterval = 86400
[int]$WatchtowerInterval = 86400,
[ValidateSet("cpu", "cuda", "cuda126")]
[string]$Variant,
[string]$GpuCount,
[switch]$Quiet
)
$ErrorActionPreference = 'Stop'
@ -34,6 +40,11 @@ $MigrationMode = $false
$SetupWatchtower = -not $NoWatchtower
$WatchtowerContainer = "watchtower"
# Validate -GpuCount before anything else runs: only a non-negative integer or
# the literal 'all' is accepted. The Write-Err helper is defined further down
# in this script, so the failure is reported with Write-Host and an explicit
# exit code instead.
if ($GpuCount -and $GpuCount -notmatch '^([0-9]+|all)$') {
Write-Host "[SurfSense] ERROR: Invalid -GpuCount '$GpuCount'. Use a number or 'all'." -ForegroundColor Red
exit 1
}
# ── Output helpers ──────────────────────────────────────────────────────────
function Write-Info { param([string]$Msg) Write-Host "[SurfSense] " -ForegroundColor Cyan -NoNewline; Write-Host $Msg }
@ -42,6 +53,27 @@ function Write-Warn { param([string]$Msg) Write-Host "[SurfSense] " -Foregrou
function Write-Step { param([string]$Msg) Write-Host "`n-- $Msg" -ForegroundColor Cyan }
function Write-Err { param([string]$Msg) Write-Host "[SurfSense] ERROR: $Msg" -ForegroundColor Red; exit 1 }
# Prints the installer banner: a blank line, the banner here-string, the
# tagline, a separator rule, and a one-line summary of what is about to happen.
# Reads $InstallDir, which is defined outside this function.
function Show-Banner {
Write-Host ""
# Here-string: the closing "@ must stay at the very start of its line.
Write-Host @"
"@ -ForegroundColor White
Write-Host " OSS Alternative to NotebookLM for Teams" -ForegroundColor Yellow
# 62-character separator rule.
Write-Host ("=" * 62) -ForegroundColor Cyan
Write-Info "This installer will create $InstallDir\ and start SurfSense with Docker Compose."
}
Show-Banner
function Invoke-NativeSafe {
param([scriptblock]$Command)
$previousErrorActionPreference = $ErrorActionPreference
@ -53,6 +85,28 @@ function Invoke-NativeSafe {
}
}
# Asks whether Watchtower auto-updates should be enabled and records the
# answer in $script:SetupWatchtower.
#
# No prompt is shown (and the current setting is left untouched) when
# -NoWatchtower or -Quiet was passed, or when the session is not interactive.
# An empty answer or y/yes enables Watchtower, n/no disables it, and anything
# else enables it after printing a warning.
function Resolve-WatchtowerPreference {
    $skipPrompt = $NoWatchtower -or $Quiet -or -not [Environment]::UserInteractive
    if ($skipPrompt) {
        return
    }

    Write-Host ""
    Write-Host "Automatic updates" -ForegroundColor Cyan
    $choice = Read-Host "Enable automatic daily updates with Watchtower? (may download several GB in the background) [Y/n]"

    if ($choice -eq "" -or $choice -match '^(?i)y(es)?$') {
        $script:SetupWatchtower = $true
    } elseif ($choice -match '^(?i)n(o)?$') {
        $script:SetupWatchtower = $false
    } else {
        Write-Warn "Unrecognized choice '$choice'; enabling Watchtower by default. Use -NoWatchtower to skip it."
        $script:SetupWatchtower = $true
    }
}
Resolve-WatchtowerPreference
# ── Pre-flight checks ──────────────────────────────────────────────────────
Write-Step "Checking prerequisites"
@ -97,143 +151,11 @@ function Wait-ForPostgres {
Write-Ok "PostgreSQL is ready."
}
# ── Stack health helpers ────────────────────────────────────────────────────
# Returns the services reported by `docker compose ps -a --format json`, run
# from $InstallDir, as an array of parsed objects (empty array when the
# command prints nothing).
#
# The output is first parsed as a single JSON document; if that fails it is
# parsed line by line, silently skipping lines that are not valid JSON.
function Get-ComposeServices {
    Push-Location $InstallDir
    try {
        $raw = Invoke-NativeSafe { docker compose ps -a --format json 2>$null }
    } finally {
        Pop-Location
    }

    if ([string]::IsNullOrWhiteSpace($raw)) {
        return @()
    }

    # Compose v2.21+ emits a JSON array; older versions emit one object per line.
    try {
        $document = $raw | ConvertFrom-Json
        return @($document)
    } catch {
        $collected = @()
        foreach ($entry in ($raw -split "`r?`n")) {
            $text = $entry.Trim()
            if ($text) {
                try { $collected += ($text | ConvertFrom-Json) } catch { }
            }
        }
        return $collected
    }
}
# Polls Get-ComposeServices until every service is in a good state, at least
# one service is in a failed state, or the timeout elapses.
#
# Parameters:
#   TimeoutSec - maximum number of seconds to keep polling (default 300).
#
# Returns a hashtable:
#   Ok      - $true only when no service is failed or still waiting.
#   Reason  - 'all_healthy', 'failure' or 'timeout'.
#   Bad / Waiting / Good - service descriptions from the most recent poll
#             (the 'all_healthy' result carries only Good).
function Wait-StackHealthy {
param([int]$TimeoutSec = 300)
$deadline = (Get-Date).AddSeconds($TimeoutSec)
# Last progress line printed, so an unchanged status is not repeated.
$lastReport = ""
while ((Get-Date) -lt $deadline) {
$services = Get-ComposeServices
# Nothing listed yet: wait briefly and poll again.
if (-not $services -or $services.Count -eq 0) {
Start-Sleep -Seconds 3
continue
}
$bad = @()
$waiting = @()
$good = @()
foreach ($svc in $services) {
$name = $svc.Service
$state = $svc.State
# Health and ExitCode are read only when the parsed object has them.
$health = if ($svc.PSObject.Properties.Name -contains 'Health') { $svc.Health } else { '' }
$exit = if ($svc.PSObject.Properties.Name -contains 'ExitCode') { $svc.ExitCode } else { $null }
# 'migrations' is treated specially: exiting with code 0 counts as success,
# any other exit is a failure, and every other state is still pending.
if ($name -eq 'migrations') {
if ($state -eq 'exited' -and $exit -eq 0) { $good += $name }
elseif ($state -eq 'exited') { $bad += "${name} (exit=${exit})" }
else { $waiting += "${name} (${state})" }
continue
}
if ($state -eq 'running') {
# A running service with no health value is accepted as good.
if ([string]::IsNullOrEmpty($health) -or $health -eq 'healthy') {
$good += $name
} elseif ($health -eq 'starting') {
$waiting += "${name} (starting)"
} elseif ($health -eq 'unhealthy') {
$bad += "${name} (unhealthy)"
} else {
$waiting += "${name} (${health})"
}
} elseif ($state -eq 'restarting') {
$bad += "${name} (restarting)"
} elseif ($state -eq 'exited') {
$bad += "${name} (exited, code=${exit})"
} else {
$waiting += "${name} (${state})"
}
}
# Any failed service ends the wait immediately.
if ($bad.Count -gt 0) {
return @{ Ok = $false; Reason = 'failure'; Bad = $bad; Waiting = $waiting; Good = $good }
}
if ($waiting.Count -eq 0) {
return @{ Ok = $true; Reason = 'all_healthy'; Good = $good }
}
# Print the waiting list only when it differs from the previous poll.
$report = "Waiting on: " + ($waiting -join ', ')
if ($report -ne $lastReport) {
Write-Info $report
$lastReport = $report
}
Start-Sleep -Seconds 5
}
# Deadline reached; the lists are unset if no poll ever returned services.
return @{ Ok = $false; Reason = 'timeout'; Bad = $bad; Waiting = $waiting; Good = $good }
}
# Returns $true when the Docker volume 'surfsense-zero-cache' exists while
# 'surfsense-zero-init' does not; returns $false otherwise, including when
# `docker volume ls` prints nothing.
function Test-StaleZeroCacheVolume {
    $volumeListing = Invoke-NativeSafe { docker volume ls --format '{{.Name}}' 2>$null }
    if ([string]::IsNullOrWhiteSpace($volumeListing)) {
        return $false
    }

    $volumeNames = $volumeListing -split "`r?`n" | ForEach-Object { $_.Trim() } | Where-Object { $_ }

    # Pre-fix installs created surfsense-zero-cache but never surfsense-zero-init.
    # Such a volume may hold a half-initialized SQLite replica from an earlier
    # crash-loop. Wiping it forces zero-cache to do a fresh initial sync.
    if ($volumeNames -notcontains 'surfsense-zero-cache') {
        return $false
    }
    return ($volumeNames -notcontains 'surfsense-zero-init')
}
# Removes the 'surfsense-zero-cache' volume when Test-StaleZeroCacheVolume
# reports it as stale; does nothing otherwise.
#
# The user gets a 5 second window to cancel. The compose stack in $InstallDir
# is then brought down so the volume is no longer in use, and the volume is
# deleted. Both docker calls are best-effort: their output is discarded.
function Invoke-StaleZeroCacheCleanup {
    if (-not (Test-StaleZeroCacheVolume)) { return }

    Write-Warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
    Write-Warn "predates the migrations-service fix. It may contain a half-initialized"
    Write-Warn "SQLite replica that would block zero-cache from starting."
    Write-Warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
    Start-Sleep -Seconds 5

    Push-Location $InstallDir
    try {
        Invoke-NativeSafe { docker compose down --remove-orphans 2>$null } | Out-Null
    } finally {
        # Always restore the caller's location, even if the compose call throws.
        Pop-Location
    }

    Invoke-NativeSafe { docker volume rm surfsense-zero-cache 2>$null } | Out-Null
    Write-Ok "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
}
# Prints an error line in red, prefixed with "[ERROR] ", and returns to the
# caller. Unlike Write-Err it does not terminate the script.
#
# Parameters:
#   Message - text to print after the prefix.
function Write-Err-NoExit {
param([string]$Message)
Write-Host "[ERROR] $Message" -ForegroundColor Red
}
# ── Stack startup helper ────────────────────────────────────────────────────
function Invoke-StackFailureReport {
param([hashtable]$Result)
Write-Host ""
Write-Err-NoExit "Stack did not reach a healthy state."
if ($Result.Bad.Count -gt 0) { Write-Host (" Failed: " + ($Result.Bad -join ', ')) }
if ($Result.Waiting.Count -gt 0) { Write-Host (" Stuck: " + ($Result.Waiting -join ', ')) }
Write-Host "[ERROR] Stack did not reach a healthy state." -ForegroundColor Red
Write-Host ""
Write-Info "Recent logs from migrations / zero-cache / backend:"
Push-Location $InstallDir
@ -247,11 +169,151 @@ function Invoke-StackFailureReport {
Write-Host "Recovery hints:" -ForegroundColor Yellow
Write-Host " 1. Inspect migrations: cd $InstallDir; docker compose logs migrations"
Write-Host " 2. Verify publication: cd $InstallDir; docker compose exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
Write-Host " 3. Hard reset zero db: cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d"
Write-Host " 3. Hard reset zero db: cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d --wait"
Write-Host ""
exit 1
}
# Runs `docker compose up -d --wait` from $InstallDir and hands over to
# Invoke-StackFailureReport when the command exits with a non-zero code.
# The caller's working directory is restored in all cases.
function Invoke-ComposeUpWait {
    Push-Location $InstallDir
    try {
        Invoke-NativeSafe { docker compose up -d --wait }
    } finally {
        Pop-Location
    }

    if ($LASTEXITCODE -eq 0) {
        return
    }
    Invoke-StackFailureReport
}
# ── Variant and .env helpers ────────────────────────────────────────────────
# Sets KEY=VALUE in an env file, replacing every existing line that starts
# with "KEY=" or appending a new line when the key is absent. The file is
# created when it does not exist.
#
# Parameters:
#   Path  - env file to modify.
#   Key   - variable name; matched literally at the start of a line.
#   Value - new value (may be empty).
function Set-EnvValue {
    param([string]$Path, [string]$Key, [string]$Value)

    $existing = @()
    if (Test-Path $Path) {
        $existing = @(Get-Content $Path)
    }

    $pattern = "^$([regex]::Escape($Key))="
    $updated = $false

    # @(...) keeps the result an array. Without it a one-line file yields a
    # scalar string, and the += below would concatenate text onto that line
    # instead of appending a new element.
    $newLines = @(foreach ($line in $existing) {
        if ($line -match $pattern) {
            $updated = $true
            "$Key=$Value"
        } else {
            $line
        }
    })

    if (-not $updated) {
        $newLines += "$Key=$Value"
    }
    Set-Content -Path $Path -Value $newLines
}
# Deletes every line that starts with "KEY=" from an env file. A missing file
# is left alone.
#
# Parameters:
#   Path - env file to modify.
#   Key  - variable name; matched literally at the start of a line.
function Remove-EnvValue {
    param([string]$Path, [string]$Key)

    if (Test-Path $Path) {
        $pattern = "^$([regex]::Escape($Key))="
        $kept = Get-Content $Path | Where-Object { $_ -notmatch $pattern }
        Set-Content -Path $Path -Value $kept
    }
}
# Returns $true when `nvidia-smi` is on PATH and exits with code 0;
# $false when the command is missing or fails.
function Test-NvidiaGpu {
    $smiCommand = Get-Command nvidia-smi -ErrorAction SilentlyContinue
    if (-not $smiCommand) {
        return $false
    }

    # All output streams are discarded; only the exit code matters.
    Invoke-NativeSafe { nvidia-smi *>$null } | Out-Null
    return ($LASTEXITCODE -eq 0)
}
# Returns $true when `docker info` mentions "nvidia", or when either the
# `nvidia-ctk` or `nvidia-container-runtime` command is on PATH;
# $false otherwise.
function Test-NvidiaRuntime {
    $dockerInfo = Invoke-NativeSafe { docker info 2>$null }
    if ($dockerInfo -match 'nvidia') {
        return $true
    }

    foreach ($tool in 'nvidia-ctk', 'nvidia-container-runtime') {
        if (Get-Command $tool -ErrorAction SilentlyContinue) {
            return $true
        }
    }
    return $false
}
# Chooses between the "cuda" and "cuda126" image variants from the driver
# version reported by nvidia-smi for the first GPU. A leading major version
# below 570 selects "cuda126"; a newer driver, or a version that cannot be
# parsed, selects "cuda".
function Get-RecommendedVariant {
    $driverVersion = Invoke-NativeSafe { nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>$null } |
        Select-Object -First 1

    $driverMajor = if ($driverVersion -match '^(\d+)') { [int]$Matches[1] } else { 0 }

    if ($driverMajor -gt 0 -and $driverMajor -lt 570) {
        return "cuda126"
    }
    return "cuda"
}
# Decides which backend image variant to install and returns its name.
#
# Inputs read from the script parameters: $Variant (explicit request, may be
# empty) and $Quiet. Returns "cpu", the explicitly requested variant, or the
# value produced by Get-RecommendedVariant.
#
# A GPU variant is only returned when both Test-NvidiaGpu and
# Test-NvidiaRuntime succeed; otherwise the function warns and returns "cpu".
function Resolve-Variant {
$hasGpu = Test-NvidiaGpu
$hasRuntime = $false
$recommended = "cpu"
# The runtime check and recommendation are only evaluated when a GPU is present.
if ($hasGpu) {
$recommended = Get-RecommendedVariant
$hasRuntime = Test-NvidiaRuntime
}
# Explicit -Variant: honour it if the host can support it, else fall back to cpu.
if ($Variant) {
if ($Variant -eq "cpu") { return "cpu" }
if (-not $hasGpu) {
Write-Warn "No NVIDIA GPU detected; falling back to CPU variant."
return "cpu"
}
if (-not $hasRuntime) {
Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant."
Write-Warn "Install the toolkit before enabling SurfSense GPU acceleration."
return "cpu"
}
return $Variant
}
# No explicit request from here on.
if ($hasGpu -and -not $hasRuntime) {
Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant."
}
# Prompt only when GPU acceleration is usable and the session can be asked.
if ($hasGpu -and $hasRuntime -and -not $Quiet -and [Environment]::UserInteractive) {
Write-Host ""
Write-Host "SurfSense detected an NVIDIA GPU." -ForegroundColor Cyan
$choice = Read-Host "Use GPU acceleration? [Y/n]"
# Empty answer or y/yes accepts the recommendation; n/no and anything
# unrecognized select the CPU variant.
switch ($choice) {
"" { return $recommended }
{ $_ -match '^(?i)y(es)?$' } { return $recommended }
{ $_ -match '^(?i)n(o)?$' } { return "cpu" }
default {
Write-Warn "Unrecognized choice '$choice'; using CPU variant."
return "cpu"
}
}
}
# Quiet or non-interactive runs without an explicit -Variant stay on CPU.
return "cpu"
}
# Writes the image-variant settings into an env file.
#
# Parameters:
#   Path                - env file to modify.
#   SelectedVariant     - "cpu" or a GPU variant name.
#   AllowExistingUpdate - when $false and the file already exists, nothing is
#                         changed; a notice is printed and the function returns.
#
# For "cpu" the SURFSENSE_VARIANT value is emptied and COMPOSE_FILE and
# SURFSENSE_GPU_COUNT are removed. For a GPU variant SURFSENSE_VARIANT and
# COMPOSE_FILE are set, and SURFSENSE_GPU_COUNT is set when -GpuCount was
# given. COMPOSE_PROFILES is removed in both cases.
function Set-VariantEnv {
    param([string]$Path, [string]$SelectedVariant, [bool]$AllowExistingUpdate)

    $keepExisting = (Test-Path $Path) -and -not $AllowExistingUpdate
    if ($keepExisting) {
        Write-Warn ".env already exists - keeping your existing configuration."
        Write-Info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in $Path, then run docker compose up -d --wait."
        return
    }

    if ($SelectedVariant -ne "cpu") {
        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value $SelectedVariant
        Set-EnvValue -Path $Path -Key "COMPOSE_FILE" -Value "docker-compose.yml;docker-compose.gpu.yml"
        if ($GpuCount) {
            Set-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT" -Value $GpuCount
        }
    } else {
        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value ""
        Remove-EnvValue -Path $Path -Key "COMPOSE_FILE"
        Remove-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT"
    }

    Remove-EnvValue -Path $Path -Key "COMPOSE_PROFILES"
}
$SelectedVariant = Resolve-Variant
# ── Download files ──────────────────────────────────────────────────────────
Write-Step "Downloading SurfSense files"
@ -262,6 +324,7 @@ New-Item -ItemType Directory -Path "$InstallDir\searxng" -Force | Out-Null
$Files = @(
@{ Src = "docker/docker-compose.yml"; Dest = "docker-compose.yml" }
@{ Src = "docker/docker-compose.gpu.yml"; Dest = "docker-compose.gpu.yml" }
@{ Src = "docker/.env.example"; Dest = ".env.example" }
@{ Src = "docker/postgresql.conf"; Dest = "postgresql.conf" }
@{ Src = "docker/scripts/migrate-database.ps1"; Dest = "scripts/migrate-database.ps1" }
@ -339,15 +402,19 @@ if (-not (Test-Path $envPath)) {
$content = $content -replace 'SECRET_KEY=replace_me_with_a_random_string', "SECRET_KEY=$SecretKey"
Set-Content -Path $envPath -Value $content -NoNewline
# The .env file was written by the Set-Content call just above, so it already
# exists here. Set-VariantEnv must therefore be allowed to modify it: with
# $false it treats the file as pre-existing user configuration, prints the
# "already exists" notice and returns without recording the selected variant.
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true
Write-Info "Created $envPath"
} else {
Write-Warn ".env already exists - keeping your existing configuration."
if ($PSBoundParameters.ContainsKey('Variant')) {
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true
Write-Info "Updated SurfSense image variant in existing $envPath"
} else {
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $false
}
}
# ── Start containers ────────────────────────────────────────────────────────
Invoke-StaleZeroCacheCleanup
if ($MigrationMode) {
$envContent = Get-Content $envPath
$DbUser = ($envContent | Select-String '^DB_USER=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
@ -405,31 +472,15 @@ if ($MigrationMode) {
}
Write-Step "Starting all SurfSense services"
Push-Location $InstallDir
Invoke-NativeSafe { docker compose up -d }
Pop-Location
Write-Ok "All containers started; waiting for stack to become healthy..."
$waitResult = Wait-StackHealthy -TimeoutSec 300
if (-not $waitResult.Ok) {
Invoke-StackFailureReport -Result $waitResult
}
Write-Ok "All services healthy."
Invoke-ComposeUpWait
Write-Ok "All services started and healthy."
Remove-Item $KeyFile -ErrorAction SilentlyContinue
} else {
Write-Step "Starting SurfSense"
Push-Location $InstallDir
Invoke-NativeSafe { docker compose up -d }
Pop-Location
Write-Ok "All containers started; waiting for stack to become healthy..."
$waitResult = Wait-StackHealthy -TimeoutSec 300
if (-not $waitResult.Ok) {
Invoke-StackFailureReport -Result $waitResult
}
Write-Ok "All services healthy."
Invoke-ComposeUpWait
Write-Ok "All services started and healthy."
}
# ── Watchtower (auto-update) ────────────────────────────────────────────────
@ -461,7 +512,7 @@ if ($SetupWatchtower) {
if ($LASTEXITCODE -eq 0) {
Write-Ok "Watchtower started - labeled SurfSense containers will auto-update."
} else {
Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d"
Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d --wait"
}
}
} else {
@ -471,39 +522,26 @@ if ($SetupWatchtower) {
# ── Done ────────────────────────────────────────────────────────────────────
Write-Host ""
Write-Host @"
.d8888b. .d888 .d8888b.
d88P Y88b d88P" d88P Y88b
Y88b. 888 Y88b.
"Y888b. 888 888 888d888 888888 "Y888b. .d88b. 88888b. .d8888b .d88b.
"Y88b. 888 888 888P" 888 "Y88b. d8P Y8b 888 "88b 88K d8P Y8b
"888 888 888 888 888 "888 88888888 888 888 "Y8888b. 88888888
Y88b d88P Y88b 888 888 888 Y88b d88P Y8b. 888 888 X88 Y8b.
"Y8888P" "Y88888 888 888 "Y8888P" "Y8888 888 888 88888P' "Y8888
"@ -ForegroundColor White
$versionDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VERSION=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
if (-not $versionDisplay) { $versionDisplay = "latest" }
Write-Host " OSS Alternative to NotebookLM for Teams [$versionDisplay]" -ForegroundColor Yellow
Write-Host ("=" * 62) -ForegroundColor Cyan
Write-Host ""
$variantDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VARIANT=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
if (-not $variantDisplay) { $variantDisplay = "cpu" }
$wtHours = [math]::Floor($WatchtowerInterval / 3600)
Write-Step "SurfSense is now installed [$versionDisplay]"
Write-Info " Frontend: http://localhost:3929"
Write-Info " Backend: http://localhost:8929"
Write-Info " API Docs: http://localhost:8929/docs"
Write-Info ""
Write-Info " Config: $InstallDir\.env"
Write-Info " Variant: $variantDisplay"
Write-Info " Logs: cd $InstallDir; docker compose logs -f"
Write-Info " Stop: cd $InstallDir; docker compose down"
Write-Info " Update: cd $InstallDir; docker compose pull; docker compose up -d"
Write-Info " Update: cd $InstallDir; docker compose pull; docker compose up -d --wait"
Write-Info ""
if ($SetupWatchtower) {
Write-Info " Watchtower: auto-updates every ${wtHours}h (stop: docker rm -f $WatchtowerContainer)"
Write-Info " Watchtower: auto-updates every ${wtHours}h (disable: docker rm -f $WatchtowerContainer)"
} else {
Write-Warn " Watchtower skipped. For auto-updates, re-run without -NoWatchtower."
}

View file

@ -8,6 +8,11 @@
# Flags:
# --no-watchtower Skip automatic Watchtower setup
# --watchtower-interval=SECS Check interval in seconds (default: 86400 = 24h)
# --variant=cpu|cuda|cuda126 Select backend image variant
# --gpu Alias for --variant=cuda
# --cpu Alias for --variant=cpu
# --gpu-count=N|all Number of GPUs to reserve when GPU is enabled
# --quiet Skip interactive prompts
#
# Handles two cases automatically:
# 1. Fresh install — no prior SurfSense data detected
@ -35,12 +40,22 @@ MIGRATION_MODE=false
SETUP_WATCHTOWER=true
WATCHTOWER_INTERVAL=86400
WATCHTOWER_CONTAINER="watchtower"
WATCHTOWER_EXPLICIT=false
REQUESTED_VARIANT=""
VARIANT_EXPLICIT=false
GPU_COUNT=""
QUIET=false
# ── Parse flags ─────────────────────────────────────────────────────────────
for arg in "$@"; do
case "$arg" in
--no-watchtower) SETUP_WATCHTOWER=false ;;
--no-watchtower) SETUP_WATCHTOWER=false; WATCHTOWER_EXPLICIT=true ;;
--watchtower-interval=*) WATCHTOWER_INTERVAL="${arg#*=}" ;;
--variant=*) REQUESTED_VARIANT="${arg#*=}"; VARIANT_EXPLICIT=true ;;
--gpu) REQUESTED_VARIANT="cuda"; VARIANT_EXPLICIT=true ;;
--cpu) REQUESTED_VARIANT="cpu"; VARIANT_EXPLICIT=true ;;
--gpu-count=*) GPU_COUNT="${arg#*=}" ;;
--quiet) QUIET=true ;;
esac
done
@ -57,6 +72,57 @@ warn() { printf "${YELLOW}[SurfSense]${NC} %s\n" "$1"; }
error() { printf "${RED}[SurfSense]${NC} ERROR: %s\n" "$1" >&2; exit 1; }
step() { printf "\n${BOLD}${CYAN}── %s${NC}\n" "$1"; }
# Prints the installer banner: a blank line, the banner art, the tagline, a
# separator rule, and a one-line summary of what the installer will do.
# Reads INSTALL_DIR and the colour variables YELLOW, CYAN and NC.
show_banner() {
echo ""
# Switch the terminal to bold white before the banner art is printed.
printf '\033[1;37m'
# Quoted delimiter: the heredoc body is printed literally, without expansion.
cat << 'EOF'
███████╗██╗ ██╗██████╗ ███████╗███████╗███████╗███╗ ██╗███████╗███████╗
██╔════╝██║ ██║██╔══██╗██╔════╝██╔════╝██╔════╝████╗ ██║██╔════╝██╔════╝
███████╗██║ ██║██████╔╝█████╗ ███████╗█████╗ ██╔██╗ ██║███████╗█████╗
╚════██║██║ ██║██╔══██╗██╔══╝ ╚════██║██╔══╝ ██║╚██╗██║╚════██║██╔══╝
███████║╚██████╔╝██║ ██║██║ ███████║███████╗██║ ╚████║███████║███████╗
╚══════╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚══════╝╚══════╝
EOF
printf "${YELLOW} OSS Alternative to NotebookLM for Teams${NC}\n"
printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n"
info "This installer will create ${INSTALL_DIR}/ and start SurfSense with Docker Compose."
}
show_banner
# Reject an unsupported --variant value. An empty value means the flag was
# not given and is accepted.
if [[ -n "${REQUESTED_VARIANT}" \
      && "${REQUESTED_VARIANT}" != "cpu" \
      && "${REQUESTED_VARIANT}" != "cuda" \
      && "${REQUESTED_VARIANT}" != "cuda126" ]]; then
  error "Invalid --variant='${REQUESTED_VARIANT}'. Use cpu, cuda, or cuda126."
fi

# --gpu-count, when given, must be a non-negative integer or the word "all".
if [[ -n "${GPU_COUNT}" ]]; then
  if [[ ! "${GPU_COUNT}" =~ ^([0-9]+|all)$ ]]; then
    error "Invalid --gpu-count='${GPU_COUNT}'. Use a number or 'all'."
  fi
fi
# Asks on the controlling terminal whether Watchtower auto-updates should be
# enabled and stores the answer in SETUP_WATCHTOWER.
#
# Nothing is asked, and SETUP_WATCHTOWER is left unchanged, when
# --no-watchtower or --quiet was given or when no terminal can be opened.
# An empty answer or y/yes enables Watchtower, n/no disables it, and anything
# else enables it after a warning on stderr.
resolve_watchtower_preference() {
  if $WATCHTOWER_EXPLICIT || $QUIET; then
    return 0
  fi

  # Probe by actually opening /dev/tty for reading and writing. Permission
  # tests (-r / -w) succeed on the device node even when the process has no
  # controlling terminal, in which case the redirections below would fail.
  # A subshell keeps a failed redirection from affecting this shell.
  if ! ( : <> /dev/tty ) 2>/dev/null; then
    return 0
  fi

  local choice
  echo "" > /dev/tty
  printf "${BOLD}${CYAN}Automatic updates${NC}\n" > /dev/tty
  printf "Enable automatic daily updates with Watchtower? (may download several GB in the background) [Y/n]: " > /dev/tty
  # A failed read (e.g. EOF) is treated like an empty answer.
  read -r choice < /dev/tty || choice=""

  case "$choice" in
    ""|[Yy]|[Yy][Ee][Ss]) SETUP_WATCHTOWER=true ;;
    [Nn]|[Nn][Oo]) SETUP_WATCHTOWER=false ;;
    *)
      warn "Unrecognized choice '${choice}', enabling Watchtower by default. Use --no-watchtower to skip it." >&2
      SETUP_WATCHTOWER=true
      ;;
  esac
}
resolve_watchtower_preference
# ── Pre-flight checks ────────────────────────────────────────────────────────
step "Checking prerequisites"
@ -97,126 +163,11 @@ wait_for_pg() {
success "PostgreSQL is ready."
}
# ── Stack health helpers ─────────────────────────────────────────────────────
# Enumerate compose services for project `surfsense` as `service|state|health|exitcode`
# lines. Uses `docker inspect` so we don't depend on `jq`, `python3`, or the
# exact ordering of fields in `docker compose ps --format json` output.
# Prints one `service|state|health|exitcode` line for every container that
# carries the label com.docker.compose.project=surfsense. Containers whose
# compose service label cannot be read are skipped; prints nothing when no
# container matches.
get_compose_services() {
  local container_names
  container_names=$(docker ps -a --filter "label=com.docker.compose.project=surfsense" --format '{{.Names}}' 2>/dev/null) || true
  if [[ -z "$container_names" ]]; then
    return 0
  fi

  local cname
  while IFS= read -r cname; do
    if [[ -z "$cname" ]]; then
      continue
    fi
    local svc state health code
    # Each field falls back to a placeholder when `docker inspect` fails.
    svc=$(docker inspect -f '{{index .Config.Labels "com.docker.compose.service"}}' "$cname" 2>/dev/null || echo "")
    state=$(docker inspect -f '{{.State.Status}}' "$cname" 2>/dev/null || echo "unknown")
    health=$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{end}}' "$cname" 2>/dev/null || echo "")
    code=$(docker inspect -f '{{.State.ExitCode}}' "$cname" 2>/dev/null || echo "")
    if [[ -n "$svc" ]]; then
      printf '%s|%s|%s|%s\n' "$svc" "$state" "$health" "$code"
    fi
  done <<< "$container_names"
}
# Globals populated by wait_stack_healthy / consumed by stack_failure_report.
STACK_BAD=()
STACK_WAITING=()
STACK_GOOD=()
STACK_TIMEOUT=false
# Polls get_compose_services until every service is in a good state, at least
# one service has failed, or the timeout elapses.
#
# Arguments:
#   $1 - timeout in seconds (default 300).
# Returns:
#   0 when nothing is failed or pending; 1 on failure or timeout.
# Side effects:
#   Fills STACK_BAD / STACK_WAITING / STACK_GOOD with the classification from
#   the last poll (the success path sets STACK_GOOD only) and sets
#   STACK_TIMEOUT=true when the deadline is reached.
wait_stack_healthy() {
  local timeout_sec=${1:-300}
  local deadline=$(($(date +%s) + timeout_sec))
  local last_report=""
  local bad=()
  local waiting=()
  local good=()
  # Loop variables are local so they do not leak into the rest of the script.
  local lines report name state health code

  while [[ $(date +%s) -lt $deadline ]]; do
    lines=$(get_compose_services)
    if [[ -z "$lines" ]]; then
      # Nothing listed yet: wait briefly and poll again.
      sleep 3
      continue
    fi

    bad=()
    waiting=()
    good=()
    while IFS='|' read -r name state health code; do
      [[ -z "$name" ]] && continue

      # 'migrations' is treated specially: exiting with code 0 is success,
      # any other exit is a failure, every other state is still pending.
      if [[ "$name" == "migrations" ]]; then
        if [[ "$state" == "exited" && "$code" == "0" ]]; then
          good+=("$name")
        elif [[ "$state" == "exited" ]]; then
          bad+=("${name} (exit=${code})")
        else
          waiting+=("${name} (${state})")
        fi
        continue
      fi

      if [[ "$state" == "running" ]]; then
        # A running service with no health value is accepted as good.
        if [[ -z "$health" || "$health" == "healthy" ]]; then
          good+=("$name")
        elif [[ "$health" == "starting" ]]; then
          waiting+=("${name} (starting)")
        elif [[ "$health" == "unhealthy" ]]; then
          bad+=("${name} (unhealthy)")
        else
          waiting+=("${name} (${health})")
        fi
      elif [[ "$state" == "restarting" ]]; then
        bad+=("${name} (restarting)")
      elif [[ "$state" == "exited" ]]; then
        bad+=("${name} (exited, code=${code})")
      else
        waiting+=("${name} (${state})")
      fi
    done <<< "$lines"

    # The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array
    # instead of referencing it directly, matching the guarded copies used on
    # the timeout path (a direct empty-array expansion is an error under
    # `set -u` on older bash releases).
    if (( ${#bad[@]} > 0 )); then
      STACK_BAD=(${bad[@]+"${bad[@]}"})
      STACK_WAITING=(${waiting[@]+"${waiting[@]}"})
      STACK_GOOD=(${good[@]+"${good[@]}"})
      return 1
    fi

    if (( ${#waiting[@]} == 0 )); then
      STACK_GOOD=(${good[@]+"${good[@]}"})
      return 0
    fi

    # Print the waiting list only when it differs from the previous poll.
    report="Waiting on: ${waiting[*]}"
    if [[ "$report" != "$last_report" ]]; then
      info "$report"
      last_report="$report"
    fi
    sleep 5
  done

  # Deadline reached. bad/waiting/good are declared at function scope, so
  # they are defined even if the loop never classified any service.
  STACK_BAD=(${bad[@]+"${bad[@]}"})
  STACK_WAITING=(${waiting[@]+"${waiting[@]}"})
  STACK_GOOD=(${good[@]+"${good[@]}"})
  STACK_TIMEOUT=true
  return 1
}
# ── Stack startup helper ─────────────────────────────────────────────────────
stack_failure_report() {
echo ""
echo -e "\033[31m[ERROR]\033[0m Stack did not reach a healthy state."
if (( ${#STACK_BAD[@]} > 0 )) && [[ -n "${STACK_BAD[0]}" ]]; then
echo " Failed: ${STACK_BAD[*]}"
fi
if (( ${#STACK_WAITING[@]} > 0 )) && [[ -n "${STACK_WAITING[0]}" ]]; then
echo " Stuck: ${STACK_WAITING[*]}"
fi
echo ""
info "Recent logs from migrations / zero-cache / backend:"
(cd "${INSTALL_DIR}" && ${DC} logs --tail=60 migrations zero-cache backend 2>&1) || true
@ -224,36 +175,158 @@ stack_failure_report() {
echo "Recovery hints:"
echo " 1. Inspect migrations: cd ${INSTALL_DIR} && ${DC} logs migrations"
echo " 2. Verify publication: cd ${INSTALL_DIR} && ${DC} exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
echo " 3. Hard reset zero db: cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d"
echo " 3. Hard reset zero db: cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d --wait"
echo ""
exit 1
}
# True if `surfsense-zero-cache` exists but `surfsense-zero-init` does not.
# That signals an install that predates the migrations-service fix; the old
# replica may be half-initialized and would block zero-cache on next start.
test_stale_zero_cache_volume() {
local has_zc has_zi
has_zc=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-cache' || true)
has_zi=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-init' || true)
[[ -n "$has_zc" && -z "$has_zi" ]]
# Runs `${DC} up -d --wait` inside INSTALL_DIR with stdin detached, optionally
# limited to a single service.
#
# Arguments:
#   $1 - optional service name; all services are started when omitted.
# Returns:
#   the exit status of the compose command (or of the failed `cd`).
compose_up_wait() {
  local target="${1:-}"
  local up_args=(up -d --wait)
  if [[ -n "$target" ]]; then
    up_args+=("$target")
  fi
  # ${DC} is deliberately unquoted so a multi-word command splits into words.
  (cd "${INSTALL_DIR}" && ${DC} "${up_args[@]}") < /dev/null
}
invoke_stale_zero_cache_cleanup() {
if ! test_stale_zero_cache_volume; then
# ── Variant and .env helpers ─────────────────────────────────────────────────
# Sets KEY=VALUE in an env file, replacing every line that starts with "KEY="
# or appending a new line when the key is absent. The file is created when it
# does not exist.
#
# Arguments:
#   $1 - env file to modify
#   $2 - variable name, matched literally at the start of a line
#   $3 - new value (may be empty); written verbatim
set_env_value() {
  local file="$1"
  local key="$2"
  local value="$3"
  local tmp
  tmp=$(mktemp)

  if [[ ! -f "$file" ]]; then
    # No file yet: start one with just this entry.
    printf '%s=%s\n' "$key" "$value" > "$tmp"
  elif SURFSENSE_ENV_KEY="$key" awk '
      BEGIN { prefix = ENVIRON["SURFSENSE_ENV_KEY"] "=" }
      index($0, prefix) == 1 { found = 1; exit }
      END { exit found ? 0 : 1 }
    ' "$file"; then
    # Key and value travel through the environment rather than `awk -v`, so
    # backslashes in the value are not interpreted as escape sequences, and
    # index() compares the prefix literally instead of as a regex.
    SURFSENSE_ENV_KEY="$key" SURFSENSE_ENV_VALUE="$value" awk '
      BEGIN {
        prefix = ENVIRON["SURFSENSE_ENV_KEY"] "="
        value = ENVIRON["SURFSENSE_ENV_VALUE"]
      }
      index($0, prefix) == 1 { print prefix value; next }
      { print }
    ' "$file" > "$tmp"
  else
    cp "$file" "$tmp"
    # The leading newline keeps the entry on its own line even when the file
    # does not end with one.
    printf '\n%s=%s\n' "$key" "$value" >> "$tmp"
  fi

  mv "$tmp" "$file"
}
# Deletes every line that starts with "KEY=" from an env file. A missing file
# is left alone, matching the PowerShell installer's Remove-EnvValue.
#
# Arguments:
#   $1 - env file to modify
#   $2 - variable name, matched literally at the start of a line
remove_env_value() {
  local file="$1"
  local key="$2"
  local tmp

  # Nothing to remove; avoid an awk error and an empty file being created.
  if [[ ! -f "$file" ]]; then
    return 0
  fi

  tmp=$(mktemp)
  # index() compares the prefix literally, so the key is never treated as a
  # regular expression.
  SURFSENSE_ENV_KEY="$key" awk '
    BEGIN { prefix = ENVIRON["SURFSENSE_ENV_KEY"] "=" }
    index($0, prefix) != 1 { print }
  ' "$file" > "$tmp"
  mv "$tmp" "$file"
}
# Prints the part of a dotted version string before the first dot
# (e.g. "570" for "570.86.10").
#
# Arguments:
#   $1 - version string
version_major() {
  local version="$1"
  printf '%s' "$version" | cut -d '.' -f 1
}
# Prints the recommended GPU image variant based on the driver version that
# nvidia-smi reports for the first GPU: "cuda126" when the major version is a
# number below 570, "cuda" otherwise (including when it cannot be read).
recommend_cuda_variant() {
  local reported_version major_version
  local variant='cuda'

  reported_version=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -n 1 | tr -d '[:space:]' || true)
  major_version=$(version_major "$reported_version")

  # CUDA 12.8 generally requires an R570+ driver. Use CUDA 12.6 as the
  # compatibility fallback for older 12.x driver stacks and GPUs.
  if [[ "$major_version" =~ ^[0-9]+$ && "$major_version" -lt 570 ]]; then
    variant='cuda126'
  fi

  printf '%s' "$variant"
}
# Succeeds when `docker info` mentions "nvidia" (case-insensitive) or when
# either `nvidia-ctk` or `nvidia-container-runtime` is on PATH.
gpu_runtime_available() {
  if docker info 2>/dev/null | grep -qi 'nvidia'; then
    return 0
  fi

  local tool
  for tool in nvidia-ctk nvidia-container-runtime; do
    if command -v "$tool" >/dev/null 2>&1; then
      return 0
    fi
  done
  return 1
}
# Succeeds when `nvidia-smi` is on PATH and runs successfully; all of its
# output is discarded.
host_has_nvidia_gpu() {
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    return 1
  fi
  nvidia-smi >/dev/null 2>&1
}
resolve_variant() {
local detected_variant="cpu"
local has_gpu=false
local has_runtime=false
if host_has_nvidia_gpu; then
has_gpu=true
detected_variant=$(recommend_cuda_variant)
if gpu_runtime_available; then
has_runtime=true
fi
fi
if $VARIANT_EXPLICIT; then
if [[ "$REQUESTED_VARIANT" == "cpu" ]]; then
printf 'cpu'
return 0
fi
if ! $has_gpu; then
warn "No NVIDIA GPU detected; falling back to CPU variant." >&2
printf 'cpu'
return 0
fi
if ! $has_runtime; then
warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant." >&2
warn "Install the toolkit before enabling SurfSense GPU acceleration." >&2
printf 'cpu'
return 0
fi
printf '%s' "$REQUESTED_VARIANT"
return 0
fi
warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
warn "predates the migrations-service fix. It may contain a half-initialized"
warn "SQLite replica that would block zero-cache from starting."
warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
sleep 5
(cd "${INSTALL_DIR}" && ${DC} down --remove-orphans 2>/dev/null) || true
docker volume rm surfsense-zero-cache 2>/dev/null || true
success "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
if $has_gpu && ! $has_runtime; then
warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant." >&2
fi
if $has_gpu && $has_runtime && ! $QUIET && [[ -r /dev/tty && -w /dev/tty ]]; then
local choice
echo "" > /dev/tty
printf "${BOLD}${CYAN}SurfSense detected an NVIDIA GPU.${NC}\n" > /dev/tty
printf "Use GPU acceleration? [Y/n]: " > /dev/tty
read -r choice < /dev/tty || choice=""
case "$choice" in
"") printf '%s' "$detected_variant" ;;
[Yy]|[Yy][Ee][Ss]) printf '%s' "$detected_variant" ;;
[Nn]|[Nn][Oo]) printf 'cpu' ;;
*) warn "Unrecognized choice '${choice}', using CPU variant." >&2; printf 'cpu' ;;
esac
return 0
fi
printf 'cpu'
}
# Writes the image-variant settings into an env file.
#
# Arguments:
#   $1 - env file to modify
#   $2 - "cpu" or a GPU variant name
#   $3 - "true" to allow changing a file that already exists; with any other
#        value an existing file is left untouched and a notice is printed
#
# For "cpu" the SURFSENSE_VARIANT value is emptied and COMPOSE_FILE and
# SURFSENSE_GPU_COUNT are removed. For a GPU variant SURFSENSE_VARIANT and
# COMPOSE_FILE are set, and SURFSENSE_GPU_COUNT is set when GPU_COUNT is
# non-empty. COMPOSE_PROFILES is removed in both cases.
apply_variant_env() {
  local env_file="$1"
  local variant="$2"
  local allow_existing_update="$3"

  if [[ "$allow_existing_update" != "true" && -f "$env_file" ]]; then
    warn ".env already exists — keeping your existing configuration."
    info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in ${env_file}, then run ${DC} up -d --wait."
    return 0
  fi

  case "$variant" in
    cpu)
      set_env_value "$env_file" "SURFSENSE_VARIANT" ""
      remove_env_value "$env_file" "COMPOSE_FILE"
      remove_env_value "$env_file" "SURFSENSE_GPU_COUNT"
      ;;
    *)
      set_env_value "$env_file" "SURFSENSE_VARIANT" "$variant"
      set_env_value "$env_file" "COMPOSE_FILE" "docker-compose.yml:docker-compose.gpu.yml"
      if [[ -n "$GPU_COUNT" ]]; then
        set_env_value "$env_file" "SURFSENSE_GPU_COUNT" "$GPU_COUNT"
      fi
      ;;
  esac

  remove_env_value "$env_file" "COMPOSE_PROFILES"
}
SELECTED_VARIANT=$(resolve_variant)
# ── Download files ───────────────────────────────────────────────────────────
step "Downloading SurfSense files"
@ -263,6 +336,7 @@ mkdir -p "${INSTALL_DIR}/searxng"
FILES=(
"docker/docker-compose.yml:docker-compose.yml"
"docker/docker-compose.gpu.yml:docker-compose.gpu.yml"
"docker/.env.example:.env.example"
"docker/postgresql.conf:postgresql.conf"
"docker/scripts/migrate-database.sh:scripts/migrate-database.sh"
@ -336,15 +410,19 @@ if [ ! -f "${INSTALL_DIR}/.env" ]; then
else
sed -i "s|SECRET_KEY=replace_me_with_a_random_string|SECRET_KEY=${SECRET_KEY}|" "${INSTALL_DIR}/.env"
fi
# The .env file was created just above (the sed call edits it in place), so
# it already exists here. apply_variant_env must therefore be allowed to
# modify it: with "false" it treats the file as pre-existing user
# configuration, prints the "already exists" notice and returns without
# recording the selected variant.
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
info "Created ${INSTALL_DIR}/.env"
else
warn ".env already exists — keeping your existing configuration."
if $VARIANT_EXPLICIT; then
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
info "Updated SurfSense image variant in existing ${INSTALL_DIR}/.env"
else
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "false"
fi
fi
# ── Start containers ─────────────────────────────────────────────────────────
invoke_stale_zero_cache_cleanup
if $MIGRATION_MODE; then
# Read DB credentials from .env (fall back to defaults from docker-compose.yml)
DB_USER=$(grep '^DB_USER=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
@ -401,26 +479,20 @@ if $MIGRATION_MODE; then
fi
step "Starting all SurfSense services"
(cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null
success "All containers started; waiting for stack to become healthy..."
if ! wait_stack_healthy 300; then
if ! compose_up_wait; then
stack_failure_report
fi
success "All services healthy."
success "All services started and healthy."
# Key file is no longer needed — SECRET_KEY is now in .env
rm -f "${KEY_FILE}"
else
step "Starting SurfSense"
(cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null
success "All containers started; waiting for stack to become healthy..."
if ! wait_stack_healthy 300; then
if ! compose_up_wait; then
stack_failure_report
fi
success "All services healthy."
success "All services started and healthy."
fi
# ── Watchtower (auto-update) ─────────────────────────────────────────────────
@ -445,7 +517,7 @@ if $SETUP_WATCHTOWER; then
--label-enable \
--interval "${WATCHTOWER_INTERVAL}" >/dev/null 2>&1 < /dev/null \
&& success "Watchtower started — labeled SurfSense containers will auto-update." \
|| warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d"
|| warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d --wait"
fi
else
info "Skipping Watchtower setup (--no-watchtower flag)."
@ -454,38 +526,25 @@ fi
# ── Done ─────────────────────────────────────────────────────────────────────
echo ""
printf '\033[1;37m'
cat << 'EOF'
.d8888b. .d888 .d8888b.
d88P Y88b d88P" d88P Y88b
Y88b. 888 Y88b.
"Y888b. 888 888 888d888 888888 "Y888b. .d88b. 88888b. .d8888b .d88b.
"Y88b. 888 888 888P" 888 "Y88b. d8P Y8b 888 "88b 88K d8P Y8b
"888 888 888 888 888 "888 88888888 888 888 "Y8888b. 88888888
Y88b d88P Y88b 888 888 888 Y88b d88P Y8b. 888 888 X88 Y8b.
"Y8888P" "Y88888 888 888 "Y8888P" "Y8888 888 888 88888P' "Y8888
EOF
_version_display=$(grep '^SURFSENSE_VERSION=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
_version_display="${_version_display:-latest}"
printf " OSS Alternative to NotebookLM for Teams ${YELLOW}[%s]${NC}\n" "${_version_display}"
printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n\n"
_variant_display=$(grep '^SURFSENSE_VARIANT=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
_variant_display="${_variant_display:-cpu}"
step "SurfSense is now installed [${_version_display}]"
info " Frontend: http://localhost:3929"
info " Backend: http://localhost:8929"
info " API Docs: http://localhost:8929/docs"
info ""
info " Config: ${INSTALL_DIR}/.env"
info " Variant: ${_variant_display}"
info " Logs: cd ${INSTALL_DIR} && ${DC} logs -f"
info " Stop: cd ${INSTALL_DIR} && ${DC} down"
info " Update: cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d"
info " Update: cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d --wait"
info ""
if $SETUP_WATCHTOWER; then
info " Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (stop: docker rm -f ${WATCHTOWER_CONTAINER})"
info " Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (disable: docker rm -f ${WATCHTOWER_CONTAINER})"
else
warn " Watchtower skipped. For auto-updates, re-run without --no-watchtower."
fi

View file

@ -212,9 +212,9 @@ API Base URL: https://open.bigmodel.cn/api/paas/v4
| 字段 | 值 | 说明 |
|------|-----|------|
| **Configuration Name** | `MiniMax M2.5` | 配置名称(自定义) |
| **Configuration Name** | `MiniMax M3` | 配置名称(自定义) |
| **Provider** | `MINIMAX` | 选择 MiniMax |
| **Model Name** | `MiniMax-M2.5` | 推荐模型<br>其他选项: `MiniMax-M2.5-highspeed` |
| **Model Name** | `MiniMax-M3` | 推荐模型<br>其他选项: `MiniMax-M2.7`、`MiniMax-M2.7-highspeed` |
| **API Key** | `eyJ...` | 你的 MiniMax API Key |
| **API Base URL** | `https://api.minimax.io/v1` | MiniMax API 地址 |
| **Parameters** | `{"temperature": 1.0}` | 注意：temperature 必须在 (0.0, 1.0] 范围内，不能为 0 |
@ -222,22 +222,23 @@ API Base URL: https://open.bigmodel.cn/api/paas/v4
### 示例配置
```
Configuration Name: MiniMax M2.5
Configuration Name: MiniMax M3
Provider: MINIMAX
Model Name: MiniMax-M2.5
Model Name: MiniMax-M3
API Key: eyJxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
API Base URL: https://api.minimax.io/v1
```
### 可用模型
- **MiniMax-M2.5**: 高性能通用模型204K 上下文窗口(推荐)
- **MiniMax-M2.5-highspeed**: 高速推理版本204K 上下文窗口
- **MiniMax-M3**: 旗舰模型，512K 上下文窗口（推荐）
- **MiniMax-M2.7**: 上一代通用模型，204K 上下文窗口
- **MiniMax-M2.7-highspeed**: 上一代高速推理版本，204K 上下文窗口
### 注意事项
- **temperature 参数**: MiniMax 要求 temperature 必须在 (0.0, 1.0] 范围内,不能设置为 0。建议使用 1.0。
- 两个模型都支持 204K 超长上下文窗口,适合处理长文本任务
- M3 支持 512K 超长上下文，M2.7 系列保留 204K，适合按需求选择
### 定价
- 请访问 [MiniMax 定价页面](https://platform.minimaxi.com/document/Price) 查看最新价格
@ -315,8 +316,8 @@ docker compose logs backend | grep -i "error"
|---------|---------|------|
| **文档摘要** | Qwen-Plus, GLM-4 | 平衡性能和成本 |
| **代码分析** | DeepSeek-Coder | 代码专用 |
| **长文本处理** | Kimi 128K, MiniMax-M2.5 (204K) | 超长上下文 |
| **快速响应** | Qwen-Turbo, GLM-4-Flash, MiniMax-M2.5-highspeed | 速度优先 |
| **长文本处理** | Kimi 128K, MiniMax-M3 (512K) | 超长上下文 |
| **快速响应** | Qwen-Turbo, GLM-4-Flash, MiniMax-M2.7-highspeed | 速度优先 |
### 2. 成本优化

View file

@ -3,18 +3,46 @@ DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense
# Deployment environment: dev or production
SURFSENSE_ENV=dev
#Celery Config
CELERY_BROKER_URL=redis://localhost:6379/0
CELERY_RESULT_BACKEND=redis://localhost:6379/0
# Redis (single endpoint for Celery broker/result backend + app features)
REDIS_URL=redis://localhost:6379/0
# Optional: override individually only to split Redis across instances.
# Each defaults to REDIS_URL when unset.
# CELERY_BROKER_URL=redis://localhost:6379/0
# CELERY_RESULT_BACKEND=redis://localhost:6379/0
# REDIS_APP_URL=redis://localhost:6379/0
# Optional: isolate queues when sharing Redis with other apps
CELERY_TASK_DEFAULT_QUEUE=surfsense
# Redis for app-level features (heartbeats, podcast markers)
# Defaults to CELERY_BROKER_URL when not set
REDIS_APP_URL=redis://localhost:6379/0
# Optional: TTL in seconds for connector indexing lock key
# CONNECTOR_INDEXING_LOCK_TTL_SECONDS=28800
# Messaging Gateway (global)
# GATEWAY_ENABLED: master switch for ALL messaging gateway channels (Telegram, WhatsApp,
# Slack, Discord). When FALSE, no gateway background workers/supervisors start and all
# gateway HTTP routes (webhooks, OAuth callbacks, pairing) return 404. Set per-channel
# flags below to control individual platforms once the gateway is enabled.
GATEWAY_ENABLED=TRUE
# Telegram Gateway
# TELEGRAM_WEBHOOK_SECRET must be 1-256 chars and contain only A-Z, a-z, 0-9, _ or -
# GATEWAY_TELEGRAM_INTAKE_MODE: `webhook` for production, `longpoll` for single-replica self-host fallback, `disabled` to skip Telegram intake
TELEGRAM_SHARED_BOT_TOKEN=
TELEGRAM_SHARED_BOT_USERNAME=
TELEGRAM_WEBHOOK_SECRET=
GATEWAY_BASE_URL=http://localhost:8000
GATEWAY_TELEGRAM_INTAKE_MODE=webhook
# WhatsApp Gateway
# GATEWAY_WHATSAPP_INTAKE_MODE: `cloud` for Meta Cloud API, `baileys` for self-hosted bridge, `disabled` to skip WhatsApp intake
GATEWAY_WHATSAPP_INTAKE_MODE=disabled
WHATSAPP_SHARED_BUSINESS_TOKEN=
WHATSAPP_SHARED_PHONE_NUMBER_ID=
WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER=
WHATSAPP_SHARED_WABA_ID=
WHATSAPP_GRAPH_API_VERSION=v25.0
WHATSAPP_WEBHOOK_VERIFY_TOKEN=
WHATSAPP_WEBHOOK_APP_SECRET=
WHATSAPP_BRIDGE_URL=http://whatsapp-bridge:9929
# Platform Web Search (SearXNG)
# Set this to enable built-in web search. Docker Compose sets it automatically.
# Only uncomment if running the backend outside Docker (e.g. uvicorn on host).
@ -64,8 +92,6 @@ STRIPE_PAGE_BUYING_ENABLED=TRUE
STRIPE_TOKEN_BUYING_ENABLED=FALSE
STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
STRIPE_CREDIT_MICROS_PER_UNIT=1000000
# DEPRECATED — use STRIPE_CREDIT_MICROS_PER_UNIT (1:1 numerical mapping):
# STRIPE_TOKENS_PER_UNIT=1000000
# Periodic Stripe safety net for purchases left in PENDING (minutes old)
STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
@ -98,11 +124,14 @@ CLICKUP_CLIENT_ID=your_clickup_client_id_here
CLICKUP_CLIENT_SECRET=your_clickup_client_secret_here
CLICKUP_REDIRECT_URI=http://localhost:8000/api/v1/auth/clickup/connector/callback
# Discord OAuth Configuration
# Discord OAuth / Gateway Configuration
# The Discord connector and Discord gateway use the same Discord application/bot.
DISCORD_CLIENT_ID=your_discord_client_id_here
DISCORD_CLIENT_SECRET=your_discord_client_secret_here
DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/auth/discord/connector/callback
DISCORD_BOT_TOKEN=your_bot_token_from_developer_portal
GATEWAY_DISCORD_ENABLED=FALSE
GATEWAY_DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/gateway/discord/callback
# Atlassian OAuth Configuration (Jira & Confluence)
ATLASSIAN_CLIENT_ID=your_atlassian_client_id_here
@ -120,10 +149,14 @@ NOTION_CLIENT_ID=your_notion_client_id_here
NOTION_CLIENT_SECRET=your_notion_client_secret_here
NOTION_REDIRECT_URI=http://localhost:8000/api/v1/auth/notion/connector/callback
# Slack OAuth Configuration
# Slack OAuth / Gateway Configuration
# The Slack connector and Slack gateway can use the same Slack app client ID/secret.
SLACK_CLIENT_ID=your_slack_client_id_here
SLACK_CLIENT_SECRET=your_slack_client_secret_here
SLACK_REDIRECT_URI=http://localhost:8000/api/v1/auth/slack/connector/callback
GATEWAY_SLACK_ENABLED=FALSE
GATEWAY_SLACK_SIGNING_SECRET=your_slack_signing_secret_here
GATEWAY_SLACK_REDIRECT_URI=http://localhost:8000/api/v1/gateway/slack/callback
# Microsoft OAuth (Teams & OneDrive)
MICROSOFT_CLIENT_ID=your_microsoft_client_id_here
@ -197,8 +230,6 @@ PAGES_LIMIT=500
# models bill proportionally. Applies only to models with
# billing_tier=premium in global_llm_config.yaml.
PREMIUM_CREDIT_MICROS_LIMIT=5000000
# DEPRECATED — use PREMIUM_CREDIT_MICROS_LIMIT (1:1 numerical mapping):
# PREMIUM_TOKEN_LIMIT=5000000
# Safety ceiling on per-call premium reservation, in micro-USD.
# stream_new_chat estimates an upper-bound cost from the model's
@ -246,17 +277,19 @@ TURNSTILE_ENABLED=FALSE
TURNSTILE_SECRET_KEY=
# Proxy provider selection. Selects a ProxyProvider implementation registered in
# app/utils/proxy/registry.py. Default: "anonymous_proxies". Add new vendors there.
# PROXY_PROVIDER=anonymous_proxies
# Residential Proxy Configuration (anonymous-proxies.net)
# Used for web crawling, link previews, and YouTube transcript fetching to avoid IP bans.
# Leave commented out to disable proxying.
# Consumed by the "anonymous_proxies" provider. Leave commented out to disable proxying.
# RESIDENTIAL_PROXY_USERNAME=your_proxy_username
# RESIDENTIAL_PROXY_PASSWORD=your_proxy_password
# RESIDENTIAL_PROXY_HOSTNAME=rotating.dnsproxifier.com:31230
# RESIDENTIAL_PROXY_LOCATION=
# RESIDENTIAL_PROXY_TYPE=1
FIRECRAWL_API_KEY=fcr-01J0000000000000000000000
# File Parser Service
ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD or DOCLING
UNSTRUCTURED_API_KEY=Tpu3P0U8iy
@ -265,6 +298,16 @@ LLAMA_CLOUD_API_KEY=llx-nnn
# AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
# AZURE_DI_KEY=your-key
# Original File Storage
# Where to persist the original bytes of uploaded documents (for download today,
# redaction / form-filling later). "local" needs no cloud creds and is the dev default.
FILE_STORAGE_BACKEND=local
# Local backend: directory for stored files (defaults to surfsense_backend/.local_object_store)
# FILE_STORAGE_LOCAL_PATH=/var/lib/surfsense/object-store
# Azure Blob backend (set FILE_STORAGE_BACKEND=azure):
# AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net
# AZURE_STORAGE_CONTAINER=surfsense-documents
# Daytona Sandbox (isolated code execution)
# DAYTONA_SANDBOX_ENABLED=FALSE
# DAYTONA_API_KEY=your-daytona-api-key
@ -285,9 +328,6 @@ LANGSMITH_PROJECT=surfsense
# =============================================================================
# OPTIONAL: New-chat agent feature flags
# =============================================================================
# Multi-agent orchestrator switch for authenticated chat streaming.
# MULTI_AGENT_CHAT_ENABLED=false
# Master kill-switch — when true, every flag below is forced OFF.
# SURFSENSE_DISABLE_NEW_AGENT_STACK=false
@ -322,6 +362,13 @@ LANGSMITH_PROJECT=surfsense
# SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
# SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false
# KB retrieval mode (default OFF = lazy). When OFF, the main agent retrieves
# KB content on demand via the `search_knowledge_base` tool and skips the
# expensive per-turn pre-injection (planner LLM + embed + hybrid search,
# ~2.3s); explicit @-mentions are still surfaced cheaply. Set to true to
# restore the original eager `<priority_documents>` pre-injection.
# SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION=false
# Snapshot / revert
# SURFSENSE_ENABLE_ACTION_LOG=false
# SURFSENSE_ENABLE_REVERT_ROUTE=false # Backend-only; flip when UI ships
@ -342,6 +389,15 @@ LANGSMITH_PROJECT=surfsense
# rollback if you suspect cache-related staleness.
# SURFSENSE_ENABLE_AGENT_CACHE=true
# Cross-thread reuse (default ON). Drops thread_id from the cache key so a
# returning user's NEW chats (same user + search space + config + visibility)
# hit the already-compiled graph instead of paying a fresh ~4-5s compile —
# turning a cold first turn into a warm one. Safe because ActionLog,
# KB-persistence, and the deliverables tools now resolve the chat thread from
# the live RunnableConfig at call time rather than a build-time closure. Flip
# OFF to fall back to a per-thread cache key (instant rollback).
# SURFSENSE_ENABLE_CROSS_THREAD_AGENT_CACHE=true
# Cache capacity (max number of compiled-agent entries kept in memory)
# and TTL per entry (seconds). Working set is typically one entry per
# active thread on this replica; tune up for very large deployments.

View file

@ -2,6 +2,7 @@
.venv
venv/
data/
.local_object_store/
__pycache__/
.flashrank_cache
surf_new_backend.egg-info/

View file

@ -1,3 +1,4 @@
# syntax=docker.io/docker/dockerfile:1
# =============================================================================
# SurfSense Backend — Multi-stage Dockerfile
# =============================================================================
@ -61,15 +62,25 @@ COPY pyproject.toml uv.lock ./
# Exporting the lock to requirements.txt and feeding it to `uv pip install`
# pins every transitive package to the exact version captured in uv.lock.
#
# Note on torch/CUDA: we do NOT install torch from a separate cu* index here.
# PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull
# nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all
# captured in uv.lock). If a specific CUDA version is needed, wire it through
# [tool.uv.sources] in pyproject.toml so the lock stays the source of truth.
# Note on torch/CUDA: the export must always select either the cpu or CUDA
# extra declared in pyproject.toml. A no-extra export would resolve torch from
# PyPI on Linux, which currently pulls CUDA-enabled wheels and nvidia-* deps.
# Keep CUDA version selection in [tool.uv.sources] so uv.lock remains the
# source of truth. The install step also needs the matching PyTorch index,
# because requirements.txt preserves the +cpu/+cu wheel pins but not uv's
# package source metadata.
ARG USE_CUDA=false
ARG CUDA_EXTRA=cu128
RUN pip install --no-cache-dir uv && \
if [ "$USE_CUDA" = "true" ]; then EXTRA="$CUDA_EXTRA"; else EXTRA="cpu"; fi && \
TORCH_INDEX="https://download.pytorch.org/whl/${EXTRA}" && \
uv export --frozen --no-dev --no-hashes --no-emit-project \
--extra "$EXTRA" \
--format requirements-txt -o /tmp/requirements.txt && \
uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \
uv pip install --system --no-cache-dir \
--index "$TORCH_INDEX" \
--index-strategy unsafe-best-match \
-r /tmp/requirements.txt && \
rm /tmp/requirements.txt
@ -94,10 +105,14 @@ RUN printf '%s\n' \
| python || true
ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
RUN python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"
RUN --mount=type=secret,id=HF_TOKEN \
HF_TOKEN="$(cat /run/secrets/HF_TOKEN 2>/dev/null || true)" \
python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"
# Install Playwright browsers (the playwright python package itself is in deps)
RUN playwright install chromium --with-deps
# Install Scrapling's browser engines (patchright Chromium + Camoufox).
# Scrapling pulls playwright/patchright via the `fetchers` extra; `scrapling install`
# downloads the matching browser binaries used by DynamicFetcher/StealthyFetcher.
RUN scrapling install
# Shared temp directory for file uploads between API and Worker containers.
# Python's tempfile module uses TMPDIR, so uploaded files land here.

View file

@ -3,6 +3,7 @@ import os
import sys
from logging.config import fileConfig
import sqlalchemy as sa
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
@ -36,6 +37,9 @@ if config.config_file_name is not None:
# target_metadata = mymodel.Base.metadata
target_metadata = Base.metadata
MIGRATION_ADVISORY_LOCK_NAMESPACE = "surfsense"
MIGRATION_ADVISORY_LOCK_NAME = "alembic_migrations"
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
@ -73,8 +77,22 @@ def do_run_migrations(connection: Connection) -> None:
transaction_per_migration=True,
)
with context.begin_transaction():
context.run_migrations()
lock_params = {
"namespace": MIGRATION_ADVISORY_LOCK_NAMESPACE,
"name": MIGRATION_ADVISORY_LOCK_NAME,
}
connection.execute(
sa.text("SELECT pg_advisory_lock(hashtext(:namespace), hashtext(:name))"),
lock_params,
)
try:
with context.begin_transaction():
context.run_migrations()
finally:
connection.execute(
sa.text("SELECT pg_advisory_unlock(hashtext(:namespace), hashtext(:name))"),
lock_params,
)
async def run_async_migrations() -> None:

View file

@ -47,7 +47,6 @@ depends_on: str | Sequence[str] | None = None
PUBLICATION_NAME = "zero_publication"
# Must stay in sync with the column lists in migrations 117 / 139 / 140.
DOCUMENT_COLS = [
"id",
"title",

View file

@ -0,0 +1,175 @@
"""add automation_runs to zero_publication with thin column list
Publishes ``automation_runs`` so the dashboard can replace polling with a
live run status + per-step ticker. Only the columns the list and ticker
read are exposed (``id, automation_id, trigger_id, status, step_results,
started_at, finished_at, created_at``); heavy JSONB
(``definition_snapshot``, ``inputs``, ``output``, ``artifacts``, ``error``)
stays on REST and is fetched lazily on detail expand.
Uses the canonical ``ALTER PUBLICATION ... SET TABLE`` + ``COMMENT``
bookend pattern (see migration 143) -- the shape Zero ``>=1.0`` requires
to fire its schema-change hook. Existing tables are re-emitted unchanged.
Revision ID: 148
Revises: 147
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "148"
down_revision: str | None = "147"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# Publication whose table list is rewritten by upgrade()/downgrade() below.
PUBLICATION_NAME = "zero_publication"
# Mirrors migration 143. Kept in sync explicitly: any change to these lists
# must be re-emitted in a new resync migration with COMMENT bookends.
# Columns of ``documents`` placed in the publication's column list.
DOCUMENT_COLS = [
"id",
"title",
"document_type",
"search_space_id",
"folder_id",
"created_by_id",
"status",
"created_at",
"updated_at",
]
# Columns of the ``"user"`` table placed in the publication's column list.
USER_COLS = [
"id",
"pages_limit",
"pages_used",
"premium_credit_micros_limit",
"premium_credit_micros_used",
]
# Thin set: status + lightweight progress only. Heavy JSONB stays on REST.
AUTOMATION_RUN_COLS = [
"id",
"automation_id",
"trigger_id",
"status",
"step_results",
"started_at",
"finished_at",
"created_at",
]
def _has_zero_version(conn, table: str) -> bool:
    """Report whether ``table`` in the current schema has a ``_0_version`` column.

    The lookup is scoped to ``current_schema()`` so that a same-named table
    living in a different schema cannot produce a false positive; a false
    positive would make the publication DDL reference a column that the
    published table does not have.

    Args:
        conn: Connection used to run the catalog query.
        table: Unqualified table name to inspect.

    Returns:
        True when a matching ``_0_version`` column row is found, else False.
    """
    row = conn.execute(
        sa.text(
            "SELECT 1 FROM information_schema.columns "
            "WHERE table_schema = current_schema() "
            "AND table_name = :tbl AND column_name = '_0_version'"
        ),
        {"tbl": table},
    ).fetchone()
    return row is not None
def _build_set_table_ddl(
    *, documents_has_zero_ver: bool, user_has_zero_ver: bool
) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for this revision.

    Args:
        documents_has_zero_ver: Append ``"_0_version"`` to the ``documents``
            column list when True.
        user_has_zero_ver: Append ``"_0_version"`` to the ``"user"`` column
            list when True.

    Returns:
        The DDL string listing every published table, including
        ``automation_runs`` restricted to ``AUTOMATION_RUN_COLS``.
    """
    document_columns = list(DOCUMENT_COLS)
    if documents_has_zero_ver:
        document_columns.append('"_0_version"')

    user_columns = list(USER_COLS)
    if user_has_zero_ver:
        user_columns.append('"_0_version"')

    documents_spec = "documents (" + ", ".join(document_columns) + ")"
    user_spec = '"user" (' + ", ".join(user_columns) + ")"
    runs_spec = "automation_runs (" + ", ".join(AUTOMATION_RUN_COLS) + ")"

    # Order matters only for readability of the emitted DDL; it matches the
    # order used by the original statement.
    published_tables = [
        "notifications",
        documents_spec,
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        user_spec,
        runs_spec,
    ]
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(
        published_tables
    )
def upgrade() -> None:
    """Add ``automation_runs`` to the publication, bracketed by COMMENT statements.

    Does nothing when the publication does not exist. Otherwise runs three
    statements inside a single transaction (a SAVEPOINT when the connection
    is already in a transaction): a ``pre-148-resync`` comment, the
    ``SET TABLE`` DDL, and a ``post-148-resync`` comment.
    """
    bind = op.get_bind()
    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    # Building the DDL text is pure string work, so it is prepared up front.
    set_table_ddl = _build_set_table_ddl(
        documents_has_zero_ver=_has_zero_version(bind, "documents"),
        user_has_zero_ver=_has_zero_version(bind, "user"),
    )
    statements = (
        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-148-resync'",
        set_table_ddl,
        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-148-resync'",
    )

    # COMMENT-ALTER-COMMENT trio must be one transaction so Zero observes
    # them as one schema-change event.
    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()
    with scope:
        for statement in statements:
            bind.execute(sa.text(statement))
def downgrade() -> None:
    """Rewrite the publication's table list without ``automation_runs``.

    The original author describes the resulting shape as migration 143's.
    Does nothing when the publication does not exist. The ``SET TABLE``
    statement is bracketed by ``pre-148-downgrade`` / ``post-148-downgrade``
    comments inside one transaction (a SAVEPOINT when already in one).
    """
    bind = op.get_bind()
    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    document_columns = list(DOCUMENT_COLS)
    if _has_zero_version(bind, "documents"):
        document_columns.append('"_0_version"')
    user_columns = list(USER_COLS)
    if _has_zero_version(bind, "user"):
        user_columns.append('"_0_version"')

    published_tables = ", ".join(
        [
            "notifications",
            "documents (" + ", ".join(document_columns) + ")",
            "folders",
            "search_source_connectors",
            "new_chat_messages",
            "chat_comments",
            "chat_session_state",
            '"user" (' + ", ".join(user_columns) + ")",
        ]
    )
    statements = (
        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-148-downgrade'",
        f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE {published_tables}",
        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-148-downgrade'",
    )

    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()
    with scope:
        for statement in statements:
            bind.execute(sa.text(statement))

View file

@ -0,0 +1,667 @@
"""add external chat surface tables
Revision ID: 149
Revises: 148
Create Date: 2026-05-27
Adds the lean external chat surface schema:
* external_chat_accounts
* external_chat_bindings
* external_chat_inbound_events
External chat surfaces store Telegram-originated conversations in the existing
chat tables. This migration adds ``source`` to ``new_chat_threads`` and
``new_chat_messages`` as UI metadata while publishing all chat-message sources
through Zero so a future SurfSense UI layer can render external chats. External
chat adapter tables are served through REST in v1, so they are intentionally not
added to ``zero_publication``.
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from alembic import op
revision: str = "149"
down_revision: str | None = "148"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# Publication whose table list _build_set_table_ddl() rewrites.
PUBLICATION_NAME = "zero_publication"
# Columns of ``documents`` placed in the publication's column list.
DOCUMENT_COLS = [
"id",
"title",
"document_type",
"search_space_id",
"folder_id",
"created_by_id",
"status",
"created_at",
"updated_at",
]
# Columns of the ``"user"`` table placed in the publication's column list.
USER_COLS = [
"id",
"pages_limit",
"pages_used",
"premium_credit_micros_limit",
"premium_credit_micros_used",
]
# Columns of ``automation_runs`` placed in the publication's column list.
AUTOMATION_RUN_COLS = [
"id",
"automation_id",
"trigger_id",
"status",
"step_results",
"started_at",
"finished_at",
"created_at",
]
def _has_zero_version(conn, table: str) -> bool:
    """Report whether ``table`` in the current schema has a ``_0_version`` column.

    Delegates to ``_column_exists`` so the lookup is restricted to
    ``current_schema()`` like every other existence helper in this
    migration. The previous unscoped query could match a same-named table
    in another schema and cause the publication DDL to name a column the
    published table does not have.

    Args:
        conn: Connection used to run the catalog query.
        table: Unqualified table name to inspect.

    Returns:
        True when the column is present, else False.
    """
    return _column_exists(conn, table, "_0_version")
def _cols(columns: list[str]) -> str:
return ", ".join(columns)
def _table_exists(conn, table: str) -> bool:
    """Return True when ``table`` is listed for the current schema.

    Queries ``information_schema.tables`` filtered by ``current_schema()``.
    """
    query = sa.text(
        "SELECT 1 FROM information_schema.tables "
        "WHERE table_schema = current_schema() AND table_name = :tbl"
    )
    match = conn.execute(query, {"tbl": table}).fetchone()
    return match is not None
def _column_exists(conn, table: str, column: str) -> bool:
    """Return True when ``table`` in the current schema has a column ``column``.

    Queries ``information_schema.columns`` filtered by ``current_schema()``.
    """
    query = sa.text(
        "SELECT 1 FROM information_schema.columns "
        "WHERE table_schema = current_schema() "
        "AND table_name = :tbl AND column_name = :col"
    )
    match = conn.execute(query, {"tbl": table, "col": column}).fetchone()
    return match is not None
def _index_exists(conn, index_name: str) -> bool:
    """Return True when ``pg_indexes`` lists ``index_name`` in the current schema."""
    query = sa.text(
        "SELECT 1 FROM pg_indexes "
        "WHERE schemaname = current_schema() AND indexname = :name"
    )
    match = conn.execute(query, {"name": index_name}).fetchone()
    return match is not None
def _constraint_exists(conn, table: str, constraint_name: str) -> bool:
    """Return True when ``table`` in the current schema has ``constraint_name``.

    Queries ``information_schema.table_constraints`` filtered by
    ``current_schema()``.
    """
    query = sa.text(
        "SELECT 1 FROM information_schema.table_constraints "
        "WHERE table_schema = current_schema() "
        "AND table_name = :tbl AND constraint_name = :name"
    )
    match = conn.execute(query, {"tbl": table, "name": constraint_name}).fetchone()
    return match is not None
def _drop_index_if_exists(index_name: str, table_name: str) -> None:
    """Drop ``index_name`` from ``table_name``, skipping the drop if it is absent."""
    if not _index_exists(op.get_bind(), index_name):
        return
    op.drop_index(index_name, table_name=table_name)
def _drop_column_if_exists(table_name: str, column_name: str) -> None:
    """Drop ``column_name`` from ``table_name``, skipping the drop if it is absent."""
    if not _column_exists(op.get_bind(), table_name, column_name):
        return
    op.drop_column(table_name, column_name)
def _build_set_table_ddl(
    *, documents_has_zero_ver: bool, user_has_zero_ver: bool
) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for the publication.

    Args:
        documents_has_zero_ver: Append ``"_0_version"`` to the ``documents``
            column list when True.
        user_has_zero_ver: Append ``"_0_version"`` to the ``"user"`` column
            list when True.

    Returns:
        The DDL string naming every published table and its column list.
    """
    document_columns = list(DOCUMENT_COLS)
    if documents_has_zero_ver:
        document_columns.append('"_0_version"')

    user_columns = list(USER_COLS)
    if user_has_zero_ver:
        user_columns.append('"_0_version"')

    published_tables = [
        "notifications",
        "documents (" + _cols(document_columns) + ")",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        '"user" (' + _cols(user_columns) + ")",
        "automation_runs (" + _cols(AUTOMATION_RUN_COLS) + ")",
    ]
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(
        published_tables
    )
def _create_enum(name: str, values: tuple[str, ...]) -> postgresql.ENUM:
    """Create the PostgreSQL enum ``name`` if missing and return a column type for it.

    The type is created with ``checkfirst=True``. The returned ENUM is a
    separate instance built with ``create_type=False`` -- presumably so that
    table creation does not try to create the type again (confirm against
    the SQLAlchemy docs).
    """
    postgresql.ENUM(*values, name=name).create(op.get_bind(), checkfirst=True)
    column_type = postgresql.ENUM(*values, name=name, create_type=False)
    return column_type
def upgrade() -> None:
conn = op.get_bind()
external_chat_platform_enum = _create_enum(
"external_chat_platform", ("telegram", "whatsapp", "signal")
)
external_chat_account_mode_enum = _create_enum(
"external_chat_account_mode", ("cloud_shared", "self_host_byo")
)
external_chat_health_status_enum = _create_enum(
"external_chat_health_status", ("unknown", "ok", "failing")
)
external_chat_binding_state_enum = _create_enum(
"external_chat_binding_state", ("pending", "bound", "revoked", "suspended")
)
external_chat_peer_kind_enum = _create_enum(
"external_chat_peer_kind", ("direct", "group", "channel", "unknown")
)
external_chat_event_kind_enum = _create_enum(
"external_chat_event_kind",
("message", "edited_message", "callback_query", "other"),
)
external_chat_event_status_enum = _create_enum(
"external_chat_event_status",
("received", "processing", "processed", "ignored", "failed"),
)
if not _table_exists(conn, "external_chat_accounts"):
op.create_table(
"external_chat_accounts",
sa.Column("id", sa.BigInteger(), primary_key=True),
sa.Column("platform", external_chat_platform_enum, nullable=False),
sa.Column("mode", external_chat_account_mode_enum, nullable=False),
sa.Column("owner_user_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("owner_search_space_id", sa.Integer(), nullable=True),
sa.Column(
"is_system_account",
sa.Boolean(),
nullable=False,
server_default="false",
),
sa.Column("encrypted_credentials", sa.Text(), nullable=True),
sa.Column("bot_username", sa.String(255), nullable=True),
sa.Column("webhook_secret", sa.String(64), nullable=True),
sa.Column(
"cursor_state",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default=sa.text("'{}'::jsonb"),
),
sa.Column(
"health_status",
external_chat_health_status_enum,
nullable=False,
server_default="unknown",
),
sa.Column(
"last_health_check_at", sa.TIMESTAMP(timezone=True), nullable=True
),
sa.Column("suspended_at", sa.TIMESTAMP(timezone=True), nullable=True),
sa.Column("suspended_reason", sa.Text(), nullable=True),
sa.Column(
"created_at",
sa.TIMESTAMP(timezone=True),
nullable=False,
server_default=sa.text("(now() AT TIME ZONE 'utc')"),
),
sa.Column(
"updated_at",
sa.TIMESTAMP(timezone=True),
nullable=False,
server_default=sa.text("(now() AT TIME ZONE 'utc')"),
),
sa.CheckConstraint(
"(is_system_account = true AND owner_user_id IS NULL) OR "
"(is_system_account = false AND owner_user_id IS NOT NULL)",
name="ck_external_chat_accounts_owner_shape",
),
sa.ForeignKeyConstraint(["owner_user_id"], ["user.id"], ondelete="CASCADE"),
sa.ForeignKeyConstraint(
["owner_search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
),
)
op.create_index(
"uq_external_chat_accounts_owner_platform",
"external_chat_accounts",
["owner_user_id", "platform"],
unique=True,
postgresql_where=sa.text("is_system_account = false"),
if_not_exists=True,
)
op.create_index(
"uq_external_chat_accounts_system_platform",
"external_chat_accounts",
["platform"],
unique=True,
postgresql_where=sa.text("is_system_account = true"),
if_not_exists=True,
)
op.create_index(
"uq_external_chat_accounts_webhook_secret",
"external_chat_accounts",
["webhook_secret"],
unique=True,
postgresql_where=sa.text("webhook_secret IS NOT NULL"),
if_not_exists=True,
)
if not _table_exists(conn, "external_chat_bindings"):
op.create_table(
"external_chat_bindings",
sa.Column("id", sa.BigInteger(), primary_key=True),
sa.Column("account_id", sa.BigInteger(), nullable=False),
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("search_space_id", sa.Integer(), nullable=False),
sa.Column(
"state",
external_chat_binding_state_enum,
nullable=False,
server_default="pending",
),
sa.Column("pairing_code", sa.Text(), nullable=True),
sa.Column(
"pairing_code_expires_at", sa.TIMESTAMP(timezone=True), nullable=True
),
sa.Column("external_peer_id", sa.Text(), nullable=True),
sa.Column(
"external_peer_kind",
external_chat_peer_kind_enum,
nullable=False,
server_default="unknown",
),
sa.Column(
"external_thread_id",
sa.Text(),
nullable=True,
comment="Reserved for Telegram message_thread_id when group/forum support lands.",
),
sa.Column("external_display_name", sa.Text(), nullable=True),
sa.Column("external_username", sa.Text(), nullable=True),
sa.Column(
"external_metadata",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default=sa.text("'{}'::jsonb"),
),
sa.Column("new_chat_thread_id", sa.Integer(), nullable=True),
sa.Column("revoked_at", sa.TIMESTAMP(timezone=True), nullable=True),
sa.Column("suspended_at", sa.TIMESTAMP(timezone=True), nullable=True),
sa.Column("suspended_reason", sa.Text(), nullable=True),
sa.Column(
"created_at",
sa.TIMESTAMP(timezone=True),
nullable=False,
server_default=sa.text("(now() AT TIME ZONE 'utc')"),
),
sa.Column(
"updated_at",
sa.TIMESTAMP(timezone=True),
nullable=False,
server_default=sa.text("(now() AT TIME ZONE 'utc')"),
),
sa.ForeignKeyConstraint(
["account_id"], ["external_chat_accounts.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
sa.ForeignKeyConstraint(
["search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(
["new_chat_thread_id"], ["new_chat_threads.id"], ondelete="SET NULL"
),
)
op.create_index(
"uq_external_chat_bindings_account_peer_active",
"external_chat_bindings",
["account_id", "external_peer_id"],
unique=True,
postgresql_where=sa.text(
"state IN ('bound', 'suspended') AND external_peer_id IS NOT NULL"
),
if_not_exists=True,
)
op.create_index(
"uq_external_chat_bindings_pairing_code_pending",
"external_chat_bindings",
["pairing_code"],
unique=True,
postgresql_where=sa.text("state = 'pending'"),
if_not_exists=True,
)
op.create_index(
"ix_external_chat_bindings_user_state",
"external_chat_bindings",
["user_id", "state"],
if_not_exists=True,
)
op.create_index(
"ix_external_chat_bindings_search_space_state",
"external_chat_bindings",
["search_space_id", "state"],
if_not_exists=True,
)
if not _table_exists(conn, "external_chat_inbound_events"):
op.create_table(
"external_chat_inbound_events",
sa.Column("id", sa.BigInteger(), primary_key=True),
sa.Column("account_id", sa.BigInteger(), nullable=False),
sa.Column("external_chat_binding_id", sa.BigInteger(), nullable=True),
sa.Column("platform", external_chat_platform_enum, nullable=False),
sa.Column("event_dedupe_key", sa.Text(), nullable=False),
sa.Column("external_event_id", sa.Text(), nullable=True),
sa.Column("external_message_id", sa.Text(), nullable=True),
sa.Column("event_kind", external_chat_event_kind_enum, nullable=False),
sa.Column(
"raw_payload",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
sa.Column("request_id", sa.String(64), nullable=True),
sa.Column(
"status",
external_chat_event_status_enum,
nullable=False,
server_default="received",
),
sa.Column(
"attempt_count", sa.Integer(), nullable=False, server_default="0"
),
sa.Column("last_error", sa.Text(), nullable=True),
sa.Column(
"received_at",
sa.TIMESTAMP(timezone=True),
nullable=False,
server_default=sa.text("(now() AT TIME ZONE 'utc')"),
),
sa.Column("processed_at", sa.TIMESTAMP(timezone=True), nullable=True),
sa.Column(
"created_at",
sa.TIMESTAMP(timezone=True),
nullable=False,
server_default=sa.text("(now() AT TIME ZONE 'utc')"),
),
sa.ForeignKeyConstraint(
["account_id"], ["external_chat_accounts.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(
["external_chat_binding_id"],
["external_chat_bindings.id"],
ondelete="SET NULL",
),
sa.UniqueConstraint(
"account_id",
"event_dedupe_key",
name="uq_external_chat_inbound_account_dedupe_key",
),
)
op.create_index(
"ix_external_chat_inbound_status_received_at",
"external_chat_inbound_events",
["status", "received_at"],
if_not_exists=True,
)
op.create_index(
"ix_external_chat_inbound_binding_received_at",
"external_chat_inbound_events",
["external_chat_binding_id", "received_at"],
if_not_exists=True,
)
op.create_index(
"ix_external_chat_inbound_request_id",
"external_chat_inbound_events",
["request_id"],
postgresql_where=sa.text("request_id IS NOT NULL"),
if_not_exists=True,
)
if not _column_exists(conn, "new_chat_threads", "source"):
op.add_column(
"new_chat_threads",
sa.Column("source", sa.Text(), nullable=False, server_default="surfsense"),
)
op.alter_column("new_chat_threads", "source", type_=sa.Text())
if not _column_exists(conn, "new_chat_threads", "external_chat_binding_id"):
op.add_column(
"new_chat_threads",
sa.Column("external_chat_binding_id", sa.BigInteger(), nullable=True),
)
if not _constraint_exists(
conn,
"new_chat_threads",
"fk_new_chat_threads_external_chat_external_chat_binding_id",
):
op.create_foreign_key(
"fk_new_chat_threads_external_chat_external_chat_binding_id",
"new_chat_threads",
"external_chat_bindings",
["external_chat_binding_id"],
["id"],
ondelete="SET NULL",
)
op.create_index(
"ix_new_chat_threads_source", "new_chat_threads", ["source"], if_not_exists=True
)
op.create_index(
"ix_new_chat_threads_external_chat_binding_id",
"new_chat_threads",
["external_chat_binding_id"],
if_not_exists=True,
)
if not _column_exists(conn, "new_chat_messages", "source"):
op.add_column(
"new_chat_messages",
sa.Column("source", sa.Text(), nullable=False, server_default="surfsense"),
)
op.alter_column("new_chat_messages", "source", type_=sa.Text())
if not _column_exists(conn, "new_chat_messages", "platform_metadata"):
op.add_column(
"new_chat_messages",
sa.Column(
"platform_metadata",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)
op.create_index(
"ix_new_chat_messages_source",
"new_chat_messages",
["source"],
if_not_exists=True,
)
op.create_index(
"uq_new_chat_messages_inbound_platform",
"new_chat_messages",
[
"thread_id",
sa.text("(platform_metadata->>'platform')"),
sa.text("(platform_metadata->>'external_message_id')"),
],
unique=True,
postgresql_where=sa.text(
"platform_metadata IS NOT NULL "
"AND platform_metadata->>'direction' = 'inbound'"
),
if_not_exists=True,
)
op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY FULL")
exists = conn.execute(
sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
{"name": PUBLICATION_NAME},
).fetchone()
if exists:
documents_has_zero_ver = _has_zero_version(conn, "documents")
user_has_zero_ver = _has_zero_version(conn, "user")
tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
with tx:
conn.execute(
sa.text(
f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-144-external-chat'"
)
)
conn.execute(
sa.text(
_build_set_table_ddl(
documents_has_zero_ver=documents_has_zero_ver,
user_has_zero_ver=user_has_zero_ver,
)
)
)
conn.execute(
sa.text(
f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-144-external-chat'"
)
)
def downgrade() -> None:
    """Undo this migration: restore the publication, then drop the external-chat schema.

    The publication is rewritten first (only when it exists), after which the
    columns, indexes, foreign key, tables and enum types created by ``upgrade``
    are removed. Most steps are guarded by an existence check or go through an
    ``*_if_exists`` helper (defined elsewhere in this file).
    """
    conn = op.get_bind()
    # Skip all publication work when the publication is absent from this database.
    exists = conn.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if exists:
        documents_has_zero_ver = _has_zero_version(conn, "documents")
        user_has_zero_ver = _has_zero_version(conn, "user")
        # Restore the publication shape from migration 148.
        doc_cols = DOCUMENT_COLS + (['"_0_version"'] if documents_has_zero_ver else [])
        user_cols = USER_COLS + (['"_0_version"'] if user_has_zero_ver else [])
        ddl = (
            f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE "
            f"notifications, "
            f"documents ({_cols(doc_cols)}), "
            f"folders, "
            f"search_source_connectors, "
            f"new_chat_messages, "
            f"chat_comments, "
            f"chat_session_state, "
            f'"user" ({_cols(user_cols)}), '
            f"automation_runs ({_cols(AUTOMATION_RUN_COLS)})"
        )
        # Use a savepoint when Alembic already opened a transaction, otherwise
        # start one, so the three statements below are applied together.
        tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
        with tx:
            # The SET TABLE statement is bracketed by two COMMENT statements
            # (pre/post markers on the publication).
            conn.execute(
                sa.text(
                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-144-downgrade'"
                )
            )
            conn.execute(sa.text(ddl))
            conn.execute(
                sa.text(
                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-144-downgrade'"
                )
            )
    # Reset replica identity only while the column added by upgrade is still
    # present, i.e. before the columns below are dropped.
    if _column_exists(conn, "new_chat_messages", "source"):
        op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY DEFAULT")
    # new_chat_messages: indexes first, then the columns they cover.
    _drop_index_if_exists("uq_new_chat_messages_inbound_platform", "new_chat_messages")
    _drop_index_if_exists("ix_new_chat_messages_source", "new_chat_messages")
    _drop_column_if_exists("new_chat_messages", "platform_metadata")
    _drop_column_if_exists("new_chat_messages", "source")
    # new_chat_threads: indexes, then the FK to external_chat_bindings, then columns.
    _drop_index_if_exists(
        "ix_new_chat_threads_external_chat_binding_id", "new_chat_threads"
    )
    _drop_index_if_exists("ix_new_chat_threads_source", "new_chat_threads")
    if _constraint_exists(
        conn,
        "new_chat_threads",
        "fk_new_chat_threads_external_chat_external_chat_binding_id",
    ):
        op.drop_constraint(
            "fk_new_chat_threads_external_chat_external_chat_binding_id",
            "new_chat_threads",
            type_="foreignkey",
        )
    _drop_column_if_exists("new_chat_threads", "external_chat_binding_id")
    _drop_column_if_exists("new_chat_threads", "source")
    # Tables are dropped dependents-first: inbound events reference bindings and
    # accounts, bindings reference accounts.
    _drop_index_if_exists(
        "ix_external_chat_inbound_binding_received_at", "external_chat_inbound_events"
    )
    _drop_index_if_exists(
        "ix_external_chat_inbound_request_id", "external_chat_inbound_events"
    )
    _drop_index_if_exists(
        "ix_external_chat_inbound_status_received_at", "external_chat_inbound_events"
    )
    if _table_exists(conn, "external_chat_inbound_events"):
        op.drop_table("external_chat_inbound_events")
    _drop_index_if_exists(
        "ix_external_chat_bindings_search_space_state",
        "external_chat_bindings",
    )
    _drop_index_if_exists(
        "ix_external_chat_bindings_user_state", "external_chat_bindings"
    )
    _drop_index_if_exists(
        "uq_external_chat_bindings_pairing_code_pending",
        "external_chat_bindings",
    )
    _drop_index_if_exists(
        "uq_external_chat_bindings_account_peer_active",
        "external_chat_bindings",
    )
    if _table_exists(conn, "external_chat_bindings"):
        op.drop_table("external_chat_bindings")
    _drop_index_if_exists(
        "uq_external_chat_accounts_system_platform", "external_chat_accounts"
    )
    _drop_index_if_exists(
        "uq_external_chat_accounts_owner_platform", "external_chat_accounts"
    )
    _drop_index_if_exists(
        "uq_external_chat_accounts_webhook_secret", "external_chat_accounts"
    )
    if _table_exists(conn, "external_chat_accounts"):
        op.drop_table("external_chat_accounts")
    # Enum types go last, once no table column uses them; checkfirst makes each
    # drop a no-op when the type is already gone.
    for enum_name in (
        "external_chat_event_status",
        "external_chat_event_kind",
        "external_chat_peer_kind",
        "external_chat_binding_state",
        "external_chat_health_status",
        "external_chat_account_mode",
        "external_chat_platform",
    ):
        postgresql.ENUM(name=enum_name).drop(conn, checkfirst=True)

View file

@ -0,0 +1,102 @@
"""add slack gateway platform
Revision ID: 150
Revises: 149
Create Date: 2026-05-31
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "150"
down_revision: str | None = "149"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def _enum_value_exists(enum_name: str, value: str) -> bool:
    """Report whether the PostgreSQL enum type ``enum_name`` already has label ``value``.

    The label is looked up in ``pg_enum`` joined to ``pg_type`` by type name,
    on the connection Alembic is currently bound to.
    """
    lookup = sa.text(
        "SELECT 1 FROM pg_enum e "
        "JOIN pg_type t ON t.oid = e.enumtypid "
        "WHERE t.typname = :enum_name AND e.enumlabel = :value"
    )
    params = {"enum_name": enum_name, "value": value}
    match = op.get_bind().execute(lookup, params).fetchone()
    return match is not None
def _index_exists(index_name: str) -> bool:
    """Report whether an index called ``index_name`` exists in the current schema."""
    lookup = sa.text(
        "SELECT 1 FROM pg_indexes "
        "WHERE schemaname = current_schema() AND indexname = :index_name"
    )
    match = op.get_bind().execute(lookup, {"index_name": index_name}).fetchone()
    return match is not None
def upgrade() -> None:
    """Add the ``slack`` platform value and split system-account uniqueness.

    The one-system-account-per-platform unique index is rebuilt so it only
    covers rows whose ``cursor_state`` has no ``team_id`` key, and a second
    unique index keyed on ``(platform, team_id)`` covers the rows that do.
    """
    accounts_table = "external_chat_accounts"
    system_index = "uq_external_chat_accounts_system_platform"

    if not _enum_value_exists("external_chat_platform", "slack"):
        op.execute("ALTER TYPE external_chat_platform ADD VALUE 'slack'")

    # The predicate of an existing index cannot be edited in place: drop it
    # (when present) and create it again with the narrower WHERE clause.
    if _index_exists(system_index):
        op.drop_index(system_index, table_name=accounts_table)
    op.create_index(
        system_index,
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true AND NOT (cursor_state ? 'team_id')"
        ),
        if_not_exists=True,
    )

    team_key = sa.text("(cursor_state ->> 'team_id')")
    op.create_index(
        "uq_external_chat_accounts_slack_team",
        accounts_table,
        ["platform", team_key],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true AND cursor_state ? 'team_id'"
        ),
        if_not_exists=True,
    )
def downgrade() -> None:
    """Remove the per-team index and restore the original system-account index.

    The ``slack`` enum value itself is left in place.
    """
    accounts_table = "external_chat_accounts"

    # Drop both indexes this revision manages, skipping any that are missing.
    for managed_index in (
        "uq_external_chat_accounts_slack_team",
        "uq_external_chat_accounts_system_platform",
    ):
        if _index_exists(managed_index):
            op.drop_index(managed_index, table_name=accounts_table)

    op.create_index(
        "uq_external_chat_accounts_system_platform",
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=sa.text("is_system_account = true"),
        if_not_exists=True,
    )
    # PostgreSQL enum values are intentionally not removed on downgrade.

View file

@ -0,0 +1,106 @@
"""add discord gateway platform
Revision ID: 151
Revises: 150
Create Date: 2026-06-01
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "151"
down_revision: str | None = "150"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def _enum_value_exists(enum_name: str, value: str) -> bool:
    """Return True when enum type ``enum_name`` already contains the label ``value``.

    Queries ``pg_enum`` joined to ``pg_type`` on the current Alembic bind.
    """
    bind = op.get_bind()
    statement = sa.text(
        "SELECT 1 FROM pg_enum e "
        "JOIN pg_type t ON t.oid = e.enumtypid "
        "WHERE t.typname = :enum_name AND e.enumlabel = :value"
    )
    found = bind.execute(statement, {"enum_name": enum_name, "value": value})
    return found.fetchone() is not None
def _index_exists(index_name: str) -> bool:
    """Return True when the current schema contains an index named ``index_name``."""
    bind = op.get_bind()
    statement = sa.text(
        "SELECT 1 FROM pg_indexes "
        "WHERE schemaname = current_schema() AND indexname = :index_name"
    )
    found = bind.execute(statement, {"index_name": index_name})
    return found.fetchone() is not None
def upgrade() -> None:
    """Add the ``discord`` platform value and a per-guild uniqueness index.

    The system-platform unique index is rebuilt to exclude rows whose
    ``cursor_state`` carries a ``team_id`` or a ``guild_id`` key; rows with a
    ``guild_id`` get their own unique index on ``(platform, guild_id)``.
    """
    accounts_table = "external_chat_accounts"
    system_index = "uq_external_chat_accounts_system_platform"

    if not _enum_value_exists("external_chat_platform", "discord"):
        op.execute("ALTER TYPE external_chat_platform ADD VALUE 'discord'")

    # Drop-and-recreate: the partial-index predicate is changing.
    if _index_exists(system_index):
        op.drop_index(system_index, table_name=accounts_table)
    system_predicate = sa.text(
        "is_system_account = true "
        "AND NOT (cursor_state ? 'team_id') "
        "AND NOT (cursor_state ? 'guild_id')"
    )
    op.create_index(
        system_index,
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=system_predicate,
        if_not_exists=True,
    )

    guild_key = sa.text("(cursor_state ->> 'guild_id')")
    op.create_index(
        "uq_external_chat_accounts_discord_guild",
        accounts_table,
        ["platform", guild_key],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true AND cursor_state ? 'guild_id'"
        ),
        if_not_exists=True,
    )
def downgrade() -> None:
    """Drop the per-guild index and restore the previous system-platform predicate.

    The system-platform index goes back to excluding only rows with a
    ``team_id`` key. The ``discord`` enum value is left in place.
    """
    accounts_table = "external_chat_accounts"

    # Remove whichever of the two managed indexes currently exist.
    for managed_index in (
        "uq_external_chat_accounts_discord_guild",
        "uq_external_chat_accounts_system_platform",
    ):
        if _index_exists(managed_index):
            op.drop_index(managed_index, table_name=accounts_table)

    op.create_index(
        "uq_external_chat_accounts_system_platform",
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=sa.text(
            "is_system_account = true AND NOT (cursor_state ? 'team_id')"
        ),
        if_not_exists=True,
    )
    # PostgreSQL enum values are intentionally not removed on downgrade.

View file

@ -0,0 +1,85 @@
"""add document_files table for stored original uploads
Revision ID: 152
Revises: 151
"""
from collections.abc import Sequence
from alembic import op
revision: str = "152"
down_revision: str | None = "151"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the ``document_file_kind`` enum, the ``document_files`` table and its indexes.

    Every statement is written to be re-runnable: the enum is created inside a
    guarded ``DO`` block and the table and indexes use ``IF NOT EXISTS``.
    """
    # The enum type must precede the table that references it.
    create_enum_sql = """
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_type WHERE typname = 'document_file_kind'
) THEN
CREATE TYPE document_file_kind AS ENUM (
'ORIGINAL', 'REDACTED', 'FILLED_FORM'
);
END IF;
END
$$;
"""
    create_table_sql = """
CREATE TABLE IF NOT EXISTS document_files (
id SERIAL PRIMARY KEY,
document_id INTEGER NOT NULL
REFERENCES documents(id) ON DELETE CASCADE,
search_space_id INTEGER NOT NULL
REFERENCES searchspaces(id) ON DELETE CASCADE,
kind document_file_kind NOT NULL DEFAULT 'ORIGINAL',
storage_backend VARCHAR(32) NOT NULL,
storage_key TEXT NOT NULL,
original_filename TEXT NOT NULL,
mime_type TEXT,
size_bytes BIGINT NOT NULL,
checksum_sha256 VARCHAR(64),
created_by_id UUID
REFERENCES "user"(id) ON DELETE SET NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
"""
    op.execute(create_enum_sql)
    op.execute(create_table_sql)

    # One single-column index per listed column, named ix_document_files_<column>.
    for indexed_column in (
        "document_id",
        "search_space_id",
        "kind",
        "created_by_id",
        "created_at",
    ):
        op.execute(
            f"CREATE INDEX IF NOT EXISTS ix_document_files_{indexed_column} "
            f"ON document_files({indexed_column});"
        )
def downgrade() -> None:
    """Drop the ``document_files`` indexes, then the table, then its enum type."""
    # Indexes are removed in the reverse of their creation order.
    for index_name in (
        "ix_document_files_created_at",
        "ix_document_files_created_by_id",
        "ix_document_files_kind",
        "ix_document_files_search_space_id",
        "ix_document_files_document_id",
    ):
        op.execute(f"DROP INDEX IF EXISTS {index_name};")
    # The table must go before the enum type its ``kind`` column uses.
    op.execute("DROP TABLE IF EXISTS document_files;")
    op.execute("DROP TYPE IF EXISTS document_file_kind;")

View file

@ -0,0 +1,121 @@
"""restore automation_runs to zero_publication
Migration 149's ``SET TABLE`` dropped ``automation_runs`` (added in 148),
breaking the dashboard live run ticker with a SchemaVersionNotSupported
reload loop. Re-emit the publication with ``automation_runs`` using the
``COMMENT`` bookend pattern so zero-cache fires its schema-change hook.
Revision ID: 153
Revises: 152
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "153"
down_revision: str | None = "152"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
PUBLICATION_NAME = "zero_publication"
DOCUMENT_COLS = [
"id",
"title",
"document_type",
"search_space_id",
"folder_id",
"created_by_id",
"status",
"created_at",
"updated_at",
]
USER_COLS = [
"id",
"pages_limit",
"pages_used",
"premium_credit_micros_limit",
"premium_credit_micros_used",
]
AUTOMATION_RUN_COLS = [
"id",
"automation_id",
"trigger_id",
"status",
"step_results",
"started_at",
"finished_at",
"created_at",
]
def _has_zero_version(conn, table: str) -> bool:
    """Return True when ``table`` in the current schema has a ``_0_version`` column.

    Args:
        conn: Connection used to run the lookup (the Alembic bind).
        table: Unqualified table name to inspect.

    Returns:
        ``True`` if ``information_schema.columns`` lists a ``_0_version``
        column for ``table`` in ``current_schema()``, ``False`` otherwise.
    """
    # Scope the lookup to the current schema. Without this filter a table with
    # the same name in any other schema would produce a false positive and the
    # publication column list would name a column the real table lacks.
    row = conn.execute(
        sa.text(
            "SELECT 1 FROM information_schema.columns "
            "WHERE table_schema = current_schema() "
            "AND table_name = :tbl AND column_name = '_0_version'"
        ),
        {"tbl": table},
    ).fetchone()
    return row is not None
def _set_table_ddl(*, with_automation_runs: bool, conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for the publication.

    ``documents`` and ``"user"`` are published with explicit column lists,
    extended by ``"_0_version"`` when that column exists on the table.
    ``automation_runs`` is appended only when ``with_automation_runs`` is true.
    """
    document_columns = list(DOCUMENT_COLS)
    if _has_zero_version(conn, "documents"):
        document_columns.append('"_0_version"')
    account_columns = list(USER_COLS)
    if _has_zero_version(conn, "user"):
        account_columns.append('"_0_version"')

    document_list = ", ".join(document_columns)
    account_list = ", ".join(account_columns)
    published = [
        "notifications",
        f"documents ({document_list})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        f'"user" ({account_list})',
    ]
    if with_automation_runs:
        run_list = ", ".join(AUTOMATION_RUN_COLS)
        published.append(f"automation_runs ({run_list})")

    table_clause = ", ".join(published)
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + table_clause
def _resync(*, with_automation_runs: bool, tag: str) -> None:
    """Re-emit the publication table list, bracketed by ``COMMENT`` statements.

    Does nothing when the publication does not exist. ``tag`` is embedded in
    the ``pre-``/``post-`` comment markers written around the ``SET TABLE``.
    """
    conn = op.get_bind()
    publication_row = conn.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if publication_row:
        # Savepoint inside an already-open transaction, otherwise a new one.
        if conn.in_transaction():
            tx = conn.begin_nested()
        else:
            tx = conn.begin()
        with tx:
            pre_marker = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'"
            conn.execute(sa.text(pre_marker))
            set_table = _set_table_ddl(
                with_automation_runs=with_automation_runs, conn=conn
            )
            conn.execute(sa.text(set_table))
            post_marker = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'"
            conn.execute(sa.text(post_marker))
def upgrade() -> None:
    """Re-emit the publication with ``automation_runs`` included."""
    _resync(tag="153-resync", with_automation_runs=True)
def downgrade() -> None:
    """Re-emit the publication without ``automation_runs``."""
    _resync(tag="153-downgrade", with_automation_runs=False)

View file

@ -0,0 +1,147 @@
"""remove document summary llm settings
Revision ID: 154
Revises: 153
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "154"
down_revision: str | None = "153"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
PUBLICATION_NAME = "zero_publication"
DOCUMENT_COLS = [
"id",
"title",
"document_type",
"search_space_id",
"folder_id",
"created_by_id",
"status",
"created_at",
"updated_at",
]
USER_COLS = [
"id",
"pages_limit",
"pages_used",
"premium_credit_micros_limit",
"premium_credit_micros_used",
]
AUTOMATION_RUN_COLS = [
"id",
"automation_id",
"trigger_id",
"status",
"step_results",
"started_at",
"finished_at",
"created_at",
]
def _column_exists(conn, table: str, column: str) -> bool:
    """Return True when ``table`` in the current schema has a column named ``column``.

    Args:
        conn: Connection used to run the lookup (the Alembic bind).
        table: Unqualified table name to inspect.
        column: Column name to look for.

    Returns:
        ``True`` if ``information_schema.columns`` lists the column for
        ``table`` in ``current_schema()``, ``False`` otherwise.
    """
    # Restrict the lookup to the current schema. Unscoped, a same-named table
    # in another schema could report the column as present (or hide that it is
    # missing), making the guarded drop_column/add_column calls fail or be
    # skipped incorrectly.
    row = conn.execute(
        sa.text(
            "SELECT 1 FROM information_schema.columns "
            "WHERE table_schema = current_schema() "
            "AND table_name = :table AND column_name = :column"
        ),
        {"table": table, "column": column},
    ).fetchone()
    return row is not None
def _has_zero_version(conn, table: str) -> bool:
    """Return True when ``table`` has a ``_0_version`` column."""
    zero_version_column = "_0_version"
    return _column_exists(conn, table, zero_version_column)
def _set_table_ddl(conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for the publication.

    ``documents`` and ``"user"`` get explicit column lists, each extended by
    ``"_0_version"`` when the table has that column; ``automation_runs`` is
    always included with its fixed column list.
    """
    document_columns = list(DOCUMENT_COLS)
    if _has_zero_version(conn, "documents"):
        document_columns.append('"_0_version"')
    account_columns = list(USER_COLS)
    if _has_zero_version(conn, "user"):
        account_columns.append('"_0_version"')

    document_list = ", ".join(document_columns)
    account_list = ", ".join(account_columns)
    run_list = ", ".join(AUTOMATION_RUN_COLS)
    published = (
        "notifications",
        f"documents ({document_list})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        f'"user" ({account_list})',
        f"automation_runs ({run_list})",
    )
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(published)
def _resync_zero_publication(tag: str) -> None:
    """Re-emit the publication table list between ``pre-``/``post-`` comment markers.

    Returns without doing anything when the publication is not present.
    """
    conn = op.get_bind()
    publication_row = conn.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if publication_row:
        # Savepoint inside an already-open transaction, otherwise a new one.
        if conn.in_transaction():
            tx = conn.begin_nested()
        else:
            tx = conn.begin()
        with tx:
            pre_marker = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'"
            conn.execute(sa.text(pre_marker))
            set_table = _set_table_ddl(conn)
            conn.execute(sa.text(set_table))
            post_marker = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'"
            conn.execute(sa.text(post_marker))
def upgrade() -> None:
    """Drop the document-summary columns (when present) and re-emit the publication."""
    conn = op.get_bind()
    obsolete_columns = (
        ("searchspaces", "document_summary_llm_id"),
        ("search_source_connectors", "enable_summary"),
    )
    for table_name, column_name in obsolete_columns:
        if _column_exists(conn, table_name, column_name):
            op.drop_column(table_name, column_name)
    _resync_zero_publication("154-summary-removal")
def downgrade() -> None:
    """Re-add the document-summary columns (when missing) and re-emit the publication."""
    conn = op.get_bind()

    llm_column_present = _column_exists(
        conn, "searchspaces", "document_summary_llm_id"
    )
    if not llm_column_present:
        summary_llm_id = sa.Column(
            "document_summary_llm_id",
            sa.Integer(),
            nullable=True,
            server_default="0",
        )
        op.add_column("searchspaces", summary_llm_id)

    summary_flag_present = _column_exists(
        conn, "search_source_connectors", "enable_summary"
    )
    if not summary_flag_present:
        enable_summary = sa.Column(
            "enable_summary",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        )
        op.add_column("search_source_connectors", enable_summary)

    _resync_zero_publication("154-summary-removal-downgrade")

View file

@ -0,0 +1,23 @@
"""reconcile zero_publication from canonical definition
Revision ID: 155
Revises: 154
"""
from collections.abc import Sequence
from alembic import op
from app.zero_publication import apply_publication
revision: str = "155"
down_revision: str | None = "154"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Apply the publication definition from ``app.zero_publication`` on the current bind."""
    bind = op.get_bind()
    apply_publication(bind)
def downgrade() -> None:
    """No-op. Historical publication shapes are immutable.

    Nothing is executed on downgrade; the publication is left exactly as
    ``upgrade`` applied it.
    """

View file

@ -1,557 +0,0 @@
"""Vision autocomplete agent with scoped filesystem exploration.
Converts the stateless single-shot vision autocomplete into an agent that
seeds a virtual filesystem from KB search results and lets the vision LLM
explore documents via ``ls``, ``read_file``, ``glob``, ``grep``, etc.
before generating the final completion.
Performance: KB search and agent graph compilation run in parallel so
the only sequential latency is KB-search (or agent compile, whichever is
slower) + the agent's LLM turns. There is no separate "query extraction"
LLM call; the window title is used directly as the KB search query.
"""
from __future__ import annotations
import asyncio
import json
import logging
import re
import uuid
from collections.abc import AsyncGenerator
from typing import Any
from deepagents.graph import BASE_AGENT_PROMPT
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from langchain.agents import create_agent
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, ToolMessage
from app.agents.new_chat.document_xml import build_document_xml
from app.agents.new_chat.middleware.filesystem import SurfSenseFilesystemMiddleware
from app.agents.new_chat.middleware.knowledge_search import (
search_knowledge_base,
)
from app.agents.new_chat.path_resolver import (
DOCUMENTS_ROOT,
build_path_index,
doc_to_virtual_path,
)
from app.db import shielded_async_session
from app.services.new_streaming_service import VercelStreamingService
try:
    from deepagents.backends.utils import create_file_data
except Exception:  # pragma: no cover - defensive

    def create_file_data(content: str) -> dict[str, Any]:
        """Fallback used when the ``deepagents`` helper cannot be imported.

        Wraps ``content`` as ``{"content": [...]}`` with one list entry per
        newline-separated line.
        """
        split_lines = content.split("\n")
        return {"content": split_lines}
async def _build_autocomplete_filesystem(
*,
documents: Any,
search_space_id: int,
) -> tuple[dict[str, Any], dict[int, str]]:
"""Build a ``state['files']``-shaped dict from KB search results.
This is the autocomplete-specific replacement for the previous
``build_scoped_filesystem`` helper. It uses the canonical path resolver
so paths line up with the rest of the system, including collision
suffixes for duplicate titles.
"""
files: dict[str, Any] = {}
doc_id_to_path: dict[int, str] = {}
if not documents:
return files, doc_id_to_path
async with shielded_async_session() as session:
index = await build_path_index(session, search_space_id)
for document in documents:
if not isinstance(document, dict):
continue
meta = document.get("document") or {}
doc_id = meta.get("id")
if not isinstance(doc_id, int):
continue
title = str(meta.get("title") or "untitled")
folder_id = meta.get("folder_id")
path = doc_to_virtual_path(
doc_id=doc_id, title=title, folder_id=folder_id, index=index
)
chunk_ids = document.get("matched_chunk_ids") or []
try:
matched_set = {int(c) for c in chunk_ids}
except (TypeError, ValueError):
matched_set = set()
xml = build_document_xml(document, matched_chunk_ids=matched_set)
files[path] = create_file_data(xml)
doc_id_to_path[doc_id] = path
if not files:
# Ensure the synthetic /documents folder is visible even when empty.
files.setdefault(f"{DOCUMENTS_ROOT}/.placeholder", create_file_data(""))
return files, doc_id_to_path
logger = logging.getLogger(__name__)
KB_TOP_K = 10
# ---------------------------------------------------------------------------
# System prompt
# ---------------------------------------------------------------------------
AUTOCOMPLETE_SYSTEM_PROMPT = """You are a smart writing assistant that analyzes the user's screen to draft or complete text.
You will receive a screenshot of the user's screen. Your PRIMARY source of truth is the screenshot itself — the visual context determines what to write.
Your job:
1. Analyze the ENTIRE screenshot to understand what the user is working on (email thread, chat conversation, document, code editor, form, etc.).
2. Identify the text area where the user will type.
3. Generate the text the user most likely wants to write based on the visual context.
You also have access to the user's knowledge base documents via filesystem tools. However:
- ONLY consult the knowledge base if the screenshot clearly involves a topic where your KB documents are DIRECTLY relevant (e.g., the user is writing about a specific project/topic that matches a document title).
- Do NOT explore documents just because they exist. Most autocomplete requests can be answered purely from the screenshot.
- If you do read a document, only incorporate information that is 100% relevant to what the user is typing RIGHT NOW. Do not add extra details, background, or tangential information from the KB.
- Keep your output SHORT autocomplete should feel like a natural continuation, not an essay.
Key behavior:
- If the text area is EMPTY, draft a concise response or message based on what you see on screen (e.g., reply to an email, respond to a chat message, continue a document).
- If the text area already has text, continue it naturally typically just a sentence or two.
Rules:
- Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft.
- Match the tone and formality of the surrounding context.
- If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
- Do NOT describe the screenshot or explain your reasoning.
- Do NOT cite or reference documents explicitly just let the knowledge inform your writing naturally.
- If you cannot determine what to write, output an empty JSON array: []
## Output Format
You MUST provide exactly 3 different suggestion options. Each should be a distinct, plausible completion vary the tone, detail level, or angle.
Return your suggestions as a JSON array of exactly 3 strings. Output ONLY the JSON array, nothing else no markdown fences, no explanation, no commentary.
Example format:
["First suggestion text here.", "Second suggestion — a different take.", "Third option with another approach."]
## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`
All file paths must start with a `/`.
- ls: list files and directories at a given path.
- read_file: read a file from the filesystem.
- write_file: create a temporary file in the session (not persisted).
- edit_file: edit a file in the session (not persisted for /documents/ files).
- glob: find files matching a pattern (e.g., "**/*.xml").
- grep: search for text within files.
## When to Use Filesystem Tools
BEFORE reaching for any tool, ask yourself: "Can I write a good completion purely from the screenshot?" If yes, just write it do NOT explore the KB.
Only use tools when:
- The user is clearly writing about a specific topic that likely has detailed information in their KB.
- You need a specific fact, name, number, or reference that the screenshot doesn't provide.
When you do use tools, be surgical:
- Check the `ls` output first. If no document title looks relevant, stop do not read files just to see what's there.
- If a title looks relevant, read only the `<chunk_index>` (first ~20 lines) and jump to matched chunks. Do not read entire documents.
- Extract only the specific information you need and move on to generating the completion.
## Reading Documents Efficiently
Documents are formatted as XML. Each document contains:
- `<document_metadata>` title, type, URL, etc.
- `<chunk_index>` a table of every chunk with its **line range** and a
`matched="true"` flag for chunks that matched the search query.
- `<document_content>` the actual chunks in original document order.
**Workflow**: read the first ~20 lines to see the `<chunk_index>`, identify
chunks marked `matched="true"`, then use `read_file(path, offset=<start_line>,
limit=<lines>)` to jump directly to those sections."""
APP_CONTEXT_BLOCK = """
The user is currently working in "{app_name}" (window: "{window_title}"). Use this to understand the type of application and adapt your tone and format accordingly."""
def _build_autocomplete_system_prompt(app_name: str, window_title: str) -> str:
    """Return the autocomplete system prompt, plus app context when ``app_name`` is set."""
    if not app_name:
        return AUTOCOMPLETE_SYSTEM_PROMPT
    app_context = APP_CONTEXT_BLOCK.format(
        app_name=app_name, window_title=window_title
    )
    return AUTOCOMPLETE_SYSTEM_PROMPT + app_context
# ---------------------------------------------------------------------------
# Pre-compute KB filesystem (runs in parallel with agent compilation)
# ---------------------------------------------------------------------------
class _KBResult:
"""Container for pre-computed KB filesystem results."""
__slots__ = ("files", "ls_ai_msg", "ls_tool_msg")
def __init__(
self,
files: dict[str, Any] | None = None,
ls_ai_msg: AIMessage | None = None,
ls_tool_msg: ToolMessage | None = None,
) -> None:
self.files = files
self.ls_ai_msg = ls_ai_msg
self.ls_tool_msg = ls_tool_msg
@property
def has_documents(self) -> bool:
return bool(self.files)
async def precompute_kb_filesystem(
    search_space_id: int,
    query: str,
    top_k: int = KB_TOP_K,
) -> _KBResult:
    """Search the KB and build the scoped filesystem outside the agent.

    Meant to be awaited via ``asyncio.gather`` next to agent graph compilation
    so both run concurrently. An empty ``_KBResult`` is returned when the query
    is empty, nothing is found, or the search/build step raises.
    """
    if not query:
        return _KBResult()

    try:
        hits = await search_knowledge_base(
            query=query,
            search_space_id=search_space_id,
            top_k=top_k,
        )
        if not hits:
            return _KBResult()

        kb_files, _ = await _build_autocomplete_filesystem(
            documents=hits,
            search_space_id=search_space_id,
        )
        if not kb_files:
            return _KBResult()

        listing = []
        for path, file_data in kb_files.items():
            if path.startswith("/documents/") and file_data is not None:
                listing.append(path)

        # Fabricate an `ls /documents` tool call and its result so the agent
        # starts with the directory listing already in its message history.
        call_id = f"auto_ls_{uuid.uuid4().hex[:12]}"
        ls_request = AIMessage(
            content="",
            tool_calls=[
                {"name": "ls", "args": {"path": "/documents"}, "id": call_id}
            ],
        )
        listing_text = str(listing) if listing else "No documents found."
        ls_response = ToolMessage(content=listing_text, tool_call_id=call_id)
        return _KBResult(
            files=kb_files, ls_ai_msg=ls_request, ls_tool_msg=ls_response
        )
    except Exception:
        # Best effort: autocomplete still works without KB context.
        logger.warning(
            "KB pre-computation failed, proceeding without KB", exc_info=True
        )
        return _KBResult()
# ---------------------------------------------------------------------------
# Filesystem middleware — no save_document, no persistence
# ---------------------------------------------------------------------------
class AutocompleteFilesystemMiddleware(SurfSenseFilesystemMiddleware):
    """Filesystem middleware variant used by the autocomplete agent.

    The parent is initialised with ``search_space_id=None`` and
    ``created_by_id=None``. The intent is read-only exploration: with no
    search space bound, the persistence pipeline is meant to be bypassed so
    the autocomplete flow never commits to Postgres.
    """

    def __init__(self) -> None:
        """Initialise the parent middleware without a search space or author."""
        super().__init__(search_space_id=None, created_by_id=None)
# ---------------------------------------------------------------------------
# Agent factory
# ---------------------------------------------------------------------------
async def _compile_agent(
    llm: BaseChatModel,
    app_name: str,
    window_title: str,
) -> Any:
    """Build the autocomplete agent graph off the event loop.

    Args:
        llm: Chat model handed to ``create_agent``.
        app_name: Forwarded to the autocomplete system-prompt builder.
        window_title: Forwarded to the autocomplete system-prompt builder.

    Returns:
        The agent returned by ``create_agent``, configured with a recursion
        limit of 200.
    """
    prompt_sections = (
        _build_autocomplete_system_prompt(app_name, window_title),
        BASE_AGENT_PROMPT,
    )
    full_prompt = "\n\n".join(prompt_sections)

    stack = [
        AutocompleteFilesystemMiddleware(),
        PatchToolCallsMiddleware(),
        AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
    ]

    # create_agent is synchronous, so it is pushed to a worker thread.
    compiled = await asyncio.to_thread(
        create_agent,
        llm,
        system_prompt=full_prompt,
        tools=[],
        middleware=stack,
    )
    return compiled.with_config({"recursion_limit": 200})
async def create_autocomplete_agent(
    llm: BaseChatModel,
    *,
    search_space_id: int,
    kb_query: str,
    app_name: str = "",
    window_title: str = "",
) -> tuple[Any, _KBResult]:
    """Compile the autocomplete agent while the KB is pre-computed.

    Both coroutines are awaited together with ``asyncio.gather``.

    Returns:
        ``(agent, kb_result)`` so the caller can place the pre-computed
        filesystem into the agent's initial state.
    """
    compile_coro = _compile_agent(llm, app_name, window_title)
    kb_coro = precompute_kb_filesystem(search_space_id, kb_query)
    compiled_agent, kb_result = await asyncio.gather(compile_coro, kb_coro)
    return compiled_agent, kb_result
# ---------------------------------------------------------------------------
# JSON suggestion parsing (with fallback)
# ---------------------------------------------------------------------------
def _parse_suggestions(raw: str) -> list[str]:
    """Turn the agent's raw output into a list of suggestion strings.

    Each candidate produced by ``_json_candidates`` is decoded in turn; the
    first one that is a JSON list made only of strings wins, with
    blank/whitespace-only entries removed.

    Returns:
        ``[]`` for empty or whitespace-only input, the cleaned list when a
        candidate decodes to a list of strings, otherwise the stripped input
        wrapped as a single suggestion.
    """
    stripped = raw.strip()
    if not stripped:
        return []

    for snippet in _json_candidates(stripped):
        try:
            decoded = json.loads(snippet)
        except (json.JSONDecodeError, ValueError):
            continue
        if not isinstance(decoded, list):
            continue
        if any(not isinstance(item, str) for item in decoded):
            continue
        return [item for item in decoded if item.strip()]

    return [stripped]
def _json_candidates(text: str) -> list[str]:
"""Yield candidate JSON strings from raw text."""
candidates = [text]
fence = re.search(r"```(?:json)?\s*\n?(.*?)```", text, re.DOTALL)
if fence:
candidates.append(fence.group(1).strip())
bracket = re.search(r"\[.*]", text, re.DOTALL)
if bracket:
candidates.append(bracket.group(0))
return candidates
# ---------------------------------------------------------------------------
# Streaming helper
# ---------------------------------------------------------------------------
async def stream_autocomplete_agent(
    agent: Any,
    input_data: dict[str, Any],
    streaming_service: VercelStreamingService,
    *,
    emit_message_start: bool = True,
) -> AsyncGenerator[str, None]:
    """Stream agent events as Vercel SSE, with thinking steps for tool calls.

    Model text is buffered rather than forwarded chunk by chunk; once the
    agent finishes, the buffer is parsed with ``_parse_suggestions`` and sent
    as a single ``suggestions`` data event, followed by finish and done.

    Args:
        agent: Object exposing ``astream_events(input, config=..., version=...)``.
        input_data: Input passed unchanged to ``agent.astream_events``.
        streaming_service: Formatter producing every yielded SSE string.
        emit_message_start: When False the caller has already sent the
            ``message_start`` event (e.g. to show preparation steps before
            the agent runs), so it is not emitted again here.

    Yields:
        Formatted SSE strings. If anything raises while streaming, the error
        is logged and a generic error event plus a done event are yielded
        instead of propagating the exception.
    """
    # A fresh thread id is generated for every call.
    thread_id = uuid.uuid4().hex
    config = {"configurable": {"thread_id": thread_id}}
    # Accumulated model text; parsed only after the event stream ends.
    text_buffer: list[str] = []
    # Number of tools currently running; model output is ignored while > 0.
    active_tool_depth = 0
    thinking_step_counter = 0
    # run_id -> thinking-step id for tools that have started but not ended.
    tool_step_ids: dict[str, str] = {}
    step_titles: dict[str, str] = {}
    completed_step_ids: set[str] = set()
    last_active_step_id: str | None = None

    def next_thinking_step_id() -> str:
        # Sequential ids: autocomplete-step-1, autocomplete-step-2, ...
        nonlocal thinking_step_counter
        thinking_step_counter += 1
        return f"autocomplete-step-{thinking_step_counter}"

    def complete_current_step() -> str | None:
        # Returns a "complete" event for the active step (at most once per
        # step) and clears the active marker; returns None if nothing is open.
        nonlocal last_active_step_id
        if last_active_step_id and last_active_step_id not in completed_step_ids:
            completed_step_ids.add(last_active_step_id)
            title = step_titles.get(last_active_step_id, "Done")
            event = streaming_service.format_thinking_step(
                step_id=last_active_step_id,
                title=title,
                status="complete",
            )
            last_active_step_id = None
            return event
        return None

    if emit_message_start:
        yield streaming_service.format_message_start()

    # Open the initial "Generating suggestions" step before the agent runs.
    gen_step_id = next_thinking_step_id()
    last_active_step_id = gen_step_id
    step_titles[gen_step_id] = "Generating suggestions"
    yield streaming_service.format_thinking_step(
        step_id=gen_step_id,
        title="Generating suggestions",
        status="in_progress",
    )

    try:
        async for event in agent.astream_events(
            input_data, config=config, version="v2"
        ):
            event_type = event.get("event", "")

            if event_type == "on_chat_model_stream":
                # Skip model output produced while a tool is running or
                # tagged as internal.
                if active_tool_depth > 0:
                    continue
                if "surfsense:internal" in event.get("tags", []):
                    continue
                chunk = event.get("data", {}).get("chunk")
                if chunk and hasattr(chunk, "content"):
                    content = chunk.content
                    # Only plain-string content is buffered.
                    if content and isinstance(content, str):
                        text_buffer.append(content)

            elif event_type == "on_chat_model_end":
                if active_tool_depth > 0:
                    continue
                if "surfsense:internal" in event.get("tags", []):
                    continue
                output = event.get("data", {}).get("output")
                if output and hasattr(output, "content"):
                    # A message that carries tool calls is not a final answer.
                    if getattr(output, "tool_calls", None):
                        continue
                    content = output.content
                    # Fallback: use the final message only when no streamed
                    # chunk was captured.
                    if content and isinstance(content, str) and not text_buffer:
                        text_buffer.append(content)

            elif event_type == "on_tool_start":
                active_tool_depth += 1
                tool_name = event.get("name", "unknown_tool")
                run_id = event.get("run_id", "")
                tool_input = event.get("data", {}).get("input", {})

                # Close whichever step is open before opening the tool's step.
                step_event = complete_current_step()
                if step_event:
                    yield step_event

                tool_step_id = next_thinking_step_id()
                tool_step_ids[run_id] = tool_step_id
                last_active_step_id = tool_step_id

                title, items = _describe_tool_call(tool_name, tool_input)
                step_titles[tool_step_id] = title
                yield streaming_service.format_thinking_step(
                    step_id=tool_step_id,
                    title=title,
                    status="in_progress",
                    items=items,
                )

            elif event_type == "on_tool_end":
                # Clamp at zero in case an end arrives without a matching start.
                active_tool_depth = max(0, active_tool_depth - 1)
                run_id = event.get("run_id", "")
                step_id = tool_step_ids.pop(run_id, None)
                if step_id and step_id not in completed_step_ids:
                    completed_step_ids.add(step_id)
                    title = step_titles.get(step_id, "Done")
                    yield streaming_service.format_thinking_step(
                        step_id=step_id,
                        title=title,
                        status="complete",
                    )
                    if last_active_step_id == step_id:
                        last_active_step_id = None

        # Stream exhausted: close any step still open, then emit the result.
        step_event = complete_current_step()
        if step_event:
            yield step_event

        raw_text = "".join(text_buffer)
        suggestions = _parse_suggestions(raw_text)

        yield streaming_service.format_data("suggestions", {"options": suggestions})
        yield streaming_service.format_finish()
        yield streaming_service.format_done()

    except Exception as e:
        # The client gets a generic message; details go to the log only.
        logger.error(f"Autocomplete agent streaming error: {e}", exc_info=True)
        yield streaming_service.format_error("Autocomplete failed. Please try again.")
        yield streaming_service.format_done()
def _describe_tool_call(tool_name: str, tool_input: Any) -> tuple[str, list[str]]:
"""Return a human-readable (title, items) for a tool call thinking step."""
inp = tool_input if isinstance(tool_input, dict) else {}
if tool_name == "ls":
path = inp.get("path", "/")
return "Listing files", [path]
if tool_name == "read_file":
fp = inp.get("file_path", "")
display = fp if len(fp) <= 80 else "" + fp[-77:]
return "Reading file", [display]
if tool_name == "write_file":
fp = inp.get("file_path", "")
display = fp if len(fp) <= 80 else "" + fp[-77:]
return "Writing file", [display]
if tool_name == "edit_file":
fp = inp.get("file_path", "")
display = fp if len(fp) <= 80 else "" + fp[-77:]
return "Editing file", [display]
if tool_name == "glob":
pat = inp.get("pattern", "")
base = inp.get("path", "/")
return "Searching files", [f"{pat} in {base}"]
if tool_name == "grep":
pat = inp.get("pattern", "")
path = inp.get("path", "")
display_pat = pat[:60] + ("" if len(pat) > 60 else "")
return "Searching content", [
f'"{display_pat}"' + (f" in {path}" if path else "")
]
return f"Using {tool_name}", []

View file

@ -0,0 +1,5 @@
"""Chat agents category.
Groups the conversational agents that share a kernel: ``anonymous_chat`` and
``multi_agent_chat``. Code shared by *both* lives in ``chat/shared/``.
"""

View file

@ -0,0 +1,14 @@
"""Anonymous / free-chat agent.
The no-login chat experience: a deliberately minimal agent that bypasses the
full SurfSense deep-agent stack (filesystem, knowledge-base persistence,
subagents, skills, memory) and answers with an optional ``web_search`` tool and
an optional read-only uploaded document. See :mod:`.agent` for details.
"""
from app.agents.chat.anonymous_chat.agent import (
build_anonymous_system_prompt,
create_anonymous_chat_agent,
)
__all__ = ["build_anonymous_system_prompt", "create_anonymous_chat_agent"]

View file

@ -27,12 +27,12 @@ from langchain.agents.middleware import (
from langchain_core.language_models import BaseChatModel
from langgraph.types import Checkpointer
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.middleware import (
from app.agents.chat.shared.context import SurfSenseContextSchema
from app.agents.chat.shared.middleware import (
RetryAfterMiddleware,
create_surfsense_compaction_middleware,
)
from app.agents.new_chat.tools.web_search import create_web_search_tool
from app.agents.chat.shared.tools.web_search import create_web_search_tool
# Cap how much of an uploaded document we inline into the system prompt. The
# upload endpoint allows files up to several MB, but the doc is re-sent on

View file

@ -2,6 +2,7 @@
from __future__ import annotations
import time
from collections.abc import Sequence
from typing import Any
@ -11,13 +12,16 @@ from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.middleware.stack import (
from app.agents.chat.multi_agent_chat.main_agent.middleware.stack import (
build_main_agent_deepagent_middleware,
)
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.chat.shared.context import SurfSenseContextSchema
from app.db import ChatVisibility
from app.utils.perf import get_perf_logger
_perf_log = get_perf_logger()
def build_compiled_agent_graph_sync(
@ -43,6 +47,7 @@ def build_compiled_agent_graph_sync(
disabled_tools: list[str] | None = None,
):
"""Sync compile: middleware + ``create_agent`` (run via ``asyncio.to_thread``)."""
mw_start = time.perf_counter()
main_agent_middleware = build_main_agent_deepagent_middleware(
llm=llm,
tools=tools,
@ -63,7 +68,9 @@ def build_compiled_agent_graph_sync(
mcp_tools_by_agent=mcp_tools_by_agent,
disabled_tools=disabled_tools,
)
mw_elapsed = time.perf_counter() - mw_start
create_start = time.perf_counter()
agent = create_agent(
llm,
system_prompt=final_system_prompt,
@ -72,6 +79,15 @@ def build_compiled_agent_graph_sync(
context_schema=SurfSenseContextSchema,
checkpointer=checkpointer,
)
create_elapsed = time.perf_counter() - create_start
_perf_log.info(
"[graph_compile] middleware_build=%.3fs main_create_agent=%.3fs "
"total=%.3fs mw_count=%d",
mw_elapsed,
create_elapsed,
mw_elapsed + create_elapsed,
len(main_agent_middleware),
)
return agent.with_config(
{
"recursion_limit": 10_000,

View file

@ -0,0 +1,10 @@
"""Action-log middleware: audit row per tool call (impl + builder)."""
from .builder import build_action_log_mw
from .middleware import ActionLogMiddleware, ToolDefinition
__all__ = [
"ActionLogMiddleware",
"ToolDefinition",
"build_action_log_mw",
]

View file

@ -4,11 +4,10 @@ from __future__ import annotations
import logging
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ActionLogMiddleware
from app.agents.new_chat.tools.registry import BUILTIN_TOOLS
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled
from .middleware import ActionLogMiddleware
def build_action_log_mw(
@ -21,12 +20,13 @@ def build_action_log_mw(
if not enabled(flags, "enable_action_log") or thread_id is None:
return None
try:
tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS}
# No built-in tool declares a ``reverse`` callable yet, so the action
# log runs without a tool_definitions map. Reversibility is opt-in per
# tool via ``ToolDefinition.reverse`` and can be wired here when used.
return ActionLogMiddleware(
thread_id=thread_id,
search_space_id=search_space_id,
user_id=user_id,
tool_definitions=tool_defs_by_name,
)
except Exception: # pragma: no cover - defensive
logging.warning(

View file

@ -1,25 +1,15 @@
"""Append-only action-log middleware for the SurfSense agent.
Wraps every tool call via :meth:`AgentMiddleware.awrap_tool_call` and writes
a row to :class:`~app.db.AgentActionLog` after the tool returns. Tools opt
into reversibility by declaring a ``reverse`` callable on their
:class:`~app.agents.new_chat.tools.registry.ToolDefinition`; the rendered
descriptor is persisted in ``reverse_descriptor`` for use by
Wraps every tool call and writes a row to :class:`~app.db.AgentActionLog`
after the tool returns. Tools opt into reversibility via a ``reverse``
callable on their :class:`ToolDefinition`; the rendered descriptor powers
``/api/threads/{thread_id}/revert/{action_id}``.
Design points:
* **Defensive.** Logging never blocks the agent. We catch every exception
on the DB write path and emit a warning; the tool's ``ToolMessage``
result is always returned untouched.
* **Lightweight payload.** Only the tool ``name`` + ``args`` (capped) +
``result_id`` + ``reverse_descriptor`` are stored. Tool output text
remains in the LangGraph checkpoint / spilled tool-output files.
* **Best-effort reversibility.** We invoke ``reverse(args, result_obj)``
with the parsed JSON result when the tool's content is a JSON object;
otherwise the raw text is passed. Exceptions in the reverse callable
are swallowed and logged; a failed descriptor render simply means the
action is NOT marked reversible.
Logging is fully defensive: DB-write failures are swallowed so the tool's
result is always returned untouched. Only metadata (name, capped args,
result_id, reverse_descriptor) is stored; tool output stays in the
checkpoint. Reversibility is best-effort: a reverse callable that raises
just leaves the action non-reversible.
"""
from __future__ import annotations
@ -27,14 +17,14 @@ from __future__ import annotations
import json
import logging
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from langchain.agents.middleware import AgentMiddleware
from langchain_core.callbacks import adispatch_custom_event
from langchain_core.messages import ToolMessage
from app.agents.new_chat.feature_flags import get_flags
from app.agents.new_chat.tools.registry import ToolDefinition
from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
if TYPE_CHECKING: # pragma: no cover - type-only
from langchain.agents.middleware.types import ToolCallRequest
@ -44,6 +34,31 @@ if TYPE_CHECKING: # pragma: no cover - type-only
logger = logging.getLogger(__name__)
@dataclass
class ToolDefinition:
    """Reversibility descriptor consumed by :class:`ActionLogMiddleware`.

    Only ``name`` and ``reverse`` are read by the middleware; the remaining
    fields let callers and tests describe a tool declaratively. A tool is
    marked reversible in the action log when ``reverse`` is set and renders a
    descriptor without raising.

    Attributes:
        name: Unique identifier for the tool.
        description: Human-readable description of what the tool does.
        factory: Optional callable that builds the tool (unused by the
            middleware; retained for declarative call sites/tests).
        reverse: Optional callable that, given the tool's ``(args, result)``,
            returns a descriptor of the inverse invocation. It is annotated
            here as a plain ``dict``.
            NOTE(review): earlier wording called this a ``ReverseDescriptor``;
            confirm which shape consumers expect.
    """

    # Required; the only field without a default.
    name: str
    description: str = ""
    factory: Callable[[dict[str, Any]], Any] | None = None
    # Leaving this as None means the tool never opts into reversibility.
    reverse: Callable[[dict[str, Any], Any], dict[str, Any]] | None = None
# Cap for the persisted ``args`` JSON to avoid bloating the action log with
# accidentally-huge inputs. Values are truncated and a flag is set in the
# stored payload so consumers can detect truncation.
@ -93,18 +108,32 @@ class ActionLogMiddleware(AgentMiddleware):
self._user_id = user_id
self._tool_definitions = dict(tool_definitions or {})
def _enabled(self) -> bool:
def _enabled(self, thread_id: int | None) -> bool:
flags = get_flags()
if flags.disable_new_agent_stack:
return False
return bool(flags.enable_action_log) and self._thread_id is not None
return bool(flags.enable_action_log) and thread_id is not None
def _resolve_thread_id(self, request: ToolCallRequest) -> int | None:
    """Pick the thread id for this call, preferring the runtime config.

    The id is looked up on the request first (module-level
    ``_resolve_thread_id``) instead of relying on the value captured at
    ``__init__``. This lets a single cached compiled graph safely serve many
    threads; without it, a cache hit would attribute action-log rows to
    whichever thread first built the graph. The constructor value is used
    only when the request yields nothing (legacy/test runtimes that don't
    surface a config).
    """
    live_thread_id = _resolve_thread_id(request)
    if live_thread_id is None:
        return self._thread_id
    return live_thread_id
async def awrap_tool_call(
self,
request: ToolCallRequest,
handler: Callable[[ToolCallRequest], Awaitable[ToolMessage | Command[Any]]],
) -> ToolMessage | Command[Any]:
if not self._enabled():
thread_id = self._resolve_thread_id(request)
if not self._enabled(thread_id):
return await handler(request)
result: ToolMessage | Command[Any]
@ -119,10 +148,16 @@ class ActionLogMiddleware(AgentMiddleware):
request=request,
result=None,
error_payload=error_payload,
thread_id=thread_id,
)
raise
await self._record(request=request, result=result, error_payload=None)
await self._record(
request=request,
result=result,
error_payload=None,
thread_id=thread_id,
)
return result
async def _record(
@ -131,6 +166,7 @@ class ActionLogMiddleware(AgentMiddleware):
request: ToolCallRequest,
result: ToolMessage | Command[Any] | None,
error_payload: dict[str, Any] | None,
thread_id: int | None,
) -> None:
"""Persist one ``agent_action_log`` row. Defensive: never raises."""
try:
@ -149,7 +185,7 @@ class ActionLogMiddleware(AgentMiddleware):
chat_turn_id = _resolve_chat_turn_id(request)
row = AgentActionLog(
thread_id=self._thread_id,
thread_id=thread_id,
user_id=self._user_id,
search_space_id=self._search_space_id,
# ``turn_id`` is the deprecated alias of ``tool_call_id``
@ -178,11 +214,9 @@ class ActionLogMiddleware(AgentMiddleware):
)
return
# Surface a side-channel SSE event so the chat tool card can
# render a Revert button immediately after the row is durable.
# ``stream_new_chat`` translates this into a
# ``data-action-log`` SSE event. We DO NOT include the
# ``reverse_descriptor`` payload here; only a presence flag.
# Side-channel event (relayed by ``stream_new_chat`` as a
# ``data-action-log`` SSE) so the tool card can show a Revert button
# once the row is durable. Carries a presence flag, not the descriptor.
try:
await adispatch_custom_event(
"action_log",
@ -337,6 +371,36 @@ def _resolve_chat_turn_id(request: Any) -> str | None:
return None
def _resolve_thread_id(request: Any) -> int | None:
"""Return ``configurable.thread_id`` (as int) for this request, if accessible.
Mirrors :func:`_resolve_chat_turn_id`: ``ToolRuntime.config`` is exposed by
LangGraph at ``request.runtime.config``, and the chat thread id lives at
``configurable.thread_id`` (a stringified ``chat_id`` at the main-graph
level). Returns ``None`` when absent or unparseable so the caller can fall
back to the constructor value.
"""
try:
runtime = getattr(request, "runtime", None)
if runtime is None:
return None
config = getattr(runtime, "config", None)
if not isinstance(config, dict):
return None
configurable = config.get("configurable")
if not isinstance(configurable, dict):
return None
value = configurable.get("thread_id")
if value is None:
return None
try:
return int(value)
except (TypeError, ValueError):
return None
except Exception: # pragma: no cover - defensive
return None
def _resolve_message_id(request: Any) -> str | None:
"""Tool-call IDs serve as best-available message correlator at this layer."""
return _resolve_tool_call_id(request)

View file

@ -0,0 +1,9 @@
"""Anonymous-document middleware: Redis hydration, cloud only (impl + builder)."""
from .builder import build_anonymous_doc_mw
from .middleware import AnonymousDocumentMiddleware
__all__ = [
"AnonymousDocumentMiddleware",
"build_anonymous_doc_mw",
]

View file

@ -2,8 +2,9 @@
from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import AnonymousDocumentMiddleware
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from .middleware import AnonymousDocumentMiddleware
def build_anonymous_doc_mw(

View file

@ -24,8 +24,13 @@ from typing import Any
from langchain.agents.middleware import AgentMiddleware, AgentState
from langgraph.runtime import Runtime
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT, safe_filename
from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
SurfSenseFilesystemState,
)
from app.agents.chat.runtime.path_resolver import (
DOCUMENTS_ROOT,
safe_filename,
)
logger = logging.getLogger(__name__)

View file

@ -0,0 +1,25 @@
"""Per-turn cooperative busy-lock middleware + cancel primitives (main-agent)."""
from .builder import build_busy_mutex_mw
from .middleware import (
BusyMutexMiddleware,
end_turn,
get_cancel_event,
get_cancel_state,
is_cancel_requested,
manager,
request_cancel,
reset_cancel,
)
__all__ = [
"BusyMutexMiddleware",
"build_busy_mutex_mw",
"end_turn",
"get_cancel_event",
"get_cancel_state",
"is_cancel_requested",
"manager",
"request_cancel",
"reset_cancel",
]

View file

@ -2,10 +2,12 @@
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import BusyMutexMiddleware
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled
from .middleware import (
BusyMutexMiddleware,
)
def build_busy_mutex_mw(flags: AgentFeatureFlags) -> BusyMutexMiddleware | None:

View file

@ -1,32 +1,12 @@
"""
BusyMutexMiddleware: per-thread asyncio lock + cancel token.
"""Per-thread asyncio lock + cooperative cancel token, keyed by ``thread_id``.
LangChain has no built-in concept of "this thread is already running a
turn; refuse the second concurrent request". Without it, a user
double-clicking "send" or refreshing the page mid-stream can spawn two
turns racing on the same checkpoint, producing duplicated tool calls
and mangled state.
Refuses a second concurrent turn on the same thread (e.g. double-clicked
"send") that would otherwise race on the same checkpoint and duplicate tool
calls. Also exposes a per-thread cancel event that long-running tools poll
via ``runtime.context.cancel_event.is_set()`` to abort cooperatively.
Ported from OpenCode's ``Stream.scoped(AbortController)`` pattern: a
single-process, in-memory lock + cooperative cancellation token keyed by
``thread_id``. For multi-worker deployments a distributed lock backend
(Redis or PostgreSQL advisory locks) is a phase-2 follow-up.
What this provides:
- A ``WeakValueDictionary[str, asyncio.Lock]`` keyed by ``thread_id``;
acquiring the lock during ``before_agent`` blocks any concurrent
prompt on the same thread until release.
- A per-thread ``asyncio.Event`` (``cancel_event``) that long-running
tools can poll to abort cooperatively. The event is reset between
turns. Tools should check ``runtime.context.cancel_event.is_set()``
in tight inner loops.
- A typed :class:`~app.agents.new_chat.errors.BusyError` raised when a
second turn arrives while the lock is held.
Note: SurfSense's ``stream_new_chat`` is the call site that should
acquire/release. Wiring this as middleware means the contract is
explicit and the lock manager is shared with subagents that compile
their own ``create_agent`` runnables.
Process-local and in-memory; multi-worker deployments need a distributed lock
(Redis / PostgreSQL advisory locks) as a follow-up.
"""
from __future__ import annotations
@ -46,7 +26,7 @@ from langchain.agents.middleware.types import (
from langgraph.config import get_config
from langgraph.runtime import Runtime
from app.agents.new_chat.errors import BusyError
from app.agents.chat.runtime.errors import BusyError
logger = logging.getLogger(__name__)
@ -152,9 +132,8 @@ class _ThreadLockManager:
return True
# Module-level singleton — process-local but reused across all agent
# instances built in this process. Subagents created in nested
# ``create_agent`` calls also get this so locks are coherent.
# Process-local singleton shared across all agents/subagents built in this
# process so per-thread locks stay coherent.
manager = _ThreadLockManager()
@ -266,7 +245,6 @@ class BusyMutexMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respo
await lock.acquire()
epoch = manager.bump_turn_epoch(thread_id)
self._held_locks[thread_id] = (lock, epoch)
# Reset the cancel event so this turn starts fresh
reset_cancel(thread_id)
return None
@ -289,17 +267,14 @@ class BusyMutexMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respo
return None
if lock.locked():
lock.release()
# Always clear cancel event between turns so a stale signal
# doesn't leak into the next request.
# Clear cancel event so a stale signal doesn't leak into the next turn.
reset_cancel(thread_id)
return None
# Provide sync no-ops because the middleware base class allows them
def before_agent( # type: ignore[override]
self, state: AgentState[Any], runtime: Runtime[ContextT]
) -> dict[str, Any] | None:
# Sync path: no asyncio.Lock to acquire. Best we can do is reject
# if anyone else is in flight.
# Sync path can't await an asyncio.Lock; only reject if one is in flight.
thread_id = self._thread_id(runtime)
if thread_id is None:
if self._require_thread_id:

View file

@ -1,7 +1,9 @@
"""RunnableConfig wiring for nested subagent invocations.
"""HITL resume side-channel for nested subagent invocations.
Forwards the parent's ``runtime.config`` (thread_id, …) into the subagent and
exposes the side-channel ``stream_resume_chat`` uses to ferry resume payloads.
Exposes the configurable side-channel ``stream_resume_chat`` uses to ferry
resume payloads into a mid-flight subagent. The ``RunnableConfig`` builder and
state-key filter shared with subagents live in
``app.agents.chat.multi_agent_chat.subagents.shared.invocation``.
"""
from __future__ import annotations
@ -11,8 +13,6 @@ from typing import Any
from langchain.tools import ToolRuntime
from .constants import DEFAULT_SUBAGENT_RECURSION_LIMIT
logger = logging.getLogger(__name__)
# langgraph stores the parent task's scratchpad under this configurable key;
@ -20,39 +20,6 @@ logger = logging.getLogger(__name__)
_LANGGRAPH_SCRATCHPAD_KEY = "__pregel_scratchpad"
def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
    """Build the RunnableConfig for a nested subagent invoke.

    Starting from a shallow copy of ``runtime.config``, two things change:

    * ``recursion_limit`` is raised to ``DEFAULT_SUBAGENT_RECURSION_LIMIT``
      when it is missing, unparseable, or lower than that value.
    * ``configurable.thread_id`` becomes
      ``{parent_thread}::task:{tool_call_id}`` (or just
      ``task:{tool_call_id}`` without a parent id), so each subagent call
      gets its own checkpoint slot and reads the same one across a resume
      cycle (``tool_call_id`` is stable per LLM-emitted call).

    ``thread_id`` is used for namespacing rather than ``checkpoint_ns``
    because langgraph's ``aget_state`` interprets a non-empty
    ``checkpoint_ns`` as a subgraph path and raises
    ``ValueError("Subgraph X not found")``.
    """
    config: dict[str, Any] = dict(runtime.config) if runtime.config else {}

    raw_limit = config.get("recursion_limit")
    if raw_limit is None:
        effective_limit = 0
    else:
        try:
            effective_limit = int(raw_limit)
        except (TypeError, ValueError):
            effective_limit = 0
    if effective_limit < DEFAULT_SUBAGENT_RECURSION_LIMIT:
        config["recursion_limit"] = DEFAULT_SUBAGENT_RECURSION_LIMIT

    # Copy so the parent's "configurable" mapping is never mutated.
    scoped: dict[str, Any] = dict(config.get("configurable") or {})
    parent_id = scoped.get("thread_id")
    call_suffix = f"task:{runtime.tool_call_id}"
    if parent_id:
        scoped["thread_id"] = f"{parent_id}::{call_suffix}"
    else:
        scoped["thread_id"] = call_suffix
    config["configurable"] = scoped
    return config
def consume_surfsense_resume(runtime: ToolRuntime) -> Any:
"""Pop the resume payload for *this* call's ``tool_call_id``.

View file

@ -1,24 +1,14 @@
"""Constants shared by the checkpointed subagent middleware."""
"""Tuning constants for the checkpointed subagent middleware.
``EXCLUDED_STATE_KEYS`` and ``DEFAULT_SUBAGENT_RECURSION_LIMIT`` are part of the
subagent-invocation contract shared with subagents and now live in
``app.agents.chat.multi_agent_chat.subagents.shared.invocation``.
"""
from __future__ import annotations
import os
# Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
EXCLUDED_STATE_KEYS = frozenset(
{
"messages",
"todos",
"structured_response",
"skills_metadata",
"memory_contents",
}
)
# Match the parent graph's budget; the LangGraph default of 25 trips on
# multi-step subagent runs.
DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000
def _read_timeout_env(name: str, default: float) -> float:
"""Parse ``name`` from the environment; fall back to ``default`` on bad values.

View file

@ -0,0 +1,188 @@
"""SubAgent middleware that compiles each subagent against the parent checkpointer."""
from __future__ import annotations
import time
from collections.abc import Callable
from typing import Any, cast
from deepagents.backends.protocol import BackendFactory, BackendProtocol
from deepagents.middleware.subagents import (
TASK_SYSTEM_PROMPT,
CompiledSubAgent,
SubAgent,
SubAgentMiddleware,
)
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain_core.runnables import Runnable
from langgraph.types import Checkpointer
from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
SURF_CONTEXT_HINT_PROVIDER_KEY,
SURF_LAZY_SPEC_FACTORY_KEY,
)
from app.utils.perf import get_perf_logger
from .task_tool import build_task_tool_with_parent_config
_perf_log = get_perf_logger()
class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
    """``SubAgentMiddleware`` variant that compiles each subagent against the parent checkpointer.

    Subagent graphs are compiled lazily: ``__init__`` only indexes the specs
    and builds the ``task`` tool, while :meth:`_resolve_subagent` compiles (and
    memoizes) a graph the first time its name is requested.
    """

    def __init__(
        self,
        *,
        checkpointer: Checkpointer,
        backend: BackendProtocol | BackendFactory,
        subagents: list[SubAgent | CompiledSubAgent],
        system_prompt: str | None = TASK_SYSTEM_PROMPT,
        task_description: str | None = None,
        search_space_id: int | None = None,
    ) -> None:
        """Index ``subagents`` and expose a single ``task`` tool.

        Args:
            checkpointer: Checkpointer handed to every compiled subagent graph.
            backend: Backend (or backend factory) stored on the instance.
            subagents: Subagent specs; must be non-empty.
            system_prompt: Base prompt; when truthy (and descriptors exist) the
                list of available subagent types is appended to it.
            task_description: Forwarded to the ``task`` tool builder.
            search_space_id: Workspace id forwarded to the ``task`` tool builder.

        Raises:
            ValueError: If ``subagents`` is empty, or an eagerly indexed spec
                lacks ``model`` / ``tools``.
        """
        self._surf_checkpointer = checkpointer
        # Two-argument ``super`` starts the MRO lookup *after*
        # ``SubAgentMiddleware``, so that class's own ``__init__`` is bypassed
        # (presumably to avoid its eager setup; this class builds its own
        # registry and task tool below).
        super(SubAgentMiddleware, self).__init__()
        if not subagents:
            raise ValueError(
                "At least one subagent must be specified when using the new API"
            )
        self._backend = backend
        self._subagents = subagents
        # Search-space id is captured at build time (the orchestrator runs in
        # exactly one search space for its lifetime). The spawn-paused kill
        # switch keys on it so an operator can quarantine one workspace
        # without affecting the rest of the deployment.
        self._search_space_id = search_space_id

        # Lazy subagent compilation. Compiling a subagent graph via
        # ``create_agent`` is expensive (~250-400ms each) and there can be up
        # to ~17 of them. Doing it all in ``__init__`` put the full cost on
        # every cold ``agent_cache`` miss (i.e. on time-to-first-token), even
        # though a turn usually invokes zero or one subagent. We instead index
        # the raw specs here and compile each graph on first ``task(name)``
        # use, memoizing the result for the life of this (cached) instance.
        self._compiled: dict[str, Runnable] = {}
        self._lazy_specs: dict[str, dict[str, Any]] = {}
        # Subagents whose *spec itself* is built lazily (not just compiled).
        # Keyed by name → zero-arg factory returning the full spec dict. Used
        # for the write knowledge_base subagent, whose filesystem middleware
        # builds ~13 tool schemas (~2s) that almost never matter on turn 1.
        self._lazy_spec_factories: dict[str, Callable[[], dict[str, Any]]] = {}

        descriptors = self._build_subagent_registry()
        task_tool = build_task_tool_with_parent_config(
            descriptors,
            task_description,
            search_space_id=search_space_id,
            resolve_subagent=self._resolve_subagent,
        )
        if system_prompt and descriptors:
            agents_desc = "\n".join(
                f"- {s['name']}: {s['description']}" for s in descriptors
            )
            self.system_prompt = (
                system_prompt + "\n\nAvailable subagent types:\n" + agents_desc
            )
        else:
            self.system_prompt = system_prompt
        self.tools = [task_tool]

    @staticmethod
    def _require_model_and_tools(name: str, spec: Any) -> None:
        """Raise ``ValueError`` unless ``spec`` declares both ``model`` and ``tools``.

        Shared by the eager indexing path and the lazy-factory path so a
        misconfigured spec fails with the same descriptive message no matter
        when it is first inspected.
        """
        if "model" not in spec:
            msg = f"SubAgent '{name}' must specify 'model'"
            raise ValueError(msg)
        if "tools" not in spec:
            msg = f"SubAgent '{name}' must specify 'tools'"
            raise ValueError(msg)

    def _build_subagent_registry(self) -> list[dict[str, Any]]:
        """Index subagents for lazy compilation; return lightweight descriptors.

        Pre-compiled specs (those carrying a ``runnable``) are seeded directly
        into the memo. Lazy specs are stashed by name and compiled on first
        ``task(...)`` use via :meth:`_resolve_subagent`. The returned
        descriptors carry only ``name``/``description`` plus the optional
        context-hint provider — everything the ``task`` tool needs to validate
        names, render its catalog, and run hints, without paying the
        ``create_agent`` cost up front.

        Raises:
            ValueError: If a plain (non-factory, non-compiled) spec lacks
                ``model`` or ``tools``.
        """
        descriptors: list[dict[str, Any]] = []
        for spec in self._subagents:
            # Provider may be ``None`` (no hint), in which case task_tool skips
            # the prepend step. We forward the key unconditionally so the
            # descriptor shape is uniform.
            hint_provider = cast(dict, spec).get(SURF_CONTEXT_HINT_PROVIDER_KEY)
            name = spec["name"]
            spec_factory = cast(dict, spec).get(SURF_LAZY_SPEC_FACTORY_KEY)
            if spec_factory is not None:
                # Descriptor-only entry: the spec dict is built on first use.
                self._lazy_spec_factories[name] = spec_factory
            elif "runnable" in spec:
                compiled = cast(CompiledSubAgent, spec)
                self._compiled[name] = compiled["runnable"]
            else:
                self._require_model_and_tools(name, spec)
                self._lazy_specs[name] = cast(dict, spec)
            descriptors.append(
                {
                    "name": name,
                    "description": spec["description"],
                    SURF_CONTEXT_HINT_PROVIDER_KEY: hint_provider,
                }
            )
        return descriptors

    def _resolve_subagent(self, name: str) -> Runnable:
        """Return the compiled subagent graph for ``name``, compiling on first use.

        Memoized: the ``create_agent`` cost is paid once per subagent per
        cached middleware instance.

        Raises:
            KeyError: ``name`` is not a registered subagent (callers in the
                ``task`` tool validate membership before resolving).
            ValueError: A lazily built spec lacks ``model`` or ``tools``.
        """
        cached = self._compiled.get(name)
        if cached is not None:
            return cached
        spec = self._lazy_specs.get(name)
        if spec is None:
            factory = self._lazy_spec_factories.get(name)
            if factory is None:
                raise KeyError(name)
            # Build the spec on first use (pays the deferred construction cost
            # here, off the cold agent-build path), then compile and memoize.
            build_start = time.perf_counter()
            spec = factory()
            _perf_log.info(
                "[subagent_spec_lazy] name=%s (deferred spec build) in %.3fs",
                name,
                time.perf_counter() - build_start,
            )
            # Factory-built specs never passed through the registry check, so
            # validate them here; otherwise a missing key would surface as a
            # bare ``KeyError`` that looks like an unknown-subagent error.
            self._require_model_and_tools(name, spec)
        runnable = self._compile_one(spec)
        self._compiled[name] = runnable
        return runnable

    def _compile_one(self, spec: dict[str, Any]) -> Runnable:
        """Compile a single subagent graph against the parent checkpointer.

        A string ``model`` is resolved through ``init_chat_model``; the spec's
        ``system_prompt``, ``tools``, ``middleware`` (default: none) and
        ``name`` are passed to ``create_agent`` together with the checkpointer
        captured in ``__init__``. Compile time is written to the perf log.
        """
        model = spec["model"]
        if isinstance(model, str):
            model = init_chat_model(model)
        middleware: list[Any] = list(spec.get("middleware", []))
        # Counts are only used for the perf log line below.
        tools_count = len(spec.get("tools") or [])
        mw_count = len(middleware)
        compile_start = time.perf_counter()
        runnable = create_agent(
            model,
            system_prompt=spec["system_prompt"],
            tools=spec["tools"],
            middleware=middleware,
            name=spec["name"],
            checkpointer=self._surf_checkpointer,
        )
        _perf_log.info(
            "[subagent_compile_lazy] name=%s in %.3fs tools=%d mw=%d",
            spec["name"],
            time.perf_counter() - compile_start,
            tools_count,
            mw_count,
        )
        return runnable

View file

@ -6,7 +6,7 @@ and the ``<tools>`` block render from the same source.
from __future__ import annotations
from app.agents.multi_agent_chat.main_agent.system_prompt.builder.load_md import (
from app.agents.chat.multi_agent_chat.main_agent.system_prompt.builder.load_md import (
read_prompt_md,
)

View file

@ -12,7 +12,7 @@ import asyncio
import json
import logging
import time
from collections.abc import Awaitable
from collections.abc import Awaitable, Callable
from typing import Annotated, Any, NoReturn, TypeVar
from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
@ -23,7 +23,11 @@ from langchain_core.tools import StructuredTool
from langgraph.errors import GraphInterrupt
from langgraph.types import Command, Interrupt
from app.agents.multi_agent_chat.subagents.shared.spec import (
from app.agents.chat.multi_agent_chat.subagents.shared.invocation import (
EXCLUDED_STATE_KEYS,
subagent_invoke_config,
)
from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
SURF_CONTEXT_HINT_PROVIDER_KEY,
ContextHintProvider,
)
@ -34,13 +38,11 @@ from .config import (
consume_surfsense_resume,
drain_parent_null_resume,
has_surfsense_resume,
subagent_invoke_config,
)
from .constants import (
DEFAULT_SUBAGENT_BATCH_CONCURRENCY,
DEFAULT_SUBAGENT_BILLABLE_THRESHOLD,
DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS,
EXCLUDED_STATE_KEYS,
MAX_SUBAGENT_BATCH_SIZE,
)
from .propagation import wrap_with_tool_call_id
@ -80,13 +82,10 @@ _T = TypeVar("_T")
async def _ainvoke_with_timeout[T](
coro: Awaitable[_T], *, subagent_type: str, started_at: float
) -> _T:
"""Apply :data:`DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS` to ``coro``.
"""Apply the subagent invoke timeout to ``coro`` (non-positive disables it).
A non-positive timeout disables the cap (configurable via the
``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` env var). On expiry the
underlying task is cancelled and :class:`SubagentInvokeTimeoutError` is
raised the caller wraps it into a synthetic ToolMessage so the
orchestrator can decide what to do.
On expiry the task is cancelled and :class:`SubagentInvokeTimeoutError` is
raised for the caller to turn into a synthetic ToolMessage.
"""
timeout = DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS
if timeout <= 0:
@ -144,17 +143,31 @@ def build_task_tool_with_parent_config(
task_description: str | None = None,
*,
search_space_id: int | None = None,
resolve_subagent: Callable[[str], Runnable] | None = None,
) -> BaseTool:
"""Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging."""
subagent_graphs: dict[str, Runnable] = {
spec["name"]: spec["runnable"] for spec in subagents
}
# Per-subagent context-hint providers (see ``SurfSenseSubagentSpec``).
# The mapping is sparse: only routes that opted in via ``pack_subagent``
# appear here, and the value is invoked once per ``task(...)`` call to
# generate a short string prepended to the subagent's first
# ``HumanMessage``. Failures are logged and swallowed — a broken hint
# provider must never prevent the underlying task from running.
"""Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging.
``subagents`` are lightweight descriptors (``name``/``description`` + the
optional context-hint provider); the actual compiled graph is fetched
lazily via ``resolve_subagent(name)`` so subagent ``create_agent`` cost is
paid on first ``task(name)`` use rather than at graph-build time.
For backward compatibility (and tests), ``resolve_subagent`` may be omitted
when every descriptor already carries a pre-compiled ``runnable``; in that
case a trivial dict-backed resolver is used.
"""
subagent_names: set[str] = {spec["name"] for spec in subagents}
if resolve_subagent is None:
_eager_graphs: dict[str, Runnable] = {
spec["name"]: spec["runnable"] for spec in subagents if "runnable" in spec
}
def resolve_subagent(name: str) -> Runnable:
return _eager_graphs[name]
# Sparse map of opt-in context-hint providers; each runs once per task()
# call to prepend a string to the subagent's first HumanMessage. Failures
# are swallowed so a broken hint never blocks the task.
subagent_hint_providers: dict[str, ContextHintProvider] = {
spec["name"]: provider
for spec in subagents
@ -176,24 +189,18 @@ def build_task_tool_with_parent_config(
def _billable_call_update(
subagent_type: str, runtime: ToolRuntime
) -> dict[str, Any]:
"""Build the per-call ``billable_calls`` delta + an optional warning.
"""Build the per-call ``billable_calls`` delta plus an optional soft-cap warning.
The orchestrator's ``billable_calls`` map is summed by
:func:`_int_counter_merge_reducer`, so we always emit
``{subagent_type: 1}`` and let the reducer accumulate. If the
cumulative count *after* this call would cross the configured
threshold, we also slip a soft ``messages`` entry into the update
so the orchestrator can read it on its next step and self-limit.
Returning a plain ``dict`` (vs. an extra :class:`Command`) keeps
the helper composable with the existing single/batch return paths.
Always emits ``{subagent_type: 1}`` (a reducer accumulates it); when this
call would cross the threshold, also adds a soft ``messages`` entry so the
orchestrator self-limits on its next step.
"""
delta: dict[str, Any] = {"billable_calls": {subagent_type: 1}}
threshold = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD
if threshold <= 0:
return delta
prior = runtime.state.get("billable_calls") or {}
# ``prior`` may be a plain dict or a reducer-managed mapping; only
# int values are counted so a malformed checkpoint can't crash us.
# Count int values only so a malformed checkpoint can't crash us.
prior_total = sum(v for v in prior.values() if isinstance(v, int))
new_total = prior_total + 1
if prior_total < threshold <= new_total:
@ -212,8 +219,7 @@ def build_task_tool_with_parent_config(
"""Merge the per-call billable counter (and warning) into ``cmd``."""
delta = _billable_call_update(subagent_type, runtime)
warn_text = delta.pop("_billable_warn_text", None)
# ``cmd.update`` may be a dict or LangGraph ``UpdateDict``; defensively
# copy so we don't mutate state shared across other tool returns.
# Copy so we don't mutate state shared with other tool returns.
update = dict(getattr(cmd, "update", {}) or {})
for key, value in delta.items():
update[key] = value
@ -226,14 +232,10 @@ def build_task_tool_with_parent_config(
return Command(update=update)
def _safe_message_text(msg: Any) -> str:
"""Pull text out of a BaseMessage without trusting the ``.text`` property.
"""Pull text out of a BaseMessage without using the ``.text`` property.
``BaseMessage.text`` walks ``content_blocks`` and crashes with
``TypeError: 'NoneType' object is not iterable`` when ``content`` is
``None`` (common for tool-call AIMessages whose payload is purely
structured). ``getattr(msg, "text", None)`` does not catch this
because Python evaluates the property body before falling back to
the default. Read ``content`` directly and coerce defensively.
``.text`` crashes when ``content`` is ``None`` (common for tool-call
AIMessages), and ``getattr`` won't catch it, so read ``content`` directly.
"""
try:
content = getattr(msg, "content", None)
@ -256,23 +258,18 @@ def build_task_tool_with_parent_config(
return str(content)
def _build_tool_trace(messages: list[Any]) -> list[dict[str, Any]]:
"""Compress the subagent's message stream into a compact tool trace.
"""Compress the subagent's messages into a compact tool trace.
Each entry is ``{"tool": <name>, "status": "ok"|"error", "preview":
<120 chars>}`` so the orchestrator can show "this is what your
specialist actually did" without dumping the full message stream
back through the prompt. The list is attached to the returned
ToolMessage's ``additional_kwargs`` (under ``"surf_tool_trace"``);
the LLM never sees it, but UI / observability code can pluck it
out of the checkpoint.
Entries (``{tool, status, preview}``) ride on the ToolMessage's
``additional_kwargs["surf_tool_trace"]`` for UI/observability; the LLM
never sees them.
"""
trace: list[dict[str, Any]] = []
for msg in messages:
tool_name = getattr(msg, "name", None)
tool_call_id_attr = getattr(msg, "tool_call_id", None)
if not tool_name and not tool_call_id_attr:
# Only ToolMessages have either field; skip AIMessage /
# HumanMessage / SystemMessage frames.
# Only ToolMessages carry either field.
continue
status = getattr(msg, "status", None) or "ok"
preview = _safe_message_text(msg).strip().replace("\n", " ")
@ -306,8 +303,7 @@ def build_task_tool_with_parent_config(
)
raise ValueError(msg)
message_text = _safe_message_text(messages[-1]).rstrip()
# Tool-trace is purely observability — wrap defensively so a single
# malformed frame never bubbles up and kills the whole user turn.
# Trace is observability-only; never let a bad frame kill the turn.
try:
tool_trace = _build_tool_trace(messages)
except Exception:
@ -318,10 +314,7 @@ def build_task_tool_with_parent_config(
tool_trace = []
tool_msg = ToolMessage(message_text, tool_call_id=tool_call_id)
if tool_trace:
# ``additional_kwargs`` is a free-form dict on BaseMessage; using
# a ``surf_`` prefix avoids collision with provider-specific keys
# (e.g. Anthropic's ``cache_control``). The LLM doesn't see it;
# consumers (UI, observability) read it off the checkpoint.
# surf_ prefix avoids collision with provider keys (e.g. cache_control).
tool_msg.additional_kwargs["surf_tool_trace"] = tool_trace
return Command(
update={
@ -353,15 +346,13 @@ def build_task_tool_with_parent_config(
def _validate_and_prepare_state(
subagent_type: str, description: str, runtime: ToolRuntime
) -> tuple[Runnable, dict]:
subagent = subagent_graphs[subagent_type]
subagent = resolve_subagent(subagent_type)
subagent_state = {
k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
}
hint = _resolve_context_hint(subagent_type, description, runtime)
if hint:
# Prepend as a tagged block so the subagent prompt can pattern-match
# on the section (and a future change can lift it into its own
# ``SystemMessage`` if needed).
# Tagged block so the subagent prompt can pattern-match the section.
payload = f"<context_hint>\n{hint}\n</context_hint>\n\n{description}"
else:
payload = description
@ -372,16 +363,12 @@ def build_task_tool_with_parent_config(
results: list[tuple[int, str, dict | str, dict | None]],
runtime: ToolRuntime,
) -> Command:
"""Combine per-child results into one Command with a combined ToolMessage.
"""Combine per-child results into one Command with an aggregate ToolMessage.
``results`` is a list of ``(task_index, subagent_type,
payload_or_error_text, child_state_update)`` tuples preserving the
input order so the orchestrator can map each block back to the task
it dispatched. State updates are merged by reducer for keys outside
:data:`EXCLUDED_STATE_KEYS`; everything else (``messages``, ``todos``,
etc.) is replaced by the synthesized aggregate ToolMessage. Every
child also contributes a ``billable_calls`` increment so cost
accounting matches single-mode dispatch.
``results`` tuples are ``(task_index, subagent_type, payload_or_error,
child_state_update)``; output blocks are sorted by index so the LLM can
map them back to dispatch order, and each child contributes a
``billable_calls`` increment to match single-mode accounting.
"""
results.sort(key=lambda r: r[0])
merged_state: dict[str, Any] = {}
@ -422,8 +409,8 @@ def build_task_tool_with_parent_config(
}
)
if state_update:
# Naive merge: later tasks win on scalar collisions; reducer-backed
# fields (``receipts``, ``files`` etc.) accumulate at apply time.
# Later tasks win on scalar collisions; reducer-backed fields
# accumulate at apply time.
merged_state.update(state_update)
aggregate = "\n\n".join(message_blocks)
aggregate_msg = ToolMessage(
@ -467,15 +454,13 @@ def build_task_tool_with_parent_config(
) -> tuple[int, str, dict | str, dict | None]:
"""Run one child of a batched ``task`` call under the concurrency cap.
Errors are returned as plain text in slot 2 so a single child's
failure does not abort the whole batch. ``GraphInterrupt`` from a
batched child is currently treated as a hard failure for that child
only batched HITL is intentionally out of scope for the v1
rollout (see plan tier 2 item 4 risks).
Errors are returned as text (slot 2) so one child's failure doesn't abort
the batch. A child's ``GraphInterrupt`` is a hard failure for that child:
batched HITL is intentionally out of scope.
"""
async with semaphore:
if subagent_type not in subagent_graphs:
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
if subagent_type not in subagent_names:
allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
return (
task_index,
subagent_type,
@ -505,8 +490,7 @@ def build_task_tool_with_parent_config(
)
return (task_index, subagent_type, str(exc), None)
except GraphInterrupt:
# Batched HITL is unsupported in v1 — surface as a failure
# for this child so the rest of the batch still completes.
# Batched HITL unsupported; fail this child so the batch finishes.
logger.warning(
"Batch child %d (%s) raised GraphInterrupt; batched HITL "
"is not supported. Re-dispatch this task as a single "
@ -543,14 +527,11 @@ def build_task_tool_with_parent_config(
return (task_index, subagent_type, result, child_state_update)
def _coerce_batch_arg(tasks: Any) -> list[dict] | str:
"""Rescue common LLM-side malformations of the ``tasks`` argument.
"""Rescue common LLM malformations of the ``tasks`` argument.
Some providers serialise an array argument as a JSON-encoded string,
and small models occasionally hand back a single ``{description,
subagent_type}`` dict instead of a one-element array. Both are
recovered here with a WARN log so the issue is visible in metrics
but the user's turn still completes; truly broken shapes return a
plain string that the caller surfaces as the tool error.
Recovers a JSON-encoded array string and a single dict (instead of a
1-element array), logging a WARN. Unrecoverable shapes return a string
the caller surfaces as the tool error.
"""
if isinstance(tasks, list):
return tasks
@ -585,13 +566,10 @@ def build_task_tool_with_parent_config(
async def _adispatch_batch(
tasks: list[dict], runtime: ToolRuntime
) -> Command | str:
"""Fan-out helper for the ``tasks`` array shape.
"""Fan out the ``tasks`` array (size- and concurrency-capped).
Bounded by :data:`MAX_SUBAGENT_BATCH_SIZE` and concurrency-capped
at :data:`DEFAULT_SUBAGENT_BATCH_CONCURRENCY`. Returns a single
:class:`Command` that the LLM sees as one ToolMessage per child,
prefixed with ``[task <index>]`` so it can map back to the input
order.
Returns one Command; the LLM sees one ``[task <index>]``-prefixed block
per child, in input order.
"""
if not tasks:
return "tasks: array is empty; nothing to dispatch."
@ -657,8 +635,8 @@ def build_task_tool_with_parent_config(
"task: must provide either single-mode (`description`+`subagent_type`) "
"or batch-mode (`tasks`)."
)
if subagent_type not in subagent_graphs:
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
if subagent_type not in subagent_names:
allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
return (
f"We cannot invoke subagent {subagent_type} because it does not exist, "
f"the only allowed types are {allowed_types}"
@ -701,17 +679,16 @@ def build_task_tool_with_parent_config(
if pending_value is not None:
resume_value = consume_surfsense_resume(runtime)
if resume_value is None:
# Bridge invariant: a queued resume must accompany any pending
# subagent interrupt. Fall-through replay would silently re-prompt
# the user; raise so the streaming layer surfaces a clear error.
# A pending interrupt must have a queued resume; otherwise replay
# would silently re-prompt the user. Raise instead.
raise RuntimeError(
f"Subagent {subagent_type!r} has a pending interrupt but no "
"surfsense_resume_value on config; resume bridge is broken."
)
expected = hitlrequest_action_count(pending_value)
resume_value = fan_out_decisions_to_match(resume_value, expected)
# Prevent the parent's resume payload from leaking into subagent
# interrupts via langgraph's parent_scratchpad fallback.
# Stop the parent's resume leaking into subagent interrupts via
# langgraph's parent_scratchpad fallback.
drain_parent_null_resume(runtime)
with ot.subagent_invoke_span(
subagent_type=subagent_type, path=invoke_path
@ -827,10 +804,8 @@ def build_task_tool_with_parent_config(
] = None,
) -> str | Command:
atask_start = time.perf_counter()
# Kill switch: when ops flips the spawn-paused flag for this
# workspace, every ``task(...)`` invocation (single- or batch-mode)
# short-circuits with a clear ToolMessage so the orchestrator can
# tell the user what happened and stop hammering downstream APIs.
# Ops kill switch: short-circuit every task() call for this workspace
# so the orchestrator stops hammering downstream APIs.
if await is_spawn_paused(search_space_id):
logger.warning(
"[hitl_route] atask SPAWN_PAUSED: search_space_id=%s tool_call_id=%s",
@ -869,8 +844,8 @@ def build_task_tool_with_parent_config(
subagent_type,
runtime.tool_call_id,
)
if subagent_type not in subagent_graphs:
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
if subagent_type not in subagent_names:
allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
return (
f"We cannot invoke subagent {subagent_type} because it does not exist, "
f"the only allowed types are {allowed_types}"
@ -921,8 +896,8 @@ def build_task_tool_with_parent_config(
)
expected = hitlrequest_action_count(pending_value)
resume_value = fan_out_decisions_to_match(resume_value, expected)
# Prevent the parent's resume payload from leaking into subagent
# interrupts via langgraph's parent_scratchpad fallback.
# Stop the parent's resume leaking into subagent interrupts via
# langgraph's parent_scratchpad fallback.
drain_parent_null_resume(runtime)
with ot.subagent_invoke_span(
subagent_type=subagent_type, path=invoke_path

View file

@ -0,0 +1,15 @@
"""Context-editing middleware: spill + clear-tool-uses passes (impl + builder)."""
from .builder import build_context_editing_mw
from .middleware import (
ClearToolUsesEdit,
SpillingContextEditingMiddleware,
SpillToBackendEdit,
)
__all__ = [
"ClearToolUsesEdit",
"SpillToBackendEdit",
"SpillingContextEditingMiddleware",
"build_context_editing_mw",
]

View file

@ -7,18 +7,18 @@ from typing import Any
from langchain_core.tools import BaseTool
from app.agents.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
from app.agents.chat.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
safe_exclude_tools,
)
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import (
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from .middleware import (
ClearToolUsesEdit,
SpillingContextEditingMiddleware,
SpillToBackendEdit,
)
from ..shared.flags import enabled
def build_context_editing_mw(
*,

View file

@ -1,4 +1,4 @@
"""Middleware that deduplicates HITL tool calls within a single LLM response.
"""Drop duplicate HITL tool calls before execution.
When the LLM emits multiple calls to the same HITL tool with the same
primary argument (e.g. two ``delete_calendar_event("Doctor Appointment")``),
@ -9,72 +9,33 @@ the duplicate call is stripped from the AIMessage that gets checkpointed.
That means it is also safe across LangGraph ``interrupt()`` boundaries:
the removed call will never appear on graph resume.
Dedup-key resolution order:
Dedup-key resolution order (read from each tool's own ``metadata``):
1. :class:`ToolDefinition.dedup_key` callable provided by the registry
entry. This is the canonical mechanism.
2. ``tool.metadata["hitl_dedup_key"]`` string with a primary arg name;
used by MCP / Composio tools whose schemas the registry doesn't see.
1. ``tool.metadata["dedup_key"]`` callable mapping the args dict to a
stable signature string. This is the canonical mechanism.
2. ``tool.metadata["hitl_dedup_key"]`` string naming a primary arg;
used by MCP / Composio tools that only expose a single key field.
A tool with no resolver from either path simply opts out of dedup.
"""
from __future__ import annotations
import json
import logging
from collections.abc import Callable
from collections.abc import Sequence
from typing import Any
from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.tools import BaseTool
from langgraph.runtime import Runtime
from app.agents.chat.multi_agent_chat.shared.middleware.dedup_tool_calls import (
DedupResolver,
wrap_dedup_key_by_arg_name,
)
logger = logging.getLogger(__name__)
# Resolver type — given the tool ``args`` dict returns a stable
# string used to dedupe consecutive calls. ``None`` means no dedup.
DedupResolver = Callable[[dict[str, Any]], str]
def wrap_dedup_key_by_arg_name(arg_name: str) -> DedupResolver:
"""Adapt a string-arg name into a :data:`DedupResolver`.
Convenience helper used by registry entries that just want to dedupe
on a single arg's lowercased value (the most common case for native
HITL tools like ``send_gmail_email`` keyed on ``subject``).
Example::
ToolDefinition(
name="send_gmail_email",
...,
dedup_key=wrap_dedup_key_by_arg_name("subject"),
)
"""
def _resolver(args: dict[str, Any]) -> str:
return str(args.get(arg_name, "")).lower()
return _resolver
def dedup_key_full_args(args: dict[str, Any]) -> str:
"""Resolver that collapses calls only when **every** argument is identical.
Safe default for tools where no single field uniquely identifies a call
(e.g. MCP tools whose first required field is a shared workspace id).
"""
try:
return json.dumps(args, sort_keys=True, default=str)
except (TypeError, ValueError):
return repr(sorted(args.items())) if isinstance(args, dict) else repr(args)
# Backwards-compatible alias for code that imported the original
# private name. New callers should use :func:`wrap_dedup_key_by_arg_name`.
_wrap_string_key = wrap_dedup_key_by_arg_name
class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg]
"""Remove duplicate HITL tool calls from a single LLM response.
@ -84,9 +45,8 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg]
The dedup-resolver map is built from two sources, in priority order:
1. ``tool.metadata["dedup_key"]`` callable provided by the registry's
``ToolDefinition.dedup_key``. Receives the args dict and returns
a string signature. This is the canonical mechanism.
1. ``tool.metadata["dedup_key"]`` callable that receives the args dict
and returns a string signature. This is the canonical mechanism.
2. ``tool.metadata["hitl_dedup_key"]`` string with a primary arg
name; primarily used by MCP / Composio tools.
"""
@ -162,3 +122,7 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg]
updated_msg = last_msg.model_copy(update={"tool_calls": deduped})
return {"messages": [updated_msg]}
def build_dedup_hitl_mw(tools: Sequence[BaseTool]) -> DedupHITLToolCallsMiddleware:
    """Build the HITL tool-call dedup middleware for ``tools``.

    The incoming sequence is copied into a fresh list before being passed to
    the middleware as ``agent_tools``.
    """
    agent_tools = list(tools)
    return DedupHITLToolCallsMiddleware(agent_tools=agent_tools)

View file

@ -0,0 +1,9 @@
"""Doom-loop middleware: detect repeated identical tool calls (impl + builder)."""
from .builder import build_doom_loop_mw
from .middleware import DoomLoopMiddleware
__all__ = [
"DoomLoopMiddleware",
"build_doom_loop_mw",
]

View file

@ -2,10 +2,10 @@
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import DoomLoopMiddleware
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled
from .middleware import DoomLoopMiddleware
def build_doom_loop_mw(flags: AgentFeatureFlags) -> DoomLoopMiddleware | None:

View file

@ -16,7 +16,7 @@ This ships **OFF by default** until the frontend explicitly handles
``context.permission == "doom_loop"`` interrupts.
Wire format: uses SurfSense's existing ``interrupt()`` payload shape
(see ``app/agents/new_chat/tools/hitl.py``):
(see ``app/agents/shared/tools/hitl.py``):
{
"type": "permission_ask",

View file

@ -0,0 +1,13 @@
"""End-of-turn KB persistence middleware (main-agent only)."""
from .builder import build_kb_persistence_mw
from .middleware import (
KnowledgeBasePersistenceMiddleware,
commit_staged_filesystem_state,
)
__all__ = [
"KnowledgeBasePersistenceMiddleware",
"build_kb_persistence_mw",
"commit_staged_filesystem_state",
]

View file

@ -2,8 +2,11 @@
from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgeBasePersistenceMiddleware
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from .middleware import (
KnowledgeBasePersistenceMiddleware,
)
def build_kb_persistence_mw(

View file

@ -1,33 +1,19 @@
"""End-of-turn persistence for the cloud-mode SurfSense filesystem.
This middleware runs ``aafter_agent`` once per turn (cloud only). It commits
all staged folder creations, file moves, content writes/edits, file deletes
(``rm``), and directory deletes (``rmdir``) to Postgres in a single ordered
pass:
Runs ``aafter_agent`` once per turn (cloud only), committing staged folder
creates, moves, writes/edits, and ``rm``/``rmdir`` to Postgres in one ordered
pass. Order matters: moves resolve before writes (so write-then-move lands at
the final path), and file deletes run before directory deletes (so a same-turn
``rm /a/x.md`` + ``rmdir /a`` works).
1. Materialize ``staged_dirs`` into ``Folder`` rows.
2. Apply ``pending_moves`` in order (chained moves resolved via
``doc_id_by_path``).
3. Normalize ``dirty_paths`` through ``pending_moves`` so write-then-move
sequences commit at the final path. Paths queued for ``rm`` this turn
are dropped here so a write+rm sequence doesn't recreate the doc.
4. Commit content writes / edits for ``/documents/*`` paths, skipping
``temp_*`` basenames.
5. Apply ``pending_deletes`` (``rm``) file deletes run BEFORE directory
deletes so a same-turn ``rm /a/x.md`` + ``rmdir /a`` sequence works.
6. Apply ``pending_dir_deletes`` (``rmdir``); re-verifies emptiness against
the post-step-5 DB state.
When ``flags.enable_action_log`` is on, each destructive op also snapshots a
``DocumentRevision`` / ``FolderRevision`` for revert. For ``rm``/``rmdir`` the
snapshot and DELETE share a SAVEPOINT, so a failed snapshot aborts the delete
rather than making the data silently irreversible.
When ``flags.enable_action_log`` is on every destructive op also writes a
``DocumentRevision`` / ``FolderRevision`` snapshot bound to the
originating ``AgentActionLog`` row via ``tool_call_id``. ``rm``/``rmdir``
share a single ``SAVEPOINT`` with their snapshot if the snapshot fails
the DELETE rolls back and we surface the error rather than silently
making the data irreversible.
The commit body is exposed as a free function ``commit_staged_filesystem_state``
so the optional stream-task fallback (``stream_new_chat.py``) can call the
exact same routine when ``aafter_agent`` was skipped (e.g. client disconnect).
The commit body is a free function (``commit_staged_filesystem_state``) so the
stream-task fallback can run the identical routine when ``aafter_agent`` was
skipped (e.g. client disconnect).
"""
from __future__ import annotations
@ -40,22 +26,28 @@ from typing import Any
from fractional_indexing import generate_key_between
from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.callbacks import adispatch_custom_event, dispatch_custom_event
from langgraph.config import get_config
from langgraph.runtime import Runtime
from sqlalchemy import delete, select, update
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.feature_flags import get_flags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
from app.agents.new_chat.path_resolver import (
from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.chat.multi_agent_chat.shared.receipts.receipt import (
Receipt,
make_receipt,
)
from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
SurfSenseFilesystemState,
)
from app.agents.chat.multi_agent_chat.shared.state.reducers import _CLEAR
from app.agents.chat.runtime.path_resolver import (
DOCUMENTS_ROOT,
parse_documents_path,
safe_folder_segment,
virtual_path_to_doc,
)
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.receipt import Receipt, make_receipt
from app.db import (
AgentActionLog,
Chunk,
@ -211,11 +203,9 @@ async def _create_document(
virtual_path,
search_space_id,
)
# Filesystem-parity invariant: the only thing that *must* be unique is
# the path. Two notes can legitimately share content (e.g. ``cp a b``).
# Guard against the path-derived ``unique_identifier_hash`` constraint
# so we surface a clean ValueError instead of letting the INSERT poison
# the session with an IntegrityError.
# Pre-check the path-derived unique_identifier_hash so a duplicate path
# surfaces as a clean ValueError instead of an INSERT IntegrityError that
# poisons the session. Content is intentionally not unique (cp a b).
path_collision = await session.execute(
select(Document.id).where(
Document.search_space_id == search_space_id,
@ -227,13 +217,6 @@ async def _create_document(
f"a document already exists at path '{virtual_path}' "
"(unique_identifier_hash collision)"
)
# ``content_hash`` is intentionally NOT checked for uniqueness here.
# In a real filesystem two files at different paths can hold identical
# bytes, and the agent's ``write_file`` path needs that semantic to
# support copy/duplicate operations. The hash remains useful as a
# change-detection hint for connector indexers, which still consult it
# via :func:`check_duplicate_document` but do so with a non-unique
# lookup (``.first()``).
content_hash = generate_content_hash(content, search_space_id)
doc = Document(
title=title,
@ -430,15 +413,9 @@ async def _mark_action_reversible(
) -> None:
"""Flip ``agent_action_log.reversible = TRUE`` for ``action_id``.
Best-effort: caller may invoke from inside a SAVEPOINT and treat
failure as a soft demotion (snapshot persists, just no Revert button).
Callers should also call ``_dispatch_reversibility_update`` (defined
below) AFTER the enclosing SAVEPOINT block exits successfully so the
chat tool card can light up its Revert button without
re-fetching ``GET /threads/.../actions``. Dispatching from inside the
SAVEPOINT would risk emitting "reversible=true" for rows whose
update gets rolled back if the surrounding destructive op fails.
Pair with ``_dispatch_reversibility_update`` *after* the enclosing
SAVEPOINT commits, so the UI never sees ``reversible=true`` for a row whose
update later rolls back.
"""
if action_id is None:
return
@ -450,22 +427,11 @@ async def _mark_action_reversible(
async def _dispatch_reversibility_update(action_id: int | None) -> None:
"""Best-effort dispatch of an ``action_log_updated`` custom event.
"""Emit an ``action_log_updated`` SSE event so the Revert button lights up.
Surfaces the post-SAVEPOINT reversibility flip to the SSE layer so
the chat tool card can flip its Revert button live. Defensive:
failures are logged at debug level and swallowed; the
REST endpoint ``GET /threads/.../actions`` is still authoritative.
.. warning::
Inside :func:`commit_staged_filesystem_state` we DEFER all
dispatches until the outer ``session.commit()`` succeeds — see
the ``deferred_dispatches`` queue in that function. Dispatching
from inside a SAVEPOINT block while the outer transaction is
still pending would emit ``reversible=true`` for rows whose
snapshots get rolled back if the outer commit fails. Direct
callers (e.g. the optional stream-task fallback) that own the
full session lifetime can still call this helper inline.
Best-effort (failures swallowed; the REST actions endpoint is
authoritative). Inside :func:`commit_staged_filesystem_state` this is
deferred until after the outer commit via ``deferred_dispatches``.
"""
if action_id is None:
return
@ -484,12 +450,9 @@ async def _dispatch_reversibility_update(action_id: int | None) -> None:
# ---------------------------------------------------------------------------
# Snapshot helpers
# ---------------------------------------------------------------------------
#
# Best-effort helpers swallow + log so a snapshot failure can never break
# the destructive op for non-destructive tools (write/edit/move/mkdir).
# Strict helpers run inside the SAME ``begin_nested()`` SAVEPOINT as the
# destructive DELETE — failure aborts the savepoint and leaves the doc /
# folder intact, so revertable ops never become irreversible silently.
# Best-effort variants (write/edit/move/mkdir) swallow failures. Strict
# variants (rm/rmdir) share the destructive op's SAVEPOINT so a snapshot
# failure aborts the delete instead of making it silently irreversible.
def _doc_revision_payload(
@ -699,15 +662,9 @@ async def commit_staged_filesystem_state(
) -> dict[str, Any] | None:
"""Commit all staged filesystem changes; return the state delta for reducers.
Shared between :class:`KnowledgeBasePersistenceMiddleware.aafter_agent`
and the optional stream-task fallback.
When ``flags.enable_action_log`` is on every destructive op also writes
a ``DocumentRevision`` / ``FolderRevision`` snapshot bound to the
originating ``AgentActionLog`` row via ``tool_call_id``. Snapshot
durability is best-effort for non-destructive ops and STRICT for
``rm``/``rmdir`` (snapshot + DELETE share a SAVEPOINT — snapshot
failure aborts the delete).
Shared between :class:`KnowledgeBasePersistenceMiddleware.aafter_agent` and
the stream-task fallback. See the module docstring for ordering and the
action-log snapshot/revert semantics.
"""
if filesystem_mode != FilesystemMode.CLOUD:
return None
@ -766,8 +723,7 @@ async def commit_staged_filesystem_state(
flags = get_flags()
snapshot_enabled = flags.enable_action_log
# De-duplicate pending deletes per-path while preserving the latest
# tool_call_id (the one the user is most likely to revert via the UI).
# De-dup deletes per-path, keeping the latest tool_call_id (likeliest revert).
file_delete_paths: dict[str, str] = {}
for entry in pending_deletes:
if not isinstance(entry, dict):
@ -791,22 +747,14 @@ async def commit_staged_filesystem_state(
applied_moves: list[dict[str, Any]] = []
doc_id_path_tombstones: dict[str, int | None] = {}
tree_changed = False
# Reversibility-flip dispatches are deferred until AFTER the outer
# ``session.commit()`` succeeds. Dispatching from inside the
# SAVEPOINT chain while the outer transaction is still pending
# would emit ``reversible=true`` for rows whose snapshots get rolled
# back if the final commit raises. Snapshot helpers append on
# success; we drain this list after commit and silently abandon it
# on rollback so the UI stays consistent with durable state.
# Reversibility-flip dispatches are drained only after the outer commit
# succeeds (and abandoned on rollback), so the UI never sees reversible=true
# for a snapshot that didn't durably land.
deferred_dispatches: list[int] = []
try:
async with shielded_async_session() as session:
# ------------------------------------------------------------------
# Resolve action-id bindings up front. One SELECT per turn for all
# tool_call_ids, NOT one per op — important because a turn that
# touches 50 paths would otherwise issue 50 lookups.
# ------------------------------------------------------------------
# Resolve all action-id bindings in one SELECT per turn, not per op.
action_id_by_call: dict[str, int] = {}
if snapshot_enabled and thread_id is not None:
tool_call_ids: set[str] = set()
@ -839,10 +787,7 @@ async def commit_staged_filesystem_state(
next(iter(action_id_by_call), None) if action_id_by_call else None
)
# ------------------------------------------------------------------
# 1. staged_dirs -> Folder rows. Snapshot post-flush so the new
# folder_id is available for the FK.
# ------------------------------------------------------------------
# 1. staged_dirs -> Folder rows (snapshot post-flush for the FK).
for folder_path in staged_dirs:
if not isinstance(folder_path, str):
continue
@ -863,7 +808,6 @@ async def commit_staged_filesystem_state(
tcid = staged_dir_tool_calls.get(folder_path)
action_id = _action_id_for(tcid)
if action_id is not None:
# Re-read the folder for the snapshot.
result = await session.execute(
select(Folder).where(Folder.id == folder_id)
)
@ -878,16 +822,13 @@ async def commit_staged_filesystem_state(
deferred_dispatches=deferred_dispatches,
)
# ------------------------------------------------------------------
# 2. pending_moves. Snapshot pre-move (in-place restore on revert).
# ------------------------------------------------------------------
# 2. pending_moves (snapshot pre-move for in-place restore on revert).
for move in pending_moves:
source = str(move.get("source") or "")
if snapshot_enabled and source:
tcid = str(move.get("tool_call_id") or "")
action_id = _action_id_for(tcid)
if action_id is not None:
# Resolve the doc to snapshot BEFORE we mutate it.
doc_id_pre = doc_id_by_path.get(source)
document_pre: Document | None = None
if doc_id_pre is not None:
@ -937,10 +878,8 @@ async def commit_staged_filesystem_state(
path = move_alias[path]
return path
# ------------------------------------------------------------------
# 3. dirty_paths -> writes/edits. Skip any path queued for ``rm``
# this turn so a write+rm sequence doesn't recreate the doc.
# ------------------------------------------------------------------
# 3. dirty_paths -> writes/edits. Paths queued for rm this turn are
# skipped so a write+rm sequence doesn't recreate the doc.
kb_dirty_seen: set[str] = set()
kb_dirty: list[str] = []
kb_dirty_origin: dict[str, str] = {}
@ -969,9 +908,7 @@ async def commit_staged_filesystem_state(
continue
content = "\n".join(file_data.get("content") or [])
doc_id = doc_id_by_path.get(path)
# Path ↔ tool_call_id binding: the dirty_paths list dedupes via
# _add_unique_reducer, so we look up the latest tool_call_id by
# path (or by the un-renamed origin).
# Look up tool_call_id by final path or its pre-rename origin.
origin = kb_dirty_origin.get(path, path)
tcid = dirty_path_tool_calls.get(path) or dirty_path_tool_calls.get(
origin
@ -979,12 +916,9 @@ async def commit_staged_filesystem_state(
action_id = _action_id_for(tcid)
if doc_id is None:
# The in-memory ``doc_id_by_path`` is per-thread and starts
# empty in every new chat. If the agent writes to a path
# that already exists in the DB (e.g. a previous chat's
# ``notes.md``), we must NOT try to INSERT — it would hit
# ``unique_identifier_hash`` (path-derived). Look up the
# existing doc and update it in place instead.
# doc_id_by_path is per-thread and empty in a new chat, so a
# write to a path already in the DB must update in place, not
# INSERT (which would hit the path-derived unique hash).
existing = await virtual_path_to_doc(
session,
search_space_id=search_space_id,
@ -1033,12 +967,9 @@ async def commit_staged_filesystem_state(
}
)
else:
# Fresh create. Wrap each create in a SAVEPOINT so a
# residual ``IntegrityError`` (e.g. a deployment that
# hasn't run migration 133 yet, where
# ``documents.content_hash`` still carries its legacy
# global UNIQUE constraint) rolls back only this one
# create instead of poisoning the whole turn.
# Fresh create, wrapped in a SAVEPOINT so a residual
# IntegrityError (e.g. pre-migration-133 content_hash UNIQUE)
# rolls back only this create, not the whole turn.
placeholder_revision_id: int | None = None
if snapshot_enabled and action_id is not None:
placeholder_revision_id = await _snapshot_document_pre_create(
@ -1061,8 +992,7 @@ async def commit_staged_filesystem_state(
logger.warning(
"kb_persistence: skipping %s create: %s", path, exc
)
# Roll back the placeholder revision since the create
# never happened.
# Create never happened; drop its placeholder revision.
if placeholder_revision_id is not None:
await session.execute(
delete(DocumentRevision).where(
@ -1109,19 +1039,14 @@ async def commit_staged_filesystem_state(
)
tree_changed = True
# ------------------------------------------------------------------
# 4. pending_deletes -> ``rm``. STRICT durability: snapshot + DELETE
# share a SAVEPOINT. If the snapshot insert fails, the DELETE
# rolls back too and we surface the error rather than silently
# making the data irreversible.
# ------------------------------------------------------------------
# 4. pending_deletes -> rm. Strict: snapshot + DELETE share a
# SAVEPOINT, so a failed snapshot rolls the delete back too.
for raw_path, tcid in file_delete_paths.items():
final = _final_path(raw_path)
if not final.startswith(DOCUMENTS_ROOT + "/"):
continue
action_id = _action_id_for(tcid)
# Resolve the doc.
doc_id_for_delete = doc_id_by_path.get(final)
document_to_delete: Document | None = None
if doc_id_for_delete is not None:
@ -1150,7 +1075,6 @@ async def commit_staged_filesystem_state(
try:
async with session.begin_nested():
# Strict: snapshot first; failure aborts the delete.
if snapshot_enabled and action_id is not None:
chunks = await _load_chunks_for_snapshot(
session, doc_id=doc_pk
@ -1179,10 +1103,7 @@ async def commit_staged_filesystem_state(
)
continue
# B1 — SAVEPOINT released. Defer the reversibility-flip
# dispatch until AFTER the outer commit succeeds so we
# never tell the UI a row is reversible if its snapshot
# gets rolled back.
# Defer the reversibility flip until after the outer commit.
if snapshot_enabled and action_id is not None:
deferred_dispatches.append(int(action_id))
@ -1201,11 +1122,8 @@ async def commit_staged_filesystem_state(
)
tree_changed = True
# ------------------------------------------------------------------
# 5. pending_dir_deletes -> ``rmdir``. STRICT durability + final
# emptiness check (after step 4's deletes have run, an "empty
# mid-turn" directory really IS empty in DB now).
# ------------------------------------------------------------------
# 5. pending_dir_deletes -> rmdir. Strict, and re-checks emptiness
# against post-step-4 DB state.
for raw_path, tcid in dir_delete_paths.items():
final = _final_path(raw_path)
if not final.startswith(DOCUMENTS_ROOT + "/"):
@ -1226,7 +1144,6 @@ async def commit_staged_filesystem_state(
)
continue
# Re-check emptiness against in-DB state.
docs_in_folder = await session.execute(
select(Document.id)
.where(Document.folder_id == folder_id)
@ -1291,10 +1208,7 @@ async def commit_staged_filesystem_state(
)
continue
# B1 — SAVEPOINT released. Defer the reversibility-flip
# dispatch until AFTER the outer commit succeeds so we
# never tell the UI a row is reversible if its snapshot
# gets rolled back.
# Defer the reversibility flip until after the outer commit.
if snapshot_enabled and action_id is not None:
deferred_dispatches.append(int(action_id))
@ -1314,18 +1228,13 @@ async def commit_staged_filesystem_state(
logger.exception(
"kb_persistence: commit failed (search_space=%s)", search_space_id
)
# Outer commit raised — every SAVEPOINT-released change above
# (snapshots + reversibility flips) is now rolled back. Drop
# the deferred SSE dispatches so the UI stays consistent with
# durable state.
# Outer commit raised: everything above rolled back, so drop the
# deferred dispatches.
deferred_dispatches.clear()
return None
# Outer commit succeeded; flush deferred reversibility-flip
# dispatches now so the chat tool card can light up its Revert
# button without re-fetching ``GET /threads/.../actions``. De-dup
# to avoid emitting the same id twice (e.g. write-then-rm in the
# same turn dispatches once for each snapshot site).
# Commit succeeded; flush deferred reversibility flips (de-duped, since
# write-then-rm in one turn appends an id per snapshot site).
if deferred_dispatches and dispatch_events:
for action_id in dict.fromkeys(deferred_dispatches):
try:
@ -1371,9 +1280,8 @@ async def commit_staged_filesystem_state(
p for p in files if isinstance(p, str) and _basename(p).startswith(_TEMP_PREFIX)
]
# Tombstone every committed-delete path so a stale ``state["files"]`` entry
# (which als_info would otherwise interpret as content) cannot survive into
# the next turn and make a now-empty folder look non-empty.
# Tombstone committed-delete paths so a stale state["files"] entry can't
# survive into the next turn and make a now-empty folder look non-empty.
deleted_file_paths = [
str(payload.get("virtualPath") or "")
for payload in committed_deletes
@ -1394,11 +1302,8 @@ async def commit_staged_filesystem_state(
"dirty_path_tool_calls": {_CLEAR: True},
}
# Emit one Receipt per committed mutation, folded into ``state['receipts']``
# via ``_list_append_reducer``. The receipts surface what actually committed
# (post-savepoint) rather than what the LLM intended; the orchestrator uses
# them as ground truth in the ``<verification>`` teaching. KB writes do not
# have public verifiable URLs, so ``verifiable_url`` stays unset.
# One Receipt per committed mutation: ground truth (post-savepoint) for the
# orchestrator's <verification> teaching. KB writes have no public URL.
receipts: list[Receipt] = []
def _kb_receipt(
@ -1439,8 +1344,6 @@ async def commit_staged_filesystem_state(
external_id=payload.get("id"),
)
for payload in applied_moves:
# ``applied_moves`` rows carry the destination ``virtualPath`` because
# the move has already landed in the DB by the time we reach this code.
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
@ -1480,9 +1383,7 @@ async def commit_staged_filesystem_state(
if tree_changed:
delta["tree_version"] = int(state_dict.get("tree_version") or 0) + 1
# Avoid 'unused' lint when turn_id_for_revision was only useful for
# diagnostic purposes inside the SAVEPOINT chain above.
_ = turn_id_for_revision
_ = turn_id_for_revision # diagnostic-only; silence unused lint
logger.info(
"kb_persistence: commit (search_space=%s) creates=%d updates=%d "
@ -1536,9 +1437,33 @@ class KnowledgeBasePersistenceMiddleware(AgentMiddleware): # type: ignore[type-
search_space_id=self.search_space_id,
created_by_id=self.created_by_id,
filesystem_mode=self.filesystem_mode,
thread_id=self.thread_id,
thread_id=self._resolve_thread_id(),
)
def _resolve_thread_id(self) -> int | None:
    """Return the thread id of the run currently executing, if one is known.

    ``aafter_agent`` is handed a ``Runtime`` only, which does not carry the
    ``RunnableConfig``, so ``configurable.thread_id`` is read through
    :func:`langgraph.config.get_config` — the same node-context pattern used
    by ``BusyMutexMiddleware``. Looking the id up at call time, instead of
    trusting the value captured in ``__init__``, lets a single cached compiled
    graph commit staged writes against the right thread across many chats.

    Returns:
        The live thread id as an ``int``; ``None`` when a live id is present
        but cannot be converted to ``int``; otherwise the constructor-supplied
        ``self.thread_id`` (legacy/test runtimes with no usable config).
    """
    try:
        active_config = get_config()
    except Exception:
        # No config could be obtained; treat it the same as "no config".
        active_config = None

    if not isinstance(active_config, dict):
        return self.thread_id

    configurable = active_config.get("configurable") or {}
    live_thread_id = configurable.get("thread_id")
    if live_thread_id is None:
        return self.thread_id

    try:
        return int(live_thread_id)
    except (TypeError, ValueError):
        # A live id exists but is unusable: report "unknown" rather than
        # falling back to the constructor value.
        return None
__all__ = [
"KnowledgeBasePersistenceMiddleware",

View file

@ -4,8 +4,10 @@ from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgePriorityMiddleware
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
KnowledgePriorityMiddleware,
)
from app.services.llm_service import get_planner_llm
@ -17,7 +19,16 @@ def build_knowledge_priority_mw(
available_connectors: list[str] | None,
available_document_types: list[str] | None,
mentioned_document_ids: list[int] | None,
preinjection_enabled: bool = True,
) -> KnowledgePriorityMiddleware:
"""Build the KB priority middleware.
When ``preinjection_enabled`` is False (lazy mode; this parameter itself
defaults to True, so callers must pass False to get it), the middleware
runs in mentions-only mode: it skips the expensive planner LLM + embedding
+ hybrid search and only surfaces explicit @-mentions. The main agent is
expected to pull relevant KB content on demand via the
``search_knowledge_base`` tool instead.
"""
return KnowledgePriorityMiddleware(
llm=llm,
planner_llm=get_planner_llm(),
@ -27,4 +38,5 @@ def build_knowledge_priority_mw(
available_document_types=available_document_types,
mentioned_document_ids=mentioned_document_ids,
inject_system_message=False,
mentions_only=not preinjection_enabled,
)

View file

@ -0,0 +1,9 @@
"""Knowledge-tree middleware: <workspace_tree> injection, cloud only (impl + builder)."""
from .builder import build_knowledge_tree_mw
from .middleware import KnowledgeTreeMiddleware
__all__ = [
"KnowledgeTreeMiddleware",
"build_knowledge_tree_mw",
]

View file

@ -4,8 +4,9 @@ from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgeTreeMiddleware
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from .middleware import KnowledgeTreeMiddleware
def build_knowledge_tree_mw(

View file

@ -33,9 +33,11 @@ from langchain_core.messages import SystemMessage
from langgraph.runtime import Runtime
from sqlalchemy import select
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
from app.agents.new_chat.path_resolver import (
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
SurfSenseFilesystemState,
)
from app.agents.chat.runtime.path_resolver import (
DOCUMENTS_ROOT,
PathIndex,
build_path_index,

View file

@ -0,0 +1,5 @@
"""User/team memory injection middleware (main-agent only)."""
from .builder import build_memory_mw
__all__ = ["build_memory_mw"]

View file

@ -2,9 +2,10 @@
from __future__ import annotations
from app.agents.new_chat.middleware import MemoryInjectionMiddleware
from app.db import ChatVisibility
from .middleware import MemoryInjectionMiddleware
def build_memory_mw(
*,

View file

@ -0,0 +1,9 @@
"""Noop-injection middleware: provider-compat _noop tool (impl + builder)."""
from .builder import build_noop_injection_mw
from .middleware import NoopInjectionMiddleware
__all__ = [
"NoopInjectionMiddleware",
"build_noop_injection_mw",
]

View file

@ -2,10 +2,10 @@
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import NoopInjectionMiddleware
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled
from .middleware import NoopInjectionMiddleware
def build_noop_injection_mw(flags: AgentFeatureFlags) -> NoopInjectionMiddleware | None:

View file

@ -0,0 +1,9 @@
"""OTel-span middleware: spans on model and tool calls (impl + builder)."""
from .builder import build_otel_mw
from .middleware import OtelSpanMiddleware
__all__ = [
"OtelSpanMiddleware",
"build_otel_mw",
]

View file

@ -2,10 +2,10 @@
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import OtelSpanMiddleware
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled
from .middleware import OtelSpanMiddleware
def build_otel_mw(flags: AgentFeatureFlags) -> OtelSpanMiddleware | None:

View file

@ -24,6 +24,7 @@ from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import AIMessage, ToolMessage
from app.observability import metrics as ot_metrics, otel as ot
from app.utils.perf import get_perf_logger
if TYPE_CHECKING: # pragma: no cover — type-only
from langchain.agents.middleware.types import (
@ -34,6 +35,7 @@ if TYPE_CHECKING: # pragma: no cover — type-only
from langgraph.types import Command
logger = logging.getLogger(__name__)
_perf_log = get_perf_logger()
class OtelSpanMiddleware(AgentMiddleware):
@ -60,7 +62,23 @@ class OtelSpanMiddleware(AgentMiddleware):
handler: Callable[[ModelRequest], Awaitable[ModelResponse | AIMessage | Any]],
) -> ModelResponse | AIMessage | Any:
if not ot.is_enabled():
return await handler(request)
# Always emit a [PERF] line for the model step even when OTel is
# disabled. This isolates provider/model latency from the agent's
# pre-flight (before_agent KB-priority/memory/tree) work, which is
# the usual culprit when the multi-agent path feels slow to start.
# ``perf_counter`` at entry doubles as the "before_agent finished /
# model call started" marker on the first step of a turn.
model_id, _provider = _resolve_model_attrs(request)
_t0 = time.perf_counter()
_perf_log.info("[model_call] start model=%s", model_id)
try:
return await handler(request)
finally:
_perf_log.info(
"[model_call] done model=%s elapsed=%.3fs",
model_id,
time.perf_counter() - _t0,
)
model_id, provider = _resolve_model_attrs(request)
t0 = time.perf_counter()

View file

@ -7,15 +7,15 @@ from typing import Any
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.plugin_loader import (
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from app.db import ChatVisibility
from ..plugins.loader import (
PluginContext,
load_allowed_plugin_names_from_env,
load_plugin_middlewares,
)
from app.db import ChatVisibility
from ..shared.flags import enabled
def build_plugin_middlewares(

View file

@ -6,14 +6,11 @@ import logging
from deepagents.middleware.skills import SkillsMiddleware
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import (
build_skills_backend_factory,
default_skills_sources,
)
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled
from ..skills.backends import build_skills_backend_factory, default_skills_sources
def build_skills_mw(

View file

@ -0,0 +1,314 @@
"""Main-agent middleware list assembly: one line per slot.
The main agent is a pure router — filesystem reads/writes are owned by the
``knowledge_base`` subagent and delegated via the ``task`` tool. The stack
here only renders KB context (workspace tree + priority docs), projects it
into system messages, and commits any subagent-side staged writes at end of
turn (cloud mode).
"""
from __future__ import annotations
import logging
import time
from collections.abc import Sequence
from typing import Any, cast
from deepagents import SubAgent
from deepagents.backends import StateBackend
from langchain.agents import create_agent
from langchain_core.language_models import BaseChatModel
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.chat.multi_agent_chat.main_agent.middleware.memory import (
build_memory_mw,
)
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.agents.chat.multi_agent_chat.shared.middleware.anthropic_cache import (
build_anthropic_cache_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.compaction import (
build_compaction_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.kb_context_projection import (
build_kb_context_projection_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.patch_tool_calls import (
build_patch_tool_calls_mw,
)
from app.agents.chat.multi_agent_chat.shared.middleware.resilience import (
build_resilience_middlewares,
)
from app.agents.chat.multi_agent_chat.shared.middleware.todos import build_todos_mw
from app.agents.chat.multi_agent_chat.shared.permissions import (
build_permission_mw,
)
from app.agents.chat.multi_agent_chat.subagents import (
build_subagents,
get_subagents_to_exclude,
)
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.agent import (
NAME as KB_WRITE_NAME,
READONLY_NAME as KB_READONLY_NAME,
build_readonly_subagent as build_kb_readonly_subagent,
build_subagent as build_kb_write_subagent,
)
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.ask_knowledge_base_tool import (
build_ask_knowledge_base_tool,
)
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.prompts import (
load_description as load_kb_write_description,
)
from app.agents.chat.multi_agent_chat.subagents.middleware_stack import (
build_subagent_middleware_stack,
)
from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
SURF_LAZY_SPEC_FACTORY_KEY,
)
from app.db import ChatVisibility
from app.utils.perf import get_perf_logger
from .action_log import build_action_log_mw
from .anonymous_document import build_anonymous_doc_mw
from .busy_mutex import build_busy_mutex_mw
from .checkpointed_subagent_middleware import (
SurfSenseCheckpointedSubAgentMiddleware,
)
from .checkpointed_subagent_middleware.task_description import (
TASK_TOOL_DESCRIPTION,
)
from .context_editing import build_context_editing_mw
from .dedup_hitl import build_dedup_hitl_mw
from .doom_loop import build_doom_loop_mw
from .kb_persistence import build_kb_persistence_mw
from .knowledge_priority import build_knowledge_priority_mw
from .knowledge_tree import build_knowledge_tree_mw
from .noop_injection import build_noop_injection_mw
from .otel_span import build_otel_mw
from .plugins import build_plugin_middlewares
from .skills import build_skills_mw
from .tool_call_repair import build_repair_mw
_perf_log = get_perf_logger()
def build_main_agent_deepagent_middleware(
    *,
    llm: BaseChatModel,
    tools: Sequence[BaseTool],
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
    visibility: ChatVisibility,
    anon_session_id: str | None,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    max_input_tokens: int | None,
    flags: AgentFeatureFlags,
    subagent_dependencies: dict[str, Any],
    checkpointer: Checkpointer,
    mcp_tools_by_agent: dict[str, list[BaseTool]] | None = None,
    disabled_tools: list[str] | None = None,
) -> list[Any]:
    """Ordered middleware for ``create_agent`` (None entries already stripped).

    The build proceeds in four timed phases:

    1. Build the resilience and memory middlewares and the middleware stack
       shared by every subagent.
    2. Register two lazy factories: one that builds and compiles the read-only
       knowledge-base agent, one that builds the write knowledge-base subagent
       spec.
    3. Build the remaining subagents eagerly and append the lazy write
       knowledge-base descriptor to that registry.
    4. Assemble the main-agent middleware list in its fixed order and drop
       every ``None`` entry.

    Args:
        llm: Chat model used for subagent construction and passed to the
            knowledge-tree, knowledge-priority, compaction and plugin builders.
        tools: Main-agent tools, passed to the context-editing, tool-call
            repair and dedup-HITL builders.
        backend_resolver: Added to the subagent dependencies and passed to the
            context-editing builder.
        filesystem_mode: Added to the subagent dependencies and passed to the
            anonymous-doc, knowledge-tree, knowledge-priority, KB-persistence
            and skills builders.
        search_space_id: Passed to the memory, knowledge, persistence, skills,
            subagent, action-log and plugin builders.
        user_id: Passed to the memory, KB-persistence, action-log and plugin
            builders.
        thread_id: Passed to the KB-persistence and action-log builders.
        visibility: Passed to the memory and plugin builders.
        anon_session_id: Passed to the anonymous-doc builder.
        available_connectors: Used to compute which subagents to exclude and
            passed to the knowledge-priority builder.
        available_document_types: Passed to the knowledge-priority builder.
        mentioned_document_ids: Passed to the knowledge-priority builder.
        max_input_tokens: Passed to the context-editing builder.
        flags: Feature flags; added to the subagent dependencies and consulted
            by most builders.
        subagent_dependencies: Base dependency mapping for subagent builders.
            It is copied, not mutated, before extra keys are added.
        checkpointer: Used when compiling the read-only knowledge-base agent
            and by the checkpointed subagent middleware.
        mcp_tools_by_agent: Per-agent MCP tools forwarded to
            ``build_subagents``; ``None`` is forwarded as an empty dict.
        disabled_tools: Tool names forwarded to ``build_subagents`` and
            filtered out of the lazily built write knowledge-base spec.

    Returns:
        The non-``None`` middleware instances in assembly order.
    """
    stack_build_start = time.perf_counter()

    resilience = build_resilience_middlewares(flags)
    memory_mw = build_memory_mw(
        user_id=user_id,
        search_space_id=search_space_id,
        visibility=visibility,
    )

    # Rebind to a fresh dict so the caller's mapping is not mutated. The two
    # closures below read this rebound name when they eventually run.
    subagent_dependencies = {
        **subagent_dependencies,
        "backend_resolver": backend_resolver,
        "filesystem_mode": filesystem_mode,
        "flags": flags,
    }

    # Timed separately so the perf log can attribute the cost of the shared
    # subagent middleware stack.
    shared_mw_start = time.perf_counter()
    shared_subagent_middleware = build_subagent_middleware_stack(
        resilience=resilience,
        flags=flags,
    )
    shared_mw_elapsed = time.perf_counter() - shared_mw_start

    def _compile_kb_readonly() -> Runnable:
        """Build *and* compile the read-only KB graph on first ``ask_knowledge_base`` use.

        Both the spec build (``build_kb_readonly_subagent`` middleware +
        tool-schema construction, ~the same cost as one regular subagent) and
        the ``create_agent`` compile are deferred here (memoized by
        ``build_ask_knowledge_base_tool``) so neither is paid on the cold
        agent-build / TTFT path; most first turns never call a subagent.
        """
        build_start = time.perf_counter()
        kb_readonly_spec = build_kb_readonly_subagent(
            dependencies=subagent_dependencies,
            model=llm,
            middleware_stack=shared_subagent_middleware,
        ).spec
        runnable = create_agent(
            llm,
            system_prompt=kb_readonly_spec["system_prompt"],
            tools=kb_readonly_spec["tools"],
            middleware=kb_readonly_spec["middleware"],
            name=KB_READONLY_NAME,
            checkpointer=checkpointer,
        )
        # One log line covering spec build plus compile, emitted whenever this
        # factory actually runs.
        _perf_log.info(
            "[subagent_compile_lazy] name=%s (spec+compile) in %.3fs",
            KB_READONLY_NAME,
            time.perf_counter() - build_start,
        )
        return runnable

    # Only the factory is handed over here; nothing is compiled at this point.
    ask_kb_tool = build_ask_knowledge_base_tool(_compile_kb_readonly)

    def _build_kb_write_spec() -> dict[str, Any]:
        """Build the *write* knowledge_base subagent spec on first ``task`` use.

        The KB filesystem middleware builds ~13 tool schemas at ~150ms each
        (~2s total), all of which used to land on the cold agent-build / TTFT
        path even though ``task("knowledge_base")`` is essentially never the
        first thing a turn does. Deferring the whole spec build here (memoized
        by the checkpointed subagent middleware) moves that cost to the first
        actual KB-write delegation. Captures the same ``subagent_dependencies``
        the eager build would have used, so cross-thread cache behaviour is
        unchanged.
        """
        spec = build_kb_write_subagent(
            dependencies=subagent_dependencies,
            model=llm,
            middleware_stack=shared_subagent_middleware,
        ).spec
        if disabled_tools:
            # Drop disabled tools by name; objects without a ``name``
            # attribute are kept because ``None`` is never in the set.
            disabled = frozenset(disabled_tools)
            tools = spec.get("tools")  # type: ignore[typeddict-item]
            if isinstance(tools, list):
                spec["tools"] = [  # type: ignore[typeddict-unknown-key]
                    t for t in tools if getattr(t, "name", None) not in disabled
                ]
        return cast(dict[str, Any], spec)

    subagents_start = time.perf_counter()
    # The write knowledge_base subagent is excluded from the eager build and
    # registered as a lazy descriptor (name + description cheap; spec built on
    # first ``task("knowledge_base")`` use) — see ``_build_kb_write_spec``.
    exclude_names = [*get_subagents_to_exclude(available_connectors), KB_WRITE_NAME]
    subagents: list[SubAgent] = build_subagents(
        dependencies=subagent_dependencies,
        model=llm,
        middleware_stack=shared_subagent_middleware,
        mcp_tools_by_agent=mcp_tools_by_agent or {},
        exclude=exclude_names,
        disabled_tools=disabled_tools,
        ask_kb_tool=ask_kb_tool,
    )
    # Descriptor carries only name, description and the spec factory.
    kb_write_descriptor = cast(
        SubAgent,
        {
            "name": KB_WRITE_NAME,
            "description": load_kb_write_description(),
            SURF_LAZY_SPEC_FACTORY_KEY: _build_kb_write_spec,
        },
    )
    subagents.append(kb_write_descriptor)
    subagents_elapsed = time.perf_counter() - subagents_start

    # Goes through the root ``logging`` module, not ``_perf_log``.
    logging.debug("Subagents registry: %s", [s["name"] for s in subagents])

    assembly_start = time.perf_counter()
    # List order is the order returned to the caller. Entries that evaluate to
    # ``None`` are removed by the filter below; the plugin middlewares are
    # spliced in as individual entries.
    stack: list[Any] = [
        build_busy_mutex_mw(flags),
        build_otel_mw(flags),
        build_todos_mw(system_prompt=""),
        memory_mw,
        build_anonymous_doc_mw(
            filesystem_mode=filesystem_mode, anon_session_id=anon_session_id
        ),
        build_knowledge_tree_mw(
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            llm=llm,
        ),
        build_knowledge_priority_mw(
            llm=llm,
            search_space_id=search_space_id,
            filesystem_mode=filesystem_mode,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
            mentioned_document_ids=mentioned_document_ids,
            preinjection_enabled=flags.enable_kb_priority_preinjection,
        ),
        build_kb_context_projection_mw(),
        build_kb_persistence_mw(
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            user_id=user_id,
            thread_id=thread_id,
        ),
        build_skills_mw(
            flags=flags,
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
        ),
        SurfSenseCheckpointedSubAgentMiddleware(
            checkpointer=checkpointer,
            backend=StateBackend,
            subagents=subagents,
            system_prompt=None,
            task_description=TASK_TOOL_DESCRIPTION,
            search_space_id=search_space_id,
        ),
        resilience.model_call_limit,
        resilience.tool_call_limit,
        build_context_editing_mw(
            flags=flags,
            max_input_tokens=max_input_tokens,
            tools=tools,
            backend_resolver=backend_resolver,
        ),
        build_compaction_mw(llm),
        build_noop_injection_mw(flags),
        resilience.retry,
        resilience.fallback,
        build_repair_mw(flags=flags, tools=tools),
        build_permission_mw(flags=flags),
        build_doom_loop_mw(flags),
        build_action_log_mw(
            flags=flags,
            thread_id=thread_id,
            search_space_id=search_space_id,
            user_id=user_id,
        ),
        build_patch_tool_calls_mw(),
        build_dedup_hitl_mw(tools),
        *build_plugin_middlewares(
            flags=flags,
            search_space_id=search_space_id,
            user_id=user_id,
            visibility=visibility,
            llm=llm,
        ),
        build_anthropic_cache_mw(),
    ]
    # Strip ``None`` placeholders while preserving relative order.
    result = [m for m in stack if m is not None]
    assembly_elapsed = time.perf_counter() - assembly_start

    # Single summary line: total wall time plus the three timed phases and the
    # final subagent / middleware counts.
    _perf_log.info(
        "[stack_build] total=%.3fs shared_subagent_mw=%.3fs "
        "build_subagents=%.3fs stack_assembly=%.3fs subagents=%d mw=%d "
        "(kb_readonly deferred to first ask_knowledge_base)",
        time.perf_counter() - stack_build_start,
        shared_mw_elapsed,
        subagents_elapsed,
        assembly_elapsed,
        len(subagents),
        len(result),
    )
    return result

View file

@ -0,0 +1,9 @@
"""Tool-call-repair middleware: fix miscased/unknown tool names (impl + builder)."""
from .builder import build_repair_mw
from .middleware import ToolCallNameRepairMiddleware
# Names exported by ``from <this package> import *``; both are imported above.
__all__ = [
    "ToolCallNameRepairMiddleware",
    "build_repair_mw",
]

View file

@ -6,10 +6,10 @@ from collections.abc import Sequence
from langchain_core.tools import BaseTool
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ToolCallNameRepairMiddleware
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
from ..shared.flags import enabled
from .middleware import ToolCallNameRepairMiddleware
# deepagents-built-in tool names the repair pass treats as known.
_DEEPAGENT_BUILTIN_TOOL_NAMES: frozenset[str] = frozenset(

View file

@ -34,8 +34,6 @@ from langchain.agents.middleware.types import (
from langchain_core.messages import AIMessage
from langgraph.runtime import Runtime
from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME
logger = logging.getLogger(__name__)
@ -120,6 +118,12 @@ class ToolCallNameRepairMiddleware(
return call
# Stage 2 — invalid fallback
# Local import keeps the middleware module import-light and avoids any
# tools <-> middleware import-order coupling at module scope.
from app.agents.chat.multi_agent_chat.main_agent.tools.invalid_tool import (
INVALID_TOOL_NAME,
)
if INVALID_TOOL_NAME in registered:
original_args = call.get("args") or {}
error_msg = (

View file

@ -17,7 +17,7 @@ Wire-up in ``pyproject.toml`` (illustrative; the in-repo plugin doesn't
need this -- it's already on the import path)::
[project.entry-points."surfsense.plugins"]
year_substituter = "app.agents.new_chat.plugins.year_substituter:make_middleware"
year_substituter = "app.agents.chat.multi_agent_chat.main_agent.plugins.year_substituter:make_middleware"
"""
from __future__ import annotations
@ -34,7 +34,7 @@ if TYPE_CHECKING: # pragma: no cover - type-only
from langchain_core.messages import ToolMessage
from langgraph.types import Command
from app.agents.new_chat.plugin_loader import PluginContext
from .loader import PluginContext
logger = logging.getLogger(__name__)

View file

@ -10,18 +10,18 @@ from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.new_chat.agent_cache import (
from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
from app.db import ChatVisibility
from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
from .agent_cache_store import (
flags_signature,
get_cache,
stable_hash,
system_prompt_hash,
tools_signature,
)
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.db import ChatVisibility
from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
def mcp_signature(mcp_tools_by_agent: dict[str, list[BaseTool]]) -> str:
@ -91,10 +91,18 @@ async def build_agent_with_cache(
# Every per-request value any middleware closes over at __init__ must be in
# the key, otherwise a hit will leak state across threads. Bump the schema
# version when the component list changes shape.
#
# Cross-thread reuse: when enabled, ``thread_id`` is dropped from the key so
# one compiled graph serves all of a user's (same space/config/visibility)
# chats. This is only safe because ActionLog, KB-persistence, and the
# deliverables tools now resolve the chat thread from the live
# RunnableConfig instead of a constructor closure; the schema tag is bumped
# so v2 (per-thread) entries are never confused with v3 (shared) ones.
cross_thread = flags.enable_cross_thread_agent_cache
cache_key = stable_hash(
"multi-agent-v2",
"multi-agent-v3" if cross_thread else "multi-agent-v2",
config_id,
thread_id,
None if cross_thread else thread_id,
user_id,
search_space_id,
visibility,

View file

@ -67,13 +67,13 @@ from __future__ import annotations
import asyncio
import hashlib
import logging
import os
import time
from collections import OrderedDict
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
from app.config import config
from app.utils.perf import get_perf_logger
logger = logging.getLogger(__name__)
@ -113,12 +113,11 @@ def tools_signature(
MCP tools loaded for the user changes, gating rules flip, etc.).
* The available connectors / document types for the search space
change (new connector added, last connector removed, new document
type indexed). Because :func:`get_connector_gated_tools` derives
``modified_disabled_tools`` from ``available_connectors``, the
tool surface is technically already covered but we hash the
connector list separately so an empty-list "no tools changed"
situation still rotates the key when, say, the user re-adds a
connector that gates a tool we were already not exposing.
type indexed). Connector gating derives disabled tools from
``available_connectors``, so the tool surface is technically already
covered but we hash the connector list separately so an empty-list
"no tools changed" situation still rotates the key when, say, the user
re-adds a connector that gates a tool we were already not exposing.
Stays stable across:
@ -329,8 +328,8 @@ def _short(key: str, n: int = 16) -> str:
# Module-level singleton
# ---------------------------------------------------------------------------
_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
_DEFAULT_MAXSIZE = config.AGENT_CACHE_MAXSIZE
_DEFAULT_TTL = config.AGENT_CACHE_TTL_SECONDS
_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)

View file

@ -0,0 +1,100 @@
"""Map configured connectors to the searchable document/connector types.
This is agent-agnostic infrastructure shared by every agent factory (single-
and multi-agent). It translates the connectors a search space has enabled into
the set of searchable type strings that pre-search middleware and ``web_search``
understand, and always layers in the document types that exist independently of
any connector (uploads, notes, extension captures, YouTube).
It lives in its own module rather than inside a specific agent factory so
that retiring or moving any single agent never disturbs the others' access to
this mapping.
"""
from __future__ import annotations
from typing import Any
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
# used by pre-search middleware and web_search.
# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
# the web_search tool; all others are considered local/indexed data.
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
    # Live search connectors (handled by web_search tool)
    "TAVILY_API": "TAVILY_API",
    "LINKUP_API": "LINKUP_API",
    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
    # Local/indexed connectors (handled by KB pre-search middleware)
    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
    "DROPBOX_CONNECTOR": "DROPBOX_FILE",  # Connector type differs from document type
    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",  # Connector type differs from document type
    # Composio connectors (unified to native document types).
    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
}

# Document types that don't come from SearchSourceConnector but should always be searchable
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
    "EXTENSION",  # Browser extension data
    "FILE",  # Uploaded files
    "NOTE",  # User notes
    "YOUTUBE_VIDEO",  # YouTube videos
]


def map_connectors_to_searchable_types(
    connector_types: list[Any] | None,
) -> list[str]:
    """
    Map SearchSourceConnectorType enums to searchable document/connector types.

    This function:
    1. Starts with the always-available document types
       (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
    2. Converts each connector type to its searchable counterpart, skipping
       connector types that have no entry in the mapping
    3. Deduplicates while preserving first-seen order

    Args:
        connector_types: SearchSourceConnectorType enum values or their plain
            string names. ``None`` or an empty list means "no connectors
            configured" and yields only the always-available types.

    Returns:
        List of searchable connector/document type strings
    """
    # A dict keeps insertion order, so it serves as an ordered set: one
    # structure instead of a set and a list kept in sync by hand.
    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)

    # ``or ()`` makes a missing connector list behave like an empty one
    # instead of raising TypeError when iterated.
    for ct in connector_types or ():
        # Handle both enum members (via ``.value``) and plain strings.
        ct_str = ct.value if hasattr(ct, "value") else str(ct)
        searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
        if searchable:
            # setdefault keeps the position of the first occurrence.
            ordered.setdefault(searchable, None)

    return list(ordered)

View file

@ -12,21 +12,28 @@ from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.multi_agent_chat.subagents import (
from app.agents.chat.multi_agent_chat.shared.feature_flags import (
AgentFeatureFlags,
get_flags,
)
from app.agents.chat.multi_agent_chat.shared.filesystem_selection import (
FilesystemMode,
FilesystemSelection,
)
from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.resolver import (
build_backend_resolver,
)
from app.agents.chat.multi_agent_chat.subagents import (
get_subagents_to_exclude,
main_prompt_registry_subagent_lines,
)
from app.agents.multi_agent_chat.subagents.mcp_tools.index import (
from app.agents.chat.multi_agent_chat.subagents.mcp_tools.index import (
load_mcp_tools_by_connector,
)
from app.agents.new_chat.chat_deepagent import _map_connectors_to_searchable_types
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
from app.agents.new_chat.filesystem_backends import build_backend_resolver
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
from app.agents.new_chat.llm_config import AgentConfig
from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching
from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
from app.agents.new_chat.tools.registry import build_tools_async
from app.agents.chat.runtime.llm_config import AgentConfig
from app.agents.chat.runtime.prompt_caching import (
apply_litellm_prompt_caching,
)
from app.db import ChatVisibility
from app.services.connector_service import ConnectorService
from app.services.user_tool_allowlist import (
@ -40,7 +47,10 @@ from ..tools import (
MAIN_AGENT_SURFSENSE_TOOL_NAMES,
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
)
from ..tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
from ..tools.registry import build_main_agent_tools
from .agent_cache import build_agent_with_cache
from .connector_searchable_types import map_connectors_to_searchable_types
_perf_log = get_perf_logger()
@ -90,7 +100,7 @@ async def create_multi_agent_chat_deep_agent(
connector_types = await connector_service.get_available_connectors(
search_space_id
)
available_connectors = _map_connectors_to_searchable_types(connector_types)
available_connectors = map_connectors_to_searchable_types(connector_types)
available_document_types = await connector_service.get_available_document_types(
search_space_id
@ -199,9 +209,6 @@ async def create_multi_agent_chat_deep_agent(
modified_disabled_tools = list(disabled_tools) if disabled_tools else []
if "search_knowledge_base" not in modified_disabled_tools:
modified_disabled_tools.append("search_knowledge_base")
if enabled_tools is not None:
main_agent_enabled_tools = [
n for n in enabled_tools if n in MAIN_AGENT_SURFSENSE_TOOL_NAMES
@ -210,12 +217,14 @@ async def create_multi_agent_chat_deep_agent(
main_agent_enabled_tools = list(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)
_t0 = time.perf_counter()
tools = await build_tools_async(
# Main agent builds only its own small SurfSense toolset via the SRP
# main-agent registry; connectors/MCP/deliverables are delegated to
# subagents, so no MCP loading or connector construction happens here.
tools = build_main_agent_tools(
dependencies=dependencies,
enabled_tools=main_agent_enabled_tools,
disabled_tools=modified_disabled_tools,
additional_tools=list(additional_tools) if additional_tools else None,
include_mcp_tools=False,
)
_flags: AgentFeatureFlags = get_flags()

View file

@ -16,7 +16,7 @@ prompt at agent build time, not edited at runtime.
Two backends are provided:
* :class:`BuiltinSkillsBackend` disk-backed read of bundled skills from
``app/agents/new_chat/skills/builtin/``.
``app/agents/shared/skills/builtin/``.
* :class:`SearchSpaceSkillsBackend` a thin read-only wrapper over
:class:`KBPostgresBackend` that filters notes under the privileged folder
``/documents/_skills/``.
@ -47,7 +47,9 @@ from deepagents.backends.state import StateBackend
if TYPE_CHECKING:
from langchain.tools import ToolRuntime
from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend
from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
KBPostgresBackend,
)
logger = logging.getLogger(__name__)
@ -59,9 +61,10 @@ _MAX_SKILL_FILE_SIZE = 10 * 1024 * 1024
def _default_builtin_root() -> Path:
"""Return the absolute path to the bundled builtin skills directory.
Located at ``app/agents/new_chat/skills/builtin/`` relative to this module.
Located at ``builtin/`` next to this module (this module lives at
``app/agents/multi_agent_chat/main_agent/skills/backends.py``).
"""
return (Path(__file__).resolve().parent.parent / "skills" / "builtin").resolve()
return (Path(__file__).resolve().parent / "builtin").resolve()
class BuiltinSkillsBackend(BackendProtocol):
@ -121,6 +124,8 @@ class BuiltinSkillsBackend(BackendProtocol):
else ("/" + str(target.relative_to(self.root)).replace("\\", "/"))
)
for child in sorted(target.iterdir()):
if child.name == "__pycache__" or child.name.startswith("."):
continue
child_virtual = (
target_virtual.rstrip("/") + "/" + child.name
if target_virtual != "/"
@ -305,7 +310,7 @@ def build_skills_backend_factory(
# Imported lazily to avoid a hard dependency at module import time:
# ``KBPostgresBackend`` pulls in DB models, which are unnecessary for
# the unit-tested builtin path.
from app.agents.new_chat.middleware.kb_postgres_backend import (
from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
KBPostgresBackend,
)

Some files were not shown because too many files have changed in this diff Show more