feat(docker): enhance multi-architecture and CUDA support in Docker workflows

- Added support for multiple image variants (CPU, CUDA 12.8, CUDA 12.6) in the Docker build workflow.
- Updated Docker Compose configuration to utilize the new SURFSENSE_VARIANT environment variable for selecting image variants.
- Enhanced documentation to clarify usage of backend image variants and their corresponding environment variable settings.
This commit is contained in:
Anish Sarkar 2026-06-05 23:39:42 +05:30
parent 3259e41d6b
commit c1ee5fabf9
4 changed files with 87 additions and 17 deletions

View file

@ -97,6 +97,12 @@ jobs:
matrix:
platform: [linux/amd64, linux/arm64]
image: [backend, web]
variant: [cpu, cuda, cuda126]
# Only the backend has CUDA builds: drop the web + cuda / web + cuda126
# combinations so the web image is built once per platform (variant: cpu).
exclude:
- image: web
variant: cuda
- image: web
variant: cuda126
include:
- platform: linux/amd64
suffix: amd64
@ -114,6 +120,18 @@ jobs:
context: ./surfsense_web
file: ./surfsense_web/Dockerfile
target: runner
# Per-variant settings, attached to the matrix combinations whose `variant` matches.
# use_cuda and cuda_extra are forwarded to backend builds as the USE_CUDA and
# CUDA_EXTRA build args; tag_suffix is the suffix the variant's image tags carry.
# CPU-only build (default image, no tag suffix).
- variant: cpu
tag_suffix: ""
use_cuda: "false"
cuda_extra: cpu
# CUDA 12.8 build, tagged with the "-cuda" suffix.
- variant: cuda
tag_suffix: "-cuda"
use_cuda: "true"
cuda_extra: cu128
# CUDA 12.6 build, tagged with the "-cuda126" suffix.
- variant: cuda126
tag_suffix: "-cuda126"
use_cuda: "true"
cuda_extra: cu126
env:
REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
@ -149,7 +167,7 @@ jobs:
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
docker system prune -af
- name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }})
- name: Build and push by digest ${{ matrix.name }} (${{ matrix.variant }}, ${{ matrix.suffix }})
id: build
uses: docker/build-push-action@v7
with:
@ -160,10 +178,12 @@ jobs:
tags: ${{ steps.image.outputs.name }}
outputs: type=image,push-by-digest=true,name-canonical=true,push=true
platforms: ${{ matrix.platform }}
cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.suffix }}
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.suffix }}
cache-from: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }}
cache-to: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }},mode=max,image-manifest=true,oci-mediatypes=true
provenance: false
build-args: |
${{ matrix.image == 'backend' && format('USE_CUDA={0}', matrix.use_cuda) || '' }}
${{ matrix.image == 'backend' && format('CUDA_EXTRA={0}', matrix.cuda_extra) || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__' || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__' || '' }}
${{ matrix.image == 'web' && 'NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__' || '' }}
@ -179,7 +199,7 @@ jobs:
- name: Upload digest
uses: actions/upload-artifact@v7
with:
name: digests-${{ matrix.image }}-${{ matrix.suffix }}
name: digests-${{ matrix.image }}-${{ matrix.variant }}-${{ matrix.suffix }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
@ -187,7 +207,7 @@ jobs:
create_manifest:
runs-on: ubuntu-latest
needs: [tag_release, build]
if: always() && needs.build.result == 'success'
if: ${{ !cancelled() }}
permissions:
packages: write
contents: read
@ -197,8 +217,20 @@ jobs:
include:
# One manifest job per published tag family: three backend variants plus the
# single web image. `variant` selects which digest artifacts are downloaded
# (digests-<image>-<variant>-*); a non-empty `tag_suffix` is passed to
# docker/metadata-action as the flavor suffix.
- name: surfsense-backend
image: backend
variant: cpu
tag_suffix: ""
- name: surfsense-backend
image: backend
variant: cuda
tag_suffix: "-cuda"
- name: surfsense-backend
image: backend
variant: cuda126
tag_suffix: "-cuda126"
# The web image has no CUDA variants, so it only has the cpu entry.
- name: surfsense-web
image: web
variant: cpu
tag_suffix: ""
env:
REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
@ -207,22 +239,33 @@ jobs:
id: image
run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT
- name: Download amd64 digest
- name: Download digests
id: download
uses: actions/download-artifact@v8
with:
name: digests-${{ matrix.image }}-amd64
pattern: digests-${{ matrix.image }}-${{ matrix.variant }}-*
path: /tmp/digests
merge-multiple: true
continue-on-error: true
- name: Download arm64 digest
uses: actions/download-artifact@v8
with:
name: digests-${{ matrix.image }}-arm64
path: /tmp/digests
# Counts the digest files present in /tmp/digests. One digest is expected per
# platform (linux/amd64 and linux/arm64), so fewer than 2 means at least one
# build for this variant produced no digest. In that case a warning is emitted
# and skip=true is set; the remaining steps are guarded by
# `steps.check.outputs.skip != 'true'`, so the manifest merge is skipped
# instead of failing the job.
- name: Check digests
id: check
run: |
count=$(find /tmp/digests -type f 2>/dev/null | wc -l | tr -d ' ')
echo "digest_count=$count" >> $GITHUB_OUTPUT
if [ "$count" -lt 2 ]; then
echo "::warning::${{ matrix.variant }}: $count/2 digests, skipping merge"
echo "skip=true" >> $GITHUB_OUTPUT
else
echo "skip=false" >> $GITHUB_OUTPUT
fi
- name: Set up Docker Buildx
if: steps.check.outputs.skip != 'true'
uses: docker/setup-buildx-action@v4
- name: Login to GitHub Container Registry
if: steps.check.outputs.skip != 'true'
uses: docker/login-action@v4
with:
registry: ghcr.io
@ -230,6 +273,7 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Compute app version
if: steps.check.outputs.skip != 'true'
id: appver
run: |
VERSION_TAG="${{ needs.tag_release.outputs.new_tag }}"
@ -241,6 +285,7 @@ jobs:
echo "app_version=$APP_VERSION" >> $GITHUB_OUTPUT
- name: Docker meta
if: steps.check.outputs.skip != 'true'
id: meta
uses: docker/metadata-action@v6
with:
@ -252,18 +297,22 @@ jobs:
type=sha,prefix=git-
flavor: |
latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }}
${{ matrix.tag_suffix != '' && format('suffix={0},onlatest=true', matrix.tag_suffix) || '' }}
- name: Create manifest list and push
if: steps.check.outputs.skip != 'true'
working-directory: /tmp/digests
run: |
docker buildx imagetools create \
$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ steps.image.outputs.name }}@sha256:%s ' *)
- name: Inspect image
if: steps.check.outputs.skip != 'true'
run: |
docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}
- name: Summary
if: steps.check.outputs.skip != 'true'
run: |
echo "Multi-arch manifest created for ${{ matrix.name }}!"
echo "Tags: $(jq -cr '.tags | join(", ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")"

View file

@ -7,6 +7,10 @@
# SurfSense version (use "latest" or a specific version like "0.0.14")
SURFSENSE_VERSION=latest
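# Backend image variant: empty = CPU (default), "cuda" = CUDA 12.8, "cuda126" = CUDA 12.6.
# NOTE: this only changes which backend image tag is pulled (the GPU-built image).
# It does not by itself give the containers access to GPU devices; GPU device access lands in Phase 3.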
SURFSENSE_VARIANT=
# Deployment environment: dev or production
SURFSENSE_ENV=production

View file

@ -34,7 +34,7 @@ services:
# migration halts the whole stack instead of silently producing a half-built
# system that crash-loops zero-cache on missing publications.
migrations:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
env_file:
- .env
environment:
@ -98,7 +98,7 @@ services:
retries: 5
backend:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
ports:
- "${BACKEND_PORT:-8929}:8000"
volumes:
@ -165,7 +165,7 @@ services:
# retries: 5
celery_worker:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
volumes:
- shared_temp:/shared_tmp
env_file:
@ -195,7 +195,7 @@ services:
restart: unless-stopped
celery_beat:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
env_file:
- .env
environment:
@ -219,7 +219,7 @@ services:
restart: unless-stopped
# flower:
# image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
# image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
# ports:
# - "${FLOWER_PORT:-5555}:5555"
# env_file:

View file

@ -35,6 +35,7 @@ All configuration lives in a single `docker/.env` file (or `surfsense/.env` if y
| Variable | Description | Default |
|----------|-------------|---------|
| `SURFSENSE_VERSION` | Image tag to deploy. Use `latest`, a clean version (e.g. `0.0.14`), or a specific build (e.g. `0.0.14.1`) | `latest` |
| `SURFSENSE_VARIANT` | Backend image variant. Leave empty for CPU, set `cuda` for CUDA 12.8, or `cuda126` for CUDA 12.6. | *(empty)* |
| `AUTH_TYPE` | Authentication method: `LOCAL` (email/password) or `GOOGLE` (OAuth) | `LOCAL` |
| `ETL_SERVICE` | Document parsing: `DOCLING` (local), `UNSTRUCTURED`, or `LLAMACLOUD` | `DOCLING` |
| `EMBEDDING_MODEL` | Embedding model for vector search | `sentence-transformers/all-MiniLM-L6-v2` |
@ -42,6 +43,22 @@ All configuration lives in a single `docker/.env` file (or `surfsense/.env` if y
| `STT_SERVICE` | Speech-to-text provider for audio files | `local/base` |
| `REGISTRATION_ENABLED` | Allow new user registrations | `TRUE` |
### Image Variants
SurfSense publishes CPU and CUDA backend image variants. The frontend image is not variant-specific.
| Backend tag | Use case | `SURFSENSE_VARIANT` |
|-------------|----------|---------------------|
| `:latest` | CPU-only default | *(empty)* |
| `:latest-cuda` | NVIDIA CUDA 12.8 backend image | `cuda` |
| `:latest-cuda126` | NVIDIA CUDA 12.6 backend image for older driver stacks | `cuda126` |
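All backend variants are built for `linux/amd64` and `linux/arm64`. CUDA on `linux/arm64` is best-effort: a variant's multi-arch tag is only published when both architecture builds succeed, so a failed build can leave that variant's tag unpublished for a given release.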
<Callout type="warn">
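`SURFSENSE_VARIANT` only selects which backend image is pulled (the CPU build or one of the GPU-built CUDA images). It does not by itself give the containers access to GPU devices; GPU device access is added separately in Phase 3.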
</Callout>
### Ports
| Variable | Description | Default |