From 242925d8e52d21941d72a279488fb36f6c32ce22 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 11 May 2026 12:31:15 +0530 Subject: [PATCH] chore: update Docker configurations to streamline backend build and enhance E2E testing environment --- docker/docker-compose.dev.yml | 11 ++++++++--- docker/docker-compose.e2e.yml | 4 ++++ surfsense_backend/Dockerfile | 4 ++++ surfsense_backend/scripts/docker/entrypoint.e2e.sh | 3 ++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index bbe758d4f..b974f7e3d 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -10,6 +10,11 @@ name: surfsense-dev +x-backend-build: &backend-build + context: ../surfsense_backend + args: + EMBEDDING_MODEL: ${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + services: db: image: pgvector/pgvector:pg17 @@ -69,7 +74,7 @@ services: retries: 5 backend: - build: ../surfsense_backend + build: *backend-build ports: - "${BACKEND_PORT:-8000}:8000" volumes: @@ -114,7 +119,7 @@ services: start_period: 200s celery_worker: - build: ../surfsense_backend + build: *backend-build volumes: - ../surfsense_backend/app:/app/app - shared_temp:/shared_tmp @@ -140,7 +145,7 @@ services: condition: service_healthy celery_beat: - build: ../surfsense_backend + build: *backend-build env_file: - ../surfsense_backend/.env environment: diff --git a/docker/docker-compose.e2e.yml b/docker/docker-compose.e2e.yml index 87b4e7261..a752262cb 100644 --- a/docker/docker-compose.e2e.yml +++ b/docker/docker-compose.e2e.yml @@ -54,6 +54,8 @@ x-backend-env: &backend-env HTTPS_PROXY: http://127.0.0.1:1 HTTP_PROXY: http://127.0.0.1:1 NO_PROXY: localhost,127.0.0.1,0.0.0.0,db,redis,host.docker.internal + HF_HUB_OFFLINE: "1" + TRANSFORMERS_OFFLINE: "1" services: db: @@ -95,6 +97,8 @@ services: # tests/ is excluded from the main context by .dockerignore; # the e2e stage's `COPY --from=tests-source` pulls it in here. tests-source: ../surfsense_backend/tests + args: + EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2 cache_from: - type=gha,scope=surfsense-e2e-backend cache_to: diff --git a/surfsense_backend/Dockerfile b/surfsense_backend/Dockerfile index 93a923ea3..6e1b2481e 100644 --- a/surfsense_backend/Dockerfile +++ b/surfsense_backend/Dockerfile @@ -93,6 +93,9 @@ RUN printf '%s\n' \ ' pass' \ | python || true +ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 +RUN python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')" + # Install Playwright browsers (the playwright python package itself is in deps) RUN playwright install chromium --with-deps @@ -104,6 +107,7 @@ RUN mkdir -p /shared_tmp ENV PYTHONPATH=/app ENV UVICORN_LOOP=asyncio ENV TMPDIR=/shared_tmp +ENV PYTHONUNBUFFERED=1 # Tune glibc malloc to return freed memory to the OS more aggressively. # Without these, Python's gc.collect() frees objects but the underlying diff --git a/surfsense_backend/scripts/docker/entrypoint.e2e.sh b/surfsense_backend/scripts/docker/entrypoint.e2e.sh index 84cfe2568..b44e1ee95 100755 --- a/surfsense_backend/scripts/docker/entrypoint.e2e.sh +++ b/surfsense_backend/scripts/docker/entrypoint.e2e.sh @@ -19,7 +19,8 @@ wait_for_db() { # depends_on/healthchecks already gate on db readiness, this is just # belt-and-suspenders so a slow first connection doesn't race migrations. for i in {1..60}; do - if python -c "from app.db import engine; import asyncio; asyncio.run(engine.dispose())" 2>/dev/null; then + echo "[e2e-entrypoint] db check attempt ${i}/60" + if python -c "from app.db import engine; import asyncio; asyncio.run(engine.dispose())"; then echo "[e2e-entrypoint] db reachable after ${i} attempts" return 0 fi