Merge remote-tracking branch 'upstream/dev' into improvement-agent-speed

2026-07-10 22:32:16 +02:00 · 2026-05-20 19:22:49 +02:00 · 2026-05-20 19:22:49 +02:00 · d5ee8cc4cd
commit d5ee8cc4cd
parent 2be3f04df5 f5f2456dfd
287 changed files with 7551 additions and 6195 deletions
--- a/surfsense_backend/Dockerfile
+++ b/surfsense_backend/Dockerfile
@ -167,10 +167,14 @@ COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
 RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh

 # SERVICE_ROLE controls which process this container runs:
-#   api     – FastAPI backend only (runs migrations on startup)
+#   migrate – Run alembic upgrade head, verify zero_publication exists, exit 0.
+#             Used by the dedicated `migrations` service in docker-compose.yml
+#             so downstream services gate on `service_completed_successfully`.
+#   api     – FastAPI backend only (does NOT run migrations)
 #   worker  – Celery worker only
 #   beat    – Celery beat scheduler only
-#   all     – All three (legacy / dev default)
+#   all     – migrations + api + worker + beat (legacy / dev default;
+#             fails fast on migration error)
 ENV SERVICE_ROLE=all

 # Celery worker tuning (only used when SERVICE_ROLE=worker or all)
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py
@ -9,6 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
 from app.utils.document_converters import embed_text
+from app.utils.surfsense_docs import surfsense_docs_public_url


 def format_surfsense_docs_results(results: list[tuple]) -> str:
@ -19,13 +20,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str:
    # Group chunks by document
    grouped: dict[int, dict] = {}
    for chunk, doc in results:
+        public_url = surfsense_docs_public_url(doc.source)
        if doc.id not in grouped:
            grouped[doc.id] = {
                "document_id": f"doc-{doc.id}",
                "document_type": "SURFSENSE_DOCS",
                "title": doc.title,
-                "url": doc.source,
-                "metadata": {"source": doc.source},
+                "url": public_url,
+                "metadata": {"source": doc.source, "public_url": public_url},
                "chunks": [],
            }
        grouped[doc.id]["chunks"].append(
--- a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py
+++ b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py
@ -17,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
 from app.utils.document_converters import embed_text
+from app.utils.surfsense_docs import surfsense_docs_public_url


 def format_surfsense_docs_results(results: list[tuple]) -> str:
@ -40,13 +41,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str:
    # Group chunks by document
    grouped: dict[int, dict] = {}
    for chunk, doc in results:
+        public_url = surfsense_docs_public_url(doc.source)
        if doc.id not in grouped:
            grouped[doc.id] = {
                "document_id": f"doc-{doc.id}",
                "document_type": "SURFSENSE_DOCS",
                "title": doc.title,
-                "url": doc.source,
-                "metadata": {"source": doc.source},
+                "url": public_url,
+                "metadata": {"source": doc.source, "public_url": public_url},
                "chunks": [],
            }
        grouped[doc.id]["chunks"].append(
--- a/surfsense_backend/app/app.py
+++ b/surfsense_backend/app/app.py
@ -945,6 +945,36 @@ async def health_check():
    return {"status": "ok"}


+@app.get("/ready", tags=["health"])
+@limiter.exempt
+async def readiness_check():
+    """Readiness probe.
+
+    Verifies that the schema state required by downstream services is
+    present. Specifically checks that the ``zero_publication`` Postgres
+    logical-replication publication exists; without it zero-cache crash-loops
+    on `Unknown or invalid publications`.
+
+    Returns 200 when ready, 503 otherwise. Used by the docker-compose
+    backend healthcheck and by ``install.ps1`` / ``install.sh`` post-up
+    verification.
+    """
+    from sqlalchemy import text
+
+    from app.db import async_session_maker
+
+    async with async_session_maker() as session:
+        result = await session.execute(
+            text("SELECT 1 FROM pg_publication WHERE pubname = 'zero_publication'")
+        )
+        if result.first() is None:
+            raise HTTPException(
+                status_code=503,
+                detail="zero_publication missing; run alembic upgrade head",
+            )
+    return {"status": "ready"}
+
+
@app.get("/verify-token")
 async def authenticated_route(
    user: User = Depends(current_active_user),
--- a/surfsense_backend/app/routes/surfsense_docs_routes.py
+++ b/surfsense_backend/app/routes/surfsense_docs_routes.py
@ -24,6 +24,7 @@ from app.schemas.surfsense_docs import (
    SurfsenseDocsDocumentWithChunksRead,
 )
 from app.users import current_active_user
+from app.utils.surfsense_docs import surfsense_docs_public_url

 router = APIRouter()

@ -76,6 +77,7 @@ async def get_surfsense_doc_by_chunk_id(
            id=document.id,
            title=document.title,
            source=document.source,
+            public_url=surfsense_docs_public_url(document.source),
            content=document.content,
            chunks=[
                SurfsenseDocsChunkRead(id=c.id, content=c.content)
@ -146,6 +148,7 @@ async def list_surfsense_docs(
                id=doc.id,
                title=doc.title,
                source=doc.source,
+                public_url=surfsense_docs_public_url(doc.source),
                content=doc.content,
                created_at=doc.created_at,
                updated_at=doc.updated_at,
--- a/surfsense_backend/app/schemas/surfsense_docs.py
+++ b/surfsense_backend/app/schemas/surfsense_docs.py
@ -22,6 +22,7 @@ class SurfsenseDocsDocumentRead(BaseModel):
    id: int
    title: str
    source: str
+    public_url: str
    content: str
    created_at: datetime | None = None
    updated_at: datetime | None = None
@ -35,6 +36,7 @@ class SurfsenseDocsDocumentWithChunksRead(BaseModel):
    id: int
    title: str
    source: str
+    public_url: str
    content: str
    chunks: list[SurfsenseDocsChunkRead]

--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -79,6 +79,7 @@ from app.tasks.chat.streaming.helpers.interrupt_inspector import (
 )
 from app.utils.content_utils import bootstrap_history_from_db
 from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
+from app.utils.surfsense_docs import surfsense_docs_public_url
 from app.utils.user_message_multimodal import build_human_message_content

 _background_tasks: set[asyncio.Task] = set()
@ -214,14 +215,17 @@ def format_mentioned_surfsense_docs_as_context(
    )

    for doc in documents:
-        metadata_json = json.dumps({"source": doc.source}, ensure_ascii=False)
+        public_url = surfsense_docs_public_url(doc.source)
+        metadata_json = json.dumps(
+            {"source": doc.source, "public_url": public_url}, ensure_ascii=False
+        )

        context_parts.append("<document>")
        context_parts.append("<document_metadata>")
        context_parts.append(f"  <document_id>doc-{doc.id}</document_id>")
        context_parts.append("  <document_type>SURFSENSE_DOCS</document_type>")
        context_parts.append(f"  <title><![CDATA[{doc.title}]]></title>")
-        context_parts.append(f"  <url><![CDATA[{doc.source}]]></url>")
+        context_parts.append(f"  <url><![CDATA[{public_url}]]></url>")
        context_parts.append(
            f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
        )
--- a/surfsense_backend/app/utils/surfsense_docs.py
+++ b/surfsense_backend/app/utils/surfsense_docs.py
@ -0,0 +1,13 @@
+"""Utilities for SurfSense's built-in documentation index."""
+
+from pathlib import PurePosixPath
+
+DOCS_PUBLIC_ROOT = PurePosixPath("/docs")
+
+
+def surfsense_docs_public_url(source: str) -> str:
+    """Return the public docs route for an indexed documentation source path."""
+    docs_path = PurePosixPath(source).with_suffix("")
+    if docs_path.name == "index":
+        docs_path = docs_path.parent
+    return (DOCS_PUBLIC_ROOT / docs_path).as_posix()
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "surf-new-backend"
-version = "0.0.23"
+version = "0.0.24"
 description = "SurfSense Backend"
 requires-python = ">=3.12"
 dependencies = [
--- a/surfsense_backend/scripts/docker/entrypoint.sh
+++ b/surfsense_backend/scripts/docker/entrypoint.sh
@ -4,10 +4,15 @@ set -e
 # ─────────────────────────────────────────────────────────────
 # SERVICE_ROLE controls which process(es) this container runs.
 #
-#   api     – FastAPI backend only  (runs migrations on startup)
+#   migrate – Run `alembic upgrade head`, verify zero_publication,
+#             then exit 0. Used by the dedicated `migrations` service
+#             in docker-compose.yml so downstream services can gate
+#             on `condition: service_completed_successfully`.
+#   api     – FastAPI backend only (does NOT run migrations)
 #   worker  – Celery worker only
 #   beat    – Celery beat scheduler only
-#   all     – All three in one container (legacy / dev default)
+#   all     – migrations + api + worker + beat in one container
+#             (legacy / dev default; fails fast on migration error)
 #
 # Set SERVICE_ROLE as an environment variable in Coolify for
 # each service deployment.
@ -41,7 +46,13 @@ cleanup() {

 trap cleanup SIGTERM SIGINT

-# ── Database migrations (only for api / all) ─────────────────
+# ── Database migrations (only for migrate / all) ─────────────
+# Fail-fast contract:
+#   - alembic upgrade head must succeed within ${MIGRATION_TIMEOUT:-900}s
+#   - zero_publication must exist in pg_publication afterwards
+# Either failure exits non-zero so the dedicated `migrations` compose
+# service exits non-zero, halting the rest of the stack instead of
+# silently producing a half-built system that crash-loops zero-cache.
 run_migrations() {
    echo "Running database migrations..."
    for i in {1..30}; do
@ -53,11 +64,66 @@ run_migrations() {
        sleep 1
    done

-    if timeout 300 alembic upgrade head 2>&1; then
-        echo "Migrations completed successfully."
-    else
-        echo "WARNING: Migration failed or timed out. Continuing anyway..."
-        echo "You may need to run migrations manually: alembic upgrade head"
+    local timeout_secs="${MIGRATION_TIMEOUT:-900}"
+    echo "Running alembic upgrade head (timeout=${timeout_secs}s)..."
+    if ! timeout "${timeout_secs}" alembic upgrade head; then
+        echo "ERROR: alembic upgrade head failed (or exceeded ${timeout_secs}s timeout)." >&2
+        echo "Refusing to start. Inspect the error above and re-run." >&2
+        exit 1
+    fi
+    echo "Migrations completed successfully."
+
+    echo "Verifying zero_publication exists in Postgres..."
+    local pub_oid
+    pub_oid=$(python <<'PY' 2>/dev/null || true
+import asyncio
+import sys
+from sqlalchemy import text
+from app.db import engine
+
+
+async def get_oid():
+    async with engine.connect() as conn:
+        result = await conn.execute(
+            text("SELECT oid FROM pg_publication WHERE pubname = 'zero_publication'")
+        )
+        row = result.first()
+        if row is None:
+            sys.exit(1)
+        print(int(row[0]))
+
+
+asyncio.run(get_oid())
+PY
+)
+    if [ -z "${pub_oid}" ]; then
+        echo "ERROR: zero_publication is missing from Postgres after running alembic." >&2
+        echo "This usually means migration 116 (or a later publication migration) did not run." >&2
+        echo "Inspect alembic state with:" >&2
+        echo "  docker compose exec db psql -U \"\$DB_USER\" -d \"\$DB_NAME\" -c 'SELECT * FROM alembic_version;'" >&2
+        exit 1
+    fi
+    echo "zero_publication verified (oid=${pub_oid})."
+
+    # Stale-replica safety net: if /zero-init is mounted (i.e. we are the
+    # dedicated `migrations` compose service), drop a marker file when the
+    # publication oid changed (or on first run) so the wrapped zero-cache
+    # entrypoint can wipe /data/zero.db before starting. This recovers from
+    # the case where a previous zero-cache crashed mid-init and left a
+    # half-built SQLite replica without a `_zero.tableMetadata` table.
+    if [ -d /zero-init ]; then
+        local stored_oid=""
+        [ -f /zero-init/last_pub_oid ] && stored_oid=$(cat /zero-init/last_pub_oid 2>/dev/null || true)
+        if [ -z "${stored_oid}" ] || [ "${stored_oid}" != "${pub_oid}" ]; then
+            echo "Publication oid changed (stored=${stored_oid:-<none>}, current=${pub_oid}); writing /zero-init/needs_reset."
+            : > /zero-init/needs_reset
+            chmod 666 /zero-init/needs_reset 2>/dev/null || true
+        fi
+        echo "${pub_oid}" > /zero-init/last_pub_oid
+        chmod 666 /zero-init/last_pub_oid 2>/dev/null || true
+        # World-writable dir so the (possibly non-root) zero-cache container
+        # can `rm -f /zero-init/needs_reset` after acting on the marker.
+        chmod 777 /zero-init 2>/dev/null || true
    fi
 }

@ -102,8 +168,12 @@ start_beat() {

 # ── Main: run based on role ──────────────────────────────────
 case "${SERVICE_ROLE}" in
-    api)
+    migrate)
        run_migrations
+        echo "Migrations complete; exiting cleanly."
+        exit 0
+        ;;
+    api)
        start_api
        ;;
    worker)
@ -121,7 +191,7 @@ case "${SERVICE_ROLE}" in
        start_beat
        ;;
    *)
-        echo "ERROR: Unknown SERVICE_ROLE '${SERVICE_ROLE}'. Use: api, worker, beat, or all"
+        echo "ERROR: Unknown SERVICE_ROLE '${SERVICE_ROLE}'. Use: migrate, api, worker, beat, or all"
        exit 1
        ;;
 esac
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@ -7947,7 +7947,7 @@ wheels = [

 [[package]]
 name = "surf-new-backend"
-version = "0.0.23"
+version = "0.0.24"
 source = { editable = "." }
 dependencies = [
    { name = "alembic" },