feat: moved chat persistance to Server Side

2026-05-07 23:02:39 +02:00 · 2026-05-04 03:06:15 -07:00 · 2026-05-04 03:06:15 -07:00 · 19b6e0a025
commit 19b6e0a025
parent 2e1b9b5582
19 changed files with 4515 additions and 390 deletions
--- a/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py
+++ b/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py
@ -0,0 +1,66 @@
+"""141_unique_chat_message_turn_role
+
+Revision ID: 141
+Revises: 140
+Create Date: 2026-05-04
+
+Add a partial unique index on ``new_chat_messages(thread_id, turn_id, role)``
+where ``turn_id IS NOT NULL``.
+
+Why
+---
+The streaming chat path (`stream_new_chat` / `stream_resume_chat`) is being
+moved to write its own ``new_chat_messages`` rows server-side instead of
+relying on the frontend's later ``POST /threads/{id}/messages`` call. This
+closes the "ghost-thread" abuse vector where authenticated callers got free
+LLM completions while ``new_chat_messages`` stayed empty.
+
+For server-side and legacy frontend writes to coexist we need an idempotency
+key. The natural triple is ``(thread_id, turn_id, role)``: the server issues
+exactly one ``turn_id`` per turn, and a turn produces at most one user
+message and one assistant message. Whichever side wins the race writes the
+row; the loser hits ``IntegrityError`` and recovers gracefully.
+
+Partial — ``WHERE turn_id IS NOT NULL`` — so:
+
+  * Legacy rows that predate the ``turn_id`` column (migration 136) keep
+    co-existing without de-dup.
+  * Clone / snapshot inserts in
+    ``app/services/public_chat_service.py`` that build ``NewChatMessage``
+    without ``turn_id`` are unaffected (multiple snapshot copies of the same
+    user/assistant pair are intentional).
+
+This index coexists with the existing single-column ``ix_new_chat_messages_turn_id``
+from migration 136 — no collision.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "141"
+down_revision: str | None = "140"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+INDEX_NAME = "uq_new_chat_messages_thread_turn_role"
+TABLE_NAME = "new_chat_messages"
+
+
+def upgrade() -> None:
+    op.create_index(
+        INDEX_NAME,
+        TABLE_NAME,
+        ["thread_id", "turn_id", "role"],
+        unique=True,
+        postgresql_where=sa.text("turn_id IS NOT NULL"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(INDEX_NAME, table_name=TABLE_NAME)
--- a/surfsense_backend/alembic/versions/142_token_usage_message_id_unique.py
+++ b/surfsense_backend/alembic/versions/142_token_usage_message_id_unique.py
@ -0,0 +1,134 @@
+"""142_token_usage_message_id_unique
+
+Revision ID: 142
+Revises: 141
+Create Date: 2026-05-04
+
+Add a partial unique index on ``token_usage(message_id)`` where
+``message_id IS NOT NULL``.
+
+Why
+---
+Two writers can race on the same assistant turn's ``token_usage`` row:
+
+  * ``finalize_assistant_turn`` (server-side, called from the streaming
+    finally block in ``stream_new_chat`` / ``stream_resume_chat``)
+  * ``append_message``'s recovery branch in
+    ``app/routes/new_chat_routes.py`` (legacy frontend round-trip)
+
+Both currently use ``SELECT ... THEN INSERT`` in separate sessions, so a
+micro-second-aligned race could observe "no row" on each side and double
+INSERT, producing duplicate ``token_usage`` rows for the same
+``message_id``.
+
+A partial unique index on ``message_id`` (``WHERE message_id IS NOT NULL``)
+turns both writes into ``INSERT ... ON CONFLICT (message_id) DO NOTHING``
+no-ops for the loser, hard-eliminating the race at the DB level. Partial
+because non-chat usage rows (indexing, image generation, podcasts) keep
+``message_id`` NULL — they're per-event, no de-dup needed.
+
+Pre-flight
+----------
+Today's schema only has a non-unique index on ``message_id`` so a
+duplicate population could already exist from any past race. We:
+
+  * Detect duplicate ``message_id`` groups (``HAVING COUNT(*) > 1``).
+  * If the group count is at or below ``DUPLICATE_ABORT_THRESHOLD`` (50)
+    we dedupe by deleting all but the smallest ``id`` per group.
+  * If the count exceeds the threshold we abort with a descriptive
+    error rather than silently mutate prod data — operator must
+    investigate before retrying.
+
+Concurrency
+-----------
+``CREATE INDEX CONCURRENTLY`` is required on this hot table to avoid
+stalling production writes during deploy (a regular ``CREATE INDEX``
+holds an ACCESS EXCLUSIVE lock for the duration of the build, which
+would block ``token_usage`` INSERTs for every active streaming chat).
+The trade-off is a slower migration (CONCURRENTLY scans the table
+twice) and the ``CREATE`` statement cannot run inside alembic's default
+transaction wrapper — ``autocommit_block()`` handles that.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "142"
+down_revision: str | None = "141"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+INDEX_NAME = "uq_token_usage_message_id"
+TABLE_NAME = "token_usage"
+
+# Refuse to silently mutate prod data if the duplicate population is
+# unexpectedly large — operator should investigate the upstream cause
+# before retrying. 50 is comfortably above any plausible duplicate
+# count from the existing race window (the race is microseconds wide).
+DUPLICATE_ABORT_THRESHOLD = 50
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+
+    dup_groups = conn.execute(
+        sa.text(
+            "SELECT message_id, COUNT(*) AS n "
+            "FROM token_usage "
+            "WHERE message_id IS NOT NULL "
+            "GROUP BY message_id "
+            "HAVING COUNT(*) > 1"
+        )
+    ).fetchall()
+
+    if len(dup_groups) > DUPLICATE_ABORT_THRESHOLD:
+        raise RuntimeError(
+            f"token_usage has {len(dup_groups)} duplicate message_id groups "
+            f"(threshold={DUPLICATE_ABORT_THRESHOLD}). "
+            "Resolve the duplicates manually before re-running this migration."
+        )
+
+    if dup_groups:
+        # Delete all but the smallest-id row per duplicate group. The
+        # smallest id is by definition the earliest insert, so we keep
+        # the row most likely to reflect the actual stream's first
+        # successful write.
+        conn.execute(
+            sa.text(
+                """
+                DELETE FROM token_usage
+                WHERE id IN (
+                    SELECT id FROM (
+                        SELECT
+                            id,
+                            row_number() OVER (
+                                PARTITION BY message_id ORDER BY id ASC
+                            ) AS rn
+                        FROM token_usage
+                        WHERE message_id IS NOT NULL
+                    ) ranked
+                    WHERE rn > 1
+                )
+                """
+            )
+        )
+
+    # CREATE INDEX CONCURRENTLY cannot run inside a transaction. Drop
+    # alembic's auto-transaction for this op only.
+    with op.get_context().autocommit_block():
+        op.execute(
+            f"CREATE UNIQUE INDEX CONCURRENTLY IF NOT EXISTS {INDEX_NAME} "
+            f"ON {TABLE_NAME} (message_id) "
+            "WHERE message_id IS NOT NULL"
+        )
+
+
+def downgrade() -> None:
+    with op.get_context().autocommit_block():
+        op.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {INDEX_NAME}")