mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-07 23:02:39 +02:00
feat: moved chat persistence to Server Side
This commit is contained in:
parent
2e1b9b5582
commit
19b6e0a025
19 changed files with 4515 additions and 390 deletions
|
|
@ -0,0 +1,66 @@
|
|||
"""141_unique_chat_message_turn_role
|
||||
|
||||
Revision ID: 141
|
||||
Revises: 140
|
||||
Create Date: 2026-05-04
|
||||
|
||||
Add a partial unique index on ``new_chat_messages(thread_id, turn_id, role)``
|
||||
where ``turn_id IS NOT NULL``.
|
||||
|
||||
Why
|
||||
---
|
||||
The streaming chat path (`stream_new_chat` / `stream_resume_chat`) is being
|
||||
moved to write its own ``new_chat_messages`` rows server-side instead of
|
||||
relying on the frontend's later ``POST /threads/{id}/messages`` call. This
|
||||
closes the "ghost-thread" abuse vector where authenticated callers got free
|
||||
LLM completions while ``new_chat_messages`` stayed empty.
|
||||
|
||||
For server-side and legacy frontend writes to coexist we need an idempotency
|
||||
key. The natural triple is ``(thread_id, turn_id, role)``: the server issues
|
||||
exactly one ``turn_id`` per turn, and a turn produces at most one user
|
||||
message and one assistant message. Whichever side wins the race writes the
|
||||
row; the loser hits ``IntegrityError`` and recovers gracefully.
|
||||
|
||||
Partial — ``WHERE turn_id IS NOT NULL`` — so:
|
||||
|
||||
* Legacy rows that predate the ``turn_id`` column (migration 136) keep
|
||||
co-existing without de-dup.
|
||||
* Clone / snapshot inserts in
|
||||
``app/services/public_chat_service.py`` that build ``NewChatMessage``
|
||||
without ``turn_id`` are unaffected (multiple snapshot copies of the same
|
||||
user/assistant pair are intentional).
|
||||
|
||||
This index coexists with the existing single-column ``ix_new_chat_messages_turn_id``
|
||||
from migration 136 — no collision.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "141"
|
||||
down_revision: str | None = "140"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
INDEX_NAME = "uq_new_chat_messages_thread_turn_role"
|
||||
TABLE_NAME = "new_chat_messages"
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the partial unique index on (thread_id, turn_id, role).

    The ``turn_id IS NOT NULL`` predicate keeps legacy rows (pre-turn_id)
    and turn-less snapshot clones out of the uniqueness constraint.
    """
    indexed_columns = ["thread_id", "turn_id", "role"]
    only_rows_with_turn = sa.text("turn_id IS NOT NULL")
    op.create_index(
        INDEX_NAME,
        TABLE_NAME,
        indexed_columns,
        unique=True,
        postgresql_where=only_rows_with_turn,
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Remove the partial unique index added by this revision."""
    op.drop_index(INDEX_NAME, table_name=TABLE_NAME)
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
"""142_token_usage_message_id_unique
|
||||
|
||||
Revision ID: 142
|
||||
Revises: 141
|
||||
Create Date: 2026-05-04
|
||||
|
||||
Add a partial unique index on ``token_usage(message_id)`` where
|
||||
``message_id IS NOT NULL``.
|
||||
|
||||
Why
|
||||
---
|
||||
Two writers can race on the same assistant turn's ``token_usage`` row:
|
||||
|
||||
* ``finalize_assistant_turn`` (server-side, called from the streaming
|
||||
finally block in ``stream_new_chat`` / ``stream_resume_chat``)
|
||||
* ``append_message``'s recovery branch in
|
||||
``app/routes/new_chat_routes.py`` (legacy frontend round-trip)
|
||||
|
||||
Both currently use ``SELECT ... THEN INSERT`` in separate sessions, so a
|
||||
micro-second-aligned race could observe "no row" on each side and double
|
||||
INSERT, producing duplicate ``token_usage`` rows for the same
|
||||
``message_id``.
|
||||
|
||||
A partial unique index on ``message_id`` (``WHERE message_id IS NOT NULL``)
|
||||
turns both writes into ``INSERT ... ON CONFLICT (message_id) DO NOTHING``
|
||||
no-ops for the loser, hard-eliminating the race at the DB level. Partial
|
||||
because non-chat usage rows (indexing, image generation, podcasts) keep
|
||||
``message_id`` NULL — they're per-event, no de-dup needed.
|
||||
|
||||
Pre-flight
|
||||
----------
|
||||
Today's schema only has a non-unique index on ``message_id`` so a
|
||||
duplicate population could already exist from any past race. We:
|
||||
|
||||
* Detect duplicate ``message_id`` groups (``HAVING COUNT(*) > 1``).
|
||||
* If the group count is at or below ``DUPLICATE_ABORT_THRESHOLD`` (50)
|
||||
we dedupe by deleting all but the smallest ``id`` per group.
|
||||
* If the count exceeds the threshold we abort with a descriptive
|
||||
error rather than silently mutate prod data — operator must
|
||||
investigate before retrying.
|
||||
|
||||
Concurrency
|
||||
-----------
|
||||
``CREATE INDEX CONCURRENTLY`` is required on this hot table to avoid
|
||||
stalling production writes during deploy (a regular ``CREATE INDEX``
|
||||
holds an ACCESS EXCLUSIVE lock for the duration of the build, which
|
||||
would block ``token_usage`` INSERTs for every active streaming chat).
|
||||
The trade-off is a slower migration (CONCURRENTLY scans the table
|
||||
twice) and the ``CREATE`` statement cannot run inside alembic's default
|
||||
transaction wrapper — ``autocommit_block()`` handles that.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "142"
|
||||
down_revision: str | None = "141"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
INDEX_NAME = "uq_token_usage_message_id"
|
||||
TABLE_NAME = "token_usage"
|
||||
|
||||
# Refuse to silently mutate prod data if the duplicate population is
|
||||
# unexpectedly large — operator should investigate the upstream cause
|
||||
# before retrying. 50 is comfortably above any plausible duplicate
|
||||
# count from the existing race window (the race is microseconds wide).
|
||||
DUPLICATE_ABORT_THRESHOLD = 50
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Dedupe ``token_usage.message_id``, then add a partial unique index on it.

    Raises:
        RuntimeError: when more than ``DUPLICATE_ABORT_THRESHOLD`` duplicate
            ``message_id`` groups exist — the operator must investigate the
            upstream cause before re-running instead of letting the migration
            silently mutate production data.
    """
    bind = op.get_bind()

    # Pre-flight: find every message_id that appears on more than one row.
    duplicate_rows = bind.execute(
        sa.text(
            "SELECT message_id, COUNT(*) AS n "
            "FROM token_usage "
            "WHERE message_id IS NOT NULL "
            "GROUP BY message_id "
            "HAVING COUNT(*) > 1"
        )
    ).fetchall()
    group_count = len(duplicate_rows)

    if group_count > DUPLICATE_ABORT_THRESHOLD:
        raise RuntimeError(
            f"token_usage has {group_count} duplicate message_id groups "
            f"(threshold={DUPLICATE_ABORT_THRESHOLD}). "
            "Resolve the duplicates manually before re-running this migration."
        )

    if duplicate_rows:
        # Keep only the smallest id per group — by definition the earliest
        # insert, i.e. the row most likely to reflect the actual stream's
        # first successful write.
        bind.execute(
            sa.text(
                """
                DELETE FROM token_usage
                WHERE id IN (
                    SELECT id FROM (
                        SELECT
                            id,
                            row_number() OVER (
                                PARTITION BY message_id ORDER BY id ASC
                            ) AS rn
                        FROM token_usage
                        WHERE message_id IS NOT NULL
                    ) ranked
                    WHERE rn > 1
                )
                """
            )
        )

    create_index_sql = (
        f"CREATE UNIQUE INDEX CONCURRENTLY IF NOT EXISTS {INDEX_NAME} "
        f"ON {TABLE_NAME} (message_id) "
        "WHERE message_id IS NOT NULL"
    )
    # CREATE INDEX CONCURRENTLY is illegal inside a transaction block, so
    # step out of alembic's auto-transaction for this one statement.
    with op.get_context().autocommit_block():
        op.execute(create_index_sql)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the partial unique index, again CONCURRENTLY (and thus outside
    a transaction) so in-flight token_usage writes are not blocked."""
    with op.get_context().autocommit_block():
        op.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {INDEX_NAME}")
|
||||
Loading…
Add table
Add a link
Reference in a new issue