From 421a4d7d0807f17da7c29290e96e96ff73cc5e72 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 1 May 2026 19:32:42 +0530 Subject: [PATCH] refactor(auto_model_pin): simplify thread-level pinning by removing unused fields and indexes --- ...38_add_thread_auto_model_pinning_fields.py | 31 +++++----------- surfsense_backend/app/db.py | 13 +++---- .../app/routes/search_spaces_routes.py | 6 +-- .../app/services/auto_model_pin_service.py | 37 ++++++++----------- .../services/test_auto_model_pin_service.py | 28 +++----------- 5 files changed, 37 insertions(+), 78 deletions(-) diff --git a/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py b/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py index 3972b84b9..fba621a0c 100644 --- a/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py +++ b/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py @@ -4,10 +4,12 @@ Revision ID: 138 Revises: 137 Create Date: 2026-04-30 -Add thread-level fields to persist Auto (Fastest) model pinning metadata: -- pinned_llm_config_id: concrete resolved config id used for this thread -- pinned_auto_mode: auto policy identifier (currently "auto_fastest") -- pinned_at: timestamp when the pin was created/refreshed +Add a single thread-level column to persist the Auto (Fastest) model pin: +- pinned_llm_config_id: concrete resolved global LLM config id used for this + thread. NULL means "no pin; Auto will resolve on next turn". + +The column is unindexed: all reads are by new_chat_threads.id (primary key), +so a secondary index would be dead write amplification. """ from __future__ import annotations @@ -27,29 +29,14 @@ def upgrade() -> None: "ALTER TABLE new_chat_threads " "ADD COLUMN IF NOT EXISTS pinned_llm_config_id INTEGER" ) - op.execute( - "ALTER TABLE new_chat_threads " - "ADD COLUMN IF NOT EXISTS pinned_auto_mode VARCHAR(32)" - ) - op.execute( - "ALTER TABLE new_chat_threads " - "ADD COLUMN IF NOT EXISTS pinned_at TIMESTAMP WITH TIME ZONE" - ) - - op.execute( - "CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_llm_config_id " - "ON new_chat_threads (pinned_llm_config_id)" - ) - op.execute( - "CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_auto_mode " - "ON new_chat_threads (pinned_auto_mode)" - ) def downgrade() -> None: + # Drop any shape the thread row may be carrying. The extra columns and + # indexes only exist on dev DBs that ran an earlier draft of 138; IF EXISTS + # makes each statement a safe no-op on the lean shape. op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_auto_mode") op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_llm_config_id") - op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_at") op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_auto_mode") op.execute( diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index ca3334f8b..2fe478d9b 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -638,13 +638,12 @@ class NewChatThread(BaseModel, TimestampMixin): default=False, server_default="false", ) - # Auto model pinning metadata: - # - pinned_llm_config_id stores the concrete resolved model config id. - # - pinned_auto_mode indicates which auto policy produced the pin. - # This allows Auto (Fastest) to resolve once per thread and stay stable. - pinned_llm_config_id = Column(Integer, nullable=True, index=True) - pinned_auto_mode = Column(String(32), nullable=True, index=True) - pinned_at = Column(TIMESTAMP(timezone=True), nullable=True) + # Auto (Fastest) model pin for this thread: concrete resolved global LLM + # config id. NULL means no pin; Auto will resolve on the next turn. + # Single-writer invariant: only app.services.auto_model_pin_service sets + # or clears this column (plus bulk clears when a search space's + # agent_llm_id changes). Unindexed: all reads are by primary key. + pinned_llm_config_id = Column(Integer, nullable=True) # Relationships search_space = relationship("SearchSpace", back_populates="new_chat_threads") diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index 7944e7d66..72715ea5b 100644 --- a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -803,11 +803,7 @@ async def update_llm_preferences( await session.execute( update(NewChatThread) .where(NewChatThread.search_space_id == search_space_id) - .values( - pinned_llm_config_id=None, - pinned_auto_mode=None, - pinned_at=None, - ) + .values(pinned_llm_config_id=None) ) logger.info( "Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)", diff --git a/surfsense_backend/app/services/auto_model_pin_service.py b/surfsense_backend/app/services/auto_model_pin_service.py index 6b69c91ea..1a2061492 100644 --- a/surfsense_backend/app/services/auto_model_pin_service.py +++ b/surfsense_backend/app/services/auto_model_pin_service.py @@ -2,8 +2,14 @@ Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we resolve that virtual mode to one concrete global LLM config exactly once and -persist the chosen config id on ``new_chat_threads`` so subsequent turns are -stable. +persist the chosen config id on ``new_chat_threads.pinned_llm_config_id`` so +subsequent turns are stable. + +Single-writer invariant: this module is the only writer of +``NewChatThread.pinned_llm_config_id`` (aside from the bulk clear in +``search_spaces_routes`` when a search space's ``agent_llm_id`` changes). +Therefore a non-NULL value unambiguously means "this thread has an +Auto-resolved pin"; no separate source/policy column is needed. """ from __future__ import annotations @@ -11,7 +17,6 @@ from __future__ import annotations import hashlib import logging from dataclasses import dataclass -from datetime import UTC, datetime from uuid import UUID from sqlalchemy import select @@ -90,10 +95,10 @@ async def resolve_or_get_pinned_llm_config_id( selected_llm_config_id: int, force_repin_free: bool = False, ) -> AutoPinResolution: - """Resolve Auto (Fastest) to one concrete config id and persist pin metadata. + """Resolve Auto (Fastest) to one concrete config id and persist the pin. - For non-auto selections, this function clears existing auto pin metadata and - returns the selected id as-is. + For non-auto selections, this function clears any existing pin and returns + the selected id as-is. """ thread = ( ( @@ -113,16 +118,10 @@ async def resolve_or_get_pinned_llm_config_id( f"Thread {thread_id} does not belong to search space {search_space_id}" ) - # Explicit model selected: clear stale auto pin metadata. + # Explicit model selected: clear any stale pin. if selected_llm_config_id != AUTO_FASTEST_ID: - if ( - thread.pinned_llm_config_id is not None - or thread.pinned_auto_mode is not None - or thread.pinned_at is not None - ): + if thread.pinned_llm_config_id is not None: thread.pinned_llm_config_id = None - thread.pinned_auto_mode = None - thread.pinned_at = None await session.commit() return AutoPinResolution( resolved_llm_config_id=selected_llm_config_id, @@ -135,12 +134,11 @@ async def resolve_or_get_pinned_llm_config_id( raise ValueError("No usable global LLM configs are available for Auto mode") candidate_by_id = {int(c["id"]): c for c in candidates} - # Reuse existing valid pin without re-checking current quota (no silent tier switch), - # unless the caller explicitly requests a forced repin to free. + # Reuse an existing valid pin without re-checking current quota (no silent + # tier switch), unless the caller explicitly requests a forced repin to free. pinned_id = thread.pinned_llm_config_id if ( not force_repin_free - and thread.pinned_auto_mode == AUTO_FASTEST_MODE and pinned_id is not None and int(pinned_id) in candidate_by_id ): @@ -159,11 +157,10 @@ async def resolve_or_get_pinned_llm_config_id( ) if pinned_id is not None: logger.info( - "auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s pinned_auto_mode=%s", + "auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s", thread_id, search_space_id, pinned_id, - thread.pinned_auto_mode, ) premium_eligible = ( @@ -184,8 +181,6 @@ async def resolve_or_get_pinned_llm_config_id( selected_tier = _tier_of(selected_cfg) thread.pinned_llm_config_id = selected_id - thread.pinned_auto_mode = AUTO_FASTEST_MODE - thread.pinned_at = datetime.now(UTC) await session.commit() if force_repin_free: diff --git a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py index 0a2342e05..2094ea6dd 100644 --- a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py +++ b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py @@ -6,7 +6,6 @@ from types import SimpleNamespace import pytest from app.services.auto_model_pin_service import ( - AUTO_FASTEST_MODE, resolve_or_get_pinned_llm_config_id, ) @@ -45,14 +44,11 @@ def _thread( *, search_space_id: int = 10, pinned_llm_config_id: int | None = None, - pinned_auto_mode: str | None = None, ): return SimpleNamespace( id=1, search_space_id=search_space_id, pinned_llm_config_id=pinned_llm_config_id, - pinned_auto_mode=pinned_auto_mode, - pinned_at=None, ) @@ -93,8 +89,6 @@ async def test_auto_first_turn_pins_one_model(monkeypatch): ) assert result.resolved_llm_config_id in {-1, -2} assert session.thread.pinned_llm_config_id == result.resolved_llm_config_id - assert session.thread.pinned_auto_mode == AUTO_FASTEST_MODE - assert session.thread.pinned_at is not None assert session.commit_count == 1 @@ -102,9 +96,7 @@ async def test_auto_first_turn_pins_one_model(monkeypatch): async def test_next_turn_reuses_existing_pin(monkeypatch): from app.config import config - session = _FakeSession( - _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE) - ) + session = _FakeSession(_thread(pinned_llm_config_id=-1)) monkeypatch.setattr( config, "GLOBAL_LLM_CONFIGS", @@ -228,9 +220,7 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch): async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch): from app.config import config - session = _FakeSession( - _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE) - ) + session = _FakeSession(_thread(pinned_llm_config_id=-1)) monkeypatch.setattr( config, "GLOBAL_LLM_CONFIGS", @@ -275,9 +265,7 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch): async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch): from app.config import config - session = _FakeSession( - _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE) - ) + session = _FakeSession(_thread(pinned_llm_config_id=-1)) monkeypatch.setattr( config, "GLOBAL_LLM_CONFIGS", @@ -325,9 +313,7 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch): async def test_explicit_user_model_change_clears_pin(monkeypatch): from app.config import config - session = _FakeSession( - _thread(pinned_llm_config_id=-2, pinned_auto_mode=AUTO_FASTEST_MODE) - ) + session = _FakeSession(_thread(pinned_llm_config_id=-2)) monkeypatch.setattr( config, "GLOBAL_LLM_CONFIGS", @@ -345,8 +331,6 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch): ) assert result.resolved_llm_config_id == 7 assert session.thread.pinned_llm_config_id is None - assert session.thread.pinned_auto_mode is None - assert session.thread.pinned_at is None assert session.commit_count == 1 @@ -354,9 +338,7 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch): async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch): from app.config import config - session = _FakeSession( - _thread(pinned_llm_config_id=-999, pinned_auto_mode=AUTO_FASTEST_MODE) - ) + session = _FakeSession(_thread(pinned_llm_config_id=-999)) monkeypatch.setattr( config, "GLOBAL_LLM_CONFIGS",