From 421a4d7d0807f17da7c29290e96e96ff73cc5e72 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 1 May 2026 19:32:42 +0530
Subject: [PATCH] refactor(auto_model_pin): simplify thread-level pinning by
 removing unused fields and indexes

---
 ...38_add_thread_auto_model_pinning_fields.py | 31 +++++-----------
 surfsense_backend/app/db.py                   | 13 +++----
 .../app/routes/search_spaces_routes.py        |  6 +--
 .../app/services/auto_model_pin_service.py    | 37 ++++++++-----------
 .../services/test_auto_model_pin_service.py   | 28 +++-----------
 5 files changed, 37 insertions(+), 78 deletions(-)

diff --git a/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py b/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py
index 3972b84b9..fba621a0c 100644
--- a/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py
+++ b/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py
@@ -4,10 +4,12 @@ Revision ID: 138
 Revises: 137
 Create Date: 2026-04-30
 
-Add thread-level fields to persist Auto (Fastest) model pinning metadata:
-- pinned_llm_config_id: concrete resolved config id used for this thread
-- pinned_auto_mode: auto policy identifier (currently "auto_fastest")
-- pinned_at: timestamp when the pin was created/refreshed
+Add a single thread-level column to persist the Auto (Fastest) model pin:
+- pinned_llm_config_id: concrete resolved global LLM config id used for this
+  thread. NULL means "no pin; Auto will resolve on next turn".
+
+The column is unindexed: all reads are by new_chat_threads.id (primary key),
+so a secondary index would be dead write amplification.
 """
 
 from __future__ import annotations
@@ -27,29 +29,14 @@ def upgrade() -> None:
         "ALTER TABLE new_chat_threads "
         "ADD COLUMN IF NOT EXISTS pinned_llm_config_id INTEGER"
     )
-    op.execute(
-        "ALTER TABLE new_chat_threads "
-        "ADD COLUMN IF NOT EXISTS pinned_auto_mode VARCHAR(32)"
-    )
-    op.execute(
-        "ALTER TABLE new_chat_threads "
-        "ADD COLUMN IF NOT EXISTS pinned_at TIMESTAMP WITH TIME ZONE"
-    )
-
-    op.execute(
-        "CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_llm_config_id "
-        "ON new_chat_threads (pinned_llm_config_id)"
-    )
-    op.execute(
-        "CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_auto_mode "
-        "ON new_chat_threads (pinned_auto_mode)"
-    )
 
 
 def downgrade() -> None:
+    # Drop any shape the thread row may be carrying. The extra columns and
+    # indexes only exist on dev DBs that ran an earlier draft of 138; IF EXISTS
+    # makes each statement a safe no-op on the lean shape.
     op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_auto_mode")
     op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_llm_config_id")
-
     op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_at")
     op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_auto_mode")
     op.execute(
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index ca3334f8b..2fe478d9b 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -638,13 +638,12 @@ class NewChatThread(BaseModel, TimestampMixin):
         default=False,
         server_default="false",
     )
-    # Auto model pinning metadata:
-    # - pinned_llm_config_id stores the concrete resolved model config id.
-    # - pinned_auto_mode indicates which auto policy produced the pin.
-    # This allows Auto (Fastest) to resolve once per thread and stay stable.
-    pinned_llm_config_id = Column(Integer, nullable=True, index=True)
-    pinned_auto_mode = Column(String(32), nullable=True, index=True)
-    pinned_at = Column(TIMESTAMP(timezone=True), nullable=True)
+    # Auto (Fastest) model pin for this thread: concrete resolved global LLM
+    # config id. NULL means no pin; Auto will resolve on the next turn.
+    # Single-writer invariant: only app.services.auto_model_pin_service sets
+    # or clears this column (plus bulk clears when a search space's
+    # agent_llm_id changes). Unindexed: all reads are by primary key.
+    pinned_llm_config_id = Column(Integer, nullable=True)
 
     # Relationships
     search_space = relationship("SearchSpace", back_populates="new_chat_threads")
diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py
index 7944e7d66..72715ea5b 100644
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@@ -803,11 +803,7 @@ async def update_llm_preferences(
             await session.execute(
                 update(NewChatThread)
                 .where(NewChatThread.search_space_id == search_space_id)
-                .values(
-                    pinned_llm_config_id=None,
-                    pinned_auto_mode=None,
-                    pinned_at=None,
-                )
+                .values(pinned_llm_config_id=None)
             )
             logger.info(
                 "Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)",
diff --git a/surfsense_backend/app/services/auto_model_pin_service.py b/surfsense_backend/app/services/auto_model_pin_service.py
index 6b69c91ea..1a2061492 100644
--- a/surfsense_backend/app/services/auto_model_pin_service.py
+++ b/surfsense_backend/app/services/auto_model_pin_service.py
@@ -2,8 +2,14 @@
 
 Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we
 resolve that virtual mode to one concrete global LLM config exactly once and
-persist the chosen config id on ``new_chat_threads`` so subsequent turns are
-stable.
+persist the chosen config id on ``new_chat_threads.pinned_llm_config_id`` so
+subsequent turns are stable.
+
+Single-writer invariant: this module is the only writer of
+``NewChatThread.pinned_llm_config_id`` (aside from the bulk clear in
+``search_spaces_routes`` when a search space's ``agent_llm_id`` changes).
+Therefore a non-NULL value unambiguously means "this thread has an
+Auto-resolved pin"; no separate source/policy column is needed.
 """
 
 from __future__ import annotations
@@ -11,7 +17,6 @@ from __future__ import annotations
 import hashlib
 import logging
 from dataclasses import dataclass
-from datetime import UTC, datetime
 from uuid import UUID
 
 from sqlalchemy import select
@@ -90,10 +95,10 @@ async def resolve_or_get_pinned_llm_config_id(
     selected_llm_config_id: int,
     force_repin_free: bool = False,
 ) -> AutoPinResolution:
-    """Resolve Auto (Fastest) to one concrete config id and persist pin metadata.
+    """Resolve Auto (Fastest) to one concrete config id and persist the pin.
 
-    For non-auto selections, this function clears existing auto pin metadata and
-    returns the selected id as-is.
+    For non-auto selections, this function clears any existing pin and returns
+    the selected id as-is.
     """
     thread = (
         (
@@ -113,16 +118,10 @@ async def resolve_or_get_pinned_llm_config_id(
             f"Thread {thread_id} does not belong to search space {search_space_id}"
         )
 
-    # Explicit model selected: clear stale auto pin metadata.
+    # Explicit model selected: clear any stale pin.
     if selected_llm_config_id != AUTO_FASTEST_ID:
-        if (
-            thread.pinned_llm_config_id is not None
-            or thread.pinned_auto_mode is not None
-            or thread.pinned_at is not None
-        ):
+        if thread.pinned_llm_config_id is not None:
             thread.pinned_llm_config_id = None
-            thread.pinned_auto_mode = None
-            thread.pinned_at = None
             await session.commit()
         return AutoPinResolution(
             resolved_llm_config_id=selected_llm_config_id,
@@ -135,12 +134,11 @@ async def resolve_or_get_pinned_llm_config_id(
         raise ValueError("No usable global LLM configs are available for Auto mode")
     candidate_by_id = {int(c["id"]): c for c in candidates}
 
-    # Reuse existing valid pin without re-checking current quota (no silent tier switch),
-    # unless the caller explicitly requests a forced repin to free.
+    # Reuse an existing valid pin without re-checking current quota (no silent
+    # tier switch), unless the caller explicitly requests a forced repin to free.
     pinned_id = thread.pinned_llm_config_id
     if (
         not force_repin_free
-        and thread.pinned_auto_mode == AUTO_FASTEST_MODE
         and pinned_id is not None
         and int(pinned_id) in candidate_by_id
     ):
@@ -159,11 +157,10 @@ async def resolve_or_get_pinned_llm_config_id(
         )
     if pinned_id is not None:
         logger.info(
-            "auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s pinned_auto_mode=%s",
+            "auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s",
             thread_id,
             search_space_id,
             pinned_id,
-            thread.pinned_auto_mode,
         )
 
     premium_eligible = (
@@ -184,8 +181,6 @@ async def resolve_or_get_pinned_llm_config_id(
     selected_tier = _tier_of(selected_cfg)
 
     thread.pinned_llm_config_id = selected_id
-    thread.pinned_auto_mode = AUTO_FASTEST_MODE
-    thread.pinned_at = datetime.now(UTC)
     await session.commit()
 
     if force_repin_free:
diff --git a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
index 0a2342e05..2094ea6dd 100644
--- a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
+++ b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
@@ -6,7 +6,6 @@ from types import SimpleNamespace
 import pytest
 
 from app.services.auto_model_pin_service import (
-    AUTO_FASTEST_MODE,
     resolve_or_get_pinned_llm_config_id,
 )
 
@@ -45,14 +44,11 @@ def _thread(
     *,
     search_space_id: int = 10,
     pinned_llm_config_id: int | None = None,
-    pinned_auto_mode: str | None = None,
 ):
     return SimpleNamespace(
         id=1,
         search_space_id=search_space_id,
         pinned_llm_config_id=pinned_llm_config_id,
-        pinned_auto_mode=pinned_auto_mode,
-        pinned_at=None,
     )
 
 
@@ -93,8 +89,6 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
     )
     assert result.resolved_llm_config_id in {-1, -2}
     assert session.thread.pinned_llm_config_id == result.resolved_llm_config_id
-    assert session.thread.pinned_auto_mode == AUTO_FASTEST_MODE
-    assert session.thread.pinned_at is not None
     assert session.commit_count == 1
 
 
@@ -102,9 +96,7 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
 async def test_next_turn_reuses_existing_pin(monkeypatch):
     from app.config import config
 
-    session = _FakeSession(
-        _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
+    session = _FakeSession(_thread(pinned_llm_config_id=-1))
     monkeypatch.setattr(
         config,
         "GLOBAL_LLM_CONFIGS",
@@ -228,9 +220,7 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
 async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
     from app.config import config
 
-    session = _FakeSession(
-        _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
+    session = _FakeSession(_thread(pinned_llm_config_id=-1))
     monkeypatch.setattr(
         config,
         "GLOBAL_LLM_CONFIGS",
@@ -275,9 +265,7 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
 async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
     from app.config import config
 
-    session = _FakeSession(
-        _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
+    session = _FakeSession(_thread(pinned_llm_config_id=-1))
     monkeypatch.setattr(
         config,
         "GLOBAL_LLM_CONFIGS",
@@ -325,9 +313,7 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
 async def test_explicit_user_model_change_clears_pin(monkeypatch):
     from app.config import config
 
-    session = _FakeSession(
-        _thread(pinned_llm_config_id=-2, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
+    session = _FakeSession(_thread(pinned_llm_config_id=-2))
     monkeypatch.setattr(
         config,
         "GLOBAL_LLM_CONFIGS",
@@ -345,8 +331,6 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
     )
     assert result.resolved_llm_config_id == 7
     assert session.thread.pinned_llm_config_id is None
-    assert session.thread.pinned_auto_mode is None
-    assert session.thread.pinned_at is None
     assert session.commit_count == 1
 
 
@@ -354,9 +338,7 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
 async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
     from app.config import config
 
-    session = _FakeSession(
-        _thread(pinned_llm_config_id=-999, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
+    session = _FakeSession(_thread(pinned_llm_config_id=-999))
     monkeypatch.setattr(
         config,
         "GLOBAL_LLM_CONFIGS",