refactor(auto_model_pin): simplify thread-level pinning by removing unused fields and indexes

2026-06-24 21:38:09 +02:00 · 2026-05-01 19:32:42 +05:30 · 2026-05-01 19:32:42 +05:30 · 421a4d7d08
commit 421a4d7d08
parent 1863f2832b
5 changed files with 37 additions and 78 deletions
--- a/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py
+++ b/surfsense_backend/alembic/versions/138_add_thread_auto_model_pinning_fields.py
@ -4,10 +4,12 @@ Revision ID: 138
 Revises: 137
 Create Date: 2026-04-30
-Add thread-level fields to persist Auto (Fastest) model pinning metadata:
+Add a single thread-level column to persist the Auto (Fastest) model pin:
- pinned_llm_config_id: concrete resolved config id used for this thread
+- pinned_llm_config_id: concrete resolved global LLM config id used for this
- pinned_auto_mode: auto policy identifier (currently "auto_fastest")
+  thread. NULL means "no pin; Auto will resolve on next turn".
- pinned_at: timestamp when the pin was created/refreshed
+
 The column is unindexed: all reads are by new_chat_threads.id (primary key),
 so a secondary index would be dead write amplification.
 """
 from __future__ import annotations
@ -27,29 +29,14 @@ def upgrade() -> None:
        "ALTER TABLE new_chat_threads "
        "ADD COLUMN IF NOT EXISTS pinned_llm_config_id INTEGER"
    )
-    op.execute(
-        "ALTER TABLE new_chat_threads "
-        "ADD COLUMN IF NOT EXISTS pinned_auto_mode VARCHAR(32)"
-    )
-    op.execute(
-        "ALTER TABLE new_chat_threads "
-        "ADD COLUMN IF NOT EXISTS pinned_at TIMESTAMP WITH TIME ZONE"
-    )
-    op.execute(
-        "CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_llm_config_id "
-        "ON new_chat_threads (pinned_llm_config_id)"
-    )
-    op.execute(
-        "CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_auto_mode "
-        "ON new_chat_threads (pinned_auto_mode)"
-    )
 def downgrade() -> None:
    # Drop any shape the thread row may be carrying. The extra columns and
    # indexes only exist on dev DBs that ran an earlier draft of 138; IF EXISTS
    # makes each statement a safe no-op on the lean shape.
    op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_auto_mode")
    op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_llm_config_id")
    op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_at")
    op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_auto_mode")
    op.execute(
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -638,13 +638,12 @@ class NewChatThread(BaseModel, TimestampMixin):
        default=False,
        server_default="false",
    )
-    # Auto model pinning metadata:
+    # Auto (Fastest) model pin for this thread: concrete resolved global LLM
-    # - pinned_llm_config_id stores the concrete resolved model config id.
+    # config id. NULL means no pin; Auto will resolve on the next turn.
-    # - pinned_auto_mode indicates which auto policy produced the pin.
+    # Single-writer invariant: only app.services.auto_model_pin_service sets
-    # This allows Auto (Fastest) to resolve once per thread and stay stable.
+    # or clears this column (plus bulk clears when a search space's
-    pinned_llm_config_id = Column(Integer, nullable=True, index=True)
+    # agent_llm_id changes). Unindexed: all reads are by primary key.
-    pinned_auto_mode = Column(String(32), nullable=True, index=True)
+    pinned_llm_config_id = Column(Integer, nullable=True)
-    pinned_at = Column(TIMESTAMP(timezone=True), nullable=True)
    # Relationships
    search_space = relationship("SearchSpace", back_populates="new_chat_threads")
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@ -803,11 +803,7 @@ async def update_llm_preferences(
            await session.execute(
                update(NewChatThread)
                .where(NewChatThread.search_space_id == search_space_id)
-                .values(
+                .values(pinned_llm_config_id=None)
-                    pinned_llm_config_id=None,
-                    pinned_auto_mode=None,
-                    pinned_at=None,
-                )
            )
            logger.info(
                "Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)",
--- a/surfsense_backend/app/services/auto_model_pin_service.py
+++ b/surfsense_backend/app/services/auto_model_pin_service.py
@ -2,8 +2,14 @@
 Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we
 resolve that virtual mode to one concrete global LLM config exactly once and
-persist the chosen config id on ``new_chat_threads`` so subsequent turns are
+persist the chosen config id on ``new_chat_threads.pinned_llm_config_id`` so
-stable.
+subsequent turns are stable.
 Single-writer invariant: this module is the only writer of
 ``NewChatThread.pinned_llm_config_id`` (aside from the bulk clear in
 ``search_spaces_routes`` when a search space's ``agent_llm_id`` changes).
 Therefore a non-NULL value unambiguously means "this thread has an
 Auto-resolved pin"; no separate source/policy column is needed.
 """
 from __future__ import annotations
@ -11,7 +17,6 @@ from __future__ import annotations
 import hashlib
 import logging
 from dataclasses import dataclass
-from datetime import UTC, datetime
 from uuid import UUID
 from sqlalchemy import select
@ -90,10 +95,10 @@ async def resolve_or_get_pinned_llm_config_id(
    selected_llm_config_id: int,
    force_repin_free: bool = False,
 ) -> AutoPinResolution:
-    """Resolve Auto (Fastest) to one concrete config id and persist pin metadata.
+    """Resolve Auto (Fastest) to one concrete config id and persist the pin.
-    For non-auto selections, this function clears existing auto pin metadata and
+    For non-auto selections, this function clears any existing pin and returns
-    returns the selected id as-is.
+    the selected id as-is.
    """
    thread = (
        (
@ -113,16 +118,10 @@ async def resolve_or_get_pinned_llm_config_id(
            f"Thread {thread_id} does not belong to search space {search_space_id}"
        )
-    # Explicit model selected: clear stale auto pin metadata.
+    # Explicit model selected: clear any stale pin.
    if selected_llm_config_id != AUTO_FASTEST_ID:
-        if (
+        if thread.pinned_llm_config_id is not None:
-            thread.pinned_llm_config_id is not None
-            or thread.pinned_auto_mode is not None
-            or thread.pinned_at is not None
-        ):
            thread.pinned_llm_config_id = None
-            thread.pinned_auto_mode = None
-            thread.pinned_at = None
            await session.commit()
        return AutoPinResolution(
            resolved_llm_config_id=selected_llm_config_id,
@ -135,12 +134,11 @@ async def resolve_or_get_pinned_llm_config_id(
        raise ValueError("No usable global LLM configs are available for Auto mode")
    candidate_by_id = {int(c["id"]): c for c in candidates}
-    # Reuse existing valid pin without re-checking current quota (no silent tier switch),
+    # Reuse an existing valid pin without re-checking current quota (no silent
-    # unless the caller explicitly requests a forced repin to free.
+    # tier switch), unless the caller explicitly requests a forced repin to free.
    pinned_id = thread.pinned_llm_config_id
    if (
        not force_repin_free
-        and thread.pinned_auto_mode == AUTO_FASTEST_MODE
        and pinned_id is not None
        and int(pinned_id) in candidate_by_id
    ):
@ -159,11 +157,10 @@ async def resolve_or_get_pinned_llm_config_id(
        )
    if pinned_id is not None:
        logger.info(
-            "auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s pinned_auto_mode=%s",
+            "auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s",
            thread_id,
            search_space_id,
            pinned_id,
-            thread.pinned_auto_mode,
        )
    premium_eligible = (
@ -184,8 +181,6 @@ async def resolve_or_get_pinned_llm_config_id(
    selected_tier = _tier_of(selected_cfg)
    thread.pinned_llm_config_id = selected_id
-    thread.pinned_auto_mode = AUTO_FASTEST_MODE
-    thread.pinned_at = datetime.now(UTC)
    await session.commit()
    if force_repin_free:
--- a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
+++ b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
@ -6,7 +6,6 @@ from types import SimpleNamespace
 import pytest
 from app.services.auto_model_pin_service import (
-    AUTO_FASTEST_MODE,
    resolve_or_get_pinned_llm_config_id,
 )
@ -45,14 +44,11 @@ def _thread(
    *,
    search_space_id: int = 10,
    pinned_llm_config_id: int | None = None,
-    pinned_auto_mode: str | None = None,
 ):
    return SimpleNamespace(
        id=1,
        search_space_id=search_space_id,
        pinned_llm_config_id=pinned_llm_config_id,
-        pinned_auto_mode=pinned_auto_mode,
-        pinned_at=None,
    )
@ -93,8 +89,6 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
    )
    assert result.resolved_llm_config_id in {-1, -2}
    assert session.thread.pinned_llm_config_id == result.resolved_llm_config_id
-    assert session.thread.pinned_auto_mode == AUTO_FASTEST_MODE
-    assert session.thread.pinned_at is not None
    assert session.commit_count == 1
@ -102,9 +96,7 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
 async def test_next_turn_reuses_existing_pin(monkeypatch):
    from app.config import config
-    session = _FakeSession(
+    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-        _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
@ -228,9 +220,7 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
 async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
    from app.config import config
-    session = _FakeSession(
+    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-        _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
@ -275,9 +265,7 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
 async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
    from app.config import config
-    session = _FakeSession(
+    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-        _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
@ -325,9 +313,7 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
 async def test_explicit_user_model_change_clears_pin(monkeypatch):
    from app.config import config
-    session = _FakeSession(
+    session = _FakeSession(_thread(pinned_llm_config_id=-2))
-        _thread(pinned_llm_config_id=-2, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
@ -345,8 +331,6 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
    )
    assert result.resolved_llm_config_id == 7
    assert session.thread.pinned_llm_config_id is None
-    assert session.thread.pinned_auto_mode is None
-    assert session.thread.pinned_at is None
    assert session.commit_count == 1
@ -354,9 +338,7 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
 async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
    from app.config import config
-    session = _FakeSession(
+    session = _FakeSession(_thread(pinned_llm_config_id=-999))
-        _thread(pinned_llm_config_id=-999, pinned_auto_mode=AUTO_FASTEST_MODE)
-    )
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",