refactor(auto_model_pin): simplify thread-level pinning by removing unused fields and indexes

This commit is contained in:
Anish Sarkar 2026-05-01 19:32:42 +05:30
parent 1863f2832b
commit 421a4d7d08
5 changed files with 37 additions and 78 deletions

View file

@ -4,10 +4,12 @@ Revision ID: 138
Revises: 137 Revises: 137
Create Date: 2026-04-30 Create Date: 2026-04-30
Add thread-level fields to persist Auto (Fastest) model pinning metadata: Add a single thread-level column to persist the Auto (Fastest) model pin:
- pinned_llm_config_id: concrete resolved config id used for this thread - pinned_llm_config_id: concrete resolved global LLM config id used for this
- pinned_auto_mode: auto policy identifier (currently "auto_fastest") thread. NULL means "no pin; Auto will resolve on next turn".
- pinned_at: timestamp when the pin was created/refreshed
The column is unindexed: all reads are by new_chat_threads.id (primary key),
so a secondary index would be dead write amplification.
""" """
from __future__ import annotations from __future__ import annotations
@ -27,29 +29,14 @@ def upgrade() -> None:
"ALTER TABLE new_chat_threads " "ALTER TABLE new_chat_threads "
"ADD COLUMN IF NOT EXISTS pinned_llm_config_id INTEGER" "ADD COLUMN IF NOT EXISTS pinned_llm_config_id INTEGER"
) )
op.execute(
"ALTER TABLE new_chat_threads "
"ADD COLUMN IF NOT EXISTS pinned_auto_mode VARCHAR(32)"
)
op.execute(
"ALTER TABLE new_chat_threads "
"ADD COLUMN IF NOT EXISTS pinned_at TIMESTAMP WITH TIME ZONE"
)
op.execute(
"CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_llm_config_id "
"ON new_chat_threads (pinned_llm_config_id)"
)
op.execute(
"CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_auto_mode "
"ON new_chat_threads (pinned_auto_mode)"
)
def downgrade() -> None: def downgrade() -> None:
# Drop any shape the thread row may be carrying. The extra columns and
# indexes only exist on dev DBs that ran an earlier draft of 138; IF EXISTS
# makes each statement a safe no-op on the lean shape.
op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_auto_mode") op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_auto_mode")
op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_llm_config_id") op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_llm_config_id")
op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_at") op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_at")
op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_auto_mode") op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_auto_mode")
op.execute( op.execute(

View file

@ -638,13 +638,12 @@ class NewChatThread(BaseModel, TimestampMixin):
default=False, default=False,
server_default="false", server_default="false",
) )
# Auto model pinning metadata: # Auto (Fastest) model pin for this thread: concrete resolved global LLM
# - pinned_llm_config_id stores the concrete resolved model config id. # config id. NULL means no pin; Auto will resolve on the next turn.
# - pinned_auto_mode indicates which auto policy produced the pin. # Single-writer invariant: only app.services.auto_model_pin_service sets
# This allows Auto (Fastest) to resolve once per thread and stay stable. # or clears this column (plus bulk clears when a search space's
pinned_llm_config_id = Column(Integer, nullable=True, index=True) # agent_llm_id changes). Unindexed: all reads are by primary key.
pinned_auto_mode = Column(String(32), nullable=True, index=True) pinned_llm_config_id = Column(Integer, nullable=True)
pinned_at = Column(TIMESTAMP(timezone=True), nullable=True)
# Relationships # Relationships
search_space = relationship("SearchSpace", back_populates="new_chat_threads") search_space = relationship("SearchSpace", back_populates="new_chat_threads")

View file

@ -803,11 +803,7 @@ async def update_llm_preferences(
await session.execute( await session.execute(
update(NewChatThread) update(NewChatThread)
.where(NewChatThread.search_space_id == search_space_id) .where(NewChatThread.search_space_id == search_space_id)
.values( .values(pinned_llm_config_id=None)
pinned_llm_config_id=None,
pinned_auto_mode=None,
pinned_at=None,
)
) )
logger.info( logger.info(
"Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)", "Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)",

View file

@ -2,8 +2,14 @@
Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we
resolve that virtual mode to one concrete global LLM config exactly once and resolve that virtual mode to one concrete global LLM config exactly once and
persist the chosen config id on ``new_chat_threads`` so subsequent turns are persist the chosen config id on ``new_chat_threads.pinned_llm_config_id`` so
stable. subsequent turns are stable.
Single-writer invariant: this module is the only writer of
``NewChatThread.pinned_llm_config_id`` (aside from the bulk clear in
``search_spaces_routes`` when a search space's ``agent_llm_id`` changes).
Therefore a non-NULL value unambiguously means "this thread has an
Auto-resolved pin"; no separate source/policy column is needed.
""" """
from __future__ import annotations from __future__ import annotations
@ -11,7 +17,6 @@ from __future__ import annotations
import hashlib import hashlib
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
from datetime import UTC, datetime
from uuid import UUID from uuid import UUID
from sqlalchemy import select from sqlalchemy import select
@ -90,10 +95,10 @@ async def resolve_or_get_pinned_llm_config_id(
selected_llm_config_id: int, selected_llm_config_id: int,
force_repin_free: bool = False, force_repin_free: bool = False,
) -> AutoPinResolution: ) -> AutoPinResolution:
"""Resolve Auto (Fastest) to one concrete config id and persist pin metadata. """Resolve Auto (Fastest) to one concrete config id and persist the pin.
For non-auto selections, this function clears existing auto pin metadata and For non-auto selections, this function clears any existing pin and returns
returns the selected id as-is. the selected id as-is.
""" """
thread = ( thread = (
( (
@ -113,16 +118,10 @@ async def resolve_or_get_pinned_llm_config_id(
f"Thread {thread_id} does not belong to search space {search_space_id}" f"Thread {thread_id} does not belong to search space {search_space_id}"
) )
# Explicit model selected: clear stale auto pin metadata. # Explicit model selected: clear any stale pin.
if selected_llm_config_id != AUTO_FASTEST_ID: if selected_llm_config_id != AUTO_FASTEST_ID:
if ( if thread.pinned_llm_config_id is not None:
thread.pinned_llm_config_id is not None
or thread.pinned_auto_mode is not None
or thread.pinned_at is not None
):
thread.pinned_llm_config_id = None thread.pinned_llm_config_id = None
thread.pinned_auto_mode = None
thread.pinned_at = None
await session.commit() await session.commit()
return AutoPinResolution( return AutoPinResolution(
resolved_llm_config_id=selected_llm_config_id, resolved_llm_config_id=selected_llm_config_id,
@ -135,12 +134,11 @@ async def resolve_or_get_pinned_llm_config_id(
raise ValueError("No usable global LLM configs are available for Auto mode") raise ValueError("No usable global LLM configs are available for Auto mode")
candidate_by_id = {int(c["id"]): c for c in candidates} candidate_by_id = {int(c["id"]): c for c in candidates}
# Reuse existing valid pin without re-checking current quota (no silent tier switch), # Reuse an existing valid pin without re-checking current quota (no silent
# unless the caller explicitly requests a forced repin to free. # tier switch), unless the caller explicitly requests a forced repin to free.
pinned_id = thread.pinned_llm_config_id pinned_id = thread.pinned_llm_config_id
if ( if (
not force_repin_free not force_repin_free
and thread.pinned_auto_mode == AUTO_FASTEST_MODE
and pinned_id is not None and pinned_id is not None
and int(pinned_id) in candidate_by_id and int(pinned_id) in candidate_by_id
): ):
@ -159,11 +157,10 @@ async def resolve_or_get_pinned_llm_config_id(
) )
if pinned_id is not None: if pinned_id is not None:
logger.info( logger.info(
"auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s pinned_auto_mode=%s", "auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s",
thread_id, thread_id,
search_space_id, search_space_id,
pinned_id, pinned_id,
thread.pinned_auto_mode,
) )
premium_eligible = ( premium_eligible = (
@ -184,8 +181,6 @@ async def resolve_or_get_pinned_llm_config_id(
selected_tier = _tier_of(selected_cfg) selected_tier = _tier_of(selected_cfg)
thread.pinned_llm_config_id = selected_id thread.pinned_llm_config_id = selected_id
thread.pinned_auto_mode = AUTO_FASTEST_MODE
thread.pinned_at = datetime.now(UTC)
await session.commit() await session.commit()
if force_repin_free: if force_repin_free:

View file

@ -6,7 +6,6 @@ from types import SimpleNamespace
import pytest import pytest
from app.services.auto_model_pin_service import ( from app.services.auto_model_pin_service import (
AUTO_FASTEST_MODE,
resolve_or_get_pinned_llm_config_id, resolve_or_get_pinned_llm_config_id,
) )
@ -45,14 +44,11 @@ def _thread(
*, *,
search_space_id: int = 10, search_space_id: int = 10,
pinned_llm_config_id: int | None = None, pinned_llm_config_id: int | None = None,
pinned_auto_mode: str | None = None,
): ):
return SimpleNamespace( return SimpleNamespace(
id=1, id=1,
search_space_id=search_space_id, search_space_id=search_space_id,
pinned_llm_config_id=pinned_llm_config_id, pinned_llm_config_id=pinned_llm_config_id,
pinned_auto_mode=pinned_auto_mode,
pinned_at=None,
) )
@ -93,8 +89,6 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
) )
assert result.resolved_llm_config_id in {-1, -2} assert result.resolved_llm_config_id in {-1, -2}
assert session.thread.pinned_llm_config_id == result.resolved_llm_config_id assert session.thread.pinned_llm_config_id == result.resolved_llm_config_id
assert session.thread.pinned_auto_mode == AUTO_FASTEST_MODE
assert session.thread.pinned_at is not None
assert session.commit_count == 1 assert session.commit_count == 1
@ -102,9 +96,7 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
async def test_next_turn_reuses_existing_pin(monkeypatch): async def test_next_turn_reuses_existing_pin(monkeypatch):
from app.config import config from app.config import config
session = _FakeSession( session = _FakeSession(_thread(pinned_llm_config_id=-1))
_thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
)
monkeypatch.setattr( monkeypatch.setattr(
config, config,
"GLOBAL_LLM_CONFIGS", "GLOBAL_LLM_CONFIGS",
@ -228,9 +220,7 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch): async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
from app.config import config from app.config import config
session = _FakeSession( session = _FakeSession(_thread(pinned_llm_config_id=-1))
_thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
)
monkeypatch.setattr( monkeypatch.setattr(
config, config,
"GLOBAL_LLM_CONFIGS", "GLOBAL_LLM_CONFIGS",
@ -275,9 +265,7 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch): async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
from app.config import config from app.config import config
session = _FakeSession( session = _FakeSession(_thread(pinned_llm_config_id=-1))
_thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
)
monkeypatch.setattr( monkeypatch.setattr(
config, config,
"GLOBAL_LLM_CONFIGS", "GLOBAL_LLM_CONFIGS",
@ -325,9 +313,7 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
async def test_explicit_user_model_change_clears_pin(monkeypatch): async def test_explicit_user_model_change_clears_pin(monkeypatch):
from app.config import config from app.config import config
session = _FakeSession( session = _FakeSession(_thread(pinned_llm_config_id=-2))
_thread(pinned_llm_config_id=-2, pinned_auto_mode=AUTO_FASTEST_MODE)
)
monkeypatch.setattr( monkeypatch.setattr(
config, config,
"GLOBAL_LLM_CONFIGS", "GLOBAL_LLM_CONFIGS",
@ -345,8 +331,6 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
) )
assert result.resolved_llm_config_id == 7 assert result.resolved_llm_config_id == 7
assert session.thread.pinned_llm_config_id is None assert session.thread.pinned_llm_config_id is None
assert session.thread.pinned_auto_mode is None
assert session.thread.pinned_at is None
assert session.commit_count == 1 assert session.commit_count == 1
@ -354,9 +338,7 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch): async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
from app.config import config from app.config import config
session = _FakeSession( session = _FakeSession(_thread(pinned_llm_config_id=-999))
_thread(pinned_llm_config_id=-999, pinned_auto_mode=AUTO_FASTEST_MODE)
)
monkeypatch.setattr( monkeypatch.setattr(
config, config,
"GLOBAL_LLM_CONFIGS", "GLOBAL_LLM_CONFIGS",