mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-24 21:38:09 +02:00
refactor(auto_model_pin): simplify thread-level pinning by removing unused fields and indexes
This commit is contained in:
parent
1863f2832b
commit
421a4d7d08
5 changed files with 37 additions and 78 deletions
|
|
@ -4,10 +4,12 @@ Revision ID: 138
|
||||||
Revises: 137
|
Revises: 137
|
||||||
Create Date: 2026-04-30
|
Create Date: 2026-04-30
|
||||||
|
|
||||||
Add thread-level fields to persist Auto (Fastest) model pinning metadata:
|
Add a single thread-level column to persist the Auto (Fastest) model pin:
|
||||||
- pinned_llm_config_id: concrete resolved config id used for this thread
|
- pinned_llm_config_id: concrete resolved global LLM config id used for this
|
||||||
- pinned_auto_mode: auto policy identifier (currently "auto_fastest")
|
thread. NULL means "no pin; Auto will resolve on next turn".
|
||||||
- pinned_at: timestamp when the pin was created/refreshed
|
|
||||||
|
The column is unindexed: all reads are by new_chat_threads.id (primary key),
|
||||||
|
so a secondary index would be dead write amplification.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -27,29 +29,14 @@ def upgrade() -> None:
|
||||||
"ALTER TABLE new_chat_threads "
|
"ALTER TABLE new_chat_threads "
|
||||||
"ADD COLUMN IF NOT EXISTS pinned_llm_config_id INTEGER"
|
"ADD COLUMN IF NOT EXISTS pinned_llm_config_id INTEGER"
|
||||||
)
|
)
|
||||||
op.execute(
|
|
||||||
"ALTER TABLE new_chat_threads "
|
|
||||||
"ADD COLUMN IF NOT EXISTS pinned_auto_mode VARCHAR(32)"
|
|
||||||
)
|
|
||||||
op.execute(
|
|
||||||
"ALTER TABLE new_chat_threads "
|
|
||||||
"ADD COLUMN IF NOT EXISTS pinned_at TIMESTAMP WITH TIME ZONE"
|
|
||||||
)
|
|
||||||
|
|
||||||
op.execute(
|
|
||||||
"CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_llm_config_id "
|
|
||||||
"ON new_chat_threads (pinned_llm_config_id)"
|
|
||||||
)
|
|
||||||
op.execute(
|
|
||||||
"CREATE INDEX IF NOT EXISTS ix_new_chat_threads_pinned_auto_mode "
|
|
||||||
"ON new_chat_threads (pinned_auto_mode)"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
|
# Drop any shape the thread row may be carrying. The extra columns and
|
||||||
|
# indexes only exist on dev DBs that ran an earlier draft of 138; IF EXISTS
|
||||||
|
# makes each statement a safe no-op on the lean shape.
|
||||||
op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_auto_mode")
|
op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_auto_mode")
|
||||||
op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_llm_config_id")
|
op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_pinned_llm_config_id")
|
||||||
|
|
||||||
op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_at")
|
op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_at")
|
||||||
op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_auto_mode")
|
op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS pinned_auto_mode")
|
||||||
op.execute(
|
op.execute(
|
||||||
|
|
|
||||||
|
|
@ -638,13 +638,12 @@ class NewChatThread(BaseModel, TimestampMixin):
|
||||||
default=False,
|
default=False,
|
||||||
server_default="false",
|
server_default="false",
|
||||||
)
|
)
|
||||||
# Auto model pinning metadata:
|
# Auto (Fastest) model pin for this thread: concrete resolved global LLM
|
||||||
# - pinned_llm_config_id stores the concrete resolved model config id.
|
# config id. NULL means no pin; Auto will resolve on the next turn.
|
||||||
# - pinned_auto_mode indicates which auto policy produced the pin.
|
# Single-writer invariant: only app.services.auto_model_pin_service sets
|
||||||
# This allows Auto (Fastest) to resolve once per thread and stay stable.
|
# or clears this column (plus bulk clears when a search space's
|
||||||
pinned_llm_config_id = Column(Integer, nullable=True, index=True)
|
# agent_llm_id changes). Unindexed: all reads are by primary key.
|
||||||
pinned_auto_mode = Column(String(32), nullable=True, index=True)
|
pinned_llm_config_id = Column(Integer, nullable=True)
|
||||||
pinned_at = Column(TIMESTAMP(timezone=True), nullable=True)
|
|
||||||
|
|
||||||
# Relationships
|
# Relationships
|
||||||
search_space = relationship("SearchSpace", back_populates="new_chat_threads")
|
search_space = relationship("SearchSpace", back_populates="new_chat_threads")
|
||||||
|
|
|
||||||
|
|
@ -803,11 +803,7 @@ async def update_llm_preferences(
|
||||||
await session.execute(
|
await session.execute(
|
||||||
update(NewChatThread)
|
update(NewChatThread)
|
||||||
.where(NewChatThread.search_space_id == search_space_id)
|
.where(NewChatThread.search_space_id == search_space_id)
|
||||||
.values(
|
.values(pinned_llm_config_id=None)
|
||||||
pinned_llm_config_id=None,
|
|
||||||
pinned_auto_mode=None,
|
|
||||||
pinned_at=None,
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
"Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)",
|
"Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)",
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,14 @@
|
||||||
|
|
||||||
Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we
|
Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we
|
||||||
resolve that virtual mode to one concrete global LLM config exactly once and
|
resolve that virtual mode to one concrete global LLM config exactly once and
|
||||||
persist the chosen config id on ``new_chat_threads`` so subsequent turns are
|
persist the chosen config id on ``new_chat_threads.pinned_llm_config_id`` so
|
||||||
stable.
|
subsequent turns are stable.
|
||||||
|
|
||||||
|
Single-writer invariant: this module is the only writer of
|
||||||
|
``NewChatThread.pinned_llm_config_id`` (aside from the bulk clear in
|
||||||
|
``search_spaces_routes`` when a search space's ``agent_llm_id`` changes).
|
||||||
|
Therefore a non-NULL value unambiguously means "this thread has an
|
||||||
|
Auto-resolved pin"; no separate source/policy column is needed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -11,7 +17,6 @@ from __future__ import annotations
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import UTC, datetime
|
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
|
|
@ -90,10 +95,10 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
selected_llm_config_id: int,
|
selected_llm_config_id: int,
|
||||||
force_repin_free: bool = False,
|
force_repin_free: bool = False,
|
||||||
) -> AutoPinResolution:
|
) -> AutoPinResolution:
|
||||||
"""Resolve Auto (Fastest) to one concrete config id and persist pin metadata.
|
"""Resolve Auto (Fastest) to one concrete config id and persist the pin.
|
||||||
|
|
||||||
For non-auto selections, this function clears existing auto pin metadata and
|
For non-auto selections, this function clears any existing pin and returns
|
||||||
returns the selected id as-is.
|
the selected id as-is.
|
||||||
"""
|
"""
|
||||||
thread = (
|
thread = (
|
||||||
(
|
(
|
||||||
|
|
@ -113,16 +118,10 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
f"Thread {thread_id} does not belong to search space {search_space_id}"
|
f"Thread {thread_id} does not belong to search space {search_space_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Explicit model selected: clear stale auto pin metadata.
|
# Explicit model selected: clear any stale pin.
|
||||||
if selected_llm_config_id != AUTO_FASTEST_ID:
|
if selected_llm_config_id != AUTO_FASTEST_ID:
|
||||||
if (
|
if thread.pinned_llm_config_id is not None:
|
||||||
thread.pinned_llm_config_id is not None
|
|
||||||
or thread.pinned_auto_mode is not None
|
|
||||||
or thread.pinned_at is not None
|
|
||||||
):
|
|
||||||
thread.pinned_llm_config_id = None
|
thread.pinned_llm_config_id = None
|
||||||
thread.pinned_auto_mode = None
|
|
||||||
thread.pinned_at = None
|
|
||||||
await session.commit()
|
await session.commit()
|
||||||
return AutoPinResolution(
|
return AutoPinResolution(
|
||||||
resolved_llm_config_id=selected_llm_config_id,
|
resolved_llm_config_id=selected_llm_config_id,
|
||||||
|
|
@ -135,12 +134,11 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
raise ValueError("No usable global LLM configs are available for Auto mode")
|
raise ValueError("No usable global LLM configs are available for Auto mode")
|
||||||
candidate_by_id = {int(c["id"]): c for c in candidates}
|
candidate_by_id = {int(c["id"]): c for c in candidates}
|
||||||
|
|
||||||
# Reuse existing valid pin without re-checking current quota (no silent tier switch),
|
# Reuse an existing valid pin without re-checking current quota (no silent
|
||||||
# unless the caller explicitly requests a forced repin to free.
|
# tier switch), unless the caller explicitly requests a forced repin to free.
|
||||||
pinned_id = thread.pinned_llm_config_id
|
pinned_id = thread.pinned_llm_config_id
|
||||||
if (
|
if (
|
||||||
not force_repin_free
|
not force_repin_free
|
||||||
and thread.pinned_auto_mode == AUTO_FASTEST_MODE
|
|
||||||
and pinned_id is not None
|
and pinned_id is not None
|
||||||
and int(pinned_id) in candidate_by_id
|
and int(pinned_id) in candidate_by_id
|
||||||
):
|
):
|
||||||
|
|
@ -159,11 +157,10 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
)
|
)
|
||||||
if pinned_id is not None:
|
if pinned_id is not None:
|
||||||
logger.info(
|
logger.info(
|
||||||
"auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s pinned_auto_mode=%s",
|
"auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s",
|
||||||
thread_id,
|
thread_id,
|
||||||
search_space_id,
|
search_space_id,
|
||||||
pinned_id,
|
pinned_id,
|
||||||
thread.pinned_auto_mode,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
premium_eligible = (
|
premium_eligible = (
|
||||||
|
|
@ -184,8 +181,6 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
selected_tier = _tier_of(selected_cfg)
|
selected_tier = _tier_of(selected_cfg)
|
||||||
|
|
||||||
thread.pinned_llm_config_id = selected_id
|
thread.pinned_llm_config_id = selected_id
|
||||||
thread.pinned_auto_mode = AUTO_FASTEST_MODE
|
|
||||||
thread.pinned_at = datetime.now(UTC)
|
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
if force_repin_free:
|
if force_repin_free:
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ from types import SimpleNamespace
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from app.services.auto_model_pin_service import (
|
from app.services.auto_model_pin_service import (
|
||||||
AUTO_FASTEST_MODE,
|
|
||||||
resolve_or_get_pinned_llm_config_id,
|
resolve_or_get_pinned_llm_config_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -45,14 +44,11 @@ def _thread(
|
||||||
*,
|
*,
|
||||||
search_space_id: int = 10,
|
search_space_id: int = 10,
|
||||||
pinned_llm_config_id: int | None = None,
|
pinned_llm_config_id: int | None = None,
|
||||||
pinned_auto_mode: str | None = None,
|
|
||||||
):
|
):
|
||||||
return SimpleNamespace(
|
return SimpleNamespace(
|
||||||
id=1,
|
id=1,
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
pinned_llm_config_id=pinned_llm_config_id,
|
pinned_llm_config_id=pinned_llm_config_id,
|
||||||
pinned_auto_mode=pinned_auto_mode,
|
|
||||||
pinned_at=None,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -93,8 +89,6 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
|
||||||
)
|
)
|
||||||
assert result.resolved_llm_config_id in {-1, -2}
|
assert result.resolved_llm_config_id in {-1, -2}
|
||||||
assert session.thread.pinned_llm_config_id == result.resolved_llm_config_id
|
assert session.thread.pinned_llm_config_id == result.resolved_llm_config_id
|
||||||
assert session.thread.pinned_auto_mode == AUTO_FASTEST_MODE
|
|
||||||
assert session.thread.pinned_at is not None
|
|
||||||
assert session.commit_count == 1
|
assert session.commit_count == 1
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -102,9 +96,7 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
|
||||||
async def test_next_turn_reuses_existing_pin(monkeypatch):
|
async def test_next_turn_reuses_existing_pin(monkeypatch):
|
||||||
from app.config import config
|
from app.config import config
|
||||||
|
|
||||||
session = _FakeSession(
|
session = _FakeSession(_thread(pinned_llm_config_id=-1))
|
||||||
_thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
|
|
||||||
)
|
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
config,
|
config,
|
||||||
"GLOBAL_LLM_CONFIGS",
|
"GLOBAL_LLM_CONFIGS",
|
||||||
|
|
@ -228,9 +220,7 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
|
||||||
async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
|
async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
|
||||||
from app.config import config
|
from app.config import config
|
||||||
|
|
||||||
session = _FakeSession(
|
session = _FakeSession(_thread(pinned_llm_config_id=-1))
|
||||||
_thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
|
|
||||||
)
|
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
config,
|
config,
|
||||||
"GLOBAL_LLM_CONFIGS",
|
"GLOBAL_LLM_CONFIGS",
|
||||||
|
|
@ -275,9 +265,7 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
|
||||||
async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
|
async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
|
||||||
from app.config import config
|
from app.config import config
|
||||||
|
|
||||||
session = _FakeSession(
|
session = _FakeSession(_thread(pinned_llm_config_id=-1))
|
||||||
_thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE)
|
|
||||||
)
|
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
config,
|
config,
|
||||||
"GLOBAL_LLM_CONFIGS",
|
"GLOBAL_LLM_CONFIGS",
|
||||||
|
|
@ -325,9 +313,7 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
|
||||||
async def test_explicit_user_model_change_clears_pin(monkeypatch):
|
async def test_explicit_user_model_change_clears_pin(monkeypatch):
|
||||||
from app.config import config
|
from app.config import config
|
||||||
|
|
||||||
session = _FakeSession(
|
session = _FakeSession(_thread(pinned_llm_config_id=-2))
|
||||||
_thread(pinned_llm_config_id=-2, pinned_auto_mode=AUTO_FASTEST_MODE)
|
|
||||||
)
|
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
config,
|
config,
|
||||||
"GLOBAL_LLM_CONFIGS",
|
"GLOBAL_LLM_CONFIGS",
|
||||||
|
|
@ -345,8 +331,6 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
|
||||||
)
|
)
|
||||||
assert result.resolved_llm_config_id == 7
|
assert result.resolved_llm_config_id == 7
|
||||||
assert session.thread.pinned_llm_config_id is None
|
assert session.thread.pinned_llm_config_id is None
|
||||||
assert session.thread.pinned_auto_mode is None
|
|
||||||
assert session.thread.pinned_at is None
|
|
||||||
assert session.commit_count == 1
|
assert session.commit_count == 1
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -354,9 +338,7 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
|
||||||
async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
|
async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
|
||||||
from app.config import config
|
from app.config import config
|
||||||
|
|
||||||
session = _FakeSession(
|
session = _FakeSession(_thread(pinned_llm_config_id=-999))
|
||||||
_thread(pinned_llm_config_id=-999, pinned_auto_mode=AUTO_FASTEST_MODE)
|
|
||||||
)
|
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
config,
|
config,
|
||||||
"GLOBAL_LLM_CONFIGS",
|
"GLOBAL_LLM_CONFIGS",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue